234234234

Python 实现 PDF 表格转 Excel

pip install pdfplumber  -i https://pypi.tuna.tsinghua.edu.cn/simple

pip install tqdm  -i https://pypi.tuna.tsinghua.edu.cn/simple

 

import pdfplumber
from openpyxl import Workbook
from tqdm import tqdm
# Hard-coded input PDF and output workbook paths on the author's Windows desktop.
# An earlier run read its PDF from under `data_folder` instead of this path.
file_name = 'C:\\Users\\mm\\Desktop\\123.pdf'
# openpyxl writes the Office Open XML workbook format, so the output must use
# the .xlsx extension; a .xls name makes Excel report a format/extension mismatch.
data_name = 'C:\\Users\\mm\\Desktop\\123.xlsx'

def analysis_table(pdf_file_path, excel_file_path=None):
    """Copy the table extracted from each PDF page into one worksheet.

    Every page of the PDF is passed through ``page.extract_table()``; each
    resulting row, minus its first column, is appended to the active sheet
    of a new workbook, which is then saved.

    Args:
        pdf_file_path: Path of the PDF file to read.
        excel_file_path: Path the workbook is saved to. When omitted, the
            module-level ``data_name`` is used.
    """
    if excel_file_path is None:
        excel_file_path = data_name

    # Create the workbook that collects the rows from all pages.
    workbook = Workbook()
    sheet = workbook.active

    with pdfplumber.open(pdf_file_path) as pdf:
        for page_number, page in enumerate(tqdm(pdf.pages), start=1):
            try:
                table = page.extract_table()
            except Exception as exc:
                # Best effort: one unreadable page must not discard the
                # pages that follow it, so report the failure and move on.
                tqdm.write(f"page {page_number}: table extraction failed: {exc!r}")
                continue

            if not table:
                # No table was found on this page; skip it instead of
                # aborting the whole conversion.
                continue

            for row in table:
                # The first column of every row is dropped.
                sheet.append(row[1:])

    workbook.save(filename=excel_file_path)

# Run the conversion on the configured input PDF.
analysis_table(pdf_file_path=file_name)

 

posted @ 2023-04-20 15:09  你若愿意,我一定去  阅读(201)  评论(0)  编辑  收藏  举报
23423423423