Python 实现 PDF 表格转 Excel
pip install pdfplumber -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install tqdm -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install openpyxl -i https://pypi.tuna.tsinghua.edu.cn/simple
import logging

import pdfplumber
from openpyxl import Workbook
from tqdm import tqdm

logger = logging.getLogger(__name__)

# Input PDF whose tables are extracted.
file_name = 'C:\\Users\\mm\\Desktop\\123.pdf'
# Output workbook. openpyxl writes the XLSX format regardless of the file
# name, so the path uses the .xlsx extension (a .xls name would not match
# the file's real contents).
data_name = 'C:\\Users\\mm\\Desktop\\123.xlsx'


def analysis_table(pdf_file_path, excel_file_path=None):
    """Copy the table found on each page of a PDF into one Excel sheet.

    For every page, the table returned by ``page.extract_table()`` is
    appended row by row to the active worksheet of a new workbook. The
    first cell of every row is dropped (``row[1:]``). Pages on which no
    table is found, or whose processing raises an error, are skipped so
    that the remaining pages are still exported.

    Args:
        pdf_file_path: Path of the PDF file to read.
        excel_file_path: Path of the workbook to write. Defaults to the
            module-level ``data_name`` when omitted.
    """
    if excel_file_path is None:
        excel_file_path = data_name

    workbook = Workbook()
    sheet = workbook.active

    with pdfplumber.open(pdf_file_path) as pdf:
        # tqdm wraps the page list directly so the progress bar knows the total.
        for page_number, page in enumerate(tqdm(pdf.pages), start=1):
            try:
                table = page.extract_table()
                # extract_table() yields None when the page has no table;
                # skip that page instead of abandoning all later pages.
                if not table:
                    continue
                for row in table:
                    sheet.append(row[1:])
            except Exception:
                # Best effort: report the failing page and keep going.
                logger.exception("Skipping page %d: table could not be exported", page_number)
                continue

    # Save once, after the PDF has been closed.
    workbook.save(filename=excel_file_path)


analysis_table(file_name)