读取PDF中表格
练习笔记
"""Read a table that spans several PDF pages into a single DataFrame.

# @Time : 2022/11/14 14:04
# @Author : Gina Gao
# @File :
# @Software: PyCharm
# @Descript: table displayed across pages
"""
import pandas as pd
import pdfplumber

path = r'C:\Users\EDY\Desktop\小高天天乐测试版.pdf'

# Gather the rows of every table on every page into one flat list.
# `extract_tables()` yields a list of tables, each table being a list of rows,
# so the rows must be flattened before they are handed to pandas; building the
# frame once at the end also avoids the removed `DataFrame.append`.
rows = []
with pdfplumber.open(path) as pdf_read:  # the context manager closes the file
    print(pdf_read.pages)
    for page in pdf_read.pages:
        for table in page.extract_tables():
            rows.extend(table)

df = pd.DataFrame(rows)

# Guard against a PDF in which no table was found: `iloc[0]` would raise.
if not df.empty:
    # Use the first extracted row as the column names.
    df.columns = df.iloc[0].values
    df = df.iloc[1:]
    # Renumber the index from 0 now that the header row has been dropped.
    df = df.reset_index(drop=True)

print(df)