读取PDF中表格

练习笔记

"""
# @Time : 2022/11/14 14:04
# @Author : Gina Gao
# @File :
# @Software: PyCharm
# @Description: 表格跨页显示
"""
import pandas as pd
import pdfplumber
# Read every table from the PDF and merge them into one DataFrame, so that a
# table which continues across several pages ends up as a single table.
path = r'C:\Users\EDY\Desktop\小高天天乐测试版.pdf'

# Accumulate plain row lists and build the DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop is quadratic anyway.
rows = []
# The context manager closes the underlying file handle even on error.
with pdfplumber.open(path) as pdf_read:
    print(pdf_read.pages)
    for page in pdf_read.pages:
        # extract_tables() returns a list of tables, each table being a list
        # of rows; flatten so every table row becomes one DataFrame row.
        for table in page.extract_tables():
            rows.extend(table)

df = pd.DataFrame(rows)
# Guard against PDFs without any table: df.iloc[0] would raise IndexError.
if not df.empty:
    # Use the first extracted row as the column names.
    df.columns = df.iloc[0].values
    df = df.iloc[1:]
    # Renumber the index from 0 after dropping the header row.
    df.reset_index(inplace=True, drop=True)
print(df)

 

posted @ 2022-11-14 17:38  yongqi-911  阅读(61)  评论(0)  编辑  收藏  举报