Live2D

pdf读取 pdf

PDF读取
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.layout import LAParams
from pdfminer.converter import PDFPageAggregator


def readpdf(url):
    """Extract the text of a PDF file and return it as a list of lines.

    The text of every layout object that exposes ``get_text`` is written,
    page by page, to ``pdf.txt`` (relative to the current working
    directory, overwriting any existing file). That file is then read back
    and its lines are returned.

    Args:
        url: Path of the PDF file to read, e.g.
            ``"C:\\\\dgfegg\\\\Desktop\\\\综述.pdf"``.

    Returns:
        A list of strings, one per line of ``pdf.txt``; each line keeps its
        trailing newline when it has one.

    Raises:
        OSError: If the PDF cannot be opened or ``pdf.txt`` cannot be
            written or read.
    """
    # Open the PDF first so that a bad path fails before pdf.txt is
    # truncated; ``with`` closes both handles even if parsing raises.
    with open(url, "rb") as fp, \
            open("pdf.txt", "w", encoding="utf-8") as txt:
        parser = PDFParser(fp)
        doc = PDFDocument(parser)
        parser.set_document(doc)

        resource = PDFResourceManager()
        laparam = LAParams()
        device = PDFPageAggregator(resource, laparams=laparam)
        interpreter = PDFPageInterpreter(resource, device)

        for page in PDFPage.create_pages(doc):
            interpreter.process_page(page)
            layout = device.get_result()
            for out in layout:
                # Only some layout objects carry text; skip the others.
                if hasattr(out, "get_text"):
                    txt.write(out.get_text())

    # Read the dump back so the result is split into lines exactly as the
    # file on disk is.
    with open("pdf.txt", "r", encoding="utf-8") as txt:
        return txt.readlines()


lines = readpdf(r'C:\123\345\567.pdf')
for line in lines:
    # Each line usually still ends with its own newline, so print() leaves
    # a blank line after it.
    print(line)

 

posted @ 2019-03-02 21:17  -涂涂-  阅读(95)  评论(0)  编辑  收藏  举报