pdfminer模块批量处理PDF文件

from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, PDFTextExtractionNotAllowed
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LTTextBoxHorizontal, LAParams, LTTextLineHorizontal, LTFigure, LTRect, LTLine, LTCurve
import os
 
 
class PdfForString(object):
    """Batch-extract the text of every PDF file found in one directory.

    The instance owns a single pdfminer resource manager, layout aggregator
    and page interpreter that are reused for all documents. Call
    ``get_string()`` to print the text of every horizontal text box of every
    page of every PDF in the directory.
    """

    def __init__(self, pdf_dir=r'E:\StockExchange\PDF'):
        """Collect the PDF file names and build the shared pdfminer objects.

        Args:
            pdf_dir: Directory that holds the PDF files. The default is the
                location that was previously hard-coded, so existing
                ``PdfForString()`` calls keep working.

        Raises:
            OSError: If ``pdf_dir`` cannot be listed.
        """
        # The same directory is used both for listing and for opening the
        # files; previously the names were listed from one location and the
        # paths were built from another (derived from ``__file__``).
        self.pdf_dir = pdf_dir
        # Names of the PDF files in the directory; other entries are skipped
        # because the parser cannot handle them.
        self.pdf_list = [
            name for name in os.listdir(pdf_dir) if self._is_pdf_name(name)
        ]
        # Resource manager: stores shared document resources.
        self.src = PDFResourceManager()
        # Device object: aggregates each processed page into a layout tree.
        self.device = PDFPageAggregator(self.src, laparams=LAParams())
        # Interpreter object: renders page contents onto the device.
        self.inter = PDFPageInterpreter(self.src, self.device)

    @staticmethod
    def _is_pdf_name(name):
        """Return True when ``name`` ends with ``.pdf`` (case-insensitive)."""
        return name.lower().endswith('.pdf')

    def for_string(self):
        """Yield the path of each listed PDF file inside ``self.pdf_dir``."""
        for pdf in self.pdf_list:
            yield os.path.join(self.pdf_dir, pdf)

    def pdf_analysis(self):
        """Yield, for each PDF file, an iterator over its pages.

        The page iterator reads from the underlying file lazily, so the file
        is kept open while the caller consumes the yielded pages and is
        closed as soon as the caller asks for the next document (or the
        generator is closed).
        """
        for path in self.for_string():
            with open(path, 'rb') as pd_file:
                parser = PDFParser(pd_file)  # parser bound to the open file

                # Document object, linked to the parser in both directions.
                document = PDFDocument()
                parser.set_document(document)
                document.set_parser(parser)
                # Documented set-up step after set_parser(); the default
                # (empty) password is used for encrypted files.
                document.initialize()
                yield document.get_pages()

    def get_string(self):
        """Print the text of every horizontal text box in every PDF page."""
        for pages in self.pdf_analysis():
            for page in pages:
                self.inter.process_page(page)
                # Layout tree produced by the aggregator for this page.
                layout = self.device.get_result()
                for x in layout:
                    if isinstance(x, LTTextBoxHorizontal):
                        print(x.get_text())
 
 
# Run the batch extraction: build the extractor, then print the text of
# every PDF it finds.
_batch_extractor = PdfForString()
_batch_extractor.get_string()

--------转自屁桃

posted @ 2019-11-28 11:21  Stone李  阅读(585)  评论(0)  编辑  收藏  举报
// Page click effect: on every click inside <body>, take the next label from a
// fixed list, place it at the click position in a random colour, float it
// upwards while fading it out, then remove it.
var a_idx = 0; // index of the next label to show; wraps around the list
jQuery(document).ready(function ($) {
  $("body").click(function (event) {
    var labels = ["", "", "", "", "", "", "♪", "", ""];
    // NOTE(review): the string passed to $() is empty in this copy, so the
    // resulting jQuery set is empty - presumably the element markup was
    // stripped when the page was extracted; confirm against the original page.
    var $label = $("").text(labels[a_idx]);
    a_idx = (a_idx + 1) % labels.length;

    var clickX = event.pageX;
    var clickY = event.pageY;

    // Random RGB components, truncated to integers.
    var red = ~~(255 * Math.random());
    var green = ~~(255 * Math.random());
    var blue = ~~(255 * Math.random());

    $label.css({
      "z-index": 999999999999999999999999999999999999999999999999999999999999999999999,
      "top": clickY - 20,
      "left": clickX,
      "position": "absolute",
      "font-weight": "bold",
      "color": "rgb(" + red + "," + green + "," + blue + ")"
    });
    $("body").append($label);

    // Float upwards and fade out over 1.5 s, then drop the element.
    $label.animate({ "top": clickY - 180, "opacity": 0 }, 1500, function () {
      $label.remove();
    });
  });
});