PDF转换提取文字:Qt 与 Python 的简单应用笔记
原文链接:https://blog.csdn.net/XMG9017/article/details/126782483?spm=1001.2014.3001.5501
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'PDFto_txt.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_Form(object):
    """Widget layout of the PDF conversion tool's form.

    Meant to be mixed into (or applied to) a QWidget: ``setupUi`` creates the
    child widgets as attributes of ``self`` and ``retranslateUi`` fills in
    their visible texts.
    """

    def setupUi(self, Form):
        """Create all child widgets on *Form*, then apply the UI texts."""

        def place(widget, object_name, x, y, width, height):
            # Every widget gets its geometry first and its object name second.
            widget.setGeometry(QtCore.QRect(x, y, width, height))
            widget.setObjectName(object_name)
            return widget

        Form.setObjectName("Form")
        Form.resize(791, 507)
        Form.setFocusPolicy(QtCore.Qt.NoFocus)

        # Container that parents every other widget of the form.
        box = QtWidgets.QGroupBox(Form)
        self.groupBox = place(box, "groupBox", 80, 20, 611, 421)

        # File chooser button, caption label and path display.
        self.pushButton = place(
            QtWidgets.QPushButton(box), "pushButton", 190, 90, 291, 41)
        self.label = place(QtWidgets.QLabel(box), "label", 50, 140, 81, 31)
        self.lineEdit = place(
            QtWidgets.QLineEdit(box), "lineEdit", 130, 140, 451, 31)

        # Action buttons, created in the same order as in the .ui file.
        action_buttons = (
            ("pushButton_2", (130, 200, 91, 41)),
            ("pushButton_3", (130, 280, 91, 41)),
            ("pushButton_4", (430, 280, 91, 41)),
            ("pushButton_5", (280, 200, 91, 41)),
            ("pushButton_6", (430, 200, 91, 41)),
        )
        for attr_name, rect in action_buttons:
            button = QtWidgets.QPushButton(box)
            setattr(self, attr_name, place(button, attr_name, *rect))

        # Banner label: starts empty, retranslateUi() sets the real text.
        banner = QtWidgets.QLabel(box)
        banner.setGeometry(QtCore.QRect(70, 30, 521, 41))
        banner.setText("")
        banner.setObjectName("label_2")
        self.label_2 = banner

        self.retranslateUi(Form)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set the window title, group title and every widget caption."""
        tr = QtCore.QCoreApplication.translate
        Form.setWindowTitle(tr("Form", "PDF转换工具"))
        self.groupBox.setTitle(tr("Form", "主菜单"))

        captions = (
            (self.pushButton, "选择文件(*.pdf)"),
            (self.label, "待处理文件"),
            (self.label_2, "欢迎使用PDF转换工具"),
            (self.pushButton_2, "pdf转txt"),
            (self.pushButton_3, "打开处理结果"),
            (self.pushButton_4, "退出"),
            (self.pushButton_5, "pdf转word"),
            (self.pushButton_6, "pdf转excel"),
        )
        for widget, caption in captions:
            widget.setText(tr("Form", caption))
import os
import shutil
import sys
import time

import pandas as pd
import pdfplumber
from PyQt5 import QtWidgets
from PyQt5.QtCore import QFileInfo
from PyQt5.QtWidgets import QFileDialog, QMessageBox

from PDFto_txt import Ui_Form

# Folder (relative to the current working directory) that receives every
# conversion result.
OUTPUT_DIR = '处理结果'

# Start every run with an empty output folder so results left over from a
# previous session are not mixed with new ones.
if os.path.isdir(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR)


class mywindow(QtWidgets.QWidget, Ui_Form):
    """Main window: pick a PDF and write its extracted text to OUTPUT_DIR."""

    def __init__(self):
        super(mywindow, self).__init__()
        self.setupUi(self)
        # Path of the PDF chosen in the file dialog; empty until one is picked.
        self._pdf_path = ''
        # Connect each button's clicked signal to the slot that handles it.
        self.pushButton.clicked.connect(self.shuruwenjianjia)
        self.pushButton_3.clicked.connect(self.DKJG)
        self.pushButton_2.clicked.connect(self.pdf_txt)
        self.pushButton_4.clicked.connect(self.jieshu)
        self.pushButton_5.clicked.connect(self.pdf_word)
        self.pushButton_6.clicked.connect(self.pdf_excel)

    @staticmethod
    def _extract_text(pdf_path):
        """Return the text of all pages of *pdf_path*, one page per chunk.

        The last line of every page is dropped; the original notes describe
        it as the page number printed at the bottom of the page.  Pages are
        joined with a newline so the last line of one page is not glued to
        the first line of the next.
        """
        chunks = []
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                # A page without extractable text yields nothing (presumably
                # None or '' depending on the pdfplumber release); skip it
                # instead of failing on .split().
                if not text:
                    continue
                chunks.append('\n'.join(text.split('\n')[:-1]))
        return '\n'.join(chunks)

    def _convert(self, out_name, done_message):
        """Write the selected PDF's text to OUTPUT_DIR/out_name.

        Shows a warning when no PDF has been chosen yet, and an error dialog
        when reading the PDF or writing the result fails.  On success,
        *done_message* is displayed in the banner label.
        """
        pdf_path = getattr(self, '_pdf_path', '')
        if not pdf_path:
            QMessageBox.warning(self, '提示', '请先选择PDF文件')
            return

        out_path = os.path.join(OUTPUT_DIR, out_name)
        try:
            content = self._extract_text(pdf_path)
            # The folder may have been removed while the program was running.
            os.makedirs(OUTPUT_DIR, exist_ok=True)
            # Explicit UTF-8 so characters outside the locale's default
            # code page cannot make the write fail.
            with open(out_path, 'w', encoding='utf-8') as out_file:
                out_file.write(content)
        except Exception as exc:  # slot boundary: report instead of escaping
            QMessageBox.critical(self, '错误', '处理失败: {}'.format(exc))
            return

        print('处理完成')
        self.label_2.setText(done_message)

    def DKJG(self):
        """Open the output folder in the system file browser."""
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        if hasattr(os, 'startfile'):
            os.startfile(OUTPUT_DIR)
        else:
            # os.startfile only exists on Windows; elsewhere show the
            # location so the user can open it manually.
            QMessageBox.information(self, '提示', os.path.abspath(OUTPUT_DIR))

    def pdf_txt(self):
        """Save the selected PDF's text as pdf-txt.txt."""
        self._convert('pdf-txt.txt', 'pdf转换txt处理完成!')

    def pdf_word(self):
        """Save the selected PDF's text as pdf-word.docx.

        NOTE(review): the file holds plain text under a .docx name, not a
        real Word document; producing one needs a document-writing library
        that this file does not import.
        """
        self._convert('pdf-word.docx', 'pdf转换word处理完成!')

    def pdf_excel(self):
        """Save the selected PDF's text as pdf-word.xlsx.

        NOTE(review): as with pdf_word, the content is plain text, not a real
        workbook.  The 'pdf-word.xlsx' name looks like a copy-paste leftover;
        it is kept so existing result paths do not change.
        """
        self._convert('pdf-word.xlsx', 'pdf转换excel处理完成!')

    def shuruwenjianjia(self):
        """Ask the user for a PDF file and show its path in the line edit."""
        chosen, _selected_filter = QFileDialog.getOpenFileName(
            self, "选择PDF文件", "/", "Text Files (*.pdf)")
        if not chosen:
            # Dialog cancelled: keep whatever was selected before.
            return
        print(str(chosen))
        self._pdf_path = chosen
        self.lineEdit.setText(chosen)

    def jieshu(self):
        """Exit: close the window (os.close() needs a file descriptor)."""
        self.close()


if __name__ == "__main__":
    app = QtWidgets.QApplication(sys.argv)
    ui = mywindow()
    ui.show()
    sys.exit(app.exec_())
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· Open-Sora 2.0 重磅开源!
· 周边上新:园子的第一款马克杯温暖上架