python UI编程之实现word页码统计

用python制作了一个统计word页码的工具,能够统计选定目录下所有word文件的页码。先用pywin32调用Word把文档导出为PDF,再用PyPDF2读取PDF的页数,从而实现word页码统计功能;用PyQt5设计实现了工具UI界面。

主窗口界面代码mainwindow.py(WordHelper.py中通过 from mainwindow import * 引用)

# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'Main.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_Dialog(object):
    """Layout of the main window: a single push button labelled "开始统计页码"."""

    def setupUi(self, Dialog):
        """Configure ``Dialog`` and create its push button as ``self.pushButton``."""
        Dialog.setObjectName("Dialog")
        Dialog.resize(808, 808)
        Dialog.setAutoFillBackground(True)
        Dialog.setStyleSheet("")

        # Font used by the button; any other family or point size may be
        # substituted here.
        button_font = QtGui.QFont()
        button_font.setFamily("Arial")
        button_font.setPointSize(10)

        start_button = QtWidgets.QPushButton(Dialog)
        start_button.setObjectName("pushButton")
        start_button.setGeometry(QtCore.QRect(0, 0, 231, 41))
        start_button.setIconSize(QtCore.QSize(30, 30))
        start_button.setFont(button_font)
        self.pushButton = start_button

        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Apply the (translatable) window title and button caption."""
        tr = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(tr("Dialog", "Dialog"))
        self.pushButton.setText(tr("Dialog", "开始统计页码"))

主窗口功能代码WordHelper.py

import sys
from PyQt5.QtGui import QColor, QBrush, QPixmap
from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog
import os

from PageWindow import PageWindow
from mainwindow import *
from Page import *

class MainWindow(QMainWindow, Ui_Dialog):
    """Fixed-size 1024x600 main window with a stretched background picture."""

    def __init__(self):
        super().__init__()
        self.setupUi(self)
        self.setWindowTitle('Word助手')
        # The geometry is set before the picture is scaled because the scaling
        # below reads the window's current size.
        self.setGeometry(100, 100, 1024, 600)

        wallpaper = QPixmap("./win10.jpg")
        stretched = wallpaper.scaled(
            self.size(),
            QtCore.Qt.IgnoreAspectRatio,
            QtCore.Qt.SmoothTransformation,
        )
        background = QtGui.QPalette()
        background.setBrush(self.backgroundRole(), QBrush(stretched))
        self.setPalette(background)
        self.setAutoFillBackground(True)

        self.setFixedSize(1024, 600)

if __name__ == '__main__':
    # The application object has to exist before any widget is constructed.
    app = QApplication(sys.argv)

    main = MainWindow()
    pageWindow = PageWindow()
    # Clicking the main window's button shows the page-count window.
    main.pushButton.clicked.connect(pageWindow.openPage)
    main.show()

    exit_code = app.exec_()
    sys.exit(exit_code)

统计窗口界面代码Page.py

# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'Page.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_pageWindow(object):
    """Layout of the page-count window.

    Creates a directory row (label, line edit, open button), a list widget,
    a result row (label, spacer, start button), a table widget, a total
    label and seven sunken separator lines.
    """

    def setupUi(self, pageWindow):
        """Create every child widget of ``pageWindow`` and set its geometry."""

        def font_of_size(points):
            # Default font with only the point size changed.
            sized = QtGui.QFont()
            sized.setPointSize(points)
            return sized

        def add_rule(object_name, x, y, width, height, shape):
            # Sunken separator line placed directly on the window.
            rule = QtWidgets.QFrame(pageWindow)
            rule.setGeometry(QtCore.QRect(x, y, width, height))
            rule.setFrameShape(shape)
            rule.setFrameShadow(QtWidgets.QFrame.Sunken)
            rule.setObjectName(object_name)
            return rule

        horizontal = QtWidgets.QFrame.HLine
        vertical = QtWidgets.QFrame.VLine

        pageWindow.setObjectName("pageWindow")
        pageWindow.resize(792, 811)

        # Widgets are created in a fixed order on purpose: creation order is
        # kept identical to the generated layout.
        self.label = QtWidgets.QLabel(pageWindow)
        self.label.setObjectName("label")
        self.label.setGeometry(QtCore.QRect(40, 30, 71, 21))
        self.label.setFont(font_of_size(10))

        self.listWidget = QtWidgets.QListWidget(pageWindow)
        self.listWidget.setObjectName("listWidget")
        self.listWidget.setGeometry(QtCore.QRect(40, 110, 681, 231))

        self.tableWidget = QtWidgets.QTableWidget(pageWindow)
        self.tableWidget.setObjectName("tableWidget")
        self.tableWidget.setGeometry(QtCore.QRect(40, 410, 681, 281))
        self.tableWidget.setColumnCount(0)
        self.tableWidget.setRowCount(0)

        self.totalLabel = QtWidgets.QLabel(pageWindow)
        self.totalLabel.setObjectName("totalLabel")
        self.totalLabel.setGeometry(QtCore.QRect(40, 720, 251, 21))
        self.totalLabel.setFont(font_of_size(10))

        # Separator lines.
        self.line = add_rule("line", 20, 350, 721, 21, horizontal)
        self.line_2 = add_rule("line_2", 10, 20, 20, 331, vertical)
        self.line_3 = add_rule("line_3", 730, 20, 20, 331, vertical)
        self.line_4 = add_rule("line_4", 730, 370, 20, 331, vertical)
        self.line_5 = add_rule("line_5", 10, 370, 20, 331, vertical)
        self.line_6 = add_rule("line_6", 20, 700, 721, 21, horizontal)
        self.line_7 = add_rule("line_7", 20, 10, 721, 21, horizontal)

        # Directory row: prompt label, path line edit, "open" button.
        self.layoutWidget = QtWidgets.QWidget(pageWindow)
        self.layoutWidget.setObjectName("layoutWidget")
        self.layoutWidget.setGeometry(QtCore.QRect(40, 50, 661, 30))
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)

        self.label_2 = QtWidgets.QLabel(self.layoutWidget)
        self.label_2.setObjectName("label_2")
        self.label_2.setFont(font_of_size(8))
        self.horizontalLayout.addWidget(self.label_2)

        self.lineEdit = QtWidgets.QLineEdit(self.layoutWidget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)

        self.openButton = QtWidgets.QPushButton(self.layoutWidget)
        self.openButton.setObjectName("openButton")
        self.openButton.setFont(font_of_size(7))
        self.horizontalLayout.addWidget(self.openButton)

        # Result row: caption label, expanding spacer, "start" button.
        self.layoutWidget1 = QtWidgets.QWidget(pageWindow)
        self.layoutWidget1.setObjectName("layoutWidget1")
        self.layoutWidget1.setGeometry(QtCore.QRect(40, 370, 681, 30))
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.layoutWidget1)
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.horizontalLayout_2.setContentsMargins(0, 0, 0, 0)

        self.label_3 = QtWidgets.QLabel(self.layoutWidget1)
        self.label_3.setObjectName("label_3")
        self.label_3.setFont(font_of_size(10))
        self.horizontalLayout_2.addWidget(self.label_3)

        gap = QtWidgets.QSpacerItem(
            40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum
        )
        self.horizontalLayout_2.addItem(gap)

        self.resultButton = QtWidgets.QPushButton(self.layoutWidget1)
        self.resultButton.setObjectName("resultButton")
        self.resultButton.setFont(font_of_size(7))
        self.horizontalLayout_2.addWidget(self.resultButton)

        self.retranslateUi(pageWindow)
        QtCore.QMetaObject.connectSlotsByName(pageWindow)

    def retranslateUi(self, pageWindow):
        """Apply the (translatable) window title and widget captions."""
        tr = QtCore.QCoreApplication.translate
        pageWindow.setWindowTitle(tr("pageWindow", "Form"))
        captions = (
            (self.label, "源"),
            (self.totalLabel, "合计页码:"),
            (self.label_2, "请选择word文档所在目录:"),
            (self.openButton, "打开目录"),
            (self.label_3, "结果"),
            (self.resultButton, "开始统计"),
        )
        for widget, caption in captions:
            widget.setText(tr("pageWindow", caption))

统计窗口功能代码PageWindow.py

import _thread
import os
import threading

from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidgetItem
import mytool.wordtopdf as wordtopdf

from Page import *

class PageWindow(QMainWindow, Ui_pageWindow):
    """Window that lists the Word files of a directory and shows their page counts.

    The counting itself runs in a background thread; its result is handed
    back to the GUI thread through ``calculationFinished`` because Qt widgets
    must only be touched from the thread that owns them.
    """

    # Emitted by the worker thread with whatever the converter returned
    # (or None when the converter raised).
    calculationFinished = QtCore.pyqtSignal(object)

    def __init__(self):
        super(PageWindow, self).__init__()
        self.setupUi(self)
        self.setWindowTitle("统计页码")
        # The columns have to exist before their widths can be set; setting
        # the widths while the table still has zero columns has no effect.
        self.tableWidget.setColumnCount(2)
        self.tableWidget.setColumnWidth(0, 600)
        self.tableWidget.setColumnWidth(1, 100)
        self.tableWidget.setHorizontalHeaderLabels(["文件名", "页数"])
        self.openButton.clicked.connect(self.openDir)
        self.resultButton.clicked.connect(self.calculateClick)
        self.calculationFinished.connect(self.showResult)
        self.file_list = []

    def openDir(self):
        """Let the user pick a directory and list the Word files found in it."""
        dirStr = QFileDialog.getExistingDirectory(self, "打开目录或文件", r"E:\资料\论文")
        if not dirStr:
            # The dialog was cancelled; keep the current selection untouched.
            return
        self.lineEdit.setText(dirStr)
        # Start from a clean slate so that choosing a directory twice does not
        # duplicate entries in the list or in the later page count.
        self.file_list = []
        self.listWidget.clear()
        self.getAllWordFiles(dirStr)
        self.listWidget.addItems(self.file_list)

    def openPage(self):
        """Show this window."""
        self.show()

    def getAllWordFiles(self, dirStr):
        """Append the path of every .doc/.docx file directly inside ``dirStr``
        to ``self.file_list``.

        The extension check is case-insensitive. Names starting with ``~$``
        are skipped (presumably Word's lock files, which cannot be converted).
        """
        for file in os.listdir(dirStr):
            suffix = os.path.splitext(file)[1].lower()
            if suffix not in ('.doc', '.docx') or file.startswith('~$'):
                continue
            # The path is kept exactly as os.path.join builds it; it is passed
            # to the converter unchanged.
            file_path = os.path.join(dirStr, file)
            if os.path.isfile(file_path):
                self.file_list.append(file_path)

    def calculateClick(self):
        """Start the page count in a background thread."""
        self.totalLabel.setText("正在统计......")
        _thread.start_new_thread(self.calculate, ())

    def calculate(self):
        """Worker-thread body: run the converter and emit its result.

        No widget is touched here; the result is delivered to ``showResult``
        through the ``calculationFinished`` signal.
        """
        try:
            value_list = wordtopdf.wordtopdf1(list(self.file_list))
        except Exception as e:
            # Thread boundary: report the failure instead of letting the
            # thread die silently with the label stuck on "in progress".
            print(e)
            value_list = None
        print(value_list)
        self.calculationFinished.emit(value_list)

    def showResult(self, value_list):
        """Display the converter's result in the total label and the table.

        ``value_list`` is expected to be a ``(total_pages, rows)`` pair where
        each row is ``[file_name, page_count_text]``. Anything else is
        treated as a failed run.
        """
        if not isinstance(value_list, (tuple, list)) or len(value_list) != 2:
            self.totalLabel.setText("统计失败")
            return
        total_pages, result_list = value_list
        self.totalLabel.setText("合计页码为:" + str(total_pages))
        self.tableWidget.setRowCount(len(result_list))
        self.tableWidget.setColumnCount(2)
        for i, row in enumerate(result_list):
            for j in range(self.tableWidget.columnCount()):
                self.tableWidget.setItem(i, j, QTableWidgetItem(row[j]))

PyPDF统计页码代码--mytool/wordtopdf.py(PageWindow.py中通过 import mytool.wordtopdf 引用)

# -*- coding:utf-8 -*-
import os
from time import sleep

from win32com.client import Dispatch, DispatchEx  # 导入pywin32模块的client包下的函数
from win32com.client import constants  #  导入pywin32模块的client包下的保存COM常量的类
from win32com.client import gencache    #  导入pywin32模块的client包下的gencache函数
from PyPDF2 import  PdfFileReader  # 获取页码用
import pythoncom  # 导入封装了OLE自动化API的模块,该模块为pywin32的子模块
totalPages = 0  # 记录总页数的全局变量
returnlist = []  # 保存文件列表的全局变量

# Word转换为PDF(多个文件)
def wordtopdf(filelist,targetpath):
    """Convert several Word documents to PDF files inside ``targetpath``.

    Each PDF is named after its source document with the extension replaced
    by ``.pdf``. Paths are resolved with ``os.path`` instead of splitting on
    backslashes and changing the working directory, so documents at any
    directory depth work and the process working directory is left alone.
    A relative ``targetpath`` is resolved against the current working
    directory.

    Args:
        filelist: Iterable of paths of the Word documents to convert.
        targetpath: Directory that receives the generated PDF files.

    Returns:
        The list of absolute PDF paths on success, ``False`` as soon as one
        document could not be converted, or ``-1`` when a ``TypeError`` is
        raised during the conversion.
    """
    valueList = []
    try:
        # Initialise COM for the calling thread (avoids the "CoInitialize has
        # not been called" error with Word 2007).
        pythoncom.CoInitialize()
        gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
        target_dir = os.path.abspath(targetpath)
        w = Dispatch("Word.Application")
        try:
            for fullfilename in filelist:
                # Word needs absolute paths: its working directory is not the
                # one of the calling script.
                source = os.path.abspath(fullfilename)
                stem = os.path.splitext(os.path.basename(source))[0]
                pdf_name = os.path.join(target_dir, stem + ".pdf")

                document = None
                try:
                    document = w.Documents.Open(source, ReadOnly=1)
                    document.ExportAsFixedFormat(pdf_name, constants.wdExportFormatPDF,
                                                 Item=constants.wdExportDocumentWithMarkup,
                                                 CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
                except Exception as e:
                    print(e)
                finally:
                    # Close every opened document so they do not pile up
                    # inside the Word instance.
                    if document is not None:
                        document.Close(constants.wdDoNotSaveChanges)

                if os.path.isfile(pdf_name):
                    valueList.append(pdf_name)
                else:
                    print('转换失败!')
                    return False
        finally:
            # Runs on success, on the early "return False" and on errors, so
            # Word is never left running in the background.
            w.Quit(constants.wdDoNotSaveChanges)
        return valueList
    except TypeError as e:
        print('出错了!')
        print(e)
        return -1

# Word转换为PDF并提取页码
def wordtopdf1(filelist):
    """Count the pages of several Word documents by exporting them to PDF.

    Every document is exported to a throw-away PDF inside a temporary
    directory, the PDF's page count is read, and the PDF is deleted again.
    Exporting to a temporary directory (rather than next to the document)
    means an existing PDF with the same name is never overwritten or deleted.

    Args:
        filelist: Iterable of paths of the Word documents to measure.

    Returns:
        ``(total_pages, rows)`` on success, where ``rows`` is a list of
        ``[path_as_given, page_count_as_str]``; ``False`` as soon as one
        document could not be converted; ``-1`` when a ``TypeError`` is
        raised during the conversion.
    """
    # Imported here so the block works without touching the module's import
    # section.
    import tempfile

    totalPages = 0
    valueList = []
    try:
        # Initialise COM for the calling thread (avoids the "CoInitialize has
        # not been called" error with Word 2007).
        pythoncom.CoInitialize()
        gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
        w = Dispatch("Word.Application")
        try:
            with tempfile.TemporaryDirectory() as scratch_dir:
                for index, fullfilename in enumerate(filelist):
                    # Word needs absolute paths: its working directory is not
                    # the one of the calling script.
                    source = os.path.abspath(fullfilename)
                    pdf_name = os.path.join(scratch_dir, "%d.pdf" % index)

                    document = None
                    try:
                        # NOTE(review): the pause is kept from the original;
                        # presumably it gives Word time to settle between
                        # documents - confirm whether it is still needed.
                        sleep(1)
                        document = w.Documents.Open(source, ReadOnly=1)
                        document.ExportAsFixedFormat(pdf_name, constants.wdExportFormatPDF,
                                                     Item=constants.wdExportDocumentWithMarkup,
                                                     CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
                    except Exception as e:
                        print(e)
                    finally:
                        # Close every opened document so they do not pile up
                        # inside the Word instance.
                        if document is not None:
                            document.Close(constants.wdDoNotSaveChanges)

                    if not os.path.isfile(pdf_name):
                        print('转换失败!')
                        return False

                    pages = getPdfPageNum(pdf_name)
                    valueList.append([fullfilename, str(pages)])
                    totalPages += pages
                    os.remove(pdf_name)  # keep the scratch directory small
        finally:
            # Runs on success, on the early "return False" and on errors, so
            # Word is never left running in the background.
            w.Quit(constants.wdDoNotSaveChanges)
        return totalPages,valueList
    except TypeError as e:
        print('出错了!')
        print(e)
        return -1


####################### 统计页码 ############################################

def getPdfPageNum(path):
    """Return the number of pages of the PDF file at ``path``."""
    with open(path, "rb") as pdf_stream:
        return PdfFileReader(pdf_stream).getNumPages()


####################### 提取目录 ############################################

def getPdfOutlines(pdfpath,listpath,isList):
    """Write the outline (bookmarks) of a PDF into a new Word document.

    The outline entries are collected with ``getOutline`` and inserted, one
    after another, into a fresh Word document that is saved as ``list.docx``
    inside ``listpath``.

    Args:
        pdfpath: Path of the PDF file to read.
        listpath: Directory in which ``list.docx`` is created.
        isList: Passed through to ``getOutline``; when true each entry also
            carries a page number.

    Returns:
        The path of the saved ``list.docx``.
    """
    global returnlist  # module-level accumulator used by getOutline
    print("提取目录")
    with open(pdfpath, "rb") as file:
        doc = PdfFileReader(file)
        outlines = doc.getOutlines()
        returnlist = []  # start from an empty list for this PDF
        mylist = getOutline(outlines,isList)
    # The PDF is only needed to collect the entries, so it is closed before
    # Word is started.

    outpath = os.path.join(listpath,'list.docx')
    w = DispatchEx("Word.Application")
    try:
        w.Visible = 1
        w.DisplayAlerts = 0
        doc1 = w.Documents.Add()
        range1 = doc1.Range(0,0)
        for item in mylist:
            range1.InsertAfter(item)
        doc1.SaveAs(outpath)
        doc1.Close()
    finally:
        # Always shut the Word instance down, even when one of the steps
        # above fails. 0 is wdDoNotSaveChanges, so a half-built document
        # cannot trigger a save prompt.
        w.Quit(0)
    return outpath


def getOutline(obj,isList):
    """Flatten a nested PDF outline into a list of text lines.

    ``obj`` is walked recursively: nested lists are descended into, and every
    item whose type is named ``Destination`` yields one line. Each call
    starts from an empty result, so entries from an earlier call no longer
    leak into a later one. The result is also stored in the module-level
    ``returnlist`` for code that reads it from there.

    Args:
        obj: Iterable of outline items, possibly containing nested lists.
        isList: When true each line is ``title + "\\t\\t" + page + "\\n"``,
            otherwise ``title + "\\n"``.

    Returns:
        The list of lines, in outline order.
    """
    global returnlist

    def collect(nodes, lines):
        for o in nodes:
            # Compared by class name because the Destination class itself is
            # not imported in this module.
            if type(o).__name__ == 'Destination':
                if isList:  # include the page number
                    # NOTE(review): assumes '/Page' holds an integer page
                    # index; confirm against the PDF library, which may store
                    # a page reference here instead.
                    lines.append(o.get('/Title') + "\t\t" + str(o.get('/Page') + 1) + "\n")
                else:       # title only
                    lines.append(o.get('/Title') + "\n")
            elif isinstance(o, list):
                collect(o, lines)
        return lines

    returnlist = collect(obj, [])
    return returnlist

最终效果

posted @ 2023-01-09 23:21  z5onk0  阅读(89)  评论(0编辑  收藏  举报