Linux上使用python处理docx转pdf教程
今天在使用flask将生成好的docx文档转化为pdf的过程中遇到了一些问题:本来在Windows上转化得好好的,但是到了Linux上却直接报错,显示 ModuleNotFoundError: No module named 'win32com'。
很明显,它说的是在Linux系统下并没有win32com这个模块。通过百度发现,python使用docx2pdf这个包将docx转化为pdf时,依赖本机安装的Microsoft Word(在Windows上通过win32com调用),因此无法在Linux上使用。那么在Linux上我们应该使用什么来进行文档的转化呢?百度后发现了一个解决方法:传送门。
但是上述链接中的代码已经有一段时间没有更新了,而pywpsrpc的代码已经更新,目前最新的使用方法可以去GitHub上查看官方文档。
需要注意的是,使用前需要先安装好qt5-default。我使用的Linux版本是Ubuntu 23.04,安装时会报错,所以把百度到的别人的解决方案粘在下面。
以下是代码解决方案,代码来自于官方,我仅仅调用了其中的函数:
#!/usr/bin/python3

#**
# * Copyright (c) 2020 Weitian Leung
# *
# * This file is part of pywpsrpc.
# *
# * This file is distributed under the MIT License.
# * See the LICENSE file for details.
# *
#*

"""Convert documents to another format through the WPS Office RPC API.

Adapted from the pywpsrpc ``convertto`` example.  Compared with the upstream
example this version always quits the WPS application and closes each
document (even when a conversion aborts), skips sub-directories when a
folder is given, and makes the command line exit with a non-zero status on
failure.
"""

import os
import sys
import argparse

from pywpsrpc.rpcwpsapi import (createWpsRpcInstance, wpsapi)
from pywpsrpc.common import (S_OK, QtApp)


# Maps the command-line format name (also used as the output file extension)
# to the wpsapi constant passed as FileFormat to SaveAs2.
formats = {
    "doc": wpsapi.wdFormatDocument,
    "docx": wpsapi.wdFormatXMLDocument,
    "rtf": wpsapi.wdFormatRTF,
    "html": wpsapi.wdFormatHTML,
    "pdf": wpsapi.wdFormatPDF,
    "xml": wpsapi.wdFormatXML,
}


class ConvertException(Exception):
    """Raised when an RPC call or a file conversion returns a non-S_OK code."""

    def __init__(self, text, hr):
        # Populate Exception.args as well so repr() and pickling carry the
        # details.
        super().__init__(text, hr)
        # text: human-readable description of the step that failed.
        self.text = text
        # hr: the result code returned by the failing call.
        self.hr = hr

    def __str__(self):
        # Mask to 32 bits so a negative result code is shown as unsigned hex.
        return """Convert failed: Details: {} ErrCode: {} """.format(
            self.text, hex(self.hr & 0xFFFFFFFF))


def convert_to(paths, format, abort_on_fails=False):
    """Convert every file named by *paths* to *format*.

    Args:
        paths: iterable of file or directory paths.  For a directory, each
            regular file directly inside it is converted (no recursion).
        format: a key of ``formats``; also used as the output extension.
        abort_on_fails: when true, raise on the first file whose conversion
            does not return S_OK; otherwise failures are ignored and the
            remaining files are still processed.

    Raises:
        ConvertException: if the RPC instance or the WPS application cannot
            be obtained, or if a conversion fails and *abort_on_fails* is set.
    """
    hr, rpc = createWpsRpcInstance()
    if hr != S_OK:
        raise ConvertException("Can't create the rpc instance", hr)

    hr, app = rpc.getWpsApplication()
    if hr != S_OK:
        raise ConvertException("Can't get the application", hr)

    try:
        # we don't need the gui
        app.Visible = False

        docs = app.Documents

        for path in paths:
            abs_path = os.path.realpath(path)
            if os.path.isdir(abs_path):
                # Only regular files: sub-directories (notably the "out"
                # folder created by convert_file on a previous run) cannot be
                # opened as documents and would count as failures.
                entries = (os.path.join(abs_path, name)
                           for name in os.listdir(abs_path))
                files = [entry for entry in entries if os.path.isfile(entry)]
            else:
                files = [abs_path]

            for file in files:
                hr = convert_file(file, docs, format)
                if abort_on_fails and hr != S_OK:
                    raise ConvertException("convert_file failed", hr)
    finally:
        # Quit even when aborting, otherwise the WPS application started for
        # this conversion is left running.
        app.Quit()


def convert_file(file, docs, format):
    """Open *file* read-only and save it as *format* under ``<dir>/out/``.

    Args:
        file: path of the document to convert.
        docs: the ``Documents`` collection of the WPS application.
        format: a key of ``formats``; also used as the output extension.

    Returns:
        The result code of ``docs.Open`` if opening failed, otherwise the
        value returned by ``doc.SaveAs2``.
    """
    hr, doc = docs.Open(file, ReadOnly=True)
    if hr != S_OK:
        return hr

    try:
        out_dir = os.path.join(os.path.dirname(os.path.realpath(file)), "out")
        os.makedirs(out_dir, exist_ok=True)

        # NOTE: an already existing new_file is not handled here; the caller
        # has to deal with that case.
        stem = os.path.splitext(os.path.basename(file))[0]
        new_file = os.path.join(out_dir, stem + "." + format)
        return doc.SaveAs2(new_file, FileFormat=formats[format])
    finally:
        # always close the doc
        doc.Close(wpsapi.wdDoNotSaveChanges)


def main():
    """Command-line entry point: parse arguments and run the conversion."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--format", "-f",
                        required=True,
                        metavar="<DOC_TYPE>",
                        # Derived from ``formats`` so the two cannot drift.
                        choices=list(formats),
                        help="convert to <DOC_TYPE>,")

    parser.add_argument("--abort", "-a",
                        action="store_true",
                        help="abort if one convert fails")

    parser.add_argument("path",
                        metavar="<path>",
                        nargs='+',
                        help="the <path> can be one or more file or folder")

    args = parser.parse_args()

    # Created before any RPC call, as in the upstream example; the reference
    # is kept for the duration of main().
    qApp = QtApp(sys.argv)

    try:
        convert_to(args.path, args.format, args.abort)
    except ConvertException as e:
        # Report on stderr and signal the failure through the exit status.
        print(e, file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
上面是官方代码,下面是我的flask调用函数:
from .convertto import convert_to


def _add_report_picture(run, item):
    """Embed the picture referenced by ``item.image`` into *run* (14.5 x 5.2 cm)."""
    # lstrip('/') keeps a leading slash in item.image from making
    # os.path.join discard current_app.root_path.
    image_path = os.path.join(current_app.root_path, item.image.lstrip('/'))
    run.add_picture(image_path, width=docx.shared.Cm(14.5), height=docx.shared.Cm(5.2))


def _replace_table_placeholders(doc, placeholders):
    """Replace each placeholder key found in the table cells of *doc*.

    The replacement is done run by run so the run's formatting is kept.  A
    placeholder that is split across several runs is therefore not replaced.
    """
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for key, value in placeholders.items():
                    if key not in cell.text:
                        continue
                    # str.replace needs a string; missing values become ''.
                    text = '' if value is None else str(value)
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            if key in run.text:
                                run.text = run.text.replace(key, text)


def _fill_first_record(doc, item):
    """Fill the template's own {{description}} / {{image}} placeholders."""
    for paragraph in doc.paragraphs:
        if '{{description}}' in paragraph.text:
            paragraph.text = paragraph.text.replace(
                '{{description}}', '\t' + (item.description or ''))
        if '{{image}}' in paragraph.text:
            # Drop the placeholder text, then append the picture in a new run.
            paragraph.text = paragraph.text.replace('{{image}}', '')
            _add_report_picture(paragraph.add_run(), item)


def _insert_extra_record(doc, item):
    """Insert a description + picture paragraph above the '报告医师:' line.

    A blank paragraph followed by the new paragraph is inserted before the
    paragraph that precedes the first one containing '报告医师:'.  Nothing is
    inserted if no such paragraph exists.
    """
    paragraphs = doc.paragraphs
    for i, paragraph in enumerate(paragraphs):
        if '报告医师:' not in paragraph.text:
            continue

        # Anchor on the preceding paragraph; when the match is the very first
        # paragraph there is none, so anchor on the match itself (a plain
        # i - 1 would wrap around to the last paragraph).
        anchor = paragraphs[i - 1] if i > 0 else paragraph
        anchor.insert_paragraph_before()  # blank separator line
        new_paragraph = anchor.insert_paragraph_before()

        heading_run = new_paragraph.add_run("诊断描述:\n")
        heading_run.font.name = '宋体'
        heading_run.font.size = Pt(12)
        new_paragraph.add_run('\t')  # tab used as indentation
        new_paragraph.add_run(item.description)

        picture_heading_run = new_paragraph.add_run("\n诊断图片:\n")
        picture_heading_run.font.name = '宋体'
        picture_heading_run.font.size = Pt(12)
        _add_report_picture(new_paragraph.add_run(), item)
        return


@medical.route('/insertMoadlDocx', methods=['POST', 'GET'])
@login_required
def insertModalDocx():
    """Generate the DOCX report for an image, convert it to PDF, store both paths.

    Reads ``image_id`` from the submitted form.  Responds with a JSON message
    and 200 on success, 400 when ``image_id`` is missing, 404 when a required
    database row is missing, and 500 with the error text on any other failure
    (including a failed PDF conversion).
    """
    try:
        image_id = request.form.get('image_id')
        if not image_id:
            return jsonify({'error': '缺少 image_id 参数'}), 400

        # Look up the records the report is built from.
        not_found_message = '未找到对应的影像或用户信息'
        medical_picture_info = MedicalPicture.query.filter_by(id=image_id).first()
        if medical_picture_info is None:
            return jsonify({'error': not_found_message}), 404
        user_info = User.query.filter_by(id=medical_picture_info.user_id).first()
        if user_info is None:
            return jsonify({'error': not_found_message}), 404
        user_message_info = UserMessage.query.filter_by(user_id=user_info.id).first()
        if user_message_info is None:
            return jsonify({'error': not_found_message}), 404
        modal_list_info = ModalList.query.filter_by(image_id=image_id).all()

        # Load the DOCX template.
        template_path = os.path.join(current_app.root_path, 'static', 'word', 'template.docx')
        doc = Document(template_path)

        # Fill the table placeholders.
        placeholders = {
            '{{username}}': user_info.username,
            '{{name}}': user_message_info.name,
            '{{sex}}': '男' if user_message_info.sex == 1 else '女',
            '{{age}}': str(user_message_info.age),
            '{{imageType}}': medical_picture_info.imageType,
            '{{uploadTime}}': str(medical_picture_info.uploadTime),
            '{{phone}}': user_message_info.phone,
            '{{idCard}}': str(user_message_info.idCard),
            '{{asset}}': user_message_info.asset
        }
        _replace_table_placeholders(doc, placeholders)

        # The first ModalList record fills the template's placeholders; every
        # further record is inserted above the reporting-physician line.
        for index, item in enumerate(modal_list_info):
            if index == 0:
                _fill_first_record(doc, item)
            else:
                _insert_extra_record(doc, item)
            # Blank paragraph intended to separate records.
            # NOTE(review): the original listing lost its indentation; this
            # call is assumed to run once per record - confirm.
            doc.add_paragraph()

        # The report lives in a folder named after the file (without suffix).
        folder_name = f"{image_id}_{user_message_info.name}_{medical_picture_info.imageType}"
        docx_filename = folder_name + '.docx'
        # convert_file() names its output "<input stem>.<format>", so derive
        # the PDF name from the same stem instead of a string replace.
        pdf_filename = folder_name + '.pdf'

        docx_folder = os.path.join(current_app.root_path, 'static', 'word', folder_name)
        os.makedirs(docx_folder, exist_ok=True)

        docx_path = os.path.join(docx_folder, docx_filename)
        doc.save(docx_path)

        # The former win32com-based conversion only works on Windows; on
        # Linux the conversion goes through WPS via convert_to().
        # abort_on_fails=True makes a failed conversion raise instead of
        # silently recording a PDF path for a file that was never written.
        # NOTE(review): the pywpsrpc command-line example creates
        # QtApp(sys.argv) before calling convert_to - confirm the web process
        # does the equivalent.
        convert_to([docx_path], "pdf", abort_on_fails=True)

        # URL-style paths (always forward slashes) stored in the database;
        # convert_file() writes its result into the "out" sub-folder.
        docx_save_path = f"/static/word/{folder_name}/{docx_filename}"
        pdf_save_path = f"/static/word/{folder_name}/out/{pdf_filename}"

        medical_picture_info.pdf_path = pdf_save_path
        medical_picture_info.docx_path = docx_save_path
        db.session.commit()

        return jsonify({'message': '报告生成成功!'}), 200
    except Exception as e:
        # Request boundary: keep the traceback in the log, report the text.
        current_app.logger.exception("insertModalDocx failed")
        return jsonify({'error': str(e)}), 500
本文作者:信2005-2刘海涛
本文链接:https://www.cnblogs.com/lht020321/p/18111285
版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南