LibreOffice 各类文件转换的 FilterName
# Document families used as the inner keys of LIBREOFFICE_EXPORT_TYPES.
# The export table refers to them by position, so the order matters.
LIBREOFFICE_DOC_FAMILIES = [
    "TextDocument",   # [0]
    "WebDocument",    # [1]
    "Spreadsheet",    # [2]
    "Presentation",   # [3]
    "Graphics",       # [4]
]

# Load properties keyed by input file extension (without the dot).
# NOTE(review): some of these values look like UI labels rather than
# internal filter names -- verify against the installed LibreOffice.
LIBREOFFICE_IMPORT_TYPES = {
    "docx": {"FilterName": "MS Word 2007 XML"},
    "pdf": {"FilterName": "PDF - Portable Document Format"},
    "jpg": {"FilterName": "JPEG - Joint Photographic Experts Group"},
    "html": {"FilterName": "HTML Document"},
    "odp": {"FilterName": "OpenDocument Presentation (Flat XML)"},
    "pptx": {"FilterName": "Microsoft PowerPoint 2007 XML"},
}

# Store properties keyed by output extension, then by document family.
# A missing family means that kind of document has no export filter listed
# for the given output format.
LIBREOFFICE_EXPORT_TYPES = {
    "pdf": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "writer_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "writer_web_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "calc_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_pdf_Export"},
    },
    "jpg": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_jpg_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_jpg_Export"},
    },
    "html": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "HTML (StarWriter)"},
        LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "HTML"},
        LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "HTML (StarCalc)"},
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_html_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_html_Export"},
    },
    "docx": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "MS Word 2007 XML"},
    },
    "odp": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress8"},
    },
    "pptx": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "Impress MS PowerPoint 2007 XML"},
    },
}
转:
convert_test
#!/usr/bin/env python3
"""
Convert office documents between formats through a headless LibreOffice
instance driven over the UNO bridge.

VIEW COMPLETE CODE AT
=====================
* https://github.com/six519/libreoffice_convert

THANKS
======
* Thanks to: Mirko Nasato for his PyODConverter http://www.artofsolving.com/opensource/pyodconverter

TESTED USING
============
* Fedora release 20 (Heisenbug)
* Python 3.3.2

INSTALL DEPENDENCIES
====================
* yum install libreoffice-sdk
"""

import os
import subprocess
import time

# `uno` must be imported before anything from the `com.sun.star` namespace.
import uno
from com.sun.star.beans import PropertyValue

LIBREOFFICE_DEFAULT_PORT = 6519
LIBREOFFICE_DEFAULT_HOST = "localhost"

# Document families used as the inner keys of LIBREOFFICE_EXPORT_TYPES.
# The export table refers to them by position, so the order matters.
LIBREOFFICE_DOC_FAMILIES = [
    "TextDocument",   # [0]
    "WebDocument",    # [1]
    "Spreadsheet",    # [2]
    "Presentation",   # [3]
    "Graphics",       # [4]
]

# Load properties keyed by input file extension (without the dot).
# NOTE(review): some of these values look like UI labels rather than
# internal filter names -- verify against the installed LibreOffice.
LIBREOFFICE_IMPORT_TYPES = {
    "docx": {"FilterName": "MS Word 2007 XML"},
    "pdf": {"FilterName": "PDF - Portable Document Format"},
    "jpg": {"FilterName": "JPEG - Joint Photographic Experts Group"},
    "html": {"FilterName": "HTML Document"},
    "odp": {"FilterName": "OpenDocument Presentation (Flat XML)"},
    "pptx": {"FilterName": "Microsoft PowerPoint 2007 XML"},
}

# Store properties keyed by output extension, then by document family.
LIBREOFFICE_EXPORT_TYPES = {
    "pdf": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "writer_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "writer_web_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "calc_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_pdf_Export"},
    },
    "jpg": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_jpg_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_jpg_Export"},
    },
    "html": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "HTML (StarWriter)"},
        LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "HTML"},
        LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "HTML (StarCalc)"},
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_html_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_html_Export"},
    },
    "docx": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "MS Word 2007 XML"},
    },
    "odp": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress8"},
    },
    "pptx": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "Impress MS PowerPoint 2007 XML"},
    },
}

# UNO service -> document family, probed in this order. WebDocument has to
# come before GenericTextDocument: a web document also reports the generic
# text service, so the generic check would otherwise always win.
_FAMILY_BY_SERVICE = (
    ("com.sun.star.text.WebDocument", LIBREOFFICE_DOC_FAMILIES[1]),
    ("com.sun.star.text.GenericTextDocument", LIBREOFFICE_DOC_FAMILIES[0]),
    ("com.sun.star.sheet.SpreadsheetDocument", LIBREOFFICE_DOC_FAMILIES[2]),
    ("com.sun.star.presentation.PresentationDocument", LIBREOFFICE_DOC_FAMILIES[3]),
    ("com.sun.star.drawing.DrawingDocument", LIBREOFFICE_DOC_FAMILIES[4]),
)


class PythonLibreOffice(object):
    """Launch a headless ``soffice`` and convert files through it.

    Construction never raises for connection problems: if the office
    process cannot be started or reached, ``desktop`` stays ``None`` and
    the reason is available from ``lastError``.
    """

    def __init__(self, host=LIBREOFFICE_DEFAULT_HOST, port=LIBREOFFICE_DEFAULT_PORT):
        """Start ``soffice`` listening on *host*:*port* and connect to it."""
        self.host = host
        self.port = port
        # Initialised first so that runUnoProcess() can record a launch error.
        self.__lastErrorMessage = ""
        self.local_context = uno.getComponentContext()
        self.resolver = self.local_context.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", self.local_context)
        # Built from the instance's host/port so the constructor arguments
        # actually take effect (they used to be ignored in favour of the
        # module defaults).
        self.connectionString = "socket,host=%s,port=%s;urp;StarOffice.ComponentContext" % (
            self.host, self.port)
        self.context = None
        self.desktop = None

        self.runUnoProcess()

        try:
            self.context = self.resolver.resolve("uno:%s" % self.connectionString)
            self.desktop = self.context.ServiceManager.createInstanceWithContext(
                "com.sun.star.frame.Desktop", self.context)
        except Exception as e:
            self.__lastErrorMessage = str(e)

    @property
    def lastError(self):
        """Message of the most recent recorded failure ("" if none)."""
        return self.__lastErrorMessage

    def terminateProcess(self):
        """Ask the connected office to terminate.

        Returns True unless the terminate call raised, in which case the
        exception text is stored in ``lastError`` and False is returned.
        """
        try:
            if self.desktop:
                self.desktop.terminate()
        except Exception as e:
            self.__lastErrorMessage = str(e)
            return False
        return True

    def convertFile(self, outputFormat, inputFilename):
        """Convert *inputFilename* to *outputFormat*.

        The result is written next to the input, with the extension
        replaced by *outputFormat*. Returns True on success. On any
        failure the office process is terminated, ``lastError`` describes
        the problem and False is returned.
        """
        if self._convert(outputFormat, inputFilename):
            return True

        # Keep the conversion error: it is more useful to the caller than
        # whatever terminating the office might report afterwards.
        conversionError = self.__lastErrorMessage
        self.terminateProcess()
        if conversionError:
            self.__lastErrorMessage = conversionError
        return False

    def _convert(self, outputFormat, inputFilename):
        """Load, export and close one document; True only if all succeed."""
        if not self.desktop:
            if not self.__lastErrorMessage:
                self.__lastErrorMessage = "Not connected to LibreOffice"
            return False

        baseName, extension = os.path.splitext(inputFilename)
        # Extensions are matched case-insensitively ("FILE.DOCX" works).
        inputFormat = extension.replace(".", "").lower()
        outputFilename = "%s.%s" % (baseName, outputFormat)

        if inputFormat not in LIBREOFFICE_IMPORT_TYPES:
            self.__lastErrorMessage = "Unsupported input format: %r" % inputFormat
            return False
        if outputFormat not in LIBREOFFICE_EXPORT_TYPES:
            self.__lastErrorMessage = "Unsupported output format: %r" % outputFormat
            return False
        exportFilters = LIBREOFFICE_EXPORT_TYPES[outputFormat]

        inputUrl = uno.systemPathToFileUrl(os.path.abspath(inputFilename))
        outputUrl = uno.systemPathToFileUrl(os.path.abspath(outputFilename))

        inputProperties = {"Hidden": True}
        inputProperties.update(LIBREOFFICE_IMPORT_TYPES[inputFormat])

        doc = None
        try:
            doc = self.desktop.loadComponentFromURL(
                inputUrl, "_blank", 0, self.propertyTuple(inputProperties))
            if doc is None:
                self.__lastErrorMessage = "Could not load %s" % inputFilename
                return False

            try:
                doc.refresh()
            except Exception:
                # Best effort: the refresh is optional and may not be
                # available on the loaded document.
                pass

            docFamily = self.getDocumentFamily(doc)
            if not docFamily:
                self.__lastErrorMessage = "Unrecognised document type: %s" % inputFilename
                return False
            if docFamily not in exportFilters:
                self.__lastErrorMessage = "Cannot export %s as %r" % (docFamily, outputFormat)
                return False

            doc.storeToURL(outputUrl, self.propertyTuple(exportFilters[docFamily]))
            doc.close(True)
            doc = None  # closed cleanly; nothing left for the finally clause
            return True
        except Exception as e:
            self.__lastErrorMessage = str(e)
            return False
        finally:
            # Do not leave a half-processed document open in the office.
            if doc is not None:
                try:
                    doc.close(True)
                except Exception:
                    pass

    def propertyTuple(self, propDict):
        """Turn a ``{name: value}`` dict into a tuple of UNO PropertyValue."""
        properties = []
        for name, value in propDict.items():
            prop = PropertyValue()
            prop.Name = name
            prop.Value = value
            properties.append(prop)
        return tuple(properties)

    def getDocumentFamily(self, doc):
        """Return the LIBREOFFICE_DOC_FAMILIES entry matching *doc*.

        Returns None when no known service matches or when *doc* cannot be
        queried at all.
        """
        try:
            for serviceName, family in _FAMILY_BY_SERVICE:
                if doc.supportsService(serviceName):
                    return family
        except Exception:
            pass
        return None

    def runUnoProcess(self):
        """Start a headless ``soffice`` accepting UNO connections.

        The command is passed as an argument list (no shell), so the
        connection string needs no quoting. A failure to launch is stored
        in ``lastError`` instead of being raised.
        """
        command = [
            "soffice",
            "--headless",
            "--norestore",
            "--accept=%s" % self.connectionString,
        ]
        try:
            subprocess.Popen(command, stdin=None, stdout=None, stderr=None)
        except OSError as e:
            self.__lastErrorMessage = str(e)
            return
        # Give the office time to open its socket before we try to connect.
        time.sleep(3)


if __name__ == "__main__":
    test_libreoffice = PythonLibreOffice()
    # convert MS Word Document file (docx) to PDF
    test_libreoffice.convertFile("pdf", "document.docx")
联系方式:emhhbmdfbGlhbmcxOTkxQDEyNi5jb20=
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· DeepSeek 开源周回顾「GitHub 热点速览」
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了