python实现excel转换成pdf
1、安装
需要安装 pywin32 包,通过 COM 接口调用本机的 Microsoft Office 来操作文件,因此只能在已安装 Office 的 Windows 上运行。可以批量转换为 pdf 文件,支持 doc、docx、ppt、pptx、xls、xlsx 格式。
pip install pywin32
2、office文件 (word, ppt, excel等) 转为pdf
# -*- coding: utf-8 -*-
"""Batch-convert Office documents (Word, Excel, PowerPoint) to PDF.

Conversion drives the locally installed Office applications through COM, so
it needs Windows, Microsoft Office and the ``pywin32`` package.
"""
import os


class PDFConverter:
    """Collect Office files from a file or folder and export each as a PDF.

    Every PDF is written to a ``pdfconver`` sub-folder of ``export`` and is
    named after the source file's base name, so two sources that share a
    base name (for example ``a.doc`` and ``a.xls``) write to the same PDF.
    """

    # Supported extensions; each one has a conversion method of the same
    # name, which is how run_conver() dispatches.
    _handle_postfix = ('doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx')

    def __init__(self, pathname, export='.'):
        """Prepare the output folder and collect the files to convert.

        Args:
            pathname: A single Office file, or a folder that is searched
                recursively.
            export: Folder in which the ``pdfconver`` output folder is
                created. Defaults to the current working directory.

        Raises:
            TypeError: If ``pathname`` does not exist, or is a single file
                with an unsupported extension.
        """
        self._filename_list = []
        # Absolute path: the export calls below are handed this path as-is.
        self._export_folder = os.path.join(os.path.abspath(export), 'pdfconver')
        os.makedirs(self._export_folder, exist_ok=True)
        self._enumerate_filename(pathname)

    def _enumerate_filename(self, pathname):
        """Fill the work list from a single file or, recursively, a folder."""
        full_pathname = os.path.abspath(pathname)
        if os.path.isfile(full_pathname):
            if not self._is_legal_postfix(full_pathname):
                raise TypeError('文件 {} 后缀名不合法!仅支持如下文件类型:{}。'.format(
                    pathname, '、'.join(self._handle_postfix)))
            self._filename_list.append(full_pathname)
        elif os.path.isdir(full_pathname):
            # os.walk already yields absolute directory paths here because
            # the root it was given is absolute.
            for dirpath, _, files in os.walk(full_pathname):
                for name in files:
                    filename = os.path.join(dirpath, name)
                    if self._is_legal_postfix(filename):
                        self._filename_list.append(filename)
        else:
            raise TypeError('文件/文件夹 {} 不存在或不合法!'.format(pathname))

    @staticmethod
    def _postfix(filename):
        """Return the lower-cased extension of ``filename`` without the dot."""
        return os.path.splitext(filename)[1][1:].lower()

    def _is_legal_postfix(self, filename):
        """Return True for a supported extension, skipping ``~``-prefixed files."""
        # Files starting with "~" are skipped (e.g. the "~$name.docx"
        # owner files Office keeps next to an open document).
        if os.path.basename(filename).startswith('~'):
            return False
        return self._postfix(filename) in self._handle_postfix

    def _export_path(self, filename):
        """Return the PDF path for ``filename`` inside the export folder."""
        # splitext strips only the final extension, so "my.report.docx"
        # becomes "my.report.pdf" rather than "my.pdf".
        stem = os.path.splitext(os.path.basename(filename))[0]
        return os.path.join(self._export_folder, stem + '.pdf')

    def run_conver(self):
        """Convert every collected file, dispatching on its extension."""
        print('需要转换的文件数:', len(self._filename_list))
        for filename in self._filename_list:
            convert = getattr(self, self._postfix(filename))
            print('原文件:', filename)
            convert(filename)
        print('转换完成!')

    def doc(self, filename):
        """Convert a doc/docx file to PDF with Word."""
        # Imported lazily so that file discovery works without pywin32.
        from win32com.client import Dispatch, constants, gencache

        exportfile = self._export_path(filename)
        # Load the Word type library so the wd* constants are available.
        gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
        word = Dispatch("Word.Application")
        try:
            document = word.Documents.Open(filename)
            document.ExportAsFixedFormat(
                exportfile,
                constants.wdExportFormatPDF,
                Item=constants.wdExportDocumentWithMarkup,
                CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
            print('保存 PDF 文件:', exportfile)
        finally:
            # Always quit, otherwise a failed export leaves Word running.
            word.Quit(constants.wdDoNotSaveChanges)

    def docx(self, filename):
        """Convert a docx file; same handling as doc."""
        self.doc(filename)

    def xls(self, filename):
        """Convert an xls/xlsx workbook to PDF with Excel."""
        from win32com.client import DispatchEx

        exportfile = self._export_path(filename)
        excel = DispatchEx("Excel.Application")
        try:
            excel.Visible = False
            excel.DisplayAlerts = 0
            workbook = excel.Workbooks.Open(filename, False)
            try:
                workbook.ExportAsFixedFormat(0, exportfile)  # 0 = PDF
                print('保存 PDF 文件:', exportfile)
            finally:
                workbook.Close(False)
        finally:
            excel.Quit()

    def xlsx(self, filename):
        """Convert an xlsx file; same handling as xls."""
        self.xls(filename)

    def ppt(self, filename):
        """Convert a ppt/pptx presentation to PDF with PowerPoint."""
        from win32com.client import Dispatch

        exportfile = self._export_path(filename)
        powerpoint = Dispatch("PowerPoint.Application")
        try:
            # ReadOnly=False, Untitled=False, WithWindow=False
            presentation = powerpoint.Presentations.Open(
                filename, False, False, False)
            try:
                presentation.ExportAsFixedFormat(exportfile, 2, PrintRange=None)  # 2 = PDF
                print('保存 PDF 文件:', exportfile)
            finally:
                presentation.Close()
        finally:
            powerpoint.Quit()

    def pptx(self, filename):
        """Convert a pptx file; same handling as ppt."""
        self.ppt(filename)


if __name__ == "__main__":
    # Batch mode: convert everything under ./tmp.
    folder = 'tmp'
    pathname = os.path.join(os.path.abspath('.'), folder)
    # A single file works as well, e.g. pathname = 'test.doc'
    pdfConverter = PDFConverter(pathname)
    pdfConverter.run_conver()
3、excel的不同sheet存为pdf
# -*- coding: utf-8 -*-
"""Export each worksheet of an Excel workbook to its own PDF.

Conversion drives the locally installed Excel through COM, so it needs
Windows, Microsoft Excel and the ``pywin32`` package.
"""
import os


class PDFConverter:
    """Collect Excel workbooks and export their worksheets as separate PDFs.

    Each PDF is written to a ``pdfconver`` sub-folder of ``export`` and is
    named ``<sheet name>.pdf``.

    NOTE: because the file name is the sheet name only, converting several
    workbooks that share a sheet name (e.g. ``Sheet1``) overwrites the
    earlier PDF.
    """

    # Only Excel formats are listed: this class defines no converter for
    # Word or PowerPoint files.
    _handle_postfix = ('xls', 'xlsx')

    def __init__(self, pathname, sheetnum=None, export='.'):
        """Prepare the output folder and collect the workbooks to convert.

        Args:
            pathname: A single workbook, or a folder that is searched
                recursively.
            sheetnum: Number of leading worksheets to export from each
                workbook. ``None`` (default) exports all of them.
            export: Folder in which the ``pdfconver`` output folder is
                created. Defaults to the current working directory.

        Raises:
            TypeError: If ``pathname`` does not exist, or is a single file
                with an unsupported extension.
        """
        self.sheetnum = sheetnum
        self._filename_list = []
        # Absolute path: the export call below is handed this path as-is.
        self._export_folder = os.path.join(os.path.abspath(export), 'pdfconver')
        os.makedirs(self._export_folder, exist_ok=True)
        self._enumerate_filename(pathname)

    def _enumerate_filename(self, pathname):
        """Fill the work list from a single file or, recursively, a folder."""
        full_pathname = os.path.abspath(pathname)
        if os.path.isfile(full_pathname):
            if not self._is_legal_postfix(full_pathname):
                raise TypeError('文件 {} 后缀名不合法!仅支持如下文件类型:{}。'.format(
                    pathname, '、'.join(self._handle_postfix)))
            self._filename_list.append(full_pathname)
        elif os.path.isdir(full_pathname):
            # os.walk already yields absolute directory paths here because
            # the root it was given is absolute.
            for dirpath, _, files in os.walk(full_pathname):
                for name in files:
                    filename = os.path.join(dirpath, name)
                    if self._is_legal_postfix(filename):
                        self._filename_list.append(filename)
        else:
            raise TypeError('文件/文件夹 {} 不存在或不合法!'.format(pathname))

    @staticmethod
    def _postfix(filename):
        """Return the lower-cased extension of ``filename`` without the dot."""
        return os.path.splitext(filename)[1][1:].lower()

    def _is_legal_postfix(self, filename):
        """Return True for a supported extension, skipping ``~``-prefixed files."""
        # Files starting with "~" are skipped (e.g. the "~$name.xlsx"
        # owner files Office keeps next to an open workbook).
        if os.path.basename(filename).startswith('~'):
            return False
        return self._postfix(filename) in self._handle_postfix

    def _export_path(self, sheet_name):
        """Return the PDF path for a worksheet inside the export folder."""
        return os.path.join(self._export_folder, sheet_name + '.pdf')

    def _sheet_count(self, available):
        """Return how many of the ``available`` worksheets to export."""
        if self.sheetnum is None:
            return available
        # Never index past the sheets the workbook really has.
        return max(0, min(self.sheetnum, available))

    def run_conver(self):
        """Convert every collected workbook, dispatching on its extension."""
        print('需要转换的文件数:', len(self._filename_list))
        for filename in self._filename_list:
            convert = getattr(self, self._postfix(filename))
            print('原文件:', filename)
            convert(filename)
        print('转换完成!')

    def xls(self, filename):
        """Export the worksheets of an xls/xlsx workbook, one PDF per sheet."""
        # Imported lazily so that file discovery works without pywin32.
        from win32com.client import DispatchEx

        excel = DispatchEx("Excel.Application")
        try:
            excel.Visible = False
            excel.DisplayAlerts = 0
            workbook = excel.Workbooks.Open(filename, False)
            try:
                worksheets = workbook.Worksheets
                # COM collections are 1-based.
                for index in range(1, self._sheet_count(worksheets.Count) + 1):
                    sheet = worksheets(index)
                    exportfile = self._export_path(sheet.Name)
                    sheet.ExportAsFixedFormat(0, exportfile)  # 0 = PDF
                    print('保存 PDF 文件:', exportfile)
            finally:
                workbook.Close(False)
        finally:
            # Always quit, otherwise a failed export leaves Excel running.
            excel.Quit()

    def xlsx(self, filename):
        """Convert an xlsx file; same handling as xls."""
        self.xls(filename)


if __name__ == "__main__":
    # Convert a single workbook; the sheet count is read from the workbook
    # itself when it is opened, so no separate reader is needed.
    pathname = u'原始数据.xlsx'
    pdfConverter = PDFConverter(pathname)
    pdfConverter.run_conver()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· .NET10 - 预览版1新功能体验(一)
2021-04-02 typedef的用法总结
2021-04-02 #define 设计的精巧