C#实现文档转换成PDF
网上有很多将doc、ppt、xls等类型的文档转换成pdf的方法,目前了解到的有两大类:
1.使用虚拟打印机将doc、ppt、xls等类型的文档打印成PDF
2.使用OFFICE COM组件
我采用了第二种方法实现,初步测试通过,还没有放到服务器上进行批量实时转换的测试。
下面开始介绍详细步骤:
1.安装OFFICE 2007.我安装的是OFFICE 2007 Professional Plus版。安装后提示要激活,开始没有激活也能使用,只是每次一打开office软件就提示要激活,实在忍受不了,就下了一个激活破解补丁。我用的是Office 2007 最新全系列激活验证破解补丁(适用于2007任何版本)绿色免费版激活的。
2.安装"另存为PDF或XPS加载项",可以从官网下载,其他地方也有一大把下载链接。我是从这个地址下载的
3.新建项目,添加如下引用:
Microsoft PowerPoint 12.0 Object Library
Microsoft Word 12.0 Object Library
Microsoft Excel 12.0 Object Library
这三个引用在“添加引用”对话框的COM选项卡里,只有安装了OFFICE 2007后才能看到,系统里安装的是OFFICE 2003的话,看到的是11.0的。
4.添加以上COM引用后,在项目的引用目录下,会看到自动添加了“Microsoft.Office.Interop.Word”、“Microsoft.Office.Interop.Excel”、“Microsoft.Office.Interop.PowerPoint”、“Microsoft.Office.Core”四个引用项,分别右击前三个,选属性,选择“嵌入互操作类型”值为false。如不做此项操作,编译项目时会出现“无法嵌入互操作类型‘Microsoft.Office.Interop.Excel.ApplicationClass’。请改用适用的接口”的错误提示。
5.在代码中添加如下命名空间引用:
1 using Microsoft.Office.Core; 2 using Microsoft.Office.Interop.Excel; 3 using Microsoft.Office.Interop.PowerPoint; 4 using Word = Microsoft.Office.Interop.Word; 5 using Excel = Microsoft.Office.Interop.Excel; 6 using PowerPoint = Microsoft.Office.Interop.PowerPoint;
开始我以为不用第1-3行,结果发现没有这三行编译通不过。第4-6行的作用仅仅是为了在后面代码中简写命名空间。
6.添加如下三个转换函数:
/// <summary>
/// Converts a Word document to a fixed-layout file (e.g. PDF) through Word COM automation.
/// </summary>
/// <param name="sourcePath">Path of the Word document to open.</param>
/// <param name="targetPath">Path of the exported file.</param>
/// <param name="exportFormat">Fixed format handed to <c>ExportAsFixedFormat</c>.</param>
/// <returns>
/// <c>true</c> when the document was opened and exported; <c>false</c> when
/// <c>Documents.Open</c> returned no document.
/// </returns>
/// <remarks>
/// This overload has no <c>catch</c>: exceptions raised while opening or exporting
/// propagate to the caller, unlike the Excel and PowerPoint overloads below.
/// </remarks>
private bool Convert(string sourcePath, string targetPath, Word.WdExportFormat exportFormat)
{
    bool result = false;
    object paramMissing = Type.Missing;
    // Never let Word save (or prompt to save) while shutting down an unattended conversion.
    object paramDoNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;
    Word.ApplicationClass wordApplication = new Word.ApplicationClass();
    Word._Document wordDocument = null;
    try
    {
        object paramSourceDocPath = sourcePath;
        string paramExportFilePath = targetPath;

        Word.WdExportFormat paramExportFormat = exportFormat;
        bool paramOpenAfterExport = false;
        Word.WdExportOptimizeFor paramExportOptimizeFor =
            Word.WdExportOptimizeFor.wdExportOptimizeForPrint;
        Word.WdExportRange paramExportRange = Word.WdExportRange.wdExportAllDocument;
        int paramStartPage = 0;
        int paramEndPage = 0;
        Word.WdExportItem paramExportItem = Word.WdExportItem.wdExportDocumentContent;
        bool paramIncludeDocProps = true;
        bool paramKeepIRM = true;
        Word.WdExportCreateBookmarks paramCreateBookmarks =
            Word.WdExportCreateBookmarks.wdExportCreateWordBookmarks;
        bool paramDocStructureTags = true;
        bool paramBitmapMissingFonts = true;
        bool paramUseISO19005_1 = false;

        wordDocument = wordApplication.Documents.Open(
            ref paramSourceDocPath, ref paramMissing, ref paramMissing,
            ref paramMissing, ref paramMissing, ref paramMissing,
            ref paramMissing, ref paramMissing, ref paramMissing,
            ref paramMissing, ref paramMissing, ref paramMissing,
            ref paramMissing, ref paramMissing, ref paramMissing,
            ref paramMissing);

        if (wordDocument != null)
        {
            wordDocument.ExportAsFixedFormat(paramExportFilePath,
                paramExportFormat, paramOpenAfterExport,
                paramExportOptimizeFor, paramExportRange, paramStartPage,
                paramEndPage, paramExportItem, paramIncludeDocProps,
                paramKeepIRM, paramCreateBookmarks, paramDocStructureTags,
                paramBitmapMissingFonts, paramUseISO19005_1,
                ref paramMissing);
            // Report success only when an export actually took place.
            result = true;
        }
    }
    finally
    {
        // Nested try/finally: a failing Close must not prevent Quit, otherwise
        // the Word process started above is left running.
        try
        {
            if (wordDocument != null)
            {
                wordDocument.Close(ref paramDoNotSave, ref paramMissing, ref paramMissing);
                wordDocument = null;
            }
        }
        finally
        {
            try
            {
                if (wordApplication != null)
                {
                    wordApplication.Quit(ref paramDoNotSave, ref paramMissing, ref paramMissing);
                    wordApplication = null;
                }
            }
            finally
            {
                // Two collection passes, as in the original, so the runtime
                // wrappers around the COM objects are finalized.
                GC.Collect();
                GC.WaitForPendingFinalizers();
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }
        }
    }
    return result;
}

/// <summary>
/// Converts an Excel workbook to a fixed-layout file (e.g. PDF) through Excel COM automation.
/// </summary>
/// <param name="sourcePath">Path of the workbook to open.</param>
/// <param name="targetPath">Path of the exported file.</param>
/// <param name="targetType">Fixed format handed to <c>ExportAsFixedFormat</c>.</param>
/// <returns>
/// <c>true</c> when the export call completed; <c>false</c> when starting Excel,
/// opening the workbook or exporting threw.
/// </returns>
private bool Convert(string sourcePath, string targetPath, XlFixedFormatType targetType)
{
    bool result;
    object missing = Type.Missing;
    Excel.ApplicationClass application = null;
    Workbook workBook = null;
    try
    {
        application = new Excel.ApplicationClass();
        object target = targetPath;
        workBook = application.Workbooks.Open(sourcePath, missing, missing, missing, missing, missing,
            missing, missing, missing, missing, missing, missing, missing, missing, missing);

        workBook.ExportAsFixedFormat(targetType, target, XlFixedFormatQuality.xlQualityStandard, true, false, missing, missing, missing, missing);
        result = true;
    }
    catch
    {
        // Best-effort contract kept from the original: any failure is reported as false.
        result = false;
    }
    finally
    {
        try
        {
            if (workBook != null)
            {
                // SaveChanges = false: a conversion must never write back to the source workbook.
                workBook.Close(false, missing, missing);
                workBook = null;
            }
        }
        finally
        {
            try
            {
                if (application != null)
                {
                    application.Quit();
                    application = null;
                }
            }
            finally
            {
                GC.Collect();
                GC.WaitForPendingFinalizers();
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }
        }
    }
    return result;
}

/// <summary>
/// Converts a PowerPoint presentation to another file type (e.g. PDF) through PowerPoint COM automation.
/// </summary>
/// <param name="sourcePath">Path of the presentation to open.</param>
/// <param name="targetPath">Path of the saved file.</param>
/// <param name="targetFileType">File type handed to <c>SaveAs</c>.</param>
/// <returns>
/// <c>true</c> when <c>SaveAs</c> completed; <c>false</c> when starting PowerPoint,
/// opening the presentation or saving threw.
/// </returns>
private bool Convert(string sourcePath, string targetPath, PpSaveAsFileType targetFileType)
{
    bool result;
    PowerPoint.ApplicationClass application = null;
    Presentation presentation = null;
    try
    {
        application = new PowerPoint.ApplicationClass();
        // Arguments after the path: ReadOnly = true, Untitled = false, WithWindow = false.
        presentation = application.Presentations.Open(sourcePath, MsoTriState.msoTrue, MsoTriState.msoFalse, MsoTriState.msoFalse);
        presentation.SaveAs(targetPath, targetFileType, MsoTriState.msoTrue);

        result = true;
    }
    catch
    {
        // Best-effort contract kept from the original: any failure is reported as false.
        result = false;
    }
    finally
    {
        try
        {
            if (presentation != null)
            {
                presentation.Close();
                presentation = null;
            }
        }
        finally
        {
            try
            {
                if (application != null)
                {
                    application.Quit();
                    application = null;
                }
            }
            finally
            {
                GC.Collect();
                GC.WaitForPendingFinalizers();
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }
        }
    }
    return result;
}
7.调用相应函数进行转换:
// Word -> PDF. The third argument selects the overload and the output format;
// the Excel and PowerPoint overloads take XlFixedFormatType.xlTypePDF and
// PpSaveAsFileType.ppSaveAsPDF respectively.
Convert("C:\\1.doc", "C:\\1.pdf", Word.WdExportFormat.wdExportFormatPDF);
开发工具是Visual Studio 2010,Windows XP SP3操作系统,OFFICE 2007.调试了doc,ppt,xls三种格式文件的转换,word测试了一个12页的文档,全部是中文文字。Excel测试了一个有3个sheet的文档(中英文和数字,某些行有背景色),转换后3个表格全部转到一个pdf文件中,无错误。转换速度很快,只是在转换PPT的时候出现了一个“正在发布...”的对话框,完成后对话框消失。不知道如何禁止出现提示框,有知道的朋友看到帖子后麻烦回复告知。
为方便使用,将文档转换的代码做成了一个dll,在项目中直接调用dll中的函数即可进行转换。欢迎点击下载。