PDF转化Word
昨天碰到一个PDF转化Word的需求,用WPS转化的时候告诉我需要会员,作为程序员的我这个忍不了。果断自己做了一个简单的转化程序,分享出来。1.作为自己的一个学习资料;2.希望能够对你们有所帮助。
做这个需求有两个方向:1.Spire.pdf 2.Aspose.pdf
First,spire.pdf,亲测no;为什么呢?首先它的免费版是有限制的,最多只能转化10页,如果咱们的pdf文件超过10页就无法满足需求,而且转化结果还会带一个红色的水印,很恶心。我也联系了他们的官网人员,他们给我的商用版链接报价5000大洋,至此和spire.pdf说了拜拜。
Second,通过自己的技术交流群拿到了这个破解版的Aspose.pdf(版本控制在21.8.0之前,我的是21.5.0),它很完美地解决了我们这个文件页数受限制的问题,而且不带任何水印,非常完美。但是有一点不足的是:如果pdf是扫描件的话,转化出来的是图片。这一点本来我的思路是通过图片提取文字,但是不知道是Aspose.pdf的内部转化有问题还是我没处理好,这里有些许的bug报错,然后我耽误了一天的时间后放弃了。下面我将把我的代码上传,欢迎大家下载使用,且如果能够解决图片提取文字的话,希望能够和我交流!
using Aspose.OCR;
using Aspose.Pdf;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using WinformControlLibraryExtension;

namespace PDFConvertWord
{
    /// <summary>
    /// Main window: lets the user pick a file, convert a PDF to a Word document
    /// with Aspose.Pdf, or run Aspose.OCR on an image and append the recognized
    /// text to a text file.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Name of the licence file looked up next to the executable.</summary>
        private const string LicenseFileName = "Aspose.Total.lic";

        /// <summary>Destination of the converted Word document.</summary>
        private const string WordOutputPath = @"D:\" + "Administrator.doc";

        /// <summary>Text file the OCR result is appended to.</summary>
        private const string OcrOutputPath = "D:\\Output.txt";

        /// <summary>Number of characters shown per row of the path label.</summary>
        private const int CharsPerRow = 10;

        /// <summary>Height, in pixels, reserved for each row of the path label.</summary>
        private const int RowHeight = 15;

        /// <summary>
        /// Per-character width of the path label, captured once. Zero means
        /// "not measured yet".
        /// </summary>
        private float _labelCharWidth;

        /// <summary>
        /// Builds the form and applies the Aspose licence if a licence file is present.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            ApplyLicenses();
        }

        /// <summary>
        /// Applies the Aspose licence to both the PDF and the OCR component.
        /// </summary>
        /// <remarks>
        /// The licence is read from a file beside the executable instead of being
        /// embedded in source: licence data is a secret and must not be committed
        /// or redistributed with the code. When the file is missing nothing is
        /// applied and the libraries run unlicensed.
        /// </remarks>
        private static void ApplyLicenses()
        {
            string licensePath = System.IO.Path.Combine(Application.StartupPath, LicenseFileName);
            if (!System.IO.File.Exists(licensePath))
            {
                return;
            }

            new Aspose.Pdf.License().SetLicense(licensePath);
            new Aspose.OCR.License().SetLicense(licensePath);
        }

        /// <summary>
        /// Opens a file picker and shows the chosen path in <c>lbl_message</c>,
        /// sizing the label so the path wraps at <see cref="CharsPerRow"/> characters.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // The dialog wraps a native resource, so dispose it deterministically.
            using (OpenFileDialog fd = new OpenFileDialog())
            {
                fd.Title = "选择文件"; // dialog caption
                fd.Filter = "All files (*.pdf)|*.pdf|(*.doc)|*.doc|(*.png)|*.png|(*.jpg)|*.jpg|(*.jpeg)|*.jpeg|(*.bmp)|*.bmp";
                if (fd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                string selectedPath = fd.FileName.Trim();

                // Ceiling division: a partially filled last row still needs its own row.
                int rowCount = Math.Max(1, (selectedPath.Length + CharsPerRow - 1) / CharsPerRow);

                lbl_message.AutoSize = false;
                lbl_message.Width = (int)(GetLabelCharWidth() * CharsPerRow);
                lbl_message.Height = RowHeight * rowCount;
                lbl_message.Text = selectedPath;
            }
        }

        /// <summary>
        /// Returns the width of one character of the path label.
        /// </summary>
        /// <remarks>
        /// The value is derived from the label's initial width and text, and cached.
        /// Re-deriving it on every selection would divide the already-resized width
        /// by the new text length and shrink the label each time. An empty label
        /// would also cause a division by zero, so that case falls back to
        /// measuring a sample character with the label's font.
        /// </remarks>
        private float GetLabelCharWidth()
        {
            if (_labelCharWidth > 0f)
            {
                return _labelCharWidth;
            }

            int currentLength = lbl_message.Text.Length;
            float measured = currentLength > 0 ? (float)lbl_message.Width / currentLength : 0f;
            if (measured <= 0f)
            {
                measured = TextRenderer.MeasureText("0", lbl_message.Font).Width;
            }

            _labelCharWidth = measured;
            return _labelCharWidth;
        }

        /// <summary>
        /// Reads the path shown in <c>lbl_message</c> and checks that it names an
        /// existing file; otherwise tells the user to pick one.
        /// </summary>
        /// <param name="path">Receives the trimmed path from the label.</param>
        /// <returns><c>true</c> when <paramref name="path"/> names an existing file.</returns>
        private bool TryGetSelectedFile(out string path)
        {
            path = lbl_message.Text.Trim();
            if (path.Length == 0 || !System.IO.File.Exists(path))
            {
                MessageBox.Show("请选择相应的文件内容!Retry");
                return false;
            }

            return true;
        }

        /// <summary>
        /// Converts the selected PDF to a Word document saved at <see cref="WordOutputPath"/>.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            string sourcePath;
            if (!TryGetSelectedFile(out sourcePath))
            {
                return;
            }

            string failure = null;
            try
            {
                MaskingExt.Show(this, new MaskingExt.MaskingSettings() { TextOrientation = MaskingExt.MaskingTextOrientations.Right });

                // Dispose the document so the source file is released after saving.
                using (Document pdfDocument = new Document(sourcePath))
                {
                    // Save the file in MS Word format.
                    pdfDocument.Save(WordOutputPath, Aspose.Pdf.SaveFormat.Doc);
                }
            }
            catch (Exception ex)
            {
                failure = ex.Message;
            }
            finally
            {
                // Always remove the busy mask, and do it before any modal message box.
                MaskingExt.Hide(this);
            }

            if (failure == null)
            {
                MessageBox.Show("转化成功!请在D盘根目录查看--Administrator的DOC文档");
            }
            else
            {
                MessageBox.Show("转化失败!错误信息:" + failure);
            }
        }

        private void Form1_Load(object sender, EventArgs e)
        {
            // Intentionally empty; kept because the designer may wire this handler.
        }

        #region Write to text file
        /// <summary>
        /// Appends <paramref name="contentSrt"/> as one line to the file at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">File to append to; opened with <see cref="FileMode.Append"/>.</param>
        /// <param name="contentSrt">Text to write, followed by a line terminator.</param>
        private void WriteForTxt(string path, string contentSrt)
        {
            // using blocks close the stream and writer even when WriteLine throws.
            using (FileStream fs = new FileStream(path, FileMode.Append))
            using (StreamWriter wr = new StreamWriter(fs))
            {
                wr.WriteLine(contentSrt);
            }
        }
        #endregion

        /// <summary>
        /// Runs OCR on the selected file and appends the recognized text to
        /// <see cref="OcrOutputPath"/>.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            string sourcePath;
            if (!TryGetSelectedFile(out sourcePath))
            {
                return;
            }

            #region Extract text from image
            string failure = null;
            try
            {
                MaskingExt.Show(this, new MaskingExt.MaskingSettings() { TextOrientation = MaskingExt.MaskingTextOrientations.Right });

                // NOTE(review): RecognizeImage is given whatever file was selected,
                // including PDFs; presumably it only accepts image files - confirm
                // against the Aspose.OCR version in use.
                AsposeOcr api = new AsposeOcr();
                string result = api.RecognizeImage(sourcePath);

                WriteForTxt(OcrOutputPath, result);
            }
            catch (Exception ex)
            {
                failure = ex.Message;
            }
            finally
            {
                // Always remove the busy mask, and do it before any modal message box.
                MaskingExt.Hide(this);
            }

            if (failure != null)
            {
                MessageBox.Show("转化失败!" + failure);
            }
            #endregion
        }
    }
}