PDF转化Word

昨天碰到一个PDF转化Word的需求,用WPS转化的时候告诉我需要会员,作为程序员的我这个忍不了。果断自己做了一个简单的转化程序,分享出来。1.作为自己的一个学习资料;2.希望能够对你们有所帮助。

做这个需求有两个方向:1.Spire.pdf  2.Aspose.pdf 
First,Spire.pdf,亲测不可行。为什么呢?首先它的免费版是有限制的,最多只能转化10页,如果咱们的pdf文件超过10页就无法满足需求了,而且转化结果还会带一个红色的水印,很恶心。我也联系了他们官网的工作人员,他们给我的商用版链接标价5000大洋,至此和Spire.pdf说了拜拜。

Second,通过自己的技术交流群拿到了这个破解版的Aspose.pdf(版本控制在21.8.0之前,我的是21.5.0),它很完美地解决了文件页数受限制的问题,而且不带任何水印,非常完美。但是有一点不足的是:如果pdf是扫描件的话,转化出来的是图片。对于这一点,我本来的思路是通过图片提取文字,但是不知道是Aspose.pdf的内部转化有问题还是我没处理好,这里总有些bug报错,我耽误了一天的时间后放弃了。下面我将把我的代码上传,欢迎大家下载使用;如果有人能够解决图片提取文字的问题,希望能够和我交流!

  1 using Aspose.OCR;
  2 using Aspose.Pdf;
  3 using System;
  4 using System.Collections.Generic;
  5 using System.ComponentModel;
  6 using System.Data;
  7 using System.Drawing;
  8 using System.IO;
  9 using System.Linq;
 10 using System.Text;
 11 using System.Threading.Tasks;
 12 using System.Windows.Forms;
 13 using WinformControlLibraryExtension;
 14 
 15 namespace PDFConvertWord
 16 {
 17     public partial class Form1 : Form
 18     {
 19         public Form1()
 20         {
 21             InitializeComponent();
 22             new Aspose.Pdf.License().SetLicense(new MemoryStream(Convert.FromBase64String("DQo8TGljZW5zZT4NCjxEYXRhPg0KPExpY2Vuc2VkVG8+VGhlIFdvcmxkIEJhbms8L0xpY2Vuc2VkVG8+DQo8RW1haWxUbz5ra3VtYXIzQHdvcmxkYmFua2dyb3VwLm9yZzwvRW1haWxUbz4NCjxMaWNlbnNlVHlwZT5EZXZlbG9wZXIgU21hbGwgQnVzaW5lc3M8L0xpY2Vuc2VUeXBlPg0KPExpY2Vuc2VOb3RlPjEgRGV2ZWxvcGVyIEFuZCAxIERlcGxveW1lbnQgTG9jYXRpb248L0xpY2Vuc2VOb3RlPg0KPE9yZGVySUQ+MjEwMzE2MTg1OTU3PC9PcmRlcklEPg0KPFVzZXJJRD43NDQ5MTY8L1VzZXJJRD4NCjxPRU0+VGhpcyBpcyBub3QgYSByZWRpc3RyaWJ1dGFibGUgbGljZW5zZTwvT0VNPg0KPFByb2R1Y3RzPg0KPFByb2R1Y3Q+QXNwb3NlLlRvdGFsIGZvciAuTkVUPC9Qcm9kdWN0Pg0KPC9Qcm9kdWN0cz4NCjxFZGl0aW9uVHlwZT5Qcm9mZXNzaW9uYWw8L0VkaXRpb25UeXBlPg0KPFNlcmlhbE51bWJlcj4wM2ZiMTk5YS01YzhhLTQ4ZGItOTkyZS1kMDg0ZmYwNjZkMGM8L1NlcmlhbE51bWJlcj4NCjxTdWJzY3JpcHRpb25FeHBpcnk+MjAyMjA1MTY8L1N1YnNjcmlwdGlvbkV4cGlyeT4NCjxMaWNlbnNlVmVyc2lvbj4zLjA8L0xpY2Vuc2VWZXJzaW9uPg0KPExpY2Vuc2VJbnN0cnVjdGlvbnM+aHR0cHM6Ly9wdXJjaGFzZS5hc3Bvc2UuY29tL3BvbGljaWVzL3VzZS1saWNlbnNlPC9MaWNlbnNlSW5zdHJ1Y3Rpb25zPg0KPC9EYXRhPg0KPFNpZ25hdHVyZT5XbkJYNnJOdHpCclNMV3pBdFlqOEtkdDFLSUI5MlFrL2xEbFNmMlM1TFRIWGdkcS9QQ2NqWHVORmp0NEJuRmZwNFZLc3VsSjhWeFExakIwbmM0R1lWcWZLek14SFFkaXFuZU03NTJaMjlPbmdyVW40Yk0rc1l6WWVSTE9UOEpxbE9RN05rRFU0bUk2Z1VyQ3dxcjdnUVYxbDJJWkJxNXMzTEFHMFRjQ1ZncEE9PC9TaWduYXR1cmU+DQo8L0xpY2Vuc2U+DQo=")));
 23             new Aspose.OCR.License().SetLicense(new MemoryStream(Convert.FromBase64String("DQo8TGljZW5zZT4NCjxEYXRhPg0KPExpY2Vuc2VkVG8+VGhlIFdvcmxkIEJhbms8L0xpY2Vuc2VkVG8+DQo8RW1haWxUbz5ra3VtYXIzQHdvcmxkYmFua2dyb3VwLm9yZzwvRW1haWxUbz4NCjxMaWNlbnNlVHlwZT5EZXZlbG9wZXIgU21hbGwgQnVzaW5lc3M8L0xpY2Vuc2VUeXBlPg0KPExpY2Vuc2VOb3RlPjEgRGV2ZWxvcGVyIEFuZCAxIERlcGxveW1lbnQgTG9jYXRpb248L0xpY2Vuc2VOb3RlPg0KPE9yZGVySUQ+MjEwMzE2MTg1OTU3PC9PcmRlcklEPg0KPFVzZXJJRD43NDQ5MTY8L1VzZXJJRD4NCjxPRU0+VGhpcyBpcyBub3QgYSByZWRpc3RyaWJ1dGFibGUgbGljZW5zZTwvT0VNPg0KPFByb2R1Y3RzPg0KPFByb2R1Y3Q+QXNwb3NlLlRvdGFsIGZvciAuTkVUPC9Qcm9kdWN0Pg0KPC9Qcm9kdWN0cz4NCjxFZGl0aW9uVHlwZT5Qcm9mZXNzaW9uYWw8L0VkaXRpb25UeXBlPg0KPFNlcmlhbE51bWJlcj4wM2ZiMTk5YS01YzhhLTQ4ZGItOTkyZS1kMDg0ZmYwNjZkMGM8L1NlcmlhbE51bWJlcj4NCjxTdWJzY3JpcHRpb25FeHBpcnk+MjAyMjA1MTY8L1N1YnNjcmlwdGlvbkV4cGlyeT4NCjxMaWNlbnNlVmVyc2lvbj4zLjA8L0xpY2Vuc2VWZXJzaW9uPg0KPExpY2Vuc2VJbnN0cnVjdGlvbnM+aHR0cHM6Ly9wdXJjaGFzZS5hc3Bvc2UuY29tL3BvbGljaWVzL3VzZS1saWNlbnNlPC9MaWNlbnNlSW5zdHJ1Y3Rpb25zPg0KPC9EYXRhPg0KPFNpZ25hdHVyZT5XbkJYNnJOdHpCclNMV3pBdFlqOEtkdDFLSUI5MlFrL2xEbFNmMlM1TFRIWGdkcS9QQ2NqWHVORmp0NEJuRmZwNFZLc3VsSjhWeFExakIwbmM0R1lWcWZLek14SFFkaXFuZU03NTJaMjlPbmdyVW40Yk0rc1l6WWVSTE9UOEpxbE9RN05rRFU0bUk2Z1VyQ3dxcjdnUVYxbDJJWkJxNXMzTEFHMFRjQ1ZncEE9PC9TaWduYXR1cmU+DQo8L0xpY2Vuc2U+DQo=")));
 24         }
 25 
 26         private void button1_Click(object sender, EventArgs e)
 27         {
 28             System.Windows.Forms.OpenFileDialog fd = new OpenFileDialog();
 29             fd.Title = "选择文件";//选择框名称        
 30             fd.Filter = "All files (*.pdf)|*.pdf|(*.doc)|*.doc|(*.png)|*.png|(*.jpg)|*.jpg|(*.jpeg)|*.jpeg|(*.bmp)|*.bmp";//选择文件的类型为Xls表格          
 31             if (fd.ShowDialog() == DialogResult.OK)//当点击确定               
 32             {
 33                 int LblNum = fd.FileName.Length;   //Label内容长度 
 34                 int RowNum = 10;   //每行显示的字数
 35 
 36                 float FontWidth = lbl_message.Width / lbl_message.Text.Length;    //每个字符的宽度 
 37                 int RowHeight = 15;   //每行的高度
 38 
 39                 int ColNum = (LblNum - (LblNum / RowNum) * RowNum) == 0 ? (LblNum / RowNum) : (LblNum / RowNum) + 1;   //列数 
 40                 lbl_message.AutoSize = false;    //设置AutoSize 
 41                 lbl_message.Width = (int)(FontWidth * 10.0);   //设置显示宽度 
 42                 lbl_message.Height = RowHeight * ColNum;   //设置显示高度
 43                 lbl_message.Text = fd.FileName.Trim();  //文件路径
 44                                                         //     SelectFilePath.Text = SelectFilePath.Text.Replace("\\", "/");
 45             }
 46 
 47         }
 48 
 49         private void button2_Click(object sender, EventArgs e)
 50         {
 51             if (string.IsNullOrEmpty(lbl_message.Text))
 52             {
 53                 MessageBox.Show("请选择相应的文件内容!Retry");
 54                 return;
 55             }
 56 
 57             try
 58             {
 59                 MaskingExt.Show(this, new MaskingExt.MaskingSettings() { TextOrientation = MaskingExt.MaskingTextOrientations.Right });
 60                 Document pdfDocument = new Document(lbl_message.Text);
 61 
 62                 // Save the file into MS document format
 63                 pdfDocument.Save(@"D:\" + "Administrator.doc", Aspose.Pdf.SaveFormat.Doc);
 64 
 65                 MaskingExt.Hide(this);
 66                 MessageBox.Show("转化成功!请在D盘根目录查看--Administrator的DOC文档");
 67             }
 68             catch (Exception ex)
 69             {
 70                 MessageBox.Show("转化失败!错误信息:" + ex.Message);
 71                 MaskingExt.Hide(this);
 72             }
 73 
 74         }
 75 
 76         private void Form1_Load(object sender, EventArgs e)
 77         {
 78 
 79         }
 80 
 81         #region  写入文本文档中去
 82         private void WriteForTxt(string path, string contentSrt)
 83         {
 84             FileStream fs = new FileStream(path, FileMode.Append);
 85             StreamWriter wr = null;
 86             wr = new StreamWriter(fs);
 87             wr.WriteLine(contentSrt);
 88             wr.Close();
 89         }
 90         #endregion
 91 
 92         private void button3_Click(object sender, EventArgs e)
 93         {
 94             /* Document pdfDocument = new Document(lbl_message.Text);
 95              for (int i = 0; i < pdfDocument.Pages.Count; i++) {
 96                  Page page = pdfDocument.Pages[i];
 97                  //page.im
 98                  System.Drawing.Image[] images = page.ExtractImages();
 99                  if (images != null && images.Length > 0)
100                  {
101                      ListImage.AddRange(images);
102                  }
103              }*/
104             if (string.IsNullOrEmpty(lbl_message.Text))
105             {
106                 MessageBox.Show("请选择相应的文件内容!Retry");
107                 return;
108             }
109 
110             #region 图片提取文字
111             // Initialize an instance of AsposeOcr
112             try
113             {
114                 MaskingExt.Show(this, new MaskingExt.MaskingSettings() { TextOrientation = MaskingExt.MaskingTextOrientations.Right });
115                 AsposeOcr api = new AsposeOcr();
116 
117                 // Recognize image
118                 string result = api.RecognizeImage(lbl_message.Text);
119 
120                 // The path to the documents directory.
121 
122                 // Initialize an instance of AsposeOcr
123                 /* var api = new AsposeOcr();
124 
125                  // Recognize image
126                  string result = api.RecognizeLine(lbl_message.Text);*/
127 
128                 WriteForTxt("D:\\Output.txt",result);
129                 MaskingExt.Hide(this);
130                 // Display the recognized t
131             }
132             catch (Exception ex)
133             {
134                 MessageBox.Show("转化失败!" + ex.Message);
135                 MaskingExt.Hide(this);
136             }
137             //Console.WriteLine(result);
138 
139             #endregion
140 
141         }
142     }
143 }
View Code

 

posted @ 2022-03-08 16:43  PandaHuB  阅读(638)  评论(0编辑  收藏  举报