HTML网页保存为PDF文件

开发中遇到一个需求:需要把网站的页面保存为PDF文件。

所以自己研究,总结如下:

 

 

一、Pechkin:html->pdf

1.WinForm中转换为PDF

  a.在项目中添加引用:引用 -> 管理NuGet程序包,搜索并安装 Pechkin(下文代码用到 Pechkin 和 Pechkin.Synchronized 两个命名空间)

 

b.在导出PDF按钮中添加方法

 // Export-button body: download the page, render it to PDF with Pechkin, save the
 // PDF and open it with the associated viewer.

 // Converter for an A4 page; the size is passed through ConvertToHundredthsInch(mm).
 SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
     .SetMargins(new Margins() { Left = 10, Right = 10, Top = 0, Bottom = 0 }) // page margins
     .SetPaperOrientation(false) // false = portrait (the argument is the "landscape" flag)
     .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297))); // A4: 210mm x 297mm

 byte[] buf = sc.Convert(new ObjectConfig(), getWebContent());

 if (buf == null)
 {
     MessageBox.Show("Error converting!");
     return;
 }

 try
 {
     // Keep a copy at the fixed export location.
     File.WriteAllBytes(@"d:\google-news123.pdf", buf);

     // Path.GetTempFileName() creates an empty file on disk; appending ".pdf" to that
     // name left the empty file orphaned. Build a fresh temp path instead.
     string fn = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName() + ".pdf");
     File.WriteAllBytes(fn, buf);

     // Hand the file to the shell so the default PDF viewer opens it.
     using (Process myProcess = new Process())
     {
         myProcess.StartInfo.FileName = fn;
         myProcess.Start();
     }
 }
 catch (Exception ex)
 {
     // Report the failure instead of silently swallowing it.
     MessageBox.Show(ex.Message);
 }
View Code

 相关方法

 /// <summary>
 /// Page that <see cref="getWebContent()"/> downloads when no URL is supplied.
 /// </summary>
 private const string DefaultPageUrl = "http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan_new.asp?nstr=jwmlYCBYPDcHJlX2VudHJ5X2lkPTIyMjkyMDE1MDc5MTk1MjcyOSZ0b2lwPTExNA==";

 /// <summary>
 /// Converts a length in millimetres to hundredths of an inch.
 /// </summary>
 /// <param name="millimeter">Length in millimetres.</param>
 /// <returns>The length in hundredths of an inch, rounded to the nearest integer.</returns>
 private int ConvertToHundredthsInch(int millimeter)
 {
     // Round instead of truncating: 210mm is 826.77 and must become 827, not 826.
     return (int)Math.Round(millimeter * 10.0 / 2.54, MidpointRounding.AwayFromZero);
 }

 /// <summary>
 /// Downloads the default page and returns its markup.
 /// </summary>
 /// <returns>The page text, or the error message if the download failed.</returns>
 public string getWebContent()
 {
     return getWebContent(DefaultPageUrl);
 }

 /// <summary>
 /// Downloads the page at <paramref name="url"/> and returns its markup.
 /// </summary>
 /// <param name="url">Address of the page to download.</param>
 /// <returns>
 /// The page text decoded as UTF-8, or decoded with <see cref="Encoding.Default"/> when
 /// the UTF-8 result contains replacement characters. If the download raises a
 /// <see cref="WebException"/>, the exception message is returned instead.
 /// </returns>
 public string getWebContent(string url)
 {
     try
     {
         byte[] pageData;
         using (WebClient client = new WebClient())
         {
             // Authenticate with the credentials of the current security context.
             client.Credentials = CredentialCache.DefaultCredentials;
             pageData = client.DownloadData(url);
         }

         // Try UTF-8 first; replacement characters in the result mean the bytes were
         // not valid UTF-8, so fall back to the system default encoding.
         // NOTE(review): the original comment targets GB2312 pages; Encoding.Default
         // only matches that on a machine whose default code page is Chinese - confirm.
         string pageHtml = Encoding.UTF8.GetString(pageData);
         if (isMessyCode(pageHtml))
         {
             pageHtml = Encoding.Default.GetString(pageData);
         }
         return pageHtml;
     }
     catch (WebException webEx)
     {
         // Callers receive the message as if it were page content.
         Console.WriteLine(webEx.Message);
         return webEx.Message;
     }
 }

 /// <summary>
 /// Tells whether the text contains garbled characters, i.e. the byte sequence
 /// EF BF BD (239 191 189) in its UTF-8 encoding.
 /// </summary>
 /// <param name="txt">Text to inspect; null or empty text is reported as clean.</param>
 /// <returns>true if the sequence is found; otherwise false.</returns>
 public bool isMessyCode(string txt)
 {
     if (string.IsNullOrEmpty(txt))
     {
         return false;
     }

     byte[] bytes = Encoding.UTF8.GetBytes(txt);
     // "i + 2 < Length" also examines the final three bytes; the previous bound
     // (i < Length - 3) skipped a sequence located at the very end of the text.
     for (int i = 0; i + 2 < bytes.Length; i++)
     {
         if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
         {
             return true;
         }
     }
     return false;
 }
相关方法

优缺点

  1.只能保存到指定的目录中,并且直接打开文件

  2.网页中的图片导不出来

  3.可能会出现乱码

  4.生成项目的时候需要把相应的DLL拷贝进去,不然不能生成

 

这是另外一种方法:http://www.cnblogs.com/lsgsanxiao/p/4878077.html

 

 

 

2.WEB网站中转换为PDF

  项目Demo http://pan.baidu.com/s/1gfhRR8n

  a.项目相关引用与上面相同

  b.网站中采用JS调用一般处理程序的方式

/**
 * Opens the PDF handler in a new browser window; the "html" query value is
 * forwarded to CreatePdf.ashx.
 */
function createPdf() {
    var handlerUrl = "CreatePdf.ashx?html=222222222222233324243";
    window.open(handlerUrl);
}
View Code
  using System;
  using System.Drawing.Printing;
  using System.IO;
  using System.Net;
  using System.Text;
  using System.Web;
  using Pechkin;
  using Pechkin.Synchronized;

  namespace WebApplication3
  {
      /// <summary>
      /// HTTP handler that downloads a web page, renders it to PDF with Pechkin and
      /// sends the result to the browser as a file download.
      /// </summary>
      public class CreatePdf : IHttpHandler
      {
          /// <summary>Page that <see cref="getWebContent()"/> downloads when no URL is supplied.</summary>
          private const string DefaultPageUrl = "http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan.asp?nstr=AAfFJb_SVvcHJlX2VudHJ5X2lkPTIyMzEyMDE1MDgxMTY0NDUzOSZ0b2lwPTExNA==";

          /// <summary>Download name used when the request supplies no usable one.</summary>
          private const string FallbackFileName = "document";

          /// <summary>
          /// Renders the page to PDF and writes it to the response as an attachment
          /// named after the "html" request value.
          /// </summary>
          /// <param name="context">Current HTTP context.</param>
          public void ProcessRequest(HttpContext context)
          {
              string htmlFile = context.Request["html"];

              string html = getWebContent();
              SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
                  .SetMargins(new Margins() { Left = 0, Right = 0, Top = 0, Bottom = 0 }) // no page margins
                  .SetPaperOrientation(false) // false = portrait (the argument is the "landscape" flag)
                  .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297))); // A4: 210mm x 297mm

              byte[] buf = sc.Convert(new ObjectConfig(), html);

              if (buf == null)
              {
                  context.Response.ContentType = "text/plain";
                  context.Response.Write("Error converting!");
                  // Stop here: continuing would clear this message and write a null buffer.
                  return;
              }

              try
              {
                  context.Response.Clear();

                  // Option 1: ask the browser to download the PDF.
                  context.Response.AddHeader("content-disposition", "attachment;filename=\"" + BuildPdfFileName(htmlFile) + "\"");
                  context.Response.ContentType = "application/octet-stream";
                  context.Response.BinaryWrite(buf);

                  // Option 2: show the PDF inline in the browser.
                  //context.Response.ContentType = "application/pdf";
                  //context.Response.OutputStream.Write(buf, 0, buf.Length);

                  // Response.End() raises ThreadAbortException, which the catch below
                  // would intercept; CompleteRequest() finishes the request without it.
                  if (context.ApplicationInstance != null)
                  {
                      context.ApplicationInstance.CompleteRequest();
                  }
              }
              catch (Exception e)
              {
                  context.Response.ContentType = "text/plain";
                  context.Response.Write(e.Message);
              }
          }

          /// <summary>Always false: a handler instance is not reused across requests.</summary>
          public bool IsReusable
          {
              get
              {
                  return false;
              }
          }

          /// <summary>
          /// Builds a download file name from a request value: drops characters that are
          /// invalid in file names or would break the header value, falls back to a fixed
          /// name when nothing is left, and appends ".pdf".
          /// </summary>
          /// <param name="requestedName">Raw name from the request; may be null.</param>
          /// <returns>A file name ending in ".pdf".</returns>
          private static string BuildPdfFileName(string requestedName)
          {
              StringBuilder safeName = new StringBuilder();
              if (requestedName != null)
              {
                  char[] invalidChars = Path.GetInvalidFileNameChars();
                  foreach (char c in requestedName)
                  {
                      bool breaksHeader = c == ';' || c == '"' || char.IsControl(c);
                      if (!breaksHeader && Array.IndexOf(invalidChars, c) < 0)
                      {
                          safeName.Append(c);
                      }
                  }
              }

              if (safeName.Length == 0)
              {
                  safeName.Append(FallbackFileName);
              }
              return safeName.Append(".pdf").ToString();
          }

          /// <summary>
          /// Converts a length in millimetres to hundredths of an inch.
          /// </summary>
          /// <param name="millimeter">Length in millimetres.</param>
          /// <returns>The length in hundredths of an inch, rounded to the nearest integer.</returns>
          private int ConvertToHundredthsInch(int millimeter)
          {
              // Round instead of truncating: 210mm is 826.77 and must become 827, not 826.
              return (int)Math.Round(millimeter * 10.0 / 2.54, MidpointRounding.AwayFromZero);
          }

          /// <summary>
          /// Downloads the default page and returns its markup.
          /// </summary>
          /// <returns>The page text, or the error message if the download failed.</returns>
          public string getWebContent()
          {
              return getWebContent(DefaultPageUrl);
          }

          /// <summary>
          /// Downloads the page at <paramref name="url"/> and returns its markup.
          /// </summary>
          /// <param name="url">Address of the page to download.</param>
          /// <returns>
          /// The page text decoded as UTF-8, or decoded with <see cref="Encoding.Default"/>
          /// when the UTF-8 result contains replacement characters. If the download raises
          /// a <see cref="WebException"/>, the exception message is returned instead.
          /// </returns>
          public string getWebContent(string url)
          {
              try
              {
                  byte[] pageData;
                  using (WebClient client = new WebClient())
                  {
                      // Authenticate with the credentials of the current security context.
                      client.Credentials = CredentialCache.DefaultCredentials;
                      pageData = client.DownloadData(url);
                  }

                  // Try UTF-8 first; replacement characters in the result mean the bytes
                  // were not valid UTF-8, so fall back to the system default encoding.
                  // NOTE(review): the original comment targets GB2312 pages; Encoding.Default
                  // only matches that on a server whose default code page is Chinese - confirm.
                  string pageHtml = Encoding.UTF8.GetString(pageData);
                  if (isMessyCode(pageHtml))
                  {
                      pageHtml = Encoding.Default.GetString(pageData);
                  }
                  return pageHtml;
              }
              catch (WebException webEx)
              {
                  // Callers receive the message as if it were page content.
                  Console.WriteLine(webEx.Message);
                  return webEx.Message;
              }
          }

          /// <summary>
          /// Tells whether the text contains garbled characters, i.e. the byte sequence
          /// EF BF BD (239 191 189) in its UTF-8 encoding.
          /// </summary>
          /// <param name="txt">Text to inspect; null or empty text is reported as clean.</param>
          /// <returns>true if the sequence is found; otherwise false.</returns>
          public bool isMessyCode(string txt)
          {
              if (string.IsNullOrEmpty(txt))
              {
                  return false;
              }

              byte[] bytes = Encoding.UTF8.GetBytes(txt);
              // "i + 2 < Length" also examines the final three bytes; the previous bound
              // (i < Length - 3) skipped a sequence located at the very end of the text.
              for (int i = 0; i + 2 < bytes.Length; i++)
              {
                  if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
                  {
                      return true;
                  }
              }
              return false;
          }
      }
  }
一般处理程序

 

posted @ 2016-06-03 14:19  殇琉璃  阅读(8712)  评论(0编辑  收藏  举报