HTML网页保存为PDF文件
开发中遇到的问题,需要把网站的页面保存为PDF文件
所以自己研究,总结如下:
一、Pechkin:html->pdf
1.WinForm中转换为PDF
a.在项目中添加引用:引用 -> 管理NuGet程序包,搜索并安装 Pechkin 和 Pechkin.Synchronized
b.在导出PDF按钮中添加方法
// Body of the "export PDF" button handler: download the page, convert it to
// PDF with Pechkin, save it, and open the result in the default PDF viewer.
SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
    .SetMargins(new Margins() { Left = 10, Right = 10, Top = 0, Bottom = 0 }) // page margins
    // NOTE(review): the original comment said "landscape", but false is passed;
    // presumably the argument is the landscape flag, so this selects portrait.
    // Confirm against Pechkin's GlobalConfig.SetPaperOrientation.
    .SetPaperOrientation(false)
    .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297))); // 210 mm x 297 mm (A4)

byte[] buf = sc.Convert(new ObjectConfig(), getWebContent());

if (buf == null)
{
    MessageBox.Show("Error converting!");
    return;
}

try
{
    // Fixed output location used by the sample; fails if drive D: is missing,
    // which is now reported to the user instead of escaping the handler.
    File.WriteAllBytes(@"d:\google-news123.pdf", buf);

    // Build the preview path directly. Path.GetTempFileName() would create a
    // zero-byte ".tmp" file that is never used nor deleted once ".pdf" is appended.
    string fn = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".pdf");
    File.WriteAllBytes(fn, buf);

    // Hand the file to the shell; disposing the Process object only releases
    // the local handle, it does not close the viewer.
    using (Process myProcess = new Process())
    {
        myProcess.StartInfo.FileName = fn;
        myProcess.Start();
    }
}
catch (Exception ex)
{
    // Previously an empty catch hid every failure (disk full, no viewer, ...).
    MessageBox.Show("Error saving or opening the PDF: " + ex.Message);
}
相关方法
/// <summary>
/// Converts a length in millimeters to hundredths of an inch.
/// </summary>
/// <param name="millimeter">Length in millimeters.</param>
/// <returns>The length in hundredths of an inch, rounded to the nearest integer.</returns>
private int ConvertToHundredthsInch(int millimeter)
{
    // 1 inch = 25.4 mm, so mm * 100 / 25.4 == mm * 10 / 2.54.
    // Round instead of truncating: 210 mm is 826.77, i.e. 827 (A4 width), not 826.
    return (int)Math.Round((millimeter * 10.0) / 2.54, MidpointRounding.AwayFromZero);
}

/// <summary>
/// Downloads the target page and returns its markup as a string.
/// </summary>
/// <returns>
/// The page content decoded as UTF-8, or decoded with the system default
/// code page when the UTF-8 result contains replacement characters.
/// On a <see cref="WebException"/> the exception message is returned instead.
/// </returns>
public string getWebContent()
{
    try
    {
        // WebClient is IDisposable; release it even when the download throws.
        using (WebClient client = new WebClient())
        {
            // Credentials used to authenticate the request to the remote resource.
            client.Credentials = CredentialCache.DefaultCredentials;

            byte[] pageData = client.DownloadData("http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan_new.asp?nstr=jwmlYCBYPDcHJlX2VudHJ5X2lkPTIyMjkyMDE1MDc5MTk1MjcyOSZ0b2lwPTExNA==");

            // Try UTF-8 first; fall back to the system default code page
            // (the original author's note: intended for GB2312 pages) when the
            // UTF-8 decode produced garbage.
            string pageHtml = Encoding.UTF8.GetString(pageData);
            if (isMessyCode(pageHtml))
            {
                pageHtml = Encoding.Default.GetString(pageData);
            }

            return pageHtml;
        }
    }
    catch (WebException webEx)
    {
        Console.WriteLine(webEx.Message);
        // NOTE(review): callers receive the error text as if it were page content.
        return webEx.Message;
    }
}

/// <summary>
/// Detects whether a decoded string contains garbled text, i.e. the
/// replacement character whose UTF-8 encoding is the bytes 239 191 189.
/// </summary>
/// <param name="txt">The decoded text to inspect; null or empty yields false.</param>
/// <returns>true when the byte sequence EF BF BD occurs anywhere in the text.</returns>
public bool isMessyCode(string txt)
{
    if (string.IsNullOrEmpty(txt))
    {
        return false;
    }

    byte[] bytes = Encoding.UTF8.GetBytes(txt);

    // i + 2 must stay in range. The previous bound (i < Length - 3) skipped the
    // final three bytes, so a replacement character at the end went undetected.
    for (int i = 0; i + 2 < bytes.Length; i++)
    {
        if (bytes[i] == 0xEF && bytes[i + 1] == 0xBF && bytes[i + 2] == 0xBD)
        {
            return true;
        }
    }

    return false;
}
优缺点
1.只能保存到指定的目录中,并且会直接打开文件
2.网页中的图片导不出来
3.可能会出现乱码
4.生成项目的时候需要把相应的DLL拷贝进去,不然不能生成
这是另外一种方法:http://www.cnblogs.com/lsgsanxiao/p/4878077.html
2.WEB网站中转换为PDF
项目Demo http://pan.baidu.com/s/1gfhRR8n
a.项目相关引用与上面相同
b.网站中采用JS调用一般处理程序的方式
/**
 * Opens the PDF-generating handler in a new browser window.
 * The "html" query value is forwarded to CreatePdf.ashx as-is.
 */
function createPdf() {
    var handlerUrl = "CreatePdf.ashx?html=222222222222233324243";
    window.open(handlerUrl);
}
using System;
using System.Drawing.Printing;
using System.IO;
using System.Net;
using System.Text;
using System.Web;
using Pechkin;
using Pechkin.Synchronized;

namespace WebApplication3
{
    /// <summary>
    /// Generic handler that downloads a web page, converts it to PDF with
    /// Pechkin and streams the result back to the browser as a download.
    /// </summary>
    public class CreatePdf : IHttpHandler
    {
        /// <summary>
        /// Handles one request: fetches the page, converts it and writes the PDF
        /// to the response. The "html" request value is used as the download name.
        /// </summary>
        /// <param name="context">The current HTTP context.</param>
        public void ProcessRequest(HttpContext context)
        {
            string htmlFile = context.Request["html"];

            string html = getWebContent();
            SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
                .SetMargins(new Margins() { Left = 0, Right = 0, Top = 0, Bottom = 0 }) // page margins
                // NOTE(review): the original comment said "landscape", but false is
                // passed; presumably this selects portrait. Confirm against Pechkin.
                .SetPaperOrientation(false)
                .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297))); // 210 mm x 297 mm (A4)

            byte[] buf = sc.Convert(new ObjectConfig(), html);

            if (buf == null)
            {
                context.Response.ContentType = "text/plain";
                context.Response.Write("Error converting!");
                // Stop here: falling through would call BinaryWrite(null).
                return;
            }

            try
            {
                context.Response.Clear();

                // Option 1: ask the browser to download the PDF.
                // To display it inline instead, drop the content-disposition
                // header and use the "application/pdf" content type.
                context.Response.AddHeader("content-disposition", "attachment;filename=" + SanitizeFileName(htmlFile) + ".pdf");
                context.Response.ContentType = "application/octet-stream";
                context.Response.BinaryWrite(buf);

                // Response.End() aborts the thread; the resulting ThreadAbortException
                // was caught below and its message appended to the PDF bytes.
                // CompleteRequest() finishes the request without throwing.
                context.ApplicationInstance.CompleteRequest();
            }
            catch (Exception e)
            {
                context.Response.ContentType = "text/plain";
                context.Response.Write(e.Message);
            }
        }

        /// <summary>
        /// Indicates whether another request can reuse this handler instance.
        /// </summary>
        public bool IsReusable
        {
            get
            {
                return false;
            }
        }

        /// <summary>
        /// Removes characters that are unsafe in a content-disposition file name
        /// from a client-supplied value.
        /// </summary>
        /// <param name="requested">The raw request value; may be null.</param>
        /// <returns>The cleaned name, or "document" when nothing usable remains.</returns>
        private static string SanitizeFileName(string requested)
        {
            if (string.IsNullOrEmpty(requested))
            {
                return "document";
            }

            char[] invalid = Path.GetInvalidFileNameChars();
            StringBuilder safe = new StringBuilder(requested.Length);
            foreach (char c in requested)
            {
                // Also drop header parameter separators and control characters.
                if (Array.IndexOf(invalid, c) < 0 && c != ';' && c != ',' && !char.IsControl(c))
                {
                    safe.Append(c);
                }
            }

            return safe.Length > 0 ? safe.ToString() : "document";
        }

        /// <summary>
        /// Converts a length in millimeters to hundredths of an inch.
        /// </summary>
        /// <param name="millimeter">Length in millimeters.</param>
        /// <returns>The length in hundredths of an inch, rounded to the nearest integer.</returns>
        private int ConvertToHundredthsInch(int millimeter)
        {
            // 1 inch = 25.4 mm, so mm * 100 / 25.4 == mm * 10 / 2.54.
            // Round instead of truncating: 210 mm is 826.77, i.e. 827 (A4 width), not 826.
            return (int)Math.Round((millimeter * 10.0) / 2.54, MidpointRounding.AwayFromZero);
        }

        /// <summary>
        /// Downloads the target page and returns its markup as a string.
        /// </summary>
        /// <returns>
        /// The page content decoded as UTF-8, or decoded with the system default
        /// code page when the UTF-8 result contains replacement characters.
        /// On a <see cref="WebException"/> the exception message is returned instead.
        /// </returns>
        public string getWebContent()
        {
            try
            {
                // WebClient is IDisposable; release it even when the download throws.
                using (WebClient client = new WebClient())
                {
                    // Credentials used to authenticate the request to the remote resource.
                    client.Credentials = CredentialCache.DefaultCredentials;

                    byte[] pageData = client.DownloadData("http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan.asp?nstr=AAfFJb_SVvcHJlX2VudHJ5X2lkPTIyMzEyMDE1MDgxMTY0NDUzOSZ0b2lwPTExNA==");

                    // Try UTF-8 first; fall back to the system default code page
                    // (the original author's note: intended for GB2312 pages) when
                    // the UTF-8 decode produced garbage.
                    string pageHtml = Encoding.UTF8.GetString(pageData);
                    if (isMessyCode(pageHtml))
                    {
                        pageHtml = Encoding.Default.GetString(pageData);
                    }

                    return pageHtml;
                }
            }
            catch (WebException webEx)
            {
                Console.WriteLine(webEx.Message);
                // NOTE(review): the error text is returned as if it were page
                // content, so it ends up rendered into the PDF.
                return webEx.Message;
            }
        }

        /// <summary>
        /// Detects whether a decoded string contains garbled text, i.e. the
        /// replacement character whose UTF-8 encoding is the bytes 239 191 189.
        /// </summary>
        /// <param name="txt">The decoded text to inspect; null or empty yields false.</param>
        /// <returns>true when the byte sequence EF BF BD occurs anywhere in the text.</returns>
        public bool isMessyCode(string txt)
        {
            if (string.IsNullOrEmpty(txt))
            {
                return false;
            }

            byte[] bytes = Encoding.UTF8.GetBytes(txt);

            // i + 2 must stay in range. The previous bound (i < Length - 3) skipped
            // the final three bytes, so a trailing replacement character was missed.
            for (int i = 0; i + 2 < bytes.Length; i++)
            {
                if (bytes[i] == 0xEF && bytes[i + 1] == 0xBF && bytes[i + 2] == 0xBD)
                {
                    return true;
                }
            }

            return false;
        }
    }
}