帮助类5

#region  获取网页的HTML内容
 /// <summary>
 /// Downloads the resource at <paramref name="url"/> and decodes it to a string.
 /// </summary>
 /// <param name="url">Address of the page to download.</param>
 /// <param name="encoding">
 /// Encoding used to decode the downloaded bytes. Pass <c>null</c> to have the
 /// encoding detected from a <c>charset=</c> declaration inside the page itself.
 /// </param>
 /// <returns>
 /// The page content decoded with <paramref name="encoding"/> when it is supplied;
 /// otherwise decoded with the detected charset, falling back to UTF-8 when no
 /// usable charset is found.
 /// </returns>
 public static string GetHtml(string url, Encoding encoding)
 {
     byte[] buf;

     // WebClient is IDisposable; release it as soon as the download completes.
     using (WebClient client = new WebClient())
     {
         buf = client.DownloadData(url);
     }

     if (encoding != null)
     {
         return encoding.GetString(buf);
     }

     // Provisional UTF-8 decode: the charset declaration consists of ASCII
     // characters, so it can be located even if the page uses another encoding.
     string html = Encoding.UTF8.GetString(buf);
     Encoding detected = GetEncoding(html);

     // Compare by code page rather than by reference so that any UTF-8 instance
     // is recognised and the bytes are not decoded a second time for nothing.
     if (detected == null || detected.CodePage == Encoding.UTF8.CodePage)
     {
         return html;
     }

     return detected.GetString(buf);
 }
 // Extracts the page's Encoding from the charset declared in its HTML content
13         public static Encoding GetEncoding(string html)
14          {




using System;
 2 using System.Collections.Generic;
 3 using System.Linq;
 4 using System.Text;
 5 using System.IO;
 6 using System.Net;
 7 using System.Web;
 8 using System.Security.Cryptography;
 9 using System.Text.RegularExpressions;
10 using System.Web.Script.Serialization;
11 using System.Data;
12 using System.Collections;
13 using System.Runtime.Serialization.Json;
14 using System.Configuration;
15 using System.Reflection;
// Look for a "charset=<name>" declaration (case-insensitive), e.g. inside a <meta> tag.
string pattern = @"(?i)\bcharset=(?<charset>[-a-zA-Z_0-9]+)";
Match match = Regex.Match(html, pattern);

// No declaration found: report "unknown" directly instead of relying on
// Encoding.GetEncoding("") throwing an ArgumentException.
if (!match.Success)
{
    return null;
}

string charset = match.Groups["charset"].Value;
try
{
    return Encoding.GetEncoding(charset);
}
catch (ArgumentException)
{
    // The declared charset name is not a code page name known to the runtime.
    return null;
}
}
#endregion
posted on 2016-08-25 23:11  張暁磊  阅读(141)  评论(0编辑  收藏  举报