using System;
using System.Collections;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.IO;
using System.Linq;
using System.Net;
using System.Reflection;
using System.Runtime.Serialization.Json;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Script.Serialization;

#region Fetch the HTML content of a web page

/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes it to a string.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="encoding">
/// Encoding used to decode the downloaded bytes. Pass <c>null</c> to have the
/// encoding detected from a <c>charset=</c> declaration inside the page itself.
/// </param>
/// <returns>
/// The decoded page text. When <paramref name="encoding"/> is <c>null</c> and no
/// usable charset declaration is found, the UTF-8 decoding is returned.
/// </returns>
/// <exception cref="WebException">Propagated from <see cref="WebClient.DownloadData(string)"/> when the download fails.</exception>
public static string GetHtml(string url, Encoding encoding)
{
    byte[] buf;

    // WebClient is IDisposable; release it as soon as the bytes are in memory.
    using (var client = new WebClient())
    {
        buf = client.DownloadData(url);
    }

    if (encoding != null)
    {
        return encoding.GetString(buf);
    }

    // Provisional UTF-8 decode: the charset declaration is ASCII text, so it is
    // readable here even when the page body is in a different encoding.
    string html = Encoding.UTF8.GetString(buf);
    Encoding detected = GetEncoding(html);

    // Compare code pages rather than references so that any UTF-8 instance
    // is recognised and the buffer is not decoded a second time.
    if (detected == null || detected.CodePage == Encoding.UTF8.CodePage)
    {
        return html;
    }

    return detected.GetString(buf);
}

/// <summary>
/// Extracts the encoding named by the first <c>charset=</c> declaration found in
/// the given HTML text.
/// </summary>
/// <param name="html">HTML text to scan; may be <c>null</c> or empty.</param>
/// <returns>
/// The matching <see cref="Encoding"/>, or <c>null</c> when the text is null or
/// empty, contains no charset declaration, or names an encoding that
/// <see cref="Encoding.GetEncoding(string)"/> does not recognise.
/// </returns>
public static Encoding GetEncoding(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return null;
    }

    // Accepts both `charset=utf-8` (inside a Content-Type value) and the HTML5
    // form `<meta charset="utf-8">`: optional whitespace around '=' and an
    // optional opening quote before the encoding name.
    string pattern = @"(?i)\bcharset\s*=\s*[""']?(?<charset>[-a-zA-Z_0-9]+)";
    Match match = Regex.Match(html, pattern);
    if (!match.Success)
    {
        // No declaration: return null directly instead of provoking an exception.
        return null;
    }

    try
    {
        return Encoding.GetEncoding(match.Groups["charset"].Value);
    }
    catch (ArgumentException)
    {
        // The declared name is not a supported encoding.
        return null;
    }
}

#endregion