解决繁体页面的乱码问题
使用 HttpWebRequest 以默认设置请求 URL 时,返回的文本可能是乱码,需要显式设置以下请求参数:
// Excerpt: the properties assigned on the HttpWebRequest before the request is sent.
// The complete method that contains these statements is listed further below.
httpWebRequest.ContentType = contentType;
httpWebRequest.Referer = url;
httpWebRequest.Accept = accept;
httpWebRequest.UserAgent = userAgent;
httpWebRequest.Method = "GET";
例如,请求以下地址:
http://tw.search.yahoo.com/search?p=NBA&fr=yfp&ei=utf-8&v=0
关键是要配置请求参数!
以下是代码:
// Value GetHtml assigns to the request's ContentType property.
private string contentType = "application/x-www-form-urlencoded";

// Value GetHtml assigns to the request's Accept property: a list of media types ending in */*.
private string accept =
    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, " +
    "application/x-shockwave-flash, application/x-silverlight, " +
    "application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, " +
    "application/x-ms-application, application/x-ms-xbap, " +
    "application/vnd.ms-xpsdocument, application/xaml+xml, " +
    "application/x-silverlight-2-b1, */*";

// Value GetHtml assigns to the request's UserAgent property (an MSIE 7.0 identification string).
private string userAgent =
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; " +
    ".NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

// ISO-8859-1 encoding instance that GetHtml uses when decoding response text.
private Encoding encoding = Encoding.GetEncoding("iso-8859-1");
/// <summary>
/// Requests <paramref name="url"/> with an HTTP GET and returns the response body as text.
/// </summary>
/// <remarks>
/// The request carries the Content-Type, Accept and User-Agent values held in the
/// <c>contentType</c>, <c>accept</c> and <c>userAgent</c> fields, and sends the URL itself
/// as the Referer. The body is decoded with the character set declared by the response;
/// when none is declared, or its name is not recognised, the <c>encoding</c> field is used.
/// </remarks>
/// <param name="url">Address to request; also sent as the Referer header.</param>
/// <returns>The decoded response body.</returns>
public string GetHtml(string url)
{
    HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
    httpWebRequest.ContentType = contentType;
    httpWebRequest.Referer = url;
    httpWebRequest.Accept = accept;
    httpWebRequest.UserAgent = userAgent;
    httpWebRequest.Method = "GET";

    // Let the framework advertise and transparently undo gzip/deflate compression.
    // Wrapping the stream in a DeflateStream unconditionally fails whenever the server
    // answers with an uncompressed (or gzip-compressed) body.
    httpWebRequest.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    // The using blocks release the connection and the streams even if reading throws.
    using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
    using (Stream responseStream = httpWebResponse.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(responseStream, ResolveResponseEncoding(httpWebResponse.CharacterSet)))
    {
        return streamReader.ReadToEnd();
    }
}

/// <summary>
/// Maps the character-set name reported by a response to an <see cref="Encoding"/>.
/// </summary>
/// <param name="charset">Character-set name from the response; may be null or empty.</param>
/// <returns>
/// The matching encoding, or the <c>encoding</c> field when the name is missing or unknown.
/// </returns>
private Encoding ResolveResponseEncoding(string charset)
{
    if (!string.IsNullOrEmpty(charset))
    {
        try
        {
            // Some servers quote the value (charset="utf-8"); strip quotes and padding first.
            return Encoding.GetEncoding(charset.Trim().Trim('"'));
        }
        catch (System.ArgumentException)
        {
            // Unknown or unsupported charset name: deliberately fall through to the default below.
        }
    }

    return encoding;
}
以下是调用代码:
// Sample call: fetch the search page used as the example in this note.
const string searchUrl = "http://tw.search.yahoo.com/search?p=NBA&fr=yfp&ei=utf-8&v=0";
string rtnHtml = GetHtml(searchUrl);