.NET中获取网页乱码问题
MSDN 中有一个通过 WebRequest 和 WebResponse 获取网页的示例,但我按照该示例的方法取回的网页,其中的中文字符全部是乱码。代码如下:
using System;
using System.Net;
using System.Text;
using System.IO;
/// <summary>
/// Command-line sample that issues a GET request for a URI and writes the
/// response body to the console.
/// </summary>
class ClientGet {
    /// <summary>
    /// Entry point. Fetches the URI given as the first argument and prints the
    /// response body; prints usage and returns when no argument is supplied.
    /// </summary>
    /// <param name="args">Command-line arguments; args[0] is the URI to GET.</param>
    public static void Main(string[] args)
    {
        if (args.Length < 1)
        {
            showusage();
            return;
        }

        // Get the URI from the command line.
        Uri site = new Uri(args[0]);

        // Create the request instance.
        WebRequest wReq = WebRequest.Create(site);

        // Set the HTTP-specific UserAgent property.
        HttpWebRequest httpReq = wReq as HttpWebRequest;
        if (httpReq != null)
        {
            httpReq.UserAgent = ".NET Framework Example Client";
        }

        // The using blocks release the response, its stream and the reader
        // even when reading throws, so the connection is not leaked.
        using (WebResponse wResp = wReq.GetResponse())
        {
            // Read an HTTP-specific property. The value is only read to show
            // how HTTP-specific members are reached; it is not used further.
            HttpWebResponse httpResp = wResp as HttpWebResponse;
            if (httpResp != null)
            {
                DateTime updated = httpResp.LastModified;
            }

            using (Stream respStream = wResp.GetResponseStream())
            // NOTE: Encoding.ASCII only covers 7-bit characters, so any
            // non-ASCII content in the page (for example Chinese text) is not
            // decoded correctly. Pass the page's real encoding, e.g.
            // Encoding.GetEncoding("GB2312"), to read such pages.
            using (StreamReader reader = new StreamReader(respStream, Encoding.ASCII))
            {
                // Read the entire response into a string and write it out.
                String respHTML = reader.ReadToEnd();
                Console.WriteLine(respHTML);
            }
        }
    }

    /// <summary>
    /// Writes a short usage message for the program to the console.
    /// </summary>
    public static void showusage()
    {
        Console.WriteLine("Attempts to GET a URI.");
        Console.WriteLine("\r\nUsage:");
        Console.WriteLine(" ClientGet URI");
        Console.WriteLine("Example:");
        Console.WriteLine(" ClientGet http://www.contoso.com/");
    }
}
MSDN 的示例原本面向英文环境,没有考虑中文编码的问题:它用 Encoding.ASCII 解码响应流,所以取回的中文内容都成了乱码。System.Text 里定义的几个编码类中也没有专门针对 GB2312 的类,对 URL 中的中文参数进行编码时同样会遇到问题。不过 System.Text.Encoding 提供了按名称获取编码并进行转换的方式,上述问题都可以通过编码转换来解决。这里随手写了一个示例,主要利用了 System.Text.Encoding.GetEncoding 方法。
/// <summary>
/// Click handler: requests a Baidu MP3 search page for a GB2312-encoded
/// keyword, decodes the response as GB2312, shows the page text in
/// textBox1, and pops up the text between the first "target=_blank&gt;"
/// marker and the following "&lt;font" marker.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button2_Click(object sender, System.EventArgs e)
{
    string m_Url = "http://mp3.baidu.com/m?f=ms&tn=baidump3&ct=134217728&rn=&word=" + this.ConvertToGb2312("哈哈") + "&lm=0";
    MessageBox.Show(m_Url);

    WebRequest myReq = WebRequest.Create(m_Url);
    string myWebStr;

    // Dispose the response, stream and reader deterministically so the
    // underlying connection is released even if reading fails.
    using (WebResponse myRes = myReq.GetResponse())
    using (Stream myStream = myRes.GetResponseStream())
    using (StreamReader myReader = new StreamReader(myStream, System.Text.Encoding.GetEncoding("GB2312")))
    {
        myWebStr = myReader.ReadToEnd();
    }

    textBox1.Text = myWebStr;

    // Locate the opening marker; IndexOf returns -1 when it is absent, and
    // feeding -1 into the next IndexOf/Substring would throw.
    int start = myWebStr.IndexOf(@"target=_blank>");
    if (start < 0)
    {
        MessageBox.Show("Expected markers not found in the response.");
        return;
    }

    int end = myWebStr.IndexOf(@"<font", start);
    if (end < 0)
    {
        MessageBox.Show("Expected markers not found in the response.");
        return;
    }

    // Plain int arithmetic: the former Convert.ToInt16 overflowed for spans
    // longer than 32767 characters. The slice starts at the marker itself,
    // as before.
    string result = myWebStr.Substring(start, end - start);
    MessageBox.Show(result);
}
// Empty event handler: performs no work.
// NOTE(review): presumably wired to the form's Load event by designer code
// that is not visible here — confirm before removing.
private void Form1_Load(object sender, System.EventArgs e)
{
}
// Empty event handler: performs no work.
// NOTE(review): presumably wired to a button's Click event by designer code
// that is not visible here — confirm before removing.
private void button3_Click(object sender, System.EventArgs e)
{
}
/// <summary>
/// Encodes a string as GB2312 bytes and returns those bytes URL-encoded,
/// suitable for use as a query-string value on sites that expect GB2312.
/// </summary>
/// <param name="str">The text to encode.</param>
/// <returns>The URL-encoded form of the GB2312 bytes of <paramref name="str"/>.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
public string ConvertToGb2312(string str)
{
    if (str == null)
    {
        throw new ArgumentNullException("str");
    }

    // .NET strings are already UTF-16, so they can be encoded to GB2312
    // directly; no intermediate UTF-16 byte array or Encoding.Convert call
    // is needed.
    System.Text.Encoding gb2312 = System.Text.Encoding.GetEncoding("GB2312");
    byte[] gbBytes = gb2312.GetBytes(str);

    // UrlEncode(byte[]) escapes the raw GB2312 bytes rather than re-encoding
    // the text as UTF-8.
    return System.Web.HttpUtility.UrlEncode(gbBytes);
}
}