.NET中获取网页乱码问题

MSDN中有通过WebRequest和WebResponse来获取网页的示例,但是我试着用该示例的方法取回网页后,其中的中文字符都是乱码。代码如下:
using System;
using System.Net;
using System.Text;
using System.IO;

/// <summary>
/// Console sample that issues a GET request for the URI given on the command
/// line and writes the response body to the console.
/// </summary>
class ClientGet {
   /// <summary>
   /// Entry point. Expects the absolute URI to fetch as the first argument;
   /// prints usage and returns when it is missing or is not a valid absolute URI.
   /// </summary>
   /// <param name="args">Command-line arguments; args[0] is the URI.</param>
   public static void Main(string[] args)
   {
      if (args.Length < 1)
      {
        showusage();
        return;
      }

      // Get the URI from the command line. TryCreate avoids an unhandled
      // UriFormatException when the argument is not a valid absolute URI.
      Uri site;
      if (!Uri.TryCreate(args[0], UriKind.Absolute, out site))
      {
        showusage();
        return;
      }

      // Create the request instance.
      WebRequest wReq = WebRequest.Create(site);

      // Set the HTTP-specific UserAgent property.
      HttpWebRequest httpReq = wReq as HttpWebRequest;
      if (httpReq != null)
      {
        httpReq.UserAgent = ".NET Framework Example Client";
      }

      // The using statements release the response, its stream and the reader
      // even if reading or writing throws.
      using (WebResponse wResp = wReq.GetResponse())
      using (Stream respStream = wResp.GetResponseStream())
      // NOTE: Encoding.ASCII cannot represent non-ASCII bytes, so a page served
      // in GB2312 or UTF-8 comes out garbled. Pass the page's real encoding
      // (for example Encoding.GetEncoding("GB2312")) to decode such pages.
      using (StreamReader reader = new StreamReader(respStream, Encoding.ASCII))
      {
        // Read the entire response into a string and write it to the console.
        Console.WriteLine(reader.ReadToEnd());
      }
   }

   /// <summary>
   /// Writes a short description and usage example to the console.
   /// </summary>
   public static void showusage()
   {
      Console.WriteLine("Attempts to GET a URI.");
      Console.WriteLine("\r\nUsage:");
      Console.WriteLine("  ClientGet URI");
      Console.WriteLine("Example:");
      Console.WriteLine("  ClientGet http://www.contoso.com/");
   }
}

因为MSDN的示例原本是针对英文网页编写的,读取响应流时使用的是Encoding.ASCII,没有考虑中文编码的问题,所以取回的中文内容都变成了乱码。而且System.Text里面定义的几个编码类中也没有专门针对GB2312的编码转换类,在进行Url编码的时候同样会存在问题。不过System.Text.Encoding提供了进行编码转换的方式,针对这些问题,只要对编码进行转换即可。这里随手写了一个示例,主要利用了System.Text.Encoding.GetEncoding方法。

  private void button2_Click(object sender, System.EventArgs e)
  {
   

   string m_Url="http://mp3.baidu.com/m?f=ms&tn=baidump3&ct=134217728&rn=&word="+this.ConvertToGb2312("哈哈")+"&lm=0";

   MessageBox.Show(m_Url);
   WebRequest myReq=WebRequest.Create(m_Url);
   
   WebHeaderCollection myHead=myReq.Headers;
   

   WebResponse myRes=myReq.GetResponse();

   
   Stream myStream=myRes.GetResponseStream();
   

   StreamReader myReader=new StreamReader(myStream,System.Text.Encoding.GetEncoding("GB2312"));
   
   string myWebStr=myReader.ReadToEnd();

   //MessageBox.Show(myWebStr);

   textBox1.Text=myWebStr;

   Int32 i=myWebStr.IndexOf(@"target=_blank>");
   Int32 m=myWebStr.IndexOf(@"<font",i);
   string result=myWebStr.Substring(i,Convert.ToInt16(m-i));
   MessageBox.Show(result);
   
  }

  private void Form1_Load(object sender, System.EventArgs e)
  {
  
  }

  private void button3_Click(object sender, System.EventArgs e)
  {
   
  }

  public string ConvertToGb2312(string str)
  {
   String m_Start=str;
   
   //String s=HttpUtility.
   //把unicode的转换为GB2312
   System.Text.UnicodeEncoding unicode=new UnicodeEncoding();

   System.Text.Encoding gb2312=System.Text.Encoding.GetEncoding("GB2312");

   byte[] m=unicode.GetBytes(m_Start);

   byte[] s;
   //进行转换
   s=System.Text.Encoding.Convert(unicode,gb2312,m);

   //string m_End=gb2312.GetString(s);
   
   //string m_End=System.Web.HttpUtility.UrlDecode("http://www.baidu.com/s?ie=gb2312&bs=C%23%2Curl%B5%D8%D6%B7%B1%E0%C2%EB&sr=&z=&wd=C%23%2Cunicode%2C%D7%AA%BB%BB%2CGB2312&ct=0&cl=3&f=8");

   return System.Web.HttpUtility.UrlEncode(s);
  }

 }

posted @ 2012-06-26 16:53  老Key  阅读(234)  评论(0)  编辑  收藏  举报