C#下载网页

// Download the raw bytes of the page. WebClient implements IDisposable, so it is
// wrapped in a using block and released as soon as the download completes.
// The string literal below is a placeholder for the page URL.
Byte[] pageData;
using (System.Net.WebClient wc = new System.Net.WebClient())
{
    pageData = wc.DownloadData("网页地址");
}

// Decode the bytes with the platform default encoding.
// NOTE(review): Encoding.Default depends on the runtime and OS locale — confirm it matches the page.
string s = System.Text.Encoding.Default.GetString(pageData);

// If the page is UTF-8 encoded, decode it as UTF-8 instead to avoid garbled Chinese text:
//s = System.Text.Encoding.UTF8.GetString(pageData);

C# 获取网页源码,自动判断编码格式的新方法!

// Download the raw HTML bytes from the URL entered in textBox1.
// WebClient is disposable, so it is released as soon as the download finishes.
byte[] data;
using (var client = new System.Net.WebClient())
{
    data = client.DownloadData(this.textBox1.Text);
}

// First attempt: decode the bytes as UTF-8. Disposing the reader also disposes
// the MemoryStream it wraps.
string t_utf8;
using (var r_utf8 = new System.IO.StreamReader(new System.IO.MemoryStream(data), Encoding.UTF8))
{
    t_utf8 = r_utf8.ReadToEnd();
}

if (!isLuan(t_utf8)) // isLuan found no garbled text, so the UTF-8 decoding is accepted
{
    htm = t_utf8;
    this.Text = "utf8";
}
else
{
    // Fallback: decode with Encoding.Default, only when the UTF-8 attempt was rejected.
    // NOTE(review): the window title reports "gbk", but Encoding.Default is only GBK
    // when the platform default code page is GBK — confirm for the target runtime.
    using (var r_gbk = new System.IO.StreamReader(new System.IO.MemoryStream(data), Encoding.Default))
    {
        htm = r_gbk.ReadToEnd();
    }
    this.Text = "gbk";
}

// Show the decoded HTML in textBox2.
this.textBox2.Text = htm;


/// <summary>
/// Reports whether <paramref name="txt"/>, once encoded as UTF-8, contains the byte
/// sequence 239 191 189 (EF BF BD), which is the UTF-8 encoding of U+FFFD, the
/// Unicode replacement character. The caller uses this as its garbled-text check.
/// </summary>
/// <param name="txt">The decoded text to inspect; null or empty is treated as not garbled.</param>
/// <returns><c>true</c> if the byte sequence is found anywhere in the text; otherwise <c>false</c>.</returns>
bool isLuan(string txt)
{
    // Nothing to scan; also avoids the ArgumentNullException GetBytes throws for null.
    if (string.IsNullOrEmpty(txt))
    {
        return false;
    }

    var bytes = Encoding.UTF8.GetBytes(txt);

    // Scan every 3-byte window for 239 191 189. The bound "i + 2 < bytes.Length"
    // includes the final window, so a marker at the very end of the text is found.
    for (var i = 0; i + 2 < bytes.Length; i++)
    {
        if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
        {
            return true;
        }
    }

    return false;
}

 

posted @ 2016-09-02 23:33  lunawzh  阅读(2006)  评论(0)  编辑  收藏  举报