C#下载网页源码的方法

using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Data; using System.Net; using System.IO; using System.Text.RegularExpressions;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that downloads one remote file into a local directory with <see cref="WebClient"/>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the hard-coded URL once and stores it under the target directory,
        /// using the last segment of the URL as the local file name.
        /// Any failure is written to the console instead of terminating the process.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string urlAddress = "https://files.cnblogs.com/scy251147/EDaemonSolution.zip";

            // Everything after the last '/' is the remote file name. It already carries its
            // extension, so no extra ".zip" is appended to it.
            string fileName = urlAddress.Substring(urlAddress.LastIndexOf('/') + 1);

            // The trailing separator is required: combining a bare "D:" with a file name
            // yields the drive-relative path "D:name" rather than "D:\name".
            string targetDirectory = "D:\\";
            string targetPath = Path.Combine(targetDirectory, fileName);

            try
            {
                // A single DownloadFile call streams the whole response to disk, so the file is
                // fetched only once and is not limited by a fixed-size in-memory buffer. It also
                // replaces any existing file instead of overwriting just its leading bytes.
                using (WebClient client = new WebClient())
                {
                    client.DownloadFile(urlAddress, targetPath);
                }
            }
            catch (Exception ex)
            {
                // Top-level handler of the sample: report the problem and exit normally.
                Console.WriteLine(ex.ToString());
            }
        }
    }
}

C#下载网页源码的方法，用到msxml2

using System.Text.RegularExpressions;

using MSXML2;

/// <summary>
/// Downloads the page at <paramref name="link"/> through MSXML2.XMLHTTP, decodes it as GB2312,
/// keeps the text from the opening body tag onwards, removes script blocks and line feeds,
/// and replaces every backslash with a forward slash.
/// </summary>
/// <param name="link">Address of the page to request with HTTP GET.</param>
/// <returns>
/// The cleaned-up markup. When the response carries no byte payload an empty string is returned;
/// when no body tag is found the whole decoded document is cleaned and returned instead.
/// </returns>
private string gethtm(string link)
{
    MSXML2.XMLHTTP xmlhttp = new MSXML2.XMLHTTP();
    byte[] responseBytes;
    try
    {
        // The third argument (false) requests a synchronous call, so no sleeping or polling
        // is needed between open, send and reading the response.
        xmlhttp.open("GET", link, false, null, null);
        xmlhttp.send("");

        // "as" yields null instead of throwing when the body is missing or not a byte array.
        responseBytes = xmlhttp.responseBody as byte[];
    }
    finally
    {
        // Clearing the reference does not release a COM object; release the wrapper explicitly
        // so repeated calls do not accumulate native XMLHTTP instances.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(xmlhttp);
    }

    if (responseBytes == null)
    {
        return string.Empty;
    }

    string html = System.Text.Encoding.GetEncoding("GB2312").GetString(responseBytes).Trim();

    // Keep everything from the first "<body" to the end of the document. A page without a
    // body tag previously caused an out-of-range exception; now the full text is kept.
    Match bodyMatch = Regex.Match(html, @"<\s*body(.|\n)*", RegexOptions.IgnoreCase);
    if (bodyMatch.Success)
    {
        html = bodyMatch.Value;
    }

    // Removes <script ...> ... </script> blocks and every line feed.
    // NOTE(review): "[.|\n]" is a character class (literal '.', '|' or line feed), not
    // "any character"; the pattern is kept unchanged to preserve the existing output.
    Regex scriptOrLineFeed = new Regex(@"(<[.|\n]*?script(.|\n)*?/[.|\n]*?script[\n]*>)|(\n)", RegexOptions.IgnoreCase);
    html = scriptOrLineFeed.Replace(html, "");

    html = html.Replace("\\", "/");

    return html;
}

posted @ 2013-05-16 20:53 晴天有时下鱼阅读(372) 评论(0) 编辑收藏举报

刷新页面返回顶部

晴天有时下鱼

C#下载网页源码的方法

C#下载网页源码的方法，用到msxml2

公告