C#下载网页源码的方法
using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Data; using System.Net; using System.IO; using System.Text.RegularExpressions;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that downloads a single file over HTTP(S) with <see cref="WebClient"/>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the archive at a fixed URL and saves it under <c>D:\</c>,
        /// using the last path segment of the URL as the file name.
        /// Any failure is reported on the console instead of terminating the process
        /// with an unhandled exception.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string URLAddress = "https://files.cnblogs.com/scy251147/EDaemonSolution.zip";

            // Everything after the last '/' is the file name. It already carries its
            // extension, so no extra ".zip" is appended (that would produce ".zip.zip").
            string fileName = URLAddress.Substring(URLAddress.LastIndexOf('/') + 1);
            string destinationPath = "D:" + "\\" + fileName;

            try
            {
                // Dispose the client even when the download throws.
                using (WebClient client = new WebClient())
                {
                    // A single DownloadFile call streams the whole response to disk:
                    // no fixed-size buffer that would cut off large files, no second
                    // request for the same resource, and an existing file at the
                    // destination is overwritten rather than partially patched.
                    client.DownloadFile(URLAddress, destinationPath);
                }
            }
            catch (Exception ex)
            {
                // Top-level handler of a console sample: report network and file-system
                // errors alike and let the program end normally.
                Console.WriteLine(ex.ToString());
            }
        }
    }
}
C#下载网页源码的方法,用到msxml2
using System.Text.RegularExpressions;
using MSXML2;
/// <summary>
/// Downloads the page at <paramref name="link"/> through MSXML2 and returns a
/// reduced version of its markup: everything from the first <c>&lt;body</c> tag
/// onward, with script elements and newline characters removed and every
/// backslash replaced by a forward slash.
/// </summary>
/// <param name="link">Address of the page to request with an HTTP GET.</param>
/// <returns>
/// The processed markup. When the page contains no <c>&lt;body</c> tag, the whole
/// downloaded text is processed instead.
/// </returns>
private string gethtm(string link)
{
    // Third argument "false" makes the request synchronous, so send() returns
    // only after the response has arrived; no credentials are supplied.
    MSXML2.XMLHTTP xmlhttp = new MSXML2.XMLHTTP();
    xmlhttp.open("GET", link, false, null, null);
    xmlhttp.send("");

    // The raw response bytes are always decoded as GB2312.
    byte[] responseBytes = (byte[])xmlhttp.responseBody;
    string html = Encoding.GetEncoding("GB2312").GetString(responseBytes).Trim();

    // Keep the text from the first "<body" to the end. Locating the tag and
    // slicing is the same result as matching "<\s*body" followed by "everything",
    // without indexing into an empty match collection when the tag is missing.
    Match bodyStart = Regex.Match(html, @"<\s*body", RegexOptions.IgnoreCase);
    if (bodyStart.Success)
    {
        html = html.Substring(bodyStart.Index);
    }

    // Remove script elements and all newline characters.
    // - Singleline lets '.' match '\n' too, replacing the slow "(.|\n)" idiom.
    // - "[.|\n]" is a character class (literal '.', '|' or newline); it is kept
    //   unchanged so exactly the same tags are matched as before.
    html = Regex.Replace(
        html,
        @"<[.|\n]*?script.*?/[.|\n]*?script[\n]*>|\n",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    return html.Replace("\\", "/");
}