爬虫 蜘蛛 信息采集

HttpWebRequest
// Download a page with HttpWebRequest and read the whole response body as text.
// The empty URL is a placeholder for the address to fetch, and `encoding` must be a
// System.Text.Encoding supplied by the surrounding code.
System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("");
// Send a browser-style User-Agent string with the request.
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
string content;
// The using blocks release the response (and its connection), the stream and the
// reader even when reading throws, instead of relying on Close() being reached.
using (System.Net.WebResponse response = request.GetResponse())
using (System.IO.Stream resStream = response.GetResponseStream())
using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding))
{
    content = sr.ReadToEnd();
}

WebRequest, WebClient
// Download a page with WebClient and decode the raw bytes to a string.
// The empty URL is a placeholder for the address to fetch.
string content;
// WebClient is IDisposable; the using block releases it even if the download throws.
using (System.Net.WebClient wc = new System.Net.WebClient())
{
    // Authenticate with the credentials of the current security context.
    wc.Credentials = System.Net.CredentialCache.DefaultCredentials;
    byte[] pageData = wc.DownloadData("");
    // NOTE(review): Encoding.Default is the platform default encoding; confirm it
    // matches the charset of the downloaded page, otherwise the text will be garbled.
    content = System.Text.Encoding.Default.GetString(pageData);
}

/// <summary>
/// Cookie container attached to the login request; it collects the cookies
/// (and therefore the session) returned by the server.
/// </summary>
public static CookieContainer cc = new CookieContainer();

/// <summary>
/// Logs in by POSTing the given form fields to <paramref name="targetURL"/> and
/// stores the resulting cookie header in application state under "cookieHeader".
/// </summary>
/// <param name="targetURL">URL to post to; it must be the URL that performs the login.</param>
/// <param name="param">
/// Form fields to submit (for example user name and password). Names and values
/// must be passed unencoded; they are URL-encoded here.
/// </param>
/// <returns>Always the string "OK"; the response body is not read.</returns>
public static string PostAndGetHTML(string targetURL, Hashtable param)
{
    // After URL-encoding the body is pure ASCII, so ASCII bytes are lossless.
    byte[] data = Encoding.ASCII.GetBytes(BuildFormData(param));

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(targetURL);
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";
    request.ContentLength = data.Length;
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.1124)";
    request.AllowAutoRedirect = true;
    request.KeepAlive = true;
    // Attach the cookie container before the request is started so that cookies
    // already held in it can be sent and cookies set during redirects are captured.
    request.CookieContainer = cc;

    // Write the form body into the request.
    using (Stream newStream = request.GetRequestStream())
    {
        newStream.Write(data, 0, data.Length);
    }

    // Send the request; disposing the response releases the underlying connection.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // Read the cookies that apply to the login URL (previously a hard-coded
        // address was used here, which ignored targetURL).
        string cookieHeader = request.CookieContainer.GetCookieHeader(new Uri(targetURL));

        // Save the cookie header in application state for later requests.
        HttpContext.Current.Application.Lock();
        try
        {
            HttpContext.Current.Application["cookieHeader"] = cookieHeader;
        }
        finally
        {
            // Always release the application lock, even if the assignment throws.
            HttpContext.Current.Application.UnLock();
        }
    }

    return "OK";
}

/// <summary>
/// Builds an application/x-www-form-urlencoded body ("a=1&amp;b=2") from the entries
/// of <paramref name="param"/>, URL-encoding every name and value.
/// </summary>
private static string BuildFormData(Hashtable param)
{
    StringBuilder formData = new StringBuilder();
    foreach (DictionaryEntry de in param)
    {
        if (formData.Length > 0)
        {
            formData.Append('&');
        }

        // A null value is submitted as an empty field instead of crashing.
        string value = de.Value == null ? "" : de.Value.ToString();
        formData.Append(Uri.EscapeDataString(de.Key.ToString()));
        formData.Append('=');
        formData.Append(Uri.EscapeDataString(value));
    }

    return formData.ToString();
}

/// <summary>
/// Fetches another page with a GET request, sending the cookie header that
/// <see cref="PostAndGetHTML"/> stored in application state.
/// </summary>
/// <param name="strUrl">URL of the page to fetch.</param>
/// <returns>The response body decoded as UTF-8.</returns>
/// <exception cref="InvalidOperationException">
/// No cookie header has been stored yet, i.e. the login has not been performed.
/// </exception>
public static string ReGetHtml(string strUrl)
{
    HttpWebRequest request1 = (HttpWebRequest)WebRequest.Create(strUrl);

    object storedCookies = HttpContext.Current.Application["cookieHeader"];
    if (storedCookies == null)
    {
        // Fail with a clear message instead of a bare NullReferenceException.
        throw new InvalidOperationException("No cookie header is stored; call PostAndGetHTML first.");
    }

    string cookhead = storedCookies.ToString();
    request1.Method = "GET";
    request1.Headers.Add("cookie:" + cookhead);
    request1.KeepAlive = true;
    request1.AllowAutoRedirect = true;

    // Dispose the response, stream and reader so the connection is released
    // even when reading the body throws.
    using (HttpWebResponse response1 = (HttpWebResponse)request1.GetResponse())
    using (Stream stream2 = response1.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream2, System.Text.Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}


posted on 2010-03-30 09:27  风乔  阅读(275)  评论(0)  编辑  收藏  举报

导航