网站数据抓取

1:
/// <summary>
/// Posts a question-list query to the remote handler, parses the response and
/// passes the deserialized question payload to <c>InserData</c>.
/// </summary>
/// <remarks>
/// The response body is expected to have the form <c>header###payload</c>.
/// The header segment is deserialized to read its <c>quescount</c> value; the
/// payload segment is deserialized and handed to <c>InserData</c>.
/// </remarks>
/// <param name="bankId">Sent as the <c>bankid</c> form field.</param>
/// <param name="categoryId">Sent as the <c>categoryid</c> form field.</param>
/// <param name="curpage">Sent as the <c>curpage</c> form field.</param>
/// <param name="pagesize">
/// Divisor used for the page-count calculation; a value of 0 raises
/// <see cref="DivideByZeroException"/>. Whether it is also posted is not
/// visible here (part of the parameter list was omitted in the listing).
/// </param>
/// <returns>The value returned by <c>InserData</c>.</returns>
/// <exception cref="FormatException">
/// The response does not contain the <c>###</c> separator.
/// </exception>
private bool ImportSubjectQuesData(int bankId, int categoryId, int curpage, int pagesize)
{
    const string separator = "###";
    const string url = "http://www.*****.com/Web/Handler1.ashx?action=queslistquery";

    string postData = "bankid=" + System.Web.HttpUtility.UrlEncode(bankId.ToString(), Encoding.ASCII);
    postData += "&categoryid=" + System.Web.HttpUtility.UrlEncode(categoryId.ToString(), Encoding.ASCII);
    postData += "&curpage=" + System.Web.HttpUtility.UrlEncode(curpage.ToString(), Encoding.ASCII);
    // ... remaining POST parameters were omitted in the original listing ...

    byte[] byteArray = Encoding.UTF8.GetBytes(postData);

    var webRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));
    webRequest.Method = "POST";
    webRequest.ContentType = "application/x-www-form-urlencoded";
    webRequest.ContentLength = byteArray.Length;

    // Dispose the request stream even if the write fails.
    using (Stream requestStream = webRequest.GetRequestStream())
    {
        requestStream.Write(byteArray, 0, byteArray.Length);
    }

    // Receive: the response must be disposed, otherwise the underlying
    // connection is not released back to the pool.
    string result;
    using (var response = (HttpWebResponse)webRequest.GetResponse())
    using (var reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        result = reader.ReadToEnd();
    }

    int index = result.IndexOf(separator, StringComparison.Ordinal);
    if (index < 0)
    {
        throw new FormatException(
            "Response does not contain the expected '" + separator + "' separator.");
    }

    var retCount = DeserializeObject(result.Substring(0, index));
    int count = Convert.ToInt32(retCount.quescount.Value);

    // NOTE(review): pageCount is computed but not consumed anywhere in the
    // visible code - presumably intended for paging by the caller; confirm.
    int pageCount = count % pagesize == 0 ? count / pagesize : count / pagesize + 1;

    var questlt = DeserializeObject(result.Substring(index + separator.Length));

    // Persist the downloaded data.
    return InserData(questlt);
}


2:

// Build the target address by appending i to the base URL typed into txtshiUrl.
string url = txtshiUrl.Text + i;
var webRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));
webRequest.Method = "GET";
// Present a browser User-Agent and use the page itself as the Referer.
webRequest.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
webRequest.ContentType = "text/html; charset=gbk";
webRequest.KeepAlive = true;
webRequest.Referer = url;
webRequest.CookieContainer = new CookieContainer();

// Declared outside the using blocks so the HTML stays available afterwards.
string shtml;

// Dispose the response once the body has been read; an unclosed response
// keeps its connection busy and later requests to the same host can block.
using (var webResponse = (HttpWebResponse)webRequest.GetResponse())
{
    responseStream = webResponse.GetResponseStream();

    // The body is decoded as GBK. Disposing the reader also closes responseStream.
    using (var streamReader = new StreamReader(responseStream, Encoding.GetEncoding("gbk")))
    {
        shtml = streamReader.ReadToEnd();
    }
}

 

 

posted on 2013-04-21 23:29  朝着  阅读(207)  评论(0)  编辑  收藏  举报