Web爬虫的C#请求发送
/// <summary>
/// Small HTTP helper for a web crawler: sends a form-encoded POST and returns the
/// response text, or sends a GET and writes the stock codes found in the response
/// to a file.
/// </summary>
public class HttpControler
{
    // Some sites check the Referer header as an anti-hotlinking measure.
    private const string Referer = "http://www.cninfo.com.cn/cninfo-new/announcement/show";

    // Header value only; the "User-Agent:" name is added by HttpWebRequest itself.
    private const string UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";

    // Request timeout: 600 seconds, expressed in milliseconds.
    private const int TimeoutMilliseconds = 600 * 1000;

    // Matches "code":"<six or more digits>" entries in the response body.
    private static readonly Regex CodePattern = new Regex("\"code\":\"\\d{6,}\"", RegexOptions.IgnoreCase);

    // Encoding used to decode the POST response body.
    private readonly Encoding m_Encoding = Encoding.GetEncoding("gb2312");

    /// <summary>
    /// Sends <paramref name="postStr"/> as a UTF-8, form-url-encoded POST body to
    /// <paramref name="strUrl"/> and returns the response body decoded as gb2312.
    /// </summary>
    /// <param name="strUrl">Absolute HTTP(S) URL to post to.</param>
    /// <param name="postStr">Already url-encoded form data, e.g. <c>a=1&amp;b=2</c>.</param>
    /// <returns>
    /// The response text, or the sentinel string <c>"NoUrl"</c> when sending the
    /// request or reading the response fails for any reason.
    /// </returns>
    public string Request(string strUrl, string postStr)
    {
        HttpWebRequest request = CreateRequest(strUrl, "POST");

        // Let the framework advertise gzip/deflate AND transparently decompress the
        // response. Setting the Accept-Encoding header by hand would hand compressed
        // bytes straight to the StreamReader below.
        request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

        // The body is encoded as UTF-8 to match the charset declared in Content-Type.
        byte[] postData = Encoding.UTF8.GetBytes(postStr);

        try
        {
            request.ContentLength = postData.Length;
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(postData, 0, postData.Length);
            }

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, m_Encoding))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Deliberate best-effort: callers test for the "NoUrl" sentinel.
            return "NoUrl";
        }
        finally
        {
            // Always release the request, on success and on failure alike.
            request.Abort();
        }
    }

    /// <summary>
    /// Sends a GET to <paramref name="strUrl"/>, extracts every
    /// <c>"code":"dddddd"</c> entry (six or more digits) from the response and writes
    /// one code per line to <paramref name="path"/>, overwriting any existing file.
    /// </summary>
    /// <param name="strUrl">Absolute HTTP(S) URL to fetch.</param>
    /// <param name="path">Destination file path; the file is created or truncated.</param>
    /// <returns>
    /// <c>true</c> when the response was fetched and the file written; <c>false</c>
    /// when the request, the parsing or the file write fails for any reason.
    /// </returns>
    public bool RequestCode(string strUrl, string path)
    {
        HttpWebRequest request = CreateRequest(strUrl, "GET");
        request.Headers["Accept-Charset"] = "GBK,utf-8;q=0.7,*;q=0.3";

        try
        {
            string body;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream))
            {
                body = reader.ReadToEnd();
            }

            List<string> codes = ExtractCodes(body);

            using (FileStream fileStream = new FileStream(path, FileMode.Create, FileAccess.Write))
            using (StreamWriter writer = new StreamWriter(fileStream))
            {
                foreach (string code in codes)
                {
                    writer.WriteLine(code);
                }
            }

            return true;
        }
        catch (Exception)
        {
            // Deliberate best-effort: callers only need to know success or failure.
            return false;
        }
        finally
        {
            // Always release the request, on success and on failure alike.
            request.Abort();
        }
    }

    // Builds a request carrying the headers shared by the POST and GET entry points.
    private static HttpWebRequest CreateRequest(string url, string method)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.CookieContainer = new CookieContainer();
        request.Referer = Referer;
        request.Accept = "application/json, text/javascript, */*; q=0.01";
        request.Headers["Accept-Language"] = "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3";
        request.UserAgent = UserAgent;
        request.KeepAlive = true;
        request.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
        request.Method = method;
        request.Timeout = TimeoutMilliseconds;
        return request;
    }

    // Returns the value part of every "code":"..." match in document order.
    // Each value keeps its surrounding double quotes, so a line reads "000001".
    private static List<string> ExtractCodes(string text)
    {
        List<string> codes = new List<string>();
        foreach (Match match in CodePattern.Matches(text))
        {
            string item = match.Value;
            codes.Add(item.Substring(item.IndexOf(':') + 1));
        }

        return codes;
    }
}