Web爬虫的C#请求发送

/// <summary>
/// Small HTTP helper for a web crawler. It can send a form-encoded POST and return the
/// response body as text, or send a GET and save every stock code found in the response
/// body to a file.
/// </summary>
public class HttpControler
{
    // Referer sent with every request; some sites use it for anti-hotlinking checks.
    private const string RefererUrl = "http://www.cninfo.com.cn/cninfo-new/announcement/show";

    // Header VALUE only. HttpWebRequest emits the "User-Agent:" name itself, so the value
    // must not repeat it (the previous value did, producing "User-Agent: User-Agent:Mozilla...").
    private const string UserAgentValue = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";

    // Request timeout: 600 seconds, expressed in milliseconds.
    private const int TimeoutMilliseconds = 600 * 1000;

    // Matches fragments such as "code":"000001" (six or more digits) in the response text.
    // Built once instead of on every call.
    private static readonly Regex CodeRegex = new Regex("\"code\":\"\\d{6,}\"", RegexOptions.IgnoreCase);

    // Encoding used to decode the POST response body.
    // NOTE(review): the request declares charset=UTF-8 while the response is decoded as
    // gb2312 - confirm this matches what the target site actually returns.
    private readonly Encoding m_Encoding = Encoding.GetEncoding("gb2312");

    /// <summary>
    /// Sends <paramref name="postStr"/> as a UTF-8, form-url-encoded POST body to
    /// <paramref name="strUrl"/> and returns the response body decoded with gb2312.
    /// </summary>
    /// <param name="strUrl">Absolute HTTP(S) URL to post to.</param>
    /// <param name="postStr">Already-encoded form data (for example "a=1&amp;b=2").</param>
    /// <returns>
    /// The response text, or the literal string "NoUrl" when sending the request or reading
    /// the response fails.
    /// </returns>
    /// <remarks>
    /// Exceptions raised while creating the request (such as a malformed URL) or while
    /// encoding a null <paramref name="postStr"/> are not caught and propagate to the caller.
    /// </remarks>
    public string Request(string strUrl, string postStr)
    {
        HttpWebRequest tHWRq = CreateRequest(strUrl, "POST");

        // Advertises "Accept-Encoding: gzip, deflate" AND transparently decompresses the
        // response. Setting only the header (as before) left compressed bytes undecoded.
        tHWRq.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

        byte[] postData = Encoding.UTF8.GetBytes(postStr);

        try
        {
            tHWRq.ContentLength = postData.Length;

            // "using" guarantees the request stream is closed even if Write throws.
            using (Stream requestStream = tHWRq.GetRequestStream())
            {
                requestStream.Write(postData, 0, postData.Length);
            }

            using (HttpWebResponse tHWRp = (HttpWebResponse)tHWRq.GetResponse())
            using (Stream tStreamRp = tHWRp.GetResponseStream())
            using (StreamReader tSR = new StreamReader(tStreamRp, m_Encoding))
            {
                return tSR.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Best-effort contract kept from the original: any failure is reported through
            // the "NoUrl" sentinel. Abort releases whatever the failed request still holds.
            tHWRq.Abort();
            return "NoUrl";
        }
    }

    /// <summary>
    /// Sends a GET request to <paramref name="strUrl"/>, extracts every <c>"code":"dddddd"</c>
    /// fragment (six or more digits) from the response body and writes the extracted values
    /// to <paramref name="path"/>, one per line.
    /// </summary>
    /// <param name="strUrl">Absolute HTTP(S) URL to fetch.</param>
    /// <param name="path">Output file path; the file is created or overwritten.</param>
    /// <returns>
    /// <c>true</c> when the response was read and the file was written (even if no code was
    /// found); <c>false</c> when the request, the parsing or the file write fails.
    /// </returns>
    /// <remarks>
    /// Each written line is the text following the first ':' of the match, so the
    /// surrounding double quotes are kept (for example <c>"000001"</c>).
    /// Exceptions raised while creating the request (such as a malformed URL) are not caught.
    /// </remarks>
    public bool RequestCode(string strUrl, string path)
    {
        HttpWebRequest tHWRq = CreateRequest(strUrl, "GET");
        tHWRq.Headers["Accept-Charset"] = "GBK,utf-8;q=0.7,*;q=0.3";

        try
        {
            string result;
            using (HttpWebResponse tHWRp = (HttpWebResponse)tHWRq.GetResponse())
            using (Stream tStreamRp = tHWRp.GetResponseStream())
            using (StreamReader tSR = new StreamReader(tStreamRp))
            {
                result = tSR.ReadToEnd();
            }

            // Keep only the value part of each match: everything after the first ':'.
            List<string> lstCode = new List<string>();
            foreach (Match matPage in CodeRegex.Matches(result))
            {
                string codeItem = matPage.Value;
                lstCode.Add(codeItem.Substring(codeItem.IndexOf(':') + 1));
            }

            using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                foreach (string code in lstCode)
                {
                    sw.WriteLine(code);
                }
            }

            return true;
        }
        catch (Exception)
        {
            // Best-effort contract kept from the original: any failure yields false.
            tHWRq.Abort();
            return false;
        }
    }

    // Builds a request carrying the browser-like headers shared by the POST and GET calls.
    private static HttpWebRequest CreateRequest(string strUrl, string method)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
        request.CookieContainer = new CookieContainer();
        request.Referer = RefererUrl;
        request.Accept = "application/json, text/javascript, */*; q=0.01";
        request.Headers["Accept-Language"] = "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3";
        request.UserAgent = UserAgentValue;
        request.KeepAlive = true;
        // The original sent this Content-Type on GET as well as POST; kept for both so the
        // headers on the wire stay the same.
        request.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
        request.Method = method;
        request.Timeout = TimeoutMilliseconds;
        return request;
    }
}

 

posted @ 2016-01-28 15:36  萨姆大叔  阅读(544)  评论(0编辑  收藏  举报