抓取网页数据

 /// <summary>
 /// Downloads the page at <paramref name="strSourceUrl"/>, decodes it as GB2312 and returns
 /// every case-insensitive match of <paramref name="strRegex"/> in the page text.
 /// The original summary describes this as fetching data from People's Daily Online.
 /// </summary>
 /// <param name="strSourceUrl">Address of the page to download.</param>
 /// <param name="strRegex">
 /// Regular expression applied to the downloaded text,
 /// e.g. <c>&lt;li&gt;&lt;span&gt;\d+&lt;/span&gt;.*?&lt;\/li&gt;</c>.
 /// </param>
 /// <param name="source">
 /// Not used by the visible body of this method; kept so existing callers keep compiling.
 /// </param>
 /// <returns>
 /// The matches found in the page, or <c>null</c> when the request, the decoding or the
 /// construction of the pattern throws.
 /// </returns>
 private MatchCollection GetDataInternetWeb(string strSourceUrl,string strRegex,int source)
 {
     try
     {
         HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strSourceUrl);

         string pageText;
         // The using blocks release the response, its stream and the reader even when
         // reading throws; otherwise the underlying connection stays open.
         using (WebResponse response = request.GetResponse())
         using (Stream body = response.GetResponseStream())
         using (StreamReader reader = new StreamReader(body, System.Text.Encoding.GetEncoding("gb2312")))
         {
             pageText = reader.ReadToEnd();
         }

         return Regex.Matches(pageText, strRegex, RegexOptions.IgnoreCase);
     }
     catch (Exception ex)
     {
         // NOTE(review): the original handler body was collapsed in the published listing,
         // so whatever it did is unknown - restore it here if the real source is available.
         // The broad catch is kept on purpose: callers rely on null to signal any failure.
         System.Diagnostics.Trace.TraceError("GetDataInternetWeb failed for '{0}': {1}", strSourceUrl, ex);
         return null;
     }
 }

 

posted on 2012-12-24 10:00  hardy_Wang  阅读(173)  评论(0)  编辑  收藏  举报

导航