C#抓取网页数据

写了一个简单的抓取网页数据的小例子,代码如下:

 1  //根据Url地址得到网页的html源码 
 2     private string GetWebContent(string Url) 
 3     {
 4         string strResult = ""; ;
 5         try 
 6         { 
 7             HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url); 
 8         //声明一个HttpWebRequest请求 
 9             request.Timeout = 30000; 
10             //设置连接超时时间 
11             request.Headers.Set("Pragma", "no-cache"); 
12             HttpWebResponse response = (HttpWebResponse)request.GetResponse(); 
13             Stream streamReceive = response.GetResponseStream(); 
14             Encoding encoding = Encoding.GetEncoding("GB2312"); 
15             StreamReader streamReader = new StreamReader(streamReceive, encoding); 
16             strResult = streamReader.ReadToEnd(); 
17         } 
18         catch 
19         { 
20             
21         } 
22     return strResult; 
23     } 
 24 //为了使用HttpWebRequest和HttpWebResponse,需添加命名空间引用(System.Net)
25 
26 //以下是程序具体实现过程: 
27     protected void btn_Click(object sender, EventArgs e) 
28     { 
29         //要抓取的URL地址 
30         string Url = "http://www.awtrip.com/"; 
31         //得到指定Url的源码 
32         string strWebContent = GetWebContent(Url);
33         //Response.Write(strWebContent);
34         //取出和数据有关的那段源码 
35         int iBodyStart = strWebContent.IndexOf("<body", 0);
36         int iStart = strWebContent.IndexOf("热门目的地旅游", iBodyStart);
37         int iTableStart = strWebContent.IndexOf("<ul", iStart);
38         int iTableEnd = strWebContent.IndexOf("</ul>", iTableStart);
39         string strWeb = strWebContent.Substring(iTableStart, iTableEnd - iTableStart + 5);
40         //生成HtmlDocument 
41         WebBrowser webb = new WebBrowser();
42         webb.Navigate("about:blank");
43         HtmlDocument htmldoc = webb.Document.OpenNew(true);
44         htmldoc.Write(strWeb);
45         HtmlElementCollection htmlTR = htmldoc.GetElementsByTagName("li");
46         StringBuilder strlist = new StringBuilder();
47         foreach (HtmlElement tr in htmlTR)
48         {
49             strlist.AppendFormat(tr.GetElementsByTagName("a")[0].InnerText+"$");
50         }
51         Response.Write(strlist.ToString().Remove(strlist.ToString().Length-1));
52         ////最后再插入数据库 
53        
54     } 

引用:

using System.Net;
using System.IO;
using System.Text;
using System.Windows.Forms;
View Code

运行时可能会遇到“当前线程不在单线程单元中,因此无法实例化 ActiveX 控件”的问题,把aspx页面顶部的AutoEventWireup设置为true就可以了;如果仍然报错,可以在 @Page 指令中加上 AspCompat="true",让页面在单线程单元(STA)线程中执行。

posted @ 2013-09-06 11:22  蓝默小筑  阅读(2734)  评论(0)  编辑  收藏  举报