C# 按地址获取网页数据并解析
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Windows.Forms;

namespace OneHand
{
    /// <summary>
    /// Helper for downloading the HTML source of a web page.
    /// </summary>
    class googleMap
    {
        /// <summary>
        /// Fetches the page at <paramref name="Url"/> and returns its body decoded as GB2312.
        /// </summary>
        /// <param name="Url">Address of the page to download.</param>
        /// <returns>
        /// The response body as text. If any step fails, a message box is shown and an
        /// empty string is returned; no exception propagates to the caller.
        /// </returns>
        public static string GetWebContent(string Url)
        {
            string strResult = "";
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                request.Timeout = 30000; // timeout in milliseconds
                request.Headers.Set("Pragma", "no-cache");

                // Dispose the response, its stream and the reader deterministically so the
                // underlying connection is released even when reading fails part-way.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    Encoding encoding = Encoding.GetEncoding("GB2312");
                    using (Stream streamReceive = response.GetResponseStream())
                    using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
                    {
                        strResult = streamReader.ReadToEnd();
                    }
                }
            }
            catch
            {
                // Deliberate best-effort: any failure is reported to the user and the
                // caller receives an empty string.
                MessageBox.Show("出错");
            }
            return strResult;
        }
    }
}
调用解析
/// <summary>
/// Requests the ditu.google.cn maps search page for <paramref name="argAddress"/>, loads the
/// returned HTML into an in-memory document and returns the second line of text of the
/// element with id "panel_A_2".
/// </summary>
/// <param name="argAddress">
/// Search text. It is URL-escaped before being placed in the query string; null is treated
/// as an empty string.
/// </param>
/// <returns>The extracted line, or an empty string when nothing usable was found.</returns>
private string Search(string argAddress)
{
    // Build the URL to fetch. The search text is escaped so characters such as
    // '&', '#' or '+' cannot break or truncate the query string.
    StringBuilder sb = new StringBuilder();
    sb.Append("http://ditu.google.cn/maps");
    sb.Append("?");
    sb.Append("hl=zh-CN&newwindow=1&safe=strict");
    sb.Append("&q=");
    sb.Append(Uri.EscapeDataString(argAddress ?? string.Empty));
    sb.Append("&bav=on.2,or.&bvm=bv.44158598,d.dGI&biw=1440&bih=775&um=1&ie=UTF-8&sa=N&tab=wl");

    // Download the HTML source of that URL.
    string strWebContent = googleMap.GetWebContent(sb.ToString());
    if (string.IsNullOrEmpty(strWebContent))
    {
        // Nothing was downloaded, so there is nothing to parse.
        return string.Empty;
    }

    // WebBrowser is IDisposable; release it as soon as parsing is done.
    using (WebBrowser webb = new WebBrowser())
    {
        // Build an HtmlDocument from the downloaded markup.
        webb.Navigate("about:blank");
        if (webb.Document == null)
        {
            return string.Empty;
        }

        HtmlDocument htmldoc = webb.Document.OpenNew(true);
        htmldoc.Write(strWebContent);

        // The previous implementation only produced a result while iterating TR elements,
        // so a page without any TR still yields an empty string.
        if (htmldoc.GetElementsByTagName("TR").Count == 0)
        {
            return string.Empty;
        }

        // Both lookups are document-wide, so they are performed once instead of once per row.
        // "resultspanel" must exist, as before, but the actual text comes from "panel_A_2".
        if (htmldoc.GetElementById("resultspanel") == null)
        {
            return string.Empty;
        }

        HtmlElement panel = htmldoc.GetElementById("panel_A_2");
        if (panel == null)
        {
            return string.Empty;
        }

        string panelText = panel.InnerText;
        if (string.IsNullOrEmpty(panelText))
        {
            return string.Empty;
        }

        // Keep empty lines so the index of the wanted line stays stable.
        string[] contentLines = panelText.Split(new string[] { "\r\n" }, StringSplitOptions.None);
        if (contentLines.Length < 2)
        {
            return string.Empty;
        }

        return contentLines[1];
    }
}