C# 按地址获取网页数据并解析

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;

namespace OneHand
{
    /// <summary>
    /// Helper that downloads a web page and returns its HTML source as text.
    /// </summary>
    class googleMap
    {
        /// <summary>
        /// Fetches the resource at <paramref name="Url"/> and returns the response body as a string.
        /// </summary>
        /// <param name="Url">
        /// Address to fetch. The request is cast to <see cref="HttpWebRequest"/>, so it must be an
        /// http/https URL; anything else ends up in the error path below.
        /// </param>
        /// <returns>
        /// The response body decoded as GB2312, or an empty string if any step fails
        /// (in which case a message box is shown to the user).
        /// </returns>
        public static string GetWebContent(string Url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                // Give up if no response arrives within 30 seconds (value is in milliseconds).
                request.Timeout = 30000;
                // Ask intermediaries not to serve a cached copy.
                request.Headers.Set("Pragma", "no-cache");

                // NOTE(review): the body is always decoded as GB2312 regardless of what the
                // server declares in Content-Type - confirm this matches the pages being fetched.
                Encoding encoding = Encoding.GetEncoding("GB2312");

                // Dispose the response, its stream and the reader deterministically so the
                // underlying connection is released even when reading throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream streamReceive = response.GetResponseStream())
                using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
                {
                    return streamReader.ReadToEnd();
                }
            }
            catch (Exception)
            {
                // Best-effort contract: any failure (bad URL, network error, timeout, ...)
                // is reported to the user and surfaces to the caller as an empty result.
                MessageBox.Show("出错");
                return "";
            }
        }
    }
}

调用解析

  /// <summary>
  /// Queries the Google Maps (ditu.google.cn) search page for <paramref name="argAddress"/> and
  /// extracts the second line of text from the element with id "panel_A_2" of the returned HTML.
  /// </summary>
  /// <param name="argAddress">Free-text address to search for; null is treated as empty.</param>
  /// <returns>
  /// The extracted line, or an empty string when the download fails or the expected
  /// elements/text are not present in the page.
  /// </returns>
  private string Search(string argAddress)
       {
           // Percent-encode the user-supplied address so characters such as '&', '#', '+'
           // or '%' cannot break or alter the query string.
           string query = Uri.EscapeDataString(argAddress ?? string.Empty);

           string url = "http://ditu.google.cn/maps"
               + "?"
               + "hl=zh-CN&newwindow=1&safe=strict"
               + "&q=" + query
               + "&bav=on.2,or.&bvm=bv.44158598,d.dGI&biw=1440&bih=775&um=1&ie=UTF-8&sa=N&tab=wl";

           // Download the HTML source of the results page.
           string strWebContent = googleMap.GetWebContent(url);
           if (string.IsNullOrEmpty(strWebContent))
           {
               // Nothing was downloaded, so there is nothing to parse.
               return "";
           }

           // Load the HTML into an off-screen WebBrowser to get a DOM to query.
           // The control is IDisposable, so release it as soon as parsing is done.
           using (WebBrowser webb = new WebBrowser())
           {
               webb.Navigate("about:blank");
               // NOTE(review): Document may be null if the blank page has not finished
               // loading yet - confirm this is reliable in the hosting application.
               HtmlDocument htmldoc = webb.Document.OpenNew(true);
               htmldoc.Write(strWebContent);

               // Historical behavior kept for compatibility: extraction is only attempted
               // when the page contains at least one <TR> and a "resultspanel" element.
               if (htmldoc.GetElementsByTagName("TR").Count == 0)
               {
                   return "";
               }
               if (htmldoc.GetElementById("resultspanel") == null)
               {
                   return "";
               }

               HtmlElement panel = htmldoc.GetElementById("panel_A_2");
               if (panel == null)
               {
                   return "";
               }

               string panelText = panel.InnerText;
               if (panelText == null)
               {
                   return "";
               }

               // Keep empty lines so that the line index stays stable.
               string[] contentLines = panelText.Split(new string[] { "\r\n" }, StringSplitOptions.None);

               // The value of interest is the second line of the panel text.
               return contentLines.Length > 1 ? contentLines[1] : "";
           }
       }

 

posted @ 2013-04-03 16:57  V.Wang  阅读(592)  评论(1)  编辑  收藏  举报