C# 语言:输入关键字,抓取你想要的所有网址

      
      
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using mshtml;
using System.Collections;
using System.Threading;

namespace 遍历百度网页
{
    // Parameterless callback type. Form1 wraps its own baidu111() method in this
    // delegate so the call can be marshalled to the UI thread through Control.Invoke.
    public delegate void baidu111();
    /// <summary>
    /// Form that runs a Baidu search for the keyword typed into <c>textBox1</c>,
    /// walks the result pages shown in <c>webBrowser1</c>, collects the URLs displayed
    /// under each result and lists them in <c>listView1</c> on request.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Number of "next page" clicks after which crawling stops. Pages that finish
        /// loading once this limit is reached are ignored.
        /// </summary>
        private const int MaxPages = 3;

        /// <summary>Exact inner text of the "next page" link on a Baidu result page.</summary>
        private const string NextPageText = "下一页>";

        /// <summary>URLs harvested so far, in the order they were found.</summary>
        private readonly List<string> all = new List<string>();

        /// <summary>How many times the "next page" link has been clicked in the current crawl.</summary>
        int i = 0;

        /// <summary>Creates the form and its designer-generated controls.</summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Turns on grid lines for the result list when the form loads.</summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_Load(object sender, EventArgs e)
        {
            listView1.GridLines = true;
        }

        /// <summary>Starts the search; thin wrapper around <see cref="baidu111()"/>.</summary>
        void baidu()
        {
            baidu111();
        }

        /// <summary>
        /// Navigates the embedded browser to the Baidu result page for the current keyword.
        /// Safe to call from any thread: when invoked off the UI thread it re-dispatches
        /// itself through <c>Invoke</c>.
        /// </summary>
        void baidu111()
        {
            if (webBrowser1.InvokeRequired)
            {
                baidu111 dr = new baidu111(baidu111);
                this.Invoke(dr);
                return;
            }

            string keyword = textBox1.Text.Trim();
            if (keyword.Length == 0)
            {
                // Nothing to search for; avoid navigating to an empty query.
                return;
            }

            // Escape the keyword so spaces, '&', '#', '+' and non-ASCII characters
            // cannot corrupt the query string.
            string url = "http://www.baidu.com/s?wd=" + Uri.EscapeDataString(keyword);
            webBrowser1.Navigate(url);
        }

        /// <summary>
        /// "Stop" button: ends the current crawl and cancels any pending navigation.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            // Pushing the counter to the limit makes DocumentCompleted and bianli()
            // ignore any page that still finishes loading.
            i = MaxPages;
            webBrowser1.Stop();
        }

        /// <summary>
        /// "Search" button: resets the page counter and starts a new crawl.
        /// Previously collected URLs are kept, so results accumulate across searches.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // Reset so a second search is not silently ignored after the first one
            // has reached the page limit.
            i = 0;

            // Navigate exactly once, on the UI thread. A worker thread would only
            // marshal straight back here, and calling baidu() in addition to it
            // caused every search to be navigated twice.
            baidu();
        }

        /// <summary>
        /// Clicks the first "next page" link of the currently loaded document and
        /// increments the page counter. Does nothing when the page limit has been
        /// reached, no document is loaded, or no such link exists.
        /// </summary>
        public void bianli()
        {
            if (i >= MaxPages || webBrowser1.Document == null)
            {
                return;
            }

            IHTMLDocument2 doc = webBrowser1.Document.DomDocument as IHTMLDocument2;
            if (doc == null)
            {
                return;
            }

            foreach (IHTMLElement ele in doc.all)
            {
                if (ele.innerText == NextPageText)
                {
                    // Count before clicking and stop at the first match: several
                    // elements can share this inner text, and clicking each of them
                    // would advance the counter more than once per page.
                    i++;
                    ele.click();
                    break;
                }
            }
        }

        /// <summary>
        /// Fires when a document (or one of its frames) finishes loading. Harvests the
        /// page once the top-level document itself is complete.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Carries the URL of the document that finished loading.</param>
        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (i >= MaxPages)
            {
                return;
            }

            // Frames raise this event too; only react when the completed URL is the
            // URL of the main document.
            if (webBrowser1.Document != null && e.Url == webBrowser1.Document.Url)
            {
                bianliwangye();
            }
        }

        /// <summary>
        /// Scans every element of the loaded result page, stores the displayed URL of
        /// each element whose class is <c>g</c> or <c>c-showurl</c>, then moves on to
        /// the next result page via <see cref="bianli"/>.
        /// </summary>
        public void bianliwangye()
        {
            if (webBrowser1.Document == null)
            {
                return;
            }

            IHTMLDocument2 document = webBrowser1.Document.DomDocument as IHTMLDocument2;
            if (document == null)
            {
                return;
            }

            foreach (IHTMLElement h in document.all)
            {
                if (h.className != "g" && h.className != "c-showurl")
                {
                    continue;
                }

                // innerHTML is null for elements without markup.
                string html = h.innerHTML;
                if (html == null)
                {
                    continue;
                }

                // The URL is the text that precedes the first "&nbsp" entity.
                int cut = html.IndexOf("&nbsp", StringComparison.Ordinal);
                if (cut >= 0)
                {
                    all.Add(html.Substring(0, cut));
                }
            }

            // Current page is done; simulate a click on the "next page" link.
            bianli();
        }

        /// <summary>
        /// "Show" button: rebuilds the list view from the harvested URLs, one numbered
        /// row per URL.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            listView1.BeginUpdate();
            try
            {
                // Clear() drops both items and columns, so repeated clicks neither
                // duplicate the columns nor attach sub-items to stale rows.
                listView1.Clear();
                listView1.Columns.Add("编号");
                listView1.Columns.Add("获取到的网址", 400);

                for (int index = 0; index < all.Count; index++)
                {
                    ListViewItem row = new ListViewItem((index + 1).ToString());
                    row.SubItems.Add(all[index]);
                    listView1.Items.Add(row);
                }
            }
            finally
            {
                listView1.EndUpdate();
            }
        }
    }
}

 

posted @ 2015-07-04 13:48  zhenximeiyitian  阅读(379)  评论(0)  编辑  收藏  举报