实现百度搜索页面网页遍历
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using mshtml;

namespace WindowsFormsApplication3
{
    /// <summary>
    /// Form that runs a Baidu search inside the embedded WebBrowser control,
    /// shows the URL text found in each result entry, and then clicks the
    /// "next page" link to continue with the following result page.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Search endpoint; the URL-encoded keyword is appended to it.</summary>
        private const string SearchUrlPrefix = "http://www.baidu.com/s?wd=";

        /// <summary>Exact inner text of the element that leads to the next result page.</summary>
        private const string NextPageLinkText = "下一页>";

        /// <summary>
        /// Separator that ends the URL portion inside a result element's inner HTML.
        /// NOTE(review): reproduced as it appears in the source; confirm whether the
        /// page really uses a plain space here rather than a non-breaking space.
        /// </summary>
        private const string UrlTerminator = " ";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Opens the Baidu result page for the keyword typed into <c>textBox1</c>
        /// inside the embedded browser control.
        /// </summary>
        public void baidu()
        {
            // Encode the keyword so characters such as '&', '#', '+', '%' or spaces
            // cannot break or truncate the query string.
            string keyword = textBox1.Text.Trim();
            webBrowser1.Navigate(SearchUrlPrefix + Uri.EscapeDataString(keyword));
        }

        /// <summary>
        /// Starts the search when the first button is clicked.
        /// </summary>
        /// <param name="sender">The button that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            baidu();
        }

        /// <summary>
        /// Keeps links that would open a new window inside this control instead of
        /// letting them open in an external browser window.
        /// </summary>
        /// <param name="sender">The browser control.</param>
        /// <param name="e">Cancelable event data; always cancelled here.</param>
        private void webBrowser1_NewWindow(object sender, CancelEventArgs e)
        {
            e.Cancel = true;

            // The status text is used as the navigation target. When it is empty
            // there is nothing to follow, so keep the current page as it is.
            string target = webBrowser1.StatusText;
            if (!string.IsNullOrEmpty(target))
            {
                webBrowser1.Navigate(target);
            }
        }

        /// <summary>
        /// Walks every element of the loaded page and simulates a click on the first
        /// one whose inner text is exactly the "next page" caption. Does nothing when
        /// no document is loaded or no such element exists.
        /// </summary>
        public void bianli()
        {
            IHTMLDocument2 doc = GetCurrentDocument();
            if (doc == null)
            {
                return;
            }

            foreach (object node in doc.all)
            {
                IHTMLElement ele = node as IHTMLElement;
                if (ele == null)
                {
                    continue;
                }

                if (ele.innerText == NextPageLinkText)
                {
                    ele.click();
                    break;
                }
            }
        }

        /// <summary>
        /// Runs the result scan once the page itself has finished loading.
        /// </summary>
        /// <param name="sender">The browser control.</param>
        /// <param name="e">Event data carrying the URL of the document that completed.</param>
        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            // Only react when the completed URL is the URL of the control's current
            // document, so completions reported for other URLs are ignored.
            HtmlDocument loaded = webBrowser1.Document;
            if (loaded != null && e.Url == loaded.Url)
            {
                bianliwangye();
            }
        }

        /// <summary>
        /// Manually triggers the result scan for the page that is currently loaded.
        /// </summary>
        /// <param name="sender">The button that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            bianliwangye();
        }

        /// <summary>
        /// Enumerates the elements of the loaded result page, shows the leading URL
        /// text of every element whose class name is <c>g</c> or <c>c-showurl</c> in a
        /// message box, and finally clicks through to the next result page.
        /// </summary>
        public void bianliwangye()
        {
            IHTMLDocument2 document = GetCurrentDocument();
            if (document == null)
            {
                return;
            }

            foreach (object node in document.all)
            {
                IHTMLElement element = node as IHTMLElement;
                if (element == null)
                {
                    continue;
                }

                // Result entries are recognised purely by their CSS class name.
                string cssClass = element.className;
                if (cssClass != "g" && cssClass != "c-showurl")
                {
                    continue;
                }

                string url = ExtractLeadingUrl(element.innerHTML);
                if (url != null)
                {
                    MessageBox.Show(url);
                }
            }

            // Every element of this page has been visited; move on to the next page.
            bianli();
        }

        /// <summary>
        /// Returns the MSHTML document behind the browser control, or <c>null</c> when
        /// no page is loaded or the document does not expose <c>IHTMLDocument2</c>.
        /// </summary>
        private IHTMLDocument2 GetCurrentDocument()
        {
            HtmlDocument loaded = webBrowser1.Document;
            if (loaded == null)
            {
                return null;
            }

            return loaded.DomDocument as IHTMLDocument2;
        }

        /// <summary>
        /// Returns the part of <paramref name="html"/> that precedes the first
        /// separator, or <c>null</c> when the markup is null/empty or contains no
        /// separator at all.
        /// </summary>
        /// <param name="html">Inner HTML of a result element; may be null.</param>
        private static string ExtractLeadingUrl(string html)
        {
            // innerHTML is null for elements without markup; skip those.
            if (string.IsNullOrEmpty(html))
            {
                return null;
            }

            int end = html.IndexOf(UrlTerminator, StringComparison.Ordinal);
            return end >= 0 ? html.Substring(0, end) : null;
        }
    }
}