C# WinForm WebBrowser 爬取数据
// Current step of the two-page scrape, read and advanced by webBrowser1_DocumentCompleted:
// 0 = the start page is loading and the search has not been submitted yet,
// 1 = the search was submitted and the result page is expected next.
int index = 0;

/// <summary>
/// Drives the two-step scrape every time <c>webBrowser1</c> reports a completed document.
/// Step 0 fills the keyword box on the start page and clicks the search button;
/// step 1 reads three values out of the result page into textBox3, textBox4 and textBox5.
/// Progress messages are appended to textBox2. When the scrape ends, successfully or not,
/// the handler unsubscribes itself and resets <c>index</c> to 0.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data; <c>e.Url</c> is the URL of the document that completed.</param>
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    // Only act once the browser reports the page as completely loaded.
    if (webBrowser1.ReadyState != WebBrowserReadyState.Complete)
    {
        return;
    }

    textBox2.AppendText("就绪.....\r\n");

    if (index != 0 && index != 1)
    {
        return;
    }

    if (index == 1)
    {
        textBox2.AppendText("进入" + e.Url.ToString() + "\r\n");
    }

    // Process the event only when it is for the document the browser is currently showing.
    // NOTE(review): presumably this filters out completions raised for frames - confirm.
    HtmlDocument doc = webBrowser1.Document;
    if (doc == null || webBrowser1.Url == null || e.Url.ToString() != webBrowser1.Url.ToString())
    {
        return;
    }

    cobURL.Text = doc.Url.ToString();
    TabText.Text = webBrowser1.DocumentTitle;

    if (index == 0)
    {
        if (SubmitKeywordSearch(doc))
        {
            // Start page: the click was triggered, the result page should load next.
            textBox2.AppendText("正在进入子页面.....\r\n");
            index = 1;
        }
        else
        {
            // The page has no keyword box or search button; stop instead of throwing.
            textBox2.AppendText("该地址未能找到所需数据.....\r\n");
            EndScrape();
        }

        return;
    }

    if (!ExtractResultFields(doc))
    {
        textBox2.AppendText("该地址未能找到所需数据.....\r\n");
    }

    EndScrape();
}

// Types the trimmed keyword from textBox6 into the "keywords" input and clicks the
// "header-search-button" element; returns false (touching nothing) if either is missing.
private bool SubmitKeywordSearch(HtmlDocument doc)
{
    HtmlElementCollection keywordInputs = doc.GetElementsByTagName("input").GetElementsByName("keywords");
    HtmlElement searchButton = doc.All["header-search-button"];
    if (keywordInputs.Count == 0 || searchButton == null)
    {
        return false;
    }

    keywordInputs[0].InnerText = textBox6.Text.Trim(); // the text to search for
    searchButton.InvokeMember("click");
    return true;
}

// Copies value "c" (DatasheetsTable1, line 1) into textBox3 and values "a"/"b"
// (SpecificationTable1, lines 7 and 8) into textBox4/textBox5. Each text box is filled as
// soon as its value is available; returns false at the first value that cannot be read.
// The line indices and header strings are specific to the scraped page - adjust as needed.
private bool ExtractResultFields(HtmlDocument doc)
{
    string valueC = CleanField(ReadFirstChildLines(doc, "DatasheetsTable1"), 1, "c数据表头");
    if (valueC == null)
    {
        return false;
    }

    textBox3.Text = valueC;

    string[] specificationLines = ReadFirstChildLines(doc, "SpecificationTable1");

    string valueA = CleanField(specificationLines, 7, "a数据表头");
    if (valueA == null)
    {
        return false;
    }

    textBox4.Text = valueA;

    string valueB = CleanField(specificationLines, 8, "b数据表头");
    if (valueB == null)
    {
        return false;
    }

    textBox5.Text = valueB;
    return true;
}

// Returns the non-empty "\r\n"-separated lines of the OuterText of the first child of the
// element with the given id, or null when the element, its first child or its text is missing.
private static string[] ReadFirstChildLines(HtmlDocument doc, string elementId)
{
    HtmlElement element = doc.GetElementById(elementId);
    if (element == null || element.FirstChild == null)
    {
        return null;
    }

    string text = element.FirstChild.OuterText;
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    return text.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
}

// Returns lines[lineIndex] with headerText removed and surrounding whitespace trimmed,
// or null when lines is null or has no entry at lineIndex.
private static string CleanField(string[] lines, int lineIndex, string headerText)
{
    if (lines == null || lineIndex < 0 || lineIndex >= lines.Length)
    {
        return null;
    }

    return lines[lineIndex].Replace(headerText, "").Trim();
}

// Logs the end of the scrape, detaches this handler from webBrowser1 and resets the step counter.
private void EndScrape()
{
    textBox2.AppendText("获取结束.....\r\n");
    webBrowser1.DocumentCompleted -= new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.webBrowser1_DocumentCompleted);
    index = 0;
}
/// <summary>
/// Starts a new scrape for the keyword typed into textBox6: clears the previous output,
/// attaches <c>webBrowser1_DocumentCompleted</c> to <c>webBrowser1</c> and navigates to the
/// start page. Does nothing when the keyword is empty or consists only of whitespace.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button7_Click(object sender, EventArgs e)
{
    // The completion handler searches for the trimmed text, so a whitespace-only
    // keyword is treated the same as an empty one.
    if (textBox6.Text.Trim().Length == 0)
    {
        return;
    }

    textBox2.Clear();
    textBox3.Text = "";
    textBox4.Text = "";
    textBox5.Text = "";
    textBox2.AppendText("正在获取中.....\r\n");

    // Always start from step 0, even if a previous scrape was abandoned half-way.
    index = 0;

    // Remove any subscription left over from an unfinished scrape before adding a new one,
    // so a second click cannot make the handler run twice per page load. Removing a handler
    // that is not subscribed is a no-op. Subscribe before navigating so no completion
    // event can be missed.
    this.webBrowser1.DocumentCompleted -= new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.webBrowser1_DocumentCompleted);
    this.webBrowser1.DocumentCompleted += new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.webBrowser1_DocumentCompleted);

    // NOTE(review): "地址" looks like a placeholder for the real start-page URL - replace it
    // with an absolute URL; as written it is not a valid absolute URI.
    this.webBrowser1.Url = new Uri("地址");
}
界面大致如此,webBrowser隐藏了。要把webBrowser的ScriptErrorsSuppressed设置为True,否则会弹script错误。