C# WinForm WebBrowser 爬取数据

// Scrape step read by webBrowser1_DocumentCompleted:
// 0 = the next completed page is the search page (fill in the keyword and click search),
// 1 = the next completed page is the result page (read the data tables).
int index=0;
/// <summary>
/// Drives the two-step scrape each time the browser finishes loading a page.
/// Step 0 (<c>index == 0</c>): types the keyword from <c>textBox6</c> into the page's
/// "keywords" input and clicks the "header-search-button" element.
/// Step 1 (<c>index == 1</c>): reads "DatasheetsTable1" and "SpecificationTable1" from the
/// result page and copies the extracted values into <c>textBox3</c>..<c>textBox5</c>.
/// Whenever an expected element or row is missing, the "not found" message is logged and
/// the scrape is ended cleanly instead of throwing and leaving the handler subscribed.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data; <c>e.Url</c> is the URL whose load just completed.</param>
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            // Only proceed once the control reports the page as fully loaded.
            if (webBrowser1.ReadyState != WebBrowserReadyState.Complete)
            {
                return;
            }

            textBox2.AppendText("就绪.....\r\n");
            if (index == 0)
            {
                // Act only when the completed URL is the control's current URL.
                if (e.Url.ToString() != webBrowser1.Url.ToString())
                {
                    return;
                }

                this.cobURL.Text = this.webBrowser1.Document.Url.ToString();
                this.TabText.Text = this.webBrowser1.DocumentTitle;

                HtmlDocument hd = webBrowser1.Document;
                HtmlElementCollection searchBoxes = hd.GetElementsByTagName("input").GetElementsByName("keywords");
                HtmlElement bt = hd.All["header-search-button"];
                if (searchBoxes.Count == 0 || bt == null)
                {
                    // The page does not have the expected search form; stop instead of crashing.
                    EndScrape(false);
                    return;
                }

                // Keyword to search for.
                searchBoxes[0].InnerText = this.textBox6.Text.Trim();
                // Main page: trigger the click so the browser navigates to the result page.
                bt.InvokeMember("click");
                textBox2.AppendText("正在进入子页面.....\r\n");
                index++;
            }
            else if (index == 1)
            {
                textBox2.AppendText("进入"+ e.Url.ToString() + "\r\n");
                if (e.Url.ToString() != webBrowser1.Url.ToString())
                {
                    return;
                }

                this.cobURL.Text = this.webBrowser1.Document.Url.ToString();
                this.TabText.Text = this.webBrowser1.DocumentTitle;

                HtmlDocument hd = webBrowser1.Document;

                // Value "c": second non-empty line of the datasheet table.
                string bz = ExtractField(ReadTableLines(hd, "DatasheetsTable1"), 1, "c数据表头");
                if (bz == null)
                {
                    EndScrape(false);
                    return;
                }
                textBox3.Text = bz;

                // Values "a" and "b": 8th and 9th non-empty lines of the specification table.
                // The positions and header texts are page-specific; adjust them to the target site.
                string[] specLines = ReadTableLines(hd, "SpecificationTable1");
                string fz = ExtractField(specLines, 7, "a数据表头");
                string qjfz = ExtractField(specLines, 8, "b数据表头");
                if (fz == null || qjfz == null)
                {
                    EndScrape(false);
                    return;
                }
                textBox4.Text = fz;
                textBox5.Text = qjfz;

                EndScrape(true);
            }
        }

/// <summary>
/// Ends the current scrape: optionally logs the "not found" message, logs the
/// "finished" message, detaches this handler and resets the step counter to 0.
/// </summary>
/// <param name="dataFound">False to log the "not found" message before finishing.</param>
private void EndScrape(bool dataFound)
        {
            if (!dataFound)
            {
                textBox2.AppendText("该地址未能找到所需数据.....\r\n");
            }
            textBox2.AppendText("获取结束.....\r\n");
            webBrowser1.DocumentCompleted -= new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.webBrowser1_DocumentCompleted);
            index = 0;
        }

/// <summary>
/// Returns the non-empty text lines of the first child of the element with the given id,
/// or null when the element, its first child, or its text is missing.
/// </summary>
private static string[] ReadTableLines(HtmlDocument document, string tableId)
        {
            HtmlElement table = document.GetElementById(tableId);
            if (table == null || table.FirstChild == null)
            {
                return null;
            }

            string text = table.FirstChild.OuterText;
            if (text == null)
            {
                return null;
            }

            return text.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
        }

/// <summary>
/// Returns the line at <paramref name="position"/> with <paramref name="header"/> removed
/// and surrounding whitespace trimmed, or null when that line does not exist.
/// </summary>
private static string ExtractField(string[] lines, int position, string header)
        {
            if (lines == null || position >= lines.Length)
            {
                return null;
            }

            return lines[position].Replace(header, "").Trim();
        }
/// <summary>
/// Starts a new scrape for the keyword in <c>textBox6</c>: clears the previous results and
/// log, resets the step counter, attaches the DocumentCompleted handler exactly once and
/// navigates the hidden browser to the search page.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button7_Click(object sender, EventArgs e)
        {
            // The handler searches for the trimmed text, so whitespace-only input is treated as empty.
            if (textBox6.Text.Trim().Length == 0)
            {
                return;
            }

            textBox2.Clear();
            textBox3.Text = "";
            textBox4.Text = "";
            textBox5.Text = "";
            textBox2.AppendText("正在获取中.....\r\n");

            // A previous run that never reached its end would leave the handler attached and
            // index at 1; reset both so the handler runs once per event, starting at step 0.
            index = 0;
            this.webBrowser1.DocumentCompleted -= new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.webBrowser1_DocumentCompleted);
            this.webBrowser1.DocumentCompleted += new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.webBrowser1_DocumentCompleted);

            // Subscribe before navigating so the completion event cannot be missed.
            // NOTE(review): "地址" looks like a placeholder for the real search-page URL — replace before use.
            this.webBrowser1.Url = new Uri("地址");
        }

界面大致如此:webBrowser 控件被隐藏了。需要把 webBrowser 的 ScriptErrorsSuppressed 属性设置为 True,否则会弹出 script 错误提示框。

 

posted @ 2020-07-30 19:49  岳凯歌  阅读(1153)  评论(0编辑  收藏  举报