.NET 抓取网页数据

1、想通过代码获取某个页面的数据,首先右键查看页面源代码并分析其结构;然后按需修改下面的代码,一步步定位出所需内容,最后存入数据库。

 //根据Url地址得到网页的html源码 
        /// <summary>
        /// Downloads the page at <paramref name="Url"/> and returns its body decoded as UTF-8.
        /// </summary>
        /// <param name="Url">Address of the page to fetch.</param>
        /// <returns>
        /// The response text, or an empty string when anything goes wrong. In the failure case
        /// a message box has already been shown, so callers only need to check for "".
        /// </returns>
        private string GetWebContent(string Url)
        {
            string strResult = "";
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                // Give up if no response arrives within 30 seconds (value is in milliseconds).
                request.Timeout = 30000;
                // Ask intermediaries not to serve a cached copy.
                request.Headers.Set("Pragma", "no-cache");

                // The response, its stream and the reader all hold the underlying connection;
                // dispose them deterministically so repeated calls do not leak connections.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream streamReceive = response.GetResponseStream())
                using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.UTF8))
                {
                    strResult = streamReader.ReadToEnd();
                }
            }
            catch (Exception)
            {
                // Deliberately best-effort: report the failure and fall through to return "".
                MessageBox.Show("出错");
            }
            return strResult;
        }

        /// <summary>
        /// Click handler: downloads the data page, cuts out the first &lt;ul&gt; list that follows
        /// the "历史数据" marker inside the body, and inserts one "Silver" record per selected
        /// &lt;li&gt; row. Shows "OK" when the rows have been processed, or "出错" when the
        /// expected markup cannot be located.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            const string ListEndTag = "</ul>";

            // Address of the page to scrape (change this per data source).
            string Url = "http://kxt.com/data/20.html";

            // Fetch the page source and show it raw for inspection.
            string strWebContent = GetWebContent(Url);
            richTextBox1.Text = strWebContent;

            // GetWebContent returns "" after it has already reported the failure itself.
            if (string.IsNullOrEmpty(strWebContent))
            {
                return;
            }

            // Locate the list that carries the data. Each search starts where the previous
            // one matched, and a miss (-1) short-circuits the remaining searches instead of
            // being passed on as an invalid start index.
            int iBodyStart = strWebContent.IndexOf("<body", StringComparison.Ordinal);
            int iStart = iBodyStart < 0
                ? -1
                : strWebContent.IndexOf("历史数据", iBodyStart, StringComparison.Ordinal);
            int iTableStart = iStart < 0
                ? -1
                : strWebContent.IndexOf("<ul", iStart, StringComparison.Ordinal);
            int iTableEnd = iTableStart < 0
                ? -1
                : strWebContent.IndexOf(ListEndTag, iTableStart, StringComparison.Ordinal);
            if (iTableEnd < 0)
            {
                MessageBox.Show("出错");
                return;
            }

            // Keep the closing tag so the fragment is a complete <ul>...</ul> element.
            string strWeb = strWebContent.Substring(iTableStart, iTableEnd - iTableStart + ListEndTag.Length);

            // Let an off-screen WebBrowser parse the fragment into an HtmlDocument.
            using (WebBrowser webb = new WebBrowser())
            {
                webb.Navigate("about:blank");
                if (webb.Document == null)
                {
                    MessageBox.Show("出错");
                    return;
                }

                HtmlDocument htmldoc = webb.Document.OpenNew(true);
                htmldoc.Write(strWeb);
                HtmlElementCollection htmlTR = htmldoc.GetElementsByTagName("li");

                int i = 0;
                foreach (HtmlElement tr in htmlTR)
                {
                    i++;
                    // The first item is never stored (presumably a header row - confirm against the page).
                    if (i == 1)
                    {
                        continue;
                    }
                    // Stop on reaching item number Count - 2; that item and everything after it
                    // is left out (presumably trailing non-data rows - confirm against the page).
                    if (i == htmlTR.Count - 2)
                    {
                        break;
                    }

                    HtmlElementCollection spans = tr.GetElementsByTagName("span");
                    // A row without the five expected columns cannot be mapped; skip it
                    // rather than fail on an out-of-range index.
                    if (spans.Count < 5)
                    {
                        continue;
                    }

                    string dateTime = spans[0].InnerText;
                    string netWeightOunce = spans[1].InnerText;
                    string netWeightTon = spans[2].InnerText;
                    string totalValue = spans[3].InnerText;
                    string regulation = spans[4].InnerText;
                    // A sixth span (index 5) used to be stored in an "EffectOil" field; that is disabled.

                    // Skip rows whose first column is not a date instead of aborting the
                    // whole import halfway through.
                    DateTime financeTime;
                    if (!DateTime.TryParse(dateTime, out financeTime))
                    {
                        continue;
                    }

                    // Target columns: Id, UpdateTime, NetWeightOunce, NetWeightTon, TotalValue, Regulation, FinanceTime
                    SqlServer ado = new SqlServer();
                    ado.AddField("UpdateTime", DateTime.Now);
                    ado.AddField("NetWeightOunce", netWeightOunce);
                    ado.AddField("NetWeightTon", netWeightTon);
                    ado.AddField("TotalValue", totalValue);
                    ado.AddField("Regulation", regulation);
                    ado.AddField("FinanceTime", financeTime.ToString("yyyy-MM-dd"));

                    // Table name (change this per data source).
                    ado.Insert("Silver");
                }
            }

            MessageBox.Show("OK");
        }

 

posted @ 2015-08-03 14:15  随缘梦中人  阅读(1093)  评论(0)  编辑  收藏  举报