• 00
  • :
  • 00
  • :
  • 00

C#获取百度新歌TOP50

先上软件效果图

 

代码如下
1.根据Url地址得到网页的html源码

 /// <summary>
 /// Downloads the resource at <paramref name="Url"/> and returns its body decoded as UTF-8 text.
 /// </summary>
 /// <param name="Url">Address of the page to fetch.</param>
 /// <returns>
 /// The response body, or an empty string when anything goes wrong
 /// (in that case a message box is shown instead of throwing).
 /// </returns>
 public static string GetWebContent(string Url)
 {
     string strResult = "";
     try
     {
         HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
         // Timeout is expressed in milliseconds.
         request.Timeout = 30000;
         request.Headers.Set("Pragma", "no-cache");

         // Dispose the response, its stream and the reader deterministically;
         // the previous version never closed any of them.
         using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
         using (Stream streamReceive = response.GetResponseStream())
         using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.UTF8))
         {
             // The body is read as UTF-8; the unused GB2312 encoding local was removed.
             strResult = streamReader.ReadToEnd();
         }
     }
     catch
     {
         // Deliberate best-effort: any failure is reported to the user and
         // the caller receives an empty string.
         MessageBox.Show("出错");
     }
     return strResult;
 }

2.根据元素判断是否使用了指定名字的样式,此方法有不妥之处,请大师指点
  之所以自己重写,是因为 li.GetAttribute("class") 获取不到样式名(例如 <a class=style1> 这种没有引号的写法),所以就自己写了一个,检索不是很规范。

 /// <summary>
 /// Reports whether the opening tag of <paramref name="li"/> mentions <paramref name="ClassName"/>.
 /// </summary>
 /// <param name="li">Element whose OuterHtml is inspected.</param>
 /// <param name="ClassName">Text to look for inside the opening tag.</param>
 /// <returns>
 /// true when the text between the first character of OuterHtml and its first '&gt;'
 /// contains <paramref name="ClassName"/>; false otherwise, including when the element,
 /// its markup or the name is null, or when the markup has no '&gt;'.
 /// </returns>
 /// <remarks>
 /// This is a plain substring search over the whole opening tag, so it also matches when the
 /// name occurs in another attribute or as part of a longer class name.
 /// NOTE(review): HtmlElement.GetAttribute may need "className" rather than "class" to read
 /// the style name; confirm whether that could replace this helper.
 /// </remarks>
 bool ContaintClass(HtmlElement li, string ClassName)
 {
     if (li == null || ClassName == null)
     {
         return false;
     }

     string html = li.OuterHtml;
     if (html == null)
     {
         return false;
     }

     // Without a closing '>' at index 1 or later there is no opening tag to slice;
     // the previous version threw ArgumentOutOfRangeException here.
     int tagEnd = html.IndexOf('>');
     if (tagEnd < 1)
     {
         return false;
     }

     // Skip the leading '<' and stop just before the first '>'.
     string openingTag = html.Substring(1, tagEnd - 1);
     return openingTag.Contains(ClassName);
 }

3.使用

 /// <summary>
 /// Click handler: downloads the selected page, cuts out the first &lt;ul&gt; that follows the
 /// "top-list-item" container, and shows one grid row (rank, title, singer) per &lt;li&gt;.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void btn_GetList_Click(object sender, EventArgs e)
 {
     string DataURL = (string)txt_DataURL.SelectedValue;
     if (String.IsNullOrEmpty(DataURL))
     {
         MessageBox.Show("数据来源不能为空!", "警告", MessageBoxButtons.OK, MessageBoxIcon.Warning);
         // Stop here: the previous version fell through and tried to download a null URL.
         return;
     }

     string StrWebContent = GetHTMLContent.GetWebContent(DataURL);
     if (String.IsNullOrEmpty(StrWebContent))
     {
         // Nothing was downloaded, so there is nothing to parse.
         return;
     }

     // Locate the list fragment. Each search only runs when the previous one succeeded,
     // because passing -1 as a start index makes IndexOf throw.
     const string ListMarker = "<div class=\"top-list-item\">";
     const string ULOpen = "<ul";
     const string ULClose = "</ul>";

     int ListStart = StrWebContent.IndexOf(ListMarker, StringComparison.Ordinal);
     int ULStart = ListStart < 0 ? -1 : StrWebContent.IndexOf(ULOpen, ListStart, StringComparison.Ordinal);
     int ULEnd = ULStart < 0 ? -1 : StrWebContent.IndexOf(ULClose, ULStart, StringComparison.Ordinal);
     if (ULEnd < 0)
     {
         MessageBox.Show("未找到歌曲列表,页面结构可能已变化!", "警告", MessageBoxButtons.OK, MessageBoxIcon.Warning);
         return;
     }

     // Substring takes a length, not an end index; include the closing tag itself.
     string strweb = StrWebContent.Substring(ULStart, ULEnd - ULStart + ULClose.Length);

     DataTable dt = new DataTable();
     dt.Columns.Add("OrderID");
     dt.Columns.Add("MusicName");
     dt.Columns.Add("Singer");

     // Style names are listed in the order their spans are expected inside each <li>.
     string[] ClassNames = new string[] { "index-num", "song-title", "singer" };

     // An off-screen browser is used only as an HTML parser; dispose it once the text is copied out.
     using (WebBrowser webb = new WebBrowser())
     {
         webb.Navigate("about:blank");
         if (webb.Document == null)
         {
             MessageBox.Show("无法创建HTML文档!", "警告", MessageBoxButtons.OK, MessageBoxIcon.Warning);
             return;
         }

         HtmlDocument htmldoc = webb.Document.OpenNew(true);
         htmldoc.Write(strweb);

         foreach (HtmlElement li in htmldoc.GetElementsByTagName("li"))
         {
             string[] musicInfo = new string[ClassNames.Length];
             int InfoIndex = 0;

             // Walk the spans once, matching the wanted style names in order.
             foreach (HtmlElement span in li.GetElementsByTagName("span"))
             {
                 if (ContaintClass(span, ClassNames[InfoIndex]))
                 {
                     musicInfo[InfoIndex] = span.InnerText;
                     InfoIndex++;
                     if (InfoIndex >= ClassNames.Length)
                     {
                         break;
                     }
                 }
             }

             DataRow dr = dt.NewRow();
             dr["OrderID"] = musicInfo[0];
             dr["MusicName"] = musicInfo[1];
             dr["Singer"] = musicInfo[2];
             dt.Rows.Add(dr);
         }
     }

     dataGridView1.DataSource = dt;
 }

代码都很简单

posted @ 2013-08-04 15:08  Garson_Zhang  阅读(343)  评论(1)  编辑  收藏  举报