使用正则表达式爬取天气预报

自己做了个 Demo,用正则表达式实现天气预报信息的抓取。

 1 using System;
 2 using System.Collections.Generic;
 3 using System.ComponentModel;
 4 using System.Data;
 5 using System.Drawing;
 6 using System.IO;
 7 using System.Linq;
 8 using System.Net;
 9 using System.Text;
10 using System.Text.RegularExpressions;
11 using System.Threading.Tasks;
12 using System.Windows.Forms;
13 
namespace WindowsFormsApplication1
{
    /// <summary>
    /// Demo form that downloads a forecast page from weather.com.cn, extracts the
    /// per-day entries with regular expressions and shows them in <c>lblMatchTest</c>.
    /// </summary>
    public partial class Form2 : Form
    {
        /// <summary>Forecast page that is downloaded and parsed.</summary>
        private const string ForecastUrl = "http://www.weather.com.cn/weather/101210101.shtml";

        /// <summary>Request timeout in milliseconds (300000 ms = 5 minutes).</summary>
        private const int RequestTimeoutMs = 300000;

        // Positions, within the text fragments extracted from one <li>, of the values
        // that are displayed. NOTE(review): these depend on the exact markup of the
        // page (including empty fragments between adjacent tags) - confirm against
        // the live HTML before changing them.
        private const int DateIndex = 0;
        private const int ConditionIndex = 3;
        private const int FirstTempIndex = 4;
        private const int SecondTempIndex = 5;

        /// <summary>Matches the content of every <c>&lt;ul class="t clearfix"&gt;</c> list.</summary>
        private static readonly Regex ForecastListRegex = new Regex(
            @"(?<=(<ul class=""t clearfix"">)).*?(?=</ul>)",
            RegexOptions.IgnoreCase | RegexOptions.Multiline);

        /// <summary>
        /// Matches the content of each forecast <c>&lt;li&gt;</c> inside a list.
        /// Note that the character class <c>[2|3]</c> also accepts a literal '|'.
        /// </summary>
        private static readonly Regex DayItemRegex = new Regex(
            @"(?<=<li class=""sky skyid lv[2|3]( on)?"">).*?(?=</li>)",
            RegexOptions.IgnoreCase | RegexOptions.Multiline);

        /// <summary>
        /// Matches every run of text (possibly empty) that sits between a '&gt;'
        /// and the next <c>&lt;/</c>.
        /// </summary>
        private static readonly Regex TagTextRegex = new Regex(
            @"(?is)(?<=(<*>))[^<>]*(?=(</))",
            RegexOptions.IgnoreCase | RegexOptions.Multiline);

        /// <summary>
        /// Builds the form and immediately loads the forecast.
        /// </summary>
        /// <remarks>
        /// <see cref="MatchTest"/> runs synchronously here, so any exception it
        /// throws propagates out of the constructor.
        /// </remarks>
        public Form2()
        {
            InitializeComponent();
            MatchTest();
        }

        /// <summary>
        /// One forecast entry as extracted from the page.
        /// </summary>
        public class weather
        {
            /// <summary>
            /// Date/time label of the entry.
            /// </summary>
            public string date { get; set; }

            /// <summary>
            /// Weather conditions.
            /// </summary>
            public string wea { get; set; }

            /// <summary>
            /// Temperature, stored as two extracted values joined by "/".
            /// </summary>
            public string temp { get; set; }
        }

        /// <summary>
        /// Downloads the forecast page, parses it and, when at least one entry was
        /// found, writes one line per entry into <c>lblMatchTest</c>.
        /// </summary>
        /// <remarks>
        /// Exceptions raised while downloading or parsing are not caught here; they
        /// reach the caller with their original stack trace.
        /// </remarks>
        public void MatchTest()
        {
            string html = DownloadHtml(ForecastUrl);
            List<weather> forecasts = ParseForecast(html);

            // Leave the label untouched when nothing could be extracted.
            if (forecasts.Count == 0)
            {
                return;
            }

            // Build the text once instead of re-assigning Label.Text per entry.
            var text = new StringBuilder();
            foreach (weather day in forecasts)
            {
                text.Append(day.date).Append("  ")
                    .Append(day.wea).Append("  ")
                    .Append(day.temp).Append("\n");
            }

            lblMatchTest.Text = text.ToString();
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and returns the body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>The complete response body.</returns>
        private static string DownloadHtml(string url)
        {
            var request = (HttpWebRequest)WebRequest.Create(url);
            request.Timeout = RequestTimeoutMs;

            // Dispose the response and reader so the connection is released even
            // when reading fails.
            using (var response = (HttpWebResponse)request.GetResponse())
            using (var reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Extracts the forecast entries from the page markup.
        /// </summary>
        /// <param name="html">Raw HTML of the forecast page.</param>
        /// <returns>
        /// The entries in document order; empty when <paramref name="html"/> is
        /// null/empty or contains no recognizable entries.
        /// </returns>
        private static List<weather> ParseForecast(string html)
        {
            var forecasts = new List<weather>();
            if (string.IsNullOrEmpty(html))
            {
                return forecasts;
            }

            // The list/item patterns do not use Singleline, so '.' would stop at a
            // line break; flattening the markup lets ".*?" span a whole element.
            string flattened = html.Replace('\r', ' ').Replace('\n', ' ').Trim();

            foreach (Match list in ForecastListRegex.Matches(flattened))
            {
                foreach (Match day in DayItemRegex.Matches(list.Value))
                {
                    if (string.IsNullOrWhiteSpace(day.Value))
                    {
                        continue;
                    }

                    MatchCollection cells = TagTextRegex.Matches(day.Value);

                    // Every index read below must exist; skip items with an
                    // unexpected layout instead of throwing.
                    if (cells.Count <= SecondTempIndex)
                    {
                        continue;
                    }

                    forecasts.Add(new weather
                    {
                        date = cells[DateIndex].Value,
                        wea = cells[ConditionIndex].Value,
                        temp = cells[FirstTempIndex].Value + "/" + cells[SecondTempIndex].Value,
                    });
                }
            }

            return forecasts;
        }
    }
}

 


 

posted @ 2017-06-08 10:02  在今朝  阅读(827)  评论(0)  编辑  收藏  举报