使用正则表达式抓取天气预报
自己做了个Demo,实现天气预报信息的抓取。
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace WindowsFormsApplication1
{
    /// <summary>
    /// Demo form that downloads a weather.com.cn forecast page, extracts the
    /// forecast entries with regular expressions and shows them in
    /// <c>lblMatchTest</c>.
    /// </summary>
    public partial class Form2 : Form
    {
        /// <summary>Forecast page that is downloaded and parsed.</summary>
        private const string ForecastUrl = "http://www.weather.com.cn/weather/101210101.shtml";

        /// <summary>Request timeout in milliseconds (5 minutes).</summary>
        private const int RequestTimeoutMilliseconds = 300000;

        /// <summary>
        /// Number of text fragments an entry must contain, because the parser
        /// reads fragment indices 0, 3, 4 and 5.
        /// </summary>
        private const int RequiredFieldCount = 6;

        private const RegexOptions PatternOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

        /// <summary>Matches the content of each <c>&lt;ul class="t clearfix"&gt;</c> list.</summary>
        private static readonly Regex ForecastListRegex = new Regex(
            @"(?<=(<ul class=""t clearfix"">)).*?(?=</ul>)",
            PatternOptions);

        /// <summary>
        /// Matches the content of each <c>&lt;li class="sky skyid lvN"&gt;</c> item
        /// (N is 2 or 3, with an optional trailing " on").
        /// </summary>
        private static readonly Regex DayItemRegex = new Regex(
            @"(?<=<li class=""sky skyid lv[23]( on)?"">).*?(?=</li>)",
            PatternOptions);

        /// <summary>Matches the text found between a <c>&gt;</c> and the next closing tag.</summary>
        private static readonly Regex TagTextRegex = new Regex(
            @"(?is)(?<=(<*>))[^<>]*(?=(</))",
            PatternOptions);

        /// <summary>
        /// Initializes the form and immediately loads the forecast.
        /// </summary>
        public Form2()
        {
            InitializeComponent();
            // NOTE: MatchTest performs a synchronous HTTP request, so constructing
            // the form blocks until the download finishes or fails.
            MatchTest();
        }

        /// <summary>
        /// One forecast entry extracted from the page.
        /// </summary>
        public class weather
        {
            /// <summary>
            /// Time of the forecast entry.
            /// </summary>
            public string date { get; set; }

            /// <summary>
            /// Weather conditions.
            /// </summary>
            public string wea { get; set; }

            /// <summary>
            /// Temperature, formatted as "first/second" from two adjacent fragments.
            /// </summary>
            public string temp { get; set; }
        }

        /// <summary>
        /// Downloads the forecast page, parses it and writes one line per entry
        /// ("date wea temp") into <c>lblMatchTest</c>. The label is left untouched
        /// when no entry could be parsed.
        /// </summary>
        /// <remarks>
        /// Network and parsing exceptions propagate to the caller with their
        /// original stack trace (the former <c>catch { throw(e); }</c> only reset it).
        /// </remarks>
        public void MatchTest()
        {
            var request = (HttpWebRequest)WebRequest.Create(ForecastUrl);
            request.Timeout = RequestTimeoutMilliseconds;

            string html;
            // Dispose the response and reader so the underlying connection is released.
            using (var response = (HttpWebResponse)request.GetResponse())
            {
                if (response == null)
                {
                    return;
                }

                using (var reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
                {
                    html = reader.ReadToEnd();
                }
            }

            List<weather> forecasts = ParseForecast(html);
            if (forecasts.Count == 0)
            {
                return;
            }

            var text = new StringBuilder();
            foreach (weather entry in forecasts)
            {
                text.Append(entry.date)
                    .Append(" ")
                    .Append(entry.wea)
                    .Append(" ")
                    .Append(entry.temp)
                    .Append("\n");
            }

            lblMatchTest.Text = text.ToString();
        }

        /// <summary>
        /// Extracts forecast entries from the raw page HTML.
        /// </summary>
        /// <param name="html">Page markup; <c>null</c> or empty yields an empty list.</param>
        /// <returns>The parsed entries, in document order.</returns>
        private static List<weather> ParseForecast(string html)
        {
            var forecasts = new List<weather>();
            if (string.IsNullOrEmpty(html))
            {
                return forecasts;
            }

            // Flatten line breaks so the lazy ".*?" patterns can span what were
            // originally several lines.
            string flattened = html.Replace('\r', ' ').Replace('\n', ' ').Trim();

            foreach (Match list in ForecastListRegex.Matches(flattened))
            {
                foreach (Match dayItem in DayItemRegex.Matches(list.Value))
                {
                    if (string.IsNullOrWhiteSpace(dayItem.Value))
                    {
                        continue;
                    }

                    MatchCollection fields = TagTextRegex.Matches(dayItem.Value);

                    // Indices 0, 3, 4 and 5 are read below; entries exposing fewer
                    // fragments are skipped instead of throwing
                    // ArgumentOutOfRangeException.
                    if (fields.Count < RequiredFieldCount)
                    {
                        continue;
                    }

                    forecasts.Add(new weather
                    {
                        date = fields[0].Value,
                        wea = fields[3].Value,
                        temp = fields[4].Value + "/" + fields[5].Value,
                    });
                }
            }

            return forecasts;
        }
    }
}
![](https://images2015.cnblogs.com/blog/1178640/201706/1178640-20170608100138293-1647479909.png)