分享一个天气历史数据的采集脚本
最近一个项目中需要用到过往的天气数据, 我找到了天气后报这个网站 (www.tianqihoubao.com), 并在 Spider Studio (下文简称 SS) 中完成了相关采集, 和大家分享一下.
首先分析这个网站提供了两种信息:
1. 省市关系
2. 天气记录
对应的我们创建数据结构:
/// <summary>
/// A province-level region: its display name and the URL of its page.
/// </summary>
[Serializable]
public class Province
{
    public string ProvinceName;
    public string ProvinceUrl;
}

/// <summary>
/// A city, linked back to the province it was listed under.
/// </summary>
[Serializable]
public class City
{
    public Province Province;
    public string CityName;
    public string CityUrl;
}

/// <summary>
/// One entry of a city's historical-weather list: a title and the URL of
/// the page holding the actual records.
/// </summary>
[Serializable]
public class WeatherDataSet
{
    public City City;
    public string Title;
    public string Url;
}

/// <summary>
/// A single weather record. Every value is kept as the raw text that was
/// read from the page; nothing is parsed.
/// </summary>
[Serializable]
public class WeatherData
{
    public WeatherDataSet DataSet;
    public string Date;
    public string TextWeather;
    public string Temp;
    public string Wind;
}
>> 网站一共提供34个直辖市/省/特区的天气信息, 完整的列表在: http://www.tianqihoubao.com/lishi/index.htm
对应的采集语句是:
// Select every anchor inside a DT element under #content
// (the province-level links on the index page).
var provinceLinks = Default.SelectNodes("#content DT a");
>> 每个省都有下辖的城市列表, 如: http://www.tianqihoubao.com/lishi/hebei.htm
对应的采集语句是:
// Select every anchor inside a DD element under #content
// (the city links on a province page).
var cityLinks = Default.SelectNodes("#content DD a");
>> 每个城市都有一个历史天气记录列表, 如: http://www.tianqihoubao.com/lishi/shijiazhuang.html
对应的采集语句是:
// Select every anchor inside div.pcity directly under #content (the links
// to a city's historical records). The result is kept in a local so it can
// be iterated afterwards instead of being discarded.
var list = Default.SelectNodes("#content>div.pcity a");
>> 进入每条历史天气记录, 就可以得到当月的天气数据了:
对应的采集语句是:
// Take every row of table.b except the one at index 0
// (presumably the header row - confirm against the page).
var rows = Default.SelectNodes("#content>table.b tr:gt(0)");
foreach (var row in rows)
{
    // Cells are read by position: 0 = date, 1 = weather text,
    // 2 = temperature, 3 = wind.
    var date = row.SelectSingleNode("td:eq(0)").Text();
    var textWeather = row.SelectSingleNode("td:eq(1)").Text();
    var temp = row.SelectSingleNode("td:eq(2)").Text();
    var wind = row.SelectSingleNode("td:eq(3)").Text();
}
将这些语句分别包装为方法, 并将结果绑定到最开始定义的数据结构中:
// Outline only: "{...}" stands for the method bodies shown in the complete script.

// Gets the municipalities / provinces / special administrative regions.
public List<Province> GetProvinceList() {...}

// Gets the list of cities of the given province.
public List<City> GetCityList(Province province) {...}

// Gets the set of historical weather records of the given city.
public List<WeatherDataSet> GetWeatherDataSet(City city) {...}

// Gets the historical weather data of the given record set.
public List<WeatherData> GetWeatherData(WeatherDataSet ds) {...}
>> 完整的脚本: (复制到SS中即可直接运行)
SS下载地址为: http://www.gdtsearch.com/products.spiderstudio.docapi.htm
/// <summary>
/// Demo entry point. Logs all provinces, then the cities of the second
/// province, then the record sets of that province's second city, and
/// finally the weather rows of that city's first record set.
/// Stops early (with a log message) when a page yields too few entries,
/// instead of failing with an index-out-of-range exception.
/// </summary>
public void Run()
{
    Logger.ClearAll();
    Default.ScriptErrorsSuppressed = true;

    var pl = GetProvinceList();
    foreach (var p in pl)
    {
        Logger.Log(p.ProvinceName);
        Logger.Log(p.ProvinceUrl);
    }

    // The demo drills into the province at index 1, so two entries are required.
    if (pl.Count < 2)
    {
        Logger.Log("Fewer than two provinces were found; stopping.");
        return;
    }

    var cl = GetCityList(pl[1]);
    foreach (var c in cl)
    {
        Logger.Log(c.Province.ProvinceName);
        Logger.Log(c.Province.ProvinceUrl);
        Logger.Log(c.CityName);
        Logger.Log(c.CityUrl);
    }

    // Likewise the city at index 1 is used next.
    if (cl.Count < 2)
    {
        Logger.Log("Fewer than two cities were found; stopping.");
        return;
    }

    var ds = GetWeatherDataSet(cl[1]);
    foreach (var d in ds)
    {
        Logger.Log(d.City.CityName);
        Logger.Log(d.Title);
        Logger.Log(d.Url);
    }

    // Only the first record set is opened.
    if (ds.Count < 1)
    {
        Logger.Log("No weather record sets were found; stopping.");
        return;
    }

    var dl = GetWeatherData(ds[0]);
    foreach (var d in dl)
    {
        Logger.Log(d.DataSet.Title);
        Logger.Log(d.Date);
        Logger.Log(d.TextWeather);
        Logger.Log(d.Temp);
        Logger.Log(d.Wind);
    }
}

/// <summary>
/// Resolves an href read from the current page against the URL of that
/// page and returns the result as a string.
/// </summary>
/// <param name="href">The raw value of a link's href attribute.</param>
/// <returns>The href combined with <c>Default.Url</c>.</returns>
private string ToAbsoluteUrl(string href)
{
    return new Uri(Default.Url, href).ToString();
}

/// <summary>
/// Navigates to the history index page and reads one
/// <see cref="Province"/> per link matched by "#content DT a".
/// </summary>
/// <returns>The provinces in the order the links were returned.</returns>
public List<Province> GetProvinceList()
{
    Default.Navigate("http://www.tianqihoubao.com/lishi/index.htm");
    Default.Ready("#content DT");

    var list = Default.SelectNodes("#content DT a");
    var result = new List<Province>();
    foreach (var item in list)
    {
        var p = new Province();
        p.ProvinceName = item.Text();
        p.ProvinceUrl = ToAbsoluteUrl(item.Attr("href"));
        result.Add(p);
    }
    return result;
}

/// <summary>
/// Navigates to the given province's page and reads one
/// <see cref="City"/> per link matched by "#content DD a".
/// </summary>
/// <param name="province">The province whose page is opened; it is also
/// stored on every returned city.</param>
/// <returns>The cities in the order the links were returned.</returns>
public List<City> GetCityList(Province province)
{
    Default.Navigate(province.ProvinceUrl);
    Default.Ready("#content DD");

    var list = Default.SelectNodes("#content DD a");
    var result = new List<City>();
    foreach (var item in list)
    {
        var c = new City();
        c.Province = province;
        c.CityName = item.Text();
        c.CityUrl = ToAbsoluteUrl(item.Attr("href"));
        result.Add(c);
    }
    return result;
}

/// <summary>
/// Navigates to the given city's page and reads one
/// <see cref="WeatherDataSet"/> per link matched by "#content>div.pcity a".
/// </summary>
/// <param name="city">The city whose page is opened; it is also stored on
/// every returned record set.</param>
/// <returns>The record sets in the order the links were returned.</returns>
public List<WeatherDataSet> GetWeatherDataSet(City city)
{
    Default.Navigate(city.CityUrl);
    Default.Ready("#content>div.pcity");

    var list = Default.SelectNodes("#content>div.pcity a");
    var result = new List<WeatherDataSet>();
    foreach (var item in list)
    {
        var ds = new WeatherDataSet();
        ds.Title = item.Text();
        ds.Url = ToAbsoluteUrl(item.Attr("href"));
        ds.City = city;
        result.Add(ds);
    }
    return result;
}

/// <summary>
/// Navigates to the given record set's page and reads one
/// <see cref="WeatherData"/> per table row matched by
/// "#content>table.b tr:gt(0)".
/// </summary>
/// <param name="ds">The record set whose page is opened; it is also stored
/// on every returned record.</param>
/// <returns>The records in the order the rows were returned.</returns>
public List<WeatherData> GetWeatherData(WeatherDataSet ds)
{
    Default.Navigate(ds.Url);
    Default.Ready("#content>table.b");

    // tr:gt(0) leaves out the row at index 0 (presumably the header - confirm).
    var list = Default.SelectNodes("#content>table.b tr:gt(0)");
    var result = new List<WeatherData>();
    foreach (var item in list)
    {
        // NOTE(review): assumes every selected row has at least four cells;
        // confirm how SelectSingleNode behaves when a cell is missing.
        var d = new WeatherData();
        d.DataSet = ds;
        d.Date = item.SelectSingleNode("td:eq(0)").Text();
        d.TextWeather = item.SelectSingleNode("td:eq(1)").Text();
        d.Temp = item.SelectSingleNode("td:eq(2)").Text();
        d.Wind = item.SelectSingleNode("td:eq(3)").Text();
        result.Add(d);
    }
    return result;
}

/// <summary>
/// A province-level region: its display name and the absolute URL of its page.
/// </summary>
[Serializable]
public class Province
{
    public string ProvinceName;
    public string ProvinceUrl;
}

/// <summary>
/// A city, linked back to the province it was listed under.
/// </summary>
[Serializable]
public class City
{
    public Province Province;
    public string CityName;
    public string CityUrl;
}

/// <summary>
/// One entry of a city's historical-weather list: a title and the URL of
/// the page holding the actual records.
/// </summary>
[Serializable]
public class WeatherDataSet
{
    public City City;
    public string Title;
    public string Url;
}

/// <summary>
/// A single weather record. Every value is kept as the raw cell text.
/// </summary>
[Serializable]
public class WeatherData
{
    public WeatherDataSet DataSet;
    public string Date;
    public string TextWeather;
    public string Temp;
    public string Wind;
}
>> 运行效果: