.net利用爬虫爬取中国天气网的天气信息
1.首先需要添加引用Newtonsoft.Json.dll,这个可以直接从网上获取
2.获取NuGet包HtmlAgilityPack 这是一个非常好用的解析html的工具
3.下载城市编码数据,下载地址是 https://apip.weatherdt.com/float/static/js/city.js 。该js文件直接在浏览器中打开可能会显示乱码,没关系:在页面上右键选择“另存为”,把它保存为txt文件,再打开就不是乱码了。将文件命名为city.txt并放在bin/debug目录下(下面的代码从程序运行目录读取city.txt),文件位置可根据需求自行修改。
4.根据城市名称获取城市编码
/// <summary>
/// Looks up a city code by city name in the "city.txt" data file located in
/// the application's base directory.
/// </summary>
/// <param name="cityName">
/// Name compared (exact, ordinal) against the "zh" field of each third-level entry.
/// </param>
/// <returns>
/// The "id" of the first matching third-level entry, or an empty string when
/// the file contains no match.
/// </returns>
public string GetCityCode(string cityName)
{
    // Read the whole city data file. The stream is opened read-only so a
    // read-only file still works, and both objects are disposed even when
    // reading throws.
    string filePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "city.txt");
    string json;
    using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (StreamReader sr = new StreamReader(fs))
    {
        json = sr.ReadToEnd();
    }

    // The data is a JSON array nested three levels deep through "children".
    // An empty file deserializes to null, which is treated as "no match".
    JArray root = (JArray)JsonConvert.DeserializeObject(json);
    if (root == null)
    {
        return "";
    }

    foreach (JToken topLevel in root)
    {
        JArray secondLevel = topLevel["children"] as JArray;
        if (secondLevel == null)
        {
            // Entry without nested children: nothing to search here.
            continue;
        }

        foreach (JToken group in secondLevel)
        {
            JArray cities = group["children"] as JArray;
            if (cities == null)
            {
                continue;
            }

            foreach (JToken cityEntry in cities)
            {
                JToken nameToken = cityEntry["zh"];
                JToken idToken = cityEntry["id"];
                if (nameToken == null || idToken == null)
                {
                    // Skip incomplete entries instead of failing the whole lookup.
                    continue;
                }

                if (nameToken.ToString() == cityName)
                {
                    // Stop at the first match; the previous `break` only left the
                    // innermost loop, so later duplicates could overwrite the result.
                    return idToken.ToString();
                }
            }
        }
    }

    return "";
}
5.获取关键数据
/// <summary>
/// Loads the weather1d page of www.weather.com.cn for the city configured under
/// the "address" app setting and extracts the current hour's values from the
/// page's embedded observe24h_data script.
/// </summary>
/// <returns>
/// "0" when no exception occurs (also when no city code is found for the
/// configured city); otherwise the text of the caught exception.
/// </returns>
public string InsertWeatherInfo2()
{
    string result = "0";
    var city = ConfigurationManager.AppSettings["address"];

    // HTML loader used to fetch and parse the page
    HtmlWeb webClient = new HtmlWeb();
    string cityCode = GetCityCode(city);
    if (cityCode == "")
    {
        // No code for the configured city: nothing to fetch.
        return result;
    }

    try
    {
        string url = "http://www.weather.com.cn/weather1d/" + cityCode + ".shtml";
        HtmlAgilityPack.HtmlDocument doc = webClient.Load(url);

        // Take the first statement of the script that mentions observe24h_data
        // and strip the variable assignment so only the JSON text remains.
        var scriptNode = doc.DocumentNode.SelectSingleNode("//script[contains(text(), 'observe24h_data')]");
        var firstStatement = scriptNode.InnerHtml.Split(';')[0];
        var json = firstStatement.Replace("var observe24h_data = ", "");

        string currentHour = DateTime.Now.Hour.ToString();

        // Precipitation
        var precipitation = "0";
        // Temperature
        var temperature = "";
        // Humidity
        var humidity = "";
        // Wind speed
        var windSpeed = "";
        // Wind direction
        var windDirection = "";
        // Observation time
        var time = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
        // Record id
        var guid = Guid.NewGuid().ToString().ToLower();

        JObject payload = (JObject)JsonConvert.DeserializeObject(json);

        // All hourly entries contained in the JSON
        var hourlyEntries = (JArray)payload["od"]["od2"];
        foreach (JToken entry in hourlyEntries)
        {
            // NOTE(review): exact string match against the unpadded hour; if the
            // feed zero-pads "od21", hours 0-9 would never match - confirm.
            if ((string)entry["od21"] != currentHour)
            {
                continue;
            }

            // Entry for the current hour found: copy its values and stop.
            precipitation = entry["od26"].ToString();
            temperature = entry["od22"].ToString();
            humidity = entry["od27"].ToString();
            windSpeed = entry["od25"].ToString();
            windDirection = entry["od24"].ToString();
            break;
        }

        // Perform the database insert here (not part of this snippet)
    }
    catch (Exception ex)
    {
        result = ex.ToString();
    }

    return result;
}