使用 Python 爬取天气预报(Python 入门案例)
# Crawler example for the Yuyao region on weather.com.cn
import requests
from lxml import etree


class WeatherSpider:
    """Download the forecast page and extract the second day's weather.

    The spider requests a fixed forecast URL with a browser-like
    ``User-Agent`` header and reads the second ``<li>`` of the 7-day list
    (which the original author describes as "tomorrow").
    """

    # XPath of the second entry in the 7-day forecast list.
    _DAY_XPATH = (
        "//div[contains(@class,'con') and contains(@class,'today')]"
        "//div[@class='c7d']/ul/li[2]"
    )

    # Output key -> XPath (relative to the day entry) of the value to extract.
    _FIELD_XPATHS = {
        "date": "./h1/text()",
        "weather": "./p[@class='wea']/@title",
        "temperature_max": "./p[@class='tem']/span/text()",
        "temperature_min": "./p[@class='tem']/i/text()",
        "air_speed": "./p[@class='win']/i/text()",
    }

    def __init__(self, timeout=10):
        """Set up the target URL, request headers and network timeout.

        Args:
            timeout: Seconds to wait for the server before giving up.
                Without a timeout, ``requests`` may block indefinitely.
        """
        self.url = "http://www.weather.com.cn/weather/101210404.shtml"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"}
        self.timeout = timeout

    def get_url_content(self):
        """Return the body of ``self.url`` decoded as UTF-8 text.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                4xx/5xx response (via ``raise_for_status``).
        """
        response = requests.get(
            self.url, headers=self.headers, timeout=self.timeout
        )
        # Fail here with a clear HTTP error instead of handing an error page
        # to the parser and failing later with an unrelated lookup error.
        response.raise_for_status()
        return response.content.decode("utf-8")

    @staticmethod
    def _first(node, path):
        """Return the first result of ``node.xpath(path)``, or None if empty."""
        matches = node.xpath(path)
        return matches[0] if matches else None

    def get_weather_data(self, html):
        """Parse the forecast page and return the second day's weather.

        Args:
            html: Page markup as text.

        Returns:
            A dict with the keys ``date``, ``weather``, ``temperature_max``,
            ``temperature_min`` and ``air_speed``. A field whose node is
            absent from the page is stored as ``None`` rather than aborting
            the whole extraction.

        Raises:
            IndexError: If the day entry itself cannot be located.
        """
        document = etree.HTML(html)
        day_nodes = document.xpath(self._DAY_XPATH)
        if not day_nodes:
            # Same exception type as the unchecked ``[0]`` used to raise,
            # but with a message that says what was missing.
            raise IndexError(
                "forecast entry not found: no node matches " + self._DAY_XPATH
            )
        day_node = day_nodes[0]
        return {
            key: self._first(day_node, path)
            for key, path in self._FIELD_XPATHS.items()
        }

    def run(self):
        """Fetch the page, extract the weather data and print it."""
        # Fetch the page content for the configured URL
        content_html = self.get_url_content()
        # Extract the weather data from the page content
        data = self.get_weather_data(content_html)
        # Print the scraped weather data
        print(data)


if __name__ == '__main__':
    spider = WeatherSpider()
    spider.run()
爬取结果
我刚学爬虫不久,写了这个小 demo:用爬虫抓取天气网余姚地区的页面,每次运行可以获取第二天的天气状况(日期、天气、最高/最低气温和风力)。