Python爬虫:利用BeautifulSoup库爬取中国天气预报网信息的代码
"""Fetch the weather-forecast page at ``url`` and print one line per day.

Each printed line contains the date label, the weather description and the
temperature. The third-party packages ``bs4`` and ``lxml`` must be installed.
"""
import re
import urllib.request

from bs4 import BeautifulSoup
from bs4 import UnicodeDammit

url = 'http://www.weather.com.cn/weather/101010100.shtml'
# A browser-like User-Agent is sent with the request.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                         "Chrome/80.0.3987.16 Safari/537.36"}
req = urllib.request.Request(url, headers=headers)

# Use a context manager so the HTTP connection is always closed, and a timeout
# so an unresponsive server cannot hang the script forever.
with urllib.request.urlopen(req, timeout=10) as response:
    data = response.read()

# Decode the raw bytes, offering utf8 and gbk as candidate encodings.
dammit = UnicodeDammit(data, ['utf8', 'gbk'])
data = dammit.unicode_markup

soup = BeautifulSoup(data, 'lxml')
# Every <li> under the forecast list is treated as one day's entry.
lis = soup.select("ul[class= 't clearfix'] li")

for li in lis:
    date = li.select('h1')[0].text
    weather = li.select("p[class = 'wea']")[0].text

    low = li.select("p[class = 'tem'] i")[0].text
    # The high temperature lives in a <span> that an entry may lack (the
    # sample run shows the first entry with only a low value). Decide by
    # checking for the element itself instead of relying on the entry's
    # position in the list, so a high value is never dropped or missed.
    high_nodes = li.select("p[class = 'tem'] span")
    if high_nodes:
        temperature = high_nodes[0].text + "/" + low
    else:
        temperature = low

    # Wind information could be extracted the same way, e.g.:
    # wind = li.select("p[class = 'win'] em")[0].text + li.select("p[class = 'win'] i")[0].text
    print(date, weather, temperature)
运行结果如下:
22日(今天) 晴 -3℃
23日(明天) 晴 11/-1℃
24日(后天) 阴转多云 11/2℃
25日(周二) 多云 10/-1℃
26日(周三) 多云转阴 8/-2℃
27日(周四) 阴转小雪 5/-3℃
28日(周五) 多云 7/-1℃