# 爬虫14-find_all中国天气网爬虫
# Scrape the regional text-forecast pages of weather.com.cn, collect each
# city's minimum temperature, and render a bar chart sorted from coldest to
# warmest into ``temperature.html``.
import logging

from bs4 import BeautifulSoup
import requests
# NOTE(review): ``Bar(title)`` + ``chart.add(name, x, y)`` looks like the
# pyecharts 0.x API -- confirm against the installed pyecharts version.
from pyecharts import Bar

logger = logging.getLogger(__name__)

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
}

# Seconds to wait on the server before giving up; without a timeout
# ``requests.get`` can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Module-level accumulator: one ``{"city": str, "min_temp": int}`` dict is
# appended per successfully parsed table row.
ALL_data = []


def _first_text(tag):
    """Return the first whitespace-stripped text fragment in *tag*, or None."""
    return next(iter(tag.stripped_strings), None)


def parse_page(url):
    """Download one forecast page and append its city records to ``ALL_data``.

    Args:
        url: Address of the page to fetch and parse.

    Returns:
        None. Results are appended to the module-level ``ALL_data`` list.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no ``<div class="conMidtab">``.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode("utf-8")

    # html5lib is used instead of lxml because the Hong Kong / Macao / Taiwan
    # page has malformed markup and lxml is less tolerant of it.
    soup = BeautifulSoup(text, 'html5lib')

    # ``find`` returns only the first matching div (or None when absent).
    con_midtab = soup.find('div', class_='conMidtab')
    if con_midtab is None:
        raise ValueError("no <div class='conMidtab'> found in %s" % url)

    for table in con_midtab.find_all('table'):
        # The first two rows of every table are skipped (presumably header
        # rows -- verify against the page markup).
        for index, tr in enumerate(table.find_all('tr')[2:]):
            tds = tr.find_all('td')
            if len(tds) < 2:
                logger.warning("skipping row with %d cell(s) in %s", len(tds), url)
                continue

            # In the first data row the city name is read from the second
            # cell (presumably the first cell holds the province name that
            # spans the following rows -- TODO confirm); otherwise the first.
            city_td = tds[1] if index == 0 else tds[0]
            city = _first_text(city_td)
            # The second-to-last cell is read as the minimum temperature.
            min_temp = _first_text(tds[-2])
            if city is None or min_temp is None:
                logger.warning("skipping row with empty city/temperature in %s", url)
                continue

            try:
                record = {"city": city, "min_temp": int(min_temp)}
            except ValueError:
                # One non-numeric cell should not abort the whole crawl.
                logger.warning(
                    "skipping %s: non-integer temperature %r", city, min_temp
                )
                continue
            ALL_data.append(record)


def main():
    """Crawl every regional page, sort by minimum temperature, draw the chart.

    Writes the rendered bar chart to ``temperature.html``.
    """
    # A tuple (not a set) keeps the crawl order fixed, so cities that tie on
    # temperature come out of the stable sort in the same order on every run.
    urls = (
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    )
    for url in urls:
        parse_page(url)

    # Order the records from lowest to highest minimum temperature.
    ALL_data.sort(key=lambda data: data['min_temp'])

    cities = [record['city'] for record in ALL_data]
    temps = [record['min_temp'] for record in ALL_data]

    chart = Bar("中国天气温度排行榜")
    chart.add("", cities, temps)
    chart.render("temperature.html")


if __name__ == '__main__':
    main()