爬虫14-find_all中国天气网爬虫

from bs4 import BeautifulSoup
import requests
from pyecharts import Bar
# Request headers sent with every page fetch; the User-Agent is a desktop
# Chrome browser string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/65.0.3325.181 Safari/537.36"
    ),
}

# Module-level accumulator: one {"city": ..., "min_temp": ...} dict per
# scraped table row, filled by parse_page() and consumed by main().
ALL_data = []
def parse_page(url):
    """Scrape one forecast page and append its rows to ``ALL_data``.

    For every ``<table>`` inside the first ``div.conMidtab`` of the page,
    each row after the first two is read and a dict
    ``{"city": <str>, "min_temp": <int>}`` is appended to the module-level
    ``ALL_data`` list.

    Rows that do not have the expected shape (too few cells, no text, or a
    temperature that is not an integer) are skipped instead of aborting the
    whole scrape. If the page has no ``div.conMidtab`` nothing is appended.

    Args:
        url: Address of the page to fetch.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx rather than parsing an error page.
    response.raise_for_status()
    text = response.content.decode("utf-8")

    # html5lib is used instead of lxml: per the original author's note, the
    # Hong Kong / Macao / Taiwan page has malformed tags and lxml is less
    # tolerant of them.
    soup = BeautifulSoup(text, 'html5lib')

    # Only the first conMidtab container is used.
    conMidtab = soup.find('div', class_='conMidtab')
    if conMidtab is None:
        return

    for table in conMidtab.find_all('table'):
        # The first two rows of each table are skipped (presumably header
        # rows -- confirm against the page markup).
        trs = table.find_all('tr')[2:]
        for index, tr in enumerate(trs):
            tds = tr.find_all('td')
            # Both tds[1] and tds[-2] below need at least two cells.
            if len(tds) < 2:
                continue

            # In the first data row the city name sits in the second cell
            # (presumably the first cell holds a row-spanning province
            # name); in every other row it is the first cell.
            city_td = tds[1] if index == 0 else tds[0]
            city_strings = list(city_td.stripped_strings)

            # The second-to-last cell is read as the minimum temperature.
            temp_strings = list(tds[-2].stripped_strings)
            if not city_strings or not temp_strings:
                continue

            try:
                min_temp = int(temp_strings[0])
            except ValueError:
                # Non-numeric placeholder in the temperature cell.
                continue

            ALL_data.append({"city": city_strings[0], "min_temp": min_temp})
def main():
    """Scrape all listed pages and render a bar chart of minimum temperatures.

    Calls ``parse_page`` for each URL (which fills the module-level
    ``ALL_data`` list), sorts ``ALL_data`` in place by ascending
    ``min_temp``, and writes a pyecharts ``Bar`` chart of every city to
    ``temperature.html`` in the current working directory.
    """
    # A tuple rather than a set: set iteration order of strings varies
    # between interpreter runs (hash randomization), and because the sort
    # below is stable, that would reorder cities with equal temperatures
    # from run to run.
    urls = (
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    )
    for url in urls:
        parse_page(url)

    # Sort by minimum temperature, lowest first.
    ALL_data.sort(key=lambda data: data['min_temp'])

    # Split the sorted records into the parallel x/y series the chart expects.
    cities = [entry['city'] for entry in ALL_data]
    temps = [entry['min_temp'] for entry in ALL_data]

    chart = Bar("中国天气温度排行榜")
    chart.add("", cities, temps)
    chart.render("temperature.html")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
posted @ 2020-03-13 11:28 胡辣汤王子 阅读(270) 评论(0) 编辑 收藏 举报
刷新页面返回顶部
胡辣汤王子

爬虫14-find_all中国天气网爬虫

公告