爬取丁香医生数据,制作全球疫情可视化

一、获取数据

锁定数据来源,丁香医生app

使用fiddler抓取数据链接(这里就不详细说了,这个app没难度)

import json
import re
import requests
import datetime
today = datetime.date.today().strftime('%Y%m%d')  # e.g. 20200315


def crawl_dxy_data():
    """
    Fetch the DXY live statistics page and save its per-country data as JSON.

    The page embeds the data as a JavaScript assignment to
    ``window.getListByCountryTypeService2true``; that JSON array is extracted
    and written to ``data/app-<today>.json``, where ``<today>`` is the
    module-level date string.

    Returns:
        None. Any exception derived from ``Exception`` (network, parsing or
        file error) is caught and reported on stdout instead of being raised.
    """
    import os  # local import: only needed here to create the output directory

    headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 5.1.1; TAS-AN00 Build/TAS-AN00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.136 Mobile Safari/537.36 dxyapp_name/aspirin dxyapp_version/8.6.1 dxyapp_system_version/5.1.1 dxyapp_client_id/19a3d982dedd4da4bda7e303992bffe3 dxyapp_ac/d5424fa6-adff-4b0a-8917-4264daf4a348 dxyapp_sid/47121485-cea5-4144-80a8-8ecda8510946"
    }
    try:
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(
            'https://ncov.dxy.cn//ncovh5/view/pneumonia_top?from=dxy&source=&link=&share=0',
            headers=headers,
            timeout=10,
        )
        # Fail early on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        url_text = response.text
        print(url_text)

        # Capture the JSON array itself: everything after the assignment up to
        # and including the "}]" that directly precedes "}catch".
        url_content = re.search(
            r'window\.getListByCountryTypeService2true = (.*?\}\])\}catch',
            url_text,
            re.S,
        )
        if url_content is None:
            raise ValueError('页面中未找到 getListByCountryTypeService2true 数据')
        json_data = json.loads(url_content.group(1))

        # open() does not create missing parent directories.
        os.makedirs('data', exist_ok=True)
        with open('data/app-' + today + '.json', 'w', encoding='UTF-8') as f:
            json.dump(json_data, f, ensure_ascii=False)
    except Exception as e:
        # Script-level boundary: report the failure and carry on.
        print("请求或解析数据失败:{}".format(e))

二、解析数据

# 解析数据
def crawl_parser_data():
    """
    Read today's saved JSON file and collect country names with confirmed counts.

    The input file is ``data/app-<today>.json``, where ``<today>`` is the
    module-level date string. The result is also printed to stdout.

    Returns:
        A list of ``[provinceName, confirmedCount]`` pairs in file order.
        Entries with a missing/empty name or a missing count are skipped;
        a count of 0 is kept.
    """
    with open('data/app-' + today + '.json', 'r', encoding='UTF-8') as file:
        json_array = json.load(file)

    county_lists = []  # [provinceName, confirmedCount]
    for province in json_array:
        name = province.get("provinceName")
        confirmed = province.get("confirmedCount")
        # Test the count against None rather than for truthiness: 0 is a valid
        # value, and the map's colour scale has a dedicated bucket for it.
        if name and confirmed is not None:
            county_lists.append([name, confirmed])
    print(county_lists)
    return county_lists

三、制作可视化地图

这里使用pyecharts模块,https://pyecharts.org/#/zh-cn/

文档比较详细,简单的案例都有

这里最主要的是生成一个由英文国家名和确诊数量组成的列表数据。由于爬取到的国家名字都是中文的,所以需要先转换成英文。一开始想手动转换,但发现200多个国家太多了,所以又写了一个调用有道翻译的脚本,把所有的中文国家名转成了英文。有道脚本就不贴了,官网上有文档:https://ai.youdao.com/DOCSIRMA/html/%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E7%BF%BB%E8%AF%91/API%E6%96%87%E6%A1%A3/%E6%96%87%E6%9C%AC%E7%BF%BB%E8%AF%91%E6%9C%8D%E5%8A%A1/%E6%96%87%E6%9C%AC%E7%BF%BB%E8%AF%91%E6%9C%8D%E5%8A%A1-API%E6%96%87%E6%A1%A3.html

# 制作地图
def create_map(county_lists):
    """
    Render a world map of cumulative confirmed cases to ``data/map_world.html``.

    Args:
        county_lists: sequence of ``[chinese_country_name, confirmed_count]``
            pairs, e.g. the return value of ``crawl_parser_data``. Names that
            do not appear in the translation table below are ignored.

    Returns:
        None. The chart is written to ``data/map_world.html``.
    """
    # NOTE(review): ``Map`` and ``opts`` are not imported in the visible part
    # of this file; presumably ``from pyecharts.charts import Map`` and
    # ``from pyecharts import options as opts`` — confirm they are in scope.

    # Custom value ranges for the piecewise visual map, each with its own colour.
    pieces = [
        {'min': 10000000, 'color': '#540d0d'},
        {'max': 9999999, 'min': 1000000, 'color': '#9c1414'},
        {'max': 999999, 'min': 100000, 'color': '#d92727'},
        {'max': 99999, 'min': 10000, 'color': '#ed3232'},
        {'max': 9999, 'min': 1000, 'color': '#f27777'},
        {'max': 999, 'min': 1, 'color': '#f7adad'},
        {'max': 0, 'color': '#f7e4e4'},
    ]
    # The map needs English country names, so Chinese names are translated via
    # this table of [english_name, chinese_name] pairs. The English name must
    # equal the region name used by the map, and the Chinese name must equal
    # the name found in the scraped data.
    # NOTE(review): several Chinese names look like formal or machine-translated
    # variants (e.g. '也门共和国', '摩纳哥公国', '史瓦济兰') and may not match the
    # scraped names; likewise some English names may differ from the map's
    # region names — verify against real data.
    POPULATION = [['China', '中国'], ['India', '印度'], ['United States', '美国'], ['Indonesia', '印度尼西亚'], ['Brazil', '巴西'],
                  ['Pakistan', '巴基斯坦'], ['Nigeria', '尼日利亚'], ['Bangladesh', '孟加拉国'], ['Russia', '俄罗斯'],
                  ['Mexico', '墨西哥'], ['Japan', '日本'], ['Ethiopia', '埃塞俄比亚'], ['Philippines', '菲律宾'], ['Egypt', '埃及'],
                  ['Vietnam', '越南'], ['Dem. Rep. Congo', '刚果(金)'], ['Turkey', '土耳其'], ['Iran', '伊朗'], ['Germany', '德国'],
                  ['Thailand', '泰国'], ['United Kingdom', '英国'], ['France', '法国'], ['Tanzania', '坦桑尼亚'],
                  ['Italy', '意大利'], ['South Africa', '南非'], ['Myanmar', '缅甸'], ['Kenya', '肯尼亚'], ['Korea', '韩国'],
                  ['Colombia', '哥伦比亚'], ['Spain', '西班牙'], ['Uganda', '乌干达'], ['Argentina', '阿根廷'], ['Ukraine', '乌克兰'],
                  ['Algeria', '阿尔及利亚'], ['Sudan', '苏丹'], ['Iraq', '伊拉克'], ['Poland', '波兰'], ['Canada', '加拿大'],
                  ['Afghanistan', '阿富汗'], ['Morocco', '摩洛哥'], ['Saudi Arabia', '沙特阿拉伯'], ['Peru', '秘鲁'],
                  ['Uzbekistan', '乌兹别克斯坦'], ['Venezuela', '委内瑞拉'], ['Malaysia', '马来西亚'], ['Angola', '安哥拉'],
                  ['Mozambique', '莫桑比克'], ['Ghana', '加纳'], ['Nepal', '尼泊尔'],
                  ['Yemen', '也门共和国'], ['Madagascar', '马达加斯加'], ['Dem. Rep. Korea', '朝鲜'], ["Côte d'Ivoire", '科特迪瓦'],
                  ['Cameroon', '喀麦隆'], ['Australia', '澳大利亚'], ['Taiwan', '中国台湾'], ['Niger', '尼日尔'],
                  ['Sri Lanka', '斯里兰卡'], ['Burkina Faso', '布基纳法索'], ['Malawi', '马拉维'], ['Mali', '马里'],
                  ['Romania', '罗马尼亚'], ['Kazakhstan', '哈萨克斯坦'], ['Syria', '叙利亚'], ['Chile', '智利'], ['Zambia', '赞比亚共和国'],
                  ['Guatemala', '危地马拉'], ['Zimbabwe', '津巴布韦'], ['Netherlands', '荷兰'], ['Ecuador', '厄瓜多尔'],
                  ['Senegal', '塞内加尔'], ['Cambodia', '柬埔寨'], ['Chad', '乍得'], ['Somalia', '索马里'], ['Guinea', '几内亚'],
                  ['S. Sudan', '南苏丹'], ['Rwanda', '卢旺达'], ['Benin', '贝宁'], ['Tunisia', '突尼斯'], ['Burundi', '布隆迪共和国'],
                  ['Belgium', '比利时'], ['Cuba', '古巴'], ['Bolivia', '玻利维亚'], ['Haiti', '海地'], ['Greece', '希腊'],
                  ['Dominican Rep.', '多米尼加'], ['Czechia', '捷克'], ['Portugal', '葡萄牙'], ['Jordan', '约旦'],
                  ['Sweden', '瑞典'], ['Azerbaijan', '阿塞拜疆'], ['United Arab Emirates', '阿拉伯联合酋长国'], ['Hungary', '匈牙利'],
                  ['Honduras', '洪都拉斯'], ['Belarus', '白俄罗斯'], ['Tajikistan', '塔吉克斯坦'], ['Austria', '奥地利'],
                  ['Serbia', '塞尔维亚'], ['Switzerland', '瑞士'], ['Papua New Guinea', '巴布亚新几内亚'], ['Israel', '以色列'],
                  ['Togo', '多哥'], ['Sierra Leone', '塞拉利昂'], ['Hong Kong', '中国香港'], ['Laos', '老挝'], ['Bulgaria', '保加利亚'],
                  ['Paraguay', '巴拉圭'], ['Libya', '利比亚'], ['El Salvador', '萨尔瓦多'], ['Nicaragua', '尼加拉瓜'],
                  ['Kyrgyzstan', '吉尔吉斯斯坦'], ['Lebanon', '黎巴嫩'], ['Turkmenistan', '土库曼斯坦'], ['Singapore', '新加坡'],
                  ['Denmark', '丹麦'], ['Finland', '芬兰'], ['Congo', '刚果(布)'], ['Slovakia', '斯洛伐克'], ['Norway', '挪威'],
                  ['Eritrea', '厄立特里亚'], ['State of Palestine', '巴勒斯坦国'], ['Oman', '阿曼'], ['Costa Rica', '哥斯达黎加'],
                  ['Liberia', '利比里亚'], ['Ireland', '爱尔兰'], ['Central African Rep.', '中非共和国'],
                  ['New Zealand', '新西兰'], ['Mauritania', '毛里塔尼亚'], ['Kuwait', '科威特'], ['Panama', '巴拿马'],
                  ['Croatia', '克罗地亚'], ['Moldova', '摩尔多瓦'], ['Georgia', '格鲁吉亚'], ['Puerto Rico', '波多黎各'],
                  ['Bosnia and Herzegovina', '波斯尼亚和黑塞哥维那'], ['Uruguay', '乌拉圭'], ['Mongolia', '蒙古'],
                  ['Albania', '阿尔巴尼亚'], ['Armenia', '亚美尼亚'], ['Jamaica', '牙买加'], ['Lithuania', '立陶宛'], ['Qatar', '卡塔尔'],
                  ['Namibia', '纳米比亚'], ['Botswana', '博茨瓦纳'], ['Lesotho', '莱索托'], ['Gambia', '冈比亚'], ['Gabon', '加蓬'],
                  ['North Macedonia', '北马其顿'], ['Slovenia', '斯洛文尼亚'], ['Guinea-Bissau', '几内亚比绍'], ['Latvia', '拉脱维亚'],
                  ['Bahrain', '巴林'], ['Swaziland', '史瓦济兰'], ['Trinidad and Tobago', '特立尼达和多巴哥'],
                  ['Equatorial Guinea', '赤道几内亚'], ['Timor-Leste', '东帝汶'], ['Estonia', '爱沙尼亚'], ['Mauritius', '毛里求斯'],
                  ['Cyprus', '塞浦路斯'], ['Djibouti', '吉布提'], ['Fiji', '斐济'], ['Réunion', '留尼汪'], ['Comoros', '科摩罗'],
                  ['Bhutan', '不丹'], ['Guyana', '圭亚那'], ['Macao', '中国澳门'], ['Solomon Islands', '所罗门群岛'],
                  ['Montenegro', '黑山'], ['Luxembourg', '卢森堡'], ['W. Sahara', '西撒哈拉'], ['Suriname', '苏里南'],
                  ['Cabo Verde', '佛得角'], ['Micronesia', '密克罗尼西亚'], ['Maldives', '马尔代夫'], ['Guadeloupe', '瓜德罗普'],
                  ['Brunei', '文莱'], ['Malta', '马耳他'], ['Bahamas', '巴哈马'], ['Belize', '伯利兹'], ['Martinique', '马提尼克'],
                  ['Iceland', '冰岛'], ['French Guiana', '法属圭亚那'], ['French Polynesia', '法属波利尼西亚'], ['Vanuatu', '瓦努阿图'],
                  ['Barbados', '巴巴多斯'], ['New Caledonia', '新喀里多尼亚'], ['Mayotte', '马约特'],
                  ['Sao Tome & Principe', '圣多美和普林西比'], ['Samoa', '萨摩亚'], ['Saint Lucia', '圣卢西亚'], ['Guam', '关岛'],
                  ['Channel Islands', '海峡群岛'], ['Curaçao', '库拉索'], ['Kiribati', '基里巴斯'],
                  ['St. Vincent & Grenadines', '圣文森特和格林纳丁斯'], ['Tonga', '汤加'], ['Grenada', '格林纳达'], ['Aruba', '阿鲁巴'],
                  ['U.S. Virgin Islands', '美属维尔京群岛'], ['Antigua and Barbuda', '安提瓜和巴布达'], ['Seychelles', '塞舌尔'],
                  ['Isle of Man', '马恩岛'], ['Andorra', '安道尔'], ['Dominica', '多米尼克'], ['Cayman Islands', '开曼群岛'],
                  ['Bermuda', '百慕大'], ['Greenland', '格陵兰'], ['Saint Kitts & Nevis', '圣基茨和尼维斯'],
                  ['American Samoa', '东萨摩亚'], ['Northern Mariana Islands', '北马里亚纳群岛'], ['Marshall Islands', '马绍尔群岛'],
                  ['Faeroe Islands', '法罗群岛'], ['Sint Maarten', '荷属圣马丁'], ['Monaco', '摩纳哥公国'],
                  ['Liechtenstein', '列支敦斯登'], ['Turks and Caicos', '特克斯和凯科斯群岛'], ['Gibraltar', '直布罗陀'],
                  ['San Marino', '圣马力诺'], ['British Virgin Islands', '英属维尔京群岛'], ['Caribbean Netherlands', '荷兰加勒比区'],
                  ['Palau', '帕劳'], ['Cook Islands', '库克群岛'], ['Anguilla', '安圭拉'], ['Wallis & Futuna', '沃利斯群岛'],
                  ['Tuvalu', '吐瓦鲁'], ['Nauru', '瑙鲁'], ['Saint Pierre & Miquelon', '圣皮埃尔'], ['Montserrat', '蒙塞拉特'],
                  ['Saint Helena', '圣赫勒拿岛'], ['Falkland Islands', '福克兰'], ['Niue', '纽埃'], ['Tokelau', '托克劳群岛'],
                  ['Holy See', '圣座']]

    # Index the scraped counts by Chinese name so each table entry is a single
    # dict lookup. Full-width parentheses are folded to ASCII so that a name
    # such as "刚果\uff08金\uff09" still matches the ASCII-parenthesised table key.
    fold_parens = str.maketrans({'\uff08': '(', '\uff09': ')'})
    confirmed_by_name = {}
    for row in county_lists:
        name = row[0]
        if isinstance(name, str):
            # setdefault keeps the first row when a name appears more than once.
            confirmed_by_name.setdefault(name.translate(fold_parens), row[1])

    # [english_name, confirmed_count] pairs, in table order.
    country_data = [[english, confirmed_by_name[chinese]]
                    for english, chinese in POPULATION
                    if chinese in confirmed_by_name]

    m = Map()
    m.add("累计确诊", country_data, "world")
    m.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    m.set_global_opts(
        title_opts=opts.TitleOpts(title='全球实时确诊数据',
                                  subtitle='数据来源:丁香园'),
        legend_opts=opts.LegendOpts(is_show=False),
        visualmap_opts=opts.VisualMapOpts(pieces=pieces,
                                          is_piecewise=True,  # use discrete ranges
                                          is_show=True))  # show the visual-map legend
    m.render("data/map_world.html")

四、最后的显示效果

 

posted @ 2021-03-09 17:10  Maple_feng  阅读(1137)  评论(0)  编辑  收藏  举报