Python爬取-腾讯新型冠状病毒疫情实时数据(转)
Selenium是一个用于Web应用程序测试的工具
腾讯新型冠状病毒疫情网址:https://news.qq.com/zt2020/page/feiyan.htm?from=timeline&isappinstalled=0
打开开发者工具,刷新页面进行抓包,发现接口 https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=jQuery34107712202448063499_1580707765240&_=1580707765241 的返回结果里有我们想要的疫情数据汇总。callback 是回调函数名,可以尝试清空;最后的 `_` 参数是毫秒级时间戳。接下来就可以进行抓取了。
————————————————
版权声明:本文为CSDN博主「爬虫虫」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/qq_36917018/article/details/104155640
"""Fetch Tencent's COVID-19 feed and append a per-city report for China to a CSV file."""
import csv
import json
import time

import requests

# Name of the output file (opened in append mode, so reruns accumulate).
ExcelName = '2.10疫情日报.csv'

# Cache-busting value for the `_` query parameter. The browser sends a
# millisecond timestamp, so scale seconds by 1000 (the original used 100).
number = format(time.time() * 1000, '.0f')
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=&_=%s' % number

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of a confusing JSON error.
response = requests.get(url=url, timeout=10)
response.raise_for_status()

# The 'data' field of the response is itself a JSON-encoded string,
# hence the second decoding step.
datas = json.loads(response.json()['data'])
print('更新时间:' + datas['lastUpdateTime'])

# Open the output file once for the whole report instead of once per row.
with open(ExcelName, 'a', encoding='utf-8', newline='') as csvfile:
    writer = csv.writer(csvfile)

    # First row: time of the last update reported by the feed.
    writer.writerow(['更新时间:' + datas['lastUpdateTime']])

    for country in datas['areaTree']:
        # Only the entry for China is reported.
        if country['name'] != '中国':
            continue

        for province in country['children']:
            # One row holding just the province name ...
            print(province['name'])
            writer.writerow([province['name']])

            # ... followed by one row per city with its cumulative totals.
            for city in province['children']:
                totals = city['total']
                row = [
                    city['name'],
                    '确诊:' + str(totals['confirm']),
                    '死亡:' + str(totals['dead']),
                    '治愈:' + str(totals['heal']),
                ]
                print(*row)
                writer.writerow(row)
=====================================================修改后
"""Fetch Tencent's COVID-19 feed and write total/new counts per city in China to a text file."""
import csv
import json
import string  # not used below; retained from the original import list
import time

import requests

# Output file name, prefixed with the local time at which the script runs.
ExcelName = time.strftime("%Y-%m-%d %H-%M-%S", time.localtime()) + '疫情日报.txt'

# Cache-busting value for the `_` query parameter. The browser sends a
# millisecond timestamp, so scale seconds by 1000 (the original used 100).
number = format(time.time() * 1000, '.0f')
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=&_=%s' % number
print(url)

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of a confusing JSON error.
response = requests.get(url=url, timeout=10)
response.raise_for_status()

# The 'data' field of the response is itself a JSON-encoded string,
# hence the second decoding step.
datas = json.loads(response.json()['data'])
print('更新时间:' + datas['lastUpdateTime'])

# Column titles, kept as one comma-separated string and split when written.
field = "地区,总确诊,新增确认,总死亡,新增死亡,总治愈,新增治愈"

# Open the output file once for the whole report instead of once per row.
with open(ExcelName, 'a', encoding='utf-8', newline='') as csvfile:
    writer = csv.writer(csvfile)

    # Pass real field lists to the writer. Handing it one pre-joined
    # "a,b,c" string makes the csv module quote the entire line as a
    # single column.
    writer.writerow(field.split(','))

    for country in datas['areaTree']:
        # Only the entry for China is reported.
        if country['name'] != '中国':
            continue

        for province in country['children']:
            # Province names are echoed to the console only; the file
            # contains city rows exclusively.
            print(province['name'])

            for city in province['children']:
                total = city['total']
                today = city['today']
                print(
                    city['name'],
                    '总确诊:' + str(total['confirm']),
                    '新增确认:' + str(today['confirm']),
                    '总死亡:' + str(total['dead']),
                    '新增死亡:' + str(today['dead']),
                    '总治愈:' + str(total['heal']),
                    '新增治愈:' + str(today['heal']),
                )
                # Same column order as `field`.
                writer.writerow([
                    city['name'],
                    total['confirm'],
                    today['confirm'],
                    total['dead'],
                    today['dead'],
                    total['heal'],
                    today['heal'],
                ])
==============================================
"""Download Tencent's COVID-19 feed and append national and per-city reports to a text file."""
import json

import requests


def Down_data():
    """Download the epidemic feed and return its decoded ``data`` payload.

    Returns:
        The object obtained by JSON-decoding the ``data`` field of the
        response (that field is itself a JSON-encoded string).

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Mobile Safari/537.36'
    }
    # The headers must be passed by keyword: the second positional parameter
    # of requests.get is `params`, so passing the dict positionally put the
    # User-Agent into the query string and never sent it as a header.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    res = json.loads(r.text)
    data_res = json.loads(res['data'])
    return data_res


def Parse_data1(data=None):
    """Append the nationwide summary to ``疫情查询.txt``.

    Args:
        data: Payload as returned by ``Down_data``. When omitted, a fresh
            copy is downloaded, which matches the original zero-argument
            behaviour.
    """
    if data is None:
        data = Down_data()

    total = data['chinaTotal']
    added = data['chinaAdd']
    lines = [
        '截至时间:' + str(data['lastUpdateTime']),
        '全国确诊人数:' + str(total['confirm']),
        '今日新增确诊:' + str(added['confirm']),
        '全国疑似:' + str(total['suspect']),
        '今日新增疑似:' + str(added['suspect']),
        '全国治愈:' + str(total['heal']),
        '今日新增治愈:' + str(added['heal']),
        '全国死亡:' + str(total['dead']),
        '今日新增死亡:' + str(added['dead']),
    ]
    # Every entry ends with a newline, including the last one.
    result = '\n'.join(lines) + '\n'

    with open('疫情查询.txt', 'a+', encoding="utf-8") as f:
        # The extra newline leaves a blank line after the summary.
        f.write(result + '\n')


def Parse_data2(data=None):
    """Append one entry per city to ``疫情查询.txt``.

    Args:
        data: Payload as returned by ``Down_data``. When omitted, a fresh
            copy is downloaded, which matches the original zero-argument
            behaviour.
    """
    if data is None:
        data = Down_data()

    # NOTE(review): takes the first entry of 'areaTree' without checking its
    # name; presumably that entry is China - confirm against the feed.
    provinces = data['areaTree'][0]['children']

    # Open the output file once rather than once per city.
    with open('疫情查询.txt', 'a+', encoding="utf-8") as f:
        for province in provinces:
            for item in province['children']:
                total = item['total']
                today = item['today']
                list_city = [
                    '地区: ' + str(item['name']) + '\n',
                    ' 确诊人数:' + str(total['confirm']),
                    ' 新增确诊:' + str(today['confirm']),
                    ' 治愈:' + str(total['heal']),
                    ' 新增治愈:' + str(today['heal']),
                    ' 死亡:' + str(total['dead']),
                    ' 新增死亡:' + str(today['dead']) + '\n',
                ]
                f.write(''.join(list_city))


# Download once and reuse the payload so both reports describe the same
# snapshot; the original fetched it three times and discarded one result.
data = Down_data()
Parse_data1(data)
Parse_data2(data)