数据爬取
要求:
编程爬取每日最新的疫情统计数据。
并将爬取结果导入到数据库中。
将可视化结果与统计数据结合,实时显示当前最新数据。
思路:用 Python 爬取网易云实时数据,并将数据存储到数据库中
源程序代码:
import requests
import time, json
import pymysql
import sys;
def insert(date: str, provinae_list: list) -> None:
    """Store one row per city in the ``info`` table of the ``epidemic`` database.

    Args:
        date: Value written to the ``Date`` column of every row.
        provinae_list: Province dicts. Each must provide ``name`` and a
            ``children`` list of city dicts; each city dict must provide
            ``name``, ``id`` and a ``total`` dict with the keys ``confirm``,
            ``suspect``, ``heal`` and ``dead``.

    Every row is printed, inserted and committed individually. A row the
    database rejects is rolled back and reported, and processing continues
    with the next row. The connection is always closed before returning.

    Raises:
        KeyError: If a province or city dict lacks one of the keys above.
    """
    # Placeholders are bound by the driver instead of being spliced into the
    # statement text, so names containing quotes cannot break (or inject into)
    # the SQL, and ``None`` is stored as NULL rather than the text 'None'.
    sql = (
        "INSERT INTO info(Date,Province,City,Confirmed_num,Yisi_num,"
        "Cured_num,Dead_num,Code) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    )

    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
    # NOTE(review): host and credentials are hard-coded here.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="123",
        database="epidemic",
    )
    try:
        with db.cursor() as cursor:
            for province in provinae_list:
                for city in province['children']:
                    total = city['total']
                    row = (
                        date,
                        province['name'],
                        city['name'],
                        total['confirm'],
                        total['suspect'],
                        total['heal'],
                        total['dead'],
                        city['id'],
                    )
                    print(*row)
                    try:
                        cursor.execute(sql, row)
                        db.commit()
                    except pymysql.MySQLError:
                        # Best effort: drop only the failed row and keep going.
                        db.rollback()
                        print("插入数据失败")
                    else:
                        print('插入数据成功')
    finally:
        # Release the connection even if a malformed record raised above.
        db.close()
def get_wangyi_request(timeout: float = 10):
    """Request the epidemic totals from the 163.com ``list-total`` endpoint.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The ``requests.Response`` object for the GET request. The status code
        is not checked here.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
    """
    url = 'https://c.m.163.com/ug/api/wuhan/app/data/list-total'
    # Browser-like headers sent with the request.
    # NOTE(review): 'br' is advertised in accept-encoding; confirm a brotli
    # decoder is installed wherever this runs, otherwise drop it.
    headers = {
        'accept': '*/*',
        'accept-encoding': 'gzip,deflate,br',
        # No spaces around '=' in the quality values.
        'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'origin': 'https://wp.m.163.com',
        'referer': 'https://wp.m.163.com/',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        # Was misspelled 'same-ite', which is not a valid value.
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0(WindowsNT10.0;Win64;x64) AppleWebKit/37.36 (KHTML, likeGecko) Chrome/82.0.4056.0 Safari/537.36 Edg/82.0.432.3'
    }
    result = requests.get(url, headers=headers, timeout=timeout)
    return result
def print_mess(string: str, dict_total: dict) -> None:
    """Write the four case counts to stdout on one line, without a newline.

    Args:
        string: Prefix placed in front of every label.
        dict_total: Mapping read for the keys ``confirm``, ``suspect``,
            ``heal`` and ``dead``. A value of ``None`` or a missing key is
            printed as 0.

    The output has the form
    ``<prefix>确诊: N <prefix>疑似: N <prefix>治愈: N <prefix>死亡: N``.
    """
    labelled_keys = (
        ('确诊: ', 'confirm'),
        ('疑似: ', 'suspect'),
        ('治愈: ', 'heal'),
        ('死亡: ', 'dead'),
    )
    parts = []
    for label, key in labelled_keys:
        count = dict_total.get(key)
        # Identity check so a genuine 0 is kept and only None/missing maps to 0.
        parts.append(string + label + str(count if count is not None else 0))
    sys.stdout.write(' '.join(parts))
if __name__ == '__main__':
    # Download the feed and keep only its 'data' member.
    response = get_wangyi_request()
    payload = json.loads(response.text)['data']
    # Keys observed in 'data':
    # dict_keys(['chinaTotal', 'chinaDayList', 'lastUpdateTime', 'areaTree'])
    update_time = payload['lastUpdateTime']
    print(update_time)
    # NOTE(review): entry 2 of 'areaTree' is presumably the country whose
    # provinces are wanted -- confirm the position is stable in the feed.
    provinces = payload['areaTree'][2]['children']
    # Each province contains the following keys:
    # dict_keys(['today', 'total', 'extData', 'name', 'id', 'lastUpdateTime', 'children'])
    insert(update_time, provinces)