用 Python 爬取疫情数据

日期 开始时间 结束时间 中断时间 净时间 活动 活动详解
3.10 3:40 5:30 30 70 查看爬取数据的资料 查看python
3.10 7:00 8:30 0 90 安装软件 学习python的基础知识
3.11 10:00 11:10 20 50 学习python爬取数据  
3.11 2:30 5:00 25 125 编写代码 连数据库,爬取数据
3.12 2:30 4:00 20 70 改代码  

爬取数据:

 

python代码:

#-*- coding=utf-8 -*-
import pymysql
import re
import json
from pip._vendor import requests


def create():
    """Drop and recreate the ``info`` table in the ``payiqing`` database.

    Any existing ``info`` table (and its rows) is dropped first. The new
    table has an auto-increment ``Id`` primary key and eight
    ``varchar(255)`` columns: Date, Province, City, Confirmed_num,
    Yisi_num, Cured_num, Dead_num and Code.

    Errors raised by pymysql while connecting or executing the statements
    propagate to the caller; the connection is closed either way.
    """
    # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
    # positional host/user/password/database arguments.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="123",
        database="payiqing",
        charset="utf8",
    )
    try:
        with db.cursor() as cursor:
            cursor.execute("DROP TABLE IF EXISTS info")
            # Table definition; every data column is stored as text.
            sql = """CREATE TABLE info(
            Id INT PRIMARY KEY AUTO_INCREMENT,
            Date varCHAR(255),
            Province varchar(255),
            City varchar(255),
            Confirmed_num varchar(255),
            Yisi_num varchar(255),
            Cured_num varchar(255),
            Dead_num varchar(255),
            Code varchar(255))"""
            cursor.execute(sql)
    finally:
        # Release the connection even if one of the statements failed.
        db.close()


def insert(value):
    """Insert one row into the ``info`` table.

    Args:
        value: Sequence of eight items bound, in order, to the columns
            Date, Province, City, Confirmed_num, Yisi_num, Cured_num,
            Dead_num and Code.

    A new database connection is opened for every call. On success the
    transaction is committed and a success message is printed. If the
    insert fails, the transaction is rolled back and a failure message
    (with the underlying error) is printed instead of raising, so a bad
    row does not abort the caller's loop. Connection errors still
    propagate.
    """
    # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
    # positional host/user/password/database arguments.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="123",
        database="payiqing",
        charset="utf8",
    )
    sql = (
        "INSERT INTO info(Date,Province,City,Confirmed_num,Yisi_num,Cured_num,Dead_num,Code) "
        "VALUES ( %s,%s,%s,%s,%s,%s,%s,%s)"
    )
    try:
        with db.cursor() as cursor:
            cursor.execute(sql, value)
        db.commit()
    except Exception as exc:
        # Best-effort insert: report and continue, but (unlike a bare
        # ``except:``) let KeyboardInterrupt/SystemExit through.
        db.rollback()
        print("插入数据失败", exc)
    else:
        print('插入数据成功')
    finally:
        db.close()

 # 创建表
create()  # (re)create the info table before loading any rows

url='https://raw.githubusercontent.com/BlankerL/DXY-2019-nCoV-Data/master/json/DXYArea.json'
# NOTE(review): ``requests`` is imported from ``pip._vendor`` at the top of
# this file; that is a private copy bundled with pip. Prefer installing and
# importing the standalone ``requests`` package.
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()

# json.loads parses from a string in memory (json.load reads from a file).
jsonData = json.loads(response.text)

# Date stored with every row; hard-coded rather than taken from the payload.
SNAPSHOT_DATE = '2020-3-10'

# Walk every area record and store one row per city of each Chinese province.
for area in jsonData['results']:
    if area['countryName'] != '中国':
        continue
    provinceShortName = area['provinceName']
    # Skip the "region to be determined" placeholder record.
    if provinceShortName == "待明确地区":
        continue

    # Treat a missing or null ``cities`` value as "no cities" instead of
    # crashing; presumably some records carry no city breakdown.
    for city in area.get('cities') or []:
        insert((
            SNAPSHOT_DATE,
            provinceShortName,
            city['cityName'],
            city['confirmedCount'],
            city['suspectedCount'],
            city['curedCount'],
            city['deadCount'],
            city['locationId'],
        ))

  

 

posted on 2020-03-13 16:06  帝星辰  阅读(2405)  评论(0)  编辑  收藏  举报

导航