如果汉语背后没有文化,文化背后没有思想,思想背后没有精神,光TMD编造老娘和乔布斯没有说过的话,那中国永远不会是一个伟大的国家。——撒切尔夫人

刘一辰的软件工程随笔

正则表达式速查表:https://www.jb51.net/tools/regexsc.htm

re.search(regex,str)

import re

# Text of the second <p> element. `s` is not defined in this snippet; it is
# presumably a BeautifulSoup document parsed earlier -- TODO confirm.
html = s.find_all('p')[1].text
# Raw string, so the backslash in `\d` reaches the regex engine unchanged.
# Only the parenthesised groups are captured; text matched outside the
# parentheses takes part in the match but is not returned by groups().
#   r"新增(.*?)确诊病例(\d+)"  captures two groups, e.g. 新型冠状病毒肺炎 and 1
#   r"新增.*?确诊病例(\d+)"    captures one group,  e.g. 1
confirm_add_patten = r"新增(.*?)确诊病例(\d+)"
# re.search returns a Match object, or None when the pattern is not found.
confirm_add = re.search(confirm_add_patten, html)
if confirm_add is None:
    # Guard: calling .groups() on None would raise AttributeError.
    print('no match')
else:
    print(confirm_add.groups())  # tuple holding every captured group
    print(confirm_add.group(0))  # the whole matched text
    print(confirm_add.group(1))  # first captured group
    print(confirm_add.group(2))  # second captured group
'''
('新型冠状病毒肺炎', '1')
新增新型冠状病毒肺炎确诊病例1
新型冠状病毒肺炎
1
'''

数据搜集
爬取腾讯数据
import datetime
import json
import requests
from bs4 import BeautifulSoup
import time
### 封装函数 返回各省市疫情情况
def get_data():
    """Download the Tencent epidemic feed and reshape it for storage.

    Returns:
        A ``(history, details)`` tuple.

        ``history`` maps a ``'YYYY-MM-DD'`` string to a dict holding the
        cumulative figures ``confirm``, ``suspect``, ``heal``, ``dead`` and
        ``nowConfirm`` taken from ``chinaDayList``. For dates that also
        appear in ``chinaDayAddList`` the dict additionally holds the daily
        figures ``confirm_add``, ``suspect_add``, ``heal_add`` and
        ``dead_add``.

        ``details`` is a list with one row per entry of ``provinceCompare``:
        ``[today, province, nowConfirm, confirmAdd, heal, dead, zero]``.

    Raises:
        requests.RequestException: the request failed, timed out or returned
            an HTTP error status.
        KeyError: an expected field is missing from the feed, or
            ``chinaDayAddList`` contains a date absent from ``chinaDayList``.
    """
    today = str(datetime.date.today())

    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_other'
    # Without a timeout a stalled connection would block the caller forever.
    res = requests.get(url, timeout=10)
    res.raise_for_status()
    payload = json.loads(res.text)
    # The 'data' field is itself a JSON-encoded string, hence the second parse.
    data = json.loads(payload['data'])

    # One pass over the provinces; the row count follows the feed instead of
    # a fixed 34, so a different number of entries neither raises IndexError
    # nor gets silently truncated.
    details = [
        [today, name, info['nowConfirm'], info['confirmAdd'],
         info['heal'], info['dead'], info['zero']]
        for name, info in data['provinceCompare'].items()
    ]

    def to_iso(month_day):
        # The feed's 'date' is parsed as month.day and the year is fixed to
        # 2020 here. NOTE(review): entries from any other year would be
        # mislabelled -- confirm whether the feed exposes the year.
        parsed = time.strptime('2020.' + month_day, '%Y.%m.%d')
        return time.strftime('%Y-%m-%d', parsed)

    history = {}
    for day in data['chinaDayList']:
        history[to_iso(day['date'])] = {
            'confirm': day['confirm'],        # cumulative confirmed
            'suspect': day['suspect'],        # cumulative suspected
            'heal': day['heal'],              # cumulative healed
            'dead': day['dead'],              # cumulative deaths
            'nowConfirm': day['nowConfirm'],  # currently confirmed
        }
    for day in data['chinaDayAddList']:
        history[to_iso(day['date'])].update({
            'confirm_add': day['confirm'],  # new confirmed
            'suspect_add': day['suspect'],  # new suspected
            'heal_add': day['heal'],        # new healed
            'dead_add': day['dead'],        # new deaths
        })

    return history, details

###导入数据库

import traceback
import pymysql
import datetime
import json
import requests
from bs4 import BeautifulSoup
import time

def get_conn(host='127.0.0.1', user='root', password='root', db='ch'):
    """Open a MySQL connection and create a cursor on it.

    The defaults reproduce the previously hard-coded local settings, so
    existing ``get_conn()`` calls behave as before.

    Args:
        host: Address of the MySQL server.
        user: Account used to log in.
        password: Password for ``user``.
        db: Name of the database to select.

    Returns:
        A ``(conn, cursor)`` tuple; release both with ``close_conn``.
    """
    # `database` is the documented keyword; `db` is a deprecated alias.
    conn = pymysql.connect(host=host,
                           user=user,
                           password=password,
                           database=db)
    cursor = conn.cursor()
    return conn, cursor
def close_conn(conn, cursor):
    """Close the cursor and then the connection, skipping falsy arguments.

    Args:
        conn: Connection object exposing ``close()``, or ``None``.
        cursor: Cursor object exposing ``close()``, or ``None``.
    """
    # Cursor first, connection second.
    for resource in (cursor, conn):
        if resource:
            resource.close()

def insert_history():
    """Insert every day returned by ``get_data()`` into the ``history`` table.

    One row is written per date; all rows are committed together after the
    loop. Any error is reported with a traceback on stderr rather than
    propagated, and the connection is always closed.
    """
    cursor = None
    conn = None
    # Order of the value columns after the leading date column.
    fields = ('confirm', 'suspect', 'heal', 'dead', 'nowConfirm',
              'confirm_add', 'suspect_add', 'heal_add', 'dead_add')
    try:
        dic = get_data()[0]
        print(f'{datetime.datetime.now()} 开始插入历史数据')
        conn, cursor = get_conn()
        sql = 'insert into history values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        for day, stats in dic.items():
            # .get() yields None (SQL NULL) for figures missing on that day.
            cursor.execute(sql, [day] + [stats.get(name) for name in fields])
        conn.commit()
        print(f'{datetime.datetime.now()} 历史数据插入完毕')
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must propagate.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)

def update_history():
    """Add to the ``history`` table the days it does not contain yet.

    For every date returned by ``get_data()`` the table is queried by ``dt``;
    a row is inserted only when that query matches nothing. All inserts are
    committed together after the loop. Any error is reported with a
    traceback on stderr rather than propagated, and the connection is always
    closed.
    """
    cursor = None
    conn = None
    # Order of the value columns after the leading date column.
    fields = ('confirm', 'suspect', 'heal', 'dead', 'nowConfirm',
              'confirm_add', 'suspect_add', 'heal_add', 'dead_add')
    try:
        dic = get_data()[0]
        print(f'{datetime.datetime.now()} 开始更新历史数据')
        conn, cursor = get_conn()
        sql = 'insert into history values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        sql_query = 'select confirm from history where dt=%s'
        for day, stats in dic.items():
            # execute() returns the number of matched rows; 0 means the
            # date is not stored yet.
            if cursor.execute(sql_query, (day,)):
                continue
            # .get() yields None (SQL NULL) for figures missing on that day.
            cursor.execute(sql, [day] + [stats.get(name) for name in fields])
        conn.commit()
        print(f'{datetime.datetime.now()} 历史数据更新完毕')
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must propagate.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)

def update_details():
    """Append today's per-province rows to the ``details`` table.

    The ``update_time`` of the newest row (highest ``id``) is compared with
    the date of the freshly fetched data. Rows are inserted only when the
    two differ or the table is empty. Any error is reported with a traceback
    on stderr rather than propagated, and the connection is always closed.
    """
    cursor = None
    conn = None
    try:
        li = get_data()[1]
        conn, cursor = get_conn()
        # NOTE(review): the heal_add / dead_add columns receive the 'heal'
        # and 'dead' values of each province row -- confirm that these are
        # the intended figures.
        sql = 'insert into details(update_time,province,now_confirm,confirm_add,heal_add,dead_add,zero_days) values(%s,%s,%s,%s,%s,%s,%s)'
        # Yields 1 when the newest stored update_time equals the argument,
        # 0 when it differs and NULL when the table is empty.
        sql_query = 'select %s=(select update_time from details order by id desc limit 1)'
        cursor.execute(sql_query, (li[0][0],))
        up_to_date = cursor.fetchone()[0]
        if up_to_date:
            print(f'{datetime.datetime.now()} 已经是最新数据')
        else:
            print(f'{datetime.datetime.now()} 开始更新数据')
            for item in li:
                cursor.execute(sql, item)
            conn.commit()
            print(f'{datetime.datetime.now()} 更新最新数据完毕')
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must propagate.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)


posted @   崤函隳  阅读(38)  评论(0编辑  收藏  举报
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!
历史上的今天:
2020-11-22 2020/11/22 刘一辰的JAVA随笔
点击右上角即可分享
微信分享提示