Python项目@基于Flask的大屏数据可视化
爬取数据
get_tencent_data()
def get_tencent_data():
    """Fetch the Tencent COVID-19 feed and reshape it into database rows.

    Returns:
        tuple: ``(history, details)``.

        ``history`` is one row of nationwide figures keyed by date:
        ``[ds, confirm, confirm_add, suspect, suspect_add, heal, heal_add,
        dead, dead_add]``.

        ``details`` is a list of per-city rows:
        ``[update_time, province, city, confirm, confirm_add, heal, dead]``.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            response.
        KeyError, ValueError: if the payload does not have the expected shape.
    """
    url = "http://view.inews.qq.com/g2/getOnsInfo?name=disease_h5"
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }
    # The second positional parameter of requests.get() is ``params``, so the
    # headers must be passed by keyword or they end up in the query string.
    # A timeout keeps an unattended (cron) run from hanging forever.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    res = json.loads(r.text)  # outer envelope: JSON string -> dict
    data_all = json.loads(res['data'])  # 'data' is itself a JSON-encoded string
    update_time = data_all['lastUpdateTime']
    china_total = data_all['chinaTotal']
    china_add = data_all['chinaAdd']

    # Nationwide summary row; the date part of the timestamp is the key.
    ds = update_time.split()[0]
    history = [
        ds,
        china_total['confirm'], china_add['confirm'],
        china_total['suspect'], china_add['suspect'],
        china_total['heal'], china_add['heal'],
        china_total['dead'], china_add['dead'],
    ]

    # Per-city rows for every province under the first areaTree node.
    details = []
    for pro_infos in data_all['areaTree'][0]['children']:
        province = pro_infos['name']
        for city_infos in pro_infos['children']:
            total = city_infos['total']
            details.append([
                update_time,
                province,
                city_infos['name'],
                total['confirm'],
                city_infos['today']['confirm'],
                total['heal'],  # cumulative recoveries (was read from today.confirm)
                total['dead'],
            ])
    return history, details
def get_baidu_hot()
这里我们用到了 selenium 模块,它可以爬取到通过 ajax 请求加载的数据,还能直接用鼠标选取标签来定位元素。
def get_baidu_hot():
    """Scrape the Baidu real-time hot-search board with headless Firefox.

    The board container's text is split into lines, the badge lines
    ('热', '新', '沸') are dropped, and the remaining lines are grouped five
    at a time. Judging by the sample data, a group looks like
    ``[rank, score, score label, title, summary]``.

    Returns:
        list[list[str]]: one 5-item list per complete entry, in page order.
        Empty when the container element is not found.
    """
    options = webdriver.FirefoxOptions()
    options.add_argument("--headless")  # run without opening a browser window
    options.add_argument("--disable-gpu")
    browser = webdriver.Firefox(options=options)
    try:
        browser.get('https://top.baidu.com/board?tab=realtime')
        # XPath copied from the browser dev tools. "xpath" is the locator
        # strategy string (the value of By.XPATH); find_elements(by, value)
        # replaces the find_elements_by_xpath helper removed in Selenium 4.
        containers = browser.find_elements(
            "xpath", '//*[@id="sanRoot"]/main/div[2]/div/div[2]')
        hot_data = [i.text.split("\n") for i in containers]
    finally:
        # Always shut the browser down so headless Firefox processes do not
        # pile up across scheduled runs.
        browser.quit()

    if not hot_data:
        return []

    # Filter into a new list instead of removing while iterating, which
    # skipped the element following each removed badge.
    badges = ('热', '新', '沸')
    rows = [e for e in hot_data[0] if e not in badges]
    # Keep complete groups of five only; a trailing partial group is dropped.
    return [rows[i:i + 5] for i in range(0, len(rows) - 4, 5)]
保存数据到数据库
创建数据库/表
各个字段含义(history)
+-------------+--------------------+-----------+
| column_name | column_comment | data_type |
+-------------+--------------------+-----------+
| ds | 日期 | datetime |
| confirm | 累计确诊 | int |
| confirm_add | 当日新增确诊 | int |
| suspect | 剩余疑似 | int |
| suspect_add | 当日新增疑似 | int |
| heal | 累计治愈 | int |
| heal_add | 当日新增治愈 | int |
| dead | 累计死亡 | int |
| dead_add | 当日新增死亡 | int |
+-------------+--------------------+-----------+
# MySQL installation guide: https://blog.51cto.com/u_12226796/2431965
# Table definitions
# bdhot: one row per scraped Baidu hot-search entry (rank, score, title, summary)
create table `bdhot`(
`id` int(11) not null auto_increment,
`dt` timestamp default current_timestamp,# defaults to the time the row is inserted
`hotrank` int(11) default null,
`hotscore` int(11) default null,
`title` varchar(255) default null,
`content` tinytext default null,
primary key(`id`) using btree
)engine=innodb default charset=utf8mb4;
# history: nationwide cumulative and daily-new figures, one row per date (primary key `ds`)
create table `history`(
`ds` datetime not null comment'日期',
`confirm` int(11) default null comment'累计确诊',
`confirm_add` int(11) default null comment'当日新增确诊',
`suspect` int(11) default null comment'剩余疑似',
`suspect_add` int(11) default null comment'当日新增疑似',
`heal` int(11) default null comment'累计治愈',
`heal_add` int(11) default null comment'当日新增治愈',
`dead` int(11) default null comment'累计死亡',
`dead_add` int(11) default null comment'当日新增死亡',
primary key(`ds`) using btree
)engine=innodb default charset=utf8mb4;
# details: per-city snapshot rows; each crawl appends a batch sharing one `update_time`
create table `details`(
`id` int(11) not null auto_increment,
`update_time` datetime not null comment'数据最后更新时间',
`province` varchar(50) default null comment'省',
`city` varchar(50) default null comment'市',
`confirm` int(11) default null comment'累计确诊',
`confirm_add` int(11) default null comment'新增确诊',
`heal` int(11) default null comment'累计治愈',
`dead` int(11) default null comment'累计死亡',
primary key(`id`) using btree
)engine=innodb default charset=utf8mb4;
def update_details()
def update_details():
    """Insert the latest per-city rows into the ``details`` table.

    The rows come from ``get_tencent_data()[1]``. Nothing is inserted when
    the most recently stored ``update_time`` already equals the feed's
    timestamp. Errors are printed with a traceback rather than raised, and
    the connection is closed in every case.

    Returns:
        None
    """
    cursor = None
    conn = None
    try:
        # Index 0 is the nationwide history row, index 1 the per-city rows.
        li = get_tencent_data()[1]
        conn, cursor = get_conn()
        sql = "insert into details(update_time, province, city, confirm, confirm_add, heal, dead) values(%s, %s, %s, %s, %s, %s, %s)"
        # Compares the feed timestamp with the newest stored one: 1 when they
        # are equal, 0 when they differ, NULL when the table is empty (a falsy
        # result means the batch still has to be inserted).
        sql_query = 'select %s=(select update_time from details order by id desc limit 1)'
        cursor.execute(sql_query, li[0][0])  # every row carries the same timestamp
        if not cursor.fetchone()[0]:
            print(f"{time.asctime()}详细数据开始更新......")
            for item in li:
                cursor.execute(sql, item)
            conn.commit()  # make the inserts durable
            print(f"{time.asctime()}详细数据更新完毕。")
        else:
            print(f"{time.asctime()}详细数据已是最新 !")
    except Exception:
        # Best-effort job: report and carry on, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except:`` would swallow them).
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
def update_history()
def update_history():
    """Insert today's nationwide summary row into the ``history`` table.

    The row comes from ``get_tencent_data()[0]``; its first element is the
    date key ``ds``. The insert is skipped when the newest stored ``ds``
    already equals that date. Errors are printed with a traceback rather
    than raised, and the connection is closed in every case.

    Returns:
        None
    """
    cursor = None
    conn = None
    try:
        # Index 0 is the nationwide history row, index 1 the per-city rows.
        hlist = get_tencent_data()[0]
        print(f"{time.asctime()}历史数据开始插入......")
        conn, cursor = get_conn()
        sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        # 1 when the newest stored date equals the given one, 0 when it
        # differs, NULL when the table is empty.
        sql_query = 'select %s=(select ds from history order by ds desc limit 1)'
        # Compare against the date that would actually be inserted (hlist[0]),
        # not the local wall-clock date: when the feed's date lags or leads
        # local time the old check either attempted a duplicate-key insert
        # or skipped a row that was still missing.
        cursor.execute(sql_query, hlist[0])
        if not cursor.fetchone()[0]:
            cursor.execute(sql, hlist)
            conn.commit()
            print(f"{time.asctime()}历史数据更新完毕。")
        else:
            print(f"{time.asctime()}历史数据已是最新 !")
    except Exception:
        # Best-effort job: report and carry on, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except:`` would swallow them).
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
def update_baidu_hot():
def update_baidu_hot():
    """Scrape the Baidu hot-search board and append it to the ``bdhot`` table.

    Each entry returned by ``get_baidu_hot()`` is stored as
    ``(hotrank, hotscore, title, content)``, taken from positions 0, 1, 3
    and 4 of the entry. Database errors are printed with a traceback rather
    than raised; the connection is closed in every case. A failure inside
    ``get_baidu_hot()`` itself propagates to the caller.

    Returns:
        None
    """
    cursor = None
    conn = None
    hot_list = get_baidu_hot()
    print(f"{time.asctime()}百度热搜数据开始更新......")
    try:
        conn, cursor = get_conn()
        sql = "insert into bdhot(hotrank, hotscore, title, content) values(%s, %s, %s, %s)"
        for hot in hot_list:
            # Position 2 of each entry is skipped; only rank, score, title
            # and summary are persisted.
            tup = (int(hot[0]), int(hot[1]), hot[3], hot[4])
            cursor.execute(sql, tup)
        conn.commit()
        print(f"{time.asctime()}百度热搜数据更新完成。")
    except Exception:
        # Best-effort job: report and carry on, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except:`` would swallow them).
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
前端布局
HTML
我们用了很多echarts的图表模板。
<!DOCTYPE html>
<!-- Dashboard page: each empty div below is a panel filled by the ec_*.js chart scripts and ajax_func.js -->
<html lang="">
<head>
<meta charset="utf-8">
<title>全国疫情追踪</title>
<script src="../static/js/jquery-1.11.1.min.js"></script>
<script src="../static/js/echarts.min.js"></script>
<script src="../static/js/china.js"></script>
<script src="../static/js/echarts-wordcloud.min.js"></script>
<link href="../static/css/main.css" rel="stylesheet"/>
</head>
<body>
<div id="title">全国疫情追踪</div>
<div id="top-1"></div>
<div id="top-2" class="txt"></div>
<div id="left-1"></div>
<div id="left-2"></div>
<div id="mid-1">
<div class="num"><h2></h2></div>
<div class="num"><h2></h2></div>
<div class="num"><h2></h2></div>
<div class="num"><h2></h2></div>
<div class="txt"><h3>累计确诊</h3></div>
<div class="txt"><h3>剩余疑似</h3></div>
<div class="txt"><h3>累计治愈</h3></div>
<div class="txt"><h3>累计死亡</h3></div>
</div>
<div id="mid-2"></div>
<div id="right-1"></div>
<div id="right-2"></div>
<!-- mid-2: map of China -->
<script src="../static/js/ec_mid2.js"></script>
<!-- left-1: nationwide cumulative trend -->
<script src="../static/js/ec_left1.js"></script>
<!-- left-2: daily new-case trend -->
<script src="../static/js/ec_left2.js"></script>
<!-- right-1: top five by confirmed cases -->
<script src="../static/js/ec_right1.js"></script>
<!-- right-2: Baidu hot-search word cloud -->
<script src="../static/js/ec_right2.js"></script>
<!-- AJAX loaders that fetch the data for every panel -->
<script src="../static/js/ajax_func.js"></script>
</body>
</html>
CSS
简单粗暴的绝对定位。hhh...
/* Dashboard layout: every region is absolutely positioned with percentage sizes. */
/* NOTE(review): these selectors (#top, #info, #block-*, #foot) do not match the ids used in the HTML template shown alongside (title, top-1, left-1, ...) - confirm which stylesheet is current. */
body{
margin: 0;
background-color: #100c2a;
}
/* Header strip across the top 10% of the page, content centred both ways */
#top{
position: absolute;
width: 100%;
height: 10%;
top: 0;
left: 0;
right: 0;
/* background-color: pink; */
color: white;
font-size: 30px;
font-family: "幼圆";
display: flex; /* flex layout */
align-items: center;
justify-content: center;
}
/* Info area directly below the header */
#info{
position: absolute;
width: 50%;
height: 21%;
top: 10%;
left: 0%;
right: 75%;
color: white;
/* background-color: cadetblue; */
}
/* Three stacked blocks in the left quarter, each 21% tall */
#block-1{
position: absolute;
width: 25%;
height: 21%;
top: 31%;
left: 0%;
right: 75%;
color: white;
background-color: blueviolet;
}
#block-2{
position: absolute;
width: 25%;
height: 21%;
top: 52%;
left: 0%;
right: 75%;
color: white;
background-color: greenyellow;
}
#block-3{
position: absolute;
width: 25%;
height: 21%;
top: 73%;
left: 0%;
right: 75%;
color: white;
background-color: brown;
}
/* Footer strip occupying the bottom 6% */
#foot{
position: absolute;
width: 100%;
height: 6%;
top: 94%;
left: 0%;
right: 0%;
color: ghostwhite;
/* background-color: pink; */
}
ajax 局部动态请求
ajax 可局部的请求页面的某个模块,避免刷新整个页面浪费资源。
为了代码美观容易阅读,我们把所有ajax写到同一个文件中。
// div top-2: show the server-side clock
function get_top2_time() {
    // Put the text returned by /time into the panel.
    var showTime = function (html) {
        $("#top-2").html(html);
    };
    // Failed requests are ignored.
    var ignoreFailure = function (xhr, type, errThrown) {
    };
    $.ajax({
        url: "/time",
        timeout: 10000, // give up after 10 s
        success: showTime,
        error: ignoreFailure
    });
}
// div mid-1: headline totals (confirmed, suspected, healed, dead)
function get_mid1_data() {
    $.ajax({
        url: "/mid1",
        success: function (mid1_data) {
            // The backend has sent the confirmed total under the misspelled
            // key "confiirm"; prefer the correct spelling and fall back to
            // the old one so either server version works.
            var confirmed = mid1_data.confirm !== undefined
                ? mid1_data.confirm
                : mid1_data.confiirm;
            var totals = [confirmed, mid1_data.suspect, mid1_data.heal, mid1_data.dead];
            // The four ".num h2" cells appear in the same order as `totals`.
            var cells = $(".num h2");
            for (var i = 0; i < totals.length; i++) {
                cells.eq(i).text(totals[i]);
            }
        },
        // Failed requests are ignored.
        error: function (xhr, type, errThrown) {
        }
    });
}
// div mid-2: per-province figures for the map
function get_mid2_data() {
    // Feed the returned list into the map's first series and redraw.
    function renderMap(payload) {
        var option = ec_center_option;
        option.series[0].data = payload.data;
        ec_center.setOption(option);
    }
    // Failed requests are ignored.
    function ignoreFailure(xhr, type, errThrown) {
    }
    $.ajax({
        url: "/mid2",
        success: renderMap,
        error: ignoreFailure
    });
}
// div left-1: cumulative trend
function get_left1_data() {
    // Series 0..3 take the payload fields in this order.
    var fields = ["confirm", "suspect", "heal", "dead"];
    $.ajax({
        url: "/left1",
        success: function (payload) {
            var option = ec_left1_Option;
            option.xAxis[0].data = payload.day;
            fields.forEach(function (field, idx) {
                option.series[idx].data = payload[field];
            });
            ec_left1.setOption(option);
        },
        // Failed requests are ignored.
        error: function (xhr, type, errThrown) {
        }
    });
}
// div left-2: daily-new trend
function get_left2_data() {
    // Series 0..3 take the payload fields in this order.
    var fields = ["confirm_add", "suspect_add", "heal_add", "dead_add"];
    $.ajax({
        url: "/left2",
        success: function (payload) {
            var option = ec_left2_Option;
            option.xAxis[0].data = payload.day;
            fields.forEach(function (field, idx) {
                option.series[idx].data = payload[field];
            });
            ec_left2.setOption(option);
        },
        // Failed requests are ignored.
        error: function (xhr, type, errThrown) {
        }
    });
}
// div right-1: top-5 bar chart
function get_right1_data() {
    // City names go on the x axis, confirmed counts into the first series.
    function renderTop5(payload) {
        var option = ec_right1_Option;
        option.xAxis.data = payload.city;
        option.series[0].data = payload.confirm;
        ec_right1.setOption(option);
    }
    // Failed requests are ignored.
    function ignoreFailure(xhr, type, errThrown) {
    }
    $.ajax({
        url: "/right1",
        success: renderTop5,
        error: ignoreFailure
    });
}
// div right-2: hot-search word cloud
function get_right2_data() {
    // Replace the word-cloud series with the returned keywords and redraw.
    function renderCloud(payload) {
        var option = ec_right2_Option;
        option.series[0].data = payload.kws;
        ec_right2.setOption(option);
    }
    // Failed requests are ignored.
    function ignoreFailure(xhr, type, errThrown) {
    }
    $.ajax({
        url: "/right2",
        success: renderCloud,
        error: ignoreFailure
    });
}
// Initial load: fill every panel once, in this order.
[
    get_top2_time,
    get_mid1_data,
    get_mid2_data,
    get_left1_data,
    get_left2_data,
    get_right1_data,
    get_right2_data
].forEach(function (load) {
    load();
});
// Refresh intervals: the clock every second, the word cloud every 10 seconds.
setInterval(get_top2_time, 1000);
setInterval(get_right2_data, 1000 * 10);
从数据库中请求数据
# -*- coding: utf-8 -*-
# @Time : 2021/10/14 22:24
# @Author : JustFly
# @File : func.py
# @Software : PyCharm
import requests
import json
import time
import pymysql
import traceback
from selenium import webdriver
def get_conn():
    """Open a connection to the local ``covid_2019`` MySQL database.

    Returns:
        tuple: ``(conn, cursor)`` - the open connection and a cursor on it.
        The caller is responsible for closing both (see ``close_conn``).

    Raises:
        pymysql.MySQLError: if the connection cannot be established.
    """
    conn = pymysql.connect(
        host="127.0.0.1",
        user="root",
        password="123456",
        # ``database`` is the documented name; ``db`` is a deprecated alias.
        database="covid_2019",
        # The tables are utf8mb4 and hold Chinese text; set the client
        # charset explicitly instead of relying on the driver default.
        charset="utf8mb4",
    )
    cursor = conn.cursor()
    return conn, cursor
def close_conn(conn, cursor):
    """Close the cursor and then the connection, skipping falsy arguments.

    Args:
        conn: connection object exposing ``close()``, or ``None``.
        cursor: cursor object exposing ``close()``, or ``None``.
    """
    # The cursor goes first, then the connection it belongs to.
    for resource in (cursor, conn):
        if resource:
            resource.close()
def query(sql, *args):
    """Run a read query and return every row.

    Args:
        sql: SQL text, optionally containing ``%s`` placeholders.
        *args: values bound to the placeholders.

    Returns:
        The result of ``cursor.fetchall()`` (a tuple of row tuples with the
        default cursor).
    """
    conn, cursor = get_conn()
    try:
        cursor.execute(sql, args)
        return cursor.fetchall()
    finally:
        # Release the connection even when execute()/fetchall() raises;
        # previously a failing query leaked it.
        close_conn(conn, cursor)
def get_top2_time():
    """Return the current local time as ``YYYY年MM月DD日 <%X time>``.

    The Chinese unit characters are spliced in with ``str.format`` and are
    never handed to ``time.strftime``.
    """
    now = time.localtime()  # one snapshot so all fields agree
    year, month, day, clock = (
        time.strftime(code, now) for code in ("%Y", "%m", "%d", "%X")
    )
    return "{}年{}月{}日 {}".format(year, month, day, clock)
def get_mid1_data():
    """Return the newest nationwide totals for the ``mid-1`` panel.

    Returns:
        tuple: ``(confirm, suspect, heal, dead)`` from the most recent
        ``history`` row.

    Raises:
        IndexError: if the ``history`` table has no rows.
    """
    latest_rows = query(
        "select confirm, suspect, heal, dead from history order by ds desc limit 1;"
    )
    return latest_rows[0]
def get_mid2_data():
    """Return per-province confirmed totals for the ``mid-2`` map.

    Only rows from the newest ``update_time`` batch in ``details`` are summed.

    Returns:
        Rows of ``(province, sum(confirm))``.
    """
    statement = "".join([
        "select province, sum(confirm) from details ",
        "where update_time=(select update_time from details order by update_time desc limit 1)",
        "group by province",
    ])
    return query(statement)
def get_left1_data():
    """Return the nationwide cumulative trend, oldest day first.

    Returns:
        Rows of ``(ds, confirm, suspect, heal, dead)`` from ``history``.
    """
    # Without ORDER BY the row order is unspecified; the trend chart plots
    # the rows in the order received, so request chronological order.
    sql = "select ds, confirm, suspect, heal, dead from history order by ds"
    res = query(sql)
    return res
def get_left2_data():
    """Return the nationwide daily-new trend, oldest day first.

    Returns:
        Rows of ``(ds, confirm_add, suspect_add, heal_add, dead_add)`` from
        ``history``.
    """
    # Without ORDER BY the row order is unspecified; the trend chart plots
    # the rows in the order received, so request chronological order.
    sql = "select ds, confirm_add, suspect_add, heal_add, dead_add from history order by ds"
    res = query(sql)
    return res
def get_right1_data():
    """Return the five places with the most confirmed cases, Hubei excluded.

    Ordinary provinces contribute their cities individually. The four
    municipalities (北京, 上海, 天津, 重庆) are summed into one entry each,
    because their ``details`` rows are districts rather than cities. Taiwan,
    Hubei and the pseudo-cities '境外输入' / '地区待确认' are left out. Only
    the newest ``update_time`` batch is considered.

    Returns:
        Up to five rows of ``(city, confirm)``, highest first.
    """
    fragments = [
        "select city, confirm from",
        "(",
        "select city, confirm from details where update_time=(select update_time from details order by update_time desc limit 1)",
        "and province not in ('台湾', '湖北', '北京', '上海', '天津', '重庆')",
        "union all ",
        "select province as city, sum(confirm) as confirm from details ",
        "where update_time=(select update_time from details order by update_time desc limit 1)",
        "and province in ('北京', '上海', '天津', '重庆') group by province",
        ") as a ",
        "where city not in ('境外输入', '地区待确认') ",
        "order by confirm desc limit 5",
    ]
    return query("".join(fragments))
def get_right2_data():
    """Return the 20 most recently stored hot-search entries.

    Returns:
        Rows of ``(title, hotscore)`` from ``bdhot``, newest ``id`` first.
    """
    sql = "select title, hotscore from bdhot order by id desc limit 20"
    res = query(sql)
    return res


# Smoke call for running this file directly. It used to run unconditionally,
# which issued a database query every time the module was imported.
if __name__ == "__main__":
    get_right2_data()
Flask路由逻辑
import string
from flask import Flask
from flask import request
from flask import render_template
import func
from flask import jsonify # json 转 dict
from jieba.analyse import extract_tags
app = Flask(__name__) # create the Flask application instance that the routes below attach to
@app.route("/")
def main():
    """Serve the dashboard page rendered from ``main.html``."""
    page = render_template("main.html")
    return page
@app.route("/time")
def get_top2_time():
    """Return the server clock string produced by ``func.get_top2_time``."""
    clock_text = func.get_top2_time()
    return clock_text
@app.route("/mid1")
def get_mid1_data():
    """Return the headline totals for the ``mid-1`` panel as JSON.

    The confirmed total is sent under both ``confirm`` and the historical
    misspelling ``confiirm``, so front-end code using either key keeps
    working.
    """
    data = func.get_mid1_data()
    dic = {
        "confirm": data[0],
        "confiirm": data[0],  # legacy misspelled key, kept for compatibility
        "suspect": data[1],
        "heal": data[2],
        "dead": data[3],
    }
    return jsonify(dic)
@app.route("/mid2")
def get_mid2_data():
    """Return per-province totals as ``{"data": [{"name", "value"}, ...]}``."""
    # The summed column is coerced with int() before serialising.
    res = [
        {"name": row[0], "value": int(row[1])}
        for row in func.get_mid2_data()
    ]
    print(res)
    return jsonify({"data": res})
@app.route("/left1")
def get_left1_data():
    """Return the cumulative trend as parallel lists keyed by series name."""
    rows = func.get_left1_data()
    dic = {
        "day": [row[0].strftime("%m-%d") for row in rows],  # month-day axis labels
        "confirm": [row[1] for row in rows],
        "suspect": [row[2] for row in rows],
        "heal": [row[3] for row in rows],
        "dead": [row[4] for row in rows],
    }
    print(dic)
    return jsonify(dic)
@app.route("/left2")
def get_left2_data():
    """Return the daily-new trend as parallel lists keyed by series name."""
    rows = func.get_left2_data()
    dic = {
        "day": [row[0].strftime("%m-%d") for row in rows],  # month-day axis labels
        "confirm_add": [row[1] for row in rows],
        "suspect_add": [row[2] for row in rows],
        "heal_add": [row[3] for row in rows],
        "dead_add": [row[4] for row in rows],
    }
    print(dic)
    return jsonify(dic)
@app.route("/right1")
def get_right1_data():
    """Return the top-5 ranking as ``{"city": [...], "confirm": [...]}``."""
    rows = func.get_right1_data()
    dic = {
        "city": [row[0] for row in rows],
        "confirm": [int(row[1]) for row in rows],  # coerced with int() before serialising
    }
    return jsonify(dic)
@app.route("/right2")
def get_right2_data():
    """Return word-cloud entries as ``{"kws": [{"name", "value"}, ...]}``.

    Every hot-search title is split into keywords with jieba's
    ``extract_tags``; each non-numeric keyword is emitted with the title's
    score (as a string) as its value.
    """
    # Rows look like (('<title>', 4962929), ('<title>', 4829320), ...).
    rows = func.get_right2_data()
    d = []
    for row in rows:
        title, score = row[0], row[1]
        d.extend(
            {"name": keyword, "value": str(score)}
            for keyword in extract_tags(title)
            if not keyword.isdigit()
        )
    return jsonify({"kws": d})
# Start Flask's built-in server with its defaults when this file is run directly.
if __name__ == '__main__':
    app.run()
部署项目到服务器
注意:
1.修改连接的数据库信息
2.修改 app.py
app.run() => app.run(host="0.0.0.0", port=5000)
3.python3 app.py
可用此命令用于调试,退出命令行界面后失效。
安装
yum install nginx # 安装 nginx
pip install gunicorn # 安装 gunicorn
配置Nginx做反向代理 vim /etc/nginx/nginx.conf
在 server 块上方添加服务器集群(upstream)和权重,我只有一台服务器就写一个(127.0.0.1 处我写的是内网 IP)。
# upstream mycluster {
# server 127.0.0.1:5000 weight=1;
# }
。。。。
include /etc/nginx/conf.d/*.conf;
upstream mycluster{
server 172.17.0.15:5000 weight=1;
}
server {
listen 80 default_server;
listen [::]:80 default_server;
server_name 172.17.0.15;
root /usr/share/nginx/html;
。。。。
启动服务器 gunicorn -b 127.0.0.1:5000 -D app:app
[root@VM-0-15-centos COVID19]# gunicorn -b 172.17.0.15:5000 -D app:app
[root@VM-0-15-centos COVID19]# ps -ef | grep gunicorn
root 25252 1 0 16:45 ? 00:00:00 /usr/bin/python3.6 /usr/local/bin/gunicorn -b 172.17.0.15:5000 -D app:app
root 25255 25252 5 16:45 ? 00:00:00 /usr/bin/python3.6 /usr/local/bin/gunicorn -b 172.17.0.15:5000 -D app:app
root 25298 20928 0 16:46 pts/3 00:00:00 grep --color=auto gunicorn
[root@VM-0-15-centos COVID19]#
[root@VM-0-15-centos COVID19]#
[root@VM-0-15-centos COVID19]#
[root@VM-0-15-centos COVID19]#
用 crontab 设置定时更新脚本
[root@VM-0-15-centos COVID19]# crontab -l
*/5 * * * * flock -xn /tmp/stargate.lock -c '/usr/local/qcloud/stargate/admin/start.sh > /dev/null 2>&1 &'
0 */12 * * * python3 /root/COVID19/update.py up_all >> /root/COVID19/covid19.log 2>&1 &
5 */1 * * * python3 /root/COVID19/update.py up_hot >> /root/COVID19/covid19.log 2>&1 &
[root@VM-0-15-centos COVID19]#
[root@VM-0-15-centos COVID19]#
[root@VM-0-15-centos COVID19]#
[root@VM-0-15-centos COVID19]#
[root@VM-0-15-centos COVID19]# crontab -e
启动 Nginx
/usr/sbin/nginx
参考
________________________________________________________
Every good deed you do will someday come back to you.
Love you,love word !