基于elasticsearch-kibana框架可视化疫情数据
1、启动 Jupyter Notebook。
注意要用高权限启动 jupyter notebook --allow-root
2、创建映射
注意:先用数据建一个默认的映射,再做修改
创建一个索引
PUT /timesskk/_doc/1
{
"name":"十一月的肖邦",
"date":"2005-11-24T22:20",
"format_date":"1381419600"
}
获取自动的映射
GET /timesskk/_mapping
3、导入数据确保没有格式错误
格式需要以 [ 开头
如
[{
"locationId": 620000,
"continentName": "亚洲",
"continentEnglishName": "Asia",
"countryName": "中国",
"countryEnglishName": "China",
"provinceName": "甘肃省",
"provinceShortName": "甘肃",
"provinceEnglishName": "Gansu",
"currentConfirmedCount": 27,
"confirmedCount": 91,
"suspectedCount": 0,
"curedCount": 62,
"deadCount": 2,
"cities": [
{
"cityName": "兰州",
"currentConfirmedCount": 12,
"confirmedCount": 36,
"suspectedCount": 0,
"curedCount": 22,
"deadCount": 2,
"locationId": 620100,
"cityEnglishName": "Lanzhou"
},
{
"cityName": "平凉",
"currentConfirmedCount": 6,
"confirmedCount": 9,
4、修改映射
嵌套json要使用 "type": "nested"(注意:这里创建的索引是 covid6,而第5步的代码写入的是 covid5,两处索引名需要保持一致,映射才会生效)
PUT covid6
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"cities": {
"type": "nested",
"properties": {
"cityEnglishName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"cityName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"confirmedCount": {
"type": "long"
},
"curedCount": {
"type": "long"
},
"currentConfirmedCount": {
"type": "long"
},
"deadCount": {
"type": "long"
},
"locationId": {
"type": "long"
},
"suspectedCount": {
"type": "long"
}
}
},
"comment": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"confirmedCount": {
"type": "long"
},
"continentEnglishName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"continentName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"countryEnglishName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"countryName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"curedCount": {
"type": "long"
},
"currentConfirmedCount": {
"type": "long"
},
"deadCount": {
"type": "long"
},
"locationId": {
"type": "long"
},
"provinceEnglishName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"provinceName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"provinceShortName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"suspectedCount": {
"type": "long"
},
"updateTime": {
"type": "date"
},
"updateTime_back": {
"type": "date",
"format": "yyyy-MM-dd"
}
}
}
}
数据如下:
{'locationId': 540000, 'continentName': '亚洲', 'continentEnglishName': 'Asia', 'countryName': '中国', 'countryEnglishName': 'China', 'provinceName': '西藏自治区', 'provinceShortName': '西藏', 'provinceEnglishName': 'Xizang', 'currentConfirmedCount': 0, 'confirmedCount': 1, 'suspectedCount': 0, 'curedCount': 1, 'deadCount': 0, 'cities': [{'cityName': '拉萨', 'currentConfirmedCount': 0, 'confirmedCount': 1, 'suspectedCount': 0, 'curedCount': 1, 'deadCount': 0, 'locationId': 540100, 'cityEnglishName': 'Lhasa'}], 'comment': '', 'updateTime': 1581524352057, 'updateTime_back': '2020-02-13'}
5、开始写代码
注意先导入包
pip install --index https://pypi.tuna.tsinghua.edu.cn/simple/ elasticsearch
import pandas as pd
import time
import datetime
from elasticsearch import Elasticsearch
from elasticsearch import helpers
# Load the COVID-19 JSON export into a DataFrame. The path is specific to the
# author's machine; the string must be closed with a plain ASCII quote, a
# typographic quote leaves the literal unterminated.
df0 = pd.read_json('/Users/zhangkewei/files/COVID-2019')
Mac 上的文件路径可以用 pwd 命令获取
# Connect to the Elasticsearch node on this machine. The URL is spelled out
# because newer elasticsearch-py releases (8.x) require an explicit host,
# while 7.x accepts it as well as the implicit localhost:9200 default.
es = Elasticsearch("http://localhost:9200")
# action2ES looks the client up under the upper-case name ES.
ES = es
# Query the cluster once so a connection problem shows up before the bulk load.
es.info()
def action2ES(es_bulk_body):
    """Index the prepared actions through the Elasticsearch bulk helper.

    Args:
        es_bulk_body: Sized collection (``len()`` is taken) of bulk action
            dicts, passed unchanged to ``helpers.streaming_bulk``.

    Returns:
        True when the helper finished and reported no failed action,
        False when at least one action failed or the run was aborted by
        an exception.
    """
    action_count = len(es_bulk_body)
    print('%s: Start bulk...' % time.ctime())

    failed_count = 0
    aborted = False
    try:
        # yield_ok=False asks the helper to skip successful documents, so
        # every item it yields stands for one failed action.
        stream_bulk = helpers.streaming_bulk(
            ES, es_bulk_body, chunk_size=1500, raise_on_error=False,
            raise_on_exception=False, max_retries=7, initial_backoff=1,
            yield_ok=False,
        )
        for _failed in stream_bulk:
            failed_count += 1
    except Exception as e:
        # Best-effort import: report the problem instead of crashing, but
        # remember it so an interrupted run is never reported as success.
        print("work Error: %s" % e)
        aborted = True

    # When the run was aborted this is only an upper bound: actions that
    # were never sent are not counted as failures by the helper.
    indexed_count = action_count - failed_count
    log_string = '%s: actions:%d, indexed:%d' % (
        time.ctime(), action_count, indexed_count
    )
    print(log_string)

    if aborted or failed_count:
        print('Bulk Error')
        return False
    print('Bulk Sucess.')
    return True
def to_local_date(epoch_ms):
    """Format an epoch-milliseconds timestamp as a ``YYYY-MM-DD`` string.

    Args:
        epoch_ms: Timestamp in milliseconds since the Unix epoch (int, or
            anything ``int()`` accepts).

    Returns:
        The calendar date of that instant in the machine's local time zone.
    """
    # Integer division drops the millisecond part. Unlike slicing the first
    # ten characters of str(value), it does not depend on how the number
    # happens to be printed (e.g. floats in scientific notation).
    return time.strftime("%Y-%m-%d", time.localtime(int(epoch_ms) // 1000))


def build_actions(records, index='covid5'):
    """Turn record dicts into bulk "index" actions.

    Args:
        records: Iterable of dicts whose ``updateTime_back`` value is an
            epoch-milliseconds timestamp.
        index: Name of the target index.
            NOTE(review): the mapping shown in step 4 is created on
            ``covid6`` while this default keeps the original ``covid5`` --
            confirm which index is intended.

    Returns:
        A list with one action per record. Each ``_source`` is a copy of the
        record with ``updateTime_back`` replaced by its ``YYYY-MM-DD`` date;
        the input records are left untouched.
    """
    # No '_type' key: mapping types are deprecated in Elasticsearch 7 and
    # rejected by Elasticsearch 8.
    return [
        {
            '_index': index,
            '_source': {
                **record,
                'updateTime_back': to_local_date(record['updateTime_back']),
            },
        }
        for record in records
    ]


# Notebook cells and direct script runs both execute as "__main__", so the
# import still runs there; the guard only keeps it from firing on import.
if __name__ == '__main__':
    # Keep the raw timestamp in updateTime and derive the date-only column
    # from a copy of it.
    df0['updateTime_back'] = df0['updateTime']
    records = df0.to_dict('records')
    es_bulk_body = build_actions(records)
    print('%s: Generate %d actions.' % (time.ctime(), len(es_bulk_body)))
    action2ES(es_bulk_body)
6、查看
GET /covid5/_count
GET /_cat/indices