基于elasticsearch-kibana框架可视化疫情数据

1、启动 Jupyter Notebook。

注意要用高权限启动  jupyter notebook --allow-root

2、创建映射 

注意:先用一条数据创建默认的映射,再做修改
创建一个索引
PUT /timesskk/_doc/1
{
"name":"十一月的肖邦",
"date":"2005-11-24T22:20",
"format_date":"1381419600"
}
获取自动的映射
GET /timesskk/_mapping

3、导入数据确保没有格式错误

格式需要以 [ 开头  
 
[{
            "locationId": 620000,
            "continentName": "亚洲",
            "continentEnglishName": "Asia",
            "countryName": "中国",
            "countryEnglishName": "China",
            "provinceName": "甘肃省",
            "provinceShortName": "甘肃",
            "provinceEnglishName": "Gansu",
            "currentConfirmedCount": 27,
            "confirmedCount": 91,
            "suspectedCount": 0,
            "curedCount": 62,
            "deadCount": 2,
            "cities": [
                {
                    "cityName": "兰州",
                    "currentConfirmedCount": 12,
                    "confirmedCount": 36,
                    "suspectedCount": 0,
                    "curedCount": 22,
                    "deadCount": 2,
                    "locationId": 620100,
                    "cityEnglishName": "Lanzhou"
                },
                {
                    "cityName": "平凉",
                    "currentConfirmedCount": 6,
                    "confirmedCount": 9,
 
 

4、修改映射

 
 
嵌套 JSON 要使用    "type": "nested",(注意:这里创建的索引名是 covid6,而下文导入代码和查询使用的是 covid5,实际操作时请保持索引名一致)
PUT covid6
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "cities": {
        "type": "nested",
        "properties": {
          "cityEnglishName": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "cityName": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "confirmedCount": {
            "type": "long"
          },
          "curedCount": {
            "type": "long"
          },
          "currentConfirmedCount": {
            "type": "long"
          },
          "deadCount": {
            "type": "long"
          },
          "locationId": {
            "type": "long"
          },
          "suspectedCount": {
            "type": "long"
          }
        }
      },
      "comment": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "confirmedCount": {
        "type": "long"
      },
      "continentEnglishName": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "continentName": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "countryEnglishName": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "countryName": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "curedCount": {
        "type": "long"
      },
      "currentConfirmedCount": {
        "type": "long"
      },
      "deadCount": {
        "type": "long"
      },
      "locationId": {
        "type": "long"
      },
      "provinceEnglishName": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "provinceName": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "provinceShortName": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "suspectedCount": {
        "type": "long"
      },
      "updateTime": {
        "type": "date"
      },
      "updateTime_back": {
        "type": "date",
        "format": "yyyy-MM-dd"
      }
    }
  }
}
 
数据如下:
{'locationId': 540000, 'continentName': '亚洲', 'continentEnglishName': 'Asia', 'countryName': '中国', 'countryEnglishName': 'China', 'provinceName': '西藏自治区', 'provinceShortName': '西藏', 'provinceEnglishName': 'Xizang', 'currentConfirmedCount': 0, 'confirmedCount': 1, 'suspectedCount': 0, 'curedCount': 1, 'deadCount': 0, 'cities': [{'cityName': '拉萨', 'currentConfirmedCount': 0, 'confirmedCount': 1, 'suspectedCount': 0, 'curedCount': 1, 'deadCount': 0, 'locationId': 540100, 'cityEnglishName': 'Lhasa'}], 'comment': '', 'updateTime': 1581524352057, 'updateTime_back': '2020-02-13'}
 

5、开始写代码

注意先导入包
pip install --index https://pypi.tuna.tsinghua.edu.cn/simple/ elasticsearch
 
import pandas as pd
import time
import datetime
from elasticsearch import Elasticsearch
from elasticsearch import helpers
# Load the COVID-2019 JSON export into a DataFrame.
# The path literal must be closed with an ASCII quote; a typographic
# quote (’) makes the statement a syntax error.
df0=pd.read_json('/Users/zhangkewei/files/COVID-2019')
 
Mac的位置 用pwd命令获取
# Create one client with the library's default connection settings and
# expose it under both names: `es` for interactive use and `ES`, which is
# the module-level name action2ES() passes to the bulk helper.
ES = es = Elasticsearch()
# Request cluster info; presumably used here as a quick connectivity check.
es.info()
 
def action2ES(es_bulk_body):
    """Send the prepared actions to Elasticsearch through the bulk helper.

    Args:
        es_bulk_body: A sized collection (e.g. a list) of bulk action dicts;
            ``len()`` is taken on it to know how many actions were submitted.

    Returns:
        True when every submitted action was indexed, False when at least
        one action failed or the bulk call itself raised an exception.
    """
    action_count = len(es_bulk_body)
    print('%s: Start bulk...' % time.ctime())
    # Initialised before the try block so it is always bound afterwards.
    failed_count = 0
    try:
        stream_bulk = helpers.streaming_bulk(
            ES, es_bulk_body, chunk_size=1500,
            raise_on_error=False, raise_on_exception=False,
            max_retries=7, initial_backoff=1, yield_ok=False
        )
        # yield_ok=False asks the helper to skip successful documents, so
        # every item yielded here is counted as a failed action.
        for _blk in stream_bulk:
            failed_count += 1
    except Exception as e:
        # The import was aborted, so the number of indexed documents is
        # unknown; report failure instead of deriving a misleading count.
        print("work Error: %s" % e)
        print('Bulk Error')
        return False
    indexed_count = action_count - failed_count
    log_string = '%s: actions:%d, indexed:%d' % (
        time.ctime(), action_count, indexed_count
        )
    print(log_string)
    if action_count == indexed_count:
        print('Bulk Sucess.')
        return True
    print('Bulk Error')
    return False
 
 
# Copy the raw update timestamp so that `updateTime` keeps the original
# value while `updateTime_back` is rewritten below as a yyyy-MM-dd string
# (the format declared for that field in the index mapping).
df0['updateTime_back'] = df0['updateTime']
records = df0.to_dict('records')


es_bulk_body = []
for record in records:

    print(record['updateTime_back'])
    # The sample data stores updateTime as a 13-digit epoch-millisecond
    # value; its first 10 digits are the epoch seconds.
    epoch_seconds = int(str(record['updateTime_back'])[0:10])
    print(record.keys())
    # Formatted in the local time zone of the machine running the script.
    record['updateTime_back'] = time.strftime(
        "%Y-%m-%d", time.localtime(epoch_seconds)
    )

    # The original script also called rightDatetimeLike() here; that helper
    # is not defined anywhere in this script and its result was never used,
    # so the call (a NameError on the first record) has been removed.
    action = {
            '_index': 'covid5',
            '_type': '_doc',
            '_source': record
            }
    print(record)
    es_bulk_body.append(action)



print('%s: Generate %d actions.' % (time.ctime(),len(es_bulk_body)))
action2ES(es_bulk_body)
 
 

6、查看

GET /covid5/_count
GET /_cat/indices
 

7、可视化分析

 
 
 
 
posted @ 2020-03-16 20:22  土八路2020  阅读(337)  评论(0)    收藏  举报