#!/usr/bin/python
import threading
import json
import time
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import os
import sys
import argparse
# Elasticsearch cluster nodes the client may connect to (all on port 9200).
host_list = [
    {"host": address, "port": 9200}
    for address in ("1.58.55.11", "1.58.55.12", "1.58.55.13")
]


# Client configured with every node in host_list.
es = Elasticsearch(host_list)


# Number of documents requested per page.
size = 1000

# Initial search: opens a scroll context (scroll='1m') and returns the first
# page of hits together with the scroll cursor.
query = es.search(index='full_sight', scroll='1m', size=size)
results = query['hits']['hits']  # first page of hits

# Total hit count reported by the search. Elasticsearch 7+ returns an object
# of the form {"value": N, "relation": ...} here, while older versions return
# a plain integer; normalize to an int so arithmetic on it keeps working.
# NOTE(review): on 7+ the value may be a lower bound unless total-hit
# tracking is enabled -- confirm before relying on it as an exact count.
total = query['hits']['total']
if isinstance(total, dict):
    total = total['value']

scroll_id = query['_scroll_id']  # cursor used to fetch the following pages



# Number of pages needed to cover every hit: total / size, rounded up.
page = (total + size - 1) // size

import hashlib

num = 1  # running count of exported documents, shown in the progress output

# Walk through every page of the scroll, compute an independent MD5 digest for
# each document, and append one JSON line {"<_id>": "<md5>"} per document to
# the output file.
#
# The file is opened once for the whole export instead of once per document.
with open("test.text", "a", encoding="utf-8") as f:
    # Start with the first page returned by the initial search; the scroll
    # API only returns the pages *after* it, so skipping it loses documents.
    batch = results
    while batch:
        for m in batch:
            s = json.dumps(m)
            # A fresh hash per document: reusing one md5 object and calling
            # update() would make each digest depend on all earlier documents.
            v = hashlib.md5(s.encode("utf-8")).hexdigest()
            k = m["_id"]

            f.write(json.dumps({k: v}))
            f.write("\n")

            print(k, num, sep="============>")
            num += 1

        # The scroll parameter must be supplied on every call, otherwise the
        # request fails.
        response = es.scroll(scroll_id=scroll_id, scroll='1m')
        # Always continue with the most recent cursor the server handed back.
        scroll_id = response.get('_scroll_id', scroll_id)
        # An empty page means the scroll is exhausted and ends the loop.
        batch = response['hits']['hits']

 

# Originally posted on 2019-04-16 17:49 by bainianminguo (blog footer: 642 views, 0 comments).