Elasticsearch导出数据存入本地文件
直接上脚本
# -*- coding: utf-8 -*-
"""Export records matching a query from Elasticsearch into a local log file.

Scrolls through every hit of a terms query, joins the ``other6`` and
``source`` fields of each document, de-duplicates the results, and appends
them to a size-rotated local log file.
"""
import logging.handlers
import platform

from elasticsearch import Elasticsearch

# Pick the output path per platform.
if platform.system() == "Windows":
    LOG_FILE_check = './app_cic.txt'
else:
    LOG_FILE_check = '/cic1.log'

# Rotating handler: 200 MB per file, at most 10 backup files.
# (The original passed 1200 * 1024 * 1024 while its comment said 200 MB;
# 200 MB matches the stated intent.)
handler = logging.handlers.RotatingFileHandler(
    LOG_FILE_check, maxBytes=200 * 1024 * 1024, backupCount=10)
handler.setFormatter(logging.Formatter('\n' + '%(message)s'))
logger = logging.getLogger('check')
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

# Cluster address is hard-coded; sniffing keeps the node list fresh.
es = Elasticsearch(["20.0.0.11:9200"], sniff_on_start=True,
                   sniff_on_connection_fail=True, sniff_timeout=60)

query_json = {
    "query": {
        "terms": {
            "site": [
                "百度搜索"
            ]
        }
    }
}

page_num = 1000  # hits fetched per scroll page

query = es.search(index='guoyan_index_v1', body=query_json,
                  scroll='5m', size=page_num)
total = query['hits']['total']          # total hit count reported by ES
scroll_id = query['_scroll_id']         # cursor for the scroll API
print("total", total)
print("scroll_id", scroll_id)

end_data_list = []
# Start from the FIRST page returned by search() — the original discarded it
# and only consumed scroll() pages.
results = query['hits']['hits']
page = 0
while results:
    print("正在读取的位置是:", page)
    for hit in results:
        try:
            source = hit['_source']["source"]
            other6 = hit['_source']["other6"]
            end_data_list.append(other6 + " " + source)
        except KeyError:
            # Document lacks one of the two fields; skip it deliberately.
            pass
    page += 1
    # Loop until ES returns an empty page: this also covers the final
    # partial page the original fixed-count loop missed. Refresh the
    # scroll id from every response — it is allowed to change.
    resp = es.scroll(scroll_id=scroll_id, scroll='5m')
    scroll_id = resp['_scroll_id']
    results = resp['hits']['hits']

# De-duplicate once, after collection, preserving first-seen order.
end_data_list = list(dict.fromkeys(end_data_list))
print("去重以后的数据是条数是:", len(end_data_list))
for end_data in end_data_list:
    logger.info(end_data)

# Release the server-side scroll context instead of letting it expire.
es.clear_scroll(scroll_id=scroll_id)
如果觉得对您有帮助,麻烦您点一下推荐,谢谢!
好记忆不如烂笔头