Batch Reading Data from Elasticsearch with Python
1. Connecting to Elasticsearch with Python
Python can connect to Elasticsearch in the following ways:
pip3 install elasticsearch

from elasticsearch import Elasticsearch

es = Elasticsearch()                      # connect to the local Elasticsearch by default
es = Elasticsearch(["127.0.0.1:9200"])    # connect to port 9200 on localhost

es = Elasticsearch(["192.168.1.10", "192.168.1.11", "192.168.1.12"],  # connect to a cluster, node IPs given as a list
                   sniff_on_start=True,              # sniff the cluster for nodes before the first request
                   sniff_on_connection_fail=True,    # refresh the node list when a node stops responding
                   sniff_timeout=60)                 # timeout for sniffing, in seconds
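After creating a client it is worth verifying that the connection actually works. The following is a minimal sketch, assuming a node reachable at 127.0.0.1:9200; it uses the client's ping() and info() methods to check connectivity.

from elasticsearch import Elasticsearch

es = Elasticsearch(["127.0.0.1:9200"])   # assumed address; adjust to your deployment

if es.ping():        # ping() returns True when the cluster responds
    print(es.info()) # basic cluster metadata (cluster name, version, ...)
else:
    print("Elasticsearch is not reachable")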
Configuring HTTP status codes to ignore
es = Elasticsearch(['127.0.0.1:9200'], ignore=400)              # ignore responses with a 400 status code
es = Elasticsearch(['127.0.0.1:9200'], ignore=[400, 405, 502])  # ignore multiple status codes, given as a list
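In elasticsearch-py the ignore parameter can also be passed on individual requests, which is often more useful than setting it globally. The sketch below is an illustration with a hypothetical index name "test-index": creating an index that already exists normally raises an error, but ignoring 400 turns that response into a plain return value.

from elasticsearch import Elasticsearch

es = Elasticsearch(["127.0.0.1:9200"])

es.indices.create(index="test-index", ignore=400)         # ignore "index already exists" errors
es.indices.delete(index="test-index", ignore=[400, 404])  # ignore bad-request and not-found errors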
2. Batch Reading Data from Elasticsearch with Python
from elasticsearch import Elasticsearch

es = Elasticsearch()

query_json = {
    "query": {
        "match_all": {}          # match all documents
    }
}
page_num = 100                   # number of documents fetched per request

# "my_index" is a placeholder; replace it with your own index name
query = es.search(index="my_index", body=query_json, scroll='5m', size=page_num)
results = query['hits']['hits']          # first page of results
total = query['hits']['total']           # total number of matching documents
if isinstance(total, dict):              # Elasticsearch 7+ wraps the count in a dict
    total = total['value']
scroll_id = query['_scroll_id']          # scroll cursor used to page through the remaining results

every_num = int(total / page_num)        # number of additional scroll requests needed

alist = []
for i in range(0, every_num + 1):
    # the scroll parameter must be passed again, otherwise the request fails
    query_scroll = es.scroll(scroll_id=scroll_id, scroll='5m')['hits']['hits']
    results += query_scroll

for hit in results:
    es_data_dict = hit["_source"]["word"]    # extract the "word" field from each document
    # print(es_data_dict)
    alist.append(es_data_dict)
print(len(alist))
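As an alternative to managing the scroll cursor by hand, elasticsearch-py ships a helpers.scan generator that issues the scroll requests internally and yields one hit at a time. The sketch below reuses the placeholder index name "my_index" and the "word" field from the example above; adjust both to your data.

from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

es = Elasticsearch(["127.0.0.1:9200"])

query_json = {"query": {"match_all": {}}}

# scan() handles the scroll bookkeeping and yields hits lazily
alist = []
for hit in scan(es, query=query_json, index="my_index", scroll='5m', size=100):
    alist.append(hit["_source"]["word"])
print(len(alist))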