随笔 - 331,  文章 - 92,  评论 - 54,  阅读 - 60万

一、插入数据

1、首先准备类似如下数据

{"_type": "type1", "_id": 1, "_index": "test", "_source": {"JOBNAME0": "guba_eastmoney_com_265162", "JOBNAME1": "guba_eastmoney_com_265162"}}

 

2、调用es相关模块插入数据到es中

复制代码
#!/usr/bin/python
import threading
import queue
import json
import time
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import os
import sys

# 

# host_list = [
#     {"host":"10.58.7.190","port":9200},
#     {"host":"10.58.55.191","port":9200},
#     {"host":"10.58.55.192","port":9200},
# ]
#
host_list = [
    {"host": "10.87.7.190", "port": 9200},
]

# Create the Elasticsearch client object used for every bulk request below.
client = Elasticsearch(host_list)

# insert.json is looked up next to this script; each non-blank line is parsed
# as one JSON document and sent to Elasticsearch as a single bulk action.
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "insert.json"), "r") as f:
    for line in f:
        if not line.strip():
            # A blank (e.g. trailing) line is not valid JSON; skip it instead
            # of letting json.loads abort the whole import.
            continue
        # `actions` is the data inserted into ES: it must be a list, and every
        # element of that list must be a dict.
        actions = [json.loads(line)]
        try:
            # The result iterator is drained so that the requests are actually
            # issued; the per-item results themselves are not inspected.
            for _ok, _info in helpers.parallel_bulk(client=client, thread_count=1, actions=actions):
                pass
        except Exception as e:
            # sys.stderr is a file object, not a callable: the original
            # `sys.stderr(e)` raised TypeError and masked the real error.
            print(e, file=sys.stderr)
复制代码

 

3、查看es索引中的文档数

[root@test1 cdrom]# curl -XGET 'http://10.87.7.190:9200/_cat/indices?v&pretty'
health status index uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   test  r91GhsFVT7iF6M3iAuNEKg   2   5      19362            0      1.3mb        499.7kb

 

 

二、读取es的数据

复制代码
#!/usr/bin/python
from kafka import KafkaProducer
import threading
import json
import time
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import os
import sys
import argparse
import random

def _count_updated(hits, expected_prefix):
    """Classify one page of search hits.

    A hit counts as correct when its ``_source`` has a ``LASTOPER`` field equal
    to ``expected_prefix + str(_id)``; every other hit (field missing or value
    different) counts as an error.

    Returns a ``(err_count, correct_count)`` tuple.
    """
    err_count = 0
    correct_count = 0
    for hit in hits:
        expected = expected_prefix + str(hit.get("_id"))
        if hit["_source"].get("LASTOPER") == expected:
            correct_count += 1
        else:
            err_count += 1
    return err_count, correct_count


def get_data_from_es():
    """Scan the ``full_sight`` index and count updated / not-updated documents.

    The index is read with the scroll API, 100 hits per page, and each hit is
    checked by ``_count_updated``.

    Returns:
        ``(err_data_num, correct_data_num)``: the number of documents that
        failed the check and the number that passed it.
    """
    host_list = [
        {"host": "10.58.55.1", "port": 9200},
        {"host": "10.58.55.2", "port": 9200},
        {"host": "10.58.55.7", "port": 9200},
        # {"host": "10.58.55.201", "port": 9200},
        # {"host": "10.58.55.202", "port": 9200},
        # {"host": "10.58.55.203", "port": 9200},
    ]

    es = Elasticsearch(host_list)
    size = 100
    # Give up scrolling after this many consecutive failed scroll requests.
    max_scroll_failures = 3

    response = es.search(index='full_sight', scroll='1m', size=size)
    scroll_id = response['_scroll_id']

    # NOTE(review): the first page is compared against "" + _id while every
    # later page is compared against "test" + _id. This mirrors the original
    # code exactly, but the two prefixes almost certainly should be the same;
    # confirm the intended value and unify them.
    err_data_num, correct_data_num = _count_updated(response['hits']['hits'], "")

    failures = 0
    try:
        # Scroll until an empty page comes back rather than pre-computing a
        # page count from hits.total: that field is an object (not an int) on
        # Elasticsearch 7+ and may be capped, so it is not a reliable bound.
        while True:
            try:
                response = es.scroll(scroll_id=scroll_id, scroll='1m')
            except Exception as e:
                failures += 1
                print("scroll request failed ({n}/{limit}): {err}".format(
                    n=failures, limit=max_scroll_failures, err=e), file=sys.stderr)
                if failures >= max_scroll_failures:
                    # The counts returned below are incomplete in this case.
                    break
                continue

            failures = 0
            # Always continue from the most recently returned scroll id.
            scroll_id = response['_scroll_id']
            hits = response['hits']['hits']
            if not hits:
                break

            page_err, page_correct = _count_updated(hits, "test")
            err_data_num += page_err
            correct_data_num += page_correct
    finally:
        # Release the server-side scroll context; the caller invokes this
        # function in a loop, so contexts would otherwise pile up until they
        # time out. Cleanup is best-effort and must not hide the result.
        try:
            es.clear_scroll(scroll_id=scroll_id)
        except Exception as e:
            print("failed to clear scroll context: {err}".format(err=e), file=sys.stderr)

    return err_data_num, correct_data_num

if __name__ == '__main__':
    # Keep re-checking the index until a pass reports zero not-updated
    # documents. Each pass prints the not-updated count, the updated count,
    # and a separator line.
    all_updated = False
    while not all_updated:
        error, correct = get_data_from_es()
        for template, count in (
            ("未更新的数据的条数为:{num}", error),
            ("已更新的数据的条数为:{num}", correct),
        ):
            print(template.format(num=count))
        print("=" * 200)
        all_updated = int(error) == 0
复制代码

 

posted on   bainianminguo  阅读(6396)  评论(0)  编辑  收藏  举报
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5
点击右上角即可分享
微信分享提示