elasticsearch-python

一、新建index

设置副本数和自定义分词器等操作

# Custom "comma" analyzer: a pattern analyzer whose split pattern is ",".
_comma_analyzer = {"type": "pattern", "pattern": ","}

# Index-level settings: replica count, shard count and the custom analyzer.
_index_settings = {
    "number_of_replicas": 0,
    "number_of_shards": 2,
    "analysis": {"analyzer": {"comma": _comma_analyzer}},
}

# Field definitions of the "type_video" mapping type: video_id is stored as
# a keyword, category is text analyzed with "comma" at index and search time.
_video_properties = {
    "video_id": {"type": "keyword"},
    "category": {
        "type": "text",
        "analyzer": "comma",
        "search_analyzer": "comma",
    },
}

# Body used when creating the index (settings plus mappings).
mappings = {
    "settings": _index_settings,
    "mappings": {"type_video": {"properties": _video_properties}},
}

后面动态修改index副本数

# Change the replica count of an existing index (target_index) by sending a
# PUT request with a JSON body to its _settings endpoint.
# "host" is a placeholder for the Elasticsearch node address.
curl -XPUT 'host:9200/target_index/_settings' -H 'content-Type:application/json' -d '{
    "index": {
       "number_of_replicas": "0"
    }
}'

二、写入

1.使用helpers批量写入,代替index()方法

from elasticsearch import helpers
# One bulk operation for helpers.bulk(). The helper reads the operation kind
# from the "_op_type" key (defaulting to "index"); a plain "op_type" key is
# not recognised as metadata and would be silently ignored.
action = {
    "_op_type": "index",
    "_index": config["index"],
    "_type": "type_video",
    "_id": item[0],
    "_source": body,
}
actions.append(action)
# Send all queued actions through the bulk helper instead of calling
# index() once per document.
helpers.bulk(es, actions)

2.设置别名实现数据全量覆盖

# Full-refresh flow. Existing data: index video_info_final, served through
# the alias video_info. The fresh data is read from video_info_temp
# (assumes video_info_temp has been populated before this runs).

# 1. Point the alias video_info at video_info_temp. When video_info_final
#    currently carries the alias, it is detached in the same request so the
#    alias never resolves to both indices at once.
alias_actions = [{"add": {"index": "video_info_temp", "alias": "video_info"}}]
if es.indices.exists_alias(index="video_info_final", name="video_info"):
    alias_actions.insert(
        0, {"remove": {"index": "video_info_final", "alias": "video_info"}}
    )
es.indices.update_aliases(body={"actions": alias_actions})

# 2. Delete video_info_final. Only a missing index is tolerated; any other
#    failure propagates, so the reindex below never writes into a stale index.
if es.indices.exists(index="video_info_final"):
    es.indices.delete(index="video_info_final")
    print("已经删除video_info_final")
else:
    print("no video_info_final")

# 3. Copy the data from video_info_temp into video_info_final.
es.reindex(body={
    "source": {"index": "video_info_temp"},
    "dest": {"index": "video_info_final"},
})

# 4. Move the alias video_info from video_info_temp back to video_info_final
#    in a single request (remove + add).
es.indices.update_aliases(body={
    "actions": [
        {"remove": {"index": "video_info_temp", "alias": "video_info"}},
        {"add": {"index": "video_info_final", "alias": "video_info"}},
    ]
})

# 5. Delete the temporary index video_info_temp.
es.indices.delete(index="video_info_temp")

三、删除

清空index

def delete_all_document():
    """Empty the configured index.

    Connects to the cluster and runs a delete-by-query with a ``match_all``
    query against ``config["index"]``.
    """
    client = Elasticsearch(hosts="xxx:9200")
    match_everything = {"query": {"match_all": {}}}
    client.delete_by_query(index=config["index"], body=match_everything)

四、索引操作

1、增加一个字段

# Index whose mapping gets the additional field.
index_need = "my_index"

# Mapping fragment declaring the new integer field "has_source".
mapping = {"properties": {"has_source": {"type": "integer"}}}

# Update the index mapping for the "type_video" document type.
r = es.indices.put_mapping(
    doc_type="type_video",
    include_type_name=True,
    index=index_need,
    body=mapping,
)

五、字段更新

1、更新一条记录

# Partial update of a single document: only the "count" field is sent.
partial_doc = {"doc": {"count": 1}}
r = es.update(
    index='test123',
    doc_type='doc',
    id="tAU_QYQB-S4OeklsxeBv",
    body=partial_doc,
)

2、批量更新

使用es.bulk(),而不是helpers.bulk()

# Body for es.bulk(): each "update" action line is followed by the partial
# document to apply to that id.
doc = [
    {'update': {'_index': 'test123', '_id': 'tgVfQYQB-S4OeklsPODj'}},
    {'doc': {'count': 1314}},
    {'update': {'_index': 'test123', '_id': 'tAU_QYQB-S4OeklsxeBv'}},
    {'doc': {'count': 7758}},
]
# The default index is kept identical to the "_index" every action names,
# so the request does not mention two different indices.
# NOTE(review): doc_type is "_doc" here while the single-record update
# example uses 'doc' - confirm which type the index really has.
r = es.bulk(body=doc, doc_type="_doc", index="test123")
print(r)
posted @ 2022-10-27 21:19  木叶流云  阅读(167)  评论(0)  编辑  收藏  举报