elasticsearch-python
一、新建index
设置副本数和自定义分词器等操作
# Index creation body: index-level settings plus the field mappings.
mappings = {
    "settings": {
        # Number of replicas.
        "number_of_replicas": 0,
        # Number of shards.
        "number_of_shards": 2,
        "analysis": {
            "analyzer": {
                # Custom analyzer named "comma": a pattern-type analyzer
                # whose pattern is a single comma.
                "comma": {
                    "type": "pattern",
                    "pattern": ",",
                },
            },
        },
    },
    "mappings": {
        "type_video": {
            "properties": {
                "video_id": {
                    "type": "keyword",
                },
                # "category" uses the "comma" analyzer both at index time
                # and at search time.
                "category": {
                    "type": "text",
                    "analyzer": "comma",
                    "search_analyzer": "comma",
                },
            },
        },
    },
}
索引创建之后,动态修改index的副本数
# Send a PUT to the _settings endpoint of target_index with number_of_replicas set to "0".
curl -XPUT 'host:9200/target_index/_settings' -H 'content-Type:application/json' -d '{
"index": {
"number_of_replicas": "0"
}
}'
二、写入
1.使用helpers.bulk()批量写入,代替逐条调用index()方法
from elasticsearch import helpers
# Describe one bulk operation for helpers.bulk(): index `body` as the document
# with id item[0] in the index named by config["index"].
# NOTE(review): `config`, `item`, `body`, `actions` and `es` are defined outside
# this snippet; presumably one action is built per item inside a loop — confirm.
action = {
    # helpers.bulk() reads the operation from the "_op_type" key. The
    # un-prefixed "op_type" used before is not a recognized metadata key, so
    # the request only worked because "index" is the default operation.
    "_op_type": "index",
    "_index": config["index"],
    "_type": "type_video",
    "_id": item[0],
    "_source": body,
}
actions.append(action)
# Submit the queued actions in bulk instead of calling es.index() per document.
helpers.bulk(es, actions)
2.设置别名实现数据全量覆盖
# Full refresh of the data served through the alias "video_info".
# Existing data: index video_info_final, reachable through the alias video_info.
# NOTE(review): presumably the replacement data was loaded into
# video_info_temp before this snippet runs — confirm.

# 1. Move the alias from video_info_final to video_info_temp. The remove and
#    the add go into one update_aliases call so the switch is atomic and the
#    alias never resolves to both indices. The remove is only included when
#    video_info_final actually carries the alias, because removing a missing
#    alias makes the whole request fail.
to_temp_actions = [{"add": {"index": "video_info_temp", "alias": "video_info"}}]
if es.indices.exists_alias(index="video_info_final", name="video_info"):
    to_temp_actions.insert(
        0, {"remove": {"index": "video_info_final", "alias": "video_info"}}
    )
es.indices.update_aliases(body={"actions": to_temp_actions})

# 2. Delete video_info_final if it exists. Checking explicitly (instead of a
#    bare except) keeps real failures, such as connection errors, from being
#    reported as "no video_info_final".
if es.indices.exists(index="video_info_final"):
    es.indices.delete(index="video_info_final")
    print("已经删除video_info_final")
else:
    print("no video_info_final")

# 3. Copy the data from video_info_temp into video_info_final.
# NOTE(review): nothing here creates video_info_final before the reindex, so
# its settings/mappings presumably come from auto-creation or an index
# template — confirm the custom mapping is still applied.
es.reindex(body={
    "source": {
        "index": "video_info_temp"
    },
    "dest": {
        "index": "video_info_final"
    }
})

# 4. Move the alias back from video_info_temp to video_info_final, again as
#    one atomic request.
es.indices.update_aliases(body={
    "actions": [
        {"remove": {"index": "video_info_temp", "alias": "video_info"}},
        {"add": {"index": "video_info_final", "alias": "video_info"}},
    ]
})

# 5. Delete the temporary index video_info_temp.
es.indices.delete(index="video_info_temp")
三、删除
清空index
def delete_all_document(es=None, index=None):
    """Empty an index by deleting every document matched by a match_all query.

    Args:
        es: Client used to send the request. When omitted, a new
            ``Elasticsearch(hosts="xxx:9200")`` client is created, which is
            what this function always did before.
        index: Name of the index to empty. Defaults to ``config["index"]``.

    Returns:
        Whatever ``es.delete_by_query`` returns, so callers can inspect the
        outcome of the request.
    """
    if es is None:
        es = Elasticsearch(hosts="xxx:9200")
    if index is None:
        index = config["index"]
    match_everything = {"query": {"match_all": {}}}
    return es.delete_by_query(index=index, body=match_everything)
四、索引操作
1、增加一个字段
# Mapping fragment that declares the new integer field "has_source".
mapping = {
    "properties": {
        "has_source": {"type": "integer"},
    },
}
index_need = "my_index"
# Update the index mapping with the fragment, addressed to the "type_video" type.
r = es.indices.put_mapping(
    index=index_need,
    doc_type="type_video",
    include_type_name=True,
    body=mapping,
)
五、字段更新
1、更新一条记录
# Update one record: send {"doc": {"count": 1}} for a single document id
# in index "test123".
r = es.update(
    index="test123",
    doc_type="doc",
    id="tAU_QYQB-S4OeklsxeBv",
    body={"doc": {"count": 1}},
)
2、批量更新
使用es.bulk()(body中动作行与文档行交替排列),而不是helpers.bulk()
# Bulk request body: each "update" action line is followed by the partial
# document ("doc") for the document that action targets.
doc = [
    {"update": {"_index": "test123", "_id": "tgVfQYQB-S4OeklsPODj"}},
    {"doc": {"count": 1314}},
    {"update": {"_index": "test123", "_id": "tAU_QYQB-S4OeklsxeBv"}},
    {"doc": {"count": 7758}},
]
# The request-level index now matches the "_index" named by every action.
# It previously said "search_text123", which disagreed with the actions
# (the per-action "_index" is the one that decides where each update goes).
r = es.bulk(body=doc, doc_type="_doc", index="test123")
# es.bulk() reports per-item failures inside the response, so print it for inspection.
print(r)