Python 操作Elasticsearch之elasticsearch模块

官方文档：https://elasticsearch-py.readthedocs.io/en/master/

mac下的操作

01 基础

01-01 Elasticsearch 基本操作

启动 elasticsearch

 brew services start elasticsearch
 # 或者
elasticsearch

停止elasticsearch

brew services stop elasticsearch
或者
control + c

后台浏览

http://localhost:9200

01-02 在python中引入

安装Elasticsearch模块

pip3 install elasticsearch

简单基本操作

from elasticsearch import Elasticsearch

ES = ["127.0.0.1:9200", ]
es = Elasticsearch(
    ES,
    # 启动前嗅探es集群服务器
    sniff_on_start=True,
    # es集群服务器结点连接异常时是否刷新es节点信息
    sniff_on_connection_fail=True,
    # 每60秒刷新节点信息
    sniffer_timeout=60
)
# 必须指定id；同一个索引下id不能重复，id可以是数字或字符串
es.create(index="索引", id=1, doc_type="类型", body={})
# 不用指定id，是随机的
es.index(index="索引", doc_type="dict", body={})

es.delete(id="id", index="索引", doc_type="类型")
# 注意三个值必填，精准删除，没有时会报错
"""
参数
index 索引
id 数据id
doc_type 数据类型
"""
# 可以指定body搜索指定数据删除
es.delete_by_query(index='books', body=body)

查询

es.search() # 查询所有的
"""
index
doc_type
body
"""
# get
es.get(index="books", doc_type="dict", id=2)
"""
参数
index 索引 必填
doc_type 数据类型 必填
id 必填
"""

注意：

	get三个参数必填，如果没找到会报错（抛出 NotFoundError）

02 操作

02-01 写入

操作模版

from elasticsearch_dsl import connections
from elasticsearch.client import IndicesClient


type_dict = {
    "text": {"type": "text"},
    "keyword": {"type": "keyword"},
    "ip": {"type": "ip"},
    "long": {"type": "long"},
    "double": {"type": "double"},
    "boolean": {"type": "boolean"},
    "integer": {"type": "integer"}

}
mappings = {
    "web_log": {
        "_source": {
            "enabled": True
        },
        "properties": {
            "remote_addr": type_dict["ip"],
            "time_local": {
                "type": "date",
                "format": "dd/MMM/yyyy:HH:mm:ss Z"

            },
            "body_bytes_sent": type_dict["long"],
            "request_length": type_dict["long"],
            "bytes_sent": type_dict["long"],
            "request_time": type_dict["double"],
            "idss_action": type_dict["integer"],
        }
    }
}
template = {
    "index_patterns": ["web_bubble_index*"],
    "settings": {
        "index": {
            "refresh_interval": "30s",
            "number_of_shards": "12",
            "number_of_replicas": "2"
        },
        "translog": {
            "sync_interval": "30s",
            "durability": "async",
            "flush_threshold_size": "1000mb"
        }
    },
    "mappings": mappings

}

es = connections.create_connection(hosts="127.0.0.1:9200", timeout=60, http_compress=True,
                                   sniff_on_connection_fail=True, )
# 设置模版（name 是模版名字，body 是上面定义的 template）
IndicesClient(es).put_template(name="模版名字", body=template)

# 获取模版
IndicesClient(es).get_template(name="模版名字")

# 删除模版
IndicesClient(es).delete_template(name="模版名字")

写入es库

普通写入

es.index(index="索引", doc_type="dict", body={})

批量写入

from elasticsearch.helpers import bulk

bulk(es, [body,])

02-02 查询

普通查询

match_all

查看所有文档，相当于不做筛选条件

body = {
"query": {
    "match_all": {}
  }
}
es.search(index="books", body=body)

size from

size 表示显示几条；from 表示从第几条开始（是从 0 计数的偏移量），默认为 0，即从第一条开始

显示前几条

# 第一种
body = {
    "query": {
        "match_all": {

        }
    },

}
es.search(index="books", body=body, size=2)
# 第二种
body = {
    "query": {
        "match_all": {

        }
    },
  	 "size": 2

}
es.search(index="books", body=body)

# 显示前两条,search中的参数优先级高

从第几条开始显示几条

# 第一种
body = {
    "query": {
        "match_all": {

        }
    },
    "from": 5

}
es.search(index="books", body=body, size=2)
# 第二种
body = {
    "query": {
        "match_all": {

        }
    },
    "from": 5,
  	"size": 2

}
es.search(index="books", body=body)

# from 是从 0 计数的偏移量：跳过前 5 条，从第 6 条开始显示两条

注意：

	size、from和query同级，都可以用

term

term query会去倒排索引中寻找确切的term，它并不知道分词器的存在，这种查询适合keyword、numeric、date等明确值的

查询某个字段的某个关键字

body = {
    "query":{
        "term":{
            "name": "java"
        }
    }
}
es.search(index="books", body=body)
# name是java的数据；未指定size时默认最多返回10条，可用size限制显示数量

精确匹配，注意query中的格式，value表示一个匹配的数据，注意只支持一个key

terms

查询某个字段里含有多个关键词的文档

body = {
    "query":{
        "terms":{
            "name": ["java", "python"]
        }
    }
}
es.search(index="books", body=body)
# name是python或者java

注意：

	只支持一个key，value必须是列表或者元组

match

match query 知道分词器的存在，会对field进行分词操作，然后再查询

body = {
    "query":{
        "match":{
            "name": "java python"
        }
    }
}
es.search(index="books", body=body)
# name是java或者是python的数据

注意：

	key只能有一个，value可以是字符串或者数字

	一般情况是字符串，会被分词器切分成多个词；空格、逗号、｜等分隔符通常都可以识别（具体取决于该字段使用的分词器）

multi_match

可以指定多个key

body = {
"query": {
    "multi_match": {
        "query": "python java",
        "fields": ["name", "book_name"]
    }
  }
}
es.search(index="books", body=body)
# name或者book_name字段值是python或者是java的数据

注意：

	query指定的是要筛选的value值，必须是字符串或者数字，可以是多个，分词

	fields是指定的key值，必须是列表或者是元组

	只要fields中的key中有query指定的value，都被筛选

复合查询

bool

有三类查询关系

must 都满足

should 只要满足一个

must_not 都不满足

基本语法

可以一块用

body = {
    "query": {
        "bool": {
            "should": [
               {
                    "term": {
                        "name": "java"
                    }
                },
                {
                    "terms": {
                        "name": ["java", "python"]
                    }
                },
                {
                    "match": {
                        "name": "java python"
                    }
                },
                {
                    "multi_match": {
                        "query": "python java",
                        "fields": ["name", "book_name"]
                    }
                }
            ],
            "must": [],
          	"must_not": []
        }
    }
}
es.search(index="books", body=body)

	注意：

		每个查询类中都能写多个查询方法

range

范围查询

body = {
    "query": {
        "range": {
            "price": {
                "lt": 180,
                "gte": 10
            }
        }
    }
}
es.search(index="books", body=body)
"""
price 大于等于10小于180
lt  小于
gt  大于
lte 小于等于
gte 大于等于
"""

注意：

	可以只写一个，例如大于10

prefix

前缀—以什么开头

body = {
    "query":
        {
            "prefix": {
                "name": "j"
            }
        }
}
es.search(index="books", body=body)
# name以j开头的数据

wildcard

通配符匹配（注意不是正则表达式）：* 匹配任意多个字符，? 匹配单个字符

body = {
    "query": {
        "wildcard": {
            "name": "*v*"
        }
    }
}
es.search(index="books", body=body)
"""
name中包含v的所有数据，
*a 以a结尾
a* 以a开头
"""

排序

sort

desc降序

asc升序

body = {
    "query": {
        "term": {
            "name": "java"
        }

    },
    "sort": {
        "age": {
            "order": "desc"
        }
    }
}
es.search(index="books", body=body, size=2)
# name是java的数据，按age降序排列，显示前两条

注意：

	sort和query同级

其他

exists

是否存在数据，结果是布尔值

es.exists(index="books", doc_type="dict", id=1)
"""
参数
index 索引必填
doc_type 数据类型 必填
id 必填
"""

get_source

直接返回body数据

es.get_source(id=2, index="books", doc_type="dict")

注意：

	id、index、doc_type必填，没数据会报错

ids

筛选id

body = {
    "query": {
        "ids": {
            "type": "dict",
            "values": [1, 2] 
        }
    }
}
es.search(index="books", body=body)
# 意思是id是1或者2的数据

filter_path

筛选显示数据的字段

body = {
    "query": {
        "term": {
            "name": "java"
        }

    },
    "sort": {
        "age": {
            "order": "desc"
        }
    }
}
es.search(index="books", body=body, filter_path=["hits.hits._id", "hits.hits._source.name"])
# 支持*，匹配任何字段或者字段的一部分
es.search(index="books", body=body, filter_path=["hits.hits._*",])
"""
只显示数据的_id和数据的name
可以限制显示多个字段，以逗号隔开
没有该字段时，不显示
"""

count

数据数量

body = {
    "query": {
        "term": {
            "name": "java"
        }
    }
}
result = es.count(index="books", body=body)
# 结果是字典，数量通过 result.get("count") 获取

match_phrase

短语匹配：和term不同，查询内容同样会被分词，但要求所有词都出现，并且顺序一致、位置相邻

body = {
    "query": {
        "match_phrase": {
            "name": "java python"
        }

    }
}
es.search(index="books", body=body)
# name中包含短语"java python"（java后面紧跟python）的数据

查询总结

代码加解释

from elasticsearch import Elasticsearch

es = Elasticsearch()
body = {
    "query": {
        "bool": {
            "should": [ 
                {
                    "term": { 
                        "name": "python" 
                    }
                },
                {
                    "terms": {
                        "name": ["java", ]
                    }
                },
                {
                    "match": {
                        "author": "lynn"
                    }
                },
                {
                    "multi_match": {
                        "query": "lynn 田少",
                        "fields": ["publish", "author"]
                    }
                },
                {
                    "prefix": {
                        "name": "金"
                    }
                },
                {
                    "ids": {
                        "type": "dict",
                        "values": [1, 2]
                    }
                }


            ],
            "must":[
{
                    "range": {
                        "price": {
                            "lt": 200,

                        }
                    }
                },
                {
                    "wildcard": {
                        "name": "*梅*"
                    }
                }
            ],
            "must_not": [
                {
                    "range": {
                        "price": {
                            "lt": 0
                        }
                    }
                }
            ]

        }
    },
    "sort": {
        "price": {
            "order": "desc"
        }
    },
    "from": 2,
    "size": 3
}
es.search(index="books", body=body, filter_path=["hits.hits._source.name", "hits.hits._id"])

注意：

	方法都可以放在bool中

posted @ 2020-08-21 17:38 tianzhh_lynn 阅读(2608) 评论(0) 编辑收藏举报

刷新页面返回顶部