elastic_search commands
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Elasticsearch examples (legacy string/not_analyzed mapping syntax): create an index,
add a mapping, index a few jieba-tokenized documents, then try different query types."""
import os
import sys
import time

import jieba

sys.path.append(os.path.dirname(os.path.split(os.path.realpath(__file__))[0]))
from elasticsearch import Elasticsearch
from conf.settings import FAQ_ES_CONF  # [{'host': '192.168.7.173', 'port': 9200}]

es_ser = Elasticsearch(FAQ_ES_CONF)

# Recreate the test index from scratch.
es_ser.indices.delete(index='customer', ignore=404)
es_ser.indices.create(index='customer', ignore=400)

# Field mapping: 'company' is not_analyzed, so it is stored as a single exact term.
body = {"properties": {'about': {'type': 'string'},
                       'name': {'type': 'string'},
                       'age': {'type': 'integer'},
                       'score': {'type': 'integer'},
                       'company': {'type': 'string', 'index': 'not_analyzed'},
                       'interests': {'type': 'string'},
                       'timestamp': {'type': 'date'},
                       'id': {'type': 'integer'}}}
es_ser.indices.put_mapping(index='customer', doc_type='round_FAQ2', body=body)

# Index three sample documents; 'about' is pre-tokenized with jieba.
es_ser.index(index='customer', doc_type='round_FAQ2', id=1,
             body={"name": "wulangzhou", "age": 25, "score": [85, 75, 95],
                   "about": jieba.lcut('i like think deep'), "company": 'zhangyue',
                   "interests": ["music"], "timestamp": '2016'})
es_ser.index(index='customer', doc_type='round_FAQ2', id=2,
             body={"name": "yanweihong", "age": 28, "about": jieba.lcut('i like exercise more'),
                   "score": [90, 85, 77], "company": 'zhangyue',
                   "interests": ["forestry", 'i', 'like'], "timestamp": '2017'})
es_ser.index(index='customer', doc_type='round_FAQ2', id=3,
             body={"name": "liumin", "age": 28, "about": jieba.lcut('i like cat'),
                   "score": [80, 80, 80, 80], "company": 'jindong',
                   "weight": 85, "interests": ['game'], "timestamp": '2016'})

time.sleep(1)  # wait for the index refresh so the new documents are searchable

# Alternative query bodies; each assignment overwrites the previous one,
# so only the last 'body' is actually used by the search below.
body = {'query': {'multi_match': {'query': 'i like cat',
                                  'fields': ['about', 'interests'],
                                  'type': 'most_fields'}}}  # optionally 'tie_breaker': 0.2
body = {'query': {'match_phrase': {'about': 'i like'}}}       # phrase match: terms adjacent and in order
body = {'query': {'range': {'age': {'gte': 18, 'lte': 35}}}}  # numeric range query
body = {'query': {'match_all': {}}}                           # return every document
body = {'query': {'terms': {'age': [22, 20]}}}                # exact match against a list of values
body = {'query': {'exists': {'field': 'weight'}}}             # documents that have a 'weight' field
for sources in es_ser.search(index='customer', doc_type='round_FAQ2', body=body)['hits']['hits']:
    for k, v in sources.items():
        print k, v
    print ''
'''
http://www.tuicool.com/articles/uAbmuaU
match_phrase: for an analyzed string field such as 'about', it behaves roughly like
    'query_str' in 'match_string' -- all query terms must appear, adjacent and in the same
    order, so it reads like a phrase-containment check (see the comparison sketch after
    this block).
match: each term of the tokenized query_str is matched against the terms of the tokenized
    field value, and the score reflects how many of them match (both the query and the
    stored text are analyzed).
term: slightly different from match_phrase -- roughly 'query_str' == 'stored_term'; the
    query is not analyzed at all, so it only fits exact-value / not_analyzed fields.
multi_match with most_fields: sums the matches across all listed fields, rewarding documents
    that match as many terms as possible overall (do not combine with tie_breaker).
multi_match with best_fields: the single best-matching field dominates the score, e.g.
    'i like cat' scores highest when one field matches it completely (use tie_breaker to
    blend in the other fields; sketch after this block).
terms: like term, but matches against a list of exact values.
bool: use a bool query when AND/OR logic is needed; clauses can be nested (sketch after
    this block):
    { "bool": { "must": [], "should": [], "must_not": [] } }
'''
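# Quick comparison sketch (illustrative; hit counts depend on how the fields are analyzed):
# match may hit on any analyzed term, match_phrase needs the terms adjacent and in order,
# and term only fits exact values such as the not_analyzed 'company' field.
for q in ({'query': {'match': {'about': 'like cat'}}},
          {'query': {'match_phrase': {'about': 'like cat'}}},
          {'query': {'term': {'company': 'jindong'}}}):
    res = es_ser.search(index='customer', doc_type='round_FAQ2', body=q)
    print q, '->', res['hits']['total']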
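# Sketch of the best_fields variant described above (illustrative values, same test index):
# the best-scoring field drives the score, and tie_breaker adds the other fields' scores at 20%.
body = {'query': {'multi_match': {'query': 'i like cat',
                                  'fields': ['about', 'interests'],
                                  'type': 'best_fields',
                                  'tie_breaker': 0.2}}}
print es_ser.search(index='customer', doc_type='round_FAQ2', body=body)['hits']['total']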
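# Sketch of a nested bool query as described above (clause values are illustrative):
# must be aged 18-35, prefer 'cat' in about, exclude company 'zhangyue'.
body = {'query': {'bool': {'must': [{'range': {'age': {'gte': 18, 'lte': 35}}}],
                           'should': [{'match': {'about': 'cat'}}],
                           'must_not': [{'term': {'company': 'zhangyue'}}]}}}
for hit in es_ser.search(index='customer', doc_type='round_FAQ2', body=body)['hits']['hits']:
    print hit['_source']['name'], hit['_score']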
def get_analyze_body(**kargs):
    """Turn the incoming query conditions into an ES query body (bool + boosted match_phrase)."""
    from faq.doc_idf import get_phrases_rate
    question = kargs.get('question')
    if question and isinstance(question, str):
        question = question.decode('utf-8')
    # replace_string / get_right_phrases / filter_phrases / jieba_cut are project helpers
    # (cleaning, tokenizing and filtering the question) defined elsewhere in the project.
    question = replace_string(question)
    question_args = get_right_phrases(filter_phrases(jieba_cut(question)))
    channel_num_arg = kargs.get('channel_num')
    version_arg = kargs.get('version')
    # Each extracted phrase becomes a should clause, boosted in proportion to its rate.
    question_arg_rate = get_phrases_rate(question_args)
    should = []
    for question_arg, rate in question_arg_rate.items():
        should.append({'match_phrase': {'question': {'query': question_arg, 'boost': 10 * rate}}})
    # channel_num / version: always accept the wildcard value -1, and boost an exact match.
    must_channel_num = [{'match_phrase': {'channel_num': {'query': -1, 'boost': 1}}}]
    if channel_num_arg:
        must_channel_num.append({'match_phrase': {'channel_num': {'query': int(channel_num_arg), 'boost': 1.5}}})
    must_version = [{'match_phrase': {'version': {'query': -1, 'boost': 1}}}]
    if version_arg:
        must_version.append({'match_phrase': {'version': {'query': int(version_arg), 'boost': 1.5}}})
    return {'query': {'bool': {'should': should,
                               'must': [{'bool': {'should': must_channel_num}},
                                        {'bool': {'should': must_version}}]}},
            'min_score': 1}
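# Sketch of how the returned body could be used; the index / doc_type names and the sample
# arguments below are illustrative placeholders, not taken from the project configuration.
query_body = get_analyze_body(question='how to sync my bookshelf', channel_num=3, version=710)
for hit in es_ser.search(index='faq', doc_type='round_FAQ', body=query_body)['hits']['hits']:
    print hit['_score'], hit['_source'].get('question')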