Elasticsearch 之 Python API 简单使用
elasticsearch自动补全建议功能
数据入库操作
ES mapping 要求
PUT music { "mappings": { "_doc" : { "properties" : { "suggest" : { "type" : "completion" }, "title" : { "type": "keyword" } } } } }
DocType类
from elasticsearch_dsl import (
    DocType,
    Date,
    Nested,
    Boolean,
    analyzer,
    InnerObjectWrapper,
    Completion,
    Keyword,
    Text,
    Integer,
)
from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer
from elasticsearch_dsl.connections import connections

# Register the connection at import time, pointing at the local node.
connections.create_connection(hosts=["localhost"])


class CustomAnalyzer(_CustomAnalyzer):
    """Analyzer wrapper that avoids the error raised when passing ik_analyzer.

    The analysis definition is overridden to be empty.
    NOTE(review): presumably this keeps the library from emitting a custom
    analyzer definition for the ik analyzer -- confirm against the
    installed elasticsearch_dsl version.
    """

    def get_analysis_definition(self):
        # Always report an empty analysis definition.
        return {}


# Analyzer handed to the completion field below.
ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])


class ArticleType(DocType):
    """Document type holding the completion-suggest field for articles."""

    # Completion field queried by the auto-complete suggester.
    suggest = Completion(analyzer=ik_analyzer)
    # The remaining fields are elided in the original snippet.
    ...
Items类
from models.es_types import ArticleType
from elasticsearch_dsl.connections import connections

# Low-level client used for the analyze calls below.
# NOTE(review): the value of ArticleType._doc_type.using is passed as the
# first positional argument of create_connection -- confirm that this is the
# intended connection alias.
es = connections.create_connection(ArticleType._doc_type.using)


def gen_suggests(index, info_tuple):
    """Build the list of completion-suggest entries for a document.

    Args:
        index: Name of the index whose analyzer settings are used.
        info_tuple: Iterable of ``(text, weight)`` pairs. Pairs with an
            empty/falsy ``text`` are skipped.

    Returns:
        A list of ``{"input": [...], "weight": weight}`` dicts. A token that
        was already emitted for an earlier pair is not repeated for a later
        one, so each token keeps the weight of the first pair it appeared in.
    """
    used_words = set()
    suggests = []
    for text, weight in info_tuple:
        if not text:
            continue
        # Call the ES analyze API to tokenize the string.
        words = es.indices.analyze(
            index=index,
            analyzer="ik_max_word",
            params={'filter': ["lowercase"]},
            body=text,
        )
        # Single-character tokens are dropped.
        analyzed_words = {r["token"] for r in words["tokens"] if len(r["token"]) > 1}
        new_words = analyzed_words - used_words
        if new_words:
            # Remember what has been emitted so later pairs do not repeat it.
            used_words.update(new_words)
            suggests.append({"input": list(new_words), "weight": weight})
    return suggests


class JobBoleArticleItem(scrapy.Item):
    # Field declarations are elided in the original snippet.
    ...

    def save_to_es(self):
        """Attach the generated suggestions to the article and save it."""
        # Construction and population of `article` is elided in the original
        # snippet.
        ...
        article.suggest = gen_suggests(
            ArticleType._doc_type.index,
            ((article.title, 10), (article.tags, 7)),
        )
        article.save()
        # Bump the Redis counter for saved articles.
        redis_cli.incr("jobbole_count")
        return
ES搜索语法
POST myindex/_search?pretty { "suggest": { "my-suggest": { "text": "linux", "completion": { "field": "suggest", "fuzzy": { "fuzziness": 2 } } } }, "_source": ["title"] }
自动补全建议核心代码
# As written in the Django views module
from search.models import ArticleType


class SearchSuggest(View):
    """Return auto-complete title suggestions as a JSON array."""

    def get(self, request):
        """Handle GET: read the ``s`` query parameter and suggest titles.

        Responds with a JSON list of titles; the list is empty when ``s`` is
        missing or empty.
        """
        key_words = request.GET.get('s', '')
        titles = []
        if key_words:
            completion_options = {
                "field": "suggest",
                "fuzzy": {
                    "fuzziness": 2
                },
                "size": 10
            }
            search = ArticleType.search().suggest(
                'my_suggest', key_words, completion=completion_options
            )
            suggestions = search.execute_suggest()
            # Only the first suggest entry is consulted.
            titles = [
                option._source["title"]
                for option in suggestions.my_suggest[0].options
            ]
        return HttpResponse(json.dumps(titles), content_type="application/json")
elasticsearch内容搜索功能
数据入库操作
和上面一样
搜索核心代码
# As written in the Django views module
from elasticsearch import Elasticsearch

client = Elasticsearch(hosts=["127.0.0.1"])


class SearchView(View):
    """Full-text search over the ``jobbole`` index, rendered as a result page."""

    def get(self, request):
        """Handle GET: run the search and render ``result.html``.

        Query parameters read from ``request.GET``:
            q: Search keywords (default ``""``).
            s_type: Search type (default ``"article"``); read but not used
                by this view yet.
            p: 1-based page number (default ``"1"``); values that are not
                integers or are below 1 fall back to page 1.

        The template context carries ``page``, ``all_hits``, ``key_words``,
        ``total_nums``, ``page_nums`` and ``last_seconds``.
        """
        page_size = 10

        key_words = request.GET.get("q", "")
        s_type = request.GET.get("s_type", "article")
        page = request.GET.get("p", "1")
        try:
            page = int(page)
        except (TypeError, ValueError):
            page = 1
        # A page below 1 would produce a negative "from" offset.
        if page < 1:
            page = 1

        start_time = datetime.now()
        response = client.search(
            index="jobbole",
            body={
                "query": {
                    "multi_match": {
                        "query": key_words,
                        "fields": ["tags", "title", "content"]
                    }
                },
                "from": (page - 1) * page_size,
                "size": page_size,
                "highlight": {
                    "pre_tags": ['<span class="keyWord">'],
                    "post_tags": ['</span>'],
                    "fields": {
                        "title": {},
                        "content": {},
                    }
                }
            }
        )
        end_time = datetime.now()
        last_seconds = (end_time - start_time).total_seconds()

        total_nums = response["hits"]["total"]
        # Elasticsearch 7+ reports the total as {"value": ..., "relation": ...}.
        if isinstance(total_nums, dict):
            total_nums = total_nums["value"]

        # Number of pages is derived from the hit count, rounded up.
        page_nums, remainder = divmod(total_nums, page_size)
        if remainder:
            page_nums += 1

        hit_list = []
        for hit in response["hits"]["hits"]:
            source = hit["_source"]
            # A hit carries no "highlight" section when none of the
            # highlighted fields matched (e.g. a match on "tags" only).
            highlight = hit.get("highlight", {})

            hit_dict = {}
            if "title" in highlight:
                hit_dict["title"] = "".join(highlight["title"])
            else:
                hit_dict["title"] = source["title"]
            if "content" in highlight:
                hit_dict["content"] = "".join(highlight["content"])[:500]
            else:
                hit_dict["content"] = source["content"][:500]

            hit_dict["create_date"] = source["create_date"]
            hit_dict["url"] = source["url"]
            hit_dict["score"] = hit["_score"]

            hit_list.append(hit_dict)

        return render(request, "result.html", {"page": page,
                                               "all_hits": hit_list,
                                               "key_words": key_words,
                                               "total_nums": total_nums,
                                               "page_nums": page_nums,
                                               "last_seconds": last_seconds
                                               })
scrapy框架+django框架组合使用
github项目参考
https://github.com/holgerd77/django-dynamic-scraper