代码改变世界

elasticSearch Java Spring Data Api

2017-12-02 14:43  安松  阅读(3946)  评论(0)    收藏  举报

1. 

  // Bool query matching "经济" in the keyWord field; it must be attached to the search below.
        BoolQueryBuilder qb = QueryBuilders.boolQuery();
        qb.should(QueryBuilders.matchQuery("keyWord", "经济"));
        SearchQuery searchQuery = new NativeSearchQueryBuilder()
                // Previously qb was built but never handed to the builder, so the search ran
                // without it and the highlighter had no query terms to mark.
                .withQuery(qb)
                .withFields("userName", "keyWord", "userId")
                .withHighlightFields(new HighlightBuilder.Field("keyWord").fragmentSize(15))
                .build();
        AggregatedPage<SearchHistory> sampleEntities = elasticsearchTemplate.queryForPage(searchQuery, SearchHistory.class, new SearchResultMapper() {
            /**
             * Maps every hit of the response to a SearchHistory that carries only the userId.
             *
             * @param response search response whose hits are mapped
             * @param clazz    requested element type (SearchHistory.class at the call site above)
             * @param pageable paging information; not used by this mapper
             * @return a page holding the mapped hits, or null when the response contains no hits
             */
            @Override
            public <T> AggregatedPage<T> mapResults(SearchResponse response, Class<T> clazz, Pageable pageable) {
                List<SearchHistory> chunk = new ArrayList<SearchHistory>();
                // The loop body only runs when there is at least one hit, so no emptiness
                // check is needed inside it.
                for (SearchHit searchHit : response.getHits()) {
                    SearchHistory user = new SearchHistory();
                    // The request uses withFields(...), so the value is read from the hit's fields.
                    user.setUserId((Integer) searchHit.getFields().get("userId").getValue());
                    // NOTE(review): the original snippet observed that searchHit.getHighlightFields()
                    // was empty here, presumably because no query was attached - confirm after this fix.
                    chunk.add(user);
                }
                if (chunk.isEmpty()) {
                    // Keep the original contract: no hits -> null page.
                    return null;
                }
                // Unchecked cast: this mapper is only invoked with SearchHistory.class (see call above).
                return new AggregatedPageImpl<T>((List<T>) chunk);
            }
        });
        return sampleEntities;
 .withQuery(termQuery("keyWord", "安徽"))  //单个可以使用
 .withQuery(termQuery("userName", "小经济"))  //两个一块写后面会覆盖前面的也就是以“小经济”为高亮
.withFields("userName","keyWord","userId")
.withHighlightFields(new HighlightBuilder.Field("keyWord").fragmentSize(15),new HighlightBuilder.Field("userName").fragmentSize(15))//.preTags("<am>").postTags("</am>").fragmentSize(15))

.withQuery(QueryBuilders.queryStringQuery("安徽")) //这样组合可以查出来,但是没有高亮
.withFields("userName","keyWord","userId") //去掉这行,在source中,没有高亮
.withHighlightFields(new HighlightBuilder.Field("keyWord").fragmentSize(15),new HighlightBuilder.Field("userName").fragmentSize(15))//.preTags("<am>").postTags("</am>").fragmentSize(15))

.withFilter(boolQuery().should(termQuery("keyWord", "经济")).should(termQuery("keyWord", "安徽"))) //这样组合可以查出来,但是没有高亮
String[]  includes = new String[]{"userName","keyWord","userId"};
.withHighlightFields(new HighlightBuilder.Field("keyWord").fragmentSize(15),new HighlightBuilder.Field("userName").fragmentSize(15))
.withQuery(boolQuery().should(termQuery("keyWord", "安徽")).should(termQuery("userName","小经济"))) 
.withSourceFilter(new FetchSourceFilter(includes,new String[]{})) //这个是可以
结果 : {keyWord='<em>安徽</em>商报报道,<em>安徽</em>宣城市泾县一名', userId=1005, userName='<em>小经济</em>'}



    
// Read the first highlighted fragment of each field from the hit; userId comes from _source.
Map<String, HighlightField> highlighted = searchHit.getHighlightFields();
HighlightField keyWordHighlight = highlighted.get("keyWord");
String keyWord = keyWordHighlight.fragments()[0].toString();
HighlightField userNameHighlight = highlighted.get("userName");
String userName = userNameHighlight.fragments()[0].toString();
Object rawUserId = searchHit.getSource().get("userId");
user.setUserId((Integer) rawUserId);

 

 

2. 测试ik

/**
     * Runs the "ik" tokenizer over a sample phrase and prints the resulting terms.
     *
     * @throws IOException declared by the signature; NOTE(review): presumably raised by
     *                     JSON.json - confirm
     */
    public void test() throws IOException {
        // Collected terms, in the order the analyzer returns them
        List<String> searchTermList = new ArrayList<>();

        AnalyzeRequestBuilder ikRequest = new AnalyzeRequestBuilder(
                elasticsearchTemplate.getClient(), AnalyzeAction.INSTANCE, "test", "一个大的安全帽");
        ikRequest.setTokenizer("ik");

        List<AnalyzeResponse.AnalyzeToken> ikTokenList = ikRequest.execute().actionGet().getTokens();
        for (AnalyzeResponse.AnalyzeToken ikToken : ikTokenList) {
            searchTermList.add(ikToken.getTerm());
        }

        System.out.println(JSON.json(searchTermList));
    }

3.

termQuery 和 matchQuery 和 multiMatchQuery 和 matchPhraseQuery

 4. store属性

 

5. 将时间加入权重中

(1)最终找到的方案

     // Working approach: wrap the normal query and a script score function in one function_score
     // query. `types`, `queryFields` and `pageable` are defined outside this snippet.
     // Script parameter: reference timestamp (presumably epoch milliseconds - confirm).
     Map<String, Object> params = new HashMap<>(); 
        params.put("pubTimeStamp", 1521632807000L);
        String inlineScript = "return (1/(pubTimeStamp-doc['pubTimeStamp'].value.toDouble()+1))/2"; // formula that folds the publish time into the score
        Script script = new Script(inlineScript, ScriptService.ScriptType.INLINE, "groovy", params); // inline groovy script with its parameters
        QueryBuilder queryBuilder = boolQuery().must(matchQuery("title","中国"));// the ordinary query
        ScoreFunctionBuilder scoreFunctionBuilder = ScoreFunctionBuilders.scriptFunction(script);// wrap the script in a score function
        FunctionScoreQueryBuilder query = QueryBuilders.functionScoreQuery(queryBuilder,scoreFunctionBuilder);// combine the ordinary query with the script function

     SearchQuery searchQuery = new NativeSearchQueryBuilder()
// type to search (comparable to a table)
.withTypes(types)
// highlight field definition
          .withHighlightFields(new HighlightBuilder.Field("title").preTags("<font color=\"#ff55ae\">").postTags("</font>"))
// query condition
.withQuery(query) // function_score query (ordinary query plus script)
// returned fields: includes, and the fields to leave out: excludes
.withSourceFilter(new FetchSourceFilter(queryFields,new String[]{})) // this variant works
// paging
.withPageable(pageable)
.build();

上面函数对应的restful

{
  "function_score" : {
    "query" : {
      "bool" : {
        "must" : {
          "match" : {
            "title" : {
              "query" : "中国",
              "type" : "boolean"
            }
          }
        }
      }
    },
    "functions" : [ {
      "script_score" : {
        "script" : {
          "inline" : "return (1/(pubTimeStamp-doc['pubTimeStamp'].value.toDouble()+1))/2",
          "lang" : "groovy",
          "params" : {
            "pubTimeStamp" : 1521632807000
          }
        }
      }
    } ]
  }
}

正确的应该是这种样式的

{
  "query": {
    "function_score": {
      "query": {
        "match": {
          "title": "天安门"
        }
      },
      "script_score": {
        "script": "return (1/(pubTimeStamp-doc['pubTimeStamp'].value.toDouble()+1))/2",
        "lang": "groovy",
        "params": {
          "pubTimeStamp": 1521632807000
        }
      }
    }
  }
}

 

(2)走过的错路

 // Failed attempt, kept for reference: the ordinary query is passed to functionScoreQuery().add(...)
 // together with the score function instead of being the query of the function_score itself.
 // `types`, `queryFields` and `pageable` are defined outside this snippet.
 Map<String, Object> params = new HashMap<>();
        params.put("pubTimeStamp", 1521632807000L);
        String inlineScript = "return (1/(pubTimeStamp-doc['pubTimeStamp'].value.toDouble()+1))/2";
        Script script = new Script(inlineScript, ScriptService.ScriptType.INLINE, "groovy", params);
        QueryBuilder queryBuilder = boolQuery().must(matchQuery("title","中国"));
        ScoreFunctionBuilder scoreFunctionBuilder = ScoreFunctionBuilders.scriptFunction(script);
      SearchQuery searchQuery = new NativeSearchQueryBuilder()
// type to search (comparable to a table)
.withTypes(types)
// highlight field definition
         .withHighlightFields(new HighlightBuilder.Field("title").preTags("<font color=\"#ff55ae\">").postTags("</font>"))
// query condition
.withQuery(functionScoreQuery().add(queryBuilder,scoreFunctionBuilder))// passes the ordinary query and the script function via add(...)
// returned fields: includes, and the fields to leave out: excludes
.withSourceFilter(new FetchSourceFilter(queryFields,new String[]{})) // this variant works
// paging
.withPageable(pageable)
.build();

这样出来的restful

{
  "function_score" : {
    "functions" : [ { //从这里可以看出是不正确的,functions不应该包含filter(对照上面正确的可以看出),查出来的结果就是将不包含"中国"的数据也查出来了,还有就是分数总是为1.0
      "filter" : {
        "bool" : {
          "must" : {
            "match" : {
              "title" : {
                "query" : "中国",
                "type" : "boolean"
              }
            }
          }
        }
      },
      "script_score" : {
        "script" : {
          "inline" : "return (1/(pubTimeStamp-doc['pubTimeStamp'].value.toDouble()+1))/2",
          "lang" : "groovy",
          "params" : {
            "pubTimeStamp" : 1521632807000
          }
        }
      }
    } ]
  }
}

 6. 分数公式

(1)totalScore = _score * doc['id'].value  ====》    总分数 = 原始分数 *  二次评分

"script_score": {
        "script": "doc['id'].value",
        "lang": "groovy"
      }

(2)跟上面一样

"script_score": {
        "script": "_score+doc['id'].value",
        "lang": "groovy"
      }