Java基础之----Elasticsearch(Java客户端搜索实例)
概述
Elasticsearch(简称es)是使用Java编写的,对Java原生支持较好。下面是使用Java写的一个demo:根据关键字进行搜索,对搜索结果重新排序,并对部分字段进行高亮处理。
public class EsIndexService { protected Logger logger = LoggerFactory.getLogger(getClass()); public static final String SCS_WEB_INDEX = "scs_web"; public static final String QUESTION_TYPE = "question"; public static final String QUESTION_PAIR_TYPE = "question_pair"; public static final String DELETED_FLAG = "1"; public static final String FIELD_CONTENT = "content"; public static final String FIELD_ANSWER = "answer"; public static final String FIELD_HRELATION_TYPE = "hRelation.relationType"; public static final String FIELD_MRELATION_TYPE = "mRelation.relationType"; public static final String FIELD_QUESTION1_ID = "question1.id"; public static final String FIELD_QUESTION2_ID = "question2.id"; public static final String FIELD_HRELATION_START_DATE = "hRelation.startDate"; public static final String FIELD_HRELATION_EXPIRE_DATE = "hRelation.expireDate"; public static final String FIELD_SCORE = "score"; public static final String FIELD_COUNT = "count"; public static final Float COUNT_FACTOR = 0.1f; @Autowired private QuestionDao questionDao; @Autowired private QuestionPairDao questionPairDao; @Autowired private DataSourceTransactionManager transactionManager; @Autowired private SearchLogDao searchLogDao; public Page<QuestionSearchResult> searchQuestion(Question question) throws IOException { //搜索问题为空直接返回 if (StringUtils.isEmpty(question.getContent())) return emptyResult(question); //es原始搜索 MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery(FIELD_CONTENT, question.getContent()); //开启模糊匹配 matchQueryBuilder.fuzziness(Fuzziness.AUTO); //对原始结果进行重排序,采用的公式为newScore = oldScore + log(1 + 0.1*count),启动count为数据库中一个字段,意思是点击次数,这个优化的目的就是让点击数大的排在前面 ScoreFunctionBuilder scoreFunctionBuilder = ScoreFunctionBuilders. 
fieldValueFactorFunction(FIELD_COUNT).factor(COUNT_FACTOR).modifier(FieldValueFactorFunction.Modifier.LOG1P); FunctionScoreQueryBuilder queryBuilder = QueryBuilders.functionScoreQuery(matchQueryBuilder, scoreFunctionBuilder); //这个就是采用oldScore + log(1 + 0.1*count),控制中间的加号的,默认是multiply,就是相乘 queryBuilder.boostMode(CombineFunction.SUM); return searchQuestion(question, queryBuilder, null, null); } public Page<QuestionSearchResult> topByCount(Question question) throws IOException { return searchQuestion(question, null, FIELD_COUNT, SortOrder.DESC); } public Page<QuestionSearchResult> searchQuestion(Question question, QueryBuilder queryBuilder, String orderBy, SortOrder order) throws IOException { Page<QuestionSearchResult> resultPage = emptyResult(question); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.trackScores(true); //进行分页的操作,这个是设置每页的大小 searchSourceBuilder.size(question.getPage().getPageSize()); if (question.getPage().getPageNo() > 1) { //设置从哪里开始搜索 searchSourceBuilder.from((question.getPage().getPageNo() - 1) * question.getPage().getPageSize()); } if (queryBuilder != null) searchSourceBuilder.query(queryBuilder); if (StringUtils.isNotEmpty(orderBy)) searchSourceBuilder.sort(orderBy, order); //高亮 HighlightBuilder highlightBuilder = new HighlightBuilder(); //高亮的结果会使用<em>中间是需要高亮的数据</em>括住 highlightBuilder.field(FIELD_CONTENT).field(FIELD_ANSWER).requireFieldMatch(false); searchSourceBuilder.highlighter(highlightBuilder); SearchRequest searchRequest = new SearchRequest(SCS_WEB_INDEX); searchRequest.types(QUESTION_TYPE); searchRequest.source(searchSourceBuilder); SearchResponse response = EsUtil.client.search(searchRequest); logger.info("搜索问题[{}],结果:{}条,最高score:{},耗时:{}ms", question.getContent(), response.getHits().totalHits, response.getHits().getMaxScore(), response.getTookInMillis()); int maxResultCount = Integer.parseInt(DictUtils.getDictValue("max_result_count", "scs_config", "30")); 
resultPage.setCount(response.getHits().totalHits > maxResultCount ? maxResultCount : response.getHits().totalHits); if (response.getHits().totalHits < 1) { return resultPage; } BigDecimal maxScore = BigDecimal.valueOf(response.getHits().getMaxScore()); for (SearchHit hit : response.getHits()) { QuestionSearchResult tmp = new QuestionSearchResult(); tmp.setSimilarQuestion((Question) JsonMapper.fromJsonString(hit.getSourceAsString(), Question.class)); BigDecimal currentDocScore = BigDecimal.valueOf(hit.getScore()); tmp.setEsScore(currentDocScore.divide(maxScore, 4, BigDecimal.ROUND_HALF_EVEN)); logger.debug("搜索结果 score: {}, question: {}, count:{}, id: {}", hit.getScore(), tmp.getSimilarQuestion().getContent(), tmp.getSimilarQuestion().getCount(), hit.getId()); List<StringHighlightField> highlightList = new ArrayList<>(); for (String key : hit.getHighlightFields().keySet()) { highlightList.add(StringHighlightField.fromHighlightField(hit.getHighlightFields().get(key))); } tmp.setHighlightList(highlightList); resultPage.getList().add(tmp); } searchLogDao.insert(question.getContent(), response.getHits().getTotalHits()); return resultPage; }
public void bulkQuestion(List<Question> questions) throws IOException { bulk(questions, QUESTION_TYPE); } public void bulkQuestionPairs(List<QuestionPair> questionPairs) throws IOException { bulk(questionPairs, QUESTION_PAIR_TYPE); } public void bulk(List<?> list, String type) throws IOException { BulkRequest bulkRequest = new BulkRequest(); for (Object obj : list) { DataEntity entity = (DataEntity) obj; if (DELETED_FLAG.equals(entity.getDelFlag())) {
//删除es中的数据,通过id,es的索引是scs_web,id只是type中的一个字段 DeleteRequest request = new DeleteRequest(SCS_WEB_INDEX, type, entity.getId()); bulkRequest.add(request); } else {
// 设置一个查询的条件,使用id查询,如果查找不到,则添加文档数据
//这个IndexRequest中有个参数,OpType.INDEX,默认是INDEX,意思就是如果es已经存在这条记录,会强制覆盖,而不是更新 IndexRequest request = new IndexRequest(SCS_WEB_INDEX, type, entity.getId()); request.source(JsonMapper.toJsonString(entity), XContentType.JSON); bulkRequest.add(request); } } EsUtil.client.bulk(bulkRequest); } @Scheduled(cron = "0/10 * * * * ?") public void cronIndex() throws IOException { logger.info("定时索引更新开始"); boolean continueFlag = true; while (continueFlag) { DefaultTransactionDefinition trans = new DefaultTransactionDefinition(); trans.setTimeout(10); trans.setPropagationBehavior(DefaultTransactionDefinition.PROPAGATION_REQUIRES_NEW); TransactionStatus transStatus = transactionManager.getTransaction(trans); try { List<Question> questionList = questionDao.findUnIndexed(Integer.valueOf(DictUtils.getDictValue("import_size", "scs_config", "100"))); if (!questionList.isEmpty()) { this.bulkQuestion(questionList); questionDao.updateIndexFlag(questionList); } else { continueFlag = false; } transactionManager.commit(transStatus); } catch (Throwable e) { transactionManager.rollback(transStatus); throw e; } } continueFlag = true; while (continueFlag) { DefaultTransactionDefinition trans = new DefaultTransactionDefinition(); trans.setPropagationBehavior(DefaultTransactionDefinition.PROPAGATION_REQUIRES_NEW); trans.setTimeout(10); TransactionStatus transStatus = transactionManager.getTransaction(trans); try { List<QuestionPair> questionPairList = questionPairDao.findUnIndexed( Integer.valueOf(DictUtils.getDictValue("import_size", "scs_config", "100")), new BigDecimal(DictUtils.getDictValue("mark_score_min", "scs_config", "0.6"))); if (!questionPairList.isEmpty()) { this.bulkQuestionPairs(questionPairList); questionPairDao.updateIndexFlag(questionPairList); } else { continueFlag = false; } transactionManager.commit(transStatus); } catch (Throwable e) { transactionManager.rollback(transStatus); throw e; } } logger.info("定时索引更新结束"); }
参考:https://blog.csdn.net/prestigeding/article/details/83351064