【搜索引擎】Solr全文检索近实时查询优化

设置多个搜索建议查找算法

 <searchComponent name="suggest" class="solr.SuggestComponent">
    <lst name="suggester">
      <str name="name">AnalyzingSuggester</str>
      <str name="lookupImpl">AnalyzingLookupFactory</str>      
      <str name="dictionaryImpl">DocumentDictionaryFactory</str>
      <str name="field">suggest_name</str>
      <str name="weightField">suggest_name</str>
	  <str name="payloadField">gid</str>
      <str name="suggestAnalyzerFieldType">text_suggest</str>
      <str name="buildOnStartup">false</str>
	  <str name="buildOnCommit">true</str>
    </lst>
	
	<lst name="suggester">
      <str name="name">AnalyzingInfixSuggester</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>      
      <str name="dictionaryImpl">DocumentDictionaryFactory</str>
      <str name="field">suggest_name</str>
      <str name="weightField">suggest_name</str>
	  <str name="highlight">false</str>
	  <str name="payloadField">gid</str>
      <str name="suggestAnalyzerFieldType">text_suggest</str>
      <str name="buildOnStartup">false</str>
	  <str name="buildOnCommit">true</str>
    </lst>
  </searchComponent>
  • 设置AnalyzingLookupFactory和AnalyzingInfixLookupFactory两种查找算法。首先通过AnalyzingLookupFactory先分析传入文本并将分析后的表单添加到加权FST的查找,然后在查找时执行相同的操作,若查找不够你需求的数量。再通过AnalyzingInfixLookupFactory前缀分析。
  • 例如 AnalyzingInfixLookupFactory "aaa bbb ccc",可通过bbb,或者ccc搜索到,而 AnalyzingLookupFactory必须是先从a开始匹配才能出结果。
  • AnalyzingInfixLookupFactory可通过标签false关闭高亮提示。
  • true可通过此标签设置软提交时才进行文本构建。注意此种需求需要在提交文本不频繁的场景设置。

设置软提交时间

  • 配置在自己core下的conf文件夹中的solrconfig.xml文件
vim solrconfig.xml
    <autoSoftCommit>
      <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
    </autoSoftCommit>

将maxTime可以设置成你需要的时间,单位是毫秒ms.

  • 也可以在solr启动的时候通过命令设置软提交:
bin/solr start -force -Dsolr.autoSoftCommit.maxTime=10000

设置了软提交时间后,当有新的文档提交时,会达到设置的软提交时间才真正提交。

关闭停用词过滤器

在建立索引的时候,fileType定义的字段可不加入停用词过滤器,因为我们要检索的词很短,加入会影响检索结果。

 <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />

Java服务器调用suggest接口时,禁用suggest.build=true

加入suggest.build=true这个条件,每输入一个字符检索的时候都会去重新构建suggest索引,检索效率大大减低。通过上面的软提交方式达到近实时检索。

Java服务器测试用例

/**
 * @author monkjavaer
 * @version V1.0
 * @date 2019/6/21 0021 22:42
 */
public class SolJTest {
    /**
     * 日志
     */
    private static Logger logger = LoggerFactory.getLogger(SolJTest.class);

    /**
     * solr 地址
     */
    private static String SOLR_URL = PropertyReaderUtils.getProValue("solr.address_url");

    /**
     * suggest AnalyzingLookupFactory
     */
    public final static String SOLR_ANALYZINGSUGGESTER = PropertyReaderUtils.getProValue("solr.AnalyzingSuggester");

    /**
     * suggest AnalyzingInfixLookupFactory
     */
    public final static String SOLR_ANALYZINGINFIXSUGGESTER = PropertyReaderUtils.getProValue("solr.AnalyzingInfixSuggester");

    /**
     * HttpSolrClient
     */
    private HttpSolrClient httpSolrClient;

    /**
     * default socket connection timeout in ms
     */
    private static int DEFAULT_CONNECTION_TIMEOUT = 60000;

    /**
     * @return void
     * @author monkjavaer
     * @description get HttpSolrClient
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Before
    public void getHttpSolrClient() {
        logger.info("start getHttpSolrClient......");
        try {
            if (httpSolrClient == null) {
                httpSolrClient = new HttpSolrClient.Builder(SOLR_URL).build();
                httpSolrClient.setConnectionTimeout(DEFAULT_CONNECTION_TIMEOUT);
                httpSolrClient.setDefaultMaxConnectionsPerHost(100);
                httpSolrClient.setMaxTotalConnections(100);
            }
        } catch (Exception e) {
            e.printStackTrace();
            logger.error(e.getMessage());
        }
        logger.info("end getHttpSolrClient......");
    }

    /**
     * @return void
     * @author monkjavaer
     * @description test suggester response object
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Test
    public void testSuggesterResponseObject() throws IOException, SolrServerException {
        SolrQuery query = new SolrQuery("*:*");
        query.set(CommonParams.QT, "/suggest");
        query.set("suggest.dictionary", SOLR_ANALYZINGSUGGESTER, SOLR_ANALYZINGINFIXSUGGESTER);
        query.set("suggest.q", "aoa");
        query.set("suggest.build", true);
        QueryRequest request = new QueryRequest(query);
        QueryResponse queryResponse = request.process(httpSolrClient);
        SuggesterResponse response = queryResponse.getSuggesterResponse();
        Map<String, List<Suggestion>> suggestionsMap = response.getSuggestions();
        assertTrue(suggestionsMap.keySet().contains(SOLR_ANALYZINGSUGGESTER));

        List<Suggestion> mySuggester = suggestionsMap.get(SOLR_ANALYZINGSUGGESTER);
        logger.info(mySuggester.get(0).getTerm());
        logger.info(mySuggester.get(0).getPayload());
    }

    /**
     * @return void
     * @author monkjavaer
     * @description test suggester response terms
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Test
    public void testSuggesterResponseTerms() throws Exception {
        SolrQuery query = new SolrQuery("*:*");
        query.set(CommonParams.QT, "/suggest");
        query.set("suggest.dictionary", SOLR_ANALYZINGSUGGESTER, SOLR_ANALYZINGINFIXSUGGESTER);
        query.set("suggest.q", "aoa");
//        query.set("suggest.build", true);
        QueryRequest request = new QueryRequest(query);
        QueryResponse queryResponse = request.process(httpSolrClient);
        SuggesterResponse response = queryResponse.getSuggesterResponse();
        Map<String, List<String>> dictionary2suggestions = response.getSuggestedTerms();
        assertTrue(dictionary2suggestions.keySet().contains(SOLR_ANALYZINGSUGGESTER));

        List<String> mySuggester = dictionary2suggestions.get(SOLR_ANALYZINGSUGGESTER);
        assertEquals("aoa", mySuggester.get(0));
        assertEquals("aoa bob", mySuggester.get(1));
    }

    /**
     * @return void
     * @author monkjavaer
     * @description 简单查询自动转换为bean
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Test
    public void testSolrQueryGetBeans() throws IOException, SolrServerException {
        final SolrQuery query = new SolrQuery();
        query.setQuery("Zhong Hua Yuan");
        //设置查询列
        query.addField("id");
        query.addField("name");
        //排序
        query.setSort("id", SolrQuery.ORDER.asc);

        final QueryResponse response = httpSolrClient.query("adress", query);
        final List<Adress> adresses = response.getBeans(Adress.class);

        logger.info("Found " + adresses.size() + " documents");
        for (Adress adress : adresses) {
            logger.info("id:{} ; name:{}; ", adress.getId(), adress.getName());
        }
    }

    /**
     * @return void
     * @author monkjavaer
     * @description 批量添加
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Test
    public void testAddIndex() throws IOException, SolrServerException {
        List<Adress> lists = new ArrayList<>();
        Adress adress = new Adress();
        adress.setId(1);
        adress.setName("aoa");
        lists.add(adress);
        //向solr批量添加索引数据
        long startTime = TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS);
        httpSolrClient.addBeans(lists);
        httpSolrClient.commit();
        long endTime = TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS);
        logger.info("commit solr data cost {} ms.", endTime - startTime);
    }
}
posted @ 2019-06-27 18:17  monkjavaer  阅读(1023)  评论(0编辑  收藏  举报