Lucene4:创建一个简单查询
1. 要求
环境:
Lucene 4.1版本/IKAnalyzer 2012 FF版本/mmseg4j 1.9版本
功能:
1). 默认查询,显示文档得分及内容
2. 实现代码
package com.clzhang.sample.lucene;

import java.io.*;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
//import org.wltea.analyzer.lucene.IKAnalyzer;

import com.chenlb.mmseg4j.Dictionary;
import com.chenlb.mmseg4j.analysis.ComplexAnalyzer;

import org.junit.Test;

/**
 * Environment: Lucene 4.1 / IKAnalyzer 2012 FF / mmseg4j 1.9.
 * <p>
 * Features:
 * <ol>
 *   <li>Runs a default query and prints each hit's score and content.</li>
 * </ol>
 *
 * @author Administrator
 */
public class QueryDemo {
    // Path of the mmseg4j dictionary directory.
    private static final String MMSEG4J_DICT_PATH = "C:\\solr\\news\\conf";
    // Obtained once when the class is initialized and shared by every analyzer built here.
    private static Dictionary dictionary = Dictionary.getInstance(MMSEG4J_DICT_PATH);

    // Directory where the Lucene index is stored.
    private static final String LUCENE_INDEX_DIR = "C:\\lucene\\data";

    /**
     * Parses a fixed keyword against the {@code text} field, searches the index at
     * {@link #LUCENE_INDEX_DIR} for the top 5 hits, and prints the total hit count,
     * the maximum score, and the score, {@code title} and {@code content} of each hit
     * to standard output.
     * <p>
     * The index reader and directory are released in {@code finally} blocks so they
     * are closed even when opening the reader, parsing, or searching throws.
     *
     * @throws Exception if the index cannot be opened or read, or the keyword cannot be parsed
     */
    @Test
    public void doQuery() throws Exception {
        // Alternative: instantiate the IKAnalyzer tokenizer instead.
        // Analyzer analyzer = new IKAnalyzer();
        // Instantiate the mmseg4j tokenizer; one of the other two mmseg4j analyzers may be used instead.
        Analyzer analyzer = new ComplexAnalyzer(dictionary);

        Directory directory = FSDirectory.open(new File(LUCENE_INDEX_DIR));
        try {
            DirectoryReader reader = DirectoryReader.open(directory);
            try {
                // Build the searcher on top of the opened reader.
                IndexSearcher searcher = new IndexSearcher(reader);

                String keyword = "空姐";
                // Use the QueryParser to build the Query object.
                QueryParser qp = new QueryParser(Version.LUCENE_41, "text", analyzer);
                Query query = qp.parse(keyword);

                // Retrieve the 5 highest-scoring records.
                TopDocs hits = searcher.search(query, 5);
                System.out.println("命中:" + hits.totalHits);
                System.out.println("最高得分:" + hits.getMaxScore());

                // Print every returned hit.
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = searcher.doc(scoreDoc.doc);
                    System.out.println("得分:" + scoreDoc.score + "\t标题:" + doc.get("title")
                            + "\t内容:" + doc.get("content"));
                }
            } finally {
                // Close the reader before the directory it was opened from.
                reader.close();
            }
        } finally {
            directory.close();
        }
    }
}
输出:
命中:1
最高得分:0.4790727
得分:0.4790727 标题:广州打空姐区政委 内容:昨天,记者相继连线广州市越秀区宣传部