import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class IndexTools {
    /**
     * Obtain an IndexWriter for the given directory.
     *
     * @param dir index directory
     * @param analyzer analyzer used to tokenize documents
     * @return a configured IndexWriter
     * @throws IOException if the directory cannot be opened for writing
     */
    private IndexWriter getIndexWriter(Directory dir, Analyzer analyzer) throws IOException {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        return new IndexWriter(dir, iwc);
    }
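
    /**
     * A minimal sketch (an addition, not part of the original example) showing
     * how the IndexWriterConfig could be tuned before the writer is created.
     * OpenMode.CREATE rebuilds the index from scratch on each run, which suits
     * a throwaway demo; the 64 MB buffer size is an arbitrary choice.
     */
    private IndexWriter getFreshIndexWriter(Directory dir, Analyzer analyzer) throws IOException {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite any existing index
        iwc.setRAMBufferSizeMB(64.0); // flush once ~64 MB of documents are buffered
        return new IndexWriter(dir, iwc);
    }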

    /**
     * Close an IndexWriter, releasing its write lock.
     *
     * @param indexWriter the writer to close; a null writer is ignored
     * @throws IOException if the writer cannot be closed cleanly
     */
    private void closeWriter(IndexWriter indexWriter) throws IOException {
        if (indexWriter != null) {
            indexWriter.close();
        }
    }
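
    /**
     * A hedged alternative to the explicit close above: IndexWriter implements
     * Closeable in Lucene 4.0, so Java 7 try-with-resources closes the writer
     * even when indexing throws. This helper is a sketch and is not used by
     * the rest of this class.
     */
    private void addOneDocument(Directory dir, Analyzer analyzer, Document doc) throws IOException {
        try (IndexWriter writer = getIndexWriter(dir, analyzer)) {
            writer.addDocument(doc); // writer.close() runs automatically on exit
        }
    }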

    /**
     * Build the sample index: two documents, each with an analyzed
     * "filename" (title) field and a "content" field.
     */
    public void createIndex() {
        String indexPath = "D:/luceneindex"; // directory that holds the index files
        // IKAnalyzer(false) uses the finest-grained segmentation; true enables smart segmentation
        Analyzer analyzer = new IKAnalyzer(true);
        IndexWriter indexWriter = null;
        Directory directory = null;
        try {
            directory = FSDirectory.open(new File(indexPath));
            indexWriter = getIndexWriter(directory, analyzer);
        } catch (Exception e) {
            System.out.println("Failed to open the index for writing!");
            return; // without a writer, the steps below would throw a NullPointerException
        }
        // Add the documents
        try {
            Document document = new Document();
            document.add(new TextField("filename", "标题:起点", Store.YES));
            document.add(new TextField("content", "内容:我是一名程序员", Store.YES));
            indexWriter.addDocument(document);
            Document document1 = new Document();
            document1.add(new TextField("filename", "标题:终点", Store.YES));
            document1.add(new TextField("content", "内容:我不再只是程序员", Store.YES));
            indexWriter.addDocument(document1);
            indexWriter.commit();
        } catch (IOException e1) {
            System.out.println("Failed to add documents to the index!");
        }
        try {
            closeWriter(indexWriter);
        } catch (Exception e) {
            System.out.println("Failed to close the index writer!");
        }
    }
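
    /**
     * A sketch of deleting from the index (an addition; the original example
     * only ever adds documents). deleteDocuments(Query) removes every matching
     * document, and commit() makes the deletion visible to readers opened
     * afterwards.
     */
    public void deleteFromIndex(Query query) throws IOException {
        Directory directory = FSDirectory.open(new File("D:/luceneindex"));
        IndexWriter indexWriter = getIndexWriter(directory, new IKAnalyzer(true));
        indexWriter.deleteDocuments(query); // a no-op if nothing matches
        indexWriter.commit();
        closeWriter(indexWriter);
    }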

    /**
     * Search the index for a keyword across both fields, print the hits, then
     * hand off to higherIndex() for paging and highlighting.
     *
     * @throws ParseException if the query cannot be parsed
     * @throws IOException on index access errors
     * @throws InvalidTokenOffsetsException if highlighting fails
     */
    public void searchIndex() throws ParseException, IOException, InvalidTokenOffsetsException {
        String indexPath = "D:/luceneindex"; // directory that holds the index files
        // IKAnalyzer(false) uses the finest-grained segmentation; true enables smart segmentation
        Analyzer analyzer = new IKAnalyzer(true);
        Directory directory = null;
        try {
            directory = FSDirectory.open(new File(indexPath));
        } catch (Exception e) {
            System.out.println("Failed to open the index directory!");
            return;
        }
        IndexReader ireader = null;
        IndexSearcher isearcher = null;
        try {
            ireader = DirectoryReader.open(directory);
        } catch (IOException e) {
            System.out.println("Failed to open the index reader!");
            return;
        }
        isearcher = new IndexSearcher(ireader);
        String keyword = "程序员";
        // Build a Query with a QueryParser.
        // e.g. a single-field query:
        // String fieldName = "content";
        // QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
        String[] fields = { "filename", "content" };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_40, fields, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        // Fetch the 25 most relevant hits
        TopDocs topDocs = isearcher.search(query, 25);
        System.out.println("Hits: " + topDocs.totalHits);
        // Print the results; totalHits may exceed the 25 docs actually returned,
        // so iterate over scoreDocs.length rather than totalHits
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("Document: " + targetDoc.toString());
        }
        // Paging and highlighting
        higherIndex(analyzer, isearcher, query, topDocs);
    }
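
    /**
     * A paging sketch (an addition) using Lucene's searchAfter API: pass the
     * last ScoreDoc of the previous page to collect the next pageSize hits
     * without re-collecting the earlier ones. The parameter names here are
     * illustrative.
     */
    public TopDocs nextPage(IndexSearcher isearcher, Query query, ScoreDoc lastDocOfPreviousPage, int pageSize)
            throws IOException {
        return isearcher.searchAfter(lastDocOfPreviousPage, query, pageSize);
    }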

    public static void main(String[] args) {
        IndexTools tool = new IndexTools();
        tool.createIndex(); // build the sample index before searching it
        try {
            tool.searchIndex();
        } catch (ParseException e) {
            System.out.println("Failed to parse the query");
        } catch (IOException e) {
            System.out.println("Failed to read the index");
        } catch (InvalidTokenOffsetsException e) {
            System.out.println("Highlighting failed");
        }
    }

    /**
     * Page through the hits and print them with the matched keyword highlighted.
     *
     * @param analyzer analyzer used to re-tokenize the stored fields
     * @param isearcher searcher over the open index
     * @param query the parsed query, used to score fragments
     * @param topDocs the results of the initial search
     * @throws IOException on index access errors
     * @throws InvalidTokenOffsetsException if token offsets are invalid for highlighting
     */
    public void higherIndex(Analyzer analyzer, IndexSearcher isearcher, Query query, TopDocs topDocs)
            throws IOException, InvalidTokenOffsetsException {
        if (topDocs.totalHits == 0) {
            return; // TopScoreDocCollector.create rejects a size of 0
        }
        TopScoreDocCollector results = TopScoreDocCollector.create(topDocs.totalHits, false);
        isearcher.search(query, results);
        // Page through the collected docs: topDocs(start offset, page size)
        ScoreDoc[] docs = results.topDocs(1, 2).scoreDocs;
        for (int i = 0; i < docs.length; i++) {
            Document targetDoc = isearcher.doc(docs[i].doc);
            System.out.println("Document: " + targetDoc.toString());
        }
        // HTML tags wrapped around the matched keyword; requires the
        // lucene-highlighter module (here the 4.0 jar, matching Version.LUCENE_40)
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        for (int i = 0; i < docs.length; i++) {
            Document doc = isearcher.doc(docs[i].doc);
            // Highlight the title; getBestFragment returns null when the field
            // contains no match, so fall back to the raw field value
            TokenStream tokenStream1 = analyzer.tokenStream("filename", new StringReader(doc.get("filename")));
            String title = highlighter.getBestFragment(tokenStream1, doc.get("filename"));
            if (title == null) {
                title = doc.get("filename");
            }
            // Highlight the content
            TokenStream tokenStream2 = analyzer.tokenStream("content", new StringReader(doc.get("content")));
            String content = highlighter.getBestFragment(tokenStream2, doc.get("content"));
            if (content == null) {
                content = doc.get("content");
            }
            System.out.println(doc.get("filename") + " : " + title + " : " + content);
        }
    }
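
    /**
     * A hedged variant (an addition): by default the Highlighter truncates
     * fragments at 100 characters. Setting a SimpleSpanFragmenter explicitly
     * lets the caller choose how much context surrounds the match.
     */
    private Highlighter buildHighlighter(Query query, int fragmentSize) {
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<em>", "</em>"), scorer);
        highlighter.setTextFragmenter(
                new org.apache.lucene.search.highlight.SimpleSpanFragmenter(scorer, fragmentSize));
        return highlighter;
    }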
}