lucene最新版本3.3的基本功能用法

Lucene 的最新版本是 3.3,用法与 2.x 在某些地方不同,很多内容在网上都搜不到。我简单地写了一下 3.3 的基本功能用法,
包括:建立索引、查询等。

  1 /**
2 * @作者 loyal
3 * @日期 2011-7-15
4 * @时间 上午10:44:24
5 * @描述
6 * @版本
7 */
8 package test.lucene;
9
10 import java.io.BufferedInputStream;
11 import java.io.File;
12 import java.io.FileInputStream;
13 import java.io.FileNotFoundException;
14 import java.io.IOException;
15 import java.io.StringReader;
16
17 import org.apache.lucene.analysis.Analyzer;
18 import org.apache.lucene.analysis.TokenStream;
19 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
20 import org.apache.lucene.analysis.standard.StandardAnalyzer;
21 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
22 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
23 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
24 import org.apache.lucene.document.Document;
25 import org.apache.lucene.document.Field;
26 import org.apache.lucene.document.Field.Index;
27 import org.apache.lucene.document.Field.Store;
28 import org.apache.lucene.index.IndexWriter;
29 import org.apache.lucene.index.IndexWriterConfig;
30 import org.apache.lucene.queryParser.MultiFieldQueryParser;
31 import org.apache.lucene.queryParser.QueryParser;
32 import org.apache.lucene.search.IndexSearcher;
33 import org.apache.lucene.search.Query;
34 import org.apache.lucene.search.TopDocs;
35 import org.apache.lucene.search.highlight.Formatter;
36 import org.apache.lucene.search.highlight.Fragmenter;
37 import org.apache.lucene.search.highlight.Highlighter;
38 import org.apache.lucene.search.highlight.QueryScorer;
39 import org.apache.lucene.search.highlight.Scorer;
40 import org.apache.lucene.search.highlight.SimpleFragmenter;
41 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
42 import org.apache.lucene.store.Directory;
43 import org.apache.lucene.store.FSDirectory;
44 import org.apache.lucene.store.RAMDirectory;
45 import org.apache.lucene.util.Version;
46
47 import com.chenlb.mmseg4j.analysis.SimpleAnalyzer;
48
49 /**
50 * @作者 loyal
51 * @日期 2011-7-15
52 * @时间 上午10:44:24
53 * @描述
54 * @版本 v1.0
55 */
56 public class Test {
57
58
59 /**
60 * Shared settings used by the test methods in this class: the Lucene
61 * compatibility version, the index directory, the input file and the analyzers.
62 * Author: loyal
63 * Date: 2011-7-15, 10:44:24 AM
64 * NOTE(review): the original comment carried a stale "param args" tag; no method follows it.
65 */
66 static Version matchVersion=Version.LUCENE_33;
67 static String indexPath ="C:\\index";
68 static String filePath = "files/testss.txt";
69 static Analyzer analyzer=new StandardAnalyzer(matchVersion);
70 static Analyzer a3=new CJKAnalyzer(matchVersion);// bigram tokenization
71 static Analyzer a4=new SimpleAnalyzer();// one of the analyzers provided by the Chinese tokenizer mmseg4j
72
72
73 public static byte[] getFileBytes(File file) {
74 // body omitted in the original post; presumably returns the file's contents as bytes (callers decode the result into a String) - TODO confirm
95 }
96
97 public static byte[] addByte(byte[] array1, byte[] array2, int len) {
98 // body omitted in the original post; its behavior is not shown here
115 }
116
117 @org.junit.Test
118 public void testCreateIndex() throws Exception{
119 System.out.println(indexPath);
120 System.out.println(filePath);
121 Directory dir =FSDirectory.open(new File(indexPath)) ;
122 Document doc=new Document();
123 Field titleField=new Field("title",new File(filePath).getName(),Store.YES,Index.ANALYZED);
124 String content=new String(Test.getFileBytes(new File(filePath)));
125 Field contentField=new Field("content",content,Store.YES,Index.ANALYZED);
126 doc.add(titleField);
127 doc.add(contentField);
128 IndexWriterConfig iwc=new IndexWriterConfig(matchVersion, a4);
129 //默认create_or_append
130 iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建
131 IndexWriter iw=new IndexWriter(dir, iwc) ;
132 iw.addDocument(doc);
133 iw.close();
134
135 }
136 @org.junit.Test
137 public void testSearch() throws Exception{
138 Directory dir =FSDirectory.open(new File(indexPath),null) ;
139 IndexSearcher is=new IndexSearcher(dir);
140 System.out.println(is.maxDoc());
141
142 String[] fields={"title","content"};
143 QueryParser qp=new MultiFieldQueryParser(matchVersion, fields, a4);
144 // QueryParser qp=new QueryParser(matchVersion, "content", analyzer);
145 Query query=qp.parse("汉字");
146 //System.out.println(query.toString("content"));
147 TopDocs tDocs=is.search(query,10000);//一次查询多少个结果
148 // 准备高亮器
149 Formatter formatter=new SimpleHTMLFormatter("<span class=\"highlighter\">","</span>");
150 Scorer fragmentScorer=new QueryScorer(query);
151 Highlighter highlighter=new Highlighter(formatter, fragmentScorer);
152 Fragmenter fragmenter=new SimpleFragmenter(100);//高亮范围
153 highlighter.setTextFragmenter(fragmenter);
154
155 int numTotalHits = tDocs.totalHits;
156 System.out.println("总共有【"+numTotalHits+"】条结果");
157 System.out.println(tDocs.scoreDocs.length);
158 //
159 // int k = tDocs.scoreDocs[0].doc ; //文档内部编号
160 //Document doc = is.doc(k) ; //更具文档编号取出对应文档
161 Document doc = is.doc(0);
162 //doc.getField("content");//获取属性值,与下相同
163 String content = doc.get("content");//获取属性值
164 //如果当前属性值中没有出现关键字,则返回null
165 String hc=highlighter.getBestFragment(a4, "content", content);
166 System.out.println("hc:"+hc);
167 if(hc==null){//如果无结果那么返回原文的前50个字符
168 hc=content.substring(0,Math.min(50,content.length()));
169 // Field contentField=doc.getFieldable("content");
170 }
171 Field contentField=(Field) doc.getFieldable("content");
172 contentField.setValue(hc);
173 // doc.getField("content").setValue(hc);
174 System.out.println(doc.get("content"));
175
176 TokenStream ts=a4.tokenStream("content", new StringReader(content));
177 // System.out.println("token: "+ts.getAttribute(String.class).toString());
178 OffsetAttribute offsetAttribute = ts.getAttribute(OffsetAttribute.class);
179 TermAttribute termAttribute = ts.getAttribute(TermAttribute.class);
180 while (ts.incrementToken()) {
181 int startOffset = offsetAttribute.startOffset();
182 int endOffset = offsetAttribute.endOffset();
183 String term = termAttribute.term();
184 //System.out.println(term);
185 }
186 }
187 @org.junit.Test
188 public void testCreateRAMandFS() throws Exception{
189 Directory fsDir =FSDirectory.open(new File(indexPath)) ;
190 //1.将索引读取到内存中
191 Directory ramDir =new RAMDirectory(fsDir);
192 //2.填入文档
193 Document doc=new Document();
194 Field titleField=new Field("title",new File(filePath).getName(),Store.YES,Index.ANALYZED);
195 String content=new String(Test.getFileBytes(new File(filePath)));
196 Field contentField=new Field("content",content,Store.YES,Index.ANALYZED);
197 doc.add(titleField);
198 doc.add(contentField);
199 IndexWriterConfig ramiwc=new IndexWriterConfig(matchVersion, analyzer);
200 //默认create_or_append
201 // ramiwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建
202 IndexWriter ramiw=new IndexWriter(ramDir, ramiwc) ;
203 ramiw.addDocument(doc);
204 ramiw.close();
205 //3.关闭时,写入到文件
206 IndexWriterConfig fsiwc=new IndexWriterConfig(matchVersion, analyzer);
207 //默认create_or_append
208 fsiwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建
209 IndexWriter fsiw=new IndexWriter(fsDir, fsiwc) ;
210 //将内存的索引文件加入到fsiw中
211 fsiw.addIndexes(ramDir);
212 fsiw.commit();
213 //优化索引文件(合并索引文件)
214 fsiw.optimize();
215 fsiw.close();
216 System.out.println("===执行完毕");
217 }
218 }

posted on 2011-07-17 23:50  码魂  阅读(2457)  评论(2)  编辑  收藏  举报

导航