Lucene 最新版本 3.3 的基本功能用法

Lucene 的最新版本是 3.3，部分用法和 2.x 不同，很多新用法在网上都搜不到。我简单地写了一下 3.3 的基本功能用法，
包括：建立索引、查询等。
  1 /**
  2  * @作者 loyal
  3  * @日期 2011-7-15
  4  * @时间 上午10:44:24
  5  * @描述
  6  * @版本
  7  */
  8 package test.lucene;
  9 
 10 import java.io.BufferedInputStream;
 11 import java.io.File;
 12 import java.io.FileInputStream;
 13 import java.io.FileNotFoundException;
 14 import java.io.IOException;
 15 import java.io.StringReader;
 16 
 17 import org.apache.lucene.analysis.Analyzer;
 18 import org.apache.lucene.analysis.TokenStream;
 19 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 20 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 21 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 22 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 23 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 24 import org.apache.lucene.document.Document;
 25 import org.apache.lucene.document.Field;
 26 import org.apache.lucene.document.Field.Index;
 27 import org.apache.lucene.document.Field.Store;
 28 import org.apache.lucene.index.IndexWriter;
 29 import org.apache.lucene.index.IndexWriterConfig;
 30 import org.apache.lucene.queryParser.MultiFieldQueryParser;
 31 import org.apache.lucene.queryParser.QueryParser;
 32 import org.apache.lucene.search.IndexSearcher;
 33 import org.apache.lucene.search.Query;
 34 import org.apache.lucene.search.TopDocs;
 35 import org.apache.lucene.search.highlight.Formatter;
 36 import org.apache.lucene.search.highlight.Fragmenter;
 37 import org.apache.lucene.search.highlight.Highlighter;
 38 import org.apache.lucene.search.highlight.QueryScorer;
 39 import org.apache.lucene.search.highlight.Scorer;
 40 import org.apache.lucene.search.highlight.SimpleFragmenter;
 41 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
 42 import org.apache.lucene.store.Directory;
 43 import org.apache.lucene.store.FSDirectory;
 44 import org.apache.lucene.store.RAMDirectory;
 45 import org.apache.lucene.util.Version;
 46 
 47 import com.chenlb.mmseg4j.analysis.SimpleAnalyzer;
 48 
 49 /**
 50  * @作者 loyal
 51  * @日期 2011-7-15
 52  * @时间 上午10:44:24
 53  * @描述
 54  * @版本 v1.0
 55  */
 56 public class Test {
 57 
 58 
 59     /**
 60      * @描述
 61      * @作者 loyal
 62      * @日期 2011-7-15
 63      * @时间 上午10:44:24
 64      * @param args
 65      */
 66     static Version matchVersion=Version.LUCENE_33;
 67     static String indexPath ="C:\\index";
 68     static String filePath = "files/testss.txt";
 69     static Analyzer analyzer=new StandardAnalyzer(matchVersion);
 70     static Analyzer a3=new CJKAnalyzer(matchVersion);//二分法分词
 71     static Analyzer a4=new SimpleAnalyzer();//中文分词器mmseg4j中提供的一种分词器
 72     
 73     public static byte[] getFileBytes(File file) {
 74       //略
 95     }
 96 
 97     public static byte[] addByte(byte[] array1, byte[] array2, int len) {
 98        //略
115     }
116     
117     @org.junit.Test
118     public void testCreateIndex() throws Exception{
119         System.out.println(indexPath);
120         System.out.println(filePath);
121         Directory dir =FSDirectory.open(new File(indexPath)) ;
122         Document doc=new Document();
123         Field titleField=new Field("title",new File(filePath).getName(),Store.YES,Index.ANALYZED);
124         String content=new String(Test.getFileBytes(new File(filePath)));
125         Field contentField=new Field("content",content,Store.YES,Index.ANALYZED);
126         doc.add(titleField);
127         doc.add(contentField);
128         IndexWriterConfig iwc=new IndexWriterConfig(matchVersion, a4);
129         //默认create_or_append
130         iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建
131         IndexWriter iw=new IndexWriter(dir, iwc) ;
132         iw.addDocument(doc);
133         iw.close();
134     
135     }
136     @org.junit.Test
137     public void testSearch() throws Exception{
138         Directory dir =FSDirectory.open(new File(indexPath),null) ;
139         IndexSearcher is=new IndexSearcher(dir);
140         System.out.println(is.maxDoc());
141         
142         String[] fields={"title","content"};
143         QueryParser qp=new MultiFieldQueryParser(matchVersion, fields, a4);
144 //        QueryParser qp=new QueryParser(matchVersion, "content", analyzer);
145         Query query=qp.parse("汉字");
146         //System.out.println(query.toString("content"));
147         TopDocs tDocs=is.search(query,10000);//一次查询多少个结果
148         // 准备高亮器
149         Formatter formatter=new SimpleHTMLFormatter("<span class=\"highlighter\">","</span>");
150         Scorer fragmentScorer=new QueryScorer(query);
151         Highlighter highlighter=new Highlighter(formatter, fragmentScorer);
152         Fragmenter fragmenter=new SimpleFragmenter(100);//高亮范围
153         highlighter.setTextFragmenter(fragmenter);
154         
155         int numTotalHits = tDocs.totalHits;
156         System.out.println("总共有【"+numTotalHits+"】条结果");
157         System.out.println(tDocs.scoreDocs.length);
158         //
159         // int  k = tDocs.scoreDocs[0].doc ; //文档内部编号
160         //Document doc = is.doc(k) ; //更具文档编号取出对应文档
161         Document doc = is.doc(0);
162         //doc.getField("content");//获取属性值,与下相同
163         String content = doc.get("content");//获取属性值
164         //如果当前属性值中没有出现关键字,则返回null
165         String hc=highlighter.getBestFragment(a4, "content", content);
166         System.out.println("hc:"+hc);
167         if(hc==null){//如果无结果那么返回原文的前50个字符
168             hc=content.substring(0,Math.min(50,content.length()));
169         //    Field contentField=doc.getFieldable("content");
170         }
171         Field contentField=(Field) doc.getFieldable("content");
172         contentField.setValue(hc);
173 //        doc.getField("content").setValue(hc);
174         System.out.println(doc.get("content"));
175         
176         TokenStream ts=a4.tokenStream("content", new StringReader(content));
177 //         System.out.println("token: "+ts.getAttribute(String.class).toString());
178         OffsetAttribute offsetAttribute = ts.getAttribute(OffsetAttribute.class);
179         TermAttribute termAttribute = ts.getAttribute(TermAttribute.class);
180         while (ts.incrementToken()) {
181             int startOffset = offsetAttribute.startOffset();
182             int endOffset = offsetAttribute.endOffset();
183             String term = termAttribute.term();
184             //System.out.println(term);
185         }
186     }
187     @org.junit.Test
188     public void testCreateRAMandFS() throws Exception{
189         Directory fsDir =FSDirectory.open(new File(indexPath)) ;
190         //1.将索引读取到内存中
191         Directory ramDir =new RAMDirectory(fsDir);
192         //2.填入文档
193         Document doc=new Document();
194         Field titleField=new Field("title",new File(filePath).getName(),Store.YES,Index.ANALYZED);
195         String content=new String(Test.getFileBytes(new File(filePath)));
196         Field contentField=new Field("content",content,Store.YES,Index.ANALYZED);
197         doc.add(titleField);
198         doc.add(contentField);
199         IndexWriterConfig ramiwc=new IndexWriterConfig(matchVersion, analyzer);
200         //默认create_or_append
201 //        ramiwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建
202         IndexWriter ramiw=new IndexWriter(ramDir, ramiwc) ;
203         ramiw.addDocument(doc);
204         ramiw.close();
205         //3.关闭时,写入到文件
206         IndexWriterConfig fsiwc=new IndexWriterConfig(matchVersion, analyzer);
207         //默认create_or_append
208         fsiwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建
209         IndexWriter fsiw=new IndexWriter(fsDir, fsiwc) ;
210         //将内存的索引文件加入到fsiw中
211         fsiw.addIndexes(ramDir);
212         fsiw.commit();
213         //优化索引文件(合并索引文件)
214         fsiw.optimize();
215         fsiw.close();
216         System.out.println("===执行完毕");
217     }
218 }
posted on 2011-07-17 23:50 码魂阅读(2467) 评论(2) 收藏举报
刷新页面返回顶部
lucene最新版本3.3的基本功能用法

导航

公告