Lucene学习
创建索引的例子:
package com.test; import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class TestIndex { public static void main(String[] args) { TestIndex testIndex = new TestIndex(); testIndex.creatIndex(); } public void creatIndex() { // 这是索引存放的位置 try { String indexPath = "f://index//"; Directory dir; dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); iwc.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new Field("title", "我的Lucene", Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("content", "Lucene 是一个简单的开源包", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.close(); } catch (IOException e) { e.printStackTrace(); } } }
在创建索引的时候
目录一般使用 FSDirectory.open() 打开;如果把索引放在内存中(RAMDirectory),内存必须足够大,通常只有对速度要求特别高的项目才这样做
分析器方面,建议使用自己编写的分词器:Lucene 自带的分词器(例如 StandardAnalyzer 对中文基本是按单字切分)往往不能满足实际需要(我自己曾写过一个分词器,前面提到过)
查询索引的代码:
private void search() { try{ String index = "Lucene"; String field = "f://index//"; IndexReader reader = IndexReader .open(FSDirectory.open(new File(field))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); QueryParser parser =new QueryParser(Version.LUCENE_31, "content", analyzer); Query query = parser.parse(index); searcher.search(query, null,100); TopDocs results = searcher.search(query, 10); //只取排名前10的搜索结果 ScoreDoc[] hits = results.scoreDocs; Document doc = null; for (int i = 0; i < hits.length; i++) { doc = searcher.doc(hits[i].doc); String contents = doc.get("content"); } searcher.close(); reader.close(); }catch(Exception e){ e.printStackTrace(); } }
更新索引的例子:(更新是删除和添加的结合)
public void updateIndex() { try { Directory dir = FSDirectory.open(new File("D:/index")); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); iwc.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, iwc); Document doc=new Document(); doc.add(new Field("id","1",Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("content","www",Field.Store.YES,Field.Index.ANALYZED)); writer.updateDocument(new Term("id", "1"),doc); writer.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
删除索引的例子:
public void testDelete() { try { Directory dir = FSDirectory.open(new File("D:/index")); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); iwc.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, iwc); writer.deleteDocuments(new Term("id","1")); writer.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
删除索引时既可以使用 IndexWriter,也可以使用 IndexReader。
在进行段合并之前,
IndexWriter 的删除并不是真正的删除,只是把文档标记为已删除;
可以用 IndexReader 的 maxDoc() 和 numDocs() 方法来查看:maxDoc() 仍然包含已标记删除的文档,numDocs() 则不包含