Lucene3.0 基本索引操作
// Blog post metadata: posted 2011-12-31 14:30 by _9527 (255 views, 0 comments).
package demo.indexing;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import util.TestUtil;

/**
 * Exercises basic index operations (add, read, delete, optimize, update)
 * against a small in-memory index that is rebuilt before every test.
 */
public class IndexingTest extends TestCase {

    // Test data: the i-th element of each array belongs to the i-th document.
    protected String[] ids = { "1", "2" };
    protected String[] unindexed = { "Netherlands", "Italy" };
    protected String[] unstored = { "Amsterdam has lots of bridges", "Venice has lots of canals" };
    protected String[] text = { "Amsterdam", "Venice" };

    private Directory directory;

    /**
     * Runs before each test: creates a fresh {@link RAMDirectory} and indexes
     * one document per entry in {@link #ids}.
     */
    @Override
    protected void setUp() throws Exception {
        directory = new RAMDirectory();

        IndexWriter writer = getWriter();
        try {
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
                doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
                doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
        } finally {
            // Close even if indexing fails so the writer is not leaked.
            writer.close();
        }
    }

    /** Runs after each test: releases the directory created by {@link #setUp()}. */
    @Override
    protected void tearDown() throws Exception {
        if (directory != null) {
            directory.close();
            directory = null;
        }
    }

    /**
     * Creates an {@link IndexWriter} over the test directory.
     *
     * @return a new writer; the caller is responsible for closing it
     * @throws IOException if the writer cannot be opened
     */
    private IndexWriter getWriter() throws IOException {
        return new IndexWriter(
                directory,                              // where the index is stored
                new WhitespaceAnalyzer(),               // analyzer used to tokenize analyzed fields
                IndexWriter.MaxFieldLength.UNLIMITED);  // index every token of every document
    }

    /**
     * Runs a single-term query against the test index and returns the hit count.
     *
     * @param fieldName    field to search
     * @param searchString exact term to look for in that field
     * @return the count reported by {@code TestUtil.hitCount} for the query
     * @throws IOException if the index cannot be opened or searched
     */
    protected int getHitCount(String fieldName, String searchString) throws IOException {
        IndexSearcher searcher = new IndexSearcher(directory);
        try {
            Term t = new Term(fieldName, searchString);
            Query query = new TermQuery(t);
            // TestUtil.hitCount is a project helper; per the original author's note it
            // runs the search and returns the total number of matching documents.
            return TestUtil.hitCount(searcher, query);
        } finally {
            searcher.close();
        }
    }

    /** Verifies that the writer reports one document per test id. */
    public void testIndexWriter() throws IOException {
        IndexWriter writer = getWriter();
        try {
            assertEquals(ids.length, writer.numDocs());
        } finally {
            writer.close();
        }
    }

    /** Verifies that a reader sees one document per test id. */
    public void testIndexReader() throws IOException {
        IndexReader reader = IndexReader.open(directory);
        try {
            assertEquals(ids.length, reader.maxDoc());
            assertEquals(ids.length, reader.numDocs());
        } finally {
            reader.close();
        }
    }

    /** Deletes one document and checks the counts before any optimize call. */
    public void testDeleteBeforeOptimize() throws IOException {
        IndexWriter writer = getWriter();
        try {
            // Both documents are present initially.
            assertEquals(2, writer.numDocs());

            // Delete the first document.
            writer.deleteDocuments(new Term("id", "1"));
            writer.commit();

            // The index now contains a document marked as deleted.
            assertTrue(writer.hasDeletions());
            // maxDoc is expected to still be 2, while numDocs drops to 1.
            assertEquals(2, writer.maxDoc());
            assertEquals(1, writer.numDocs());
        } finally {
            writer.close();
        }
    }

    /** Deletes one document, optimizes, and checks that no deletions remain. */
    public void testDeleterAfterOptimize() throws IOException {
        IndexWriter writer = getWriter();
        try {
            assertEquals(2, writer.numDocs());
            writer.deleteDocuments(new Term("id", "1"));

            // Optimize so that the deletion takes effect.
            writer.optimize();
            writer.commit();

            // No deletions are pending and exactly one document remains.
            assertFalse(writer.hasDeletions());
            assertEquals(1, writer.maxDoc());
            assertEquals(1, writer.numDocs());
        } finally {
            writer.close();
        }
    }

    /** Replaces the document with id "1" and checks that only the new version is searchable. */
    public void testUpdate() throws IOException {
        assertEquals(1, getHitCount("city", "Amsterdam"));

        IndexWriter writer = getWriter();
        try {
            // Build the replacement document for "Beijing".
            Document doc = new Document();
            doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("country", "China", Field.Store.YES, Field.Index.NO));
            doc.add(new Field("contents", "Beijing is Royal park", Field.Store.NO, Field.Index.ANALYZED));
            doc.add(new Field("city", "Beijing", Field.Store.YES, Field.Index.ANALYZED));

            // Replace whatever document currently has id "1".
            writer.updateDocument(new Term("id", "1"), doc);
        } finally {
            // The writer must be closed before the searches below run.
            writer.close();
        }

        // The old document is gone...
        assertEquals(0, getHitCount("city", "Amsterdam"));
        // ...and the new one has been indexed.
        assertEquals(1, getHitCount("city", "Beijing"));
    }
}
/*
 * 添加文档的方法:
 *   1. addDocument(Document):使用默认的分析器添加文档,该分析器在创建IndexWriter对象时指定,用于语汇单元化操作
 *   2. addDocument(Document, Analyzer):使用指定的分析器添加文档并进行语汇单元化操作
 *
 * 删除文档的方法:
 *   1. deleteDocuments(Term):删除包含该项的所有文档
 *   2. deleteDocuments(Term[]):删除包含项数组中任一元素的所有文档
 *   3. deleteDocuments(Query):删除匹配查询语句的所有文档
 *   4. deleteDocuments(Query[]):删除匹配查询语句数组中任一元素的所有文档
 *   5. deleteAll():删除索引中的所有文档
 *
 * 更新文档的方法:
 *   1. updateDocument(Term, Document):首先删除包含Term变量的所有文档,然后使用writer的默认分析器添加新文档
 *   2. updateDocument(Term, Document, Analyzer):功能和上述一致,区别在于它可以指定分析器添加文档
 */
下载: