代码改变世界

Lucene3.0 基本索引操作

2011-12-31 14:30  _9527  阅读(255)  评论(0)  编辑  收藏  举报
package demo.indexing;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import util.TestUtil;


/**
 * JUnit 3 test case exercising basic index operations with an
 * {@link IndexWriter}: adding, deleting and updating documents.
 *
 * <p>Each test runs against a fresh in-memory index that {@link #setUp()}
 * populates with one document per entry of the test-data arrays.
 */
public class IndexingTest extends TestCase {
	// Test data: element i of every array belongs to document i.
	protected String[] ids = { "1", "2" };
	protected String[] unindexed = { "Netherlands", "Italy" };
	protected String[] unstored = { "Amsterdam has lots of bridges",
			"Venice has lots of canals" };
	protected String[] text = { "Amsterdam", "Venice" };

	private Directory directory;

	/**
	 * Runs before every test: creates a new in-memory directory and indexes
	 * one document per test-data entry.
	 */
	@Override
	protected void setUp() throws Exception {
		super.setUp();
		directory = new RAMDirectory();
		IndexWriter writer = getWriter();
		try {
			for (int i = 0; i < ids.length; i++) {
				Document doc = new Document();
				// id: stored, indexed as a single un-analyzed term
				doc.add(new Field("id", ids[i], Field.Store.YES,
						Field.Index.NOT_ANALYZED));
				// country: stored only, not indexed
				doc.add(new Field("country", unindexed[i], Field.Store.YES,
						Field.Index.NO));
				// contents: analyzed and indexed, but not stored
				doc.add(new Field("contents", unstored[i], Field.Store.NO,
						Field.Index.ANALYZED));
				// city: stored and analyzed
				doc.add(new Field("city", text[i], Field.Store.YES,
						Field.Index.ANALYZED));
				writer.addDocument(doc);
			}
		} finally {
			// Close even if indexing fails so the writer is not leaked.
			writer.close();
		}
	}

	/** Runs after every test: closes the directory created by {@link #setUp()}. */
	@Override
	protected void tearDown() throws Exception {
		try {
			if (directory != null) {
				directory.close();
				directory = null;
			}
		} finally {
			super.tearDown();
		}
	}

	/**
	 * Creates an {@link IndexWriter} on the test directory.
	 *
	 * @return a new writer; the caller is responsible for closing it
	 * @throws IOException if the writer cannot be created
	 */
	private IndexWriter getWriter() throws IOException {
		return new IndexWriter(directory,	// where the index is stored
				new WhitespaceAnalyzer(),	// analyzer used to tokenize analyzed fields
				IndexWriter.MaxFieldLength.UNLIMITED);	// index every token of each field
	}

	/**
	 * Runs a single-term query against the test directory.
	 *
	 * @param fieldName    the field to search
	 * @param searchString the exact term to look for
	 * @return the hit count reported by {@code TestUtil.hitCount}
	 * @throws IOException if the index cannot be read
	 */
	protected int getHitCount(String fieldName, String searchString)
			throws IOException {
		IndexSearcher searcher = new IndexSearcher(directory);
		try {
			Query query = new TermQuery(new Term(fieldName, searchString));
			// TestUtil.hitCount is a project helper that runs the search
			// and returns the total number of matching documents.
			return TestUtil.hitCount(searcher, query);
		} finally {
			// Release the searcher even when the search throws.
			searcher.close();
		}
	}

	/** Verifies the number of documents seen by a writer. */
	public void testIndexWriter() throws IOException {
		IndexWriter writer = getWriter();
		try {
			assertEquals(ids.length, writer.numDocs());
		} finally {
			writer.close();
		}
	}

	/** Verifies the number of documents seen by a reader. */
	public void testIndexReader() throws IOException {
		IndexReader reader = IndexReader.open(directory);
		try {
			assertEquals(ids.length, reader.maxDoc());
			assertEquals(ids.length, reader.numDocs());
		} finally {
			reader.close();
		}
	}

	/**
	 * Deletes one document without optimizing and checks that the writer
	 * reports deletions, with {@code maxDoc} unchanged and {@code numDocs}
	 * reduced by one.
	 */
	public void testDeleteBeforeOptimize() throws IOException {
		IndexWriter writer = getWriter();
		try {
			// All documents are present initially.
			assertEquals(ids.length, writer.numDocs());
			// Delete the first document.
			writer.deleteDocuments(new Term("id", "1"));
			writer.commit();
			// The index now contains a document marked as deleted.
			assertTrue(writer.hasDeletions());
			// One document deleted, one remaining.
			assertEquals(ids.length, writer.maxDoc());
			assertEquals(ids.length - 1, writer.numDocs());
		} finally {
			writer.close();
		}
	}

	/**
	 * Deletes one document, optimizes, and checks that the writer no longer
	 * reports deletions and that both {@code maxDoc} and {@code numDocs}
	 * are reduced by one.
	 */
	public void testDeleterAfterOptimize() throws IOException {
		IndexWriter writer = getWriter();
		try {
			assertEquals(ids.length, writer.numDocs());
			writer.deleteDocuments(new Term("id", "1"));
			// Optimizing makes the deletion take effect.
			writer.optimize();
			writer.commit();

			// No pending deletions; one document remains.
			assertFalse(writer.hasDeletions());
			assertEquals(ids.length - 1, writer.maxDoc());
			assertEquals(ids.length - 1, writer.numDocs());
		} finally {
			writer.close();
		}
	}

	/**
	 * Replaces the document with id "1" and checks that the old city is no
	 * longer found while the new one is.
	 */
	public void testUpdate() throws IOException {

		assertEquals(1, getHitCount("city", "Amsterdam"));

		IndexWriter writer = getWriter();
		try {
			// Build the replacement document for Beijing.
			Document doc = new Document();
			doc.add(new Field("id", "1", Field.Store.YES,
					Field.Index.NOT_ANALYZED));
			doc.add(new Field("country", "China", Field.Store.YES,
					Field.Index.NO));
			doc.add(new Field("contents", "Beijing is Royal park",
					Field.Store.NO, Field.Index.ANALYZED));
			doc.add(new Field("city", "Beijing", Field.Store.YES,
					Field.Index.ANALYZED));

			// Replace the document(s) matching id:1 with the new version.
			writer.updateDocument(new Term("id", "1"), doc);
		} finally {
			writer.close();
		}

		// The old document is gone.
		assertEquals(0, getHitCount("city", "Amsterdam"));
		// The new document has been indexed.
		assertEquals(1, getHitCount("city", "Beijing"));
	}

}

  

 

/*
  添加文档的方法 : 
  	1.addDocument(Document) : 使用默认的分析器添加文档,该分析器在创建IndexWriter对象时指定,用于语汇单元化操作
  	2.addDocument(Document,Analyzer) : 使用指定的分析器添加文档和语汇单元化操作
  删除文档的方法 : 
  	1.deleteDocuments(Term) : 删除包含项的所有文档
  	2.deleteDocuments(Term[]) : 删除包含项数组任一元素的所有文档
  	3.deleteDocuments(Query) : 删除匹配查询语句的所有文档
  	4.deleteDocuments(Query[]) : 删除匹配查询语句数组任一元素的所有文档
  	5.deleteAll() : 删除索引中的所有文档
  更新文档的方法 : 
  	1.updateDocument(Term,Document) : 首先删除包含Term变量的所有文档,然后使用writer的默认分析器添加新文档
  	2.updateDocument(Term,Document,Analyzer) : 功能和上述一致,区别在于它可以指定分析器添加文档
 */

  

下载:

IndexerDemo.zip

Lucene_in_Action_2nd_Edition.rar