Apache Lucene：一个最简单的实例

就像每个程序都有一个 Hello World 来让人体验它一样，Lucene 也可以很简单地提供一个实例。下面的例子（来自《Lucene in Action》）由两个类组成：
一个是建立索引：

package my;

import java.io.File;

import java.io.FileReader;

import java.io.IOException;

import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexWriter;

public class Indexer {

public static void main(String[] args) throws Exception {

if (args.length != 2) {

throw new Exception("Usage: java " + Indexer.class.getName()

+ " <index dir> <data dir>");

}

File indexDir = new File(args[0]);

File dataDir = new File(args[1]);

long start = new Date().getTime();

int numIndexed = index(indexDir, dataDir);

long end = new Date().getTime();

System.out.println("Indexing " + numIndexed + " files took "

+ (end - start) + " milliseconds");

}

// open an index and start file directory traversal

public static int index(File indexDir, File dataDir) throws IOException {

if (!dataDir.exists() || !dataDir.isDirectory()) {

throw new IOException(dataDir

+ " does not exist or is not a directory");

}

IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(),

true);

writer.setUseCompoundFile(false);

indexDirectory(writer, dataDir);

int numIndexed = writer.docCount();

writer.optimize();

writer.close();

return numIndexed;

}

// recursive method that calls itself when it finds a directory

private static void indexDirectory(IndexWriter writer, File dir)

throws IOException {

File[] files = dir.listFiles();

for (int i = 0; i < files.length; i++) {

File f = files[i];

if (f.isDirectory()) {

indexDirectory(writer, f);

} else if (f.getName().endsWith(".txt")) {

indexFile(writer, f);

}

// method to actually index file using Lucene

private static void indexFile(IndexWriter writer, File f)

throws IOException {

if (f.isHidden() || !f.exists() || !f.canRead()) {

return;

}

System.out.println("Indexing " + f.getCanonicalPath());

Document doc = new Document();

doc.add(Field.Text("contents", new FileReader(f)));

doc.add(Field.Keyword("filename", f.getCanonicalPath()));

writer.addDocument(doc);

}

另一个是搜索：

package my;

import java.io.File;

import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.Hits;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

public class Searcher {

public static void main(String[] args) throws Exception {

if (args.length != 2) {

throw new Exception("Usage: java " + Searcher.class.getName()

+ " <index dir> <auery>");

}

File indexDir = new File(args[0]);

String q = args[1];

if (!indexDir.exists() || !indexDir.isDirectory()) {

throw new Exception(indexDir

+ " does not exist or is not a directory.");

}

search(indexDir, q);

}

public static void search(File indexDir, String q) throws Exception {

Directory fsDir = FSDirectory.getDirectory(indexDir, false);

IndexSearcher is = new IndexSearcher(fsDir);

Query query = QueryParser.parse(q, "contents", new StandardAnalyzer());

long start = new Date().getTime();

Hits hits = is.search(query);

long end = new Date().getTime();

System.err.println("Found " + hits.length() + " document(s) (in "

+ (end - start) + " milliseconds) that matched query ‘" + q

+ "’:");

for (int i = 0; i < hits.length(); i++) {

Document doc = hits.doc(i);

System.out.println(doc.get("filename"));

}

OK，这样就简单实现了如下功能：为指定目录下的所有 txt 文件建立索引，然后找出内容中包含某个字符串的 txt 文件名。
下篇文章将介绍 Lucene 的核心类。

posted on 2009-05-21 14:10 马森阅读(465) 评论(0) 编辑收藏举报

刷新页面返回顶部

pony

apache lucene 一个最简单的实例

导航

公告