jackyrong

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

Lucene是一款不错的全文搜索API,可以结合Java使用。但在使用Lucene 2.0时,如果参考目前的一些文章,会发现其中有些API已经过期了。我在看一些讲Lucene的文章时就遇到了这类情况,于是经过查找,发现有如下一些需要注意的地方。
Field.Text(java.lang.String, java.io.Reader)

new Field(java.lang.String, java.io.Reader)代替

Field.Keyword(java.lang.String, java.lang.String)

new Field(java.lang.String, java.lang.String, Field.Store.YES, Field.Index.UN_TOKENIZED)
代替

Query query = QueryParser.parse(q, "contents", new StandardAnalyzer ());

QueryParser parser = new QueryParser("contents", new StandardAnalyzer ());
Query query = parser.parse(q);

代替

因此,再摘录一个命令行方式下的简单Lucene程序,来自IBM DW,效果是先对某目录下的TXT文件建立索引,然后再进行搜索。
建立索引的程序:

import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

 

/**
 * This class demonstrate the process of creating index with Lucene
 * for text files
 */
public class TxtFileIndexer {
 public static void main(String[] args) throws Exception{
  //indexDir is the directory that hosts Lucene's index files
        File   indexDir = new File("D:\\luceneIndex");
        //dataDir is the directory that hosts the text files that to be indexed
        File   dataDir  = new File("D:\\luceneData");
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        File[] dataFiles  = dataDir.listFiles();
        IndexWriter indexWriter = new IndexWriter(indexDir,luceneAnalyzer,true);
        long startTime = new Date().getTime();
        for(int i = 0; i < dataFiles.length; i++){
         if(dataFiles[i].isFile() && dataFiles[i].getName().endsWith(".txt")){
          System.out.println("Indexing file " + dataFiles[i].getCanonicalPath());
          Document document = new Document();
          Reader txtReader = new FileReader(dataFiles[i]);
          

          document.add(new Field("path",dataFiles[i].getPath(),Field.Store.YES,Field.Index.NO));
          document.add(new Field("contents",txtReader));
         
          indexWriter.addDocument(document);
         }
        }
        indexWriter.optimize();
        indexWriter.close();
        long endTime = new Date().getTime();
       
        System.out.println("It takes " + (endTime - startTime)
                           + " milliseconds to create index for the files in directory "
                     + dataDir.getPath());       
 }
}

搜索的程序
import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;

/**
 * This class is used to demonstrate the
 * process of searching on an existing
 * Lucene index
 *
 */
public class TxtFileSearcher {
 public static void main(String[] args) throws Exception{
     String queryStr = "我";
     //This is the directory that hosts the Lucene index
        File indexDir = new File("D:\\luceneIndex");
        FSDirectory directory = FSDirectory.getDirectory(indexDir,false);
        IndexSearcher searcher = new IndexSearcher(directory);
        if(!indexDir.exists()){
         System.out.println("The Lucene index is not exist");
         return;
        }
        Term term = new Term("contents",queryStr.toLowerCase());
        TermQuery luceneQuery = new TermQuery(term);
        Hits hits = searcher.search(luceneQuery);
        System.out.println("his result is"+hits.length());
        for(int i = 0; i < hits.length(); i++){
         Document document = hits.doc(i);
         System.out.println("File: " + document.get("path"));
        }
 }
}

 

posted on 2006-09-25 21:46  jackyrong的世界  阅读(1135)  评论(0)  编辑  收藏  举报