Lucene学习笔记(1)--初步了解Lucene
Lucene是一个强大的Java搜索库,它能让你很轻易地将搜索功能加入到任何程序中。刚开始学习Lucene,首先要了解Lucene的整体架构,这样就能清晰地理解程序中由Lucene完成的内容,以及其他需要你自行完成的内容。
搜索程序首先需要实现的功能是索引链,这需要按照几个独立的步骤依次来完成:1、检索原始内容;2、根据原始内容来创建对应的文档;3、对创建的文档进行索引。一旦建立起索引,用于搜索的组件也就出来了,这些搜索组件包括:用户接口、构建可编程查询语句的方法、执行查询语句(或者检索匹配文档)、展现查询结果等。
根据以上的说明,我们先来创建一个Lucene的示例程序,通过这个示例来进一步了解Lucene的易用性和强大功能。
1、建立索引
package com.lucene.demo; import java.io.File; import java.io.FileFilter; import java.io.FileReader; import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class Indexer { private IndexWriter writer; //构造方法,创建IndexWriter public Indexer(String indexDir) throws IOException{ Directory dir = FSDirectory.open(new File(indexDir)); writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),true,IndexWriter.MaxFieldLength.UNLIMITED); } public void close() throws IOException, IOException{ writer.close(); } //返回被索引的文档数 public int index(String dataDir, FileFilter filter) throws Exception{ File[] files = new File(dataDir).listFiles(); for(File f:files){ if(!f.isDirectory()&&!f.isHidden()&&f.exists()&&f.canRead()&&(filter==null||filter.accept(f))){ indexFile(f); } } return writer.numDocs(); } //只对txt文档建立索引 private static class TextFilesFilter implements FileFilter{ @Override public boolean accept(File pathname) { return pathname.getName().toLowerCase().endsWith(".txt"); } } protected Document getDocument(File f) throws Exception{ Document doc = new Document(); doc.add(new Field("contents", new FileReader(f))); doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); return doc; } private void indexFile(File f) throws Exception{ System.out.println("Indexing "+f.getCanonicalPath()); Document doc = getDocument(f); writer.addDocument(doc); } public static void main(String args[]) throws Exception{ //存放Lucene索引的路径 String indexDir = "E:\\luceneDir\\indexDir"; //被索引文件的存放路径 String dataDir = "E:\\luceneDir\\dataDir"; long start = System.currentTimeMillis(); 
Indexer indexer = new Indexer(indexDir); int numIndexed; try{ numIndexed = indexer.index(dataDir, new TextFilesFilter()); }finally{ indexer.close(); } long end = System.currentTimeMillis(); System.out.println(" Indexing "+ numIndexed + " files took "+ (end - start)+ " milliseconds"); } }
2、搜索索引
package com.lucene.demo; import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class Searcher { /* * indexDir:索引文件存放路径 * q:输入的查询条件 */ public static void search(String indexDir, String q) throws Exception{ //打开索引文件 Directory dir = FSDirectory.open(new File(indexDir)); IndexSearcher is = new IndexSearcher(dir); //解析查询字符串 QueryParser parser = new QueryParser(Version.LUCENE_30,"contents", new StandardAnalyzer(Version.LUCENE_30)); Query query = parser.parse(q); long start = System.currentTimeMillis(); //搜索索引 TopDocs hits = is.search(query, 10); long end = System.currentTimeMillis(); System.err.println("Found "+hits.totalHits+" documnet(s) (in "+(end-start)+" milliseconds) that matched query '"+q+"';" ); for(ScoreDoc scoreDoc : hits.scoreDocs){ Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullpath")); } is.close(); } public static void main(String[] args) throws Exception { String indexDir = "E:\\luceneDir\\indexDir"; String queryStr = "lucene"; search(indexDir, queryStr); } }
通过以上代码,我们初步地了解了Lucene的功能,但不要因为这个例子简单就感到满足,Lucene包含的内容还有很多。
智者,寡言而多行