Lucene 4.9 document的简单应用
package com.merlin.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Small demonstration of the Lucene 4.9 document API: building an index over
 * a local folder, searching it, deleting and updating documents by term, and
 * printing the document counters of the index.
 *
 * <p>All operations work on the same on-disk index ({@link #INDEX_PATH}).
 */
public class LuceneIndexDemo {

    /** Location of the on-disk index used by every operation of this demo. */
    private static final String INDEX_PATH = "D:\\index";

    /**
     * Runs the demo: searches for "merlin", deletes one document and prints
     * the index counters. Index creation is left commented out.
     *
     * @param args unused
     * @throws IOException    if the index cannot be read while searching
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        LuceneIndexDemo demo = new LuceneIndexDemo();
        // demo.createIndex(); // build the index
        demo.searcher("merlin");
        // delete
        demo.delete();
        demo.query();
    }

    /** Opens the directory that holds the demo index; the caller must close it. */
    private static Directory openIndexDirectory() throws IOException {
        return FSDirectory.open(new File(INDEX_PATH));
    }

    /** Builds the writer configuration (Lucene 4.9 + StandardAnalyzer) shared by delete and update. */
    private static IndexWriterConfig newWriterConfig() {
        return new IndexWriterConfig(Version.LUCENE_4_9, new StandardAnalyzer(Version.LUCENE_4_9));
    }

    /**
     * Deletes the document whose "path" field equals a fixed file path.
     *
     * <p>Any failure is printed to standard error and otherwise ignored.
     */
    private void delete() {
        try (Directory dir = openIndexDirectory();
             IndexWriter writer = new IndexWriter(dir, newWriterConfig())) {
            // The argument is a selector: it may be a Query or a Term; a Term
            // is an exact-match value.
            // NOTE(review): the original author describes deleted documents as
            // kept in a recoverable "recycle bin" rather than removed at once,
            // and observable through a reader (see query()) - confirm against
            // the Lucene version in use.
            writer.deleteDocuments(
                    new Term("path", "E:\\wamp\\www\\meal\\Application\\Common\\Conf\\config.php"));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the index counters, including the number of deleted documents:
     * {@code numDocs()}, {@code maxDoc()} and {@code numDeletedDocs()}.
     *
     * <p>Any failure is printed to standard error and otherwise ignored.
     */
    public void query() {
        // The reader and the directory are closed when the block exits; the
        // original never closed them.
        try (Directory dir = openIndexDirectory();
             IndexReader indexReader = DirectoryReader.open(dir)) {
            System.out.println("存储的文档数:" + indexReader.numDocs());
            System.out.println("总存储量:" + indexReader.maxDoc());
            System.out.println("被删除的文档:" + indexReader.numDeletedDocs());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Updates the index entry whose "path" field equals {@code D:\test\test}.
     *
     * <p>Any failure is printed to standard error and otherwise ignored.
     */
    public void update() {
        try (Directory dir = openIndexDirectory();
             IndexWriter indexWriter = new IndexWriter(dir, newWriterConfig())) {
            // Lucene has no in-place update; an update is a delete followed by
            // an add.
            // NOTE(review): the replacement document carries no fields, so the
            // matching entry is replaced by an empty document - confirm this
            // is intended.
            Document document = new Document();
            // Update the data whose path is d:\test\test
            indexWriter.updateDocument(new Term("path", "D:\\test\\test"), document);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds an index for a local folder:
     * 1. create the Directory, 2. create the IndexWriter, 3. create Document
     * objects, 4. add Fields to each Document.
     *
     * <p>Terminates the JVM with status 1 when the source folder does not
     * exist or is not readable. An {@link IOException} during indexing is
     * reported on standard output.
     */
    public void createIndex() {
        String indexPath = INDEX_PATH; // where the index is stored
        String docsPath = "E:\\wamp\\www\\meal"; // every file below this folder is indexed
        boolean create = true; // create a fresh index

        final File docDir = new File(docsPath);
        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '" + docDir.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        Date start = new Date();
        try {
            System.out.println("Indexing to directory '" + indexPath + "'...");

            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
            if (create) {
                // Create a new index in the directory, removing any
                // previously indexed documents:
                iwc.setOpenMode(OpenMode.CREATE);
            } else {
                // Add new documents to an existing index:
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            }

            // try-with-resources closes the writer even when indexDocs fails;
            // the original only closed it on success.
            try (Directory dir = FSDirectory.open(new File(indexPath));
                 IndexWriter writer = new IndexWriter(dir, iwc)) {
                indexDocs(writer, docDir);
            }

            // Measured after the writer is closed, as in the original.
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }

    /**
     * Searches the "contents" field of the index and prints the stored "path"
     * of up to ten matching documents:
     * 1. create the Directory, 2. create the IndexReader, 3. create the
     * IndexSearcher from the reader, 4. create the Query, 5. search and get
     * the TopDocs, 6. get the ScoreDocs from the TopDocs, 7. load each
     * Document through the searcher, 8. read the wanted values from the
     * Document, 9. close the IndexReader.
     *
     * @param querystring query in the classic query-parser syntax
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code querystring} cannot be parsed
     */
    public void searcher(String querystring) throws IOException, ParseException {
        // Step 9 (close the reader) is now actually performed.
        try (Directory dir = openIndexDirectory();
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // :Post-Release-Update-Version.LUCENE_XY:
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
            QueryParser parser = new QueryParser(Version.LUCENE_4_9, "contents", analyzer);

            // List the files whose contents match querystring
            Query query = parser.parse(querystring);
            TopDocs results = searcher.search(query, 10);
            ScoreDoc[] hits = results.scoreDocs;

            int numTotalHits = results.totalHits;
            System.out.println(numTotalHits + " total matching documents");

            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("path"));
            }
        }
    }

    /**
     * Indexes {@code file}; when it is a directory, recurses into its
     * entries. Unreadable files and files that cannot be opened are skipped
     * silently.
     *
     * @param writer writer of the index the documents are added to
     * @param file   file or directory to index
     * @throws IOException if reading a file or writing to the index fails
     */
    private void indexDocs(IndexWriter writer, File file) throws IOException {
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            String[] files = file.list();
            // list() returns null when the directory cannot be listed
            if (files != null) {
                for (String child : files) {
                    indexDocs(writer, new File(file, child));
                }
            }
            return;
        }

        final FileInputStream fis;
        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
            // Deliberately skipped: the file passed canRead() but still cannot
            // be opened.
            return;
        }

        // Closing the reader also closes the underlying file stream. It stays
        // open until the document has been handed to the writer, which reads
        // the contents from it.
        try (BufferedReader contents =
                     new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))) {
            Document doc = new Document();

            // Stored, exact-match path of the file
            Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
            doc.add(pathField);

            // Last-modified time, indexed but not stored
            doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

            // File contents, read as UTF-8
            doc.add(new TextField("contents", contents));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index: no older copy of this document can exist
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                // Existing index: replace any older copy with the same path
                System.out.println("updating " + file);
                writer.updateDocument(new Term("path", file.getPath()), doc);
            }
        }
    }
}
困难是老天给我们提高的机会,坚定不移勇敢地去攻克,不要退缩,加油!
posted on 2014-10-22 17:48 天地一连线_孤鸿挂飘渺 阅读(186) 评论(0) 编辑 收藏 举报