merlin

精益求精

孤鸿

导航

Lucene 4.9 document的简单应用

 

 

 

 

 

 

 

package com.merlin.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Small demonstration of the basic Lucene 4.9 document life cycle: building an
 * index from a folder of files, searching it, deleting and updating documents,
 * and reporting index statistics.
 *
 * <p>Every operation works on the same on-disk index ({@link #INDEX_PATH}).
 * All readers, writers and directories are opened in try-with-resources blocks
 * so that file handles and the index write lock are released even when an
 * operation fails.
 */
public class LuceneIndexDemo {

	/** Location of the on-disk index shared by every operation of this demo. */
	private static final String INDEX_PATH = "D:\\index";

	/** Root folder whose files are indexed by {@link #createIndex()}. */
	private static final String DOCS_PATH = "E:\\wamp\\www\\meal";

	/**
	 * Runs the demo: searches the existing index for "merlin", deletes one
	 * document and prints the resulting index statistics.
	 *
	 * @param args unused
	 * @throws IOException    if the index cannot be read during the search
	 * @throws ParseException if the demo query cannot be parsed
	 */
	public static void main(String[] args) throws IOException, ParseException {

		LuceneIndexDemo demo = new LuceneIndexDemo();

		// demo.createIndex(); // uncomment to (re)build the index first
		demo.searcher("merlin");

		// Delete a document, then show how the counters changed.
		demo.delete();
		demo.query();
	}

	/**
	 * Opens the shared index directory. The caller is responsible for closing it.
	 */
	private static Directory openIndexDirectory() throws IOException {
		return FSDirectory.open(new File(INDEX_PATH));
	}

	/**
	 * Opens an {@link IndexWriter} on {@code dir} with a {@link StandardAnalyzer}
	 * and the given open mode. The caller is responsible for closing it.
	 */
	private static IndexWriter openWriter(Directory dir, OpenMode mode) throws IOException {
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9,
				new StandardAnalyzer(Version.LUCENE_4_9));
		config.setOpenMode(mode);
		return new IndexWriter(dir, config);
	}

	/**
	 * Deletes the document whose {@code path} field exactly matches one fixed
	 * file path.
	 *
	 * <p>{@code deleteDocuments} accepts either a {@code Query} or a
	 * {@link Term}; a term is an exact-match value. The deleted document is only
	 * marked as deleted at first, which is why {@link #query()} can still report
	 * it through {@code numDeletedDocs()} until Lucene merges the segment away.
	 *
	 * <p>Failures are printed to standard error and otherwise ignored.
	 */
	private void delete() {
		// CREATE_OR_APPEND is the IndexWriterConfig default, i.e. what a config
		// without an explicit open mode uses.
		try (Directory dir = openIndexDirectory();
				IndexWriter writer = openWriter(dir, OpenMode.CREATE_OR_APPEND)) {

			writer.deleteDocuments(new Term("path",
					"E:\\wamp\\www\\meal\\Application\\Common\\Conf\\config.php"));

		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Prints index statistics: the number of live documents, the maximum
	 * document number and the number of deleted documents.
	 *
	 * <p>Failures are printed to standard error and otherwise ignored.
	 */
	public void query() {
		// DirectoryReader.open replaces the deprecated IndexReader.open; the
		// reader and directory are closed once the statistics are printed.
		try (Directory dir = openIndexDirectory();
				IndexReader indexReader = DirectoryReader.open(dir)) {

			System.out.println("存储的文档数:" + indexReader.numDocs());
			System.out.println("总存储量:" + indexReader.maxDoc());
			System.out.println("被删除的文档:" + indexReader.numDeletedDocs());

		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Updates the document whose {@code path} field is {@code D:\test\test}.
	 *
	 * <p>Lucene has no in-place update: {@code updateDocument} deletes every
	 * document matching the term and then adds the replacement document.
	 *
	 * <p>NOTE(review): the replacement document is empty, so after this call the
	 * old document is gone and a field-less document has been added. A real
	 * update would populate the new document (including its {@code path} field)
	 * first.
	 *
	 * <p>Failures are printed to standard error and otherwise ignored.
	 */
	public void update() {
		try (Directory dir = openIndexDirectory();
				IndexWriter indexWriter = openWriter(dir, OpenMode.CREATE_OR_APPEND)) {

			Document document = new Document();
			indexWriter.updateDocument(new Term("path", "D:\\test\\test"), document);

		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Builds a fresh index for every readable file below {@link #DOCS_PATH}.
	 *
	 * <p>Steps: open the {@link Directory}, create the {@link IndexWriter},
	 * create one {@link Document} per file and add its fields (see
	 * {@link #indexDocs(IndexWriter, File)}).
	 *
	 * <p>Terminates the JVM with exit status 1 when the document folder does not
	 * exist or is not readable. I/O failures while indexing are reported on
	 * standard output.
	 */
	public void createIndex() {

		// true: wipe any existing index; false: add to / update the existing one.
		final boolean create = true;

		final File docDir = new File(DOCS_PATH);
		if (!docDir.exists() || !docDir.canRead()) {
			System.out
					.println("Document directory '"
							+ docDir.getAbsolutePath()
							+ "' does not exist or is not readable, please check the path");
			System.exit(1);
		}

		long startMillis = System.currentTimeMillis();
		OpenMode mode = create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND;

		System.out.println("Indexing to directory '" + INDEX_PATH + "'...");

		// try-with-resources guarantees the writer (and its write lock) is
		// released even when indexing one of the files fails.
		try (Directory dir = openIndexDirectory();
				IndexWriter writer = openWriter(dir, mode)) {

			indexDocs(writer, docDir);

		} catch (IOException e) {
			System.out.println(" caught a " + e.getClass()
					+ "\n with message: " + e.getMessage());
			return;
		}

		// Measured after the writer is closed so the commit is included.
		System.out.println(System.currentTimeMillis() - startMillis
				+ " total milliseconds");
	}

	/**
	 * Searches the {@code contents} field of the index and prints the total hit
	 * count followed by the stored {@code path} of up to ten best matches.
	 *
	 * <p>Steps: open the {@link Directory} and {@link IndexReader}, wrap the
	 * reader in an {@link IndexSearcher}, parse the query, run the search to get
	 * {@link TopDocs}, resolve each {@link ScoreDoc} to its {@link Document},
	 * read the wanted field, and finally close the reader.
	 *
	 * @param querystring query in Lucene classic query-parser syntax
	 * @throws IOException    if the index cannot be opened or read
	 * @throws ParseException if {@code querystring} is not a valid query
	 */
	public void searcher(String querystring) throws IOException, ParseException {

		try (Directory dir = openIndexDirectory();
				IndexReader reader = DirectoryReader.open(dir)) {

			IndexSearcher searcher = new IndexSearcher(reader);

			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
			QueryParser parser = new QueryParser(Version.LUCENE_4_9, "contents",
					analyzer);

			// Find the files whose contents match querystring.
			Query query = parser.parse(querystring);

			TopDocs results = searcher.search(query, 10);
			System.out.println(results.totalHits + " total matching documents");

			for (ScoreDoc hit : results.scoreDocs) {
				Document doc = searcher.doc(hit.doc);
				System.out.println(doc.get("path"));
			}
		}
	}

	/**
	 * Recursively indexes {@code file}: directories are walked, regular files
	 * become one document each with the fields {@code path} (stored, not
	 * tokenized), {@code modified} (last-modified time, not stored) and
	 * {@code contents} (file text decoded as UTF-8, tokenized, not stored).
	 *
	 * <p>Unreadable files and files that cannot be opened are skipped silently.
	 * When the writer was opened in {@code CREATE} mode documents are simply
	 * added; otherwise any existing document with the same path is replaced.
	 *
	 * @param writer writer of the index receiving the documents
	 * @param file   file or directory to index
	 * @throws IOException if reading a file or writing to the index fails
	 */
	private void indexDocs(IndexWriter writer, File file) throws IOException {

		if (!file.canRead()) {
			return;
		}

		if (file.isDirectory()) {
			// list() returns null when an I/O error occurs while listing.
			String[] children = file.list();
			if (children != null) {
				for (String child : children) {
					indexDocs(writer, new File(file, child));
				}
			}
			return;
		}

		// Opened outside the resource block on purpose: only a failure to OPEN
		// the file is skipped; a FileNotFoundException raised later by the index
		// itself must still propagate.
		FileInputStream opened;
		try {
			opened = new FileInputStream(file);
		} catch (FileNotFoundException fnfe) {
			return;
		}

		try (FileInputStream fis = opened) {

			Document doc = new Document();
			doc.add(new StringField("path", file.getPath(), Field.Store.YES));
			doc.add(new LongField("modified", file.lastModified(),
					Field.Store.NO));
			doc.add(new TextField("contents", new BufferedReader(
					new InputStreamReader(fis, StandardCharsets.UTF_8))));

			if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
				// Fresh index: no older copy of this document can exist.
				System.out.println("adding " + file);
				writer.addDocument(doc);
			} else {
				// Existing index: replace any older document for the same path.
				System.out.println("updating " + file);
				writer.updateDocument(new Term("path", file.getPath()), doc);
			}
		}
	}
}

  

posted on 2014-10-22 17:48  天地一连线_孤鸿挂飘渺  阅读(186)  评论(0)  编辑  收藏  举报