Lucene 全文检索

package cn.richinfo.cmail.basemail.common.tools;

import java.io.Closeable;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import cn.richinfo.cmail.basemail.addr.model.LuceneEngineModel;
import cn.richinfo.cmail.common.log.CommonLogger;
import cn.richinfo.cmail.common.log.Log;

/**
 * Static helpers for building, updating and querying a file-system based Lucene
 * index of {@link LuceneEngineModel} entries.
 *
 * <p>Every public operation opens its own {@link Directory} plus an
 * {@link IndexWriter} or {@link IndexReader} on the given path and closes them
 * again before returning. Failures are logged through the class logger and are
 * not propagated to the caller; search methods return an empty list in that
 * case.
 */
public class LuceneEngineUtil {

    private static final Log log = CommonLogger.getInstance();

    /** Maximum number of hits returned by {@link #searchDeptIndex(String, String)}. */
    private static final int DEPT_SEARCH_LIMIT = 20;

    /**
     * Recursively deletes everything inside the directory {@code path} (old
     * index files). The directory itself is kept. Does nothing when
     * {@code path} is {@code null}, does not denote a directory, is empty, or
     * cannot be listed.
     *
     * @param path directory whose contents are removed
     */
    public static void delFiles(String path) {
        if (path == null) {
            return;
        }
        File dir = new File(path);
        if (!dir.isDirectory()) {
            return;
        }
        // listFiles() returns null when an I/O error occurs, so it must be checked.
        File[] children = dir.listFiles();
        if (children == null || children.length == 0) {
            return;
        }
        log.info("delete file " + path);
        for (File child : children) {
            if (child.isDirectory()) {
                // A directory can only be deleted once it is empty.
                delFiles(child.getAbsolutePath());
            }
            if (!child.delete()) {
                log.info("delete file fail: " + child.getAbsolutePath());
            }
        }
    }

    /**
     * Opens the file-system directory that holds the index.
     *
     * @param path index directory
     * @return an open directory; the caller is responsible for closing it
     * @throws Exception if the directory cannot be opened
     */
    private static Directory openDirectory(String path) throws Exception {
        return FSDirectory.open(new File(path));
    }

    /**
     * Creates a writer on {@code directory} that appends to an existing index
     * or creates a new one, flushing after 100 buffered documents.
     *
     * @param directory open index directory
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be created (e.g. the index is locked)
     */
    private static IndexWriter getIndexWriter(Directory directory) throws Exception {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        conf.setMaxBufferedDocs(100);
        return new IndexWriter(directory, conf);
    }

    /**
     * Closes {@code resource} if it is non-null, logging (instead of
     * propagating) any failure so that it can be used safely in {@code finally}
     * blocks.
     *
     * @param resource resource to close, may be {@code null}
     * @param name     short name used in the log messages
     */
    private static void closeQuietly(Closeable resource, String name) {
        if (resource == null) {
            return;
        }
        try {
            log.info(name + " close……");
            resource.close();
        } catch (Exception e) {
            log.error(name + " close fail: ", e);
        }
    }

    /**
     * Adds a single entry to the index.
     *
     * @param path  index directory
     * @param model entry to index; a {@code null} model results in an empty
     *              document being added
     */
    public static void createIndex(String path, LuceneEngineModel model) {
        log.info("create index: path=" + path);
        Directory directory = null;
        IndexWriter writer = null;
        try {
            directory = openDirectory(path);
            writer = getIndexWriter(directory);
            writer.addDocument(toDocument(model));
            // Commit explicitly so that "success" is only logged once the change is durable.
            writer.commit();
            log.info("create index success.");
        } catch (Exception e) {
            log.error("create index fail: ", e);
        } finally {
            closeQuietly(writer, "writer");
            closeQuietly(directory, "directory");
        }
    }

    /**
     * Adds a batch of entries to the index using a single writer.
     *
     * @param path index directory
     * @param list entries to index; a {@code null} list is logged and ignored
     */
    public static void createIndex(String path, List<LuceneEngineModel> list) {
        if (list == null) {
            log.info(String.format("create index batch: path=%s | size=%s", path, 0));
            return;
        }
        log.info(String.format("create index batch: path=%s | size=%s", path, list.size()));
        Directory directory = null;
        IndexWriter writer = null;
        long start = System.currentTimeMillis();
        try {
            directory = openDirectory(path);
            writer = getIndexWriter(directory);
            for (LuceneEngineModel model : list) {
                writer.addDocument(toDocument(model));
            }
            // Commit explicitly so that "success" is only logged once the batch is durable.
            writer.commit();
            log.info(String.format("create index batch success : time=%sms", (System.currentTimeMillis() - start)));
        } catch (Exception e) {
            log.error("create index batch fail: ", e);
        } finally {
            closeQuietly(writer, "writer");
            closeQuietly(directory, "directory");
        }
    }

    /**
     * Converts a model into a Lucene document. All fields are stored; the
     * string fields are indexed as single un-tokenized terms
     * ({@link StringField}) with {@code null} values replaced by an empty
     * string.
     *
     * @param model source entry, may be {@code null}
     * @return the populated document, or an empty document when {@code model}
     *         is {@code null}
     */
    private static Document toDocument(LuceneEngineModel model) {
        Document document = new Document();
        if (model != null) {
            log.info("addDocument: " + model.toString());
            document.add(new IntField("type", model.getType(), Field.Store.YES));
            document.add(new IntField("id", model.getId(), Field.Store.YES));
            document.add(new StringField("email", formatNull(model.getEmail()), Field.Store.YES));
            document.add(new StringField("mobile", formatNull(model.getMobile()), Field.Store.YES));
            document.add(new StringField("first_name", formatNull(model.getFirst_name()), Field.Store.YES));
            document.add(new StringField("second_name", formatNull(model.getSecond_name()), Field.Store.YES));
            document.add(new StringField("position", formatNull(model.getPosition()), Field.Store.YES));
            document.add(new StringField("locate_name_list", formatNull(model.getLocate_name_string()), Field.Store.YES));
            document.add(new StringField("dept_list", formatNull(model.getDept_list()), Field.Store.YES));
        }
        return document;
    }

    /**
     * Maps {@code null} to the empty string.
     *
     * @param value any string, may be {@code null}
     * @return {@code value}, or {@code ""} when it is {@code null}
     */
    private static String formatNull(String value) {
        return value == null ? "" : value;
    }

    /**
     * Deletes every document whose {@code email} field equals {@code email}.
     *
     * @param path  index directory
     * @param email exact e-mail term identifying the documents to delete
     */
    public static void deleteIndex(String path, String email) {
        log.info(String.format("delete index: path=%s | email=%s", path, email));
        Directory directory = null;
        IndexWriter writer = null;
        try {
            directory = openDirectory(path);
            writer = getIndexWriter(directory);
            writer.deleteDocuments(new Term("email", email));
            // Commit explicitly so that "success" is only logged once the change is durable.
            writer.commit();
            log.info("delete index success.");
        } catch (Exception e) {
            log.error("delete index fail: ", e);
        } finally {
            closeQuietly(writer, "writer");
            closeQuietly(directory, "directory");
        }
    }

    /**
     * Replaces the documents whose {@code email} field equals {@code email}
     * with a document built from {@code model}.
     *
     * @param path  index directory
     * @param email exact e-mail term identifying the documents to replace
     * @param model new content
     */
    public static void updateIndex(String path, String email, LuceneEngineModel model) {
        log.info(String.format("update index: path=%s | email=%s", path, email));
        Directory directory = null;
        IndexWriter writer = null;
        try {
            directory = openDirectory(path);
            writer = getIndexWriter(directory);
            writer.updateDocument(new Term("email", email), toDocument(model));
            // Commit explicitly so that "success" is only logged once the change is durable.
            writer.commit();
            log.info("update index success.");
        } catch (Exception e) {
            log.error("update index fail: ", e);
        } finally {
            closeQuietly(writer, "writer");
            closeQuietly(directory, "directory");
        }
    }

    /**
     * Searches the {@code dept_list} field with a wildcard query and returns at
     * most {@value #DEPT_SEARCH_LIMIT} entries.
     *
     * <p>{@code deptId} is used as the wildcard pattern as-is; no {@code *} is
     * added around it. NOTE(review): unlike {@link #searchIndex}, callers
     * presumably have to supply their own wildcards to match inside a
     * multi-value {@code dept_list} - confirm against callers.
     *
     * @param path   index directory
     * @param deptId wildcard pattern matched against {@code dept_list}
     * @return matching entries; empty when nothing matches or the search fails
     */
    public static List<LuceneEngineModel> searchDeptIndex(String path, String deptId) {
        log.info(String.format("search index: path=%s | deptId=[%s]", path, deptId));
        return search(path, "dept_list", deptId, DEPT_SEARCH_LIMIT);
    }

    /**
     * Searches for entries whose {@code email} field contains {@code content}.
     *
     * <p>{@code content} is embedded unescaped between two {@code *}, so
     * wildcard characters inside it are interpreted by the query, and a
     * {@code null} value is searched as the literal text {@code null}.
     *
     * @param path    index directory
     * @param content substring to look for in the e-mail address
     * @param total   maximum number of hits, as a decimal integer string
     * @return matching entries; empty when nothing matches, {@code total} is
     *         not a valid number, or the search fails
     */
    public static List<LuceneEngineModel> searchIndex(String path, String content, String total) {
        log.info(String.format("search index: path=%s | content=[%s] | total=%s", path, content, total));
        int limit;
        try {
            limit = Integer.parseInt(total);
        } catch (NumberFormatException e) {
            // Fail before touching the index; same outcome for the caller as any other search failure.
            log.error("search index fail: ", e);
            return new ArrayList<LuceneEngineModel>();
        }
        return search(path, "email", "*" + content + "*", limit);
    }

    /**
     * Runs a wildcard query against one field and converts the hits to models.
     * Any failure is logged and yields the entries collected so far.
     *
     * @param path    index directory
     * @param field   name of the indexed field to query
     * @param pattern wildcard pattern
     * @param limit   maximum number of hits to load
     * @return the converted hits, never {@code null}
     */
    private static List<LuceneEngineModel> search(String path, String field, String pattern, int limit) {
        List<LuceneEngineModel> list = new ArrayList<LuceneEngineModel>();
        Directory directory = null;
        IndexReader reader = null;
        long start = System.currentTimeMillis();
        try {
            directory = openDirectory(path);
            reader = DirectoryReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);
            // The Term is built inside the try block so that a null pattern is logged, not thrown.
            Query query = new WildcardQuery(new Term(field, pattern));
            TopDocs tds = searcher.search(query, limit);
            for (ScoreDoc sd : tds.scoreDocs) {
                list.add(toEntity(searcher.doc(sd.doc)));
            }
            log.info(String.format("search index success: size=%s | time=%sms", tds.totalHits, (System.currentTimeMillis() - start)));
        } catch (Exception e) {
            log.error("search index fail: ", e);
        } finally {
            closeQuietly(reader, "reader");
            closeQuietly(directory, "directory");
        }
        return list;
    }

    /**
     * Rebuilds a model from the stored fields of a document.
     * {@code locate_name_list} is split on commas; an empty stored value
     * therefore yields a list holding one empty string.
     *
     * <p>NOTE(review): the stored {@code dept_list} field is not copied back
     * into the model - confirm whether that is intended.
     *
     * @param doc document loaded from the index
     * @return the populated model
     * @throws NumberFormatException if {@code type} or {@code id} is missing or
     *                               not numeric
     */
    private static LuceneEngineModel toEntity(Document doc) {
        LuceneEngineModel model = new LuceneEngineModel();
        model.setType(Integer.parseInt(doc.get("type")));
        model.setId(Integer.parseInt(doc.get("id")));
        model.setEmail(doc.get("email"));
        model.setMobile(doc.get("mobile"));
        model.setFirst_name(doc.get("first_name"));
        model.setSecond_name(doc.get("second_name"));
        model.setPosition(doc.get("position"));
        String[] locateNames = doc.get("locate_name_list").split(",");
        model.setLocate_name_list(Arrays.asList(locateNames));
        return model;
    }

}

 

posted @ 2016-09-22 17:51  呱哇  阅读(270)  评论(0编辑  收藏  举报