Fork me on GitHub

【lucene】一个简单的招聘网站的建立

1.建立索引库: 核心代码如下

package com.tabchanj.job.index;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

import com.tabchanj.job.domain.JobApply;
import com.tabchanj.job.util.LuceneUtils;

@SuppressWarnings("deprecation")
public class JobApplyIndexHelper {

    /**
     * Adds every given job posting to the Lucene index, optionally wiping the index first.
     *
     * @param jobApplies postings to convert into documents and add to the index
     * @param rebuild    when {@code true}, all existing documents are deleted (and that
     *                   deletion is committed) before the new documents are added
     * @throws RuntimeException if anything fails while writing; the message is that of the
     *                          underlying exception, which is attached as the cause
     */
    public void createIndex(List<JobApply> jobApplies, boolean rebuild) {
        try {
            // The writer comes from LuceneUtils; per the original author's note the whole
            // application must share one writer because several writers writing at the
            // same time raise errors. It is therefore not closed here.
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            if (rebuild) {
                indexWriter.deleteAll();
                indexWriter.commit();
            }
            for (JobApply jobApply : jobApplies) {
                // Wrap each record as a Lucene document and add it to the index.
                indexWriter.addDocument(obj2Doc(jobApply));
            }
            indexWriter.commit();
        } catch (Exception e) {
            // Same message as before, but chain the cause so the stack trace is not lost.
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Converts one job posting into the Lucene document that represents it in the index.
     *
     * <p>Field layout, as set by the Store/Index flags below:
     * <ul>
     *   <li>{@code id}, {@code title}: stored, not indexed</li>
     *   <li>{@code content}: analyzed, not stored</li>
     *   <li>{@code *Id} / {@code salaryId}: stored, indexed without analysis</li>
     *   <li>{@code *Name} / {@code salaryScope}: stored and analyzed</li>
     * </ul>
     *
     * <p>NOTE(review): {@code title} is not indexed, so a query against the
     * {@code title} field would presumably match nothing — confirm this is intended.
     *
     * @param jobApply the posting to convert
     * @return a new document carrying the posting's fields
     */
    private Document obj2Doc(JobApply jobApply) {
        Document doc = new Document();

        // Posting itself.
        addField(doc, "id", String.valueOf(jobApply.getId()), Store.YES, Index.NO);
        addField(doc, "title", jobApply.getTitle(), Store.YES, Index.NO);
        addField(doc, "content", jobApply.getContent(), Store.NO, Index.ANALYZED);

        // Company.
        addField(doc, "companyId", String.valueOf(jobApply.getCompany().getId()), Store.YES, Index.NOT_ANALYZED);
        addField(doc, "companyName", jobApply.getCompany().getName(), Store.YES, Index.ANALYZED);

        // City.
        addField(doc, "cityName", jobApply.getCity().getName(), Store.YES, Index.ANALYZED);
        addField(doc, "cityId", String.valueOf(jobApply.getCity().getId()), Store.YES, Index.NOT_ANALYZED);

        // Trade.
        addField(doc, "tradeId", String.valueOf(jobApply.getTrade().getId()), Store.YES, Index.NOT_ANALYZED);
        addField(doc, "tradeName", jobApply.getTrade().getName(), Store.YES, Index.ANALYZED);

        // Salary level.
        addField(doc, "salaryScope", jobApply.getSalaryLevel().getName(), Store.YES, Index.ANALYZED);
        addField(doc, "salaryId", String.valueOf(jobApply.getSalaryLevel().getId()), Store.YES, Index.NOT_ANALYZED);

        return doc;
    }

    /** Appends a single string field with the given store/index options to {@code target}. */
    private static void addField(Document target, String name, String value, Store store, Index index) {
        target.add(new Field(name, value, store, index));
    }
//上面使用的LuceneUtils工具类代码如下:
package com.tabchanj.job.util;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * General-purpose Lucene helpers: holds the shared index directory, analyzer and
 * index writer used by the application.
 *
 * @author tab
 */
public class LuceneUtils {

    /** Index directory, opened lazily by {@link #getDirectory()}. */
    private static Directory directory = null;

    /** Analyzer used both for the writer configuration and by callers via {@link #getAnalyzer()}. */
    private static Analyzer analyzer = new IKAnalyzer();

    /**
     * Shared index writer, created lazily by {@link #getIndexWriter()}.
     * Declared volatile because it is read outside the lock in that method.
     */
    private static volatile IndexWriter indexWriter = null;

    /**
     * Returns the shared analyzer.
     *
     * @return the analyzer instance held by this class
     */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Builds the file-system path of the index directory:
     * {@code <Global.webAppPath>WEB-INF/index/} (using the platform separator).
     *
     * @return the index directory path, ending with a separator
     */
    public static String getIndexPath() {
        StringBuilder pathname = new StringBuilder(Global.webAppPath);
        // A non-empty root path that lacks a trailing separator would otherwise be glued
        // directly onto "WEB-INF". An empty root is left alone so the path stays relative.
        int length = pathname.length();
        if (length > 0) {
            char last = pathname.charAt(length - 1);
            if (last != '/' && last != File.separatorChar) {
                pathname.append(File.separator);
            }
        }
        pathname.append("WEB-INF").append(File.separator).append("index").append(File.separator);
        return pathname.toString();
    }

    /**
     * Returns the index {@link Directory}, opening it on first use.
     * Synchronized so that concurrent first calls open the directory only once.
     *
     * @return the shared directory object
     * @throws RuntimeException if the directory cannot be opened; the underlying
     *                          exception is attached as the cause
     */
    public static synchronized Directory getDirectory() {
        try {
            if (directory == null) {
                directory = FSDirectory.open(new File(getIndexPath()));
            }
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
        return directory;
    }

    /**
     * Returns the shared {@link IndexWriter}, creating it on first use (or again after
     * {@link #closeIndexWriter()} has closed the previous one).
     *
     * @return the shared index writer
     * @throws RuntimeException if the writer cannot be created; the underlying
     *                          exception is attached as the cause
     */
    public static IndexWriter getIndexWriter() {
        try {
            if (indexWriter == null) {
                synchronized (LuceneUtils.class) {
                    // Re-check under the lock: another thread may have created it meanwhile.
                    if (indexWriter == null) {
                        Version version = Version.LUCENE_4_10_4;
                        indexWriter = new IndexWriter(getDirectory(),
                                new IndexWriterConfig(version, getAnalyzer()));
                    }
                }
            }
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
        return indexWriter;
    }

    /**
     * Closes the shared index writer if one exists.
     *
     * <p>After a successful close the cached reference is cleared, so a later
     * {@link #getIndexWriter()} call creates a fresh writer instead of handing out
     * the closed one.
     *
     * @throws RuntimeException if closing fails; the underlying exception is attached
     *                          as the cause and the reference is kept
     */
    public static void closeIndexWriter() {
        synchronized (LuceneUtils.class) {
            try {
                if (indexWriter != null) {
                    indexWriter.close();
                    indexWriter = null;
                }
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    }
}
//上面用到的Global.webAppPath常量如下
package com.tabchanj.job.util;

/**
 * Holder for application-wide values.
 */
public class Global {

    /** Root path of the web application; an empty string until something assigns it. */
    public static String webAppPath = "";
}
//用于在容器启动时通过下面的监听器给Global.webAppPath常量赋值
package com.tabchanj.job.listener;

import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;

import com.tabchanj.job.util.Global;
import com.tabchanj.job.util.LuceneUtils;

public class WebAppListener implements ServletContextListener {

    /**
     * On container start-up, stores the real path of the application root ("/")
     * in {@link Global#webAppPath}.
     *
     * <p>NOTE(review): {@code getRealPath} is documented as possibly returning
     * {@code null}; that value would be stored as-is — confirm the deployment
     * always provides a real path.
     */
    @Override
    public void contextInitialized(ServletContextEvent event) {
        String rootPath = event.getServletContext().getRealPath("/");
        Global.webAppPath = rootPath;
    }

    /**
     * On container shutdown, closes the shared Lucene index writer.
     */
    @Override
    public void contextDestroyed(ServletContextEvent event) {
        LuceneUtils.closeIndexWriter();
    }
}

 

 

 

 

2.查询索引库: 核心代码如下

/**
     * Searches the index and returns the matching postings as display-ready maps.
     *
     * <p>At most 1000 hits are fetched. Each result map has the keys {@code title},
     * {@code city}, {@code company}, {@code trade} and {@code salaryScope}. The title
     * has query terms wrapped in {@code <font color='red'>...</font>}; when the
     * highlighter yields no fragment the stored title is used unchanged.
     *
     * <p>Failures are not propagated: the stack trace is printed and whatever results
     * were collected so far (possibly none) are returned.
     *
     * @param query the Lucene query to run
     * @return one map per hit, in the order returned by the searcher; never {@code null}
     */
    public List<Map<String, Object>> search(Query query) {
        List<Map<String, Object>> lists = new ArrayList<Map<String, Object>>();
        // try-with-resources so the reader is closed on every path.
        try (IndexReader reader = IndexReader.open(LuceneUtils.getDirectory())) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs docs = searcher.search(query, 1000);
            ScoreDoc[] hits = docs.scoreDocs;

            // Highlighter: keyword markup format plus the fragment length (200)
            // used for the snippet that contains the keyword.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer fragmentScorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
            Fragmenter fragmenter = new SimpleFragmenter(200);
            highlighter.setTextFragmenter(fragmenter);

            for (ScoreDoc scoreDoc : hits) {
                // Load the stored document for this hit by its document number.
                Document document = searcher.doc(scoreDoc.doc);

                String storedTitle = document.get("title");
                String title = null;
                // Only highlight when there is a stored title to work on.
                if (storedTitle != null) {
                    title = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", storedTitle);
                }
                // No fragment means the title has no query keyword: show it as stored.
                if (title == null) {
                    title = storedTitle;
                }

                Map<String, Object> map = new HashMap<String, Object>();
                map.put("title", title);
                map.put("city", document.get("cityName"));
                map.put("company", document.get("companyName"));
                map.put("trade", document.get("tradeName"));
                map.put("salaryScope", document.get("salaryScope"));
                lists.add(map);
            }
        } catch (Exception e) {
            // Deliberately best-effort, as in the original: report and return what we have.
            e.printStackTrace();
        }
        return lists;
    }

3.关键字高亮: 代码在第二步中

 

posted @ 2016-08-10 14:28  gouermazi  阅读(313)  评论(0)  编辑  收藏  举报