【lucene】一个简单的招聘网站的建立
1.建立索引库: 核心代码如下
package com.tabchanj.job.index; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.Scorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import com.tabchanj.job.domain.JobApply; import com.tabchanj.job.util.LuceneUtils; @SuppressWarnings("deprecation") public class JobApplyIndexHelper { public void createIndex(List<JobApply> jobApplies, boolean rebuild) { // 索引写入器,要保证全应用只有一个写入器,多个写入器同时写入会报错 try { IndexWriter indexWriter = LuceneUtils.getIndexWriter(); if (rebuild) { indexWriter.deleteAll(); indexWriter.commit(); } for (JobApply jobApply : jobApplies) { // 将每一条数据包装为document Document document = obj2Doc(jobApply); // 添加到索引库中 indexWriter.addDocument(document); } indexWriter.commit(); } catch (Exception e) { throw new RuntimeException(e.getMessage()); } } private Document obj2Doc(JobApply jobApply) { Document document = new Document(); document.add(new Field("id", jobApply.getId() + "", Store.YES, Index.NO)); document.add(new Field("title", jobApply.getTitle(), Store.YES, Index.NO)); document.add(new Field("content", jobApply.getContent(), Store.NO, Index.ANALYZED)); document.add(new Field("companyId", jobApply.getCompany().getId() + "", Store.YES, 
Index.NOT_ANALYZED)); document.add(new Field("companyName", jobApply.getCompany().getName(), Store.YES, Index.ANALYZED)); document.add(new Field("cityName", jobApply.getCity().getName(), Store.YES, Index.ANALYZED)); document.add(new Field("cityId", jobApply.getCity().getId() + "", Store.YES, Index.NOT_ANALYZED)); document.add(new Field("tradeId", jobApply.getTrade().getId() + "", Store.YES, Index.NOT_ANALYZED)); document.add(new Field("tradeName", jobApply.getTrade().getName(), Store.YES, Index.ANALYZED)); document.add(new Field("salaryScope", jobApply.getSalaryLevel().getName(), Store.YES, Index.ANALYZED)); document.add(new Field("salaryId", jobApply.getSalaryLevel().getId() + "", Store.YES, Index.NOT_ANALYZED)); return document; }
//上面使用的LuceneUtils工具类代码如下:
package com.tabchanj.job.util;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * General-purpose Lucene helpers: shared analyzer, index directory and index writer.
 *
 * @author tab
 */
public class LuceneUtils {

    /** Index directory, opened lazily by {@link #getDirectory()}. */
    private static Directory directory = null;

    /** Analyzer shared by indexing and highlighting. */
    private static final Analyzer analyzer = new IKAnalyzer();

    /**
     * Shared index writer, created lazily by {@link #getIndexWriter()}.
     * Declared volatile because it is read outside the lock in that method.
     */
    private static volatile IndexWriter indexWriter = null;

    /**
     * @return the shared analyzer instance
     */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Builds the index directory path: {@code <webAppPath>/WEB-INF/index/}.
     *
     * <p>A separator is inserted after {@code Global.webAppPath} when that value is
     * non-empty and does not already end with one, so the result is well-formed
     * whether or not the configured root has a trailing separator.
     *
     * @return the index path, always ending with a file separator
     */
    public static String getIndexPath() {
        StringBuilder pathname = new StringBuilder(Global.webAppPath);
        int length = pathname.length();
        if (length > 0) {
            char last = pathname.charAt(length - 1);
            if (last != '/' && last != File.separatorChar) {
                pathname.append(File.separator);
            }
        }
        pathname.append("WEB-INF").append(File.separator).append("index").append(File.separator);
        return pathname.toString();
    }

    /**
     * Returns the index {@link Directory}, opening it on first use.
     *
     * <p>Synchronized on the class so that concurrent first calls cannot open the
     * directory twice.
     *
     * @return the shared directory
     * @throws RuntimeException wrapping any failure to open the directory
     */
    public static synchronized Directory getDirectory() {
        try {
            if (directory == null) {
                directory = FSDirectory.open(new File(getIndexPath()));
            }
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
        return directory;
    }

    /**
     * Returns the shared {@link IndexWriter}, creating it on first use.
     *
     * @return the shared writer
     * @throws RuntimeException wrapping any failure to create the writer
     */
    public static IndexWriter getIndexWriter() {
        try {
            if (indexWriter == null) {
                synchronized (LuceneUtils.class) {
                    if (indexWriter == null) {
                        Version version = Version.LUCENE_4_10_4;
                        indexWriter = new IndexWriter(getDirectory(),
                                new IndexWriterConfig(version, getAnalyzer()));
                    }
                }
            }
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
        return indexWriter;
    }

    /**
     * Closes the shared writer if one was created.
     *
     * <p>After a successful close the cached reference is cleared, so a later
     * {@link #getIndexWriter()} creates a fresh writer instead of returning the
     * closed one. If closing fails the reference is left in place.
     *
     * @throws RuntimeException wrapping any failure to close the writer
     */
    public static synchronized void closeIndexWriter() {
        try {
            if (indexWriter != null) {
                indexWriter.close();
                indexWriter = null;
            }
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }
}
//上面用到的Global.webAppPath变量如下
package com.tabchanj.job.util; public class Global { public static String webAppPath=""; }
//下面的监听器用于在容器启动时给Global.webAppPath变量赋值
package com.tabchanj.job.listener;

import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;

import com.tabchanj.job.util.Global;
import com.tabchanj.job.util.LuceneUtils;

/**
 * Servlet context listener that records the web application root on startup and
 * closes the shared index writer on shutdown.
 */
public class WebAppListener implements ServletContextListener {

    /**
     * On container startup, stores the real path of {@code "/"} in
     * {@link Global#webAppPath}.
     */
    @Override
    public void contextInitialized(ServletContextEvent event) {
        Global.webAppPath = event.getServletContext().getRealPath("/");
    }

    /**
     * On container shutdown, closes the index writer held by {@link LuceneUtils}.
     */
    @Override
    public void contextDestroyed(ServletContextEvent event) {
        LuceneUtils.closeIndexWriter();
    }
}
2.查询索引库: 核心代码如下
/** * * 搜索索引库 * * @param query */ public List<Map<String, Object>> search(Query query) { List<Map<String, Object>> lists = new ArrayList<Map<String, Object>>(); try { // 加载索引库 IndexReader reader = IndexReader.open(LuceneUtils.getDirectory()); // 获取加载了Indexreader的索引搜索器 IndexSearcher searcher = new IndexSearcher(reader); // 获取结果对象 TopDocs docs = searcher.search(query, 1000); // 获取结果数组 ScoreDoc[] socres = docs.scoreDocs; // 创建高亮器========================================================= Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");//设置关键字的高亮格式 Scorer fragmentScorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, fragmentScorer); Fragmenter fragmenter = new SimpleFragmenter(200);//设置包含关键字的片段长度 highlighter.setTextFragmenter(fragmenter ); // ==================================================================== // 遍历结果数组 for (ScoreDoc scoreDoc : socres) { // 获取文档编号 int docNumber = scoreDoc.doc; // 根据编号搜索文档 Document document = searcher.doc(docNumber); // 把文档相应的字段封装到集合的map中 Map<String, Object> map = new HashMap<String, Object>(); // 在需要的字段使用高亮器 String title = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", document.get("title")); System.out.println("title:before="+title); //针对title中没有出先搜索的关键字的title直接输出其本来内容 if (title == null) { title = document.get("title"); } System.out.println("title:after="+title); map.put("title", title); map.put("city", document.get("cityName")); map.put("company", document.get("companyName")); map.put("trade", document.get("tradeName")); map.put("salaryScope", document.get("salaryScope")); lists.add(map); } } catch (Exception e) { e.printStackTrace(); // throw new RuntimeException(e.getMessage()); } return lists; }
3.关键字高亮: 代码在第二步中