Lucene 简单使用
package com.kite.luncene.helloworld;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import com.kite.bean.Article;

/**
 * Minimal Lucene walkthrough: one test writes a single {@link Article} into the
 * index stored under {@code ./indexDir}, the other searches that index by id and
 * prints the matches.
 */
public class HelloWorld
{
    /**
     * Builds one article, converts it to a Lucene document and adds it to the index.
     *
     * Steps:
     * 1. create an article and fill in its data;
     * 2. open an IndexWriter on the index directory and add the document;
     * 3. close the IndexWriter (and the directory), even when indexing fails.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testCreate() throws Exception
    {
        // 1. Create the article and populate it.
        Article article = new Article();
        article.setId(1L);
        article.setTitle("java goodnice");
        article.setContent("多年来就是这么吊");

        // 2. Open the index. The writer is given the index location, the analyzer
        //    and the maximum field length policy.
        Directory directory = FSDirectory.open(new File("./indexDir"));
        try
        {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
            IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
            try
            {
                // Convert the article into a document: the id is stored but not
                // analyzed, title and content are stored and analyzed.
                Document document = new Document();
                document.add(new Field("id", article.getId().toString(), Store.YES, Index.NOT_ANALYZED));
                document.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
                document.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
                indexWriter.addDocument(document);
            }
            finally
            {
                // 3. Always release the writer so a failure does not leave it open.
                indexWriter.close();
            }
        }
        finally
        {
            directory.close();
        }
    }

    /**
     * Searches the index for documents whose {@code id} field matches "1",
     * converts up to ten hits back into articles and prints them.
     *
     * Steps:
     * 1. create an IndexSearcher on the index directory;
     * 2. parse the query and run the search;
     * 3. print the id, title and content of every hit.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Test
    public void testSearchIndex() throws Exception
    {
        List<Article> articles = new ArrayList<Article>();

        // 1. Open the index for searching.
        Directory directory = FSDirectory.open(new File("./indexDir"));
        try
        {
            IndexSearcher searcher = new IndexSearcher(directory);
            try
            {
                // 2. Parse the keyword against the "id" field and fetch the top 10 hits.
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
                QueryParser queryParser = new QueryParser(Version.LUCENE_30, "id", analyzer);
                Query query = queryParser.parse("1");
                // topDocs.totalHits holds the total number of matching records;
                // scoreDocs holds the returned hits (each with a score and a doc number).
                TopDocs topDocs = searcher.search(query, 10);

                for (ScoreDoc scoreDoc : topDocs.scoreDocs)
                {
                    // scoreDoc.doc is the document number used to load the stored document.
                    Document document = searcher.doc(scoreDoc.doc);

                    // Convert the document back into an article.
                    Article article = new Article();
                    article.setId(Long.parseLong(document.get("id")));
                    article.setTitle(document.get("title"));
                    article.setContent(document.get("content"));
                    articles.add(article);
                }
            }
            finally
            {
                searcher.close();
            }
        }
        finally
        {
            directory.close();
        }

        // 3. Print the results.
        for (Article article : articles)
        {
            System.out.println(article.getId());
            System.out.println(article.getTitle());
            System.out.println(article.getContent());
        }
    }
}
两个工具类
package com.kite.luncene.utils;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Holds the index directory and the analyzer shared by the index examples.
 * Both are created once, when this class is first loaded.
 */
public class LunceneUtils
{
    /** The index directory (location of the index on disk). */
    public static Directory directory;

    /** The analyzer (tokenizer) used for indexing and for query parsing. */
    public static Analyzer analyzer;

    static
    {
        try
        {
            // NOTE(review): other code in this write-up uses "./indexDir"; confirm
            // whether "./indexDor" is intentional or a typo before changing it.
            directory = FSDirectory.open(new File("./indexDor"));
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
        }
        catch (IOException e)
        {
            // Fail fast with the real cause instead of leaving the fields null,
            // which would only surface later as an unrelated NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }
}
package com.kite.luncene.utils;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;

import com.kite.bean.Article;

/**
 * Conversions between {@link Article} objects and Lucene {@link Document}s.
 */
public class DocumentUtils
{
    /**
     * Builds a document from an article.
     *
     * The document gets three stored fields: "id" (not analyzed), "title" and
     * "content" (both analyzed).
     *
     * @param article the article to convert
     * @return a new document carrying the article's id, title and content
     */
    public static Document articleToDocument(Article article)
    {
        Document result = new Document();
        result.add(new Field("id", article.getId().toString(), Store.YES, Index.NOT_ANALYZED));
        result.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
        result.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
        return result;
    }

    /**
     * Builds an article from a document.
     *
     * Reads the "id", "title" and "content" fields; the id is parsed as a long.
     *
     * @param document the document to convert
     * @return a new article populated from the document's fields
     */
    public static Article documentToArticle(Document document)
    {
        Article result = new Article();
        String idText = document.get("id");
        result.setId(Long.parseLong(idText));
        String title = document.get("title");
        result.setTitle(title);
        String content = document.get("content");
        result.setContent(content);
        return result;
    }
}
使用工具类实现简单的增删改查功能
package com.kite.luncene.index;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.junit.Test;

import com.kite.bean.Article;
import com.kite.luncene.utils.DocumentUtils;
import com.kite.luncene.utils.LunceneUtils;

/**
 * Create / search / delete / update examples against the shared index exposed by
 * {@link LunceneUtils}, using {@link DocumentUtils} for the conversions.
 *
 * The shared directory is not closed here because it is owned by LunceneUtils.
 */
public class ArticleIndex
{
    /**
     * Adds one article to the index.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testCreateIndex() throws Exception
    {
        IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
        try
        {
            Article article = new Article();
            article.setId(1L);
            article.setTitle("luncenes是一个好难写的东西");
            article.setContent("百度,谷歌是很好的搜索引擎");

            // Convert to a document through the utility class.
            Document document = DocumentUtils.articleToDocument(article);
            indexWriter.addDocument(document);
        }
        finally
        {
            // Close the writer even if indexing fails.
            indexWriter.close();
        }
    }

    /**
     * Searches the "title" field for the keyword "luncene" and prints up to two hits.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Test
    public void testSearchIndex() throws Exception
    {
        List<Article> articles = new ArrayList<Article>();

        IndexSearcher indexSearcher = new IndexSearcher(LunceneUtils.directory);
        try
        {
            /*
             * Version.LUCENE_30     the version
             * "title"               the field to search in
             * LunceneUtils.analyzer the analyzer
             */
            QueryParser queryParser = new QueryParser(Version.LUCENE_30, "title", LunceneUtils.analyzer);
            // "luncene" is the search keyword.
            Query query = queryParser.parse("luncene");
            // topDocs.totalHits holds the total number of records matching the keyword.
            TopDocs topDocs = indexSearcher.search(query, 2);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs)
            {
                // scoreDoc.score is the keyword score; scoreDoc.doc is the document
                // number used to load the stored document.
                Document document = indexSearcher.doc(scoreDoc.doc);
                articles.add(DocumentUtils.documentToArticle(document));
            }
        }
        finally
        {
            indexSearcher.close();
        }

        // Print every hit; all three values go to the same stream so the output
        // stays in order.
        for (Article article : articles)
        {
            System.out.println(article.getId().toString());
            System.out.println(article.getTitle());
            System.out.println(article.getContent());
        }
    }

    /**
     * Deletes every document whose "title" field contains the term "luncenes".
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testDeleteIndex() throws Exception
    {
        IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
        try
        {
            // indexWriter.deleteAll() would remove everything instead.
            /*
             * Term: the keyword object
             *   "title"    the field name
             *   "luncenes" the keyword text
             */
            Term term = new Term("title", "luncenes");
            // Delete by keyword. (Original author's note: this adds a file ending
            // in .del to the index folder.)
            indexWriter.deleteDocuments(term);
        }
        finally
        {
            indexWriter.close();
        }
    }

    /**
     * Replaces the documents matching the term "luncenes" in "title" with a new
     * version of the article. An update is a delete followed by an add.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testUpdateIndex() throws Exception
    {
        IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
        try
        {
            Term term = new Term("title", "luncenes");
            Article article = new Article();
            article.setId(1L);
            article.setTitle("luncene是一个好难写的东西,少个s不解释");
            article.setContent("百度,谷歌是很好的搜索引擎");
            /*
             * term: selects the documents to replace
             * doc:  the content after the update
             */
            indexWriter.updateDocument(term, DocumentUtils.articleToDocument(article));
        }
        finally
        {
            indexWriter.close();
        }
    }
}