Lucene(全文检索框架) 简单实例

前言:

Lucene是目前最受欢迎的Java全文搜索框架,准确地说,它是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎。Lucene为开发人员提供了相当完整的工具包,可以非常方便地实现强大的全文检索功能。

本实例主要演示如何使用 Lucene 将数据写入索引库,并从索引库中检索读取。

代码:

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.zzm</groupId>
  <artifactId>lucene</artifactId>
  <packaging>war</packaging>
  <version>0.0.1-SNAPSHOT</version>
  <name>lucene Maven Webapp</name>
  <url>http://maven.apache.org</url>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    
    <dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>3.0.1</version>
    </dependency>
    
    
    <dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers</artifactId>
    <version>3.0.1</version>
    </dependency>
    
    
    <dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>3.0.1</version>
    </dependency>
    
    <dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-memory</artifactId>
    <version>3.0.1</version>
    </dependency>
    
  </dependencies>
  <build>
    <finalName>lucene</finalName>
  </build>
</project>

 

Article.java (POJO对象)

package com.zzm.lucene.domain;

/**
 * @ClassName 文章类
 * @author zhanmin.zheng
 * @CreateDate 2016/02/26
 * @ModifyDate
 * @version 1.0
 */
public class Article {
    private Long id;//主键
    
    private String title;//标题
    
    private String content;//内容

    public Article() {
        super();
    }

    public Article(long id, String title, String content) {
        super();
        this.id = id;
        this.title = title;
        this.content = content;
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    @Override
    public String toString() {
        return "Article [id=" + id + ", title=" + title + ", content=" + content + "]";
    }
    
    
}

 

WeclomeService.java

package cm.zzm.lucene.service;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.zzm.lucene.domain.Article;

/**
 * @Description acticle对象持久化
 * @author zzm
 * @CreateDate 2016/02/26
 * @ModifyDate
 * @Version 1.0
 */
public class WeclomeService {
    
    public void CreateIndex() throws IOException {
        /**
         * 1.创建一个article对象,并且把信息存放进去
         * 2.创建indexWriter的api吧数据存放在索引库
         * 3.关闭indexWriter对象
         */
        Article article = new Article(1L, "lucene全文索引", "百度,google都是搜索引擎");
        /**
         * IndexWriter 
         * @Param 索引库
         * @Param 生成器
         */
//        Path path = Paths.get("./indexDir");//5.0版本使用path
//        Directory directory = FSDirectory.open(path);//创建一个索引库
        Directory directory = FSDirectory.open(new File("./indexDir"));//创建一个索引库
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);//创建分析器
        IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);//创建indexWriter对象,指定索引库
        Document document = new Document();
        Field idField = new Field("id", article.getId().toString(), Store.YES, Index.NOT_ANALYZED);
        Field titleField = new Field("title", article.getTitle(), Store.YES, Index.NOT_ANALYZED);
        Field contentField = new Field("content", article.getContent(), Store.YES, Index.NOT_ANALYZED);
        document.add(idField);
        document.add(titleField);
        document.add(contentField);
        indexWriter.addDocument(document);
        indexWriter.close();
    }
    
    public void searchIndex() throws IOException, ParseException {
        /**
         * 1.创建一个IndexSearcher对象
         * 2.调用search方法进行检索
         * 3.输出内容
         */
        /**
         * 创建一个IndexSearcher对象
         */
        Directory directory = FSDirectory.open(new File("./indexDir"));
        IndexSearcher IndexSearcher = new IndexSearcher(directory);
        /**
         * 调用search方法进行检索
         */
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
        Query query = queryParser.parse("goo");//查询的关键词
        TopDocs topDocs = IndexSearcher.search(query, 2);
        int count = topDocs.totalHits;//根据关键词查询出来的记录数
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        List<Article> articleList = new ArrayList<Article>();
        for (ScoreDoc scoreDoc : scoreDocs) {
            float score = scoreDoc.score;//关键字得分
            int index = scoreDoc.doc;//索引下标
            Document document = IndexSearcher.doc(index);
            /**
             * 把document转换成article对象
             */
            Article article = new Article();
            article.setId(Long.parseLong(document.get("id")));//document.getField("id").stringValue()
            article.setTitle(document.get("title"));
            article.setContent(document.get("content"));    
            articleList.add(article);
        }
        
        System.out.println("articleList is "+articleList);
    }
    
}

 

posted @ 2016-02-26 22:57  sz_zzm  阅读(431)  评论(0编辑  收藏  举报