Lucene4:创建一个简单查询

1. 要求

环境:

  Lucene 4.1版本/IKAnalyzer 2012 FF版本/mmseg4j 1.9版本
功能:
  1). 默认查询,显示文档得分及内容

2. 实现代码

package com.clzhang.sample.lucene;

import java.io.*;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;

//import org.wltea.analyzer.lucene.IKAnalyzer;
import com.chenlb.mmseg4j.Dictionary;
import com.chenlb.mmseg4j.analysis.ComplexAnalyzer;

import org.junit.Test;

/**
 * Minimal Lucene search example.
 * <p>
 * Environment: Lucene 4.1 / IKAnalyzer 2012 FF / mmseg4j 1.9.
 * Function: run a default query and print each hit's score and content.
 *
 * @author Administrator
 */
public class QueryDemo {
    // mmseg4j dictionary path
    private static final String MMSEG4J_DICT_PATH = "C:\\solr\\news\\conf";
    private static Dictionary dictionary = Dictionary.getInstance(MMSEG4J_DICT_PATH);

    // Directory where the Lucene index is stored
    private static final String LUCENE_INDEX_DIR = "C:\\lucene\\data";

    /**
     * Opens the index under {@link #LUCENE_INDEX_DIR}, parses a fixed keyword
     * against the "text" field, and prints the total hit count, the maximum
     * score, and the score/title/content of up to five top hits.
     * <p>
     * The index reader and the directory are closed in {@code finally} blocks
     * so they are released even when parsing or searching throws.
     *
     * @throws Exception if the index cannot be opened or read, or the query
     *                   cannot be parsed
     */
    @Test
    public void doQuery() throws Exception {
        // Alternative analyzer (needs the IKAnalyzer import to be enabled):
        //     Analyzer analyzer = new IKAnalyzer();

        // mmseg4j analyzer; can be swapped for either of the other two mmseg4j analyzers
        Analyzer analyzer = new ComplexAnalyzer(dictionary);

        Directory directory = FSDirectory.open(new File(LUCENE_INDEX_DIR));
        try {
            DirectoryReader reader = DirectoryReader.open(directory);
            try {
                // Build the searcher on top of the opened reader
                IndexSearcher searcher = new IndexSearcher(reader);
                String keyword = "空姐";

                // Build the Query object with the classic QueryParser
                QueryParser qp = new QueryParser(Version.LUCENE_41, "text", analyzer);
                Query query = qp.parse(keyword);

                // Fetch the 5 highest-scoring documents
                TopDocs hits = searcher.search(query, 5);
                System.out.println("命中:" + hits.totalHits);
                System.out.println("最高得分:" + hits.getMaxScore());

                // Print each hit
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = searcher.doc(scoreDoc.doc);
                    System.out.println("得分:" + scoreDoc.score + "\t标题:"
                            + doc.get("title") + "\t内容:" + doc.get("content"));
                }
            } finally {
                // Release the reader before the directory it was opened from
                reader.close();
            }
        } finally {
            directory.close();
        }
    }
}

输出:

命中:1
最高得分:0.4790727
得分:0.4790727 标题:广州打空姐区政委 内容:昨天,记者相继连线广州市越秀区宣传部

posted @ 2013-01-25 16:24  那些年的事儿  阅读(370)  评论(0)  编辑  收藏  举报