随笔 - 1357  文章 - 0  评论 - 1104  阅读 - 1941万

lucene中FSDirectory、RAMDirectory的用法

复制代码
package com.ljq.one;



import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Test;

public class DirectoryTest {
    // 数据源路径
    String dspath = "E:/workspace/mylucene/lucenes/IndexWriter addDocument's a javadoc .txt";
    //存放索引文件的位置,即索引库
    String indexpath = "E:/workspace/mylucene/luceneIndex";
    //分词器
    Analyzer analyzer = new StandardAnalyzer();
    
    /**
     * 创建索引,会抛异常,因为没对索引库进行保存
     * 
     * IndexWriter 用来操作(增、删、改)索引库的
     */
    @Test
    public void createIndex() throws Exception {
        //Directory dir=FSDirectory.getDirectory(indexpath);
        //内存存储:优点速度快,缺点程序退出数据就没了,所以记得程序退出时保存索引库,已FSDirectory结合使用
        //由于此处只暂时保存在内存中,程序退出时没进行索引库保存,因此在搜索时程序会报错
        Directory dir=new RAMDirectory();
        File file = new File(dspath);
        //Document存放经过组织后的数据源,只有转换为Document对象才可以被索引和搜索到
        Document doc = new Document();
        //文件名称
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        //检索到的内容
        doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
        //文件大小
        doc.add(new Field("size", NumberTools.longToString(file.length()),
                Store.YES, Index.NOT_ANALYZED));
        //检索到的文件位置
        doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));

        // 建立索引
        //第一种方式
        //IndexWriter indexWriter = new IndexWriter(indexpath, analyzer, MaxFieldLength.LIMITED);
        //第二种方式
        IndexWriter indexWriter = new IndexWriter(dir, analyzer, MaxFieldLength.LIMITED);
        indexWriter.addDocument(doc);
        indexWriter.close();
    }
    
    /**
     * 创建索引(推荐)
     * 
     * IndexWriter 用来操作(增、删、改)索引库的
     */
    @Test
    public void createIndex2() throws Exception {
        Directory fsDir = FSDirectory.getDirectory(indexpath);
        //1、启动时读取
        Directory ramDir = new RAMDirectory(fsDir);
        
        // 运行程序时操作ramDir
        IndexWriter ramIndexWriter = new IndexWriter(ramDir, analyzer, MaxFieldLength.LIMITED);
        
        //数据源
        File file = new File(dspath);
        // 添加 Document
        Document doc = new Document();
        //文件名称
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        //检索到的内容
        doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
        //文件大小
        doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
        //检索到的文件位置
        doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));
        ramIndexWriter.addDocument(doc);
        ramIndexWriter.close();
        
        //2、退出时保存
        IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, true, MaxFieldLength.LIMITED);
        fsIndexWriter.addIndexesNoOptimize(new Directory[]{ramDir});
        
        // 优化操作
        fsIndexWriter.commit();
        fsIndexWriter.optimize();
        
        fsIndexWriter.close();
    }
    
    /**
     * 优化操作
     * 
     * @throws Exception
     */
    @Test
    public void createIndex3() throws Exception{
        Directory fsDir = FSDirectory.getDirectory(indexpath);
        IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, MaxFieldLength.LIMITED);
        
        fsIndexWriter.optimize();
        fsIndexWriter.close();
    }
    
    /**
     * 搜索
     * 
     * IndexSearcher 用来在索引库中进行查询
     */
    @Test
    public void search() throws Exception {
        //请求字段
        //String queryString = "document";
        String queryString = "adddocument";

        // 1,把要搜索的文本解析为 Query
        String[] fields = { "name", "content" };
        QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
        Query query = queryParser.parse(queryString);

        // 2,进行查询,从索引库中查找
        IndexSearcher indexSearcher = new IndexSearcher(indexpath);
        Filter filter = null;
        TopDocs topDocs = indexSearcher.search(query, filter, 10000);
        System.out.println("总共有【" + topDocs.totalHits + "】条匹配结果");

        // 3,打印结果
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // 文档内部编号
            int index = scoreDoc.doc; 
            // 根据编号取出相应的文档
            Document doc = indexSearcher.doc(index);
            System.out.println("------------------------------");
            System.out.println("name = " + doc.get("name"));
            System.out.println("content = " + doc.get("content"));
            System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));
            System.out.println("path = " + doc.get("path"));
        }
    }

    /**
     * 读取文件内容
     */
    public static String readFileContent(File file) {
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            StringBuffer content = new StringBuffer();
            for (String line = null; (line = reader.readLine()) != null;) {
                content.append(line).append("\n");
            }
            reader.close();
            return content.toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    

}
复制代码

 

posted on   Ruthless  阅读(11083)  评论(0)  编辑  收藏  举报
编辑推荐:
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· 写一个简单的SQL生成工具
· AI 智能体引爆开源社区「GitHub 热点速览」
< 2011年3月 >
27 28 1 2 3 4 5
6 7 8 9 10 11 12
13 14 15 16 17 18 19
20 21 22 23 24 25 26
27 28 29 30 31 1 2
3 4 5 6 7 8 9

点击右上角即可分享
微信分享提示