Lucene——字符串检索

Lucene版本太多,最新的已经到8,而网络教程大都还是3,并且有类似于ES、Solr这样的封装框架,学习收益较低,因此并没有花很多时间研究的打算。

 

之前双十一购物时,有了个思考:“如何检索一个商品名称?”

用数据库的Like语句,功能实在太单薄,因此,用搜索引擎搞一个吧。

 

以下代码可以微调,设计成文件检索索引。

        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>3.6.2</version>
        </dependency>

 

复制代码
package lucese.test;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;

/**
 * Minimal Lucene 3.6 demo that indexes plain strings and searches them.
 *
 * <p>Two indexes are kept: an on-disk index under the path given to {@link #init(String)}
 * and an in-memory copy of it that is loaded once during {@code init}. Strings can be added
 * to either index; {@link #synce()} writes the in-memory index back to disk.
 *
 * <p>All state is static and nothing here is synchronized.
 */
public class StringIndexer {
  /** Name of the single field that stores the indexed string. */
  private static final String KEY = "string";
  /** Upper bound on the number of hits a query prints. */
  private static final int MAX_HITS = 100;

  /** Index path passed to {@link #init(String)}; recorded only, not read by this class. */
  private static String path;
  private static Directory indexDirectory;
  private static Directory ramDirectory;
  private static Analyzer analyzer;

  /**
   * Opens the on-disk index directory and loads its current content into memory.
   *
   * <p>The directory is created when it does not exist yet, because copying a missing
   * directory into the in-memory index would otherwise fail on a first run.
   *
   * @param out file-system path of the index directory
   * @throws IOException if the directory cannot be created, opened or copied into memory
   */
  public static void init(String out) throws IOException {
    File indexDir = new File(out);
    if (!indexDir.isDirectory() && !indexDir.mkdirs()) {
      throw new IOException("cannot create index directory: " + out);
    }

    path = out;
    indexDirectory = FSDirectory.open(indexDir);
    ramDirectory = new RAMDirectory(indexDirectory);

    // Standard tokenization.
    analyzer = new StandardAnalyzer(Version.LUCENE_36);
  }

  /**
   * Adds one string as a document to the in-memory index.
   *
   * @param str text to index
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping the {@link IOException} if writing fails
   */
  public static void createIndex(String str) {
    requireInitialized();
    try (IndexWriter writer = new IndexWriter(
        ramDirectory, newConfig(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
      write(writer, str);
    } catch (IOException e) {
      throw new RuntimeException("create IndexWriter error:", e);
    }
  }

  /**
   * Adds one string as a document directly to the on-disk index.
   *
   * <p>The writer appends to the existing index; opening it in {@code CREATE} mode here
   * would erase every previously indexed document on each call. Note that documents added
   * this way are not in the in-memory index, so a later {@link #synce()} (which replaces
   * the disk index with the in-memory one) discards them.
   *
   * @param str text to index
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping the {@link IOException} if writing fails
   */
  public static void createIndexInDisc(String str) {
    requireInitialized();
    try (IndexWriter writer = new IndexWriter(
        indexDirectory, newConfig(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
      write(writer, str);
    } catch (IOException e) {
      throw new RuntimeException("create IndexWriter error:", e);
    }
  }

  /**
   * Wraps {@code str} in a single-field document and adds it through {@code writer}.
   *
   * @param writer open writer that receives the document
   * @param str text stored and analyzed under the {@code "string"} field
   * @throws IOException if the writer fails to add the document
   */
  public static void write(IndexWriter writer, String str) throws IOException {
    Document document = new Document();
    document.add(new Field(KEY, str, Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(document);
  }

  /**
   * Searches the in-memory index and prints the stored text of each hit to standard output.
   *
   * @param str query in Lucene query-parser syntax
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping any failure while parsing or searching
   */
  public static void query(String str) {
    requireInitialized();
    search(ramDirectory, str);
  }

  /**
   * Searches the on-disk index and prints the stored text of each hit to standard output.
   *
   * @param str query in Lucene query-parser syntax
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping any failure while parsing or searching
   */
  public static void queryFromDisc(String str) {
    requireInitialized();
    search(indexDirectory, str);
  }

  /**
   * Synchronizes memory to disk: replaces the on-disk index with the in-memory index.
   *
   * <p>The disk writer is opened in {@code CREATE} mode on purpose. The in-memory index
   * started as a copy of the disk index, so appending it would duplicate every document
   * that was already on disk.
   *
   * @throws IllegalStateException if {@link #init(String)} has not been called
   * @throws RuntimeException wrapping the {@link IOException} if writing fails
   */
  public static void synce() {
    requireInitialized();
    try (IndexWriter fsIndexWriter = new IndexWriter(
        indexDirectory, newConfig(IndexWriterConfig.OpenMode.CREATE))) {
      fsIndexWriter.addIndexes(ramDirectory);
    } catch (IOException e) {
      throw new RuntimeException("create IndexWriter error:", e);
    }
  }

  /**
   * Demo entry point: indexes one sentence in memory, syncs it to disk and queries the disk.
   *
   * @param args optional; {@code args[0]} overrides the default index path
   * @throws IOException if the index directory cannot be initialized
   */
  public static void main(String[] args) throws IOException {
    String indexPath = args.length > 0 ? args[0] : "D:/lucene/index";
    StringIndexer.init(indexPath);
    StringIndexer.createIndex("Mr.css is a teacher!");
    StringIndexer.synce();
    StringIndexer.queryFromDisc("Mr.css");
  }

  /** Parses {@code str} against the indexed field and prints up to MAX_HITS stored values. */
  private static void search(Directory directory, String str) {
    try (IndexSearcher indexSearcher = new IndexSearcher(directory)) {
      // QueryParser arguments: match version, default field name, analyzer.
      QueryParser queryParser = new QueryParser(Version.LUCENE_36, KEY, analyzer);
      Query query = queryParser.parse(str);

      // Fetch the top MAX_HITS matches; fewer are returned when fewer documents match.
      TopDocs hits = indexSearcher.search(query, MAX_HITS);
      for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = indexSearcher.doc(scoreDoc.doc);
        System.out.println(doc.get(KEY));
      }
    } catch (Exception e) {
      throw new RuntimeException("query error:", e);
    }
  }

  /** Builds a fresh writer configuration so each writer gets its own open mode. */
  private static IndexWriterConfig newConfig(IndexWriterConfig.OpenMode openMode) {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    config.setOpenMode(openMode);
    return config;
  }

  /** Fails fast with a clear message when a method is used before {@link #init(String)}. */
  private static void requireInitialized() {
    if (indexDirectory == null || ramDirectory == null || analyzer == null) {
      throw new IllegalStateException("StringIndexer.init(path) must be called first");
    }
  }
}
复制代码

 

posted on   疯狂的妞妞  阅读(188)  评论(0编辑  收藏  举报

(评论功能已被禁用)
编辑推荐:
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
阅读排行:
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
历史上的今天:
2018-11-28 Itext pdf文字水印、文本水印
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示