Lucene——字符串检索
Lucene版本太多,最新的已经到8,而网络教程大都还是3,并且有类似于ES、Solr这样的封装框架,学习收益较低,因此并没有花很多时间研究的打算。
之前双十一购物时,有了个思考:“如何检索一个商品名称?”
用数据库的Like语句,功能实在太单薄,因此,用搜索引擎搞一个吧。
以下代码可以微调,设计成文件检索索引。
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>3.6.2</version> </dependency>
package lucese.test;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;

/**
 * Minimal Lucene 3.6 demo that indexes plain strings and searches them.
 *
 * <p>Two directories are kept: a file-system directory at the path given to
 * {@link #init(String)} and an in-memory {@link RAMDirectory} built from it.
 * Strings can be written to either one, and {@link #synce()} pushes the
 * in-memory index to disk.
 *
 * <p>All state is static and unsynchronized; the directories opened by
 * {@link #init(String)} are never closed by this class.
 */
public class StringIndexer {

    /** Name of the single field every document carries. */
    private static final String KEY = "string";

    /** Upper bound on the number of hits printed per query. */
    private static final int MAX_HITS = 100;

    /** Index location passed to {@link #init(String)}; stored but not read elsewhere in this class. */
    private static String path;

    private static Directory indexDirectory;
    private static Directory ramDirectory;
    private static Analyzer analyzer;
    private static IndexWriterConfig ramConfig;
    private static IndexWriterConfig discConfig;

    /**
     * Opens the on-disk index directory, builds the in-memory directory from it
     * and prepares the analyzer and writer configurations.
     *
     * @param out file-system path of the index directory
     * @throws IOException if a directory cannot be opened
     */
    public static void init(String out) throws IOException {
        path = out;
        indexDirectory = FSDirectory.open(new File(out));
        // Per the Lucene API, this constructor copies the current contents of the given directory.
        ramDirectory = new RAMDirectory(indexDirectory);
        // Standard tokenization.
        analyzer = new StandardAnalyzer(Version.LUCENE_36);
        ramConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        discConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        // NOTE(review): with CREATE, every writer opened on the disk directory starts a fresh
        // index, so createIndexInDisc keeps only the most recently written string - confirm
        // this is intended.
        discConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }

    /**
     * Adds {@code str} as a new document to the in-memory index.
     *
     * @param str text to index
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if the writer cannot be opened or the write fails
     */
    public static void createIndex(String str) {
        addToIndex(ramDirectory, ramConfig, str);
    }

    /**
     * Adds {@code str} as a new document to the on-disk index.
     *
     * @param str text to index
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if the writer cannot be opened or the write fails
     */
    public static void createIndexInDisc(String str) {
        addToIndex(indexDirectory, discConfig, str);
    }

    /**
     * Wraps {@code str} in a single-field document (stored and analyzed) and
     * adds it through {@code writer}.
     *
     * @param writer open writer that receives the document
     * @param str    text to index
     * @throws IOException if the writer fails to add the document
     */
    public static void write(IndexWriter writer, String str) throws IOException {
        Document document = new Document();
        document.add(new Field(KEY, str, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(document);
    }

    /**
     * Searches the in-memory index and prints the stored text of each hit
     * (at most {@value #MAX_HITS}) to standard output.
     *
     * @param str query string in QueryParser syntax
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if parsing or searching fails
     */
    public static void query(String str) {
        search(ramDirectory, str);
    }

    /**
     * Searches the on-disk index and prints the stored text of each hit
     * (at most {@value #MAX_HITS}) to standard output.
     *
     * @param str query string in QueryParser syntax
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if parsing or searching fails
     */
    public static void queryFromDisc(String str) {
        search(indexDirectory, str);
    }

    /**
     * Data synchronization: adds the in-memory index to the on-disk index.
     *
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if the writer cannot be opened or the merge fails
     */
    public static void synce() {
        requireInitialized();
        try (IndexWriter fsIndexWriter = new IndexWriter(indexDirectory, discConfig)) {
            fsIndexWriter.addIndexes(ramDirectory);
        } catch (IOException e) {
            throw new RuntimeException("create IndexWriter error:", e);
        }
    }

    /**
     * Demo: index one sentence in memory, sync it to disk, then query the disk index.
     * Calling {@link #query(String)} instead would search the in-memory index, and
     * {@link #createIndexInDisc(String)} would write to disk directly.
     */
    public static void main(String[] args) throws IOException {
        StringIndexer.init("D:/lucene/index");
        StringIndexer.createIndex("Mr.css is a teacher!");
        StringIndexer.synce();
        StringIndexer.queryFromDisc("Mr.css");
    }

    /** Opens a writer on {@code directory}, writes one document for {@code str} and closes the writer. */
    private static void addToIndex(Directory directory, IndexWriterConfig config, String str) {
        requireInitialized();
        try (IndexWriter writer = new IndexWriter(directory, config)) {
            write(writer, str);
        } catch (IOException e) {
            throw new RuntimeException("create IndexWriter error:", e);
        }
    }

    /** Parses {@code str} against the {@code KEY} field, searches {@code directory} and prints each hit. */
    private static void search(Directory directory, String str) {
        requireInitialized();
        try (IndexSearcher indexSearcher = new IndexSearcher(directory)) {
            // QueryParser arguments: match version, default field name, analyzer.
            QueryParser queryParser = new QueryParser(Version.LUCENE_36, KEY, analyzer);
            Query query = queryParser.parse(str);
            // Fetch the top MAX_HITS matching documents; fewer are returned if fewer match.
            TopDocs hits = indexSearcher.search(query, MAX_HITS);
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = indexSearcher.doc(scoreDoc.doc);
                System.out.println(doc.get(KEY));
            }
        } catch (Exception e) {
            // Broad catch kept on purpose: any failure here has always surfaced as "query error:".
            throw new RuntimeException("query error:", e);
        }
    }

    /** Fails fast with a clear message when a method is used before {@code init}. */
    private static void requireInitialized() {
        if (indexDirectory == null || ramDirectory == null || analyzer == null) {
            throw new IllegalStateException("StringIndexer.init(String) must be called first");
        }
    }
}
疯狂的妞妞 :每一天,做什么都好,不要什么都不做!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
2018-11-28 Itext pdf文字水印、文本水印