Lucene——字符串检索
Lucene版本太多,最新的已经到8,而网络教程大都还是3,并且有类似于ES、Solr这样的封装框架,学习收益较低,因此并没有花很多时间研究的打算。
之前双十一购物时,有了个思考:“如何检索一个商品名称?”
用数据库的Like语句,功能实在太单薄,因此,用搜索引擎搞一个吧。
以下代码可以微调,设计成文件检索索引。
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>3.6.2</version> </dependency>
package lucese.test;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;

/**
 * Minimal Lucene 3.6 demo that indexes plain strings and searches them.
 *
 * <p>Two directories are kept: an on-disk index and an in-memory copy of it that is
 * loaded by {@link #init(String)}. Strings can be added to either one, and
 * {@link #synce()} copies the in-memory index to disk.
 *
 * <p>All state is static and unsynchronized; {@link #init(String)} must be called
 * before any other method.
 */
public class StringIndexer {

    /** Name of the single field every indexed string is stored under. */
    private static final String KEY = "string";

    /** Upper bound on the number of hits printed by a query. */
    private static final int MAX_HITS = 100;

    private static String path;
    private static Directory indexDirectory;
    private static Directory ramDirectory;
    private static Analyzer analyzer;
    private static IndexWriterConfig ramConfig;
    private static IndexWriterConfig discConfig;

    /**
     * Opens the on-disk index at {@code out}, loads a copy of it into memory and
     * prepares the analyzer and writer configurations.
     *
     * @param out file-system path of the index directory; created if it does not exist
     * @throws IOException if the directory cannot be created, opened or copied into memory
     */
    public static void init(String out) throws IOException {
        File indexDir = new File(out);
        // Copying the directory into RAM lists its files immediately, which fails when the
        // directory does not exist yet (e.g. on the very first run), so create it up front.
        if (!indexDir.mkdirs() && !indexDir.isDirectory()) {
            throw new IOException("Cannot create index directory: " + indexDir);
        }

        path = out;
        indexDirectory = FSDirectory.open(indexDir);
        ramDirectory = new RAMDirectory(indexDirectory);

        // Standard tokenization.
        analyzer = new StandardAnalyzer(Version.LUCENE_36);
        ramConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        discConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        // NOTE(review): CREATE replaces whatever is already on disk each time a disk writer
        // is opened. That suits synce() (the RAM index already holds the disk content loaded
        // by init), but it also means createIndexInDisc() discards earlier documents.
        // Confirm this is intended before relying on createIndexInDisc() for accumulation.
        discConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }

    /**
     * Adds {@code str} as a new document to the in-memory index.
     *
     * @param str text to index
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if the writer cannot be opened, written or closed
     */
    public static void createIndex(String str) {
        requireInitialized();
        try (IndexWriter writer = new IndexWriter(ramDirectory, ramConfig)) {
            write(writer, str);
        } catch (IOException e) {
            throw new RuntimeException("create IndexWriter error:", e);
        }
    }

    /**
     * Adds {@code str} as a new document to the on-disk index.
     *
     * @param str text to index
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if the writer cannot be opened, written or closed
     */
    public static void createIndexInDisc(String str) {
        requireInitialized();
        try (IndexWriter writer = new IndexWriter(indexDirectory, discConfig)) {
            write(writer, str);
        } catch (IOException e) {
            throw new RuntimeException("create IndexWriter error:", e);
        }
    }

    /**
     * Wraps {@code str} in a single-field document (stored and analyzed) and adds it
     * through {@code writer}.
     *
     * @param writer open writer to add the document to
     * @param str    text to index
     * @throws IOException if the writer fails to add the document
     */
    public static void write(IndexWriter writer, String str) throws IOException {
        Document document = new Document();
        document.add(new Field(KEY, str, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(document);
    }

    /**
     * Searches the in-memory index and prints the stored text of up to 100 hits to stdout.
     *
     * @param str query in Lucene query-parser syntax
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if the query cannot be parsed or the index cannot be read
     */
    public static void query(String str) {
        requireInitialized();
        search(ramDirectory, str);
    }

    /**
     * Searches the on-disk index and prints the stored text of up to 100 hits to stdout.
     *
     * @param str query in Lucene query-parser syntax
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if the query cannot be parsed or the index cannot be read
     */
    public static void queryFromDisc(String str) {
        requireInitialized();
        search(indexDirectory, str);
    }

    /**
     * Data synchronization: copies the in-memory index to disk.
     *
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws RuntimeException if the disk writer cannot be opened or the copy fails
     */
    public static void synce() {
        requireInitialized();
        try (IndexWriter fsIndexWriter = new IndexWriter(indexDirectory, discConfig)) {
            fsIndexWriter.addIndexes(ramDirectory);
        } catch (IOException e) {
            throw new RuntimeException("sync index error:", e);
        }
    }

    /** Runs {@code str} against {@code directory} and prints each hit's stored text. */
    private static void search(Directory directory, String str) {
        try (IndexSearcher indexSearcher = new IndexSearcher(directory)) {
            // QueryParser arguments: match version, default field name, analyzer.
            QueryParser queryParser = new QueryParser(Version.LUCENE_36, KEY, analyzer);
            Query query = queryParser.parse(str);

            // Ask for the top MAX_HITS matches; fewer are returned when fewer exist.
            TopDocs hits = indexSearcher.search(query, MAX_HITS);
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = indexSearcher.doc(scoreDoc.doc);
                System.out.println(doc.get(KEY));
            }
        } catch (IOException | ParseException e) {
            throw new RuntimeException("query error:", e);
        }
    }

    /** Fails fast with a clear message when {@link #init(String)} has not run yet. */
    private static void requireInitialized() {
        if (indexDirectory == null || ramDirectory == null || analyzer == null) {
            throw new IllegalStateException("StringIndexer.init(String) must be called first");
        }
    }

    /**
     * Demo entry point: indexes one sentence in memory, syncs it to disk and queries the
     * on-disk index.
     *
     * @param args unused
     * @throws IOException if the index directory cannot be initialized
     */
    public static void main(String[] args) throws IOException {
        StringIndexer.init("D:/lucene/index");
        StringIndexer.createIndex("Mr.css is a teacher!");
        StringIndexer.synce();
        StringIndexer.queryFromDisc("Mr.css");
    }
}
疯狂的妞妞 :每一天,做什么都好,不要什么都不做!