第二阶段团队冲刺

下面是一个简单的使用Lucene进行文本搜索和精准识别的代码示例。这个示例将读取一个文本文件,创建一个Lucene索引,并进行搜索和精准识别。

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

/**
 * Minimal Lucene demo: indexes a single text file into an on-disk index, then
 * searches that index for a piece of text and prints the IDs (file names) of
 * the matching documents.
 */
public class LuceneSearchAndIdentify {

    private static final String INDEX_DIRECTORY = "lucene-index";
    private static final String CONTENT_FIELD_NAME = "content";
    private static final String ID_FIELD_NAME = "id";
    /** Maximum number of hits printed by {@link #search}. */
    private static final int MAX_HITS = 10;

    /**
     * Indexes {@code sample.txt} and then searches the index for a fixed sentence.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        String textToIdentify = "The quick brown fox jumps over the lazy dog";
        String filePath = "sample.txt";

        // 1. Build (or refresh) the index.
        createIndex(new File(filePath));

        // 2. Search the indexed text.
        String[] fieldsToSearch = { CONTENT_FIELD_NAME };
        String queryStr = textToIdentify;
        try {
            search(fieldsToSearch, queryStr);
        } catch (Exception e) {
            System.out.println("搜索出错:" + e.getMessage());
        }
    }

    /**
     * Creates the analyzer used for BOTH indexing and querying.
     *
     * <p>The two sides must tokenize text the same way or terms never line up.
     * A keyword analyzer would index the whole file as one single token while the
     * query parser splits the query on whitespace, so nothing would ever match;
     * a word-level analyzer avoids that.
     */
    private static Analyzer newAnalyzer() {
        return new StandardAnalyzer();
    }

    /**
     * Indexes the content of {@code file} under {@link #CONTENT_FIELD_NAME} and stores
     * its file name under {@link #ID_FIELD_NAME}.
     *
     * <p>The file is decoded as UTF-8; bytes that are not valid UTF-8 surface as an
     * {@link IOException}, which is reported like any other indexing failure.
     *
     * @param file the text file to index
     */
    private static void createIndex(File file) {
        // The reader is opened first so that a missing file fails before an index is
        // created, and is closed last (resources close in reverse order).
        try (Reader content = Files.newBufferedReader(file.toPath(), StandardCharsets.UTF_8);
             Analyzer analyzer = newAnalyzer();
             FSDirectory dir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            doc.add(new TextField(CONTENT_FIELD_NAME, content));
            doc.add(new StringField(ID_FIELD_NAME, file.getName(), Field.Store.YES));
            // Replace any earlier document with the same ID instead of appending, so
            // re-running the program does not accumulate duplicate hits.
            writer.updateDocument(new Term(ID_FIELD_NAME, file.getName()), doc);
            writer.commit();
        } catch (IOException e) {
            System.out.println("创建索引出错:" + e.getMessage());
        }
    }

    /**
     * Parses {@code searchQuery} against every field in {@code fields}, runs it on the
     * index in {@link #INDEX_DIRECTORY} and prints the stored ID of up to
     * {@link #MAX_HITS} matching documents.
     *
     * @param fields      names of the fields to search
     * @param searchQuery query text in Lucene classic query-parser syntax
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    private static void search(String[] fields, String searchQuery) throws Exception {

        // The static parse(...) overload requires exactly one flag per field.
        BooleanClause.Occur[] flags = new BooleanClause.Occur[fields.length];
        Arrays.fill(flags, BooleanClause.Occur.SHOULD);

        Query query;
        try (Analyzer analyzer = newAnalyzer()) {
            query = MultiFieldQueryParser.parse(searchQuery, fields, flags, analyzer);
        }

        // Closing the reader does not close the directory it was opened on, so both
        // are managed explicitly.
        try (FSDirectory dir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            TopDocs docs = searcher.search(query, MAX_HITS);
            ScoreDoc[] hits = docs.scoreDocs;

            System.out.println("搜索结果:");
            for (int i = 0; i < hits.length; i++) {
                Document doc = searcher.doc(hits[i].doc);
                String id = doc.get(ID_FIELD_NAME);
                System.out.println((i + 1) + ". " + id);
            }
        }
    }
}

  以上的代码演示了如何使用Lucene的 API 进行文本识别和精确匹配。在 createIndex 方法中,代码从文件中读取文本并将其添加到 Lucene 索引中。在 search 方法中,代码创建 MultiFieldQueryParser 对象并将要搜索的字段传递给构造函数。然后它将查询字符串解析为查询对象,并通过 IndexSearcher 对象执行查询。最终,代码输出与查询匹配的所有文档的 ID。

posted @ 2023-05-18 22:33  阖家旺  阅读(4)  评论(0)  编辑  收藏  举报