Lucene.net 全文检索数据库

原文:https://www.cnblogs.com/LTEF/p/10403114.html

https://www.cnblogs.com/zuowj/p/11689563.html

Lucene 是一个全文检索引擎工具包(架构),提供了完整的查询引擎和索引引擎;Lucene.NET 是 Lucene 在 C# 及 .NET 运行时上的移植实现,官网地址:http://lucenenet.apache.org/ 

需要通过 NuGet 安装:Lucene.Net(下面的示例使用 3.0.x 版本的 API)、Lucene.Net.Analysis.PanGu,以及示例中用于序列化结果的 Newtonsoft.Json

复制代码
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.PanGu;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Windows.Forms;
复制代码

 

复制代码
 
  /// <summary>
        /// Searches the "ItemIndexDir" index for the keywords typed into <c>textBox1</c>,
        /// writes the stored fields of every hit to the console and shows the JSON
        /// serialization of the hit document in <c>textBox2</c>.
        /// </summary>
        /// <param name="sender">The control that raised the click event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // Split the user input into space-separated terms before parsing.
            string keywords = LuceneHelper.GetKeyWordSplid(textBox1.Text);
            if (string.IsNullOrWhiteSpace(keywords))
            {
                // Nothing to search for; the query parser cannot build a query from blank input.
                return;
            }

            // The directory, reader and searcher are all disposable. Nested using blocks
            // release them in reverse order even when opening or searching throws.
            // The original code never disposed the reader.
            using (Lucene.Net.Store.Directory dir_search =
                FSDirectory.Open(new System.IO.DirectoryInfo("ItemIndexDir"), new NoLockFactory()))
            using (IndexReader reader = IndexReader.Open(dir_search, true)) // true = read-only
            using (IndexSearcher search = new IndexSearcher(reader))
            {
                // "ItemName" is the default field queried when a term names no field.
                QueryParser parser =
                    new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "ItemName", new PanGuAnalyzer());
                Query query = parser.Parse(keywords);

                // Run the search without a filter, keeping at most 20000 hits.
                TopDocs ts = search.Search(query, null, 20000);
                ScoreDoc[] docs = ts.ScoreDocs;
                Console.WriteLine(docs.Length);

                foreach (var t in docs)
                {
                    // Load the stored document for this hit and dump its fields.
                    Document doc = search.Doc(t.Doc);
                    Console.WriteLine(doc.Get("id"));
                    Console.WriteLine(doc.Get("ItemName"));
                    Console.WriteLine(doc.Get("Purity"));
                    Console.WriteLine(doc.Get("Size"));
                    Console.WriteLine(doc.Get("Unit"));
                    Console.WriteLine(doc.Get("VenderName"));

                    // Assigned on every iteration, so only the last hit stays visible.
                    textBox2.Text = JsonConvert.SerializeObject(doc);
                }
            }
        }

        /// <summary>
        /// Builds four sample <see cref="ItemInfo"/> records and writes them to the index
        /// through <see cref="CreateIndex"/>.
        /// </summary>
        /// <param name="sender">The control that raised the click event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            Console.WriteLine(@"开始创建索引");

            // Only the id and the name differ between the sample records.
            int[] itemIds = { 1000, 2000, 1000, 1000 };
            string[] itemNames = { "吃葡萄不吐葡萄皮", "年轻人不讲武德", "耗子尾汁", "闪电五连鞭" };

            var samples = new List<ItemInfo>(itemIds.Length);
            for (int n = 0; n < itemIds.Length; n++)
            {
                samples.Add(new ItemInfo
                {
                    ItemId = itemIds[n],
                    ItemName = itemNames[n],
                    Price = "100",
                    Purity = "100",
                    Size = "1",
                    Unit = "1",
                    VenderName = "2"
                });
            }

            CreateIndex(samples);
        }

        /// <summary>
        /// Helper that tokenizes search keywords with the PanGu analyzer.
        /// </summary>
        public static class LuceneHelper
        {
            /// <summary>
            /// Splits <paramref name="keywords"/> into terms using <see cref="PanGuAnalyzer"/>.
            /// </summary>
            /// <param name="keywords">Raw text typed by the user; may be null or empty.</param>
            /// <returns>
            /// The terms in stream order, each followed by a single space (so a non-empty
            /// result ends with a trailing space), or an empty string when
            /// <paramref name="keywords"/> is null or empty.
            /// </returns>
            public static string GetKeyWordSplid(string keywords)
            {
                // StringReader rejects null, so short-circuit instead of throwing.
                if (string.IsNullOrEmpty(keywords))
                {
                    return string.Empty;
                }

                StringBuilder sb = new StringBuilder();

                // Both the analyzer and the token stream are disposable; release them
                // deterministically instead of leaving them to the finalizer.
                using (Analyzer analyzer = new PanGuAnalyzer())
                // NOTE(review): the first argument is the field name; the original passes the
                // keywords themselves, which is kept here. Presumably PanGu ignores it; confirm.
                using (TokenStream stream = analyzer.TokenStream(keywords, new StringReader(keywords)))
                {
                    // Fetch the term attribute once; its value changes on every IncrementToken().
                    ITermAttribute termAttribute = stream.AddAttribute<ITermAttribute>();
                    while (stream.IncrementToken())
                    {
                        sb.Append(termAttribute.Term).Append(' ');
                    }
                }

                return sb.ToString();
            }
        }

        /// <summary>
        /// Writes every item in <paramref name="list"/> as one document to the Lucene index
        /// stored in the "ItemIndexDir" directory, then optimizes the index.
        /// A new index is created when none exists; otherwise documents are appended.
        /// </summary>
        /// <param name="list">Items to index. Null string properties are indexed as empty strings.</param>
        /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
        private static void CreateIndex(List<ItemInfo> list)
        {
            // Fail fast before touching the index directory.
            if (list == null)
            {
                throw new ArgumentNullException(nameof(list));
            }

            IndexWriter writer = null;
            Analyzer analyzer = new PanGuAnalyzer();
            Lucene.Net.Store.Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("ItemIndexDir"));

            // Number of fields added to the current document; reported if indexing fails.
            int i = 0;
            try
            {
                // Create a fresh index only when the directory does not already contain one.
                bool isCreate = !IndexReader.IndexExists(dir);
                writer = new IndexWriter(dir, analyzer, isCreate, IndexWriter.MaxFieldLength.UNLIMITED);

                foreach (var item in list)
                {
                    // Reset per document so the reported step never carries over from the previous item.
                    i = 0;
                    Document doc = new Document();
                    if (item.ItemId % 1000 == 0)
                    {
                        Console.WriteLine($@"开始写入{item.ItemId}");
                    }

                    // Null string values are replaced with "" so an item with unset
                    // properties is still indexed instead of aborting the batch.
                    doc.Add(new Field("id", item.ItemId.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    i = 1;
                    doc.Add(new Field("ItemName", item.ItemName ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                    i = 2;
                    doc.Add(new Field("Purity", item.Purity ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
                    i = 3;
                    doc.Add(new Field("Size", item.Size ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
                    i = 4;
                    doc.Add(new Field("Unit", item.Unit ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
                    i = 5;
                    doc.Add(new Field("VenderName", item.VenderName ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                    i = 6;
                    doc.Add(new Field("Price", item.Price ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
                    i = 7;

                    writer.AddDocument(doc, analyzer);
                }

                writer.Optimize();
            }
            catch (Exception ex)
            {
                // Log the failure and the last completed step, then let the caller see it.
                Console.WriteLine(ex);
                Console.WriteLine($@"error step {i}");
                throw;
            }
            finally
            {
                writer?.Dispose();
                dir?.Dispose();
            }
        }

        /// <summary>
        /// Data carrier for one item that CreateIndex turns into a Lucene document.
        /// All values except <see cref="ItemId"/> are kept as strings.
        /// </summary>
        public class ItemInfo
        {
            /// <summary>Numeric item id; indexed under the field name "id".</summary>
            public int ItemId { get; set; }
            /// <summary>Item name; indexed under "ItemName", the default field of the search query.</summary>
            public string ItemName { get; set; }
            /// <summary>Indexed under the field name "Purity".</summary>
            public string Purity { get; set; }
            /// <summary>Indexed under the field name "Size".</summary>
            public string Size { get; set; }
            /// <summary>Indexed under the field name "Unit".</summary>
            public string Unit { get; set; }
            /// <summary>Indexed under the field name "VenderName" (presumably the vendor's name; confirm).</summary>
            public string VenderName { get; set; }
            /// <summary>Indexed under the field name "Price".</summary>
            public string Price { get; set; }
        }
复制代码

 

posted @   Tozhang  阅读(189)  评论(0)  编辑  收藏  举报
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
点击右上角即可分享
微信分享提示