使用Lucene.net+盘古分词实现搜索查询
这里我的Demo的逻辑是这样的:首先,基本数据储存在Sql数据库中,然后把必需的数据推送到MongoDB中,再利用Lucene.net+盘古分词创建索引。为什么要把数据推送到MongoDB中?我的理解是:首次推送时可以从Sql中直接取出所有必需的数据并直接创建索引;但从第二次推送开始,可能存在需要修改或者新增的数据,这时就需要一个中间表来做对比。这个中间表我们使用MongoDB文档型存储来处理,而没有在Sql数据库中再建立一张表。大体上的逻辑就是这样,下面简单写一个实现。
细微的说下程序的逻辑如下:
1、第一次推数据,把所有的数据推送到mongo,而且第一次搜索的话只能在元数据的list上取数据建立索引
2、第二次推数据:首先对比mongo,如果mongo里面有要新增或者修改的数据,更新mongo,然后再推送lucene中
查询数据:1、如果查询出来的list为空,则从第三方查询,分别有2个链接,如果为空则为空,如果不为空,则查询出来的数据存放在MongoDB,推送Lucene.net中即可。
一、准备工作
首先我这里创建了一个控制台程序,然后通过NuGet添加如下需要引用的dll:
二、Demo结构如下:
三、代码如下:
1、Helper文件夹下的BizLogger如下:
using NLog;
using System;

namespace LuceneNetTest
{
    /// <summary>
    /// Business logger: a thin wrapper that forwards every call to an NLog <see cref="Logger"/>.
    /// Each message passes through <see cref="getMessage"/> first, so subclasses can decorate it.
    /// </summary>
    public class BizLogger
    {
        /// <summary>The NLog logger all calls are forwarded to.</summary>
        protected Logger _Logger { get; set; }

        /// <summary>Shared instance, assigned once by the static constructor.</summary>
        public static BizLogger Default { get; protected set; }

        /// <summary>
        /// Static constructor: builds <see cref="Default"/> from NLog's current-class logger.
        /// </summary>
        static BizLogger()
        {
            Default = new BizLogger(LogManager.GetCurrentClassLogger());
        }

        /// <summary>Wraps an existing NLog logger.</summary>
        /// <param name="logger">Logger to forward to.</param>
        protected BizLogger(Logger logger)
        {
            _Logger = logger;
        }

        /// <summary>Wraps the NLog logger registered under <paramref name="name"/>.</summary>
        /// <param name="name">Logger name passed to <c>LogManager.GetLogger</c>.</param>
        public BizLogger(string name)
            : this(LogManager.GetLogger(name))
        {
        }

        // ---- message-only overloads -------------------------------------------------

        public void Debug(string message, params object[] args) => _Logger.Debug(getMessage(message), args);

        public void Info(string message, params object[] args) => _Logger.Info(getMessage(message), args);

        public void Trace(string message, params object[] args) => _Logger.Trace(getMessage(message), args);

        public void Error(string message, params object[] args) => _Logger.Error(getMessage(message), args);

        public void Fatal(string message, params object[] args) => _Logger.Fatal(getMessage(message), args);

        // ---- overloads that attach an exception -------------------------------------

        public void Debug(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Debug, exception, getMessage(message), args);

        public void Info(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Info, exception, getMessage(message), args);

        public void Trace(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Trace, exception, getMessage(message), args);

        public void Error(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Error, exception, getMessage(message), args);

        public void Fatal(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Fatal, exception, getMessage(message), args);

        /// <summary>
        /// Hook applied to every message before it is logged. The base implementation
        /// returns the message unchanged.
        /// </summary>
        /// <param name="message">Raw message (format string).</param>
        /// <returns>The message to hand to NLog.</returns>
        protected virtual string getMessage(string message) => message;
    }
}
2、Helper文件夹下的DataSerializer如下:
using Newtonsoft.Json;
using Newtonsoft.Json.Serialization;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;

namespace LuceneNetTest
{
    /// <summary>
    /// JSON serialization helpers built on Json.NET.
    /// </summary>
    public static class DataSerializer
    {
        /// <summary>
        /// Serializes an object to JSON with its members ordered by name
        /// (see <see cref="SortedContractResolver"/>).
        /// </summary>
        /// <param name="source">Object to serialize.</param>
        /// <param name="b">
        /// <c>true</c> (default) keeps null-valued members in the output;
        /// <c>false</c> omits them.
        /// </param>
        /// <returns>The JSON text.</returns>
        public static string SerizlizeToJSON(object source, bool b = true)
        {
            var settings = new JsonSerializerSettings
            {
                ContractResolver = new SortedContractResolver()
            };

            if (b == false)
            {
                // Caller asked for null members to be left out.
                settings.NullValueHandling = NullValueHandling.Ignore;
            }

            return JsonConvert.SerializeObject(source, settings);
        }

        /// <summary>
        /// Deserializes JSON text into <typeparamref name="TData"/>.
        /// </summary>
        /// <typeparam name="TData">Target type.</typeparam>
        /// <param name="jsonString">JSON text.</param>
        /// <returns>The deserialized value.</returns>
        public static TData DeserializeFromJSON<TData>(string jsonString) =>
            JsonConvert.DeserializeObject<TData>(jsonString);
    }

    /// <summary>
    /// Contract resolver that returns a type's serializable members sorted by member name.
    /// </summary>
    public class SortedContractResolver : DefaultContractResolver
    {
        /// <summary>
        /// Returns the base resolver's members for <paramref name="objectType"/>, ordered by name.
        /// </summary>
        protected override List<MemberInfo> GetSerializableMembers(Type objectType)
        {
            var byName =
                from member in base.GetSerializableMembers(objectType)
                orderby member.Name
                select member;

            return byName.ToList();
        }
    }
}
3、Helper文件夹下的LuceneNetTestHelpers如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using LuceneNetTest.Models;

namespace LuceneNetTest.Helpers
{
    /// <summary>
    /// Data plumbing for the demo: loading source records, syncing them into MongoDB,
    /// calling the third-party lookup endpoints and pushing their results into
    /// MongoDB and the Lucene index.
    /// </summary>
    public static class LuceneNetTestHelpers
    {
        /// <summary>
        /// Returns the records that should be pushed to MongoDB.
        /// The SQL query is not implemented in this demo, so the list is always empty.
        /// </summary>
        /// <returns>The list of records (currently always empty).</returns>
        public static List<LuceneTestData> GetSqlData()
        {
            List<LuceneTestData> invList = new List<LuceneTestData>(); // records to index
            return invList;
        }

        /// <summary>
        /// Compares freshly queried records with what is stored in MongoDB and writes the
        /// differences back: changed records are updated (State "n" = modified) and unknown
        /// records are inserted (State "y" = new). Those states are what the Lucene
        /// indexing step later picks up.
        /// </summary>
        /// <param name="invList">Records queried from the source database.</param>
        /// <returns>An empty string on success, otherwise the exception message.</returns>
        public static string GetMongoDBData(List<LuceneTestData> invList)
        {
            MongoDbHelper<LuceneTestData> mgTwo = new MongoDbHelper<LuceneTestData>();
            var exsitList = mgTwo.QueryAll(); // current MongoDB content

            try
            {
                // Index the stored records by Id once instead of scanning the whole
                // list twice (Any + FirstOrDefault) for every incoming record.
                var existingById = exsitList.ToLookup(n => n.Id);

                foreach (var inv in invList)
                {
                    var oldInvoice = existingById[inv.Id].FirstOrDefault();

                    if (oldInvoice == null)
                    {
                        // Unknown record: insert it.
                        var newInvoice = new LuceneTestData();
                        newInvoice.Name = inv.Name;
                        newInvoice.Code = inv.Code;
                        newInvoice.AddressPhone = inv.AddressPhone;
                        newInvoice.BankAccount = inv.BankAccount;
                        newInvoice.State = "y";
                        newInvoice.CreateTime = DateTime.Now.ToString();
                        newInvoice.UpdateTime = "";
                        mgTwo.InsertOne(newInvoice);
                        continue;
                    }

                    bool changed = oldInvoice.Name != inv.Name
                                   || oldInvoice.Code != inv.Code
                                   || oldInvoice.AddressPhone != inv.AddressPhone
                                   || oldInvoice.BankAccount != inv.BankAccount;
                    if (!changed)
                    {
                        continue;
                    }

                    oldInvoice.Name = inv.Name;
                    oldInvoice.Code = inv.Code;
                    oldInvoice.AddressPhone = inv.AddressPhone;
                    oldInvoice.BankAccount = inv.BankAccount;
                    oldInvoice.State = "n";
                    // NOTE(review): these two assignments touch the incoming record, not the
                    // stored one, and look like a copy/paste slip. Kept because callers can
                    // observe them - confirm before removing.
                    inv.CreateTime = DateTime.Now.ToString();
                    inv.UpdateTime = "";
                    mgTwo.Update(oldInvoice);
                }

                return "";
            }
            catch (Exception ex)
            {
                return ex.Message;
            }
        }

        /// <summary>
        /// Queries the first third-party endpoint for <paramref name="name"/>.
        /// </summary>
        /// <param name="name">Search term; appended to the URL and also sent as the request body.</param>
        /// <returns>The response text, or the error string produced by <see cref="PostReauest"/>.</returns>
        public static string RequestData(string name)
        {
            byte[] bufferStr = System.Text.Encoding.Default.GetBytes(name);

            // API url (placeholders in the published demo)
            StringBuilder InvoiceRequestUrl = new StringBuilder();
            InvoiceRequestUrl.Append("。。。。。。。。。。。。。。");
            InvoiceRequestUrl.Append(name);
            InvoiceRequestUrl.Append("。。。。。。。。。。。");

            return PostReauest(bufferStr, InvoiceRequestUrl.ToString());
        }

        /// <summary>
        /// Queries the second third-party endpoint for <paramref name="name"/>.
        /// </summary>
        /// <param name="name">Search term; appended to the URL and also sent as the request body.</param>
        /// <returns>The response text, or the error string produced by <see cref="PostReauest"/>.</returns>
        public static string RequestData2(string name)
        {
            byte[] bufferStr = System.Text.Encoding.Default.GetBytes(name);

            // API url (placeholders in the published demo)
            StringBuilder InvoiceRequestUrl = new StringBuilder();
            InvoiceRequestUrl.Append("。。。。。。。。。。。。。。。。。。");
            InvoiceRequestUrl.Append(name);
            InvoiceRequestUrl.Append("。。。。。。。。。。。。。。。。");

            return PostReauest(bufferStr, InvoiceRequestUrl.ToString());
        }

        /// <summary>
        /// POSTs <paramref name="buffer"/> to <paramref name="requestUri"/> and returns the
        /// response body decoded as UTF-8, with line breaks removed (the body is read line
        /// by line and concatenated).
        /// </summary>
        /// <param name="buffer">Request body bytes.</param>
        /// <param name="requestUri">Target URL.</param>
        /// <returns>
        /// The response text. On any exception the method does not throw; it returns
        /// "报错:" followed by the exception message, so callers must not treat every
        /// non-empty result as a valid payload.
        /// </returns>
        public static string PostReauest(byte[] buffer, string requestUri)
        {
            Encoding code = Encoding.GetEncoding("utf-8");
            try
            {
                HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(requestUri);
                myReq.Method = "POST"; // HTTP method names are case-sensitive
                // A media type must not contain spaces; "application / json" is not valid.
                myReq.ContentType = "application/json";
                myReq.ContentLength = buffer.Length;

                // using-blocks guarantee the streams and the response are released even
                // when writing or reading fails half-way.
                using (Stream requestStream = myReq.GetRequestStream())
                {
                    requestStream.Write(buffer, 0, buffer.Length);
                }

                using (HttpWebResponse response = (HttpWebResponse)myReq.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, code))
                {
                    StringBuilder responseData = new StringBuilder();
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        responseData.Append(line);
                    }
                    return responseData.ToString();
                }
            }
            catch (Exception exp)
            {
                return "报错:" + exp.Message;
            }
        }

        /// <summary>
        /// Stores results from the first third-party endpoint in MongoDB and adds them to
        /// the Lucene index. Only the name of each result is carried over.
        /// Does nothing when the list is null or empty.
        /// </summary>
        /// <param name="thirdInvoice">Third-party results to store.</param>
        /// <param name="path">Path of the PanGu XML configuration file.</param>
        /// <param name="IndexDic">Lucene index directory.</param>
        public static void InThirdInfoToMongoAndLucene(List<ThirdInfo> thirdInvoice, string path, string IndexDic)
        {
            if (thirdInvoice == null || thirdInvoice.Count == 0)
            {
                return;
            }

            StoreAndIndexNames(thirdInvoice.Select(item => item.Name), path, IndexDic);
        }

        /// <summary>
        /// Stores results from the second third-party endpoint in MongoDB and adds them to
        /// the Lucene index. Only the <c>Nsrmc</c> value of each result is carried over (as Name).
        /// Does nothing when the list is null or empty.
        /// </summary>
        /// <param name="elephantHuiYun">Third-party results to store.</param>
        /// <param name="path">Path of the PanGu XML configuration file.</param>
        /// <param name="IndexDic">Lucene index directory.</param>
        public static void InThirdTestInfoToMongoAndLucene(List<ThirdTestInfo> elephantHuiYun, string path, string IndexDic)
        {
            if (elephantHuiYun == null || elephantHuiYun.Count == 0)
            {
                return;
            }

            StoreAndIndexNames(elephantHuiYun.Select(item => item.Nsrmc), path, IndexDic);
        }

        /// <summary>
        /// Builds one new (State "y") record per name, bulk-inserts them into MongoDB and
        /// indexes them with Lucene.
        /// </summary>
        private static void StoreAndIndexNames(IEnumerable<string> names, string path, string IndexDic)
        {
            List<LuceneTestData> invList = new List<LuceneTestData>();
            foreach (var name in names)
            {
                var newInvoice = new LuceneTestData();
                newInvoice.Name = name;
                newInvoice.State = "y";
                newInvoice.CreateTime = DateTime.Now.ToString();
                newInvoice.UpdateTime = "";
                invList.Add(newInvoice);
            }

            MongoDbHelper<LuceneTestData> mg = new MongoDbHelper<LuceneTestData>();
            mg.InsertBatch(invList);

            PanGuLuceneHelper.InitPanGuXmlPath(path);
            PanGuLuceneHelper.CreateIndex(IndexDic, invList); // add to the index
        }
    }
}
4、Helper文件夹下的MongoDbHelper如下:
using System;
using System.Collections.Generic;
using System.Linq;
using MongoDB.Bson;
using MongoDB.Driver;
using System.Linq.Expressions;
using System.Security.Authentication;

namespace LuceneNetTest.Helpers
{
    /// <summary>
    /// Holds the single shared <see cref="IMongoDatabase"/> used by the demo.
    /// </summary>
    public class DB
    {
        // NOTE(review): connection details are hard-coded; consider moving them to configuration.
        private static readonly string connStr = "mongodb://192.168.4.192:27017";

        private static readonly string dbName = "LuceneTestData";

        // volatile: the field is read outside the lock in GetDb.
        private static volatile IMongoDatabase db = null;

        private static readonly object lockHelper = new object();

        private DB() { }

        /// <summary>
        /// Returns the shared database handle, creating the client on first use.
        /// </summary>
        public static IMongoDatabase GetDb()
        {
            if (db == null)
            {
                lock (lockHelper)
                {
                    if (db == null)
                    {
                        MongoClientSettings settings = MongoClientSettings.FromUrl(
                            new MongoUrl(connStr)
                            );
                        settings.SslSettings =
                            new SslSettings() { EnabledSslProtocols = SslProtocols.Tls12 };
                        var mongoClient = new MongoClient(settings);
                        db = mongoClient.GetDatabase(dbName);
                    }
                }
            }
            return db;
        }
    }

    /// <summary>
    /// Synchronous CRUD helper over the MongoDB collection named after <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">Entity type; the collection name is <c>typeof(T).Name</c>.</typeparam>
    public class MongoDbHelper<T> where T : BaseEntity
    {
        private const string TimestampFormat = "yyyy-MM-dd HH:mm:ss";

        private IMongoDatabase db = null;

        private IMongoCollection<T> collection = null;

        public MongoDbHelper()
        {
            this.db = DB.GetDb();
            collection = db.GetCollection<T>(typeof(T).Name);
        }

        /// <summary>
        /// Inserts one entity (synchronous). Identical to <see cref="InsertOne"/>; kept for
        /// existing callers.
        /// </summary>
        /// <param name="entity">Entity to store.</param>
        /// <returns>The same entity with Id, State and timestamps filled in.</returns>
        public T Insert(T entity)
        {
            return InsertOne(entity);
        }

        /// <summary>
        /// Inserts one entity (synchronous). Assigns a new Id, sets State to "y" and stamps
        /// CreateTime and UpdateTime with the current local time.
        /// </summary>
        /// <param name="entity">Entity to store.</param>
        /// <returns>The same entity with Id, State and timestamps filled in.</returns>
        public T InsertOne(T entity)
        {
            // T is constrained to BaseEntity, so Id can be set directly - no reflection needed.
            entity.Id = ObjectId.GenerateNewId();
            entity.State = "y";

            // One timestamp so CreateTime and UpdateTime cannot differ by a clock tick.
            string now = DateTime.Now.ToString(TimestampFormat);
            entity.CreateTime = now;
            entity.UpdateTime = now;

            collection.InsertOne(entity);
            return entity;
        }

        /// <summary>
        /// Sets a single field of the document with the given id.
        /// </summary>
        /// <param name="id">Document id in ObjectId string form.</param>
        /// <param name="field">Field name.</param>
        /// <param name="value">New value.</param>
        public void Modify(string id, string field, string value)
        {
            var filter = Builders<T>.Filter.Eq("Id", ObjectId.Parse(id));
            var updated = Builders<T>.Update.Set(field, value);
            collection.UpdateOne(filter, updated);
        }

        /// <summary>
        /// Merges the non-null properties of <paramref name="entity"/> that differ from the
        /// stored document into that document, marks it modified (State "n"), refreshes
        /// UpdateTime and replaces it. Changed values are written as their string form.
        /// </summary>
        /// <param name="entity">Entity carrying the new values; its Id selects the document.</param>
        /// <exception cref="InvalidOperationException">No document with that Id exists.</exception>
        public void Update(T entity)
        {
            ReplaceWithChanges(entity, true);
        }

        /// <summary>
        /// Same as <see cref="Update"/>, but changed values are written as-is instead of
        /// being converted to strings.
        /// </summary>
        /// <param name="entity">Entity carrying the new values; its Id selects the document.</param>
        /// <exception cref="InvalidOperationException">No document with that Id exists.</exception>
        public void UpdateEx(T entity)
        {
            ReplaceWithChanges(entity, false);
        }

        /// <summary>
        /// Shared implementation of <see cref="Update"/> and <see cref="UpdateEx"/>.
        /// Exceptions propagate unchanged so the original stack trace is preserved.
        /// </summary>
        private void ReplaceWithChanges(T entity, bool storeAsString)
        {
            var filter = Builders<T>.Filter.Eq("Id", entity.Id);

            T old = collection.Find(filter).FirstOrDefault();
            if (old == null)
            {
                // Fail with a clear message instead of a NullReferenceException further down.
                throw new InvalidOperationException(
                    "No " + typeof(T).Name + " document with Id " + entity.Id + " exists to update.");
            }

            foreach (var prop in entity.GetType().GetProperties())
            {
                var newValue = prop.GetValue(entity);
                if (newValue == null)
                {
                    continue; // null means "leave the stored value alone"
                }

                var oldProp = old.GetType().GetProperty(prop.Name);
                var oldValue = oldProp.GetValue(old) ?? "";

                if (!newValue.ToString().Equals(oldValue.ToString()))
                {
                    oldProp.SetValue(old, storeAsString ? (object)newValue.ToString() : newValue);
                }
            }

            old.State = "n";
            old.UpdateTime = DateTime.Now.ToString(TimestampFormat);

            collection.ReplaceOne(filter, old);
        }

        /// <summary>
        /// Deletes the document with the entity's Id (synchronous).
        /// </summary>
        /// <param name="entity">Entity to delete.</param>
        public void Delete(T entity)
        {
            var filter = Builders<T>.Filter.Eq("Id", entity.Id);
            collection.DeleteOne(filter);
        }

        /// <summary>
        /// Deletes the document with the given id (synchronous).
        /// </summary>
        /// <param name="IdString">Document id in ObjectId string form.</param>
        /// <returns>Number of documents deleted.</returns>
        public long Delete(string IdString)
        {
            ObjectId Id = ObjectId.Parse(IdString);
            var filter = Builders<T>.Filter.Eq("Id", Id);
            var rt = collection.DeleteOne(filter);
            return rt.DeletedCount;
        }

        /// <summary>
        /// Returns the document with the given id, or null when none exists.
        /// </summary>
        /// <param name="id">Document id in ObjectId string form.</param>
        public T QueryOne(string id)
        {
            ObjectId objectId = ObjectId.Parse(id);
            return collection.Find(a => a.Id == objectId).FirstOrDefault();
        }

        /// <summary>
        /// Returns every document whose State is not the empty string.
        /// </summary>
        public List<T> QueryAll()
        {
            return collection.Find(a => a.State != "").ToList();
        }

        /// <summary>
        /// Returns the first document matching <paramref name="express"/>, or null.
        /// </summary>
        /// <param name="express">Filter predicate.</param>
        public T QueryByFirst(Expression<Func<T, bool>> express)
        {
            // FirstOrDefault on the fluent query avoids loading every match into memory.
            return collection.Find(express).FirstOrDefault();
        }

        /// <summary>
        /// Returns all documents matching <paramref name="express"/>.
        /// </summary>
        /// <param name="express">Filter predicate.</param>
        public List<T> QueryBy(Expression<Func<T, bool>> express)
        {
            return collection.Find(express).ToList();
        }

        /// <summary>
        /// Inserts a batch of documents. A null or empty list is a no-op, because the
        /// driver rejects an empty batch with an exception.
        /// </summary>
        /// <param name="list">Documents to insert.</param>
        public void InsertBatch(List<T> list)
        {
            if (list == null || list.Count == 0)
            {
                return;
            }

            collection.InsertMany(list);
        }

        /// <summary>
        /// Deletes every document whose Id is in <paramref name="list"/>.
        /// </summary>
        /// <param name="list">Ids to delete.</param>
        public void DeleteBatch(List<ObjectId> list)
        {
            var filter = Builders<T>.Filter.In("Id", list);
            collection.DeleteMany(filter);
        }

        /// <summary>
        /// Returns the documents not yet pushed to the index: State "y" (new) or "n" (modified).
        /// </summary>
        public List<T> QueryToLucene()
        {
            return collection.Find(a => a.State.Equals("y") || a.State.Equals("n")).ToList();
        }
    }

    /// <summary>
    /// Base type for entities stored through <see cref="MongoDbHelper{T}"/>.
    /// </summary>
    public abstract class BaseEntity
    {
        /// <summary>
        /// Storage id of the document.
        /// </summary>
        public ObjectId Id { get; set; }

        /// <summary>
        /// State marker. In this demo "y" means new, "n" means modified and "ok" means
        /// already indexed.
        /// </summary>
        public string State { get; set; }

        /// <summary>
        /// Creation time, stored as text.
        /// </summary>
        public string CreateTime { get; set; }

        /// <summary>
        /// Last modification time, stored as text.
        /// </summary>
        public string UpdateTime { get; set; }
    }
}
5、Models文件夹下的LuceneTestData如下:
using LuceneNetTest.Helpers;

namespace LuceneNetTest.Models
{
    /// <summary>
    /// The record stored in MongoDB and indexed by Lucene. Inherits Id, State,
    /// CreateTime and UpdateTime from <see cref="BaseEntity"/>.
    /// </summary>
    public class LuceneTestData : BaseEntity
    {
        /// <summary>Name; indexed as the analyzed Lucene field "Name", which searches run against.</summary>
        public string Name { get; set; }

        /// <summary>Code; written to the Lucene field "TaxCode".</summary>
        public string Code { get; set; }

        /// <summary>Address/phone text; written to the Lucene field "AddressPhone" without analysis.</summary>
        public string AddressPhone { get; set; }

        /// <summary>Bank account text; written to the Lucene field "BankAccount" without analysis.</summary>
        public string BankAccount { get; set; }
    }
}
6、Models文件夹下的ThirdInfo如下:
namespace LuceneNetTest.Models
{
    /// <summary>
    /// One entry of the first third-party endpoint's response; the response JSON is
    /// deserialized into a list of these.
    /// </summary>
    public class ThirdInfo
    {
        /// <summary>Identifier as supplied by the third party.</summary>
        public string Id { get; set; }

        /// <summary>Name; the only value copied into the stored record by the import helper.</summary>
        public string Name { get; set; }

        /// <summary>Presumably the tax number - TODO confirm against the third-party API.</summary>
        public string Taxnum { get; set; }
    }
}
7、Models文件夹下的ThirdTestData用于对请求第三方得到的数据进行反序列化解析,暂不展示
8、PanGu文件夹下是盘古分词,这里下载添加即可。
9、PanGuLuceneHelper如下:
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Analysis;
using System.Collections.Generic;
using System.IO;
using Lucene.Net.Search;
using System;
using Lucene.Net.Store;
using Lucene.Net.QueryParsers;
using LuceneNetTest.Models;
using LuceneNetTest.Helpers;
using MongoDB.Bson;

namespace LuceneNetTest
{
    /// <summary>
    /// Builds and searches the Lucene index using the PanGu analyzer.
    /// </summary>
    public class PanGuLuceneHelper
    {
        /// <summary>Shared PanGu analyzer used for both indexing and searching.</summary>
        public static Analyzer analyzer = new PanGuAnalyzer();

        /// <summary>
        /// Initializes PanGu segmentation from its XML configuration file.
        /// </summary>
        /// <param name="PanGuXmlPath">Path of the PanGu XML configuration file.</param>
        public static void InitPanGuXmlPath(string PanGuXmlPath)
        {
            PanGu.Segment.Init(PanGuXmlPath);
        }

        /// <summary>
        /// Adds records to the index stored in <paramref name="IndexDic"/>.
        /// </summary>
        /// <param name="IndexDic">Index directory.</param>
        /// <param name="list">
        /// Records to index. When null, the records still pending in MongoDB
        /// (State "y" or "n") are indexed instead.
        /// </param>
        /// <returns>
        /// A <see cref="Result"/> with Success = true, or Success = false and the exception
        /// message. Exceptions are not thrown to the caller.
        /// </returns>
        public static Result CreateIndex(string IndexDic, List<LuceneTestData> list = null)
        {
            try
            {
                using (FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexDic), new NativeFSLockFactory()))
                {
                    // A stale lock (e.g. left by a crashed run) would block the writer: clear it.
                    if (IndexWriter.IsLocked(directory))
                    {
                        IndexWriter.Unlock(directory);
                    }

                    // using: the writer is closed and its write lock released even when
                    // indexing throws, instead of leaving the index locked.
                    using (IndexWriter writer = new IndexWriter(directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED))
                    {
                        AddIndex(writer, list);
                        writer.Optimize();
                    }
                }

                return new Result() { Success = true };
            }
            catch (Exception ex)
            {
                return new Result() { Success = false, Message = ex.Message };
            }
        }

        /// <summary>
        /// Writes each record to the index according to its State ("n": replace the existing
        /// document with the same Id, "y": add) and then marks the record "ok" in MongoDB so
        /// it is not indexed again.
        /// </summary>
        /// <param name="writer">Open index writer.</param>
        /// <param name="list">Records to index; null means "whatever is pending in MongoDB".</param>
        private static void AddIndex(IndexWriter writer, List<LuceneTestData> list = null)
        {
            MongoDbHelper<LuceneTestData> mg = new MongoDbHelper<LuceneTestData>();

            // Only query MongoDB when the caller did not supply the records.
            var customerList = list ?? mg.QueryToLucene();

            foreach (var item in customerList)
            {
                Document doc = BuildDocument(item);

                if (item.State == "n") // modified
                {
                    Term t = new Term("Id", item.Id.ToString());
                    writer.DeleteDocuments(t);
                    writer.AddDocument(doc);
                }
                else if (item.State == "y") // new
                {
                    writer.AddDocument(doc);
                }

                // Mark as indexed so the next run skips it.
                mg.Modify(item.Id.ToString(), "State", "ok");
            }
        }

        /// <summary>
        /// Indexes the records pending in MongoDB. Equivalent to calling
        /// <see cref="AddIndex"/> without a list.
        /// </summary>
        /// <param name="writer">Open index writer.</param>
        private static void AddIndexNew(IndexWriter writer)
        {
            AddIndex(writer, null);
        }

        /// <summary>
        /// Maps a record to a Lucene document. All fields are stored; Name and TaxCode are
        /// analyzed, the rest are indexed verbatim. Null values are written as empty
        /// strings because a field value must not be null.
        /// </summary>
        private static Document BuildDocument(LuceneTestData item)
        {
            Document doc = new Document();
            doc.Add(new Field("Name", item.Name ?? "", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("TaxCode", item.Code ?? "", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("AddressPhone", item.AddressPhone ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("BankAccount", item.BankAccount ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("State", item.State ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Id", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("CreateTime", item.CreateTime ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("UpdateTime", item.UpdateTime ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            return doc;
        }

        /// <summary>
        /// Splits text into terms with the given analyzer.
        /// </summary>
        /// <param name="words">Text to segment.</param>
        /// <param name="analyzer">Analyzer to use. It is NOT closed here: callers pass the shared instance.</param>
        /// <returns>The terms, each followed by "|".</returns>
        private static string cutWords(string words, Analyzer analyzer)
        {
            var terms = new System.Text.StringBuilder();

            System.IO.StringReader reader = new System.IO.StringReader(words);
            try
            {
                Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(words, reader);
                while (ts.IncrementToken())
                {
                    var ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                    terms.Append(ita.Term).Append("|");
                }
            }
            finally
            {
                reader.Close();
            }

            return terms.ToString();
        }

        /// <summary>
        /// Searches the "Name" field of the index.
        /// </summary>
        /// <param name="content">Query text, in Lucene query-parser syntax.</param>
        /// <param name="IndexDic">Index directory.</param>
        /// <param name="maxHits">Maximum number of hits to return (default 4).</param>
        /// <returns>The matching records; an empty list (never null) when nothing matches or the query is blank.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxHits"/> is less than 1.</exception>
        public static List<LuceneTestData> SearchIndex(string content, string IndexDic, int maxHits = 4)
        {
            if (maxHits < 1)
            {
                throw new ArgumentOutOfRangeException("maxHits");
            }

            List<LuceneTestData> list = new List<LuceneTestData>();
            if (string.IsNullOrWhiteSpace(content))
            {
                return list;
            }

            // using: release the directory and searcher; previously an unused IndexReader
            // and the searcher were opened and never closed.
            using (FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexDic), new NoLockFactory()))
            using (IndexSearcher search = new IndexSearcher(directory, true))
            {
                // Build the query
                PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(analyzer);
                wrapper.AddAnalyzer("Name", analyzer);
                QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Name", wrapper);
                Query query = parser.Parse(content);

                BooleanQuery bQuery = new BooleanQuery();
                // Occur.MUST is the enum's default value, i.e. what "new Occur()" produced.
                bQuery.Add(query, Occur.MUST);

                TopScoreDocCollector collector = TopScoreDocCollector.Create(maxHits, true);
                search.Search(bQuery, collector);

                foreach (var hit in collector.TopDocs().ScoreDocs)
                {
                    Document doc = search.Doc(hit.Doc);
                    list.Add(new LuceneTestData()
                    {
                        Name = doc.Get("Name"),
                        Code = doc.Get("TaxCode"),
                        AddressPhone = doc.Get("AddressPhone"),
                        BankAccount = doc.Get("BankAccount"),
                        State = doc.Get("State"),
                        Id = ObjectId.Parse(doc.Get("Id")),
                        CreateTime = doc.Get("CreateTime"),
                        UpdateTime = doc.Get("UpdateTime")
                    });
                }
            }

            return list;
        }
    }
}
10、Program如下:
using System;
using System.Collections.Generic;
using System.Linq;
using LuceneNetTest.Models;
using LuceneNetTest.Helpers;

namespace LuceneNetTest
{
    /// <summary>
    /// Demo entry point.
    /// 1. First push: send all records to MongoDB and build the index from that list.
    /// 2. Later pushes: compare with MongoDB, write new/changed records to MongoDB, then
    ///    push those to Lucene.
    /// 3. Search: when the index returns no hits, ask the third-party endpoints (two of
    ///    them, in order); any results are stored in MongoDB and added to the index.
    /// </summary>
    class Program
    {
        // Prefix LuceneNetTestHelpers.PostReauest puts in front of the exception message
        // when the HTTP call fails; such a response is not a JSON payload.
        private const string RequestErrorPrefix = "报错:";

        static void Main(string[] args)
        {
            var IndexDic = @"C:/Users/Administrator/Desktop/RSA/LuceneNetTest/IndexDic/IndexDic"; // index directory
            var path = @"C:/Users/Administrator/Desktop/RSA/LuceneNetTest/PanGu/PanGu.xml"; // PanGu configuration

            List<LuceneTestData> invList = LuceneNetTestHelpers.GetSqlData();
            BizLogger.Default.Info($"初次获取信息数据,总共获取了{invList.Count}条。");

            #region First push to MongoDB
            MongoDbHelper<LuceneTestData> mg = new MongoDbHelper<LuceneTestData>();
            if (invList.Count > 0)
            {
                // The driver rejects an empty batch, and GetSqlData can return no rows.
                mg.InsertBatch(invList);
            }
            BizLogger.Default.Info($"信息数据推送到MongoDB数据库完成,总共获取了{invList.Count}条。");

            PanGuLuceneHelper.InitPanGuXmlPath(path);
            Result firstIndex = PanGuLuceneHelper.CreateIndex(IndexDic, invList);
            if (firstIndex.Success)
            {
                BizLogger.Default.Info($"信息数据初次创建索引成功,总共创建了{invList.Count}条数据。");
            }
            else
            {
                // CreateIndex reports failures through Result instead of throwing.
                BizLogger.Default.Error("信息数据初次创建索引失败:{0}", firstIndex.Message);
            }
            #endregion

            #region Second push to MongoDB
            // Diff the freshly queried records against what MongoDB already holds.
            string result = LuceneNetTestHelpers.GetMongoDBData(invList);
            BizLogger.Default.Info($"从SQL数据库获取信息与MongoDB数据信息进行对比,总共获取了{invList.Count}条。");
            if (string.IsNullOrEmpty(result))
            {
                PanGuLuceneHelper.InitPanGuXmlPath(path);
                Result secondIndex = PanGuLuceneHelper.CreateIndex(IndexDic);
                if (secondIndex.Success)
                {
                    BizLogger.Default.Info($"信息创建索引成功");
                }
                else
                {
                    BizLogger.Default.Error("信息创建索引失败:{0}", secondIndex.Message);
                }
            }
            #endregion

            #region Search
            string name = "上海";
            var list = PanGuLuceneHelper.SearchIndex(name, IndexDic);

            // SearchIndex returns an empty list (not null) when nothing matches, so the
            // fallback has to test for emptiness as well.
            if (list == null || list.Count == 0)
            {
                bool foundAtFirstProvider = false;

                // First third-party endpoint
                string strResult = LuceneNetTestHelpers.RequestData(name);
                if (IsUsableResponse(strResult))
                {
                    List<ThirdInfo> thirdInfo = DataSerializer.DeserializeFromJSON<List<ThirdInfo>>(strResult);
                    if (thirdInfo != null && thirdInfo.Count > 0)
                    {
                        // Store in MongoDB and add to the index
                        LuceneNetTestHelpers.InThirdInfoToMongoAndLucene(thirdInfo, path, IndexDic);
                        BizLogger.Default.Info($"从第三方。。中获取信息更新到MongoDB成功,并且创建索引成功");
                        Console.WriteLine(string.Join(",", thirdInfo.Select(t => t.Name)));
                        foundAtFirstProvider = true;
                    }
                }

                if (!foundAtFirstProvider)
                {
                    // Second third-party endpoint
                    string twoResult = LuceneNetTestHelpers.RequestData2(name);
                    if (IsUsableResponse(twoResult))
                    {
                        // Handle like the first endpoint (not implemented in this demo).
                    }
                    else
                    {
                        Console.WriteLine("暂无信息");
                    }
                }
            }
            #endregion

            if (list != null && list.Count > 0)
            {
                Console.WriteLine(string.Join(",", list.Select(n => n.Name)));
            }
            Console.ReadKey();
        }

        /// <summary>
        /// True when a third-party response is non-empty and is not the error string
        /// returned by the HTTP helper.
        /// </summary>
        private static bool IsUsableResponse(string response)
        {
            return !string.IsNullOrEmpty(response)
                   && !response.StartsWith(RequestErrorPrefix, StringComparison.Ordinal);
        }
    }
}
11、Result如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace LuceneNetTest
{
    /// <summary>
    /// Outcome of an operation; returned by <c>PanGuLuceneHelper.CreateIndex</c>.
    /// </summary>
    public class Result
    {
        /// <summary>True when the operation completed without an exception.</summary>
        public bool Success { get; set; }

        /// <summary>Status code. Not assigned anywhere in the visible code.</summary>
        public int StatusCode { get; set; }

        /// <summary>Exception message when <see cref="Success"/> is false.</summary>
        public string Message { get; set; }
    }
}
上面简单的实现了搜索。