使用Lucene.net+盘古分词实现搜索查询

                           这里我的Demo的逻辑是这样的:首先,基本数据储存在Sql数据库中,然后我把必需的数据推送到MongoDB中,再利用Lucene.net+盘古分词创建索引。为什么要把数据推送到MongoDB中呢?我的理解是:必需的数据可以从Sql中直接取出来,首次推送时推送所有的数据并直接创建索引即可;但是在第二次推送的时候,可能存在需要修改或者新增的数据,这样我们就需要一个中间表来做对比。这个中间表我们使用MongoDB文档型存储来处理,而没有在Sql数据库中再建立一张表,我是这么理解的。大体上的逻辑是这样的,下面简单写一个实现的逻辑。

          细微的说下程序的逻辑如下:

          1、第一次推数据,把所有的数据推送到mongo,而且第一次搜索的话只能在元数据的list上取数据建立索引
          2、第二次推数据:首先对比mongo,如果mongo里面有要新增或者修改的数据,更新mongo,然后再推送lucene中

          查询数据:如果从索引中查询出来的list为空,则改为从第三方查询(第三方共有2个链接);如果第三方的结果也为空,则返回空;如果不为空,则把查询出来的数据存放到MongoDB,并推送到Lucene.net索引中即可。

         一、准备工作

         首先我这里创建了一个控制台程序,然后通过NuGet安装如下所需的包(dll):

                           

                          

                         二、Demo结构如下: 

                         

                         三、代码如下:

                         1、Helper文件夹下的BizLogger如下:

using NLog;
using System;

namespace LuceneNetTest
{
    /// <summary>
    /// Business logger: a thin wrapper that forwards every call to an NLog <see cref="Logger"/>.
    /// </summary>
    public class BizLogger
    {
        /// <summary>The NLog logger that receives every message.</summary>
        protected Logger _Logger { get; set; }

        /// <summary>Shared instance, assigned by the static constructor.</summary>
        public static BizLogger Default { get; protected set; }

        /// <summary>
        /// Static constructor: creates <see cref="Default"/> from NLog's current-class logger.
        /// </summary>
        static BizLogger()
        {
            Default = new BizLogger(LogManager.GetCurrentClassLogger());
        }

        /// <summary>Wraps an existing NLog logger.</summary>
        protected BizLogger(Logger logger)
        {
            _Logger = logger;
        }

        /// <summary>Wraps the NLog logger registered under <paramref name="name"/>.</summary>
        public BizLogger(string name) : this(LogManager.GetLogger(name)) { }

        // ---- message-only overloads: one per NLog level ----

        public void Debug(string message, params object[] args) => _Logger.Debug(getMessage(message), args);

        public void Info(string message, params object[] args) => _Logger.Info(getMessage(message), args);

        public void Trace(string message, params object[] args) => _Logger.Trace(getMessage(message), args);

        public void Error(string message, params object[] args) => _Logger.Error(getMessage(message), args);

        public void Fatal(string message, params object[] args) => _Logger.Fatal(getMessage(message), args);

        // ---- exception overloads: the exception is handed to NLog together with the message ----

        public void Debug(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Debug, exception, getMessage(message), args);

        public void Info(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Info, exception, getMessage(message), args);

        public void Trace(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Trace, exception, getMessage(message), args);

        public void Error(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Error, exception, getMessage(message), args);

        public void Fatal(Exception exception, string message, params object[] args) =>
            _Logger.Log(LogLevel.Fatal, exception, getMessage(message), args);

        /// <summary>
        /// Hook applied to every message before it is logged. The base implementation
        /// returns the message unchanged; it is virtual so subclasses can decorate it.
        /// </summary>
        protected virtual string getMessage(string message) => message;
    }
}
View Code

                         2、Helper文件夹下的DataSerializer如下:

using Newtonsoft.Json;
using Newtonsoft.Json.Serialization;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;

namespace LuceneNetTest
{
    /// <summary>
    /// JSON serialization helpers built on Json.NET.
    /// </summary>
    public static class DataSerializer
    {
        /// <summary>
        /// Serializes <paramref name="source"/> to JSON with members ordered by name
        /// (via <see cref="SortedContractResolver"/>).
        /// </summary>
        /// <param name="source">Object to serialize.</param>
        /// <param name="b">
        /// When false, null-valued members are omitted from the output. When true (the default)
        /// the null handling setting is left untouched.
        /// </param>
        /// <returns>The JSON text.</returns>
        public static string SerizlizeToJSON(object source, bool b = true)
        {
            var settings = new JsonSerializerSettings
            {
                ContractResolver = new SortedContractResolver()
            };

            // Only override null handling when the caller asked to drop nulls.
            if (b == false)
            {
                settings.NullValueHandling = NullValueHandling.Ignore;
            }

            return JsonConvert.SerializeObject(source, settings);
        }

        /// <summary>
        /// Deserializes <paramref name="jsonString"/> into a <typeparamref name="TData"/>.
        /// </summary>
        public static TData DeserializeFromJSON<TData>(string jsonString) =>
            JsonConvert.DeserializeObject<TData>(jsonString);
    }

    /// <summary>
    /// Contract resolver that returns the serializable members sorted by member name.
    /// </summary>
    public class SortedContractResolver : DefaultContractResolver
    {
        /// <summary>
        /// Takes the base resolver's member list and orders it by <see cref="MemberInfo.Name"/>.
        /// </summary>
        protected override List<MemberInfo> GetSerializableMembers(Type objectType)
        {
            List<MemberInfo> unordered = base.GetSerializableMembers(objectType);

            var byName = from member in unordered
                         orderby member.Name
                         select member;

            return byName.ToList();
        }
    }
}
View Code

                         3、Helper文件夹下的LuceneNetTestHelpers如下:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using LuceneNetTest.Models;

namespace LuceneNetTest.Helpers
{
    /// <summary>
    /// 数据处理
    /// </summary>
    public static class LuceneNetTestHelpers
    {
        /// <summary>
        /// Returns the records that should be pushed to MongoDB and indexed.
        /// In this demo the SQL query is not shown: the method only creates and returns an empty list.
        /// </summary>
        /// <returns>A new, empty list of <see cref="LuceneTestData"/>.</returns>
        public static List<LuceneTestData> GetSqlData()
        {
            // First push to MongoDB: this list is the collection the index is built from.
            var records = new List<LuceneTestData>();
            return records;
        }

        /// <summary>
        /// Compares freshly queried records with what is already stored in MongoDB and
        /// writes the differences back: changed records are updated (State "n"), records whose
        /// Id is not stored yet are inserted (State "y"). The State flags are what the Lucene
        /// index step later uses to decide between "re-index" and "add".
        /// </summary>
        /// <param name="invList">Records queried from SQL.</param>
        /// <returns>An empty string on success, otherwise the message of the exception that stopped the sync.</returns>
        public static string GetMongoDBData(List<LuceneTestData> invList)
        {
            MongoDbHelper<LuceneTestData> mgTwo = new MongoDbHelper<LuceneTestData>();
            var exsitList = mgTwo.QueryAll(); // everything currently stored in MongoDB

            try
            {
                // Index the stored records by Id once, instead of scanning the whole list
                // twice (Any + FirstOrDefault) for every incoming record.
                // GroupBy + First keeps the original "first match wins" behaviour.
                var existingById = exsitList
                    .GroupBy(n => n.Id)
                    .ToDictionary(g => g.Key, g => g.First());

                foreach (var inv in invList)
                {
                    LuceneTestData oldInvoice;
                    if (existingById.TryGetValue(inv.Id, out oldInvoice))
                    {
                        bool changed = oldInvoice.Name != inv.Name
                            || oldInvoice.Code != inv.Code
                            || oldInvoice.AddressPhone != inv.AddressPhone
                            || oldInvoice.BankAccount != inv.BankAccount;
                        if (!changed)
                        {
                            continue;
                        }

                        oldInvoice.Name = inv.Name;
                        oldInvoice.Code = inv.Code;
                        oldInvoice.AddressPhone = inv.AddressPhone;
                        oldInvoice.BankAccount = inv.BankAccount;
                        oldInvoice.State = "n";

                        // NOTE(review): these two assignments touch the caller's input record, not the
                        // stored one, and are not persisted by the Update call below. Kept to preserve
                        // the existing side effect; confirm whether they can be removed.
                        inv.CreateTime = DateTime.Now.ToString();
                        inv.UpdateTime = "";

                        mgTwo.Update(oldInvoice);
                    }
                    else
                    {
                        // Not stored yet: insert a copy flagged as new.
                        // NOTE(review): InsertOne assigns a freshly generated Id, so inv.Id is not carried over.
                        var newInvoice = new LuceneTestData();
                        newInvoice.Name = inv.Name;
                        newInvoice.Code = inv.Code;
                        newInvoice.AddressPhone = inv.AddressPhone;
                        newInvoice.BankAccount = inv.BankAccount;
                        newInvoice.State = "y";
                        newInvoice.CreateTime = DateTime.Now.ToString();
                        newInvoice.UpdateTime = "";
                        mgTwo.InsertOne(newInvoice);
                    }
                }
                return "";
            }
            catch (Exception ex)
            {
                // Callers treat a non-empty return value as "sync failed".
                return ex.Message;
            }
        }

        /// <summary>
        /// Queries the first third-party endpoint for <paramref name="name"/>.
        /// The name is sent both as the request body and embedded in the url.
        /// </summary>
        /// <param name="name">Search text.</param>
        /// <returns>Whatever <see cref="PostReauest"/> returns for the request.</returns>
        public static string RequestData(string name)
        {
            // Request body: the name encoded with the platform default encoding.
            byte[] payload = System.Text.Encoding.Default.GetBytes(name);

            // API url; the prefix and suffix are placeholders in this demo.
            string url = string.Concat("。。。。。。。。。。。。。。", name, "。。。。。。。。。。。");

            return PostReauest(payload, url);
        }

        /// <summary>
        /// Queries the second third-party endpoint for <paramref name="name"/>.
        /// The name is sent both as the request body and embedded in the url.
        /// </summary>
        /// <param name="name">Search text.</param>
        /// <returns>Whatever <see cref="PostReauest"/> returns for the request.</returns>
        public static string RequestData2(string name)
        {
            // Request body: the name encoded with the platform default encoding.
            byte[] payload = System.Text.Encoding.Default.GetBytes(name);

            // API url; the prefix and suffix are placeholders in this demo.
            string url = string.Concat("。。。。。。。。。。。。。。。。。。", name, "。。。。。。。。。。。。。。。。");

            return PostReauest(payload, url);
        }

        /// <summary>
        /// POSTs <paramref name="buffer"/> to <paramref name="requestUri"/> and returns the response text.
        /// </summary>
        /// <param name="buffer">Raw request body.</param>
        /// <param name="requestUri">Absolute url of the third-party endpoint.</param>
        /// <returns>
        /// The response body decoded as UTF-8 with line breaks removed (lines are concatenated),
        /// or "报错:" followed by the exception message when anything fails. This method does not throw.
        /// </returns>
        public static string PostReauest(byte[] buffer, string requestUri)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(requestUri);
                request.Method = "POST"; // HTTP method tokens are case-sensitive
                request.ContentType = "application/json"; // was "application / json", which is not a valid media type
                request.ContentLength = buffer.Length;

                // Write the body; the stream must be closed before the response is requested.
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(buffer, 0, buffer.Length);
                }

                // using blocks release the connection even when reading fails.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    StringBuilder responseData = new StringBuilder();
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        responseData.Append(line);
                    }
                    return responseData.ToString();
                }
            }
            catch (Exception exp)
            {
                // Errors are reported in-band; callers inspect the returned text.
                return "报错:" + exp.Message;
            }
        }

        /// <summary>
        /// Stores third-party results in MongoDB and adds them to the Lucene index.
        /// Only the name is copied from each result; every record is flagged as new (State "y").
        /// </summary>
        /// <param name="thirdInvoice">Third-party results to import.</param>
        /// <param name="path">Path of the PanGu xml configuration file.</param>
        /// <param name="IndexDic">Directory of the Lucene index.</param>
        public static void InThirdInfoToMongoAndLucene(List<ThirdInfo> thirdInvoice,string path,string IndexDic)
        {
            // Map every third-party entry to an index record.
            List<LuceneTestData> invList = thirdInvoice.ConvertAll(item => new LuceneTestData
            {
                Name = item.Name,
                State = "y",
                CreateTime = DateTime.Now.ToString(),
                UpdateTime = ""
            });

            // Persist first, then index the very same list.
            var store = new MongoDbHelper<LuceneTestData>();
            store.InsertBatch(invList);

            PanGuLuceneHelper.InitPanGuXmlPath(path);
            PanGuLuceneHelper.CreateIndex(IndexDic, invList);
        }

        /// <summary>
        /// Stores results of the second third-party source in MongoDB and adds them to the Lucene index.
        /// Only <c>Nsrmc</c> is copied (as the record name); every record is flagged as new (State "y").
        /// </summary>
        /// <param name="elephantHuiYun">Third-party results to import.</param>
        /// <param name="path">Path of the PanGu xml configuration file.</param>
        /// <param name="IndexDic">Directory of the Lucene index.</param>
        public static void InThirdTestInfoToMongoAndLucene(List<ThirdTestInfo> elephantHuiYun, string path, string IndexDic)
        {
            // Map every third-party entry to an index record.
            List<LuceneTestData> invList = elephantHuiYun.ConvertAll(item => new LuceneTestData
            {
                Name = item.Nsrmc,
                State = "y",
                CreateTime = DateTime.Now.ToString(),
                UpdateTime = ""
            });

            // Persist first, then index the very same list.
            var store = new MongoDbHelper<LuceneTestData>();
            store.InsertBatch(invList);

            PanGuLuceneHelper.InitPanGuXmlPath(path);
            PanGuLuceneHelper.CreateIndex(IndexDic, invList);
        }
    }
}
View Code

                          4、Helper文件夹下的MongoDbHelper如下:

using System;
using System.Collections.Generic;
using System.Linq;
using MongoDB.Bson;
using MongoDB.Driver;
using System.Linq.Expressions;
using System.Security.Authentication;

namespace LuceneNetTest.Helpers
{
    /// <summary>
    /// Holds the single <see cref="IMongoDatabase"/> handle shared by the MongoDB helpers.
    /// </summary>
    public class DB
    {
        private static readonly string connStr = "mongodb://192.168.4.192:27017";
        private static readonly string dbName = "LuceneTestData";

        // Created on first use by GetDb and reused afterwards.
        private static IMongoDatabase db = null;

        // Guards the creation of db.
        private static readonly object lockHelper = new object();

        private DB() { }

        /// <summary>
        /// Returns the shared database handle, creating the client on the first call.
        /// </summary>
        public static IMongoDatabase GetDb()
        {
            // Already created: nothing to do.
            if (db != null)
            {
                return db;
            }

            lock (lockHelper)
            {
                // Check again inside the lock: another thread may have created it meanwhile.
                if (db == null)
                {
                    var settings = MongoClientSettings.FromUrl(new MongoUrl(connStr));
                    settings.SslSettings = new SslSettings() { EnabledSslProtocols = SslProtocols.Tls12 };

                    var client = new MongoClient(settings);
                    db = client.GetDatabase(dbName);
                }
            }

            return db;
        }
    }

    public class MongoDbHelper<T> where T : BaseEntity
    {
        // Database handle obtained from DB.GetDb().
        private IMongoDatabase db = null;

        // Collection named after the entity type T.
        private IMongoCollection<T> collection = null;

        /// <summary>
        /// Binds the helper to the collection whose name is the type name of <typeparamref name="T"/>.
        /// </summary>
        public MongoDbHelper()
        {
            IMongoDatabase database = DB.GetDb();
            string collectionName = typeof(T).Name;

            db = database;
            collection = database.GetCollection<T>(collectionName);
        }
        /// <summary>
        /// Inserts one entity (synchronously). Before the insert the entity receives a newly
        /// generated Id, State "y", and identical CreateTime/UpdateTime stamps
        /// (local time, "yyyy-MM-dd HH:mm:ss").
        /// </summary>
        /// <param name="entity">Entity to store; it is modified in place.</param>
        /// <returns>The same entity instance, with the assigned values.</returns>
        public T Insert(T entity)
        {
            // T derives from BaseEntity, so the Id can be assigned directly (no reflection needed).
            entity.Id = ObjectId.GenerateNewId();
            entity.State = "y";

            // One clock read, so both stamps are guaranteed to be equal on a new record.
            string now = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
            entity.CreateTime = now;
            entity.UpdateTime = now;

            collection.InsertOne(entity);

            return entity;
        }

        /// <summary>
        /// Inserts one entity (synchronously). Before the insert the entity receives a newly
        /// generated Id, State "y", and identical CreateTime/UpdateTime stamps
        /// (local time, "yyyy-MM-dd HH:mm:ss").
        /// </summary>
        /// <param name="entity">Entity to store; it is modified in place.</param>
        /// <returns>The same entity instance, with the assigned values.</returns>
        public T InsertOne(T entity)
        {
            // T derives from BaseEntity, so the Id can be assigned directly (no reflection needed).
            entity.Id = ObjectId.GenerateNewId();
            entity.State = "y";

            // One clock read, so both stamps are guaranteed to be equal on a new record.
            string now = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
            entity.CreateTime = now;
            entity.UpdateTime = now;

            collection.InsertOne(entity);

            return entity;
        }

        /// <summary>
        /// Sets a single field of the document with the given Id.
        /// </summary>
        /// <param name="id">Document Id in its string form; parsed with <c>ObjectId.Parse</c>.</param>
        /// <param name="field">Name of the field to set.</param>
        /// <param name="value">New value of the field.</param>
        public void Modify(string id, string field, string value)
        {
            var byId = Builders<T>.Filter.Eq("Id", ObjectId.Parse(id));
            var setField = Builders<T>.Update.Set(field, value);

            collection.UpdateOne(byId, setField);
        }
        /// <summary>
        /// Merges the non-null property values of <paramref name="entity"/> into the stored
        /// document with the same Id, then replaces that document. The stored document gets
        /// State "n" and a fresh UpdateTime (local time, "yyyy-MM-dd HH:mm:ss").
        /// </summary>
        /// <param name="entity">Entity carrying the new values; null properties are ignored.</param>
        /// <remarks>
        /// If no document with the entity's Id exists, a NullReferenceException is thrown.
        /// Exceptions propagate to the caller with their original stack trace.
        /// </remarks>
        public void Update(T entity)
        {
            var old = collection.Find(e => e.Id.Equals(entity.Id)).ToList().FirstOrDefault();

            foreach (var prop in entity.GetType().GetProperties())
            {
                var newValue = prop.GetValue(entity);
                if (newValue == null)
                {
                    continue; // null means "leave the stored value alone"
                }

                var oldProp = old.GetType().GetProperty(prop.Name);
                var oldValue = oldProp.GetValue(old) ?? "";

                // Values are compared through their string form.
                if (!newValue.ToString().Equals(oldValue.ToString()))
                {
                    // Assign the value itself, not its string form, so non-string properties work too.
                    oldProp.SetValue(old, newValue);
                }
            }

            old.State = "n";
            old.UpdateTime = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");

            var filter = Builders<T>.Filter.Eq("Id", entity.Id);
            collection.ReplaceOne(filter, old);
        }
        /// <summary>
        /// Merges the non-null property values of <paramref name="entity"/> into the stored
        /// document with the same Id, then replaces that document. The stored document gets
        /// State "n" and a fresh UpdateTime (local time, "yyyy-MM-dd HH:mm:ss").
        /// </summary>
        /// <param name="entity">Entity carrying the new values; null properties are ignored.</param>
        /// <remarks>
        /// If no document with the entity's Id exists, a NullReferenceException is thrown.
        /// Exceptions propagate to the caller with their original stack trace.
        /// </remarks>
        public void UpdateEx(T entity)
        {
            var old = collection.Find(e => e.Id.Equals(entity.Id)).ToList().FirstOrDefault();

            foreach (var prop in entity.GetType().GetProperties())
            {
                var newValue = prop.GetValue(entity);
                if (newValue == null)
                {
                    continue; // null means "leave the stored value alone"
                }

                var oldProp = old.GetType().GetProperty(prop.Name);
                var oldValue = oldProp.GetValue(old) ?? "";

                // Values are compared through their string form, but the typed value is assigned.
                if (!newValue.ToString().Equals(oldValue.ToString()))
                {
                    oldProp.SetValue(old, newValue);
                }
            }

            old.State = "n";
            old.UpdateTime = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");

            var filter = Builders<T>.Filter.Eq("Id", entity.Id);
            collection.ReplaceOne(filter, old);
        }
        /// <summary>
        /// Deletes the document whose Id equals the Id of <paramref name="entity"/> (synchronous call).
        /// </summary>
        /// <param name="entity">Entity identifying the document to delete.</param>
        public void Delete(T entity)
        {
            var targetId = entity.Id;
            collection.DeleteOne(Builders<T>.Filter.Eq("Id", targetId));
        }

        /// <summary>
        /// Deletes the document with the given Id (synchronous call).
        /// </summary>
        /// <param name="IdString">Document Id in its string form; parsed with <c>ObjectId.Parse</c>.</param>
        /// <returns>The number of deleted documents reported by the driver.</returns>
        public long Delete(string IdString)
        {
            ObjectId parsedId = ObjectId.Parse(IdString);
            var outcome = collection.DeleteOne(Builders<T>.Filter.Eq("Id", parsedId));

            return outcome.DeletedCount;
        }
        /// <summary>
        /// Looks up a single document by Id.
        /// </summary>
        /// <param name="id">Document Id in its string form.</param>
        /// <returns>The matching document, or the default value of <typeparamref name="T"/> when none matches.</returns>
        public T QueryOne(string id)
        {
            List<T> matches = collection.Find(a => a.Id == ObjectId.Parse(id)).ToList();
            return matches.FirstOrDefault();
        }
        /// <summary>
        /// Returns every document whose State is not the empty string.
        /// </summary>
        /// <returns>The matching documents as a list.</returns>
        public List<T> QueryAll()
        {
            Expression<Func<T, bool>> stateNotEmpty = a => a.State != "";
            return collection.Find(stateNotEmpty).ToList();
        }
        /// <summary>
        /// Returns the first document matching <paramref name="express"/>.
        /// </summary>
        /// <param name="express">Filter predicate.</param>
        /// <returns>The first match, or the default value of <typeparamref name="T"/> when nothing matches.</returns>
        public T QueryByFirst(Expression<Func<T, bool>> express)
        {
            // Ask the driver for the first match directly instead of loading every
            // matching document into a list just to take its first element.
            return collection.Find(express).FirstOrDefault();
        }
        /// <summary>
        /// Returns all documents matching <paramref name="express"/>.
        /// </summary>
        /// <param name="express">Filter predicate.</param>
        /// <returns>The matching documents as a list.</returns>
        public List<T> QueryBy(Expression<Func<T, bool>> express)
        {
            var query = collection.Find(express);
            List<T> matches = query.ToList();
            return matches;
        }
        /// <summary>
        /// Inserts all entities of <paramref name="list"/> in one batch.
        /// An empty list is a no-op.
        /// </summary>
        /// <param name="list">Entities to insert.</param>
        /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
        public void InsertBatch(List<T> list)
        {
            if (list == null)
            {
                throw new ArgumentNullException(nameof(list));
            }

            // The driver rejects an empty batch with an ArgumentException,
            // so "nothing to insert" is handled here instead of crashing the caller.
            if (list.Count == 0)
            {
                return;
            }

            collection.InsertMany(list);
        }
        /// <summary>
        /// Deletes every document whose Id is contained in <paramref name="list"/>.
        /// </summary>
        /// <param name="list">Ids of the documents to delete.</param>
        public void DeleteBatch(List<ObjectId> list)
        {
            collection.DeleteMany(Builders<T>.Filter.In("Id", list));
        }

        /// <summary>
        /// Returns the documents that still have to be pushed to the index:
        /// those whose State is "y" or "n".
        /// </summary>
        /// <returns>The matching documents as a list.</returns>
        public List<T> QueryToLucene()
        {
            Expression<Func<T, bool>> pendingIndex = a => a.State.Equals("y") || a.State.Equals("n");
            return collection.Find(pendingIndex).ToList();
        }
    }
    /// <summary>
    /// Base class required by <c>MongoDbHelper&lt;T&gt;</c> for every stored entity:
    /// an Id plus state and timestamp fields.
    /// </summary>
    public abstract class BaseEntity
    {
        /// <summary>
        /// Storage Id of the document. MongoDbHelper.Insert/InsertOne overwrite it with a newly generated value.
        /// </summary>
        public ObjectId Id { get; set; }

        /// <summary>
        /// State flag; its meaning is up to the user of the class. Within this file
        /// "y" marks new records, "n" marks modified records and "ok" marks records already indexed.
        /// </summary>
        public string State { get; set; }

        /// <summary>
        /// Creation time, kept as text.
        /// </summary>
        public string CreateTime { get; set; }

        /// <summary>
        /// Last modification time, kept as text.
        /// </summary>
        public string UpdateTime { get; set; }
    }
}
View Code

                          5、Models文件夹下的LuceneTestData如下:

using LuceneNetTest.Helpers;

namespace LuceneNetTest.Models
{
    /// <summary>
    /// Record that is stored in MongoDB and indexed by Lucene.
    /// </summary>
    public class LuceneTestData : BaseEntity
    {
        /// <summary>Name; the field searched by PanGuLuceneHelper.SearchIndex.</summary>
        public string Name { get; set; }
        /// <summary>Code; written to the index under the field name "TaxCode".</summary>
        public string Code { get; set; }
        /// <summary>Address/phone text; indexed without analysis.</summary>
        public string AddressPhone { get; set; }
        /// <summary>Bank account text; indexed without analysis.</summary>
        public string BankAccount { get; set; }
    }
}
View Code

                          6、Models文件夹下的ThirdInfo如下:

namespace LuceneNetTest.Models
{
    /// <summary>
    /// One result entry of the first third-party source.
    /// Only <see cref="Name"/> is read by the visible import code.
    /// </summary>
    public class ThirdInfo
    {
        public string Id { get; set; }
        public string Name { get; set; }
        // NOTE(review): presumably a tax number — confirm against the third-party API.
        public string Taxnum { get; set; }
    }
}
View Code

                          7、Models文件夹下的ThirdTestData用于请求第三方数据后反序列化解析返回的数据,暂不展示

                       8、PanGu文件夹下是盘古分词,这里下载添加即可。

                       9、PanGuLuceneHelper如下:

using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Analysis;
using System.Collections.Generic;
using System.IO;
using Lucene.Net.Search;
using System;
using Lucene.Net.Store;
using Lucene.Net.QueryParsers;
using LuceneNetTest.Models;
using LuceneNetTest.Helpers;
using MongoDB.Bson;

namespace LuceneNetTest
{
    /// <summary>
    /// 信息
    /// </summary>
    public class PanGuLuceneHelper
    {

        // Shared analyzer: PanGu word segmentation is used for both indexing and searching.
        public static Analyzer analyzer = new PanGuAnalyzer();

        /// <summary>
        /// Initializes PanGu segmentation from its xml configuration file.
        /// </summary>
        /// <param name="PanGuXmlPath">Path of the PanGu xml configuration file.</param>
        public static void InitPanGuXmlPath(string PanGuXmlPath) => PanGu.Segment.Init(PanGuXmlPath);

        /// <summary>
        /// Opens (or creates) the index in <paramref name="IndexDic"/> and writes the pending records to it.
        /// </summary>
        /// <param name="IndexDic">Directory of the Lucene index.</param>
        /// <param name="list">
        /// Records to index. When null, the records are read from MongoDB (see <c>AddIndex</c>).
        /// </param>
        /// <returns>
        /// A <c>Result</c> with Success = true, or Success = false and the exception message.
        /// This method does not throw.
        /// </returns>
        public static Result CreateIndex(string IndexDic,List<LuceneTestData> list = null)
        {
            try
            {
                FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexDic), new NativeFSLockFactory());

                // A lock left behind (e.g. by a crashed run) would block the writer, so it is released.
                // NOTE(review): this also removes the lock of another process that is really writing.
                if (IndexWriter.IsLocked(directory))
                {
                    IndexWriter.Unlock(directory);
                }

                // This constructor overload has no create flag.
                // The using block closes the writer, and so releases the write lock, even when indexing
                // throws. Closing commits what was added so far, which keeps the index consistent with
                // the records AddIndex has already marked "ok" in MongoDB.
                using (IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED))
                {
                    AddIndex(writer, list);
                    writer.Optimize();
                }

                return new Result()
                {
                    Success = true
                };
            }
            catch (Exception ex)
            {
                return new Result()
                {
                    Success = false,
                    Message = ex.Message
                };
            }
        }

        /// <summary>
        /// Writes records to the index and marks each of them as indexed (State "ok") in MongoDB.
        /// Records with State "n" replace their previous index entry (matched by Id), records with
        /// State "y" are added; any other State is only marked "ok".
        /// </summary>
        /// <param name="writer">Open index writer; it is not closed here.</param>
        /// <param name="list">
        /// Records to index. When null, the records with State "y" or "n" are read from MongoDB.
        /// </param>
        private static void AddIndex(IndexWriter writer, List<LuceneTestData> list = null)
        {
            MongoDbHelper<LuceneTestData> mg = new MongoDbHelper<LuceneTestData>();

            // Query MongoDB only when the caller did not hand in the records.
            List<LuceneTestData> customerList = list ?? mg.QueryToLucene();

            foreach (var item in customerList)
            {
                Document doc = BuildIndexDocument(item);

                if (item.State == "n") // modified: drop the old entry, then add the new one
                {
                    Term t = new Term("Id", item.Id.ToString());
                    writer.DeleteDocuments(t);
                    writer.AddDocument(doc);
                }
                else if (item.State == "y") // new
                {
                    writer.AddDocument(doc);
                }

                // Mark as indexed so the next run skips this record.
                mg.Modify(item.Id.ToString(), "State", "ok");
            }
        }

        /// <summary>
        /// Builds the Lucene document for one record. Null text values are stored as empty strings.
        /// </summary>
        private static Document BuildIndexDocument(LuceneTestData item)
        {
            Document doc = new Document();
            // Name and TaxCode are analyzed (segmented); all other fields are indexed verbatim.
            doc.Add(new Field("Name", item.Name ?? "", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("TaxCode", item.Code ?? "", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("AddressPhone", item.AddressPhone ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("BankAccount", item.BankAccount ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("State", item.State ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Id", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("CreateTime", item.CreateTime ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("UpdateTime", item.UpdateTime ?? "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            return doc;
        }

        /// <summary>
        /// Writes the records pending in MongoDB (State "y" or "n") to the index and marks them "ok".
        /// </summary>
        /// <param name="writer">Open index writer; it is not closed here.</param>
        private static void AddIndexNew(IndexWriter writer)
        {
            // Same work as AddIndex without an explicit list, so delegate instead of duplicating the loop.
            AddIndex(writer, null);
        }

        /// <summary>
        /// Segments <paramref name="words"/> with the given analyzer.
        /// </summary>
        /// <param name="words">Text to segment.</param>
        /// <param name="analyzer">Analyzer to use; it is owned by the caller and is NOT closed here.</param>
        /// <returns>The tokens, each followed by '|' (e.g. "a|b|"); empty when there are no tokens.</returns>
        private static string cutWords(string words, Analyzer analyzer)
        {
            var tokens = new System.Text.StringBuilder();

            using (System.IO.StringReader reader = new System.IO.StringReader(words))
            // The first argument is the field name; the text itself is passed, as before.
            using (Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(words, reader))
            {
                while (ts.IncrementToken())
                {
                    var term = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                    tokens.Append(term.Term).Append('|');
                }
            }

            // The analyzer is deliberately left open: callers pass the shared static instance,
            // which is still needed for later searches and for indexing.
            return tokens.ToString();
        }

        /// <summary>
        /// Searches the "Name" field of the index for <paramref name="content"/>.
        /// </summary>
        /// <param name="content">Query text; it is parsed with the Lucene query parser syntax.</param>
        /// <param name="IndexDic">Directory of the Lucene index.</param>
        /// <returns>At most 4 records, best score first; an empty list when nothing matches.</returns>
        /// <remarks>Exceptions (missing index, unparsable query, ...) propagate to the caller.</remarks>
        public static List<LuceneTestData> SearchIndex(string content, string IndexDic)
        {
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexDic), new NoLockFactory());

            // Read-only searcher; the using block closes it (and the reader it opened) when done.
            using (IndexSearcher search = new IndexSearcher(directory, true))
            {
                // Build the query against the "Name" field with the PanGu analyzer.
                PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(analyzer);
                wrapper.AddAnalyzer("Name", analyzer);
                QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Name", wrapper);
                Query query = parser.Parse(content);

                BooleanQuery bQuery = new BooleanQuery();
                bQuery.Add(query, Occur.MUST); // same value the former "new Occur()" produced

                // Collect the 4 best hits.
                TopScoreDocCollector collector = TopScoreDocCollector.Create(4, true);
                search.Search(bQuery, collector);
                var hits = collector.TopDocs().ScoreDocs;

                List<LuceneTestData> list = new List<LuceneTestData>(hits.Length);
                foreach (var hit in hits)
                {
                    Document doc = search.Doc(hit.Doc);

                    // Map the stored fields back to a record ("TaxCode" is the index name of Code).
                    list.Add(new LuceneTestData()
                    {
                        Name = doc.Get("Name"),
                        Code = doc.Get("TaxCode"),
                        AddressPhone = doc.Get("AddressPhone"),
                        BankAccount = doc.Get("BankAccount"),
                        State = doc.Get("State"),
                        Id = ObjectId.Parse(doc.Get("Id")),
                        CreateTime = doc.Get("CreateTime"),
                        UpdateTime = doc.Get("UpdateTime")
                    });
                }
                return list;
            }
        }
    }
}
View Code

                          10、Program如下:

using System;
using System.Collections.Generic;
using System.Linq;
using LuceneNetTest.Models;
using LuceneNetTest.Helpers;

namespace LuceneNetTest
{
    /// <summary>
    /// Console entry point for the Lucene.Net + PanGu demo.
    /// Flow, as implemented below:
    /// 1. First push: load rows from SQL, insert them all into MongoDB, and build the index from that list.
    /// 2. Second push: compare the SQL rows against MongoDB and rebuild the index from MongoDB.
    /// 3. Search: query the index; when nothing is found, fall back to the third-party services,
    ///    store what they return in MongoDB and push it into the index.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Runs the push / index / search demo end to end and waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var IndexDic = @"C:/Users/Administrator/Desktop/RSA/LuceneNetTest/IndexDic/IndexDic";// directory where the index is created
            var path = @"C:/Users/Administrator/Desktop/RSA/LuceneNetTest/PanGu/PanGu.xml";// PanGu segmenter configuration file
            List<LuceneTestData> invList = LuceneNetTestHelpers.GetSqlData();
            BizLogger.Default.Info($"初次获取信息数据,总共获取了{invList.Count}条。");

            #region First push of data to MongoDB

            MongoDbHelper<LuceneTestData> mg = new MongoDbHelper<LuceneTestData>();
            mg.InsertBatch(invList);
            BizLogger.Default.Info($"信息数据推送到MongoDB数据库完成,总共获取了{invList.Count}条。");
            PanGuLuceneHelper.InitPanGuXmlPath(path);
            PanGuLuceneHelper.CreateIndex(IndexDic, invList);// build the index from the in-memory list
            BizLogger.Default.Info($"信息数据初次创建索引成功,总共创建了{invList.Count}条数据。");

            #endregion

            #region Second push of data to MongoDB

            // Compare the rows just read from SQL with what is already stored in MongoDB.
            string result = LuceneNetTestHelpers.GetMongoDBData(invList);
            BizLogger.Default.Info($"从SQL数据库获取信息与MongoDB数据信息进行对比,总共获取了{invList.Count}条。");
            // NOTE(review): an empty result string is treated as "go ahead and re-index" — confirm against GetMongoDBData.
            if (string.IsNullOrEmpty(result))
            {
                PanGuLuceneHelper.InitPanGuXmlPath(path);
                PanGuLuceneHelper.CreateIndex(IndexDic);// rebuild the index
                BizLogger.Default.Info($"信息创建索引成功");
            }

            #endregion

            #region Search

            string name = "上海";
            var list = PanGuLuceneHelper.SearchIndex(name, IndexDic);// query the index

            // "No hits" means null OR empty: the fallback is documented to run when the
            // result list is empty, and a null-only check would skip it for an empty list.
            bool noHits = list == null || !list.Any();
            if (noHits)
            {
                // First third-party request.
                string strResult = LuceneNetTestHelpers.RequestData(name);
                if (!string.IsNullOrEmpty(strResult))
                {
                    List<ThirdInfo> thirdInfo = DataSerializer.DeserializeFromJSON<List<ThirdInfo>>(strResult);
                    // Store the third-party data in MongoDB and push it into the index.
                    LuceneNetTestHelpers.InThirdInfoToMongoAndLucene(thirdInfo, path, IndexDic);
                    BizLogger.Default.Info($"从第三方。。中获取信息更新到MongoDB成功,并且创建索引成功");
                    // Print the deserialized names.
                    Console.WriteLine(string.Join("", thirdInfo.Select(t => t.Name)));
                }
                else
                {
                    // Second third-party request.
                    string twoResult = LuceneNetTestHelpers.RequestData2(name);
                    if (!string.IsNullOrEmpty(twoResult))
                    {
                        // Same handling as the first third-party request (not implemented in this demo).
                    }
                    else
                    {
                        Console.WriteLine("暂无信息");
                    }
                }
            }

            #endregion

            // Guard against a null search result: dereferencing it here used to throw
            // NullReferenceException right after the fallback branch had run.
            if (list != null)
            {
                Console.WriteLine(string.Join("", list.Select(n => n.Name)));
            }
            Console.ReadKey();
        }
    }
}
 
View Code

                          11、Result如下:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace LuceneNetTest
{
    /// <summary>
    /// Plain data object carrying a success flag, a numeric status code, and a message.
    /// </summary>
    public class Result
    {
        /// <summary>
        /// Success flag. NOTE(review): presumably true when the operation succeeded — confirm against callers.
        /// </summary>
        public bool Success { get; set; }

        /// <summary>
        /// Numeric status code. NOTE(review): the set of valid values is not defined here — confirm with the producer.
        /// </summary>
        public int StatusCode { get; set; }

        /// <summary>
        /// Message text associated with the result.
        /// </summary>
        public string Message { get; set; }
    }
}
View Code

                          以上代码简单地实现了搜索功能。

 

posted @ 2017-07-21 14:45  雪?  阅读(1193)  评论(4)  编辑  收藏  举报