Lucene.Net 站内搜索 - AdolphYang

公告

一全文检索：

like查询是全表扫描(为性能杀手)
Lucene.Net搜索引擎,开源,而sql搜索引擎是收费的
Lucene.Net只是一个全文检索开发包(只是帮我们存数据取数据,并没有界面,可以看作一个数据库,只能对文本信息进行检索)
Lucene.Net原理:把文本切词保存,然后根据词汇表的页来找到文章

二分词算法：

//一元分词算法(引用Lucene.Net.dll)

            // Unigram tokenization with StandardAnalyzer (flagged as outdated by the author):
            // tokenize the sample text and print each token on its own line.
            Analyzer analyzer = new StandardAnalyzer();
            StringReader sampleText = new StringReader("北京，HI欢饮你hello word");
            TokenStream tokenStream = analyzer.TokenStream("", sampleText);
            for (Lucene.Net.Analysis.Token token = tokenStream.Next(); token != null; token = tokenStream.Next())
            {
                Console.WriteLine(token.TermText());
            }
            // Keep the console window open until a key is pressed.
            Console.ReadKey();

一元分词算法

//二元分词算法(CJK:China Japan Korean 需要再引用CJKAnalyzer.cs/CJKTokenizer.cs)

            // Bigram tokenization with CJKAnalyzer (CJK = Chinese / Japanese / Korean):
            // tokenize the sample text and print each token on its own line.
            Analyzer analyzer = new CJKAnalyzer();
            StringReader sampleText = new StringReader("北京，HI欢饮你");
            TokenStream tokenStream = analyzer.TokenStream("", sampleText);
            Lucene.Net.Analysis.Token token;
            while (true)
            {
                token = tokenStream.Next();
                if (token == null)
                {
                    break;
                }
                Console.WriteLine(token.TermText());
            }
            // Keep the console window open until a key is pressed.
            Console.ReadKey();

二元分词算法

//基于词库的分词算法(盘古分词算法)

打开PanGu4Lucene\WebDemo\Bin,将Dictionaries添加到项目根路径(改名Dict),对于其下的文件，在其属性里，将“复制到输出目录”修改为“如果较新则复制”
添加PanGu.dll的引用(如果直接引用PanGu.dll则必须不带PanGu.xml)
添加PanGu4Lucene\Release中PanGu.Lucene.Analyzer.dll的引用

其中PanGu_Release_V2.3.1.0\Release\DictManage.exe可以查看Dict.dct二进制词库,既可以查看词汇也可以加入词汇

            // Dictionary-based tokenization with PanGuAnalyzer:
            // tokenize the sample text and print each token on its own line.
            Analyzer analyzer = new PanGuAnalyzer();
            StringReader sampleText = new StringReader("北京，HI欢饮你hello word");
            TokenStream tokenStream = analyzer.TokenStream("", sampleText);
            for (Lucene.Net.Analysis.Token token = tokenStream.Next(); token != null; token = tokenStream.Next())
            {
                Console.WriteLine(token.TermText());
            }
            // Keep the console window open until a key is pressed.
            Console.ReadKey();

盘古分词算法

三写入索引

Lucene.Net写入类介绍

打开文件夹,指定要写入的文件夹
文件加锁,避免两个人同时写入文件(并发)
判断是否文件中有数据,有的话就更新数据,没有就创建
逐一读取待读文件中文本并写入文档
写之后进行close,则表示解锁,可以由其他人写入(加锁写入过程中程序出现bug需要强制解锁时可能出问题)
各种类的作用:
Directory保存数据:FSDirectory(文件中),RAMDirectory(内存中)
IndexReader对索引库进行读取的类,IndexWriter对索引库进行写的类
IndexReader的bool IndexExists(Directory directory)判断目录是否是一个索引目录
IndexWriter的bool IsLocked(Directory directory)判断目录是否是锁定的
IndexWriter在进行写操作时会自动加锁,close的时候会自动解锁.IndexWriter.Unlock方法手动解锁(比如还没来得及close IndexWriter程序就崩溃了,可能造成一直被锁定)
IndexWriter(Directory dir,Analyzer a,bool create,MaxFieldLength mfl)写入哪个文件夹,采用什么分词算法,是否是创建,最大大小
void AddDocument(Document doc),向索引中添加文档
Add(Field field)向文档中添加字段
DeleteAll()删除所有文档,DeleteDocuments按照条件删除文档
Field类的构造函数 Field(string name,string value,Field.Store store,Field.Index index,Field.TermVector termVector)
上面依次表示:(字段名,字段值,是否把原文保存到索引中,index表示如何创建索引(ANALYZED表示需要分词后进行全文检索,NOT_ANALYZED表示不分词、按原值索引),termVector表示是否保存词条向量(词的位置/偏移信息,可用于计算词间距离,距离越远关联度越低))
处理并发(写的时候只能逐一写入):用消息队列保证只有一个程序(线程)对索引操作,其他程序不直接进行索引库的写入,而是把要写入的数据放入消息队列,由单独的程序从消息队列中取数据进行索引库的写入

文章在新增和编辑时写入索引：

引用4个ServiceStack的dll用于队列

引用Quartz.dll/Common.Logging.dll用于定时任务

引用Lucene.Net.dll/PanGu.dll/PanGu.Lucene.Analyzer.dll用于写入索引

添加Dictionaries文件夹并改名为Dict，其下文件的“复制到输出目录”属性修改为“如果较新则复制”

/// <summary>
        /// Serializes the fields of a news item that the indexer needs and appends the
        /// result to the Redis list used as the news-index queue.
        /// </summary>
        /// <param name="news">News item whose ID, CATEGORYID, TITLE and CONTENT are queued.</param>
        public void EnqueueForNewsSearch(TD_NEWS news)
        {
            // Collect the indexed fields under the keys the consumer reads back.
            Dictionary<string, object> payload = new Dictionary<string, object>
            {
                { "ID", news.ID },
                { "CATEGORYID", news.CATEGORYID },
                { "TITLE", news.TITLE },
                { "CONTENT", news.CONTENT }
            };
            string message = CommonHelper.Serializer(payload);

            // Push the serialized item onto the queue; the client is disposed afterwards.
            IRedisClient redis = RedisManager.ClientManager.GetClient();
            using (redis)
            {
                redis.EnqueueItemOnList(ConstStringHelper.REDIS_ADMIN_QUEUELIST_NEWSINDEX, message);
            }
        }

文章新增或编辑时入队列

namespace DIDAO.Timer
{
    /// <summary>
    /// Consumer side of the news-index queue: pops queued news items from Redis and
    /// adds each one to the Lucene news index.
    /// </summary>
    public class NewsIndex
    {
        /// <summary>
        /// Polls the queue forever. An empty queue is re-checked after a 100 ms pause;
        /// every dequeued item is written to the index immediately.
        /// </summary>
        public void Start()
        {
            for (; ; )
            {
                using (IRedisClient redis = RedisManager.ClientManager.GetClient())
                {
                    string message = redis.DequeueItemFromList(ConstStringHelper.REDIS_ADMIN_QUEUELIST_NEWSINDEX);
                    if (message == null)
                    {
                        // Nothing queued: back off briefly before the next poll.
                        Thread.Sleep(100);
                        continue;
                    }

                    // Rebuild the news item from the serialized dictionary.
                    Dictionary<string, object> fields = (Dictionary<string, object>)CommonHelper.DeSerializer(message);
                    TD_NEWS news = new TD_NEWS
                    {
                        ID = Convert.ToInt64(fields["ID"]),
                        CATEGORYID = Convert.ToInt64(fields["CATEGORYID"]),
                        TITLE = fields["TITLE"].ToString(),
                        CONTENT = fields["CONTENT"].ToString()
                    };

                    // One index write per dequeued item.
                    WriteToNewsIndex(news.ID, news.CATEGORYID, news.TITLE, news.CONTENT);
                }
            }
        }

        /// <summary>
        /// Adds one news item to the index as a single document. The index directory and
        /// writer are opened and closed on every call.
        /// </summary>
        /// <param name="id">News id; stored and indexed without analysis.</param>
        /// <param name="categoryid">Category id; stored and indexed without analysis.</param>
        /// <param name="title">Title; stored and analyzed.</param>
        /// <param name="content">Body text; stored and analyzed.</param>
        private void WriteToNewsIndex(long id, long categoryid, string title, string content)
        {
            const string indexPath = @"E:\RuPeng_Project\DiDao\DIDAO.Timer\NewsIndex";
            FSDirectory indexDirectory = null;
            IndexWriter indexWriter = null;
            try
            {
                // Open the index directory with a native file-system lock factory so that
                // writers are serialized through a lock.
                indexDirectory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());

                // An existing index that is still locked is force-unlocked; the author's
                // assumption is that a leftover lock means an earlier run exited without
                // closing its writer.
                bool indexExists = IndexReader.IndexExists(indexDirectory);
                if (indexExists && IndexWriter.IsLocked(indexDirectory))
                {
                    IndexWriter.Unlock(indexDirectory);
                }

                // Arguments: directory, analyzer, create-new-index flag, maximum field length.
                indexWriter = new IndexWriter(indexDirectory, new PanGuAnalyzer(), !indexExists, IndexWriter.MaxFieldLength.UNLIMITED);

                // Build the document field by field and add it to the index.
                Document document = new Document();
                document.Add(new Field("id", id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("categoryid", categoryid.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
                document.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
                indexWriter.AddDocument(document);
            }
            finally
            {
                // Closing the writer releases the write lock; then release the directory.
                if (indexWriter != null)
                {
                    indexWriter.Close();
                }
                if (indexDirectory != null)
                {
                    indexDirectory.Close();
                }
            }
        }
    }
}

文章出队列写入索引

写入索引如果报错：

未能加载文件或程序集“PanGu, Version=2.3.0.0, Culture=neutral, PublicKeyToken=null”
或它的某一个依赖项。系统找不到指定的文件。

原因：

执行这个写入索引的另一个程序也需要引用PanGu.dll，最先执行写入索引的程序与后面真正类所在的索引是相互依赖的。

四文章搜索：

query.Add(new Term("字段名","关键词"))
query.Add(new Term("字段名2","关键词2"))
类似于:where 字段名contains关键词 and 字段名2contains关键词2
PhraseQuery用于进行多个关键词的检索
PhraseQuery.SetSlop(int slop)用来设置单词之间的最大距离
BooleanQuery可以实现字段名contains关键词or字段名2contains关键词2

搜索时所采用的分词算法必须和生成索引时一致，即盘古分词算法

总条数 totalSize = collector.GetTotalHits()

查询结果集合应该是从(pagenum-1)*5,pagenum*5,但是collector.TopDocs(m,n)的m是从0开始、n是条数

#region News search
                #region Outline
                // 1. Read the keywords and the requested page number.
                // 2. Segment the keywords with PanGu and build a PhraseQuery over the "content" field.
                // 3. Open the index read-only without locking and collect up to 1000 scored hits.
                // 4. Slice out the requested page and turn each hit into a URL + title.
                // 5. Close the searcher, reader and directory.
                // 6. Render the results through ~/News/NewsSearch.cshtml.
                #endregion
                #region Read request
                // Check for a missing parameter before trimming, so that a request without
                // "keywords" returns quietly instead of throwing NullReferenceException.
                string keywords = context.Request["keywords"];
                if (string.IsNullOrWhiteSpace(keywords))
                {
                    return;
                }
                keywords = keywords.Trim();
                // Current page, 1-based. Missing, non-numeric or below-1 values fall back to page 1.
                int pagenum = 1;
                string pagenumStr = context.Request["pagenum"];
                if (!string.IsNullOrWhiteSpace(pagenumStr) && VolidHelper.CheckStringIsInt(pagenumStr))
                {
                    pagenum = Convert.ToInt32(pagenumStr);
                }
                if (pagenum < 1)
                {
                    pagenum = 1;
                }
                #endregion
                #region Build query
                // Segment the keywords with PanGu, the same segmentation family used when indexing.
                PanGu.Segment segment = new PanGu.Segment();
                var wordInfos = segment.DoSegment(keywords);
                // One phrase query containing every segmented word, matched against "content".
                PhraseQuery query = new PhraseQuery();
                foreach (var wordInfo in wordInfos)
                {
                    query.Add(new Term("content", wordInfo.Word));
                }
                query.SetSlop(1000); // maximum distance allowed between the words
                #endregion
                List<TD_NewsSearchResult> results = new List<TD_NewsSearchResult>();
                int pagesize = 10; // hits per page
                int totalsize = 0; // total number of hits for the query
                FSDirectory directory = null;
                IndexReader reader = null;
                IndexSearcher searcher = null;
                try
                {
                    // Open the index directory without a lock and read it in read-only mode.
                    directory = FSDirectory.Open(new DirectoryInfo(@"E:\RuPeng_Project\DiDao\DIDAO.Timer\NewsIndex"), new NoLockFactory());
                    reader = IndexReader.Open(directory, true);
                    searcher = new IndexSearcher(reader);
                    // The collector keeps at most 1000 hits, so paging cannot reach past them.
                    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                    searcher.Search(query, null, collector);
                    totalsize = collector.GetTotalHits();
                    // TopDocs(start, count): start is 0-based, count is the page size.
                    ScoreDoc[] docs = collector.TopDocs((pagenum - 1) * pagesize, pagesize).scoreDocs;
                    foreach (ScoreDoc scoredoc in docs)
                    {
                        int docId = scoredoc.doc; // document number assigned by Lucene
                        Document doc = searcher.Doc(docId);
                        long id = Convert.ToInt64(doc.Get("id"));
                        long categoryid = Convert.ToInt64(doc.Get("categoryid"));
                        TD_NewsSearchResult nsr = new TD_NewsSearchResult();
                        nsr.URL = "/News/" + categoryid + "/" + id + ".shtml";
                        nsr.TITLE = doc.Get("title");
                        results.Add(nsr);
                    }
                }
                finally
                {
                    // The results are fully materialized above, so the index handles can be
                    // released before rendering. Previously they were never closed.
                    if (searcher != null)
                    {
                        searcher.Close();
                    }
                    if (reader != null)
                    {
                        reader.Close();
                    }
                    if (directory != null)
                    {
                        directory.Close();
                    }
                }
                RazorHelper.RazorParse(context, "~/News/NewsSearch.cshtml", new { results = results, keywords = keywords,
                                totalsize=totalsize, pagesize=pagesize,currpage=pagenum }); 
                #endregion

NewsController.ashx?action=search

五写入索引优化：

通过多线程避免界面卡死：

因为耗时操作会阻塞主线程(界面线程)，所以需要把耗时操作放入子线程

默认创建的子线程是前台线程：即使主线程(窗体)关闭，进程也要等所有前台线程执行完才会退出。把子线程设置为后台线程(IsBackground=true)后，主线程关闭时后台线程会随进程一起被终止，不会出现窗体已关闭而程序仍在运行的情况

示例：

        private void btnMainThread_Click(object sender, EventArgs e)
        {
            // Run the slow F1 routine on a worker thread so the click handler returns at once.
            // IsBackground = true: the worker does not keep the process alive once the
            // foreground (UI) thread has ended.
            Thread worker = new Thread(F1) { IsBackground = true };
            worker.Start();
        }

        // Long-running work executed on the worker thread: reads the file 1000 times and
        // reports the current pass in textBox1.
        private void F1()
        {
            string path = @"G:\RuPeng_yangguo_work\EasyUi.rar";
            for (int i = 0; i < 1000; i++)
            {
                File.ReadAllBytes(path);
                // Copy the counter before handing it to the UI thread: BeginInvoke runs the
                // delegate later, and a lambda capturing `i` itself would read whatever value
                // the loop has reached by then rather than the pass that queued it.
                int pass = i;
                // A worker thread must not touch controls directly; marshal the update
                // to the UI thread through the control's BeginInvoke.
                textBox1.BeginInvoke(new Action(() =>
                {
                    textBox1.Text = "正在读取文件第" + pass + "次";
                }));
            }
        }

testThread.Form1.cs

实例：

//定时任务,子线程中出队列,然后写入文章索引,关闭窗口时终止子线程(出队列)和quartz.net调度器
首先,启动窗体,执行定时任务,而定时的任务是进行新闻的出队列
然后,新闻的出队列是耗时操作,需要委托给子线程,并设为后台线程,然后启动该线程,其中出队列线程的循环由while(IsRunning)控制,先预先设置IsRunning=true
IsRunning = true;
Thread thread = new Thread(RunScan);// hand RunScan over to a worker thread
thread.IsBackground = true;// mark the worker as a background thread
thread.Start();// start the worker, which then runs RunScan
然后,执行出队列这个后台子线程
public static bool IsRunning { get; set; }//是否继续线程
public void RunScan()
{
while (IsRunning)//一旦窗体关闭,IsRunning=false,该进程终止
{...
然后,一直执行这个子线程,直到窗体被关闭,这时设置IsRunning=false使还在执行的这个后台子线程Thread的RunScan()终止,同时还需关闭后台Quartz.net调度器,避免窗体关闭而进程还在
// Form-closed handler: stops the background scan loop and shuts the scheduler down.
private void Form1_FormClosed(object sender, FormClosedEventArgs e)
{
NewsIndexer.IsRunning = false;// clear the flag so the RunScan loop on the background thread ends
SendNewRegisterUM.schedWNI.Shutdown();// also shut down the Quartz.net scheduler so nothing keeps running after the window is closed
}

    /// <summary>
    /// Scheduled job that drains the news-index queue: each queued news item is popped
    /// from Redis and added to the Lucene news index.
    /// </summary>
    public class NewsIndex:IJob
    {
        /// <summary>
        /// While true, the worker loop keeps dequeuing; setting it to false stops the loop
        /// before its next iteration.
        /// </summary>
        public bool IsBegining { get; set; }

        /// <summary>
        /// Job entry point: starts the dequeue loop on a background worker thread so the
        /// caller is not blocked by the indexing work.
        /// </summary>
        /// <param name="context">Job execution context supplied by the scheduler (unused).</param>
        public void Execute(JobExecutionContext context)
        {
            // Background thread: it does not keep the process alive after the foreground
            // threads have ended.
            Thread worker = new Thread(DequeueForNewIndex) { IsBackground = true };
            this.IsBegining = true;
            worker.Start();
        }

        /// <summary>
        /// Worker loop: dequeues and indexes items one at a time until the queue is empty
        /// or <see cref="IsBegining"/> has been cleared.
        /// </summary>
        public void DequeueForNewIndex()
        {
            while (this.IsBegining)
            {
                using (IRedisClient redis = RedisManager.ClientManager.GetClient())
                {
                    string message = redis.DequeueItemFromList(ConstStringHelper.REDIS_ADMIN_QUEUELIST_NEWSINDEX);
                    if (message == null)
                    {
                        // Queue drained: end this run; the next scheduled execution polls again.
                        return;
                    }

                    // Rebuild the news item from the serialized dictionary.
                    Dictionary<string, object> fields = (Dictionary<string, object>)CommonHelper.DeSerializer(message);
                    TD_NEWS news = new TD_NEWS
                    {
                        ID = Convert.ToInt64(fields["ID"]),
                        CATEGORYID = Convert.ToInt64(fields["CATEGORYID"]),
                        TITLE = fields["TITLE"].ToString(),
                        CONTENT = fields["CONTENT"].ToString()
                    };

                    // One index write per dequeued item.
                    WriteToNewsIndex(news.ID, news.CATEGORYID, news.TITLE, news.CONTENT);
                }
            }
        }

        /// <summary>
        /// Adds one news item to the index as a single document. The index directory and
        /// writer are opened and closed on every call.
        /// </summary>
        /// <param name="id">News id; stored and indexed without analysis.</param>
        /// <param name="categoryid">Category id; stored and indexed without analysis.</param>
        /// <param name="title">Title; stored and analyzed.</param>
        /// <param name="content">Body text; stored and analyzed.</param>
        private void WriteToNewsIndex(long id, long categoryid, string title, string content)
        {
            const string indexPath = @"E:\RuPeng_Project\DiDao\DIDAO.Timer\NewsIndex";
            FSDirectory indexDirectory = null;
            IndexWriter indexWriter = null;
            try
            {
                // Open the index directory with a native file-system lock factory so that
                // writers are serialized through a lock.
                indexDirectory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());

                // An existing index that is still locked is force-unlocked; the author's
                // assumption is that a leftover lock means an earlier run exited without
                // closing its writer.
                bool indexExists = IndexReader.IndexExists(indexDirectory);
                if (indexExists && IndexWriter.IsLocked(indexDirectory))
                {
                    IndexWriter.Unlock(indexDirectory);
                }

                // Arguments: directory, analyzer, create-new-index flag, maximum field length.
                indexWriter = new IndexWriter(indexDirectory, new PanGuAnalyzer(), !indexExists, IndexWriter.MaxFieldLength.UNLIMITED);

                // Build the document field by field and add it to the index.
                Document document = new Document();
                document.Add(new Field("id", id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("categoryid", categoryid.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
                document.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
                indexWriter.AddDocument(document);
            }
            finally
            {
                // Closing the writer releases the write lock; then release the directory.
                if (indexWriter != null)
                {
                    indexWriter.Close();
                }
                if (indexDirectory != null)
                {
                    indexDirectory.Close();
                }
            }
        }
    }

Timer.NewsIndex.cs

        // Form-closed handler: asks the background indexing worker to stop.
        private void FormMain_FormClosed(object sender, FormClosedEventArgs e)
        {
            // NOTE(review): IsBegining is cleared on a freshly created NewsIndex, so an
            // instance that is already running elsewhere does not observe it — confirm
            // whether the flag was meant to be shared (e.g. static).
            NewsIndex indexer = new NewsIndex();
            indexer.IsBegining = false;
            // Shutting the scheduler down here (TimeSchedule.sched.Shutdown()) is left
            // disabled; the author noted that it failed.
        }

TimerForm.FormMain.cs

获取html的InnerText

搜索出来的不仅只是Title,还需要预览一部分内容body
用Lucene.net放入索引的时候需要过滤html标签
解决索引中body中全是html标签的问题,不利于搜索,很多垃圾信息,显示不方便。
使用HtmlAgilityPack进行innerText处理.
考虑文章编辑\重新索引等问题,需要先把旧的文档删除,再增加新的(等价于update)
HTML解析器:输入一个html文档,提供对html文档操作的接口
开发包HtmlAgilityPack.1.4.0.zip,用于把html标签进行innerText后再放入索引库

引用 HtmlAgilityPack.dll

示例：

    // Load an HTML document from disk and print the inner text of its root node.
    HtmlDocument htmlDoc = new HtmlDocument();
    htmlDoc.Load(@"D:\temp\htmlAgilityPack.txt");
    // To read a single element instead, look it up by id first,
    // e.g. htmlDoc.GetElementbyId("p11").InnerText.
    string plainText = htmlDoc.DocumentNode.InnerText;
    Console.WriteLine(plainText);
    // Keep the console window open until a key is pressed.
    Console.ReadKey();




HtmlAgilityPack.dll提供操作Html文档的标签方法
获得网页title:doc.DocumentNode.SelectSingleNode("//title").InnerText;//XPath中"//title"表示所有title节点;SelectSingleNode用于获取满足条件的唯一节点
获得所有超链接:doc.DocumentNode.Descendants("a");
获得name为kw的input,相当于getElementByName();
var kwBox=doc.DocumentNode.SelectSingleNode("//input[@name='kw']");//"//input[@name='kw']"也是XPath语法,表示name=kw的input标签

示例

实例：

        /// <summary>
        /// Queues a news item for indexing: its CONTENT is reduced to inner text (HTML
        /// markup removed) and the indexed fields are serialized and pushed onto the
        /// Redis list used as the news-index queue.
        /// </summary>
        /// <param name="news">News item whose ID, CATEGORYID, TITLE and CONTENT are queued.</param>
        public void EnqueueForNewsSearch(TD_NEWS news)
        {
            Dictionary<string, object> dict = new Dictionary<string, object>();
            dict["ID"] = news.ID;
            dict["CATEGORYID"] = news.CATEGORYID;
            dict["TITLE"] = news.TITLE;
            // Index the text only, not the markup. LoadHtml rejects null, so an item
            // without content is queued with empty text instead of failing the save.
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(news.CONTENT ?? string.Empty);
            dict["CONTENT"] = htmlDoc.DocumentNode.InnerText;
            string json = CommonHelper.Serializer(dict);
            // Push the serialized item onto the queue; the client is disposed afterwards.
            using (IRedisClient client = RedisManager.ClientManager.GetClient())
            {
                client.EnqueueItemOnList(ConstStringHelper.REDIS_ADMIN_QUEUELIST_NEWSINDEX, json);
            }
        }

实例

一键建立新闻索引：

入队列：

如果新闻太多，应该分批次进行入队列建立索引

        [PermissionAction("所有新闻一键重建全文索引")]
        public void allNewsIndex(HttpContext context)
        {
            #region Rebuild the full-text index for all news
            // Outline:
            //   1. Read categoryid; it is only used to redirect back to the news list.
            //   2. Queue the news in batches of 100 rows instead of loading everything at once.
            //   3. Number of batches = ceiling(total rows / batch size).
            //   4. For each batch: load the rows, then enqueue every news item.
            //   5. Record the operation in the log and redirect to the list page.
            string categoryidStr = context.Request["categoryid"];
            int categoryid = VolidHelper.CheckStrToInt(categoryidStr);

            long pagesize = 100;
            long totalsize = myORM_BLL.SelectCountByField(typeof(TD_NEWS), 1);
            long totalpage = (long)Math.Ceiling(totalsize * 1.0 / pagesize);

            long page = 1;
            while (page <= totalpage)
            {
                // Row window of this batch — presumably 1-based and inclusive; verify
                // against SelectModelsByRownum.
                long firstRow = (page - 1) * pagesize + 1;
                long lastRow = page * pagesize;
                List<object> batch = myORM_BLL.SelectModelsByRownum(typeof(TD_NEWS), "NO", 1, firstRow, lastRow);

                // One Redis client is shared by every item of the batch.
                using (IRedisClient redis = RedisManager.ClientManager.GetClient())
                {
                    foreach (object row in batch)
                    {
                        new NewsBLL().EnqueueForNewsSearch(row as TD_NEWS, redis);
                    }
                }
                page++;
            }

            AdminHelper.RecordOperateLog(context, "新闻一键重建全文索引");
            context.Response.Redirect("/News/NewsController.ashx?action=list&categoryid=" + categoryid);
            #endregion
        }

NewsController.ashx

        /// <summary>
        /// Queues a news item for indexing through a Redis client supplied by the caller,
        /// so that a bulk operation can reuse one connection for many items. CONTENT is
        /// reduced to inner text (HTML markup removed) before it is queued.
        /// </summary>
        /// <param name="news">News item whose ID, CATEGORYID, TITLE and CONTENT are queued.</param>
        /// <param name="client">Open Redis client; it is not disposed here.</param>
        public void EnqueueForNewsSearch(TD_NEWS news, IRedisClient client)
        {
            Dictionary<string, object> dict = new Dictionary<string, object>();
            dict["ID"] = news.ID;
            dict["CATEGORYID"] = news.CATEGORYID;
            dict["TITLE"] = news.TITLE;
            // Index the text only, not the markup. LoadHtml rejects null, so an item
            // without content is queued with empty text rather than aborting the whole
            // batch the caller is working through.
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(news.CONTENT ?? string.Empty);
            dict["CONTENT"] = htmlDoc.DocumentNode.InnerText;
            string json = CommonHelper.Serializer(dict);
            client.EnqueueItemOnList(ConstStringHelper.REDIS_ADMIN_QUEUELIST_NEWSINDEX, json);
        }

NewsBLL.cs

出队列：

每次出队列都进行一次索引路径的打开读取和关闭,效率低

全部出队列之前先打开索引目录,之后才关闭索引目录,最后才等待下一次client的队列中新数据

每次出队列加入检索索引之前,都需要删除文档索引中的相同id的文档索引,因为"编辑新闻"和"一键重建全文索引"都会再次加入同id的索引

    /// <summary>
    /// Scheduled job that drains the news-index queue into the Lucene news index. The
    /// index writer is opened once per drain pass and shared by every item of that pass
    /// instead of being reopened for each item.
    /// </summary>
    public class NewsIndex:IJob
    {
        /// <summary>Pause, in milliseconds, between drain passes once the queue is empty.</summary>
        private const int IdlePollDelayMs = 100;

        /// <summary>
        /// While true, the worker keeps polling and indexing; clearing it stops the
        /// worker after the item it is currently processing.
        /// </summary>
        public bool IsBegining { get; set; }

        /// <summary>
        /// Job entry point: starts the dequeue loop on a background worker thread so the
        /// caller is not blocked by the indexing work.
        /// </summary>
        /// <param name="context">Job execution context supplied by the scheduler (unused).</param>
        public void Execute(JobExecutionContext context)
        {
            // NOTE(review): every call starts another worker that polls until IsBegining
            // is cleared — confirm the trigger fires only once, otherwise several workers
            // would write to the same index directory concurrently.
            Thread thread = new Thread(DequeueForNewIndex);
            // Background thread: it does not keep the process alive after the foreground
            // threads have ended.
            thread.IsBackground = true;
            this.IsBegining = true;
            thread.Start();
        }

        /// <summary>
        /// Worker loop: runs one drain pass per Redis client, then waits briefly before
        /// polling again, until <see cref="IsBegining"/> is cleared.
        /// </summary>
        public void DequeueForNewIndex()
        {
            while (this.IsBegining)
            {
                using (IRedisClient client = RedisManager.ClientManager.GetClient())
                {
                    ProcessDenqueue(client);
                }
                // The queue was empty when the pass ended. Without a pause this loop would
                // spin, reopening the index writer and hitting Redis back-to-back.
                if (this.IsBegining)
                {
                    Thread.Sleep(IdlePollDelayMs);
                }
            }
        }

        /// <summary>
        /// One drain pass: opens the index writer, indexes queued items until the queue
        /// is empty or a stop has been requested, then closes the writer and directory.
        /// </summary>
        /// <param name="client">Redis client used to dequeue items.</param>
        private void ProcessDenqueue(IRedisClient client)
        {
            FSDirectory directory = null;
            IndexWriter writer = null;
            try
            {
                string indexPath = @"E:\RuPeng_Project\DiDao\DIDAO.Timer\NewsIndex";
                // Open the index directory with a native file-system lock factory so that
                // writers are serialized through a lock.
                directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
                bool exist = IndexReader.IndexExists(directory);
                // An existing index that is still locked is force-unlocked; the author's
                // assumption is that a leftover lock means an earlier run exited without
                // closing its writer.
                if (exist && IndexWriter.IsLocked(directory))
                {
                    IndexWriter.Unlock(directory);
                }
                // Arguments: directory, analyzer, create-new-index flag, maximum field length.
                writer = new IndexWriter(directory, new PanGuAnalyzer(), !exist, IndexWriter.MaxFieldLength.UNLIMITED);

                // Honour a stop request between items; anything not yet dequeued simply
                // stays in the queue for a later run.
                while (this.IsBegining)
                {
                    string json = client.DequeueItemFromList(ConstStringHelper.REDIS_ADMIN_QUEUELIST_NEWSINDEX);
                    if (json == null)
                    {
                        // Queue drained: end this pass.
                        return;
                    }

                    // Rebuild the news item from the serialized dictionary.
                    Dictionary<string, object> dict = (Dictionary<string, object>)CommonHelper.DeSerializer(json);
                    TD_NEWS news = new TD_NEWS();
                    news.ID = Convert.ToInt64(dict["ID"]);
                    news.CATEGORYID = Convert.ToInt64(dict["CATEGORYID"]);
                    news.TITLE = dict["TITLE"].ToString();
                    news.CONTENT = dict["CONTENT"].ToString();
                    WriteToNewsIndex(writer, news.ID, news.CATEGORYID, news.TITLE, news.CONTENT);
                }
            }
            finally
            {
                // Closing the writer releases the write lock; then release the directory.
                if (writer != null)
                {
                    writer.Close();
                }
                if (directory != null)
                {
                    directory.Close();
                }
            }
        }

        /// <summary>
        /// Replaces the indexed document of one news item: any document with the same id
        /// is deleted first, because both editing a news item and the bulk rebuild queue
        /// ids that may already be indexed.
        /// </summary>
        /// <param name="writer">Open index writer shared by the current drain pass.</param>
        /// <param name="id">News id; stored and indexed without analysis.</param>
        /// <param name="categoryid">Category id; stored and indexed without analysis.</param>
        /// <param name="title">Title; stored and analyzed.</param>
        /// <param name="content">Body text; stored and analyzed.</param>
        private void WriteToNewsIndex(IndexWriter writer, long id, long categoryid, string title, string content)
        {
            writer.DeleteDocuments(new Term("id", id.ToString()));
            Document doc = new Document();
            doc.Add(new Field("id", id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("categoryid", categoryid.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED,Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED,Field.TermVector.WITH_OFFSETS));
            writer.AddDocument(doc);
        }
    }

Timer.NewsIndex.cs

六搜索优化：

搜索结果高亮显示：

搜索结果中获取最匹配的摘要段，关键词需要高亮显示

引用 PanGu.HighLight.dll

        /// <summary>
        /// Builds the preview text for a search hit: picks the fragment of
        /// <paramref name="content"/> that best matches the keywords and wraps each
        /// matched keyword in a <c>span</c> with class <c>keywordHL</c>.
        /// </summary>
        /// <param name="keywords">Keywords the user searched for.</param>
        /// <param name="content">Full text of the hit.</param>
        /// <returns>The fragment returned by the highlighter's <c>GetBestFragment</c>.</returns>
        public static string HighLight(string keywords, string content)
        {
            // The formatter supplies the opening/closing markup placed around each keyword;
            // the PanGu segment object is handed to the highlighter alongside it.
            PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(
                new PanGu.HighLight.SimpleHTMLFormatter("<span class='keywordHL'>", "</span>"),
                new PanGu.Segment())
            {
                // Fragment size setting for the returned summary.
                FragmentSize = 100
            };

            string bestFragment = highlighter.GetBestFragment(keywords, content);
            return bestFragment;
        }

Front.FrontHelper.cs

#region News search
                #region Outline
                // 1. Read the keywords and the requested page number.
                // 2. Segment the keywords with PanGu and build a PhraseQuery over the "content" field.
                // 3. Open the index read-only without locking and collect up to 1000 scored hits.
                // 4. Slice out the requested page and turn each hit into URL + title + highlighted summary.
                // 5. Close the searcher, reader and directory.
                // 6. Render the results through ~/News/NewsSearch.cshtml.
                #endregion
                #region Read request
                // Check for a missing parameter before trimming, so that a request without
                // "keywords" returns quietly instead of throwing NullReferenceException.
                string keywords = context.Request["keywords"];
                if (string.IsNullOrWhiteSpace(keywords))
                {
                    return;
                }
                keywords = keywords.Trim();
                // Current page, 1-based. Missing, non-numeric or below-1 values fall back to page 1.
                int pagenum = 1;
                string pagenumStr = context.Request["pagenum"];
                if (!string.IsNullOrWhiteSpace(pagenumStr) && VolidHelper.CheckStringIsInt(pagenumStr))
                {
                    pagenum = Convert.ToInt32(pagenumStr);
                }
                if (pagenum < 1)
                {
                    pagenum = 1;
                }
                #endregion
                #region Build query
                // Segment the keywords with PanGu, the same segmentation family used when indexing.
                PanGu.Segment segment = new PanGu.Segment();
                var wordInfos = segment.DoSegment(keywords);
                // One phrase query containing every segmented word, matched against "content".
                PhraseQuery query = new PhraseQuery();
                foreach (var wordInfo in wordInfos)
                {
                    query.Add(new Term("content", wordInfo.Word));
                }
                query.SetSlop(1000); // maximum distance allowed between the words
                #endregion
                List<TD_NewsSearchResult> results = new List<TD_NewsSearchResult>();
                int pagesize = 10; // hits per page
                int totalsize = 0; // total number of hits for the query
                FSDirectory directory = null;
                IndexReader reader = null;
                IndexSearcher searcher = null;
                try
                {
                    // Open the index directory without a lock and read it in read-only mode.
                    directory = FSDirectory.Open(new DirectoryInfo(@"E:\RuPeng_Project\DiDao\DIDAO.Timer\NewsIndex"), new NoLockFactory());
                    reader = IndexReader.Open(directory, true);
                    searcher = new IndexSearcher(reader);
                    // The collector keeps at most 1000 hits, so paging cannot reach past them.
                    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                    searcher.Search(query, null, collector);
                    totalsize = collector.GetTotalHits();
                    // TopDocs(start, count): start is 0-based, count is the page size.
                    ScoreDoc[] docs = collector.TopDocs((pagenum - 1) * pagesize, pagesize).scoreDocs;
                    foreach (ScoreDoc scoredoc in docs)
                    {
                        int docId = scoredoc.doc; // document number assigned by Lucene
                        Document doc = searcher.Doc(docId);
                        long id = Convert.ToInt64(doc.Get("id"));
                        long categoryid = Convert.ToInt64(doc.Get("categoryid"));
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        TD_NewsSearchResult nsr = new TD_NewsSearchResult();
                        nsr.URL = "/News/" + categoryid + "/" + id + ".shtml";
                        nsr.TITLE = title;
                        // Summary: best-matching fragment of the content with keywords highlighted.
                        nsr.CONTENT = FrontHelper.HighLight(keywords,content);
                        results.Add(nsr);
                    }
                }
                finally
                {
                    // The results are fully materialized above, so the index handles can be
                    // released before rendering. Previously they were never closed.
                    if (searcher != null)
                    {
                        searcher.Close();
                    }
                    if (reader != null)
                    {
                        reader.Close();
                    }
                    if (directory != null)
                    {
                        directory.Close();
                    }
                }
                RazorHelper.RazorParse(context, "~/News/NewsSearch.cshtml", new { results = results, keywords = keywords,
                                totalsize=totalsize, pagesize=pagesize,currpage=pagenum }); 
                #endregion

Front.News.NewsController.ashx

七扩展任务：

项目任务:完成新闻搜索、视频笔记搜索功能,而且是综合搜索
逻辑思路：//搜索(分页\高亮显示)-->建立索引-->出队列-->入队列T_Segment(Id,Name,note,ChapterId)\T_News(Id,Title,NewsContent,CategoryId)

综合搜索：

入队列：
1    把所有的新闻入队列（已做 ）
2    把所有的课程笔记入队列（未做）    {
3    判断是否有权限 
4    获得课程类别的父级pid=0 
5    获得段落总数totalsize 
6    设置每次获得多少条段落pagesize 
7    获得总次数（总页数，天花板数totalpage）
8    遍历页数    【
9    获得当前页的段落集合 
10    入队列：[
11    对于每一个段落的TD_SEGEMENT(ID CHAPTERID NAME NOTE)
12    对NOTE进行innerText
13    拼接Dict进行json化
14    进行入队列]    】    }
15    记录操作日志 
16    跳转课程类别列表

入队列：

#region 一键重建段落索引
                #region 逻辑
                // Enqueue workflow (planning notes for this action):
                // 1   Enqueue all news items (already done elsewhere)
                // 2   Enqueue all course notes (this snippet)    {
                // 3   Check permission
                // 4   Read the parent course-category id (pid)
                // 5   Get the total number of segments (totalsize)
                // 6   Choose how many segments are fetched per batch (pagesize)
                // 7   Compute the number of batches (totalpage, rounded up)
                // 8   For every batch    [
                // 9   Load the segments of that batch
                // 10  Enqueue:  [
                // 11  for every TD_SEGEMENT (ID, CHAPTERID, NAME, NOTE)
                // 12  reduce NOTE to its inner text
                // 13  build a dictionary and serialize it
                // 14  push it onto the queue ]    ]    }
                // 15  Record the operation log
                // 16  Redirect to the course-category list
                #endregion
                // Parent category id; in this snippet it is only used to build the redirect target.
                string pidStr = context.Request["pid"];
                int pid = VolidHelper.CheckStrToInt(pidStr);
                // Total number of segments to enqueue.
                long totalsize = myORM_BLL.SelectCountByField(typeof(TD_SEGEMENT), 1);
                long pagesize = 100; // segments loaded per batch
                // Number of batches, rounded up with integer arithmetic (no floating-point round trip).
                long totalpage = (totalsize + pagesize - 1) / pagesize;
                // One BLL instance serves the whole rebuild instead of allocating one per segment.
                CourseBLL courseBll = new CourseBLL();
                for (long i = 1; i <= totalpage; i++)
                {
                    // Rows (i-1)*pagesize+1 .. i*pagesize form the current batch (row numbers are 1-based here).
                    List<object> list = myORM_BLL.SelectModelsByRownum(typeof(TD_SEGEMENT), "NO", 1, (i - 1) * pagesize + 1, i * pagesize);
                    // One Redis client per batch; it is released as soon as the batch is enqueued.
                    using (IRedisClient client = RedisManager.ClientManager.GetClient())
                    {
                        foreach (object obj in list)
                        {
                            // Direct cast: an unexpected element type fails right here with an
                            // InvalidCastException instead of a NullReferenceException inside the callee.
                            TD_SEGEMENT sge = (TD_SEGEMENT)obj;
                            courseBll.EnqueueForSegementSearch(sge, client);
                        }
                    }
                }
                // Record the operation log, then go back to the category list.
                AdminHelper.RecordOperateLog(context, "一键重建段落索引");
                context.Response.Redirect("/Course/CategoryController.ashx?action=list&pid=" + pid); 
                #endregion

Admin/Course/CategoryController.ashx?action=allSegmentIndex

        /// <summary>
        /// Pushes one course segment onto the Redis list used for segment indexing.
        /// </summary>
        /// <remarks>
        /// The segment is flattened into a dictionary with the keys ID, CHAPTERID, NAME and NOTE,
        /// serialized through <c>CommonHelper.Serializer</c> and enqueued as a single string.
        /// NOTE is loaded as HTML and only the inner text of the document is kept.
        /// </remarks>
        /// <param name="sge">Segment to enqueue; a null NOTE is treated as an empty string.</param>
        /// <param name="client">Redis client that performs the enqueue.</param>
        public void EnqueueForSegementSearch(TD_SEGEMENT sge, IRedisClient client)
        {
            // Strip the markup from the note so that only its text is queued.
            HtmlDocument noteHtml = new HtmlDocument();
            noteHtml.LoadHtml(sge.NOTE ?? string.Empty);

            Dictionary<string, object> payload = new Dictionary<string, object>
            {
                { "ID", sge.ID },
                { "CHAPTERID", sge.CHAPTERID },
                { "NAME", sge.NAME },
                { "NOTE", noteHtml.DocumentNode.InnerText }
            };

            client.EnqueueItemOnList(
                ConstStringHelper.REDIS_ADMIN_QUEUELIST_SEGEMENTINDEX,
                CommonHelper.Serializer(payload));
        }

BLL/CourseBLL.cs

出队列并写入索引 ：
1    窗口启动，执行定时任务 
2    定时任务中 把出队列委托给后台子线程，并用IsBegining控制子线程的运行
3    执行后台子线程（由IsBegining控制）
4    打开目录，获得IndexWriter类之前先检查：如果IndexReader检测到已存在原索引，并且原索引被加锁，则强制解锁
5    初始化写入索引IndexWriter类 （由盘古分词算法）

6    循环出队列    {
7    把队列中每一个json解析为Dict
8    获得段落信息 
9    把段落信息写入索引【
10    先删除相同文档id的索引文档
11    初始化文档 
12    向文档中添加字段 
13    把文档添加到索引】    }
14    再对段落循环出队列     {
15    （其中id="segement_"+id，保证写入索引时id不与新闻中id重复）}
16    回收目录资源
17    回收索引资源

出队列并写入索引：

    /// <summary>
    /// Scheduled job that drains the news queue and the segment queue from Redis and writes
    /// every dequeued item into one shared Lucene index.
    /// </summary>
    /// <remarks>
    /// News documents are stored with the id prefix "news_" and segment documents with the
    /// prefix "segement_", so ids coming from the two sources cannot collide inside the index.
    /// </remarks>
    public class NewsAndSegmentIndex:IJob
    {
        /// <summary>Directory that holds the combined news/segment index.</summary>
        private const string IndexPath = @"E:\RuPeng_Project\DiDao\DIDAO.Timer\NewsAndSegmentIndex";

        /// <summary>Pause, in milliseconds, after a pass that found both queues empty.</summary>
        private const int IdlePollIntervalMs = 1000;

        /// <summary>1 while a worker is draining the queues in this process, otherwise 0.</summary>
        private static int s_workerRunning;

        /// <summary>
        /// Keeps the background worker looping while true; set it to false to let the worker
        /// stop after its current pass.
        /// </summary>
        public bool IsBegining { get; set; }

        /// <summary>
        /// Job entry point: hands the long-running dequeue work to a background thread so that
        /// the caller is not blocked.
        /// </summary>
        /// <param name="context">Job execution context; not used.</param>
        public void Execute(JobExecutionContext context)
        {
            Thread thread = new Thread(DequeueForNewIndex);
            // A background thread does not keep the process alive: it is stopped when the
            // last foreground thread ends.
            thread.IsBackground = true;
            this.IsBegining = true;
            thread.Start();
        }

        /// <summary>
        /// Worker loop: repeatedly drains both queues until <see cref="IsBegining"/> becomes false.
        /// </summary>
        /// <remarks>
        /// Only one worker runs per process. Every pass force-unlocks a locked index, so a second
        /// concurrent worker would break the write lock of the first one; a call made while
        /// another worker is active therefore returns immediately.
        /// </remarks>
        public void DequeueForNewIndex()
        {
            if (Interlocked.CompareExchange(ref s_workerRunning, 1, 0) != 0)
            {
                return; // another worker already owns the index
            }
            try
            {
                while (this.IsBegining)
                {
                    using (IRedisClient client = RedisManager.ClientManager.GetClient())
                    {
                        ProcessDenqueue(client);
                    }
                    // ProcessDenqueue only returns once both queues are empty; wait before polling
                    // again instead of reopening Redis and the index in a tight loop.
                    Thread.Sleep(IdlePollIntervalMs);
                }
            }
            finally
            {
                Interlocked.Exchange(ref s_workerRunning, 0);
            }
        }

        /// <summary>
        /// Runs one pass: opens the index, drains the news queue, then the segment queue, and
        /// closes the index again.
        /// </summary>
        /// <param name="client">Redis client used to dequeue items.</param>
        private void ProcessDenqueue(IRedisClient client)
        {
            FSDirectory directory = null;
            IndexWriter writer = null;
            try
            {
                // Open the index directory with a native file lock to guard against concurrent writers.
                directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NativeFSLockFactory());
                bool exist = IndexReader.IndexExists(directory);
                // A lock found on an existing index is treated as a leftover from an abnormal
                // exit and is released by force.
                if (exist && IndexWriter.IsLocked(directory))
                {
                    IndexWriter.Unlock(directory);
                }
                // Arguments: directory, analyzer, create a new index when none exists, max field length.
                writer = new IndexWriter(directory, new PanGuAnalyzer(), !exist, IndexWriter.MaxFieldLength.UNLIMITED);

                // News first; segments are processed once the news queue is empty.
                DrainQueue(client, writer, ConstStringHelper.REDIS_ADMIN_QUEUELIST_NEWSINDEX,
                    "news_", "CATEGORYID", "TITLE", "CONTENT");
                DrainQueue(client, writer, ConstStringHelper.REDIS_ADMIN_QUEUELIST_SEGEMENTINDEX,
                    "segement_", "CHAPTERID", "NAME", "NOTE");
            }
            finally
            {
                // Close the directory even if closing the writer throws.
                try
                {
                    if (writer != null)
                    {
                        writer.Close();
                    }
                }
                finally
                {
                    if (directory != null)
                    {
                        directory.Close();
                    }
                }
            }
        }

        /// <summary>
        /// Dequeues items from one Redis list until it is empty and indexes each of them.
        /// </summary>
        /// <param name="client">Redis client used to dequeue items.</param>
        /// <param name="writer">Open index writer.</param>
        /// <param name="queueKey">Name of the Redis list to drain.</param>
        /// <param name="idPrefix">Prefix put in front of the numeric id to form the index id.</param>
        /// <param name="categoryKey">Dictionary key that holds the category (or chapter) id.</param>
        /// <param name="titleKey">Dictionary key that holds the title text.</param>
        /// <param name="contentKey">Dictionary key that holds the body text.</param>
        private void DrainQueue(IRedisClient client, IndexWriter writer, string queueKey,
            string idPrefix, string categoryKey, string titleKey, string contentKey)
        {
            string json;
            while ((json = client.DequeueItemFromList(queueKey)) != null)
            {
                Dictionary<string, object> dict = (Dictionary<string, object>)CommonHelper.DeSerializer(json);
                string id = idPrefix + Convert.ToInt64(dict["ID"]);
                long categoryid = Convert.ToInt64(dict[categoryKey]);
                // A null title/content is indexed as empty text instead of throwing on the worker thread.
                string title = (dict[titleKey] ?? string.Empty).ToString();
                string content = (dict[contentKey] ?? string.Empty).ToString();
                WriteToNewsIndex(writer, id, categoryid, title, content);
            }
        }

        /// <summary>
        /// Writes one document to the index, first deleting any document with the same id.
        /// </summary>
        /// <param name="writer">Open index writer.</param>
        /// <param name="id">Prefixed document id, e.g. "news_1" or "segement_1".</param>
        /// <param name="categoryid">Category (news) or chapter (segment) id.</param>
        /// <param name="title">Title text; stored and analyzed.</param>
        /// <param name="content">Body text; stored and analyzed.</param>
        private void WriteToNewsIndex(IndexWriter writer, string id, long categoryid, string title, string content)
        {
            // Remove the previous version of this document, if any.
            writer.DeleteDocuments(new Term("id", id));
            Document doc = new Document();
            // id and categoryid are stored verbatim (not analyzed) so they can be matched exactly.
            doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("categoryid", categoryid.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            // title and content are tokenized by the analyzer and keep term vectors with offsets.
            doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED,Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED,Field.TermVector.WITH_OFFSETS));
            writer.AddDocument(doc);
        }
    }

Timer/NewsAndSegmentIndex.cs

综合搜索：
1    获得关键字、当前页
2    验证非空 格式 
3    查询条件【
4    使用盘古分词算法分词，初始化Segment类，使用这个类切词
5    遍历每一个词，添加查询条件
6    设置查询的关键字间距】

7    打开目录
8    获得索引读取类IndexReader
9    初始化索引搜索类IndexSearcher
10    获得共多少条结果
11    在这些结果中进行搜索
12    获得搜索的总条数totalsize
13    设置每页的条数pagesize
14    获得搜索结果中的第m-n条搜索结果集合
15    遍历搜索结果集合【
16    获得搜索结果的文档id
17    根据文档id搜索到该文档
18    获得文档对应字段的信息
19    用这些信息实例化TD_RESULT [
20    如果id以"news"开头则url是新闻的Url/如果以"segement"开头则是段落的url ]
21    把这些结果加入到最后结果集合
22    把结果集合解析到cshtml

综合搜索：

#region 综合搜索
                #region 逻辑
                // Combined search over news and course segments:
                // 1   Read the keywords and the current page number (pagenum)
                // 2   Split the keywords into words with the PanGu Segment class
                // 3   Add every word to the query
                // 4   Set the allowed distance (slop) between the words
                //
                // 5   Open the index directory without locking
                // 6   Open an index reader on the directory
                // 7   Create the searcher from the reader
                // 8   Collect at most 1000 hits
                // 9   Run the query into the collector
                // 10  Take the hits of the requested page (total count totalsize, page size pagesize,
                //     offset (pagenum-1)*pagesize)
                // 11  For every hit  [
                // 12  get the document id assigned by Lucene
                // 13  load the document by that id
                // 14  read the stored field values
                // 15  build url and title and add them to the result list (ids starting with
                //     "news" link to a news page, ids starting with "segement" to a course segment) ]
                // 16  Render the result list with the cshtml template
                #endregion
                #region 获得请求
                string keywords = context.Request["keywords"];
                string pagenumStr = context.Request["pagenum"];
                // Validate before trimming: a missing "keywords" parameter is null, and calling
                // Trim() on it would throw.
                if (string.IsNullOrWhiteSpace(keywords))
                {
                    return;
                }
                keywords = keywords.Trim();
                // Current page; falls back to the first page when missing, not numeric or below 1.
                int pagenum = 1;
                if (!string.IsNullOrWhiteSpace(pagenumStr) && VolidHelper.CheckStringIsInt(pagenumStr))
                {
                    pagenum = Convert.ToInt32(pagenumStr);
                }
                if (pagenum < 1)
                {
                    pagenum = 1; // keeps the result offset below from going negative
                }
                #endregion
                #region 查询条件
                // Split the keywords into words with PanGu.
                PanGu.Segment segment = new PanGu.Segment();
                var wordInfos = segment.DoSegment(keywords);
                // A phrase query over the "content" field with one term per word.
                PhraseQuery query = new PhraseQuery();
                foreach (var wordInfo in wordInfos)
                {
                    query.Add(new Term("content", wordInfo.Word));
                }
                query.SetSlop(1000); // maximum distance allowed between the words
                #endregion
                List<TD_NewsSearchResult> results = new List<TD_NewsSearchResult>();
                int totalsize = 0;
                int pagesize = 10; // hits per page
                FSDirectory directory = null;
                IndexReader reader = null;
                IndexSearcher searcher = null;
                try
                {
                    // Open the index read-only and without a lock factory.
                    directory = FSDirectory.Open(new DirectoryInfo(@"E:\RuPeng_Project\DiDao\DIDAO.Timer\NewsAndSegmentIndex"), new NoLockFactory());
                    reader = IndexReader.Open(directory, true);
                    searcher = new IndexSearcher(reader);
                    // Collect at most 1000 hits for the query.
                    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                    searcher.Search(query, null, collector);
                    totalsize = collector.GetTotalHits(); // total number of matching documents
                    // Hits of the requested page; the offset is zero-based.
                    ScoreDoc[] docs = collector.TopDocs((pagenum - 1) * pagesize, pagesize).scoreDocs;
                    foreach (ScoreDoc scoredoc in docs)
                    {
                        int docId = scoredoc.doc; // document number assigned by Lucene
                        Document doc = searcher.Doc(docId);
                        string id = doc.Get("id"); // "news_1" or "segement_1"
                        long categoryid = Convert.ToInt64(doc.Get("categoryid"));
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        TD_NewsSearchResult nsr = new TD_NewsSearchResult();
                        // The id prefix decides which kind of page the hit links to.
                        string[] idArry = id.Split('_');
                        if (idArry[0] == "news")
                        {
                            nsr.URL = "/News/" + categoryid + "/" + idArry[1] + ".shtml";
                            nsr.TITLE = "新闻："+ title;
                        }
                        else if (idArry[0] == "segement")
                        {
                            nsr.URL = "/Course/CourseController_segment" + idArry[1] + ".ashx";
                            nsr.TITLE = "课程：" + title;
                        }
                        else
                        {
                            RazorHelper.RazorParse(context, "~/error.cshtml", new { Msg = "综合搜索失败，综合索引中 未找到 id=" + id });
                            return;
                        }
                        nsr.CONTENT = FrontHelper.HighLight(keywords,content);
                        results.Add(nsr);
                    }
                }
                finally
                {
                    // Release the index handles on every exit path, including the early return above.
                    if (searcher != null)
                    {
                        searcher.Close();
                    }
                    if (reader != null)
                    {
                        reader.Close();
                    }
                    if (directory != null)
                    {
                        directory.Close();
                    }
                }
                RazorHelper.RazorParse(context, "~/News/NewsSearch.cshtml", new { results = results, keywords = keywords,
                                totalsize=totalsize, pagesize=pagesize,currpage=pagenum }); 
                #endregion

Front/News/NewsController.ashx?action=search

posted on 2015-11-07 17:38 AdolphYang 阅读(355) 评论(0) 编辑收藏举报

刷新页面返回顶部