盘古搜索--实例解析
1.引用
PanGu.dll
Lucene.Net.dll
PanGu.Lucene.Analyzer.dll
PanGu.HighLight.dll--高亮使用
2.Dict文件夹
文件夹名称一定要为Dict,并且将其中文件的“复制到输出目录”属性设置为“如果较新则复制”
3.创建索引部分。
思路:把添加的消息放在redis队列中,从队列中读取消息,并且添加索引。
利用线程读队列然后一条一条写入索引。
写入索引的部分通常放在单独的项目中,因为写入索引会非常耗内存
/// <summary>
/// Indexer that drains JSON-serialized <c>Message</c> items from the Redis list
/// "QiuShiBaiKe.Message" and writes them into a Lucene.Net index analyzed with PanGu.
/// </summary>
public class MessageIndex
{
    Thread thread;

    /// <summary>
    /// Loop flag read by <see cref="RunScan"/>; set it to false to let the scan loop
    /// end after the current pass.
    /// </summary>
    public bool IsRunning { get; set; }

    /// <summary>
    /// Starts the scan thread, which keeps pulling messages from the queue and
    /// writing them to the index.
    /// </summary>
    public void Start()
    {
        IsRunning = true;
        thread = new Thread(RunScan);
        // Foreground thread: it keeps the process alive even after the caller's
        // entry point returns. (The original comment described it as a background
        // thread, which is not what IsBackground = false means.)
        thread.IsBackground = false;
        thread.Start();
    }

    /// <summary>
    /// Scan loop: while <see cref="IsRunning"/> is true, takes a Redis client and runs
    /// one indexing pass with it.
    /// </summary>
    public void RunScan()
    {
        while (IsRunning)
        {
            using (var client = RedisManager.ClientManager.GetClient())
            {
                StartIndex(client);
            }
        }
    }

    /// <summary>
    /// Runs one indexing pass: opens the index, dequeues messages until the queue is
    /// empty, writes each one, then sleeps 3 seconds and returns.
    /// </summary>
    /// <param name="client">Redis client used to dequeue the pending messages.</param>
    /// <exception cref="Exception">
    /// Any failure is rethrown as an <see cref="Exception"/> whose
    /// <see cref="Exception.InnerException"/> carries the original error.
    /// </exception>
    public void StartIndex(IRedisClient client)
    {
        FSDirectory directory = null;
        IndexWriter writer = null;
        try
        {
            // Index location. Per the original author's note, the path must match the
            // on-disk folder name including letter case, otherwise an error occurs.
            string indexPath = @"F:\tmp\index2";
            directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
            bool isExists = IndexReader.IndexExists(directory);

            // Lucene.Net locks the index before writing and unlocks it on Close. If a
            // previous run died mid-write the lock is left behind, so force-unlock it.
            // This only handles a stale lock; it is not safe with concurrent writers.
            if (isExists && IndexWriter.IsLocked(directory))
            {
                IndexWriter.Unlock(directory);
            }

            // Create a new index only when none exists yet; otherwise append to it.
            writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExists, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            JavaScriptSerializer serializer = new JavaScriptSerializer();
            while (true)
            {
                string json = client.DequeueItemFromList("QiuShiBaiKe.Message");
                if (string.IsNullOrEmpty(json))
                {
                    // Queue is empty: wait a little, then return so the writer is closed.
                    Thread.Sleep(3000);
                    return;
                }

                Message message = serializer.Deserialize<Message>(json);
                WriteIndex(message, writer);
            }
        }
        catch (Exception ex)
        {
            // Keep the original failure as the inner exception so it can be diagnosed.
            throw new Exception("写入索引出错", ex);
        }
        finally
        {
            // Either reference may still be null if opening the index failed, and the
            // directory must be closed even when closing the writer throws.
            // Closing is required, otherwise the newly indexed data cannot be searched.
            try
            {
                if (writer != null)
                {
                    writer.Close();
                }
            }
            finally
            {
                if (directory != null)
                {
                    directory.Close();
                }
            }
        }
    }

    /// <summary>
    /// Writes one message to the index, replacing any document with the same id.
    /// </summary>
    /// <param name="message">Message to index; its Id and Msg are stored.</param>
    /// <param name="writer">Open index writer to write to.</param>
    private void WriteIndex(Message message, IndexWriter writer)
    {
        string id = message.Id.ToString();

        // Remove any earlier document with this id so the index holds no duplicates.
        writer.DeleteDocuments(new Term("id", id));

        // One Document is one record with two fields, "id" and "message". Field names
        // are free-form and all values are strings.
        Document document = new Document();
        document.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.Add(new Field("message", message.Msg, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
        writer.AddDocument(document);

        Console.WriteLine("id=" + id + "msg=" + message.Msg);
    }
}
在索引的主程序中,调用
// Create the indexer and start its scan thread.
var indexer = new MessageIndex();
indexer.Start();
4.搜索部分--使用MVC框架
1)分词方法:
/// <summary>
/// Tokenization: segments the keywords with PanGu and builds a Lucene query string
/// of the form "word^boost.0 word^boost.0 ...", where boost is 3 raised to the
/// word's rank.
/// </summary>
/// <param name="keywords">Raw text typed by the user.</param>
/// <param name="ktTokenizer">PanGu tokenizer used for segmentation.</param>
/// <returns>The boosted query string, trimmed; empty when no word was produced.</returns>
public static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    StringBuilder result = new StringBuilder();
    ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);
    foreach (WordInfo word in words)
    {
        if (word == null || string.IsNullOrEmpty(word.Word))
        {
            continue;
        }

        // Escape query-syntax characters so user text cannot break query parsing.
        result.AppendFormat("{0}^{1}.0 ", QueryParser.Escape(word.Word), (int)Math.Pow(3, word.Rank));
    }
    return result.ToString().Trim();
}

// ---- 2) Search and highlight the results ----

/// <summary>
/// Searches the "message" field of the index and returns one page of results with
/// the matched keywords highlighted.
/// </summary>
/// <param name="indexDir">Path of the index directory.</param>
/// <param name="q">Keywords typed by the user.</param>
/// <param name="pageLen">Page size.</param>
/// <param name="pageNo">1-based page number; values below 1 are treated as page 1.</param>
/// <param name="recCount">Receives the total number of hits (0 for an empty query).</param>
/// <returns>The results of the requested page; empty when nothing matches.</returns>
public static List<SearchResult> SearchResult(String indexDir, String q, int pageLen, int pageNo, out int recCount)
{
    List<SearchResult> results = new List<SearchResult>();
    recCount = 0;

    // A missing or blank keyword cannot be turned into a query, so report no hits.
    if (string.IsNullOrWhiteSpace(q))
    {
        return results;
    }

    string keywords = q;
    IndexSearcher search = new IndexSearcher(indexDir);
    try
    {
        q = GetKeyWordsSplitBySpace(q, new PanGuTokenizer());
        if (q.Length == 0)
        {
            // Segmentation produced no usable word.
            return results;
        }

        // Search the keywords in the "message" field. More fields (e.g. "title") can
        // be searched by parsing q with another QueryParser and adding that query to
        // the BooleanQuery as a further SHOULD clause.
        QueryParser queryParser = new QueryParser("message", new PanGuAnalyzer(true));
        Query query = queryParser.Parse(q);
        BooleanQuery bq = new BooleanQuery();
        bq.Add(query, BooleanClause.Occur.SHOULD);

        Hits hits = search.Search(bq);
        recCount = hits.Length(); // total number of hits

        // The highlighter does not depend on the hit, so build it once.
        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
        highlighter.FragmentSize = 50;

        // Paging: start at the first hit of the requested page.
        int i = Math.Max(0, (pageNo - 1) * pageLen);
        while (i < recCount && results.Count < pageLen)
        {
            SearchResult result = null;
            try
            {
                var doc = hits.Doc(i);
                result = new SearchResult();
                result.Message = doc.Get("message");
                result.MessageUrl = "/Message/PreviewMessage/" + doc.Get("id");

                // Highlighted fragment; fall back to the full message when there is none.
                result.MessageHightLigther = highlighter.GetBestFragment(keywords, result.Message);
                if (string.IsNullOrEmpty(result.MessageHightLigther))
                {
                    result.MessageHightLigther = result.Message;
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
            finally
            {
                results.Add(result);
                i++;
            }
        }
    }
    finally
    {
        // Always release the searcher, including when parsing or searching throws.
        search.Close();
    }
    return results;
}
3)搜索结果的类
/// <summary>
/// One search hit: the message text, the link to its detail page, and the
/// highlighted text to display.
/// </summary>
public class SearchResult
{
    /// <summary>Message text.</summary>
    public string Message { get; set; }

    /// <summary>Link to the message detail page.</summary>
    public string MessageUrl { get; set; }

    /// <summary>Message text with the highlight markup applied.</summary>
    public string MessageHightLigther { get; set; }
}
4)在Controller中的主代码
/// <summary>
/// Search action: runs the query for the requested page and passes the results and
/// paging data to the view through ViewBag.
/// </summary>
/// <param name="keyword">Keywords to search for.</param>
/// <param name="pageIndex">1-based page number; defaults to 1 when omitted.</param>
/// <returns>The search view, or the "Error" view when the page number is not positive.</returns>
public ActionResult Search(string keyword, int? pageIndex)
{
    const int pageSize = 3;

    int page = pageIndex ?? 1;
    if (page <= 0)
    {
        return View("Error", (object)"pageIndex必须大于0");
    }

    int totalCount;
    List<SearchResult> hits = SearchResult(@"F:\tmp\index2", keyword, pageSize, page, out totalCount);

    ViewBag.KeyWord = keyword;
    ViewBag.SearchResults = hits;
    ViewBag.TotalCount = totalCount; // total number of hits
    ViewBag.PageIndex = page;
    ViewBag.PageSize = pageSize;
    return View();
}
5)Search View中的代码
@{
    ViewBag.Title = "搜索" + ViewBag.KeyWord;
    Layout = "~/Views/DefaultLayout.cshtml";
}
@* One link per hit. The highlighted text is written as raw HTML so the highlight
   markup renders. NOTE(review): the message text itself is not HTML-encoded
   anywhere visible here - confirm it is safe to emit unencoded. *@
@foreach (var hit in ViewBag.SearchResults)
{
    <div >
        <a href="@hit.MessageUrl" target="_blank">@(new HtmlString(hit.MessageHightLigther))</a>
    </div>
}
@QiuShiBaiKe.Web.WebHelper.Pager("/Message/Search/{pageIndex}/"+ViewBag.KeyWord, ViewBag.TotalCount, ViewBag.PageIndex, ViewBag.PageSize)
6)搜索的路由配置
// Search route: {controller}/{action}/{pageIndex}/{keyword}, defaulting to
// Message/Search with both the page index and the keyword optional.
routes.MapRoute(
    "search",
    "{controller}/{action}/{pageIndex}/{keyword}",
    new
    {
        controller = "Message",
        action = "Search",
        pageIndex = UrlParameter.Optional,
        keyword = UrlParameter.Optional
    }
);