架构深渊

慢慢走进程序的深渊……关注领域驱动设计、测试驱动开发、设计模式、企业应用架构模式……积累技术细节,以设计架构为宗。
  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

初试Lucene.net搜索及高亮分页

Posted on 2008-09-13 14:19  chen eric  阅读(897)  评论(0)  编辑  收藏  举报


 


using System;
using System.Data;
using System.IO;
using System.Text.RegularExpressions;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using System.Configuration;

namespace So
{
    /// <summary>
    /// Base page for Lucene.Net-backed search pages. Holds the paging state and
    /// the result table, and provides small helpers for truncating text,
    /// validating the "start" request parameter and highlighting keywords.
    /// </summary>
    public class BaseSearch : System.Web.UI.Page
    {
        #region Fields

        /// <summary>
        /// Search result data.
        /// </summary>
        public DataTable Results = new DataTable();

        /// <summary>
        /// Zero-based index of the first result to show.
        /// </summary>
        public int startAt;

        /// <summary>
        /// First item on page (user format).
        /// </summary>
        public int fromItem;

        /// <summary>
        /// Last item on page (user format).
        /// </summary>
        public int toItem;

        /// <summary>
        /// Total number of search results.
        /// </summary>
        public int total;

        /// <summary>
        /// Time the search took.
        /// </summary>
        public TimeSpan duration;

        /// <summary>
        /// Number of result items shown per page. Must be greater than zero,
        /// because <see cref="pageCount"/> divides by it.
        /// </summary>
        public int maxResults = 10;

        /// <summary>
        /// Whether the page-cache feature is enabled.
        /// </summary>
        public bool EnableCache;

        /// <summary>
        /// Cache URL.
        /// </summary>
        public string CacheURL;

        /// <summary>
        /// Path of the directory that holds the index files.
        /// </summary>
        public string IndexDiectory;

        private string m_Query;

        /// <summary>
        /// Lucene directory assigned by <see cref="GetIndexDir"/>.
        /// </summary>
        public Lucene.Net.Store.Directory dir;

        #endregion

        #region Index directory

        /// <summary>
        /// Opens the index stored at <paramref name="IndexDiectory"/> as a
        /// <c>RAMDirectory</c> and stores it in <see cref="dir"/>.
        /// </summary>
        /// <param name="IndexKey">
        /// Currently unused. It was the cache key of an earlier version that kept
        /// the directory in the ASP.NET cache for two minutes; that caching is
        /// disabled, so a new directory object is created on every call.
        /// </param>
        /// <param name="IndexDiectory">Path of the index directory to open.</param>
        public void GetIndexDir(string IndexKey, string IndexDiectory)
        {
            dir = new Lucene.Net.Store.RAMDirectory(IndexDiectory);
        }

        #endregion

        #region Fixed-length text

        /// <summary>
        /// Truncates a string to a fixed length.
        /// </summary>
        /// <param name="p_Text">Original string.</param>
        /// <param name="p_Length">Maximum number of characters to keep.</param>
        /// <param name="p_ExtraText">Text appended when the string was truncated.</param>
        /// <returns>
        /// <paramref name="p_Text"/> unchanged when it is null, empty or not longer
        /// than <paramref name="p_Length"/>; otherwise its first
        /// <paramref name="p_Length"/> characters followed by
        /// <paramref name="p_ExtraText"/>.
        /// </returns>
        public string GetLengthText(string p_Text, int p_Length, string p_ExtraText)
        {
            if (string.IsNullOrEmpty(p_Text) || p_Text.Length <= p_Length)
            {
                return p_Text;
            }

            return p_Text.Substring(0, p_Length) + p_ExtraText;
        }

        #endregion

        #region Minimum of two values

        /// <summary>
        /// Returns the smaller of two values.
        /// </summary>
        /// <param name="first">First value.</param>
        /// <param name="second">Second value.</param>
        /// <returns>The smaller value.</returns>
        public int smallerOf(int first, int second)
        {
            return Math.Min(first, second);
        }

        #endregion

        #region Start position

        /// <summary>
        /// Reads the "start" request parameter and clamps bad values.
        /// <see cref="total"/> must already be set when this is called.
        /// </summary>
        /// <returns>
        /// 0 when the parameter is missing, not a valid integer or negative;
        /// <see cref="lastPageStartsAt"/> when it is at or beyond the last result;
        /// otherwise the parsed value.
        /// </returns>
        public int initStartAt()
        {
            int sa;
            try
            {
                // A missing or malformed parameter simply means "first page".
                if (!int.TryParse(this.Request.Params["start"], out sa))
                {
                    return 0;
                }
            }
            catch (System.Web.HttpException)
            {
                // Page.Request is not available outside of a request; fall back
                // to the first page as the previous catch-all handler did.
                return 0;
            }

            // too small starting item, return first page
            if (sa < 0)
            {
                return 0;
            }

            // too big starting item, return last page
            if (sa >= total - 1)
            {
                return lastPageStartsAt;
            }

            return sa;
        }

        #endregion

        #region Last page

        /// <summary>
        /// Index of the first item of the last page.
        /// </summary>
        public int lastPageStartsAt
        {
            get
            {
                return pageCount * maxResults;
            }
        }

        /// <summary>
        /// Zero-based index of the last page, i.e. (total - 1) / maxResults using
        /// integer division. Yields 0 when there are no results.
        /// </summary>
        public int pageCount
        {
            get
            {
                return (total - 1) / maxResults;
            }
        }

        #endregion

        #region Highlighting

        /// <summary>
        /// Wraps every occurrence of the given keywords in the text with the
        /// supplied prefix and suffix.
        /// </summary>
        /// <param name="p_Body">Text to process.</param>
        /// <param name="p_KeyWords">Keywords separated by spaces.</param>
        /// <param name="p_Before">Text inserted before each match.</param>
        /// <param name="p_After">Text inserted after each match.</param>
        /// <param name="p_MaxLength">
        /// Currently unused; kept for interface compatibility (an earlier version
        /// truncated the text around the first keyword).
        /// </param>
        /// <returns>
        /// The text with matches wrapped, or <paramref name="p_Body"/> unchanged
        /// when the text or the keyword list is null or empty.
        /// </returns>
        public string SimpleHighLighter(string p_Body, string p_KeyWords, string p_Before,
            string p_After, int p_MaxLength)
        {
            if (string.IsNullOrEmpty(p_Body) || string.IsNullOrEmpty(p_KeyWords))
            {
                return p_Body;
            }

            // Drop empty entries: consecutive spaces used to produce "" keywords,
            // and String.Replace("", ...) throws ArgumentException.
            string[] keyWords = p_KeyWords.Trim().Split(
                new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (keyWords.Length == 0)
            {
                return p_Body;
            }

            string[] escaped = new string[keyWords.Length];
            for (int i = 0; i < keyWords.Length; i++)
            {
                escaped[i] = Regex.Escape(keyWords[i]);
            }

            // Replace all keywords in a single pass so that a later keyword can
            // never match inside markup inserted for an earlier one (for example
            // the keyword "font" inside an inserted <font> tag).
            string pattern = string.Join("|", escaped);
            return Regex.Replace(p_Body, pattern, delegate(Match match)
            {
                return p_Before + match.Value + p_After;
            });
        }

        #endregion

        #region Properties

        /// <summary>
        /// Search keywords.
        /// </summary>
        public string Query
        {
            get
            {
                return m_Query;
            }
            set
            {
                m_Query = value;
            }
        }

        #endregion
    }
}

using System;
using System.Data;
using System.IO;
using System.Text.RegularExpressions;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using System.Configuration;

namespace So.News
{
    /// <summary>
    /// News search page: queries the "newsContent" field of the news index and
    /// exposes the hits and the pager as DataTables.
    /// </summary>
    public class NewsSearch : BaseSearch
    {
        /// <summary>
        /// Reads the index path from the "NewsIndexPath" application setting.
        /// </summary>
        public NewsSearch()
        {
            this.IndexDiectory = ConfigurationManager.AppSettings["NewsIndexPath"];
        }

        #region Search

        /// <summary>
        /// Does the search and stores the information about the results:
        /// fills <c>Results</c> with the rows of the requested page and sets
        /// <c>total</c>, <c>startAt</c>, <c>fromItem</c>, <c>toItem</c> and
        /// <c>duration</c>.
        /// </summary>
        public void search()
        {
            // Measure elapsed time with a stopwatch rather than wall-clock time.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            this.GetIndexDir("HDC.News", IndexDiectory);
            IndexSearcher searcher = new IndexSearcher(dir);
            try
            {
                // Search the "newsContent" field.
                Lucene.Net.Analysis.Analyzer analyzer = new StandardAnalyzer();
                QueryParser parser = new QueryParser("newsContent", analyzer);
                Lucene.Net.Search.Query query = parser.Parse(this.Query);

                EnsureResultColumns();

                // SortField.DOC with reverse = true orders hits by descending
                // document number.
                Sort sort = new Sort(new SortField("ArticleID", SortField.DOC, true));

                // Hits is Lucene's own result format and is copied into Results below.
                Hits hits = searcher.Search(query, sort);

                // Number of results.
                this.total = hits.Length();

                // initialize startAt
                this.startAt = initStartAt();

                // how many items we should show - less than defined at the end of the results
                int resultsCount = smallerOf(total, this.maxResults + this.startAt);

                for (int i = startAt; i < resultsCount; i++)
                {
                    Document doc = hits.Doc(i);
                    DataRow row = this.Results.NewRow();

                    row["ArticleID"] = Convert.ToInt32(doc.Get("ArticleID"));
                    row["ArticleClassID"] = Convert.ToInt32(doc.Get("ArticleClassID"));

                    string summary = doc.Get("summary");
                    row["summary"] = this.SimpleHighLighter(summary, this.Query,
                        "<font color=\"#C60A00\">", "</font>", 226);
                    row["className"] = doc.Get("className");
                    row["titleImg"] = doc.Get("titleImg");
                    row["updateTime"] = Convert.ToDateTime(doc.Get("updateTime"));
                    row["source"] = doc.Get("source");
                    row["title"] = doc.Get("title");

                    this.Results.Rows.Add(row);
                }
            }
            finally
            {
                // Release the searcher even when parsing or conversion fails.
                searcher.Close();
            }

            // result information
            this.fromItem = startAt + 1;
            this.toItem = smallerOf(startAt + maxResults, total);

            // Time used by the search.
            this.duration = timer.Elapsed;
        }

        /// <summary>
        /// Defines the result columns on first use and removes rows left over
        /// from an earlier search, so that search() can be called more than once.
        /// </summary>
        private void EnsureResultColumns()
        {
            if (this.Results.Columns.Count == 0)
            {
                this.Results.Columns.Add("ArticleID", typeof(int));
                this.Results.Columns.Add("ArticleClassID", typeof(int));
                this.Results.Columns.Add("className", typeof(string));
                this.Results.Columns.Add("titleImg", typeof(string));
                this.Results.Columns.Add("updateTime", typeof(DateTime));
                this.Results.Columns.Add("source", typeof(string));
                this.Results.Columns.Add("title", typeof(string));
                this.Results.Columns.Add("summary", typeof(string));
            }

            this.Results.Rows.Clear();
        }

        #endregion

        #region Pager

        /// <summary>
        /// Pager rows for the bottom of the page. Returns a table with a single
        /// "html" column: the previous-page link, up to 7 preceding page links,
        /// the current page, up to 8 following page links and the next-page link.
        /// </summary>
        public DataTable Paging
        {
            get
            {
                // Zero-based index of the current page, rounded up when startAt
                // is not aligned to a page boundary.
                int pageNumber = (startAt + maxResults - 1) / maxResults;

                DataTable dt = new DataTable();
                dt.Columns.Add("html", typeof(string));

                // Previous-page link (plain image when already on the first page).
                DataRow previousRow = dt.NewRow();
                if (startAt >= maxResults)
                {
                    previousRow["html"] = "<EM><a href=\"" + BuildPageUrl(startAt - maxResults)
                        + "\"><IMG src=\"images/b_pre.gif\"></a></EM>";
                }
                else
                {
                    previousRow["html"] = "<EM><IMG src=\"images/b_pre.gif\"></EM>";
                }
                dt.Rows.Add(previousRow);

                // Current page, rendered without a link.
                DataRow currentRow = dt.NewRow();
                currentRow["html"] = pagingItemHtml(startAt, pageNumber + 1, false);
                dt.Rows.Add(currentRow);

                // Preceding pages. The loop walks backwards and always inserts at
                // index 1, right after the previous-page link, so the rows end up
                // in ascending page order.
                int previousPagesCount = 7;
                for (int i = pageNumber - 1; i >= 0 && i >= pageNumber - previousPagesCount; i--)
                {
                    int step = i - pageNumber;
                    DataRow r = dt.NewRow();
                    // startAt may be unaligned, so clamp to avoid a negative start.
                    r["html"] = pagingItemHtml(Math.Max(0, startAt + (maxResults * step)), i + 1, true);
                    dt.Rows.InsertAt(r, 1);
                }

                // Following pages.
                int nextPagesCount = 8;
                for (int i = pageNumber + 1; i <= pageCount && i <= pageNumber + nextPagesCount; i++)
                {
                    int step = i - pageNumber;
                    DataRow r = dt.NewRow();
                    r["html"] = pagingItemHtml(startAt + (maxResults * step), i + 1, true);
                    dt.Rows.Add(r);
                }

                // Next-page link (plain image when already on the last page).
                DataRow nextRow = dt.NewRow();
                if (pageNumber < pageCount)
                {
                    nextRow["html"] = "<EM><a class=\"blue1\" href=\"" + BuildPageUrl(startAt + maxResults)
                        + "\"><IMG src=\"images/b_nextpage.gif\"></a></EM>";
                }
                else
                {
                    nextRow["html"] = "<EM><IMG src=\"images/b_nextpage.gif\"></EM>";
                }
                dt.Rows.Add(nextRow);

                return dt;
            }
        }

        /// <summary>
        /// Builds the HTML for one pager entry.
        /// </summary>
        /// <param name="start">Result index the link points to.</param>
        /// <param name="number">Page number displayed to the user.</param>
        /// <param name="active">
        /// True to render a clickable link; false to render the entry as the
        /// current page (no link, <c>class=on</c>).
        /// </param>
        /// <returns>HTML fragment for the pager entry.</returns>
        public string pagingItemHtml(int start, int number, bool active)
        {
            if (active)
            {
                return "<VAR><a href=\"" + BuildPageUrl(start) + "\">" + number + "</a></VAR>";
            }

            return "<VAR class=on>" + number + "</VAR>";
        }

        /// <summary>
        /// Builds the URL of the news search page for the current query,
        /// starting at the given result index.
        /// </summary>
        private string BuildPageUrl(int start)
        {
            return "/News/?q=" + Server.UrlEncode(this.Query) + "&start=" + start;
        }

        #endregion
    }
}