简单使用Lucene.Net

Lucene.Net

/// <summary>
/// Runs a search for <paramref name="keyWords"/> and binds the current page of results.
/// 1. Reads paging information from the pager control (page size is forced to 20).
/// 2. Builds the "ArticleIndex" Lucene index if its directory does not exist yet.
/// 3. Queries the index through <c>GetHitsResultBy</c>, which fills <c>ArticleList</c>.
/// 4. Updates the pager, the summary message and the repeater.
/// </summary>
/// <param name="keyWords">Search text typed by the user.</param>
public void BindData(string keyWords)
{
  // 1. Paging information
  int pageSize = this.AspNetPageControls.PageSize = 20;
  int currentPage = this.AspNetPageControls.CurrentPageIndex;

  // Resolve the index directory once; it is needed for both the existence check and the search.
  string indexPath = Server.MapPath("ArticleIndex");
  if (!Directory.Exists(indexPath))
  {
    this.SetArticleIndex();
  }

  // 2. Query the index; the call also populates ArticleList for the requested page.
  int totalCount = this.GetHitsResultBy(indexPath, keyWords, currentPage, pageSize);
  this.AspNetPageControls.RecordCount = totalCount;

  // keyWords is user input rendered as markup, so it must be HTML-encoded to prevent script injection.
  // NOTE(review): the message ends with a comma; presumably an elapsed-time suffix was planned — confirm.
  this.LiMessage.Text = string.Format("搜索“ {0} ” 约 {1} 个结果,", Server.HtmlEncode(keyWords), totalCount);

  // 3. Bind the data control
  this.RepArticleList.DataSource = ArticleList;
  this.RepArticleList.DataBind();
}

/// <summary>
/// Queries the Lucene index and appends the requested page of hits to <c>ArticleList</c>.
/// 1. Opens an index searcher on <paramref name="path"/>.
/// 2. Parses <paramref name="keyWord"/> against the "Title" and "Content" fields with the Chinese analyzer.
/// 3. Runs the search with the configured sort.
/// 4. Tokenises the key word so that each term can be highlighted.
/// 5. Converts the hits of the requested page into <c>Article</c> objects.
/// </summary>
/// <param name="path">Absolute path of the index directory, e.g. Server.MapPath("ArticleIndex").</param>
/// <param name="keyWord">Search text.</param>
/// <param name="currPage">1-based page number; values below 1 are treated as the first page.</param>
/// <param name="pageSize">Maximum number of hits to materialise for the page.</param>
/// <returns>Total number of hits for the query, or 0 when the search fails.</returns>
public int GetHitsResultBy(string path, string keyWord, int currPage, int pageSize)
{
  int result = 0;

  // Select which index directory to search.
  IndexSearcher indexSearch = new IndexSearcher(path);
  try
  {
    ChineseAnalyzer chinesAnalyer = new ChineseAnalyzer();
    MultiFieldQueryParser multiQuery = new MultiFieldQueryParser(new string[] { "Title", "Content" }, chinesAnalyer);

    // Hand the search text to the query parser.
    Query query = multiQuery.Parse(keyWord);

    // Sort definition; the last argument is the "reverse" flag.
    // NOTE(review): with SortField.SCORE the field name is presumably ignored — confirm against Lucene.Net docs.
    Sort sort = new Sort(new SortField("Title", SortField.SCORE, false));
    Hits hits = indexSearch.Search(query, sort);
    result = hits.Length();

    // Split the search text into terms; these are the strings that get highlighted.
    TokenStream tokenStearm = chinesAnalyer.TokenStream("", new StringReader(keyWord));
    Lucene.Net.Analysis.Token token;
    List<string> keyWordList = new List<string>();
    while ((token = tokenStearm.Next()) != null)
    {
      keyWordList.Add(token.TermText());
    }

    // Page window [startIndex, endIndex) over the hit list.
    // The previous loop bounded the index by min(result, pageSize), so every page after the
    // first produced at most one (duplicated) hit.
    int startIndex = (currPage - 1) * pageSize;
    if (startIndex < 0)
    {
      startIndex = 0;
    }
    int endIndex = startIndex + pageSize;
    if (endIndex > result)
    {
      endIndex = result;
    }

    // Collect into a local list first so a failure half-way leaves ArticleList untouched.
    List<Article> pageArticles = new List<Article>();
    for (int i = startIndex; i < endIndex; i++)
    {
      Document doc = hits.Doc(i);
      Article article = new Article();

      // Document.Get returns null for a missing field; fall back to an empty string.
      article.FromSiteUrl = doc.Get("Url") ?? string.Empty;
      article.Title = SimpleHighLighter(doc.Get("Title") ?? string.Empty, keyWordList, "<font style='color:Red'>{0}</font>", 266);
      article.Content = SimpleHighLighter(doc.Get("Content") ?? string.Empty, keyWordList, "<font style='color:Red'>{0}</font>", 266);
      article.UptoTime = doc.Get("Updated");
      pageArticles.Add(article);
    }

    foreach (Article pageArticle in pageArticles)
    {
      ArticleList.Add(pageArticle);
    }
  }
  catch (Exception)
  {
    // Best effort, as before: a failed parse/search is reported as "no results".
    return 0;
  }
  finally
  {
    // Always release the searcher, on success and on failure.
    indexSearch.Close();
  }
  return result;
}

/// <summary>
/// Builds the "ArticleIndex" Lucene index from all link articles.
/// Loads the articles through <c>HelperArticle.GetAllArticle</c> and forwards them to
/// <c>SetLuceneIndex</c> in create mode, indexing the Url, Title, Content and Updated columns.
/// </summary>
private void SetArticleIndex()
{
  // Source data: every article of type LinkArticle.
  List<Article> linkArticles = HelperArticle.GetAllArticle(TypeOfArticle.LinkArticle);

  // Target directory and analyzer for the index.
  string indexDirectory = Server.MapPath("ArticleIndex");
  ChineseAnalyzer chineseAnalyzer = new ChineseAnalyzer();

  // Property names of Article that are written into each document.
  string[] indexedColumns = new string[] { "Url", "Title", "Content", "Updated" };

  // true = create the index rather than append to it.
  this.SetLuceneIndex<Article>(indexDirectory, chineseAnalyzer, linkArticles, true, indexedColumns);
}
/// <summary>
/// Generic index builder: writes one Lucene document per item of <paramref name="tList"/>,
/// with one stored field per name in <paramref name="colunms"/> read from the item by reflection.
/// "Title" and "Content" columns (case-insensitive) are tokenized; all other columns are stored untokenized.
/// Failures while indexing are traced and swallowed (best effort); the writer is always closed.
/// </summary>
/// <typeparam name="T">Type of the items being indexed.</typeparam>
/// <param name="path">Absolute path of the index directory, e.g. Server.MapPath("LuceneArticleIndex").</param>
/// <param name="analyzer">Analyzer used to tokenize field content.</param>
/// <param name="tList">Items to index.</param>
/// <param name="isCreateOrAppend">true to create the index, false to append to it.</param>
/// <param name="colunms">Names of the public properties to index.</param>
private void SetLuceneIndex<T>(string path, Analyzer analyzer, List<T> tList, bool isCreateOrAppend, params string[] colunms)
{
  IndexWriter writer = new IndexWriter(path, analyzer, isCreateOrAppend);
  try
  {
    foreach (T item in tList)
    {
      if (item == null)
      {
        // Nothing to read from a null entry; skip it instead of aborting the whole build.
        continue;
      }

      Document doc = new Document();
      foreach (string colunm in colunms)
      {
        System.Reflection.PropertyInfo property = item.GetType().GetProperty(colunm);
        if (property == null)
        {
          throw new ArgumentException("Type " + item.GetType().FullName + " has no public property named '" + colunm + "'.", "colunms");
        }

        object obj = property.GetValue(item, null);
        string content;
        if (obj == null)
        {
          // A single null property value used to throw and silently stop indexing every remaining item.
          content = string.Empty;
        }
        else if ("Updated".Equals(colunm) && obj is DateTime)
        {
          // Invariant culture keeps the stored date stable regardless of the server's calendar settings.
          content = ((DateTime)obj).ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
        }
        else
        {
          content = We7Helper.RemoveHtml(obj.ToString()).Replace(" ", "");
        }

        // Ordinal, case-insensitive match: ToLower() is culture-sensitive (e.g. Turkish dotless i).
        bool isTokenized = string.Equals(colunm, "title", StringComparison.OrdinalIgnoreCase)
          || string.Equals(colunm, "content", StringComparison.OrdinalIgnoreCase);

        // Index the data
        doc.Add(SetField(colunm, content, Field.Store.YES, isTokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED));
      }
      writer.AddDocument(doc);
    }
    writer.Optimize();
  }
  catch (Exception ex)
  {
    // Still best effort (callers do not expect an exception), but leave a trace of what went wrong.
    System.Diagnostics.Trace.TraceError("SetLuceneIndex failed for '{0}': {1}", path, ex);
  }
  finally
  {
    // Close exactly once, on success and on failure.
    writer.Close();
  }
}

/// <summary>
/// Produces a highlighted snippet: removes spaces from <paramref name="body"/>, truncates it to
/// <paramref name="Max_Length"/> characters and wraps every keyword occurrence with
/// <paramref name="highLighter"/>.
/// The text is scanned once from left to right and, at each position, the longest matching keyword wins.
/// Inserted markup is therefore never re-scanned, so a keyword such as "font" cannot corrupt the
/// highlight tags and duplicate keywords do not wrap twice. Matching is ordinal and case-sensitive.
/// </summary>
/// <param name="body">Source text; null is treated as empty.</param>
/// <param name="keyWords">Terms to highlight; null or empty entries are ignored.</param>
/// <param name="highLighter">Format string with a {0} placeholder, e.g. an opening tag, {0}, a closing tag.</param>
/// <param name="Max_Length">Maximum number of characters kept from the text; negative values are treated as 0.</param>
/// <returns>The highlighted snippet, followed by "..." only when the text was truncated.</returns>
public string SimpleHighLighter(string body, List<string> keyWords, string highLighter, int Max_Length)
{
  string text = (body ?? string.Empty).Replace(" ", "");
  int limit = (Max_Length < 0) ? 0 : Max_Length;

  bool truncated = limit < text.Length;
  if (truncated)
  {
    text = text.Substring(0, limit);
  }

  StringBuilder sbBody = new StringBuilder(text.Length + 16);
  int position = 0;
  while (position < text.Length)
  {
    // Find the longest keyword that starts at the current position.
    string match = null;
    if (keyWords != null)
    {
      foreach (string objkeyWord in keyWords)
      {
        if (string.IsNullOrEmpty(objkeyWord) || objkeyWord.Length > text.Length - position)
        {
          continue;
        }
        if ((match == null || objkeyWord.Length > match.Length)
          && string.CompareOrdinal(text, position, objkeyWord, 0, objkeyWord.Length) == 0)
        {
          match = objkeyWord;
        }
      }
    }

    if (match == null)
    {
      sbBody.Append(text[position]);
      position++;
    }
    else
    {
      sbBody.Append(string.Format(highLighter, match));
      position += match.Length;
    }
  }

  // The ellipsis signals that text was cut; appending it to complete text is misleading.
  if (truncated)
  {
    sbBody.Append("...");
  }
  return sbBody.ToString();
}


posted @ 2012-01-17 10:23  one light  阅读(360)  评论(0编辑  收藏  举报