关于Lucene.net 中高亮显示关键词的深究
这几天一直在学习lucene,也写了3篇自己总结的知识点,本以为很容易上手的东西,但是却遇到了一个很棘手的问题,借此,希望可以跟大家探讨一下
问题:使用盘古高亮显示组件后,如果搜索词中间包含空格(例如“mp3 player”),搜索结果经过高亮处理后,其中的空格会莫名其妙地消失,具体原因不明;如果去掉高亮,搜出来的结果中空格是正常保留的。
代码:
// Search code: query the "title", "keywords" and "productsummary" fields with the
// same keyword and bind the requested page of results to the repeater.
Lucene.Net.Store.FSDirectory dir = Lucene.Net.Store.FSDirectory.GetDirectory(basePath);
Lucene.Net.Search.IndexSearcher search = null;
try
{
    search = new Lucene.Net.Search.IndexSearcher(dir, true);

    Lucene.Net.Search.BooleanQuery boolQuery = new Lucene.Net.Search.BooleanQuery();

    // The keyword MUST match the title ...
    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_29, "title", pgAnalyzer);
    boolQuery.Add(parser.Parse(kw), Lucene.Net.Search.BooleanClause.Occur.MUST);

    // ... while matches in the keywords and product summary are optional (SHOULD).
    parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_29, "keywords", pgAnalyzer);
    boolQuery.Add(parser.Parse(kw), Lucene.Net.Search.BooleanClause.Occur.SHOULD);
    parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_29, "productsummary", pgAnalyzer);
    boolQuery.Add(parser.Parse(kw), Lucene.Net.Search.BooleanClause.Occur.SHOULD);

    Lucene.Net.Search.Hits hits = search.Search(boolQuery);

    // The page must be built while the searcher is still open, because the
    // documents are read from the hit list inside PageList.
    List<ProductInfo> lists = PageList(hits, pageIndex, pageSize);
    if (lists != null && lists.Count > 0)
    {
        records = hits.Length();
        rptList.DataSource = lists;
        rptList.DataBind();
    }
}
finally
{
    // Release the searcher and the directory even when parsing, searching or
    // binding throws; the original snippet never closed the searcher at all.
    if (search != null)
    {
        search.Close();
    }
    dir.Close();
}
/// <summary>
/// Builds the list of results for one page of a paged display.
/// <c>ProductInfo</c> is a custom data structure defined by the application.
/// </summary>
/// <param name="hits">The Lucene hit list to page through.</param>
/// <param name="pageIndex">
/// 1-based page number. Values below 1 are treated as the first page and values
/// beyond the last page are treated as the last page.
/// </param>
/// <param name="pageSize">Maximum number of items per page; must be greater than zero.</param>
/// <returns>
/// The items of the requested page, or <c>null</c> when <paramref name="hits"/> is
/// <c>null</c> or contains no hits.
/// </returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="pageSize"/> is zero or negative and there are hits to page.
/// </exception>
public List<ProductInfo> PageList(Lucene.Net.Search.Hits hits, int pageIndex, int pageSize)
{
    if (hits == null)
    {
        return null;
    }

    int total = hits.Length();
    if (total <= 0)
    {
        return null;
    }

    // Checked after the "no hits" guards so that calls which previously returned
    // null keep doing so; a non-positive page size used to divide by zero or
    // silently produce an empty page.
    if (pageSize <= 0)
    {
        throw new System.ArgumentOutOfRangeException("pageSize", pageSize, "pageSize must be greater than zero.");
    }

    // Total number of pages, rounding up for a partially filled last page.
    int totalPages = total / pageSize;
    if (total % pageSize != 0)
    {
        totalPages++;
    }

    // Clamp the requested page into [1, totalPages].
    if (pageIndex <= 0)
    {
        pageIndex = 1;
    }
    if (pageIndex > totalPages)
    {
        pageIndex = totalPages;
    }

    int start = (pageIndex - 1) * pageSize;
    int remaining = total - start;
    int count = remaining < pageSize ? remaining : pageSize;

    List<ProductInfo> lists = new List<ProductInfo>(count);
    for (int i = start; i < start + count; i++)
    {
        Lucene.Net.Documents.Document doc = hits.Doc(i);

        // Copy the stored fields of the document into the result item.
        ProductInfo item = new ProductInfo();
        item.AttributeCustom = doc.Get("attributecustom");
        item.BusinessType = doc.Get("businesstype");
        item.CompanyName = doc.Get("companyname");
        item.CountryName = doc.Get("countryname");
        item.Guid = doc.Get("guid");
        item.LocalPic = doc.Get("localpic");
        item.MemberId = doc.Get("memberid");
        item.ProductSummary = doc.Get("productsummary");
        item.Title = doc.Get("title");
        item.UrlTitle = doc.Get("urltitle");
        lists.Add(item);
    }
    return lists;
}
/// <summary>
/// Highlighting helper, called with the current title while the repeater is
/// bound to its data source.
/// </summary>
/// <param name="title">The title text of the current item.</param>
/// <returns>
/// An empty string when <paramref name="title"/> is null or empty; the unchanged
/// title when there is no search keyword or the highlighter produces no fragment;
/// otherwise the fragment returned by the highlighter.
/// </returns>
protected string hightLightTitle(string title)
{
    if (string.IsNullOrEmpty(title))
    {
        return string.Empty;
    }

    // The original repeated the title check here; without a keyword there is
    // nothing to highlight, so show the title as it is.
    if (string.IsNullOrEmpty(kw))
    {
        return title;
    }

    PanGu.HighLight.Highlighter highter = new PanGu.HighLight.Highlighter(formatter, new PanGu.Segment());
    highter.FragmentSize = 100;

    string highlighted = highter.GetBestFragment(kw, title);

    // NOTE(review): the PanGu demo code treats an empty result as "no keyword
    // matched" and falls back to the plain text - confirm against the PanGu
    // version in use. The fallback keeps the title from rendering blank.
    return string.IsNullOrEmpty(highlighted) ? title : highlighted;
}
比如:搜索 “China Shine Globe”,出来的结果如下
关键词中的空格莫名其妙地没了,真的很让人费解。小弟研究了好久也没搞明白,希望各位大侠可以指导指导。