利用 Lucene.Net 的 PanGu 分词器对一段话进行分词
//添加下面三个dll文件，并把dict放在项目的Bin文件夹下
![](http://images0.cnblogs.com/blog/358019/201302/27111904-ff81187fa3654a0ca92bcb2ee64ee184.jpg)
![](http://images0.cnblogs.com/blog/358019/201302/27111920-15f9093199d948da89acc8990a002064.jpg)
/// <summary>
/// Click handler for Button1: segments the text entered in TextBox1 and
/// writes the extracted keywords to TextBox2.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    // Read the input first, then create a fresh tokenizer for this request.
    string sourceText = TextBox1.Text;
    PanGuTokenizer tokenizer = new PanGuTokenizer();

    string segmented = GetKeyWordsSplitBySpace(sourceText, tokenizer);
    TextBox2.Text = segmented;
}
#region 对标题提取关键字分词
/// <summary>
/// Segments the given text with the supplied PanGu tokenizer and returns the
/// extracted keywords joined by commas.
/// </summary>
/// <remarks>
/// Despite the method name, the separator is a comma (","), not a space; this
/// is kept for compatibility with existing callers. Only words of at least two
/// characters are included. The result carries no trailing separator.
/// </remarks>
/// <param name="keywords">The text to segment. Null or empty yields an empty string.</param>
/// <param name="ktTokenizer">The tokenizer used to split <paramref name="keywords"/> into words.</param>
/// <returns>
/// The comma-separated keywords, or an empty string when no word qualifies.
/// </returns>
static public string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    // Nothing to segment: skip the tokenizer call entirely.
    if (string.IsNullOrEmpty(keywords))
    {
        return string.Empty;
    }

    StringBuilder result = new StringBuilder();
    ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);
    foreach (WordInfo word in words)
    {
        // Skip missing entries and words shorter than two characters.
        if (word == null || word.Word == null || word.Word.Length < 2)
        {
            continue;
        }

        // Write the separator only between items so the output never ends
        // with a dangling comma (Trim() removes whitespace, not commas).
        if (result.Length > 0)
        {
            result.Append(',');
        }
        result.Append(word.Word);
    }
    return result.ToString().Trim();
}
#endregion