利用lucene.net 的PanGu对一段话分词

//添加下面三个 dll 文件的引用,并把 dict 文件夹放在项目的 Bin 目录下

 

//事件

/// <summary>
/// Click handler for Button1: segments the text currently in TextBox1 and
/// shows the extracted keywords in TextBox2.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
        {
            // Read the input first, then create a fresh tokenizer for this request.
            string inputText = TextBox1.Text;
            PanGuTokenizer tokenizer = new PanGuTokenizer();

            string extracted = GetKeyWordsSplitBySpace(inputText, tokenizer);
            TextBox2.Text = extracted;
        }

#region 对标题提取关键字分词
        /// <summary>
        /// 对标题提取关键字分词
        /// </summary>
        /// <param name="keywords"></param>
        /// <param name="ktTokenizer"></param>
        /// <returns></returns>
        static public string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
        {
            StringBuilder result = new StringBuilder();

            ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);

            foreach (WordInfo word in words)
            {
                if (word == null)
                {
                    continue;
                }
                else if (word.Word.Length >= 2)
                {

                    result.AppendFormat("{0},", word.Word);
                }
            }
            return result.ToString().Trim();
        }
        #endregion

posted @ 2013-02-27 11:21  沅江  阅读(202)  评论(0)  编辑  收藏  举报