最大匹配分词算法
代码
/*
* Created by SharpDevelop.
* User: Administrator
* Date: 2010/4/9
* Time: 15:06
* Directions:最大匹配分词算法
*
* To change this template use Tools | Options | Coding | Edit Standard Headers.
*/
using System;
using System.IO;
using System.Text;
using System.Collections;
namespace cc
{
    /// <summary>
    /// Console demo of forward maximum matching word segmentation: at each position
    /// the longest dictionary word (up to a fixed length) is taken, falling back to a
    /// single character when nothing matches.
    /// </summary>
    class Program
    {
        /// <summary>Longest candidate word, in characters, tried at each position.</summary>
        private const int MaxWordLength = 4;

        /// <summary>Dictionary file that is read line by line; each line is one entry.</summary>
        private const string DictionaryPath = @"E:\works\ChineseDictionary.txt";

        /// <summary>
        /// Loads the dictionary, segments the built-in sample text, prints the
        /// segmented text and the elapsed time, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            string text = @"那一刻我升起风马不为乞福只为守候你的到来那一天闭目在经殿香雾中蓦然听见你颂经中的真言那一月我摇动所有的经桶不为超度只为触摸你的指尖那一年磕长头匍匐在山路不为觐见只为贴着你的温暖那一世转山转水转佛塔啊不为修来生只为途中与你相见";

            // Stopwatch is the monotonic timer meant for measuring elapsed time;
            // DateTime.Now is wall-clock time and can jump. As before, the measured
            // interval covers both dictionary loading and segmentation.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            System.Collections.Generic.HashSet<string> dictionary = LoadDictionary(DictionaryPath);
            string segmented = Segment(text, dictionary, MaxWordLength);

            timer.Stop();

            Console.WriteLine(segmented);
            Console.WriteLine("分词花费的时间:{0}", timer.Elapsed.ToString());
            Console.Write("Press any key to continue . . . ");
            Console.ReadKey(true);
        }

        /// <summary>
        /// Reads every line of the file at <paramref name="path"/> into a set.
        /// </summary>
        /// <param name="path">Path of the dictionary file.</param>
        /// <returns>The set of lines read; lines are stored unmodified and compared ordinally.</returns>
        private static System.Collections.Generic.HashSet<string> LoadDictionary(string path)
        {
            System.Collections.Generic.HashSet<string> words =
                new System.Collections.Generic.HashSet<string>(StringComparer.Ordinal);

            // The using statement guarantees the file handle is released even if reading throws.
            // NOTE(review): Encoding.Default is kept from the original; what it resolves to
            // depends on the runtime/platform - confirm it matches the dictionary file.
            using (StreamReader reader = new StreamReader(path, Encoding.Default))
            {
                string line;
                // ReadLine returns null at end of input, which is the reliable end-of-file test.
                while ((line = reader.ReadLine()) != null)
                {
                    words.Add(line);
                }
            }

            return words;
        }

        /// <summary>
        /// Segments <paramref name="text"/> by forward maximum matching.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <param name="dictionary">Known words; membership is tested with <c>Contains</c>.</param>
        /// <param name="maxLen">Longest candidate length to try at each position; must be at least 1.</param>
        /// <returns>
        /// Every token followed by "/", concatenated in order; an empty string for empty input.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="text"/> or <paramref name="dictionary"/> is null.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxLen"/> is less than 1.</exception>
        internal static string Segment(string text, System.Collections.Generic.ICollection<string> dictionary, int maxLen)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }
            if (dictionary == null)
            {
                throw new ArgumentNullException("dictionary");
            }
            if (maxLen < 1)
            {
                throw new ArgumentOutOfRangeException("maxLen", "maxLen must be at least 1.");
            }

            StringBuilder output = new StringBuilder(text.Length * 2);
            int position = 0;

            // Walk the text with an index instead of re-copying the remainder each step.
            while (position < text.Length)
            {
                int length = Math.Min(maxLen, text.Length - position);

                // Shrink the candidate from the right until it is a dictionary word.
                // A single character is always accepted, so the scan always advances.
                while (length > 1 && !dictionary.Contains(text.Substring(position, length)))
                {
                    length--;
                }

                output.Append(text, position, length).Append('/');
                position += length;
            }

            return output.ToString();
        }
    }
}
/*
* Created by SharpDevelop.
* User: Administrator
* Date: 2010/4/9
* Time: 15:06
* Directions:最大匹配分词算法
*
* To change this template use Tools | Options | Coding | Edit Standard Headers.
*/
using System;
using System.IO;
using System.Text;
using System.Collections;
namespace cc
{
    /// <summary>
    /// Console demo of forward maximum matching word segmentation: at each position
    /// the longest dictionary word (up to a fixed length) is taken, falling back to a
    /// single character when nothing matches.
    /// </summary>
    class Program
    {
        /// <summary>Longest candidate word, in characters, tried at each position.</summary>
        private const int MaxWordLength = 4;

        /// <summary>Dictionary file that is read line by line; each line is one entry.</summary>
        private const string DictionaryPath = @"E:\works\ChineseDictionary.txt";

        /// <summary>
        /// Loads the dictionary, segments the built-in sample text, prints the
        /// segmented text and the elapsed time, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            string text = @"那一刻我升起风马不为乞福只为守候你的到来那一天闭目在经殿香雾中蓦然听见你颂经中的真言那一月我摇动所有的经桶不为超度只为触摸你的指尖那一年磕长头匍匐在山路不为觐见只为贴着你的温暖那一世转山转水转佛塔啊不为修来生只为途中与你相见";

            // Stopwatch is the monotonic timer meant for measuring elapsed time;
            // DateTime.Now is wall-clock time and can jump. As before, the measured
            // interval covers both dictionary loading and segmentation.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            System.Collections.Generic.HashSet<string> dictionary = LoadDictionary(DictionaryPath);
            string segmented = Segment(text, dictionary, MaxWordLength);

            timer.Stop();

            Console.WriteLine(segmented);
            Console.WriteLine("分词花费的时间:{0}", timer.Elapsed.ToString());
            Console.Write("Press any key to continue . . . ");
            Console.ReadKey(true);
        }

        /// <summary>
        /// Reads every line of the file at <paramref name="path"/> into a set.
        /// </summary>
        /// <param name="path">Path of the dictionary file.</param>
        /// <returns>The set of lines read; lines are stored unmodified and compared ordinally.</returns>
        private static System.Collections.Generic.HashSet<string> LoadDictionary(string path)
        {
            System.Collections.Generic.HashSet<string> words =
                new System.Collections.Generic.HashSet<string>(StringComparer.Ordinal);

            // The using statement guarantees the file handle is released even if reading throws.
            // NOTE(review): Encoding.Default is kept from the original; what it resolves to
            // depends on the runtime/platform - confirm it matches the dictionary file.
            using (StreamReader reader = new StreamReader(path, Encoding.Default))
            {
                string line;
                // ReadLine returns null at end of input, which is the reliable end-of-file test.
                while ((line = reader.ReadLine()) != null)
                {
                    words.Add(line);
                }
            }

            return words;
        }

        /// <summary>
        /// Segments <paramref name="text"/> by forward maximum matching.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <param name="dictionary">Known words; membership is tested with <c>Contains</c>.</param>
        /// <param name="maxLen">Longest candidate length to try at each position; must be at least 1.</param>
        /// <returns>
        /// Every token followed by "/", concatenated in order; an empty string for empty input.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="text"/> or <paramref name="dictionary"/> is null.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxLen"/> is less than 1.</exception>
        internal static string Segment(string text, System.Collections.Generic.ICollection<string> dictionary, int maxLen)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }
            if (dictionary == null)
            {
                throw new ArgumentNullException("dictionary");
            }
            if (maxLen < 1)
            {
                throw new ArgumentOutOfRangeException("maxLen", "maxLen must be at least 1.");
            }

            StringBuilder output = new StringBuilder(text.Length * 2);
            int position = 0;

            // Walk the text with an index instead of re-copying the remainder each step.
            while (position < text.Length)
            {
                int length = Math.Min(maxLen, text.Length - position);

                // Shrink the candidate from the right until it is a dictionary word.
                // A single character is always accepted, so the scan always advances.
                while (length > 1 && !dictionary.Contains(text.Substring(position, length)))
                {
                    length--;
                }

                output.Append(text, position, length).Append('/');
                position += length;
            }

            return output.ToString();
        }
    }
}