中文分词比较麻烦。虽然有些中文分词组件也不错，但需要自己维护词库，我还是觉得麻烦。于是我尝试使用 SCWS 中文分词，直接调用它的 API，自己什么都不需要做，在此向 SCWS 表示感谢。
废话不多说了,直接上菜。
/// <summary>
/// Segments Chinese text into words by POSTing it to the SCWS HTTP API
/// (http://www.ftphp.com/scws/api.php) and reading the JSON reply.
/// Author contact: 1638988@gmail.com
/// </summary>
/// <param name="str">The text to segment.</param>
/// <returns>
/// The segmented words, each followed by a single space. Returns an empty string when
/// <paramref name="str"/> is null or empty. If the request or the parsing fails, the
/// words collected so far (possibly none) are returned; this method does not throw.
/// </returns>
public static string Segment(string str)
{
    System.Text.StringBuilder sb = new System.Text.StringBuilder();

    // Nothing to segment: skip the network round trip entirely.
    if (string.IsNullOrEmpty(str))
    {
        return sb.ToString();
    }

    try
    {
        // UrlEncode percent-encodes non-ASCII characters, so the form body is pure ASCII.
        byte[] postData = System.Text.Encoding.ASCII.GetBytes("data=" + System.Web.HttpUtility.UrlEncode(str) + "&respond=json&charset=utf8&ignore=yes&duality=no&traditional=no&multi=0");

        // Configure the form POST.
        System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("http://www.ftphp.com/scws/api.php");
        request.Method = "POST";
        request.KeepAlive = false;
        request.ContentType = "application/x-www-form-urlencoded";
        request.CookieContainer = new System.Net.CookieContainer();
        request.ContentLength = postData.Length;

        // Send the request body; the using block closes the stream even if Write throws.
        using (System.IO.Stream outputStream = request.GetRequestStream())
        {
            outputStream.Write(postData, 0, postData.Length);
        }

        // Read the whole reply as UTF-8 and release the connection deterministically.
        string val;
        using (System.Net.HttpWebResponse response = (System.Net.HttpWebResponse)request.GetResponse())
        using (System.IO.Stream responseStream = response.GetResponseStream())
        using (System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, System.Text.Encoding.UTF8))
        {
            val = reader.ReadToEnd();
        }

        // The code reads a "words" collection whose entries each carry a "word" field.
        Newtonsoft.Json.Linq.JObject results = Newtonsoft.Json.Linq.JObject.Parse(val);
        Newtonsoft.Json.Linq.JToken words = results["words"];
        if (words != null)
        {
            foreach (Newtonsoft.Json.Linq.JToken item in words.Children())
            {
                // Read the field straight from the parsed token instead of re-parsing its text.
                Newtonsoft.Json.Linq.JObject entry = item as Newtonsoft.Json.Linq.JObject;
                Newtonsoft.Json.Linq.JToken word = entry == null ? null : entry["word"];
                if (word == null)
                {
                    continue;
                }

                sb.Append(word.ToString()).Append(' ');
            }
        }
    }
    catch (System.Exception ex)
    {
        // Best effort by design: callers rely on this method never throwing, so the
        // failure is reported through tracing instead of being swallowed silently.
        System.Diagnostics.Trace.TraceWarning("Segment: SCWS request failed: " + ex.Message);
    }

    return sb.ToString();
}
/// <summary>
/// Segments Chinese text into words by POSTing it to the SCWS HTTP API
/// (http://www.ftphp.com/scws/api.php) and reading the JSON reply.
/// Author contact: 1638988@gmail.com
/// </summary>
/// <param name="str">The text to segment.</param>
/// <returns>
/// The segmented words, each followed by a single space. Returns an empty string when
/// <paramref name="str"/> is null or empty. If the request or the parsing fails, the
/// words collected so far (possibly none) are returned; this method does not throw.
/// </returns>
public static string Segment(string str)
{
    System.Text.StringBuilder sb = new System.Text.StringBuilder();

    // Nothing to segment: skip the network round trip entirely.
    if (string.IsNullOrEmpty(str))
    {
        return sb.ToString();
    }

    try
    {
        // UrlEncode percent-encodes non-ASCII characters, so the form body is pure ASCII.
        byte[] postData = System.Text.Encoding.ASCII.GetBytes("data=" + System.Web.HttpUtility.UrlEncode(str) + "&respond=json&charset=utf8&ignore=yes&duality=no&traditional=no&multi=0");

        // Configure the form POST.
        System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("http://www.ftphp.com/scws/api.php");
        request.Method = "POST";
        request.KeepAlive = false;
        request.ContentType = "application/x-www-form-urlencoded";
        request.CookieContainer = new System.Net.CookieContainer();
        request.ContentLength = postData.Length;

        // Send the request body; the using block closes the stream even if Write throws.
        using (System.IO.Stream outputStream = request.GetRequestStream())
        {
            outputStream.Write(postData, 0, postData.Length);
        }

        // Read the whole reply as UTF-8 and release the connection deterministically.
        string val;
        using (System.Net.HttpWebResponse response = (System.Net.HttpWebResponse)request.GetResponse())
        using (System.IO.Stream responseStream = response.GetResponseStream())
        using (System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, System.Text.Encoding.UTF8))
        {
            val = reader.ReadToEnd();
        }

        // The code reads a "words" collection whose entries each carry a "word" field.
        Newtonsoft.Json.Linq.JObject results = Newtonsoft.Json.Linq.JObject.Parse(val);
        Newtonsoft.Json.Linq.JToken words = results["words"];
        if (words != null)
        {
            foreach (Newtonsoft.Json.Linq.JToken item in words.Children())
            {
                // Read the field straight from the parsed token instead of re-parsing its text.
                Newtonsoft.Json.Linq.JObject entry = item as Newtonsoft.Json.Linq.JObject;
                Newtonsoft.Json.Linq.JToken word = entry == null ? null : entry["word"];
                if (word == null)
                {
                    continue;
                }

                sb.Append(word.ToString()).Append(' ');
            }
        }
    }
    catch (System.Exception ex)
    {
        // Best effort by design: callers rely on this method never throwing, so the
        // failure is reported through tracing instead of being swallowed silently.
        System.Diagnostics.Trace.TraceWarning("Segment: SCWS request failed: " + ex.Message);
    }

    return sb.ToString();
}