C#统计英文文本中的单词数并排序
思路如下:
1.使用Hashtable(高效)集合,记录每个单词出现的次数
2.采用ArrayList对Hashtable中的Keys按字母序排列
3.按出现次数从高到低排序时使用插入排序(稳定),因此次数相同的单词仍保持字母序
/// <summary>
/// Counts how often each word occurs in a UTF-8 text file and prints one
/// "word:count" line per distinct word to the console, most frequent first.
/// Words with the same count are printed in alphabetical order.
/// </summary>
/// <remarks>
/// Words are compared case-insensitively; the spelling printed for a word is
/// the one that was encountered first. Punctuation characters (and '*') are
/// removed before the text is split into words on whitespace.
/// </remarks>
/// <param name="path">Path of the text file to analyse. If the file does not
/// exist, a message is printed and the method returns without counting.</param>
public void StatisticsWords(string path)
{
    if (!File.Exists(path))
    {
        Console.WriteLine("文件不存在!");
        return;
    }

    // word -> occurrence count (boxed int); keys are matched ignoring case.
    Hashtable ht = new Hashtable(StringComparer.OrdinalIgnoreCase);

    // "using" guarantees the file handle is released even if reading throws.
    using (StreamReader sr = new StreamReader(path, System.Text.Encoding.UTF8))
    {
        string line;

        // ReadLine returns null at end of file. Testing for null (instead of
        // line.Length) avoids a NullReferenceException at EOF and keeps blank
        // lines in the middle of the file from ending the scan early.
        while ((line = sr.ReadLine()) != null)
        {
            // Strip punctuation BEFORE splitting so the words that get counted
            // are already clean ("word," and "word" become the same key).
            string cleaned = Regex.Replace(line, @"[\p{P}*]", "", RegexOptions.Compiled);

            // A null separator array splits on any whitespace (spaces, tabs, ...);
            // RemoveEmptyEntries drops the empty strings produced by repeated separators.
            string[] wordArr = cleaned.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

            foreach (string s in wordArr)
            {
                // Single lookup: the indexer yields null for a word not seen yet.
                object current = ht[s];
                ht[s] = current == null ? 1 : (int)current + 1;
            }
        }
    }

    // Alphabetical order first, using the same case-insensitive ordinal rule
    // as the table keys so the order does not depend on the current culture.
    ArrayList keysList = new ArrayList(ht.Keys);
    keysList.Sort(StringComparer.OrdinalIgnoreCase);

    // Insertion sort by descending count. Elements are only moved past entries
    // with a strictly smaller count, so the sort is stable and words with equal
    // counts keep the alphabetical order established above.
    for (int i = 1; i < keysList.Count; i++)
    {
        object key = keysList[i];
        int count = (int)ht[key];
        int j = i;

        while (j > 0 && count > (int)ht[keysList[j - 1]])
        {
            keysList[j] = keysList[j - 1];
            j--;
        }

        keysList[j] = key;
    }

    // The stored values are boxed ints, so they are formatted via string
    // concatenation; casting them to string would throw InvalidCastException.
    foreach (object item in keysList)
    {
        Console.WriteLine(item + ":" + ht[item]);
    }
}