代码复审
在结对编程中抱到黎柱金同学的大腿,让我轻松了许多。结对伙伴的词频统计程序写得很好,下面我对大神的代码做一些个人评价。
伙伴的代码整体都很好:代码简洁,没有冗余代码;用C#语言实现面向对象,层次化抽象清晰,任务分配合理,类中没有过多的冗余属性;代码重用也应用得很好,进一步使得代码简洁明了。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace WordFrequency
{
    /// <summary>
    /// Console entry point: parses the command line, scans a directory tree with
    /// <c>WordManager</c> and writes the tally to a fixed output file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Usage: <c>[-e2|-e3] &lt;directory&gt;</c>. With a single argument the
        /// normal mode is used; with two arguments the first one selects the mode.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        static void Main(string[] args)
        {
            TallyMode mode;
            string path;
            string outputFileName = "lizhujin@outlook.com.txt";

            #region tally mode & path
            switch (args.Length)
            {
                case 0:
                    Console.WriteLine("Please specify a directory!");
                    return;
                case 1:
                    mode = TallyMode.Normal;
                    path = args[0];
                    break;
                case 2:
                    if (args[0] == "-e2")
                    {
                        mode = TallyMode.E2;
                    }
                    else if (args[0] == "-e3")
                    {
                        mode = TallyMode.E3;
                    }
                    else
                    {
                        Console.WriteLine("The argument must be -e2 or -e3. Scanning cancelled.");
                        return;
                    }
                    path = args[1];
                    break;
                default:
                    Console.WriteLine("Arguments incorrect. Scanning cancelled.");
                    return;
            }
            #endregion

            WordManager wordManager;
            try
            {
                wordManager = new WordManager(path, mode);
            }
            catch (IOException e)
            {
                /* the constructor reports a missing directory as an IOException */
                Console.WriteLine(e.Message);
                return;
            }

            Console.WriteLine("Path: {0}", path);
            Console.WriteLine("Start scanning...");
            wordManager.ScanAndCount(new List<string>() { ".txt", ".cpp", ".h", ".cs" }, false);
            Console.WriteLine("Complete!");

            Console.WriteLine(new string('-', SeparatorWidth()));

            /* "using" guarantees the file is flushed and closed even if Output throws. */
            /* To print to the console instead, pass Console.Out to Output. */
            using (var writer = new StreamWriter(outputFileName, false, Encoding.Default))
            {
                wordManager.Output(true, writer);
            }
            Console.WriteLine("The results have been saved to \"{0}\"", outputFileName);
        }

        /// <summary>
        /// Width of the dashed separator line: one column less than the console
        /// window, or a fixed fallback when the window width is unavailable.
        /// </summary>
        /// <returns>A strictly positive character count.</returns>
        private static int SeparatorWidth()
        {
            const int fallbackWidth = 79;

            try
            {
                /* A non-positive value would make the string constructor throw. */
                int width = Console.WindowWidth - 1;
                return width > 0 ? width : fallbackWidth;
            }
            catch (IOException)
            {
                /* Console.WindowWidth is documented to throw IOException when the
                   console information cannot be read (e.g. no attached console). */
                return fallbackWidth;
            }
        }
    }
}
伙伴对命令行输入可能出错的情况进行了判断,并在控制台给出人性化的提示,让使用者知道自己的输入问题所在;同时还进行了异常处理,以防输入过程出现问题,使得程序更加稳定。在文件处理时也采用了异常处理,使得程序的严谨性与正确性大大提高。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace WordFrequency
{
    /// <summary>
    /// Scans a directory tree and tallies words (normal mode) or groups of two or
    /// three space-separated words (E2 / E3 modes) found in the matching files.
    /// </summary>
    class WordManager
    {
        /* number of entries written by Output in the E2 / E3 modes */
        private const int ExtendedModeOutputLimit = 10;

        /* the list is sorted by the lower-cased word, which is not the final order */
        /* in the extended modes the same list holds "word group"s instead of words */
        SortedList<string, WordInfo> WordList = new SortedList<string, WordInfo>();

        DirectoryInfo RootDirectory;
        TallyMode Mode;

        /// <summary>
        /// Creates a manager for the given root directory.
        /// </summary>
        /// <param name="path">Directory to scan; it must exist.</param>
        /// <param name="mode">Tally mode used while counting and printing.</param>
        /// <exception cref="IOException">The directory does not exist.</exception>
        public WordManager(string path, TallyMode mode)
        {
            if (!Directory.Exists(path))
            {
                throw new IOException("The directory specified doesn't exist!");
            }

            RootDirectory = new DirectoryInfo(path);
            Mode = mode;
        }

        /// <summary>
        /// Recursively enumerates the files under the root directory and counts the
        /// words of every file whose extension is listed in <paramref name="extensions"/>.
        /// </summary>
        /// <param name="extensions">
        /// each extension should contain the prefix dot, e.g., ".png";
        /// the comparison ignores case
        /// </param>
        /// <param name="showLogs">When true, prints the full name of each scanned file.</param>
        public void ScanAndCount(IList<string> extensions, bool showLogs)
        {
            /* scan files and sub-directories recursively */
            var resultFileInfos =
                from fileInfo in RootDirectory.EnumerateFiles("*", SearchOption.AllDirectories)
                where extensions.Any(ext => ext.Equals(fileInfo.Extension, StringComparison.OrdinalIgnoreCase))
                select fileInfo;

            /* count words in each file */
            foreach (var fileInfo in resultFileInfos)
            {
                if (showLogs)
                {
                    Console.WriteLine("Scanning {0}", fileInfo.FullName);
                }
                CountWords(fileInfo);
            }
        }

        /// <summary>
        /// Writes one "word: frequency" line per entry. Normal mode writes every
        /// entry; the extended modes write at most the first ten.
        /// </summary>
        /// <param name="sort">
        /// When true the entries are ordered with <c>WordInfo.CompareTo</c>;
        /// otherwise they keep the key order of the internal list.
        /// </param>
        /// <param name="writer">Destination of the report.</param>
        public void Output(bool sort, TextWriter writer)
        {
            var wordInfoList = WordList.Values.ToList();

            if (sort)
            {
                wordInfoList.Sort();
            }

            int limit = Mode == TallyMode.Normal
                ? wordInfoList.Count
                : Math.Min(wordInfoList.Count, ExtendedModeOutputLimit);

            for (int i = 0; i < limit; i++)
            {
                writer.WriteLine("{0}: {1}", wordInfoList[i].Word, wordInfoList[i].Frequency);
            }
        }

        /* count words in specified file and store them to WordList */
        private void CountWords(FileInfo fileInfo)
        {
            string text;

            /* dispose the stream and reader so the file handle is released per file */
            using (FileStream readStream = fileInfo.OpenRead())
            using (StreamReader reader = new StreamReader(readStream, Encoding.Default))
            {
                text = reader.ReadToEnd();
            }

            int i = 0;
            while (i < text.Length)
            {
                string word = CurrentWord(text, i);
                if (word == null)
                {
                    i++;
                    continue;
                }

                /* i now points just past the first word; it is not advanced any
                   further, so the following words can start groups of their own */
                i += word.Length;

                string word1 = null;
                if (Mode == TallyMode.E2 || Mode == TallyMode.E3)
                {
                    /* the second word must follow after exactly one space */
                    if (i >= text.Length || text[i] != ' ' || (word1 = CurrentWord(text, i + 1)) == null)
                    {
                        continue;
                    }
                    word = word + " " + word1;
                }

                if (Mode == TallyMode.E3)
                {
                    /* index just past the second word */
                    int afterSecond = i + word1.Length + 1;
                    string word2;
                    if (afterSecond >= text.Length || text[afterSecond] != ' ' ||
                        (word2 = CurrentWord(text, afterSecond + 1)) == null)
                    {
                        continue;
                    }
                    word = word + " " + word2;
                }

                /* add this word to the list */
                /* invariant lower-casing keeps the keys independent of the current culture */
                string lowerCase = word.ToLowerInvariant();
                WordInfo existing;
                if (WordList.TryGetValue(lowerCase, out existing))
                {
                    existing.Add(word);
                }
                else
                {
                    WordList[lowerCase] = new WordInfo(word);
                }
            }
        }

        /// <summary>
        /// if there is a legal word starting with s[i], returns it,
        /// otherwise returns null. A legal word starts at the beginning of the
        /// text or right after a delimiter, begins with at least three letters
        /// and continues with letters or digits.
        /// </summary>
        /// <param name="s">Text being scanned.</param>
        /// <param name="i">Candidate start index; may be past the end of the text.</param>
        /// <returns>The word, or null when no legal word starts at <paramref name="i"/>.</returns>
        private string CurrentWord(string s, int i)
        {
            if (i >= s.Length)
            {
                return null;
            }

            /* not alphabetic */
            if (!IsAlphabetic(s[i]))
            {
                return null;
            }

            /* alphabetic, but not following a delimiter */
            if (i > 0 && !IsDelimiter(s[i - 1]))
            {
                return null;
            }

            /* legal start, but fewer than 3 successive letters */
            if (i + 2 >= s.Length || !IsAlphabetic(s[i + 1]) || !IsAlphabetic(s[i + 2]))
            {
                return null;
            }

            /* extend the word over the following letters and digits */
            int end = i + 3;
            while (end < s.Length && IsAlphanumerical(s[end]))
            {
                end++;
            }

            return s.Substring(i, end - i);
        }

        /* true for the ASCII letters a-z and A-Z */
        private bool IsAlphabetic(char c)
        {
            return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
        }

        /* true for ASCII letters and the digits 0-9 */
        private bool IsAlphanumerical(char c)
        {
            return IsAlphabetic(c) || c >= '0' && c <= '9';
        }

        /* every character that is not alphanumerical separates words */
        private bool IsDelimiter(char c)
        {
            return !IsAlphanumerical(c);
        }
    }
}
代码中的类、函数与变量的命名体现了显式表达原则,让人一看就可以了解其作用与目的,可读性增添不少。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace WordFrequency
{
    /// <summary>
    /// A word (or word group) together with the number of times it was seen.
    /// Instances order by descending frequency, then by ordinal word order.
    /// </summary>
    class WordInfo : IComparable<WordInfo>
    {
        /// <summary>The ordinally smallest spelling seen so far.</summary>
        public string Word { get; private set; }

        /// <summary>Number of occurrences recorded; starts at 1.</summary>
        public int Frequency { get; private set; }

        /// <summary>Creates an entry for the first occurrence of a word.</summary>
        /// <param name="word">Spelling of the first occurrence.</param>
        public WordInfo(string word)
        {
            Word = word;
            Frequency = 1;
        }

        /// <summary>
        /// Records one more occurrence and keeps whichever spelling compares
        /// lower in ordinal order as the representative form.
        /// </summary>
        /// <param name="newForm">Spelling of the new occurrence.</param>
        public void Add(string newForm)
        {
            Frequency++;

            /* update the word */
            Word = string.CompareOrdinal(Word, newForm) < 0 ? Word : newForm;
        }

        /// <summary>
        /// Orders higher frequencies first; ties are broken by ordinal comparison
        /// of the words.
        /// </summary>
        /// <param name="other">Entry to compare with; may be null.</param>
        /// <returns>
        /// A negative value when this entry sorts before <paramref name="other"/>,
        /// a positive value when it sorts after, zero when both are equal.
        /// </returns>
        public int CompareTo(WordInfo other)
        {
            /* IComparable contract: any instance compares greater than null */
            if (other == null)
            {
                return 1;
            }

            /* frequency, descending */
            if (Frequency != other.Frequency)
            {
                return Frequency > other.Frequency ? -1 : 1;
            }

            /* lexical */
            return string.CompareOrdinal(Word, other.Word);
        }
    }
}
伙伴的代码经过他本人的优化已经体现了正确性与高效性,但是也有些细节可以进一步提高。
首先,代码中虽然添加了一些注释,但相对于代码量来说还不够,可以再适当增添一些,以增加代码的可读性。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace WordFrequency
{
    /// <summary>
    /// Selects what the tally counts.
    /// </summary>
    enum TallyMode
    {
        /// <summary>Default mode, used when no mode switch is given.</summary>
        Normal = 0,

        /// <summary>Extended mode selected by the "-e2" switch.</summary>
        E2 = 1,

        /// <summary>Extended mode selected by the "-e3" switch.</summary>
        E3 = 2,
    }
}
然后,在模式二与模式三下对单词排序时,可以变换一下算法,提高程序效率。他的代码在三种模式下都对所有单词进行全排序,而模式二和模式三只需要输出频率最高的10个词组。因此,可以每输出一个结果就遍历一次全部单词,选出当前频率最高的一项;这样的时间复杂度为O(10*N),当N较大时优于现在的O(N*log(N))。
最后,也是算法方面可以提高的问题:在WordManager.cs中有如下代码(从WordManager.cs的第104行开始):
/* add this word to the list */ string lowerCase = word.ToLower(); if (WordList.ContainsKey(lowerCase)) WordList[lowerCase].Add(word); else WordList[lowerCase] = new WordInfo(word);
这里if判断中的ContainsKey和随后的WordList[lowerCase]对lowerCase执行了两次查找,如果改成一次查找,就能把查找时间缩短约一半。像这样:
/* add this word to the list */ string lowerCase = word.ToLower(); WordInfo value; WordList.TryGetValue(lowerCase, out value); if (value != null) value.Add(word); else WordList[lowerCase] = new WordInfo(word);
以上就是我对黎柱金同学的个人项目代码复审报告。