代码复审

  对于结队编程抱到黎柱金同学的大腿让我轻松许多,结对伙伴的词频统计程序写的很好,现在我对大神代码做些个人意见的评价。

  伙伴的代码整体都很好,代码简洁,没冗余代码,而且用C#语言实现面向对象,层次化抽象清晰,任务非配合理,没有过多的类冗余属性,而且代码重用也应用很好,进一步使得代码简洁明了。

 1 using System;
 2 using System.Collections.Generic;
 3 using System.IO;
 4 using System.Linq;
 5 using System.Text;
 6 using System.Text.RegularExpressions;
 7 using System.Threading.Tasks;
 8 
 9 namespace WordFrequency
10 {
11     class Program
12     {
13         static void Main(string[] args)
14         {
15             TallyMode mode;
16             string path;
17             string outputFileName = "lizhujin@outlook.com.txt";
18 
19             #region tally mode & path
20             switch (args.Count())
21             {
22                 case 0:
23                     Console.WriteLine("Please specify a directory!");
24                     return;
25                 case 1:
26                     mode = TallyMode.Normal;
27                     path = args[0];
28                     break;
29                 case 2:
30                     if (args[0] == "-e2")
31                         mode = TallyMode.E2;
32                     else if (args[0] == "-e3")
33                         mode = TallyMode.E3;
34                     else
35                     {
36                         Console.WriteLine("The argument must be -e2 or -e3. Scanning cancelled.");
37                         return;
38                     }
39                     path = args[1];
40                     break;
41                 default:
42                     Console.WriteLine("Arguments incorrect. Scanning cancelled.");
43                     return;
44             }
45             #endregion
46 
47             WordManager wordManager;
48             try
49             {
50                 wordManager = new WordManager(path, mode);
51             }
52             catch (IOException e)
53             {
54                 Console.WriteLine(e.Message);
55                 return;
56             }
57 
58             Console.WriteLine("Path: {0}", path);
59             Console.WriteLine("Start scanning...");
60             wordManager.ScanAndCount(new List<string>() { ".txt", ".cpp", ".h", ".cs" }, false);
61             Console.WriteLine("Complete!");
62 
63             Console.WriteLine(new string('-', Console.WindowWidth - 1));
64             var writer = new StreamWriter(outputFileName, false, Encoding.Default);
65             wordManager.Output(true, writer);
66             //wordManager.Output(true, Console.Out);
67             writer.Close();
68             Console.WriteLine("The results have been saved to \"{0}\"", outputFileName);
69         }
70     }
71 }

  伙伴将命令行输入可能出错进行判断,而且在控制台有人性化输出,让别人可以知道自己的输入问题所在,而且还进行异常处理,以防输入过程出现问题,使得程序更加稳定。在文件处理时也采用了异常处理,使得程序严谨正确性大大提高。

 

  1 using System;
  2 using System.Collections.Generic;
  3 using System.IO;
  4 using System.Linq;
  5 using System.Text;
  6 using System.Text.RegularExpressions;
  7 using System.Threading.Tasks;
  8 
  9 namespace WordFrequency
 10 {
 11     class WordManager
 12     {
 13         /* the list is sorted by words' lowercases, and it's not of the final order */
 14         /* in consideration of extended mode,it can also used for list of "word group"s */
 15         SortedList<string, WordInfo> WordList = new SortedList<string, WordInfo>();
 16 
 17         DirectoryInfo RootDirectory;
 18         TallyMode Mode;
 19 
 20         public WordManager(string path, TallyMode mode)
 21         {
 22             if (!Directory.Exists(path))
 23                 throw new IOException("The directory specified doesn't exist!");
 24 
 25             RootDirectory = new DirectoryInfo(path);
 26             Mode = mode;
 27         }
 28 
 29         /// <summary>
 30         /// 
 31         /// </summary>
 32         /// <param name="extensions">
 33         /// each extension should contains prefix dot, e.g., ".png"
 34         /// </param>
 35         public void ScanAndCount(IList<string> extensions, bool showLogs)
 36         {
 37             /* scan files and sub-directories recursively */
 38             var resultFileInfos =
 39                 from fileInfo in RootDirectory.EnumerateFiles("*", SearchOption.AllDirectories)
 40                 where ((from ext in extensions where ext.Equals(fileInfo.Extension, StringComparison.OrdinalIgnoreCase) select ext).Count() > 0)
 41                 select fileInfo;
 42 
 43             /* count words in each file */
 44             foreach (var fileInfo in resultFileInfos)
 45             {
 46                 if (showLogs)
 47                     Console.WriteLine("Scanning {0}", fileInfo.FullName);
 48                 CountWords(fileInfo);
 49             }
 50         }
 51 
 52         public void Output(bool sort, TextWriter writer)
 53         {
 54             var wordInfoList = WordList.Values.ToList();
 55 
 56             if (sort)
 57                 wordInfoList.Sort();
 58 
 59             if (Mode == TallyMode.Normal)
 60                 foreach (var wordInfo in wordInfoList)
 61                     writer.WriteLine("{0}: {1}", wordInfo.Word, wordInfo.Frequency);
 62             else
 63                 for (int i = 0; i < WordList.Count() && i < 10; i++)
 64                     writer.WriteLine("{0}: {1}", wordInfoList[i].Word, wordInfoList[i].Frequency);
 65         }
 66 
 67         /* count words in specified file and store them to WordList */
 68         private void CountWords(FileInfo fileInfo)
 69         {
 70             FileStream readStream = fileInfo.OpenRead();
 71             StreamReader reader = new StreamReader(readStream, Encoding.Default);
 72             string text = reader.ReadToEnd();
 73 
 74             int i = 0;
 75             while (i < text.Length)
 76             {
 77                 string word;
 78 
 79                 if ((word = CurrentWord(text, i)) == null)
 80                 {
 81                     i++;
 82                     continue;
 83                 }
 84 
 85                 i += word.Length;
 86 
 87                 string word1 = null;
 88                 if (Mode == TallyMode.E2 || Mode == TallyMode.E3)
 89                 {
 90                     if (i >= text.Length || text[i] != ' ' || (word1 = CurrentWord(text, i + 1)) == null)
 91                         continue;
 92                     word += ' ' + word1;
 93                 }
 94 
 95                 string word2 = null;
 96                 if (Mode == TallyMode.E3)
 97                 {
 98                     if (i + word1.Length + 1 >= text.Length || text[i + word1.Length + 1] != ' ' ||
 99                         (word2 = CurrentWord(text, i + word1.Length + 2)) == null)
100                         continue;
101                     word += ' ' + word2;
102                 }
103 
104                 /* add this word to the list */
105                 string lowerCase = word.ToLower();
106                 if (WordList.ContainsKey(lowerCase))
107                     WordList[lowerCase].Add(word);
108                 else
109                     WordList[lowerCase] = new WordInfo(word);
110 
111             }
112         }
113 
114         /// <summary>
115         /// if there is a legal string starting with s[i], returns it,
116         /// otherwise returns null
117         /// </summary>
118         /// <param name="s"></param>
119         /// <param name="i"></param>
120         /// <returns></returns>
121         private string CurrentWord(string s, int i)
122         {
123             if (i >= s.Length)
124                 return null;
125 
126             /* not alphabetic */
127             if (!IsAlphabetic(s[i]))
128                 return null;
129 
130             /* alphabetic, but not following a delemeter */
131             if (i > 0 && !IsDelimiter(s[i - 1]))
132                 return null;
133 
134             /* legal start, but less than 3 alphabetic successively */
135             if (i + 2 >= s.Length || !IsAlphabetic(s[i + 1]) || !IsAlphabetic(s[i + 2]))
136                 return null;
137 
138             /* build word */
139             StringBuilder wordBuilder = new StringBuilder(s.Substring(i, 3));
140             i += 3;
141             while (i < s.Length && IsAlphanumerical(s[i]))
142             {
143                 wordBuilder.Append(s[i]);
144                 i++;
145             }
146             string word = wordBuilder.ToString();
147 
148             return word;
149         }
150 
151         private bool IsAlphabetic(char c)
152         {
153             return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
154         }
155 
156         private bool IsAlphanumerical(char c)
157         {
158             return IsAlphabetic(c) || c >= '0' && c <= '9';
159         }
160 
161         private bool IsDelimiter(char c)
162         {
163             return !IsAlphanumerical(c);
164         }
165     }
166 }

  代码中的类、函数与变量的命名体现了显式表达原则,让人一看就可以了解其作用与目的,可读性增添不少。

 

 1 using System;
 2 using System.Collections.Generic;
 3 using System.Linq;
 4 using System.Text;
 5 using System.Threading.Tasks;
 6 
 7 namespace WordFrequency
 8 {
 9     class WordInfo : IComparable<WordInfo>
10     {
11         public string Word { get; private set; }
12         public int Frequency { get; private set; }
13 
14         public WordInfo(string word)
15         {
16             Word = word;
17             Frequency = 1;
18         }
19 
20         public void Add(string newForm)
21         {
22             Frequency++;
23 
24             /* update the word */
25             Word = string.CompareOrdinal(Word, newForm) < 0 ? Word : newForm;
26         }
27 
28         public int CompareTo(WordInfo other)
29         {
30             /* frequency */
31             if (this.Frequency > other.Frequency)
32                 return -1;
33             else if (this.Frequency < other.Frequency)
34                 return 1;
35             /* lexical */
36             else
37                 return string.CompareOrdinal(this.Word, other.Word);
38         }
39     }
40 }

 

  伙伴的代码经过他本人的优化已经体现了正确性与高效性,但是也有些细节可以进一步提高。

  首先,代码中增添了些许注释,但是相对于代码来说还不是足够的,可以在再适当增添些,增加代码可读性。

 1 using System;
 2 using System.Collections.Generic;
 3 using System.Linq;
 4 using System.Text;
 5 using System.Threading.Tasks;
 6 
 7 namespace WordFrequency
 8 {
 9     enum TallyMode
10     {
11         Normal,
12         E2,
13         E3
14     }
15 }

  

  然后,他在模式二与模式三时的单词排序时,可以变换下算法,提高程序效率。他的代码在三种模式都是进行所有单词全部排序,而在模式二和模式三时,只需要输出TOP10频率的单词。因而,可以每次输出都遍历下全部单词,输出最高频率,此时的时间复杂度为O(10*N),比现在O(N*log(N))提高不少。

 

 

  最后,也是算法方面可以提高的问题,在WordManager.cs中,如下代码(在WorldManager.cs中的104行开始):

 

/* add this word to the list */
                string lowerCase = word.ToLower();
                if (WordList.ContainsKey(lowerCase))
                    WordList[lowerCase].Add(word);
                else
                    WordList[lowerCase] = new WordInfo(word);

  

  

  这里if判断语句containskey和wordlist[lowercase]执行了两次对lowercase的查找,如果改成一次查找就能缩短一半的时间。像这样:

 /* add this word to the list */
                string lowerCase = word.ToLower();
                WordInfo value;
                WordList.TryGetValue(lowerCase, out value);
                if (value != null)
                    value.Add(word);
                else
                    WordList[lowerCase] = new WordInfo(word);

 

 

 

  以上就是我对黎柱金同学的个人项目代码复审报告。

 

posted @ 2014-10-24 00:10  程序刚  阅读(257)  评论(0编辑  收藏  举报