《构建之法》—— 系统分析与设计第四次作业

第四次作业——结对编程WordCount

github项目地址	作业项目地址
结对编程伙伴博客地址	作业要求链接
作业要求地址	作业要求地址

一、PSP表格及代码规范

（1）PSP表格

PSP2.1	Personal Software Process Stages	预估耗时（分钟）	实际耗时（分钟）
Planning	计划	30	30
· Estimate	· 估计这个任务需要多少时间	30	30
Development	开发	770	720
· Analysis	· 需求分析 (包括学习新技术)	300	240
· Design Spec	· 生成设计文档	30	20
· Design Review	· 设计复审 (和同事审核设计文档)	30	20
· Coding Standard	· 代码规范 (为目前的开发制定合适的规范)	20	15
· Design	· 具体设计	30	30
· Coding	· 具体编码	300	315
· Code Review	· 代码复审	30	40
· Test	· 测试（自我测试，修改代码，提交修改）	30	40
Reporting	报告	150	120
· Test Report	· 测试报告	60	45
· Size Measurement	· 计算工作量	30	30
· Postmortem & Process Improvement Plan	· 事后总结, 并提出过程改进计划	60	45
	合计	950	870

（2）代码规范：

命名规范：1.类名首字母要大写，使用能够反映类功能的名词或名词短语命名类。

2.类成员变量首单词小写，变量名前可加_前缀。

3.方法名第一个字符要大写，且应使用动词或动词短语。

4.参数首字符小写，采用描述性参数名称。

5.接口名称要有意义，接口修饰符只能用public和internal。

6.每条语句至少占一行，过长语句断为两行显示。

7.语句嵌套不超过3层。

详细代码规范见：代码规范

二、结对过程，以及讨论的结对照片

结对过程：在这里插入图片描述

emmm 真香。

结对编程的照片：
在这里插入图片描述

三、解题思路

由于二人对C#都并不熟悉，解题思路就是百度，每遇到一个问题就查，边写边学。

需求分析

参考作业要求我们将项目目标概括为如下几点：

统计文件中的字符总数。（只考虑ASCII码，并将分隔符考虑在内）
统计文件中的单词总数。（将以四个英文字母开头的字符串视为单词，不区分大小写）
统计文件的有效行数。（包含非空白字符的行均需纳入统计）
统计单词频数，并将频数最高的十个单词输出到屏幕和txt文件中。（按频率降序输出，频率相同时，优先输出字典序靠前的）
接口封装。
添加词组统计和自定义输出的新功能。
视任务进度与情况完成GUI界面。

四、代码说明

统计词频、字符数、行数

public class WordCalculate
    {
        public long charactersnumber = 0;  // running total of characters seen
        public long wordsnumber = 0;  // running total of words inserted into the trie
        public long linesnumber = 0;  // running total of lines containing a non-whitespace character
        public long phrasenumber = 0; // phrase total; not updated anywhere in this class

        /// <summary>
        /// Processes one line of text: splits it into tokens made of ASCII letters
        /// and digits (upper-case letters are folded to lower-case), inserts every
        /// token whose first character is a letter into <paramref name="wtrie"/>,
        /// and updates the running character / word / line totals.
        /// </summary>
        /// <param name="dataline">One line of input. A null or empty line is ignored.</param>
        /// <param name="wtrie">Trie that receives each recognised word.</param>
        /// <remarks>
        /// The character total only adds <c>dataline.Length</c>; whatever line
        /// terminator the caller stripped is not included.
        /// NOTE(review): the stated requirement is that a word starts with at least
        /// four letters; this method only checks the first character - confirm which
        /// rule is intended.
        /// </remarks>
        public void Calculate(string dataline, WordTrie wtrie)
        {
            if (string.IsNullOrEmpty(dataline))
            {
                return;
            }

            // A token can never be longer than the line, so one buffer is enough.
            char[] buffer = new char[dataline.Length];
            int wordLength = 0;
            long insertedWords = 0;
            bool hasVisibleChar = false;

            for (int i = 0, len = dataline.Length; i < len; i++)
            {
                char unit = dataline[i];
                if (!char.IsWhiteSpace(unit))
                {
                    hasVisibleChar = true;
                }

                // Fold ASCII upper-case to lower-case.
                if (unit >= 'A' && unit <= 'Z')
                {
                    unit = (char)(unit + 32);
                }

                bool isDigit = unit >= '0' && unit <= '9';
                bool isLetter = unit >= 'a' && unit <= 'z';
                if (isDigit || isLetter)
                {
                    buffer[wordLength++] = unit;
                    continue;
                }

                // Any other character ends the current token.
                insertedWords += FlushWord(buffer, wordLength, wtrie);
                wordLength = 0;
            }

            // A token may run up to the end of the line.
            insertedWords += FlushWord(buffer, wordLength, wtrie);

            // Only lines with at least one non-whitespace character count as valid lines.
            if (hasVisibleChar)
            {
                this.linesnumber++;
            }

            // Add only the words found on this line. wtrie.CountSum is the cumulative
            // total of the whole trie, so adding it per line would over-count.
            this.wordsnumber += insertedWords;
            this.charactersnumber += dataline.Length;
        }

        // Inserts the buffered token into the trie when it is non-empty and starts
        // with a letter; returns the number of words inserted (0 or 1).
        private static int FlushWord(char[] buffer, int length, WordTrie wtrie)
        {
            if (length == 0)
            {
                return 0;
            }
            if (buffer[0] < 'a' || buffer[0] > 'z')
            {
                return 0;
            }
            wtrie.Insert(new string(buffer, 0, length));
            return 1;
        }
    }

读取文件

 /// <summary>
        /// Reads the file at <c>this.pathIn</c> line by line and feeds every line to
        /// <paramref name="datanumber"/>, which updates its totals and fills
        /// <paramref name="wtrie"/>.
        /// </summary>
        /// <param name="datanumber">Accumulator that receives each line.</param>
        /// <param name="wtrie">Trie passed through to the accumulator.</param>
        /// <returns>The same <paramref name="datanumber"/> instance.</returns>
        public WordCalculate Input(WordCalculate datanumber, WordTrie wtrie)
        {
            try
            {
                // Open read-only: without an explicit FileAccess the stream asks for
                // read/write access and fails on read-only input files.
                using (FileStream fs = new FileStream(this.pathIn, FileMode.Open, FileAccess.Read))
                using (StreamReader sr = new StreamReader(fs))
                {
                    string dataline;
                    while ((dataline = sr.ReadLine()) != null)
                    {
                        datanumber.Calculate(dataline, wtrie);  // accumulate statistics line by line
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort read: report the failure and return whatever was gathered.
                Console.WriteLine("文档读取失败！");
                Console.Error.WriteLine(ex.Message);
            }
            return datanumber;
        }

文件写入

/// <summary>
        /// Writes the character / word / line totals and the <paramref name="n"/> most
        /// frequent words to the file at <c>this.pathOut</c>, and echoes them to the console.
        /// </summary>
        /// <param name="datanumber">Totals to report.</param>
        /// <param name="wtrie">Trie supplying the sorted word list.</param>
        /// <param name="n">Maximum number of words to list.</param>
        public void Output(WordCalculate datanumber, WordTrie wtrie,int n)
        {
            // Failures are not caught here; the using blocks still close the file.
            using (FileStream fs = new FileStream(this.pathOut, FileMode.Create))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                List<WordTrie.ListUnit> wordList = wtrie.Sort();

                sw.WriteLine(String.Concat("characters:", datanumber.charactersnumber, "\n"));
                sw.WriteLine(String.Concat("words:", datanumber.wordsnumber, "\n"));
                sw.WriteLine(String.Concat("lines:", datanumber.linesnumber, "\n"));
                sw.WriteLine("\n词频\t单词\n");

                Console.WriteLine(String.Concat("characters：", datanumber.charactersnumber));
                Console.WriteLine(String.Concat("words：", datanumber.wordsnumber));
                Console.WriteLine(String.Concat("lines：", datanumber.linesnumber, "\n"));

                // Bound by the number of distinct words in the list, not by the total
                // word count: the total can exceed the list length and run the index
                // past the end of the list.
                for (int i = 0; i < n && i < wordList.Count; i++)
                {
                    string entry = wordList[i].Word + "：" + String.Concat(wordList[i].WordNum);
                    sw.WriteLine(entry);
                    Console.WriteLine(entry);
                }
            }
        }

利用Trie树统计词频
（按理说用字典类更好做的······ trie统计词频的效率会更高一点，但是完全没有办法写词组，最后还是向字典类低头···）

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace wordCount
{
    // Trie that counts how often each inserted word occurs and how many inserted
    // words pass through each prefix.
    class WordTrie
    {
        // One trie node.
        private class TrieNode
        {
            public int PrefixNum = 0;  // number of inserted words passing through this node
            public int WordNum = 0;  // number of inserted words ending at this node
            public Dictionary<char, TrieNode> Sons = new Dictionary<char, TrieNode>();  // child nodes keyed by character
            public bool IsEnd = false;  // set when a word ends here (never read inside this class)
            public char Val;  // character stored on this node (never read inside this class)
            public string Word = null;  // the full word ending at this node, if any

            public TrieNode() { }
            public TrieNode(char val)
            {
                Val = val;
            }
        }

        private TrieNode _Root = new TrieNode();

        // Total number of words inserted (the root's prefix count).
        public int CountSum
        {
            get { return _Root.PrefixNum; }
        }

        // Inserts one occurrence of word; null or empty input is ignored.
        public void Insert(string word)
        {
            if (string.IsNullOrEmpty(word))
            {
                return;
            }

            TrieNode node = _Root;
            node.PrefixNum++;
            foreach (char pos in word)
            {
                // Single lookup instead of ContainsKey followed by the indexer.
                TrieNode child;
                if (!node.Sons.TryGetValue(pos, out child))
                {
                    child = new TrieNode(pos);
                    node.Sons[pos] = child;
                }
                child.PrefixNum++;
                node = child;
            }
            node.Word = word;
            node.IsEnd = true;
            node.WordNum++;
        }

        // Number of inserted words that start with prefix
        // (-1 for null/empty input, 0 when the prefix is absent).
        public int PrefixCount(string prefix)
        {
            return GetCount(prefix, false);
        }

        // Number of times word was inserted
        // (-1 for null/empty input, 0 when the word is absent).
        public int WordCount(string word)
        {
            return GetCount(word, true);
        }

        // Walks the trie along str and returns the word count or prefix count of
        // the node reached.
        private int GetCount(string str, bool isword)
        {
            if (string.IsNullOrEmpty(str))
            {
                return -1;
            }

            TrieNode node = _Root;
            foreach (char pos in str)
            {
                TrieNode child;
                if (!node.Sons.TryGetValue(pos, out child))
                {
                    return 0;
                }
                node = child;
            }
            return isword ? node.WordNum : node.PrefixNum;
        }

        // True when word was inserted at least once.
        public bool ContainsWord(string word)
        {
            return WordCount(word) > 0;
        }

        // One entry of the word list.
        public class ListUnit
        {
            public string Word;  // the word
            public int WordNum;  // its frequency
        }

        // Returns every stored word, ordered by descending frequency; words with
        // equal frequency are ordered by ordinal (character-code) string order.
        public List<ListUnit> Sort()
        {
            List<ListUnit> WordList = new List<ListUnit>();
            WordPreOrder(_Root, WordList);
            WordList.Sort((a, b) =>
            {
                int byCount = b.WordNum.CompareTo(a.WordNum);
                if (byCount != 0)
                {
                    return byCount;
                }
                // Ordinal comparison keeps the tie-break independent of the current
                // culture; string.CompareTo is culture-sensitive.
                return string.CompareOrdinal(a.Word, b.Word);
            });
            return WordList;
        }

        // Collects every stored word below node into WordList (pre-order traversal)
        // and returns the same list.
        private List<ListUnit> WordPreOrder(TrieNode node, List<ListUnit> WordList)
        {
            if (node.PrefixNum == 0)
            {
                return WordList;
            }
            if (node.WordNum != 0)
            {
                ListUnit unit = new ListUnit();
                unit.Word = node.Word;
                unit.WordNum = node.WordNum;
                WordList.Add(unit);
            }
            foreach (TrieNode child in node.Sons.Values)
            {
                WordPreOrder(child, WordList);
            }
            return WordList;
        }
    }
}

关于Trie树的相关参考如下：字典树(trie树)实现词频查找

五、测试及性能分析

详见我队友写的博客
传送门

六、收获与体会

找队友要提早，差点成为孤家寡人。
明明有好用的字典类，就不要想着搞花里胡哨的东西（明示trie树，这玩意给后续工作带来了极大的困难）。
结对编程效率的确是要比一个人要高出不少，一个人的时候经常会进入一种迷之脑回路，队友可以救你出来。两个人交换意见也可以更快速的找出解决方案。
结对编程中要特别注意沟通问题，两个人的想法不一致可能会导致各干各的···，多交流沟通才能更好地完成项目。（团队体量越大，沟通越重要啊）。
对C#还是不熟悉，基本靠百度和菜鸟教程解决问题，真的让人很是头大。

posted @ 2019-10-14 17:08 LeeHan 阅读(187) 评论(1) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

LeeHan