结对第二次—文献摘要热词统计及进阶需求

班级：软件工程1916|W
作业：结对第二次—文献摘要热词统计及进阶需求
结对学号：221600107 陈家豪、221600110 公孙骏杰
课程目标：实现一个能够对文本文件中的单词的词频进行统计的控制台程序
Github仓库地址：基础部分GitHub

1.Github签入记录

2.具体分工

我们两个人各自实现了基础部分的两个功能
陈家豪负责编写了字符统计和单词统计函数，撰写博客
公孙骏杰负责编写了行数统计和词频统计函数，签入GitHub

3.PSP表格

PSP2.1	Personal Software Process Stages	预估耗时（分钟）	实际耗时（分钟）
Planning	计划
• Estimate	• 估计这个任务需要多少时间	1280	1400
Development	开发
• Analysis	• 需求分析 (包括学习新技术)	100	100
• Design Spec	• 生成设计文档	180	100
• Design Review	• 设计复审	10	10
• Coding Standard	• 代码规范 (为目前的开发制定合适的规范)	30	30
• Design	• 具体设计	60	60
• Coding	• 具体编码	600	800
• Code Review	• 代码复审	60	60
• Test	• 测试（自我测试，修改代码，提交修改）	120	120
Reporting	报告
• Test Report	• 测试报告	60	60
• Size Measurement	• 计算工作量	20	20
• Postmortem & Process Improvement Plan	• 事后总结, 并提出过程改进计划	40	40
	合计	1280	1400

4.解题思路

拿到题目后我们便直接开始用C语言实现，后来仔细阅读题目才发现要求使用C++，于是只做了少量修改，因此代码中保留了很多C语言的痕迹。编码过程中，许多字符处理函数都上网查阅了一遍，也算是对C语言和C++做了一次复习。

5.设计实现过程

代码主要由四个函数组成：字符统计函数、单词统计函数、行数统计函数、词频统计函数。

单元测试的部分对各个函数都进行了测试：

测试了输出的字符数是否正确
测试了输出的单词数是否正确
测试了输出的行数是否正确
测试了输出的词频排序是否正确

算法设计过程
部分流程图：

题目中要求统计文件的单词总数。单词的定义是：至少以4个英文字母开头，后面可以跟字母或数字；单词之间以分隔符（非字母数字字符）分割，不区分大小写。因此需要记录字符串开头连续字母的数量来进行判断：如果开头是数字，则一直读到下一个分隔符，该字符串不算单词；如果开头连续读到四个或四个以上的字母，说明该字符串为单词，则继续读取直至分隔符。
部分测试过程：
第一篇文章：

第二篇文章：

6.关键代码

字符统计函数

// Counts every character read from "input.txt" and appends the total to
// "result.txt" as "characters：<n>.".
//
// Side effects: stdout is re-opened onto "result.txt" (append mode) with
// freopen and stays redirected after this function returns.
// If "input.txt" cannot be opened, "file read failure." is printed to the
// current stdout and the function returns without writing a count.
void CharCount() // character count function
{
    FILE *fp = fopen("input.txt","r");
    if (fp == NULL)
    {
        printf("file read failure.");
        return; // there is no stream to read; fgetc(NULL) would crash
    }

    int total = 0;
    // fgetc returns an int so that EOF can be told apart from a data byte
    // such as 0xFF; storing the result in a char loses that distinction.
    int ch;
    while ((ch = fgetc(fp)) != EOF)
    {
        ++total;
    }
    fclose(fp);

    if (freopen("result.txt","a",stdout) == NULL)
    {
        // stdout is unusable after a failed freopen, so report on stderr.
        fprintf(stderr, "result file open failure.");
        return;
    }
    printf("characters：%d.\n",total);
}

单词数统计函数

// Counts the words in "input.txt" and appends the total to "result.txt" as
// "words：<n>.".
//
// A token is a maximal run of ASCII letters and digits; every other character
// is a delimiter. A token is a word when it starts with at least four
// letters, so "file123" and "abcd1efgh" are one word each, while "123file"
// and "abc1defg" are not words.
//
// Side effects: stdout is re-opened onto "result.txt" (append mode) with
// freopen and stays redirected after this function returns.
// If "input.txt" cannot be opened, "file read failure." is printed to the
// current stdout and the function returns without writing a count.
void WordCount() // word count function
{
    FILE *fp = fopen("input.txt","r");
    if (fp == NULL)
    {
        printf("file read failure.");
        return; // there is no stream to read; fgetc(NULL) would crash
    }

    int w = 0;
    // int, not char: EOF must stay distinguishable from every data byte.
    int ch = fgetc(fp);
    while (ch != EOF)
    {
        // Consume one whole token (possibly empty when ch is a delimiter),
        // counting how many letters it starts with.
        int leadingLetters = 0;
        bool inLetterPrefix = true;
        while (ch != EOF)
        {
            const bool isLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
            const bool isDigit = (ch >= '0' && ch <= '9');
            if (!isLetter && !isDigit)
            {
                break;
            }
            if (inLetterPrefix && isLetter)
            {
                ++leadingLetters;
            }
            else
            {
                inLetterPrefix = false; // first digit ends the letter prefix
            }
            ch = fgetc(fp);
        }

        if (leadingLetters >= 4)
        {
            ++w;
        }

        // ch is now EOF or the delimiter that ended the token; step past it.
        if (ch != EOF)
        {
            ch = fgetc(fp);
        }
    }
    fclose(fp);

    if (freopen("result.txt","a",stdout) == NULL)
    {
        // stdout is unusable after a failed freopen, so report on stderr.
        fprintf(stderr, "result file open failure.");
        return;
    }
    printf("words：%d.\n",w);
}

行数统计函数

// Counts the lines in "input.txt" and appends the total to "result.txt" as
// "lines：<n>.".
//
// The reported value is the number of '\n' characters plus one.
// NOTE(review): with this rule an empty file reports 1 and a file ending in
// a newline counts the empty tail as a line — confirm against the assignment
// before changing it.
//
// Side effects: stdout is re-opened onto "result.txt" (append mode) with
// freopen and stays redirected after this function returns.
// If "input.txt" cannot be opened, "file read failure." is printed to the
// current stdout and the function returns without writing a count.
void LineCount() // line count function
{
    FILE *fp = fopen("input.txt","r");
    if (fp == NULL)
    {
        printf("file read failure.");
        return; // there is no stream to read; fgetc(NULL) would crash
    }

    int l = 1;
    // int, not char: EOF must stay distinguishable from every data byte.
    int ch;
    while ((ch = fgetc(fp)) != EOF)
    {
        if (ch == '\n')
        {
            ++l;
        }
    }
    fclose(fp);

    if (freopen("result.txt","a",stdout) == NULL)
    {
        // stdout is unusable after a failed freopen, so report on stderr.
        fprintf(stderr, "result file open failure.");
        return;
    }
    printf("lines：%d.\n",l);
}

词频统计函数

// One frequency-table entry: first = the word, second = its occurrence count.
typedef std::pair<std::string, int> PAIR;

// Comparison function for sorting (word, count) entries by count, largest
// first. Returns true when lhs has a strictly larger count than rhs.
bool cmp_by_value(const PAIR& lhs, const PAIR& rhs)
{
    const int leftCount = lhs.second;
    const int rightCount = rhs.second;
    return rightCount < leftCount;
}

struct CmpByValue
{
    bool operator()(const PAIR& lhs, const PAIR& rhs)
    {
        return lhs.second > rhs.second;
    }
};

map<string,int> words;
void Transform()
{
#ifdef LOCAL
    freopen("input.txt", "r", stdin); 
    freopen("result.txt", "a", stdout);
#endif // LOCAL
    string s;
    words.clear();
    while(cin>>s)
    {
        transform(s.begin(), s.end(), s.begin(), ::tolower);
        if(!words.count(s)) words[s]=0;
        words[s]++;
    }

    //把 map 中元素转存到 vector 中
    vector<PAIR> words_vec(words.begin(), words.end());
    sort(words_vec.begin(), words_vec.end(), CmpByValue());
// sort(name_score_vec.begin(), name_score_vec.end(), cmp_by_value);
    int top10=0;
    for (int i = 0; i != words_vec.size(); ++i)
    {
    	if(top10!=10)
    	{
    		
				if(words_vec[i].first.length()>=4)
    	if(isalpha(words_vec[i].first.at(0)))
    	{
    		 cout <<"<"<< words_vec[i].first<<">"<<" "<<words_vec[i].second << endl;
    		 top10++;
		}
		}
    
		else break;
       
    }

}

7.遇到的困难及解决方法

题目中有些需求比较难理解，有时做到一半才发现需求和自己的理解有出入，只能回过头来一点点修改。

8.评价你的队友

合作很愉快！

posted on 2019-03-15 18:58 哈库呐玛塔塔611 阅读(128) 评论(1) 编辑收藏举报