wordcount
一、需求分析:
1、统计代码的行数、字符数、单词数等。
2、待统计的代码从文件中读入,支持txt、exe、cpp等格式的文件。
3、可以统计多个文件,文件名称由用户输入。
4、扩展功能为统计注释行数,输出各单词出现的频率,输出26个英文字母出现次数。
二、数据结构:
/* One word-frequency entry: `data` holds the word as a C string (15 bytes, so at most 14 characters plus the terminating NUL) and `num` is its occurrence count. */
/* `vocabulary` is the fixed-size table of 200 such entries. */
struct word { char data[15]; int num; }; struct word vocabulary[200];
定义单词的结构体,用来存放各个单词,以及单词出现的次数。
三、功能介绍及代码说明:
1、读文件
void read(char filename[]) { FILE *fp; char ch, a[15]; int i = 0, j = 0; count = 1; size3 = 0; size = 0; fp = fopen(filename, "r"); if (fp == NULL) { printf("the file not found!"); exit(-1); } ch = fgetc(fp); size++; if (ch == '/') { ch = fgetc(fp); size++; if (ch == '/') { size3++; ch = fgetc(fp); size++; } } for (; ch != EOF;) { if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { LetNum(ch); a[i] = ch; i++; } else { if (ch == '/') { ch = fgetc(fp); if (ch == '/') { size3++; ch = fgetc(fp); size++; continue; } } if (ch == '\n') { count++; } if (i != 0) { a[i] = '\0'; strcpy_s(c[j], a); j++; i = 0; } } ch = fgetc(fp); size++; } a[i] = '\0'; strcpy_s(c[j], a); size1 = j; fclose(fp); }
读文件是一个函数,函数的参数为用户输入的文件名称。此函数中包含统计字符数、行数、单词数以及注释行数的功能。读文件使用函数ch=fgetc(fp),每次只读入一个字符。循环读入,ch=EOF时循环结束,关闭文件。每次读完size+1,用来统计总字符数;当读入的字符为‘\n’时,count+1,用来统计总行数;当读入的字符为‘/’,且下一个字符还是‘/’时,size3+1,用来统计注释行数;把一串英文字符识别为单词并存入数组c[200][15]中。
2、统计英文字母出现次数
/*
 * Record one occurrence of a letter in the global letter[] table.
 *
 * ch: the character to classify. 'a'..'z' and 'A'..'Z' both increment the
 *     same slot (letter[0] for a/A ... letter[25] for z/Z); every other
 *     character is ignored.
 *
 * The index arithmetic relies on the letters being contiguous in the
 * execution character set (true for ASCII).
 */
void LetNum(char ch)
{
    if (ch >= 'a' && ch <= 'z') {
        letter[ch - 'a']++;
    } else if (ch >= 'A' && ch <= 'Z') {
        letter[ch - 'A']++;
    }
}
函数为LetNum(char ch);传入参数为每次从文件中读取的字符。结果存在数组letter[26]中,开始要给数组赋初值0。判断ch的值,对应字母+1。
3、单词频数统计
/*
 * Build the word-frequency table from the words collected in c[].
 *
 * Reads c[0 .. size1] (empty entries are skipped, so an empty terminator
 * entry at c[size1] is harmless) and rebuilds vocabulary[0 .. size2-1] from
 * scratch: each distinct word appears once with `num` set to its number of
 * occurrences. size2 is the number of distinct words on return.
 *
 * Only slots below size2 are ever compared, so entries left over from a
 * previous call cannot influence the result.
 */
void Inset(void)
{
    const int word_slots = (int)(sizeof c / sizeof c[0]);
    const int vocab_slots = (int)(sizeof vocabulary / sizeof vocabulary[0]);
    int j;

    size2 = 0;
    for (j = 0; j <= size1 && j < word_slots; j++) {
        int i;
        int found = 0;

        if (c[j][0] == '\0') {
            continue;   /* not a word: terminator entry or empty slot */
        }

        for (i = 0; i < size2; i++) {
            if (strcmp(vocabulary[i].data, c[j]) == 0) {
                vocabulary[i].num++;
                found = 1;
                break;
            }
        }

        if (!found && size2 < vocab_slots) {
            /* Bounded copy; always NUL-terminates the destination. */
            snprintf(vocabulary[size2].data, sizeof vocabulary[size2].data,
                     "%s", c[j]);
            vocabulary[size2].num = 1;
            size2++;
        }
    }
}
函数为Inset();参数为空。结果存入结构体数组vocabulary[200]中。遍历数组c[200][15]里已存好的单词,如果单词已存在于vocabulary[i]中,则vocabulary[i].num++;如果不存在,则把单词存入vocabulary[i].data中,num=1。
4、结果显示:
/*
 * Print the letter statistics held in the global letter[] table.
 *
 * Emits one "<letter>:<count>" line for each of a..z, in alphabetical
 * order, skipping letters whose count is zero.
 */
void DisplayLetter(void)
{
    static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz";
    int i;

    for (i = 0; i < 26; i++) {
        if (letter[i] != 0) {
            printf("%c:%d\n", alphabet[i], letter[i]);
        }
    }
}
此函数显示各字母出现的次数。
void DisplayWord() { Inset(); for (int i = 0; i < size2; i++) { printf("%s:%d\n", vocabulary[i].data, vocabulary[i].num); } return; }
此函数显示单词频数。
/* Excerpt from main(): prints the counters as labelled lines. */
/* In order: size1 (word count), count (line count), size (total characters), size3 (comment lines). */
printf("单词数:%d\n", size1); printf("行数:%d\n", count); printf("总字符数:%d\n", size); printf("注释行数:%d\n", size3);
字符数、单词数、行数、注释行数在主函数中直接输出。
四、调试及分析
程序运行结果截图
代码文件截图:
五、源代码:
/*
 * wordcount: for every file name read from standard input, report the
 * number of words, lines, characters and "//" comment lines, the frequency
 * of each letter, and the frequency of each distinct word.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <Windows.h>
#endif

enum {
    MAX_WORDS = 200,   /* capacity of c[] and vocabulary[] */
    WORD_BUF = 15,     /* bytes per stored word, including the NUL */
    LETTER_COUNT = 26
};

/* One word-frequency entry. */
struct word {
    char data[WORD_BUF];   /* the word, NUL-terminated */
    int num;               /* number of occurrences */
};

struct word vocabulary[MAX_WORDS];   /* distinct words, filled by Inset() */
char c[MAX_WORDS][WORD_BUF];         /* every word in file order, filled by read() */
int letter[LETTER_COUNT] = { 0 };    /* letter[0] = a/A ... letter[25] = z/Z */
int size;        /* characters in the current file */
int size1;       /* words stored in c[] */
int size2 = 0;   /* distinct words stored in vocabulary[] */
int size3;       /* lines containing "//" */
int count;       /* 1 + number of newline characters */

/*
 * Record one occurrence of a letter in letter[]. Upper and lower case share
 * a slot; non-letters are ignored. Relies on the letters being contiguous
 * in the execution character set (true for ASCII).
 */
void LetNum(char ch)
{
    if (ch >= 'a' && ch <= 'z') {
        letter[ch - 'a']++;
    } else if (ch >= 'A' && ch <= 'Z') {
        letter[ch - 'A']++;
    }
}

/*
 * Rebuild vocabulary[0 .. size2-1] from the words in c[0 .. size1-1]: each
 * distinct word appears once, with `num` set to its number of occurrences.
 * Only slots below size2 are compared, so entries left from an earlier
 * file cannot affect the result.
 */
void Inset(void)
{
    int j;

    size2 = 0;
    for (j = 0; j < size1 && j < MAX_WORDS; j++) {
        int i;
        int found = 0;

        for (i = 0; i < size2; i++) {
            if (strcmp(vocabulary[i].data, c[j]) == 0) {
                vocabulary[i].num++;
                found = 1;
                break;
            }
        }

        if (!found && size2 < MAX_WORDS) {
            /* Bounded copy; always NUL-terminates the destination. */
            snprintf(vocabulary[size2].data, sizeof vocabulary[size2].data,
                     "%s", c[j]);
            vocabulary[size2].num = 1;
            size2++;
        }
    }
}

/*
 * Scan one text file and fill size, count, size3, size1, c[] and letter[].
 *
 * A word is a maximal run of the letters a-z / A-Z (words inside comments
 * are counted too). Words longer than WORD_BUF - 1 characters are
 * truncated; words beyond MAX_WORDS are neither stored nor counted.
 * A line is counted as a comment line once, when it contains "//".
 *
 * filename: path of the file to scan. If it cannot be opened, a message is
 *           printed and the process exits with status -1.
 */
void read(char filename[])
{
    FILE *fp;
    int ch;                  /* int, not char: EOF must stay distinguishable */
    int prev = EOF;          /* previous character, used to spot "//" */
    int len = 0;             /* characters stored for the current word */
    int in_word = 0;
    int words = 0;
    int comment_on_line = 0; /* current line has already been counted */
    int k;

    count = 1;
    size3 = 0;
    size = 0;
    for (k = 0; k < LETTER_COUNT; k++) {
        letter[k] = 0;       /* statistics are per file, like the other counters */
    }

    fp = fopen(filename, "r");
    if (fp == NULL) {
        printf("the file not found!");
        exit(-1);
    }

    for (;;) {
        ch = fgetc(fp);

        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
            LetNum((char)ch);
            if (words < MAX_WORDS && len < WORD_BUF - 1) {
                c[words][len++] = (char)ch;
            }
            in_word = 1;
        } else if (in_word) {
            /* Any non-letter, including EOF, terminates the current word. */
            if (words < MAX_WORDS) {
                c[words][len] = '\0';
                words++;
            }
            len = 0;
            in_word = 0;
        }

        if (ch == EOF) {
            break;
        }

        size++;
        if (ch == '\n') {
            count++;
            comment_on_line = 0;
        } else if (ch == '/' && prev == '/' && !comment_on_line) {
            size3++;
            comment_on_line = 1;
        }
        prev = ch;
    }

    size1 = words;
    fclose(fp);
}

/*
 * Print one "<letter>:<count>" line for each of a..z whose count in
 * letter[] is non-zero, in alphabetical order.
 */
void DisplayLetter(void)
{
    static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz";
    int i;

    for (i = 0; i < LETTER_COUNT; i++) {
        if (letter[i] != 0) {
            printf("%c:%d\n", alphabet[i], letter[i]);
        }
    }
}

/* Rebuild the frequency table, then print each entry as "<word>:<count>". */
void DisplayWord(void)
{
    int i;

    Inset();
    for (i = 0; i < size2; i++) {
        printf("%s:%d\n", vocabulary[i].data, vocabulary[i].num);
    }
}

/*
 * Read whitespace-separated file names from standard input until input
 * ends, printing the statistics for each file in turn.
 */
int main(void)
{
    char filename[20];

    /* The field width keeps scanf from writing past filename[]. */
    while (scanf("%19s", filename) == 1) {
        read(filename);
        printf("单词数:%d\n", size1);
        printf("行数:%d\n", count);
        printf("总字符数:%d\n", size);
        printf("各个字母的个数\n");
        DisplayLetter();
        printf("各个单词并显示单词个数\n");
        DisplayWord();
        printf("注释行数:%d\n", size3);
    }
    return 0;
}
六、总结
这个程序还有很多不足,需要慢慢完善,功能还不够强大:没有统计空行和代码行;只统计了“//”这一种表示方式的注释,没有统计“/*... ...*/”这种方式的注释。以后我也会多加思考,改进我的代码,完善功能。