在一个英语文本中找出出现频率最高的10个单词
本次作业要统计一个英语文本中出现频率最高的10个单词,所以一定会用到文本文件的输入和输出。比较难的是怎样将每个单词的字母大小写统一后再进行比较,以及怎样根据单词与单词之间的空格把单词切分出来;然后要统计每个单词出现的频率,并且将各个单词出现的频率进行比较,输出出现频率最高的10个单词。但是因为对C++中的文件操作学得并不是很好,所以程序编写遇到了很多障碍!
#include<iostream>
#include<fstream>
#include<string>
#include<sys/timeb.h>
#include<windows.h>
using namespace std;
// One node of the singly linked word-frequency list.
struct Word
{
    char  name[30];  // NUL-terminated text of the word
    int   num;       // how many times the word has been seen
    Word *next;      // following node, or NULL at the end of the list
};
// Processing stages, declared in the order main() invokes them.
// `head` is always the sentinel node; the real entries start at head->next.
void ReadFile(struct Word *head);        // fill the list with the words of the input file
void Sort(struct Word *head);            // reorder the list by descending count
void OutOff(struct Word *head, int n);   // print the leading n entries of the list
void main()
{
long t;
struct Word *head;
head=new Word;
head->next=NULL;
cout<<endl;
LARGE_INTEGER BegainTime ;
LARGE_INTEGER EndTime ;
LARGE_INTEGER Frequency ;
QueryPerformanceFrequency(&Frequency);
QueryPerformanceCounter(&BegainTime) ;
ReadFile(head);
Sort(head);
OutOff(head,10);
QueryPerformanceCounter(&EndTime) ;
}
void ReadFile(struct Word *head)//读入文件识别并储存单词,统计每个单词出现的次数
{
char a,Tword[30];
struct Word *p;
ifstream infile("father.txt");
infile>>noskipws;
if(!infile)
{
cout<<"cannot open!"<<endl;
return;
}
while(infile)
{
int i=0;
infile.get(a);
Tword[0]=' ';//标记位用于判断是否输入单词
while((a>='a'&&a<='z')||(a>='A'&&a<='Z')||temp[0]==' ')
{
if(a>='a'&&a<='z'||a>='A'&&a<='Z')
{
Tword[i]=a;
i++;
}
infile.get(a);
if(infile.eof())
break;
}
Tword[i]='\0';
p=head->next;
while(p)
{
if(!_stricmp(Tword,p->name))
{
p->num++;
break;
}
p=p->next;
}
if(!p&&Tword[0]!='\0')
{
p=new Word;
strcpy(p->name,Tword);
p->num=1;
p->next=head->next;
head->next=p;
}
}
infile.close();
}
// Prints a header line followed by up to `n` entries of the list behind the
// sentinel `head`, one per line, as "index word------count" with the index
// starting at 0. Stops early when the list holds fewer than `n` entries.
void OutOff(struct Word *head,int n)
{
    cout<<" 序号"<<" "<<"单词"<<" "<<"频率"<<endl;
    int i=0;
    for(Word *p=head->next;p&&i<n;p=p->next,++i)
    {
        cout<<" "<<i<<" "<<p->name<<"------"<<p->num<<endl;
    }
}
// Sorts the list behind the sentinel `head` by descending count using an
// insertion sort that relinks the existing nodes (nothing is allocated).
//
// Each node is placed in front of the first already-sorted node whose count
// is not greater than its own, so among equal counts the node processed
// later ends up first. An empty list is left unchanged.
void Sort(struct Word *head)
{
    Word *pending = head->next;  // nodes still waiting to be placed
    head->next = NULL;           // the sorted list starts out empty

    while (pending)
    {
        Word *node = pending;
        pending = pending->next;

        // Walk the link that should end up pointing at `node`.
        Word **link = &head->next;
        while (*link && (*link)->num > node->num)
            link = &(*link)->next;

        node->next = *link;
        *link = node;
    }
}