单词统计

单词统计
题目描述：
输入一段英文文本，用程序统计出现频率最高和最低的两个单词;
英文文本中仅出现这四类字符：空格( )、英文逗号(,)、英文句号(.)、英文大小写字母（a-z、A-Z）
单词之间的分隔符仅考虑这三种：空格( )、英文逗号(,)、英文句号(.); 仅大小写不同的单词算同一个单词；
如果两个单词出现次数相同，则在文本中首次出现的单词优先返回。返回的单词统一用小写字母。例如：
输入字符串“Hello world, i said hello world to the world”，返回“world”、“i”；输入字符串“Somebody like somebody,i do not like it”，返回“somebody”、“i”。要求实现函数：
void WordStat(const char * pInputStr, char * pOutputHotWord, char * pOutputColdWord);

【输入】pInputStr：输入字符串，指向一段英文文本

【输出】pOutputHotWord：输出字符串，返回出现次数最多的单词，该指针所指存储空间已经分配好，且足够大

pOutputColdWord：输出字符串，返回出现次数最少的单词，该指针所指存储空间已经分配好，且足够大

#include "stdafx.h"
#include<map>
#include <string>
#include<iostream>
using namespace std;
#define INFINITE 100000000

// Returns true when c is one of the three separators the task recognises:
// space, comma or period.
static bool IsWordSeparator(char c)
{
	return c == ' ' || c == ',' || c == '.';
}

// Writes word into dest as a NUL-terminated C string.
// dest must have room for word.size() + 1 characters.
static void CopyWordOut(const std::string &word, char *dest)
{
	const std::string::size_type copied = word.copy(dest, word.size());
	dest[copied] = '\0';
}

/*
 * Finds the most frequent and the least frequent word of an English text.
 *
 * Words are maximal runs of characters that are not a space, comma or period.
 * Upper-case ASCII letters are folded to lower case before counting, so words
 * that differ only in case are the same word. When several words share the
 * same count, the one that appears first in the text wins.
 *
 * pInputStr       NUL-terminated input text; a null pointer is treated as
 *                 empty text.
 * pOutputHotWord  receives the lower-cased most frequent word; the caller
 *                 provides a buffer large enough for the longest word plus
 *                 the terminator.
 * pOutputColdWord receives the lower-cased least frequent word; same buffer
 *                 requirement.
 *
 * If the text contains no word at all, both outputs are set to the empty
 * string.
 */
void WordStat(const char * pInputStr, char * pOutputHotWord, char * pOutputColdWord)
{
	// Per word: first = number of occurrences, second = index of first appearance.
	// std::map iterates in dictionary order, so the appearance index has to be
	// stored explicitly to break ties by position in the text.
	typedef std::pair<int, int> WordInfo;
	typedef std::map<std::string, WordInfo> WordTable;

	WordTable table;
	int nextOrder = 0;

	const char *cursor = pInputStr ? pInputStr : "";
	while (*cursor != '\0')
	{
		if (IsWordSeparator(*cursor))
		{
			++cursor;
			continue;
		}

		// Collect one word. std::string grows as needed, so there is no fixed
		// length limit and nothing to free afterwards.
		std::string word;
		while (*cursor != '\0' && !IsWordSeparator(*cursor))
		{
			char c = *cursor;
			if (c >= 'A' && c <= 'Z')
				c = static_cast<char>(c - 'A' + 'a');
			word += c;
			++cursor;
		}
		// cursor now rests on a separator or on the terminator; it is NOT
		// advanced here, so the outer loop never steps past the terminator.

		// insert() leaves an existing entry untouched, which keeps the
		// first-appearance index of a word that was already seen.
		std::pair<WordTable::iterator, bool> slot =
			table.insert(WordTable::value_type(word, WordInfo(0, nextOrder)));
		if (slot.second)
			++nextOrder;
		++slot.first->second.first;
	}

	if (table.empty())
	{
		pOutputHotWord[0] = '\0';
		pOutputColdWord[0] = '\0';
		return;
	}

	WordTable::iterator hot = table.begin();
	WordTable::iterator cold = table.begin();
	for (WordTable::iterator it = table.begin(); it != table.end(); ++it)
	{
		const int count = it->second.first;
		const int order = it->second.second;

		// Higher count wins; on equal count the earlier word wins.
		if (count > hot->second.first ||
			(count == hot->second.first && order < hot->second.second))
			hot = it;

		// Lower count wins; on equal count the earlier word wins.
		if (count < cold->second.first ||
			(count == cold->second.first && order < cold->second.second))
			cold = it;
	}

	CopyWordOut(hot->first, pOutputHotWord);
	CopyWordOut(cold->first, pOutputColdWord);
}






// Demo driver: runs WordStat on a sample sentence and prints the most frequent
// word followed by the least frequent word, one per line.
int _tmain(int argc, _TCHAR* argv[])
{
	// An earlier experiment here counted words in a map<char*, int>; it was
	// dropped because such a map is keyed on the pointer value rather than on
	// the text it points to.

	// String literals are const, so they are bound to const char *.
	//const char * pInputStr = "Hello world, i said hello world to the world";
	const char * pInputStr = "Somebody like somebody,i do  not like it";

	// Stack buffers: large enough for the sample words and released
	// automatically, so nothing is leaked.
	char outputHotWord[100] = { 0 };
	char outputColdWord[100] = { 0 };

	WordStat(pInputStr, outputHotWord, outputColdWord);

	std::cout << outputHotWord << std::endl;
	std::cout << outputColdWord << std::endl;

	// Keeps the console window open when launched from the IDE.
	system("pause");
	return 0;
}

说两个问题

第一个问题

原来用 map<char*,int> word_count; 来定义 word_count，但 ++word_count[p1]; 的结果不对：此时 map 的键是指针，比较的是地址而不是字符串内容。每个单词都存放在各自的缓冲区里，例如第一个 somebody 和第二个 somebody 的指针并不相同，会被当成两个不同的键，达不到单词计数的目的，所以改用 string 作为键。

第二个问题

map实际存储的顺序是字典序，不是数据先后进来的顺序，具体到这里，word_count存储的顺序就是

do，i，it，like，not，somebody

所以另外构造一个word_order记录顺序。

posted on 2015-07-12 13:45 moffis 阅读(206) 评论(0) 收藏举报

刷新页面返回顶部

单词统计

导航

公告