统计英文文章中单词出现次数
注意:文中包含中文会出错!!!
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

using namespace std;

// Word-frequency counter.
//
// Reads a file path from standard input, counts how often each
// whitespace-separated word occurs in that file, prints the counts in
// descending order and also saves them to "counts.txt".
//
// Define WORD_COUNT_NO_MAIN before this code to compile only the helper
// functions (without the program entry point), e.g. for unit tests.

// Splits a line of text into words.
vector<string> Split(const string & s);

// Ordering predicate used to sort (word, count) pairs.
bool cmp(const pair<string, int> & x, const pair<string, int> & y);

// Counts the words read from `in` and returns them ordered by `cmp`.
static vector<pair<string, int>> CountWords(istream & in);

// Writes one formatted "word<TAB>count" row per entry to `os`.
static void WriteCounts(ostream & os, const vector<pair<string, int>> & counts);

// The <cctype> classifiers have undefined behaviour for negative arguments,
// which is what a plain `char` holding a non-ASCII byte (e.g. part of a
// multi-byte character) becomes on platforms where `char` is signed.
// Converting to unsigned char first makes every byte value safe to classify.
static inline bool IsSpace(char c)
{
    return isspace(static_cast<unsigned char>(c)) != 0;
}

static inline bool IsPunct(char c)
{
    return ispunct(static_cast<unsigned char>(c)) != 0;
}

#ifndef WORD_COUNT_NO_MAIN
int main()
{
    // Path of the text file to analyse, e.g. "2012CET4.txt".
    string filePath;
    getline(cin, filePath);

    ifstream in(filePath);
    if (!in)
    {
        cerr << "Cannot open input file: " << filePath << '\n';
        cin.get(); // keep the console open, as on the success path
        return 1;
    }

    const vector<pair<string, int>> counts = CountWords(in);

    // Print the result.
    WriteCounts(cout, counts);

    // Save the result.
    int status = 0;
    ofstream out("counts.txt");
    if (out)
    {
        WriteCounts(out, counts);
        out.close();
    }
    else
    {
        cerr << "Cannot open output file: counts.txt" << '\n';
        status = 1;
    }

    cin.get(); // wait for a key press before exiting
    return status;
}
#endif // WORD_COUNT_NO_MAIN

// Reads `in` line by line, tallies every word produced by Split and returns
// the (word, count) pairs sorted with `cmp`.
static vector<pair<string, int>> CountWords(istream & in)
{
    map<string, int> counters;
    string line;
    while (getline(in, line))
    {
        const vector<string> words = Split(line);
        for (const string & word : words)
        {
            ++counters[word];
        }
    }

    vector<pair<string, int>> sorted(counters.begin(), counters.end());
    sort(sorted.begin(), sorted.end(), cmp);
    return sorted;
}

// Writes each entry as the word left-aligned in a 20-character column,
// followed by a tab, the count and a newline.
static void WriteCounts(ostream & os, const vector<pair<string, int>> & counts)
{
    for (const pair<string, int> & entry : counts)
    {
        os << left << setw(20) << entry.first << "\t" << entry.second << '\n';
    }
}

// Splits `s` into whitespace-separated words.
//
// If the last character of a token is a punctuation mark, that one character
// is removed (so "world." becomes "world"). A token that would be empty
// after this removal, such as a lone "-", is skipped instead of being
// returned as an empty word. Returns an empty vector for an empty or
// all-whitespace string.
vector<string> Split(const string & s)
{
    vector<string> ret;
    const string::size_type size = s.size();
    string::size_type i = 0;
    while (i != size)
    {
        // Skip the whitespace in front of the next token.
        while (i != size && IsSpace(s[i]))
            ++i;

        // Find the end of the token: [i, j) contains no whitespace.
        string::size_type j = i;
        while (j != size && !IsSpace(s[j]))
            ++j;

        if (i == j)
            break; // only trailing whitespace was left

        // Drop one trailing punctuation character, if present.
        string::size_type length = j - i;
        if (IsPunct(s[j - 1]))
            --length;

        if (length != 0)
            ret.push_back(s.substr(i, length));
        i = j;
    }
    return ret;
}

// Returns true when `x` must be placed before `y`: higher counts first, and
// for equal counts the alphabetically smaller word first. The tie-break
// makes the sorted output deterministic; std::sort gives no guarantee about
// the relative order of elements that compare equivalent.
bool cmp(const pair<string, int> & x, const pair<string, int> & y)
{
    if (x.second != y.second)
        return x.second > y.second;
    return x.first < y.first;
}