统计英文文章中单词出现次数

注意:文中包含中文会出错!!!

 

#include <iostream>
#include <vector>
#include <map>
#include <string>
#include <cctype>
#include <fstream>
#include <iomanip>
#include <algorithm>

using namespace std;

// Splits a string into words (definition further down in this file).
vector<string> Split(const string & s);

// Comparison used when sorting the vector of (word, count) pairs.
bool cmp(const pair<string, int> & x, const pair<string, int> & y);

// Writes one line per entry to os in the form "<word>\t<count>", with the
// word left-aligned and padded to a width of 20 characters.
static void WriteCounts(ostream & os, const vector<pair<string, int>> & counts)
{
    // The left-alignment flag is sticky, so it only needs to be set once;
    // setw() applies to the next insertion only and is repeated per entry.
    os << setiosflags(ios::left);
    for (const auto & entry : counts)
    {
        // '\n' instead of endl: avoids flushing the stream after every line.
        os << setw(20) << entry.first << "\t" << entry.second << '\n';
    }
}

// Reads a file path from standard input, counts how often each word occurs
// in that file, prints the counts (highest first) to standard output and
// saves the same listing to "counts.txt" in the current directory.
//
// Returns 0 on success and 1 if the input file cannot be opened or the
// result file cannot be written.
int main()
{
    // Path of the text file to analyse, read as one full line so that paths
    // containing spaces work.
    string filePath;
    getline(cin, filePath);

    // Open the input file and fail loudly; without this check a wrong path
    // would silently yield an empty result.
    ifstream in(filePath/*"2012CET4.txt"*/);
    if (!in)
    {
        cerr << "Cannot open input file: " << filePath << endl;
        cin.get();    // same "press Enter" pause as on the success path
        return 1;
    }

    // Accumulate the number of occurrences of every word, line by line.
    map<string, int> counters;
    string line;
    while (getline(in, line))
    {
        for (const string & word : Split(line))
        {
            ++counters[word];
        }
    }

    // Copy the (word, count) pairs into a vector and order them by count.
    // The map iterates in alphabetical order and stable_sort keeps that
    // order among words with equal counts, so the output is deterministic.
    vector<pair<string, int>> sv(counters.begin(), counters.end());
    stable_sort(sv.begin(), sv.end(), cmp);

    // Print the result.
    WriteCounts(cout, sv);

    // Save the result. Writing to a stream that failed to open is a no-op,
    // so a single check after close() covers both open and write errors.
    int status = 0;
    ofstream out("counts.txt");
    WriteCounts(out, sv);
    out.close();
    if (!out)
    {
        cerr << "Cannot write result file: counts.txt" << endl;
        status = 1;
    }

    // Wait for Enter before exiting (cin is tied to cout, so pending output
    // is flushed here).
    cin.get();

    return status;
}

// Splits a line of text into words.
//
// A word is a maximal run of non-whitespace characters (as classified by
// isspace). If the last character of a word is punctuation (as classified by
// ispunct), that single character is dropped. Tokens that are empty after
// this (for example a lone "-") are skipped instead of being returned as
// empty words.
//
// s: the text to split.
// Returns the words in order of appearance; empty when s contains no words.
vector<string> Split(const string & s)
{
    vector<string> ret;
    typedef string::size_type string_size;
    const string_size n = s.size();
    string_size i = 0;

    while (i != n)
    {
        // Skip whitespace in front of the next word. The cast to unsigned
        // char is required: passing a negative char value (any non-ASCII
        // byte where char is signed) to the <cctype> functions is undefined
        // behavior.
        while (i != n && isspace(static_cast<unsigned char>(s[i])))
            ++i;

        // Advance j to one past the end of the word starting at i.
        string_size j = i;
        while (j != n && !isspace(static_cast<unsigned char>(s[j])))
            ++j;

        if (i != j)
        {
            // Drop one trailing punctuation character, if present.
            string_size len = j - i;
            if (ispunct(static_cast<unsigned char>(s[j - 1])))
                --len;

            // A token consisting of a single punctuation mark is not a word.
            if (len != 0)
                ret.push_back(s.substr(i, len));

            i = j;
        }
    }

    return ret;
}

// Ordering predicate for sorting (word, count) pairs: returns true when x has
// a strictly larger count than y, which gives a descending-by-count order.
// The words themselves are not compared.
bool cmp(const pair<string, int> & x, const pair<string, int> & y)
{
    const int lhsCount = x.second;
    const int rhsCount = y.second;
    return rhsCount < lhsCount;
}

 

 

 

posted @ 2013-11-14 22:10  BornThisWay  阅读(1841)  评论(0)  编辑  收藏  举报