C++ 10 文件文本查询示例-容器的使用

解决思路

  1. 读取文件到vector容器,下标与行号对应,元素为该行文本。
  2. 构建Map容器,保存单词到所在行数set容器的映射。
  3. 循环读取输入单词,使用map.find()获取行号列表,再根据行号获取对应文本。
  4. 输出结果。

TextQuery.h

#ifndef TEXTQUERY_H
#define TEXTQUERY_H
#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
/// Keeps the lines of a text file together with an index that maps each
/// normalized word to the set of line numbers on which it appears.
class TextQuery
{
public:
	/// Size type returned by size().
	typedef std::string::size_type str_size;

	/// Zero-based line number; an index into the stored lines.
	typedef std::vector<std::string>::size_type line_no;

	/// Reads every line from `is` into the line container and then builds
	/// the word -> line-number index from those lines.
	void read_file(std::ifstream &is)
	{
		std::cout << "读取文件" << std::endl;
		store_file(is);
		build_map();
	}

	/// Looks up a word and returns the set of line numbers it occurs on.
	std::set<line_no> run_query(const std::string&) const;

	/// Returns the text of the line with the given zero-based number.
	std::string text_line(line_no) const;

	/// Returns the number of stored lines.
	str_size size() const
	{
		return lines_of_text.size();
	}

	/// Prints every indexed word followed by its line numbers.
	void display_map();

private:
	/// Appends each line read from the stream to lines_of_text.
	void store_file(std::ifstream&);

	/// Fills word_map from the contents of lines_of_text.
	void build_map();

	/// Normalizes a word: drops punctuation characters and lowercases the rest.
	static std::string cleanup_str(const std::string&);

	/// The file contents, one element per line.
	std::vector<std::string> lines_of_text;

	/// Normalized word -> line numbers on which the word occurs.
	std::map<std::string, std::set<line_no> > word_map;
};
#endif

main.cpp

#include "TextQuery.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <set>
#include <map>
#include <string>

using std::ifstream; using std::map; using std::set; using std::multimap; using std::multiset;
using std::string; using std::cin; using std::cout; using std::endl;
// Resets the stream, opens it on the file named by the string argument,
// and returns the same stream so the caller can test it directly.
ifstream& open_file(ifstream&, const string&);
// Prints how many lines matched `sought`, then each matching line with its
// one-based line number; line text is fetched from `file`.
void print_results(const set<TextQuery::line_no>& locs,	const string& sought, const TextQuery &file);
// Returns `word` unchanged when `ctr` is 1, otherwise `word` followed by `ending`.
string make_plural(size_t ctr, const string &word, const string &ending);

/// Entry point: indexes a text file and answers interactive word queries.
///
/// The file to index is taken from the first command-line argument when one
/// is supplied; otherwise the default path "/share/file.txt" is used, so
/// running the program without arguments behaves as it always did.
///
/// @return 0 on normal exit, -1 when the input file cannot be opened.
int main(int argc, char *argv[])
{
	// Default input file; overridden by argv[1] when present.
	string file1 = "/share/file.txt";
	if (argc > 1)
		file1 = argv[1];

	ifstream infile; // stream bound to the requested file

	if (!open_file(infile, file1)) {
		std::cerr << "error: unable to open input file:" << file1 << endl;
		return -1;
	}

	TextQuery tq;
	tq.read_file(infile); // load the lines and build the word index
	tq.display_map();

	// Query loop: stops on end-of-input, a read error, or the word "q".
	while (true) {
		cout << "enter word to look for , q to quit:";
		string s;
		cin >> s;
		if (!cin || s == "q") break;

		set<TextQuery::line_no> locs = tq.run_query(s);

		print_results(locs, s, tq);
	}

	return 0;
}

// Binds `is` to the file at `file_path` and hands the same stream back.
// Any previously attached file is closed and all error flags are reset
// before the new open attempt, so the returned stream's state reflects
// only whether this open succeeded.
ifstream& open_file(ifstream &is, const string &file_path)
{
	if (is.is_open())
		is.close();
	is.clear();
	is.open(file_path.c_str(), std::ios_base::in);
	return is;
}
// Reports the result of one query.
// First prints "<sought> occurs N time(s)", where N is the number of
// matching lines in `locs`, then one row per match showing the one-based
// line number and the line's text as returned by `file`.
void print_results(const set<TextQuery::line_no>& locs,	const string& sought, const TextQuery &file)
{
	typedef set<TextQuery::line_no>::const_iterator loc_iter;

	const set<TextQuery::line_no>::size_type hits = locs.size();
	cout << "\n" << sought << " occurs " << hits << " "
		<< make_plural(hits, "time", "s") << endl;

	loc_iter cur = locs.begin();
	const loc_iter stop = locs.end();
	while (cur != stop)
	{
		// Stored numbers are zero-based; display them one-based.
		cout << "\t(line" << (*cur) + 1 << ") "
			<< file.text_line(*cur) << endl;
		++cur;
	}
}

// Picks the singular or plural form of a word for a count.
// Returns `word` as-is when `ctr` is exactly 1; for every other count
// (including 0) returns `word` with `ending` appended.
string make_plural(size_t ctr, const string &word, const string &ending)
{
	if (ctr == 1)
		return word;

	string plural(word);
	plural += ending;
	return plural;
}

TextQuery.cpp

#include "TextQuery.h"
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <stdexcept>

using std::istringstream;
using std::set;
using std::string;
using std::getline;
using std::map;
using std::vector;
using std::cerr;
using std::cout;
using std::cin;
using std::ifstream;
using std::endl;
using std::ispunct;
using std::tolower;
using std::strlen;
using std::out_of_range;

// 根据行号获取行文本
// Returns a copy of the stored line with zero-based number `line`.
// Throws std::out_of_range when `line` is not a valid index.
string TextQuery::text_line(line_no line) const
{
	if (line >= lines_of_text.size())
		throw std::out_of_range("line number out of range");

	return lines_of_text[line];
}

// 从文件输入流到容器
// Reads `is` line by line until getline fails, appending each line to
// lines_of_text. A progress message and every line read are echoed to
// standard output.
void TextQuery::store_file(ifstream &is)
{
	cout << "从文件输入流到容器" << endl;

	for (string current; getline(is, current); )
	{
		cout << "读取文件: " << current << endl;
		lines_of_text.push_back(current);
	}
}


// 用容器构建单词-行号列表的map
// Builds the word -> line-number index from the stored lines.
// Each line is split on whitespace; every token is normalized with
// cleanup_str and the current zero-based line number is added to that
// word's set.
void TextQuery::build_map()
{
	for (line_no line_num = 0;
		line_num != lines_of_text.size();
		++line_num)
	{
		istringstream line(lines_of_text[line_num]);
		string word;
		while (line >> word)
		{
			const string key = cleanup_str(word);
			// A token made only of punctuation (e.g. "--") normalizes to an
			// empty string; indexing it would create a bogus "" entry that
			// any punctuation-only query would then match.
			if (key.empty())
				continue;
			word_map[key].insert(line_num);
		}
	}
}

set<TextQuery::line_no>
TextQuery::run_query(const string &query_word) const
{
	map < string, set<line_no> >::const_iterator
		loc = word_map.find(cleanup_str(query_word));
	if (loc == word_map.end())
		return set<line_no>();
	else
		return loc->second;
}

void TextQuery::display_map()
{
	map< string, set<line_no> >::iterator iter = word_map.begin(),
		iter_end = word_map.end();
	for (; iter != iter_end; ++iter)
	{
		cout << "word: " << iter->first << " {";

		const set<line_no> &text_locs = iter->second;
		set<line_no>::const_iterator loc_iter = text_locs.begin(),
			loc_iter_end = text_locs.end();

		while (loc_iter != loc_iter_end)
		{
			cout << *loc_iter;

			if (++loc_iter != loc_iter_end)
				cout << ", ";
		}
		cout << "}\n";
	}
	cout << endl;
}

// Normalizes a word for indexing and lookup: every punctuation character
// is dropped and the remaining characters are lowercased.
// Returns the normalized copy; the result is empty when `word` is empty or
// consists only of punctuation.
string TextQuery::cleanup_str(const string &word)
{
	string ret;
	ret.reserve(word.size());
	for (string::const_iterator it = word.begin(); it != word.end(); ++it)
	{
		// The <cctype> functions require a value representable as
		// unsigned char (or EOF). Plain char may be negative for non-ASCII
		// bytes such as UTF-8 text, which would be undefined behaviour.
		const unsigned char ch = static_cast<unsigned char>(*it);
		if (!ispunct(ch))
			ret += static_cast<char>(tolower(ch));
	}
	return ret;
}
posted @ 2017-06-23 19:28  张狂莫怪  Views(214)  Comments(0Edit  收藏  举报