查找只有一个字母不相同的单词
//set是一个排序后的容器,该容器不容许重复.
//map用来存储排序后的键值对,键必须唯一.
//set和map支持在最坏情况下对基本操作insert,erase和find仅消耗对数时间.
//相应的底层实现是平衡二叉搜索树.典型的实现不是使用AVL树,而常常是使用自顶向下的红黑树.
//map的每一个元素都是一个pair的实例(pair<const Key,Value>,即键值对);set的元素则就是键本身,并不是pair.
1 #include<iostream>
2 #include<fstream>
3 #include<map>
4 #include<string>
5 #include<vector>
6 #include<set>
7 using namespace std;
8
// Builds a dictionary from a text document.
// Reads whitespace-separated tokens from the file named `file` and returns
// them as a set (so each distinct token appears once, in sorted order).
// Returns an empty set when the file cannot be opened.
std::set<std::string> buildDicFromDoc(std::string file){
    std::set<std::string> vocabulary;
    // c_str() keeps this usable with pre-C++11 stream constructors,
    // which only accept C-style strings.
    std::ifstream input(file.c_str());
    // Extraction fails immediately on a stream that did not open,
    // so no separate open check is needed.
    for(std::string token;input>>token;)
        vocabulary.insert(token);
    return vocabulary;
}
20
// Finds, for every word, the other words that differ from it in exactly one
// letter (same length, one position changed).
//
// words: the candidate words. Repeated entries are ignored after their first
//        occurrence, so a word is never reported as adjacent to itself and no
//        neighbour is listed more than once.
// Returns a map from each word that has at least one neighbour to the list of
// its neighbours. Words without neighbours do not appear as keys.
//
// Method: words are grouped by length; within a group, for each letter
// position i, the letter at i is erased from every word and words that share
// the same remainder are put in one bucket. Any two distinct words in one
// bucket differ only at position i.
std::map<std::string,std::vector<std::string> > computeAdjacentWords(const std::vector<std::string>& words){
    typedef std::vector<std::string> WordList;
    typedef std::map<std::string,WordList> WordMap;
    typedef std::map<std::string::size_type,WordList> LengthMap;

    WordMap adjWords;
    LengthMap wordsByLength;

    // Group the words by length, keeping only the first occurrence of each
    // word (insert().second is false for a word already seen).
    std::set<std::string> seen;
    for(WordList::size_type i=0;i<words.size();++i)
        if(seen.insert(words[i]).second)
            wordsByLength[words[i].length()].push_back(words[i]);

    // Process each length group independently.
    for(LengthMap::const_iterator itr=wordsByLength.begin();itr!=wordsByLength.end();++itr){
        const std::string::size_type wordLength=itr->first;
        const WordList& groupWords=itr->second;

        // Work on one letter position at a time.
        for(std::string::size_type i=0;i<wordLength;++i){
            // Bucket the words by what is left after erasing the letter at i.
            WordMap repToWord;
            for(WordList::size_type j=0;j<groupWords.size();++j){
                std::string rep=groupWords[j];
                rep.erase(i,1);
                repToWord[rep].push_back(groupWords[j]);
            }

            // Every pair inside a bucket is a pair of adjacent words; record
            // the relation in both directions. Buckets with a single word
            // produce no pairs.
            for(WordMap::const_iterator itr2=repToWord.begin();itr2!=repToWord.end();++itr2){
                const WordList& clique=itr2->second;
                for(WordList::size_type p=0;p<clique.size();++p)
                    for(WordList::size_type q=p+1;q<clique.size();++q){
                        adjWords[clique[p]].push_back(clique[q]);
                        adjWords[clique[q]].push_back(clique[p]);
                    }
            }
        }
    }
    return adjWords;
}
59
// Prints every word that has at least `minWords` adjacent words.
//
// adjWords: map from a word to the list of its adjacent words.
// minWords: minimum neighbour count for a word to be printed (default 15).
//           A value of zero or less prints every entry.
//
// Each printed line has the form "word(count) n1 n2 ...", written to
// standard output in the map's (sorted) key order.
void printHighChangeables(const std::map<std::string,std::vector<std::string> > & adjWords,int minWords=15){
    typedef std::vector<std::string> WordList;
    typedef std::map<std::string,WordList> WordMap;

    // Convert the threshold once, clamping non-positive values to 0. A direct
    // size() >= minWords comparison would turn a negative int into a huge
    // unsigned value and suppress all output.
    const WordList::size_type threshold=
        minWords>0 ? static_cast<WordList::size_type>(minWords) : 0;

    for(WordMap::const_iterator itr=adjWords.begin();itr!=adjWords.end();++itr){
        // Use the element in place. Binding *itr to a
        // pair<string,...> reference would copy the key and the whole
        // vector, because the map's element type is pair<const string,...>.
        const WordList& words=itr->second;
        if(words.size()<threshold)
            continue;

        std::cout<<itr->first<<"("<<words.size()<<")";
        for(WordList::size_type i=0;i<words.size();++i)
            std::cout<<" "<<words[i];
        std::cout<<'\n';
    }
}
75
76 int main(){
77 vector<string> dic;
78 set<string> temp=buildDicFromDoc("doc_en.txt");
79 set<string>::const_iterator itr;
80 for(itr=temp.begin();itr!=temp.end();++itr)
81 dic.push_back(*itr);
82 const map<string,vector<string> > & adjWords=computeAdjacentWords(dic);
83 printHighChangeables(adjWords,2);
84
85 return 0;
86 }
87
bike(2) like bile
fine(4) line mine pine wine
like(2) bike line
line(5) fine mine pine wine like
mine(4) fine line pine wine
pine(4) fine line mine wine
wide(3) wine wipe wire
wine(10) fine line mine pine wide wipe wire wing wink wins
wing(3) wine wink wins
wink(3) wine wing wins
wins(3) wine wing wink
wipe(3) wide wine wire
wire(3) wide wine wipe
本文来自博客园,作者:高性能golang,转载请注明原文链接:https://www.cnblogs.com/zhangchaoyang/articles/1809335.html