egrep命令的实现 分类: 编译原理 2014-06-01 23:41 329人阅读 评论(0) 收藏
本程序实现了egrep命令,首先将正则表达式转换为NFA,并实现模拟NFA的算法。
本程序使用flex实现词法分析,bison实现语法分析
若给定的一行字符串中存在一个子串能被该NFA接受,则输出整行。所用语法如下:
S --> S '|' S
    | S S
    | S '*'
    | '(' S ')'
    | a
bison程序:
%{
/*
 * Minimal egrep: each regular expression read from standard input is turned
 * into an NFA while it is parsed; every line of the file named on the command
 * line that contains a substring accepted by that NFA is then printed.
 *
 * NFA states are named by decimal strings produced from the global counter
 * `number`; the reserved symbol "epsilon" labels empty transitions.
 *
 * NOTE: the heap objects hanging off `value` are never freed.  The semantic
 * values are copied memberwise on the parser stack, so ownership is shared
 * between stack slots; the leak is left as is for this short-lived tool.
 */
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <stack>
#include <algorithm>
#include <boost/foreach.hpp>
#include <set>
#include <map>
using namespace std;

typedef string state;                    /* a state name                        */
typedef set<string> states;              /* a set of state names                */
typedef map<string, states> transition;  /* symbol -> set of destination states */

/* Semantic value of every grammar symbol: an NFA fragment (or, for an ASCII
   token coming from the lexer, just the `symbol` field). */
class value
{
public:
    string* symbol;                      /* character of an ASCII token          */
    state *start;                        /* start state of the fragment          */
    state *final;                        /* single accepting state               */
    map<state,transition> *transitions;  /* state -> (symbol -> destinations)    */
};

char* filename;   /* file whose lines are matched against each pattern */
int number;       /* counter used to generate fresh state names        */

int yylex(void);
void yyerror(char const *);
template<class TYPE>
string transfer(TYPE init);
static string* newState();
void Scopy(value S,value &result);
void Skleen(value S,value &result);
void SconnectS(value S1,value S2,value &result);
void SorS(value S1,value S2,value &result);
states epsilonClosure(states T,const map<state,transition>& transitions);
states move(states T,string a,const map<state,transition>& transitions);
void simulateNFA(string str,value NFA);
%}

%define api.value.type { class value }

/* Precedence, from lowest to highest: alternation < concatenation < closure. */
%token ASCII
%token LP
%token RP
%left OR
%left CONNECT
%left KLEEN
%expect 4

%%

lines: lines S '\n'
        {
            /* A complete pattern has been parsed: restart state numbering for
               the next pattern and scan the input file with this NFA. */
            number=0;
            ifstream in(filename);
            if(!in)
                cerr<<"cannot open "<<filename<<endl;
            string line;
            while(getline(in,line))
                simulateNFA(line,$2);
        }
     | lines '\n'
     | /* empty */
     | error '\n' {yyerrok;}
     ;

S: S KLEEN              {Skleen($1,$$);}
 | S S %prec CONNECT    {SconnectS($1,$2,$$);}
 | S OR S               {SorS($1,$3,$$);}
 | LP S RP              {Scopy($2,$$);}
 | ASCII
        {
            /* Two fresh states joined by one edge labelled with the token's
               character.  The character is read from the token itself rather
               than from the not-yet-assigned result value. */
            $$.symbol=$1.symbol;
            $$.start=newState();
            $$.final=newState();
            states accepting;
            accepting.insert(*$$.final);
            $$.transitions=new map<state,transition>();
            (*$$.transitions)[*$$.start][*$1.symbol]=accepting;
        }
 ;

%%

#include "lex.yy.c"

/* Entry point: argv[1] names the file to search; patterns come from stdin. */
int main(int argc,char*argv[])
{
    if(argc<2)
    {
        cerr<<"usage: "<<(argc>0?argv[0]:"egrep")<<" filename"<<endl;
        return 1;
    }
    number=0;
    filename=argv[1];
    return yyparse();
}

/* Parser error callback: prints the message on standard output. */
void yyerror(char const *s)
{
    cout<<s<<endl;
}

/* Allocates a new state whose name is the next value of `number`. */
static string* newState()
{
    return new string(transfer<int>(number++));
}

/* Returns the states reachable from `from` on `symbol`, or an empty set when
   the table has no such entry.  Unlike operator[], never inserts. */
static const states& targetsOf(const map<state,transition>& table,
                               const state& from,const string& symbol)
{
    static const states none;
    map<state,transition>::const_iterator row=table.find(from);
    if(row==table.end())
        return none;
    transition::const_iterator cell=row->second.find(symbol);
    if(cell==row->second.end())
        return none;
    return cell->second;
}

/* Returns T extended with every state reachable from it through "epsilon"
   edges only.  The table is taken by reference so it is not copied per call. */
states epsilonClosure(states T,const map<state,transition>& transitions)
{
    stack<state> pending;
    BOOST_FOREACH(const state& u, T)
    {
        pending.push(u);
    }
    while(!pending.empty())
    {
        state t=pending.top();
        pending.pop();
        BOOST_FOREACH(const state& u, targetsOf(transitions,t,"epsilon"))
        {
            /* insert() reports whether u is new; only new states are explored */
            if(T.insert(u).second)
                pending.push(u);
        }
    }
    return T;
}

/* Returns the set of states reachable from some state of T on symbol a. */
states move(states T,string a,const map<state,transition>& transitions)
{
    states result;
    BOOST_FOREACH(const state& u, T)
    {
        const states& next=targetsOf(transitions,u,a);
        result.insert(next.begin(),next.end());
    }
    return result;
}

/* Prints str (followed by a newline) if any substring of it, the empty one
   included, is accepted by NFA.
   For each start offset the NFA is advanced one character at a time, which
   covers every substring beginning there without restarting the simulation;
   the scan from that offset stops once the state set becomes empty.  The
   offset str.length() is included so that an empty line is still tested
   against patterns that accept the empty string. */
void simulateNFA(string str,value NFA)
{
    const map<state,transition>& table=*NFA.transitions;

    states initial;
    initial.insert(*NFA.start);
    initial=epsilonClosure(initial,table);

    bool matched=false;
    for(string::size_type i=0;i<=str.length()&&!matched;i++)
    {
        states current=initial;
        string::size_type pos=i;
        for(;;)
        {
            if(current.count(*NFA.final)!=0)
            {
                matched=true;
                break;
            }
            if(pos==str.length()||current.empty())
                break;
            /* the symbol is the single character itself, blanks included */
            current=epsilonClosure(move(current,string(1,str[pos]),table),table);
            ++pos;
        }
    }
    if(matched)
        cout<<str<<endl;
}

/* Converts init to a string by writing it to a stringstream and reading one
   whitespace-delimited word back; a value that prints as whitespace only
   therefore yields an empty string. */
template<class TYPE>
string transfer(TYPE init)
{
    stringstream ss;
    ss<<init;
    string str;
    ss>>str;
    return str;
}

/* result --> ( S )
   Copies S's start state, final state and transition table into result. */
void Scopy(value S,value &result)
{
    result.start=new string(*S.start);
    result.final=new string(*S.final);
    result.transitions=new map<state,transition>(*S.transitions);
}

/* result --> S *
   Copies S's table, creates a fresh start and final state for result, then
   adds "epsilon" edges from the new start to {S.start, new final} and from
   S.final to {S.start, new final}. */
void Skleen(value S,value &result)
{
    result.start=newState();
    result.final=newState();
    result.transitions=new map<state,transition>(*S.transitions);

    states accepting;
    accepting.insert(*S.start);
    accepting.insert(*result.final);
    (*result.transitions)[*result.start]["epsilon"]=accepting;
    (*result.transitions)[*S.final]["epsilon"]=accepting;
}

/* result --> S1 S2
   Copies both tables into result and fuses S2's start state into S1's final
   state: the outgoing edges of S2.start are re-attached to S1.final.  The
   edges are merged symbol by symbol, so any edges S1.final already has are
   kept rather than silently winning over the moved ones. */
void SconnectS(value S1,value S2,value &result)
{
    result.start=new string(*S1.start);
    result.final=new string(*S2.final);
    result.transitions=new map<state,transition>();

    map<state,transition>& table=*result.transitions;
    table.insert(S1.transitions->begin(),S1.transitions->end());
    table.insert(S2.transitions->begin(),S2.transitions->end());

    map<state,transition>::iterator it=table.find(*S2.start);
    if(it!=table.end())
    {
        const transition outgoing=it->second;
        table.erase(it);
        transition& fused=table[*S1.final];
        for(transition::const_iterator edge=outgoing.begin();edge!=outgoing.end();++edge)
            fused[edge->first].insert(edge->second.begin(),edge->second.end());
    }
}

/* result --> S1 | S2
   Copies both tables into result, creates a fresh start and final state, then
   adds "epsilon" edges from the new start to S1.start and S2.start and from
   S1.final and S2.final to the new final state. */
void SorS(value S1,value S2,value &result)
{
    result.start=newState();
    result.final=newState();
    result.transitions=new map<state,transition>();

    map<state,transition>& table=*result.transitions;
    table.insert(S1.transitions->begin(),S1.transitions->end());
    table.insert(S2.transitions->begin(),S2.transitions->end());

    states accepting;
    accepting.insert(*S1.start);
    accepting.insert(*S2.start);
    table[*result.start]["epsilon"]=accepting;

    accepting.clear();
    accepting.insert(*result.final);
    table[*S1.final]["epsilon"]=accepting;
    table[*S2.final]["epsilon"]=accepting;
}
flex程序:
%{
/* Lexer for the regular-expression grammar.  Operators are returned as
   their own tokens; every other character becomes an ASCII token whose
   semantic value carries that character in a heap-allocated string. */
#include<string>
%}

escaped "\\("|"\\)"|"\\|"|"\\*"

%%

[ \t]+      { /* blanks and tabs are skipped */ }
\n          { return '\n'; }
"("         { return LP; }
")"         { return RP; }
"|"         { return OR; }
"*"         { return KLEEN; }
{escaped}   {
                /* backslash-escaped operator: keep only the character
                   that follows the backslash */
                yylval.symbol=new string(1,yytext[1]);
                return ASCII;
            }
.           {
                /* any other single character stands for itself */
                yylval.symbol=new string(transfer(yytext));
                return ASCII;
            }
假设将bison程序拷至bison.y,flex程序拷至flex.l。
运行如下:
flex flex.l
bison bison.y
g++ bison.tab.c -ly -ll
./a.out filename
最后可输入待测试正则表达式
版权声明:本文为博主原创文章,未经博主允许不得转载。