egrep命令的实现 分类: 编译原理 2014-06-01 23:41 329人阅读 评论(0) 收藏

本程序实现了egrep命令,首先将正则表达式转换为NFA,并实现模拟NFA的算法。

本程序使用flex实现词法分析,bison实现语法分析

若给定的一行字符串中存在一个子串能被该NFA接受,则输出整行。
所用语法如下:
S-->S|S
   |SS
   |S*
   |(S)

   |a


bison程序:

%{
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <stack>
#include <algorithm>
#include <boost/foreach.hpp>
#include <set>
#include <map>
using namespace std;
// Name of a single NFA state (the code builds names from the global counter `number`).
typedef string state;
// A set of NFA state names.
typedef set<string> states;
// Outgoing edges of one state: edge label (a symbol string or "epsilon") -> target states.
typedef map<string, states> transition;
// Semantic value attached to grammar symbols (see `%define api.value.type`).
// For an S it describes an NFA fragment; for an ASCII token the lexer fills `symbol`.
// All members are raw pointers allocated with `new`; nothing in this file frees them.
// NOTE(review): presumably this type must stay trivially copyable for the generated
// parser's value stack -- confirm before adding constructors or destructors.
class value
{ 
  public:
  string* symbol;                       // literal character of an ASCII token
  state *start;                         // name of the fragment's start state
  state *final;                         // name of the fragment's accepting state
  map<state,transition> *transitions;   // transition table: state -> (label -> targets)
};
// Path of the text file to search; set from argv[1] in main().
char* filename;
int number;// counter used to generate state names
// Lexer entry point (defined in the included lex.yy.c).
int yylex(void);
// Error reporting callback; prints the message.
void yyerror(char const *);
// Converts a streamable value to its string form via a stringstream.
template<class TYPE>
string transfer(TYPE init);
// NFA construction helpers, one per grammar production: (S), S*, S1 S2, S1|S2.
void Scopy(value S,value &result);
void Skleen(value S,value &result);
void SconnectS(value S1,value S2,value &result);
void SorS(value S1,value S2,value &result);
// States reachable from T through "epsilon" edges only (T included).
states epsilonClosure(states T,map<state,transition> transitions);
// States reachable from T through one edge labelled `a`.
states move(states T, string a,map<state,transition> transitions);
// Prints `str` if some substring of it is accepted by NFA.
void simulateNFA(string str,value NFA); 
%}

%define api.value.type { class value }
// Precedence, highest to lowest: closure (*) > concatenation > alternation (|).
%token ASCII
%token LP
%token RP
%left OR
%left CONNECT
%left KLEEN
%expect 4
%%
/* Input is a sequence of lines, each holding one regular expression.
   After an expression is parsed, every line of the file named by `filename`
   is run through the resulting NFA and matching lines are printed. */
lines: lines S '\n'
       {
         /* restart state numbering for the next expression */
         number=0;
         ifstream in;
         /* guard against a missing command-line argument */
         if(filename!=NULL)
         {
           in.open(filename);
         }
         if(!in.is_open())
         {
           /* report the failure instead of silently printing nothing */
           cerr<<"cannot open input file: "<<(filename!=NULL?filename:"(none given)")<<endl;
         }
         else
         {
           string line;
           while(getline(in,line))
           {
             simulateNFA(line,$2);
           }
         }
       }
       | lines '\n'
       |
       | error '\n' {yyerrok;}
       ;
/* S builds an NFA fragment for each regular-expression form; the helper
   functions fill $$ with the fragment's start state, final state and table. */
S:   S KLEEN   
     {Skleen($1,$$);}
    |S S %prec CONNECT   
     {SconnectS($1,$2,$$);}
    |S OR S    
     {SorS($1,$3,$$);} 
    |LP S RP   
     {Scopy($2,$$);} 
    |ASCII 
    {
      /* The lexer stored the literal character in the token's own value ($1).
         Read it from $1 explicitly: the original read $$.symbol, which only
         worked because the generated parser happens to pre-load $$ with $1. */
      $$.symbol=$1.symbol;
      $$.start=new string(transfer<int>(number++));
      $$.final=new string(transfer<int>(number++));
      states accepting;
      accepting.insert(*$$.final);
      $$.transitions=new map<state,transition>();
      /* single edge: start --symbol--> final */
      (*$$.transitions)[*$$.start][*$1.symbol]=accepting;
    }
    ;
%%
#include "lex.yy.c"

int main(int argc,char*argv[]) {
    number=0;
    filename=argv[1];
    return yyparse();
}

// Error callback: writes the given message followed by a newline to standard output.
void yyerror(char const *s)
{
    std::cout << s << std::endl;
}
// Computes the epsilon-closure of T: T itself plus every state reachable from
// it by following only edges labelled "epsilon" in `transitions`.
// Both arguments are taken by value, so the caller's data is never modified.
states epsilonClosure(states T,map<state,transition> transitions) 
{
    // Work list of states whose epsilon edges still have to be followed.
    stack<state> pending;
    for (states::const_iterator seed = T.begin(); seed != T.end(); ++seed)
    {
        pending.push(*seed);
    }

    while (!pending.empty())
    {
        const state current = pending.top();
        pending.pop();

        const states &targets = transitions[current]["epsilon"];
        for (states::const_iterator next = targets.begin(); next != targets.end(); ++next)
        {
            // insert() reports whether the state was new; only new states are explored.
            if (T.insert(*next).second)
            {
                pending.push(*next);
            }
        }
    }
    return T;
}
    
// Returns the set of states reachable from any state in T through exactly one
// edge labelled `a` (no epsilon edges are followed here).
states move(states T,string a,map<state,transition> transitions) 
{
    states reached;
    for (states::const_iterator from = T.begin(); from != T.end(); ++from)
    {
        const states &targets = transitions[*from][a];
        reached.insert(targets.begin(), targets.end());
    }
    return reached;
}

// Prints `str` (followed by a newline) when at least one substring of it,
// possibly the empty one, is accepted by the NFA described by `NFA`.
//
// The NFA is simulated once per start offset and advanced one character at a
// time, checking for acceptance after every step. This replaces re-running the
// automaton from scratch for every (offset, length) pair.
//
// Behaviour fixes relative to the exhaustive version:
//  - an empty line is now tested too, so an expression that accepts the empty
//    string also matches empty lines;
//  - every input character is turned into its one-character label the same way
//    (previously the first character went through transfer<char>, which maps
//    whitespace characters to an empty string).
void simulateNFA(string str,value NFA)
{
  const map<state,transition> &table=*NFA.transitions;
  const state &accepting=*NFA.final;

  // Closure of the start state; identical for every start offset, so compute it once.
  states initial;
  initial.insert(*NFA.start);
  initial=epsilonClosure(initial,table);

  // If the start closure already accepts, the empty substring matches any line.
  bool matched=initial.count(accepting)!=0;

  for(string::size_type begin=0;!matched&&begin<str.length();++begin)
  {
    states current=initial;
    for(string::size_type pos=begin;pos<str.length();++pos)
    {
      current=epsilonClosure(move(current,string(1,str[pos]),table),table);
      if(current.empty())
      {
        // dead end: no longer substring starting at `begin` can be accepted
        break;
      }
      if(current.count(accepting)!=0)
      {
        matched=true;
        break;
      }
    }
  }

  if(matched)
  {
    cout<<str<<endl;
  }
}

// Converts any streamable value to its textual form.
// The whole formatted text is returned, including whitespace: the previous
// implementation read the result back with operator>>, which skips leading
// whitespace and stops at the first whitespace character (so ' ' became "").
template<class TYPE>
std::string transfer(TYPE init)
{
  std::stringstream ss;
  ss<<init;
  return ss.str();
}
// result --> (S)
// Parentheses add nothing to the automaton: result receives its own copies of
// S's start state name, final state name and transition table.
void Scopy(value S,value &result)
{ 
  result.transitions=new map<state,transition>(*S.transitions);
  result.final=new string(*S.final);
  result.start=new string(*S.start);
}
// result --> S*
// Copies S's transition table, allocates a fresh start and a fresh final state,
// then adds "epsilon" edges
//   new start -> { S.start, new final }
//   S.final   -> { S.start, new final }
void Skleen(value S,value &result)
{
   map<state,transition> *table=new map<state,transition>(*S.transitions);

   // The start state takes the lower number, the final state the next one.
   const string freshStart=transfer<int>(number++);
   const string freshFinal=transfer<int>(number++);

   states targets;
   targets.insert(*S.start);
   targets.insert(freshFinal);

   (*table)[freshStart]["epsilon"]=targets;
   (*table)[*S.final]["epsilon"]=targets;

   result.start=new string(freshStart);
   result.final=new string(freshFinal);
   result.transitions=table;
}
// result --> S1 S2
// Merges the transition tables of S1 and S2, then fuses S2's start state into
// S1's final state by re-keying S2.start's outgoing edges under S1.final.
// The result starts at S1.start and ends at S2.final.
void SconnectS(value S1,value S2,value &result)
{
  map<state,transition> *merged=new map<state,transition>(*S1.transitions);
  // Range insert keeps entries that already exist, like the insert_iterator copy did.
  merged->insert(S2.transitions->begin(),S2.transitions->end());

  map<state,transition>::iterator entry=merged->find(*S2.start);
  if(entry!=merged->end())
  {
    const transition outgoing=entry->second;
    merged->erase(entry);
    merged->insert(make_pair(*S1.final,outgoing));
  }

  result.start=new string(*S1.start);
  result.final=new string(*S2.final);
  result.transitions=merged;
}
// result --> S1 | S2
// Merges the transition tables of S1 and S2, allocates a fresh start and a
// fresh final state, then adds "epsilon" edges
//   new start -> { S1.start, S2.start }
//   S1.final  -> { new final }
//   S2.final  -> { new final }
void SorS(value S1,value S2,value &result)
{
  // The start state takes the lower number, the final state the next one.
  const string freshStart=transfer(number++);
  const string freshFinal=transfer(number++);

  map<state,transition> *merged=new map<state,transition>(*S1.transitions);
  merged->insert(S2.transitions->begin(),S2.transitions->end());

  states branches;
  branches.insert(*S1.start);
  branches.insert(*S2.start);
  (*merged)[freshStart]["epsilon"]=branches;

  states join;
  join.insert(freshFinal);
  (*merged)[*S1.final]["epsilon"]=join;
  (*merged)[*S2.final]["epsilon"]=join;

  result.start=new string(freshStart);
  result.final=new string(freshFinal);
  result.transitions=merged;
}

flex程序:

%{
#include<string>
/* Lexer for the regular-expression input.
   Blanks and tabs are skipped, a newline is returned as itself,
   ( ) | * are returned as LP RP OR KLEEN, and every other character
   (or a backslash-escaped operator) is returned as an ASCII token whose
   text is stored in yylval.symbol. */
%}
escape "\\("|"\\)"|"\\|"|"\\*"
%%
[ \t]+   
\n       {return yytext[0];}
"("      {return LP;}
")"      {return RP;}
"|"      {return OR;}
"*"      {return KLEEN;}
{escape} {
          /* drop the leading backslash, keep only the escaped operator character */
          yylval.symbol=new string(transfer(yytext).substr(1,1));
          return ASCII;
         }
.        {
          /* any other single character stands for itself */
          yylval.symbol=new string(transfer(yytext));
          return ASCII;
         }
假设将bison程序拷至bison.y,flex程序拷至lex.l。

运行如下:
flex lex.l
bison bison.y
g++ bison.tab.c -ly -ll
./a.out filename
最后可输入待测试正则表达式


版权声明:本文为博主原创文章,未经博主允许不得转载。

posted @ 2014-06-01 23:41  xiaoluo91  阅读(181)  评论(0)  编辑  收藏  举报