简单正则表达式和非确定有穷自动机
正则表达式
正则表达式的基本语法有三个,连接,或,闭包,分别对应于 ab ,a|b,a*
为了简单起见,添加了\w,\n,\a,分别表示字母,数字,字母+数字
非确定有穷自动机(NFA)
倒序读正则表达式,向多叉树中插入节点(贪心)
// main.cpp #include <iostream> #include <vector> #include <string> #include <map> #include <cstdlib> using namespace std; const int DEBUG = 1; const char voidchar = '$'; // const char *word = "\\w"; const char *number = "\\n"; const char *alnum = "\\a"; const char *pstring = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; struct node { node(int num):val(), number(num){} int number; map<char, node*> val; }; class deal { private: int count; bool pack_flag; bool is_end; bool getpackflag() { if (pack_flag) { pack_flag = false; return true; } return false; } bool getendflag() { if (is_end) { is_end = false; return true;} return false; } public: deal():head(NULL), cur(NULL), pack_flag(false), is_end(false), count(0) { head = new node(count++); cur = head; } void receive(string s); private: map<int, node*> quitnode; node *head; node *cur; void addnode(char); void addnode(string); // void addquitnode(int num,node *p) {quitnode[num] = p;} }; int main() { deal d; d.receive("asf[\\n]a*s[\\a][s jakf]*f"); return 0; } // receive a re pattern void deal::receive(string s) { cur = head; is_end = false; pack_flag = false; if (DEBUG) cout << "get " << s <<endl; for(int x = s.size() - 1; x >= 0; ) { if (x == 0) is_end = true; if (s[x] == ']') { int y = x; for (; y >= 0; y--) { if (s[y] == '[') break; } string content = s.substr(y + 1, x - y - 1); if (y < 0) {cout << "\n no match for \"]\"\n";exit(0);} else if (y == 0) is_end = true; if (DEBUG) cout << "content = " << content <<endl; addnode(content); x = y - 1; } else if (s[x] == '*') { pack_flag = true; x--; } else { addnode(s[x]); x--; } } } void deal::addnode(char c) { if (true) { node *p = new node(count++); cur->val[c] = p; cur = p; if (DEBUG) cout << "count = " << count << ", char = " <<c ; if (getendflag()) quitnode[count - 1] = p; if (getpackflag()) { p->val[voidchar] = cur; if (DEBUG) cout<< " packed"; } if (DEBUG) cout << endl; } } void deal::addnode(string s) { node *pstart = new node(count++); node *pend = new node(count++); cur->val[voidchar] = pstart; cur = pstart; if (getendflag()) quitnode[count -1] = pend; if (true) { bool flag = false; int startchar, endchar; if (s == number) { flag = true; startchar = 0; endchar = 10; } else if (s == word) { flag = true; startchar = 10; endchar = 36 + 26; } else if (s == alnum) { flag = true; startchar = 0; endchar = 36 + 26; } if (DEBUG) cout << "add char "; // \'" << pstring[c] << "\'\n"; if (flag) for (int c = startchar; c < endchar; c++) { node *p = new node(count++); cur->val[pstring[c]] = p; p->val[voidchar] = pend; if (DEBUG) cout << "\'" << pstring[c] <<"\'"; } if (!flag) { for (int x = 0; x < s.size(); x++) { node *p = new node(count++); cur->val[s[x]] = p; p->val[voidchar] = pend; if (DEBUG) cout << "\'" << s[x] <<"\'"; } } } if (getpackflag()) { pend->val[voidchar] = pstart; pstart->val[voidchar] = pend; if (DEBUG) cout<<" packed"; } if (DEBUG) cout<< endl; node *plast = new node(count++); pend->val[voidchar] = plast; cur = plast; }