简单正则表达式和非确定有穷自动机

正则表达式

正则表达式的基本语法有三种:连接、或、闭包,分别对应于 ab、a|b、a*。下面的实现没有解析 | 符号,“或”用方括号里的字符集合来表示,例如 [ab]。

为了简单起见,添加了 \w、\n、\a,分别表示字母、数字、字母+数字;它们只有单独写在方括号内才会被识别,例如 [\w]。

非确定有穷自动机(NFA)

 倒序读正则表达式,向多叉树中插入节点(贪心)

 

// main.cpp
#include <iostream>
#include <vector>
#include <string>
#include <map>
#include <cstdlib>
using namespace std;

// Non-zero makes receive() and the addnode() overloads print a trace to cout.
const int DEBUG = 1;
// Key stored in node::val for the links that addnode() creates between its
// helper nodes (presumably the NFA's empty/epsilon transitions - confirm
// against the matcher, which is not part of this file).
const char voidchar = '$';  //
// Bracket content that selects the letters, i.e. pstring[10..61].
const char *word = "\\w";
// Bracket content that selects the digits, i.e. pstring[0..9].
const char *number = "\\n";
// Bracket content that selects digits and letters, i.e. all of pstring.
const char *alnum = "\\a";
// Character table indexed by the ranges above:
// [0,10) digits, [10,36) lower-case letters, [36,62) upper-case letters.
const char *pstring = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
// One state of the graph built by `deal`.
struct node {
    // Creates a state identified by `num` that has no outgoing links yet.
    // Initialisers are listed in member declaration order.
    node(int num) : number(num), val() {}

    int number;            // identifier handed in by the creator
    map<char, node*> val;  // outgoing links: label -> target (one target per label)
};

// Builds a graph of `node` objects from a simplified regular-expression
// pattern.  receive() is the only public operation; the addnode() overloads
// (defined out of line) do the actual node insertion.
//
// NOTE: the class declares no destructor, so the nodes it allocates with
// `new` are not released by it.
class deal {
public:
    // Allocates the start node (numbered 0) and makes it the insertion point.
    deal() : count(0), pack_flag(false), is_end(false), head(NULL), cur(NULL) {
        head = new node(count++);
        cur = head;
    }

    // Feeds one pattern into the graph.
    void receive(string s);

private:
    int count;                 // number that the next allocated node receives
    bool pack_flag;            // one-shot flag, read and cleared by getpackflag()
    bool is_end;               // one-shot flag, read and cleared by getendflag()
    map<int, node*> quitnode;  // node number -> node, filled in by addnode()
    node *head;                // start node created by the constructor
    node *cur;                 // node that the next insertion attaches to

    // Returns the current value of pack_flag and leaves it cleared.
    bool getpackflag() {
        const bool was_set = pack_flag;
        pack_flag = false;
        return was_set;
    }

    // Returns the current value of is_end and leaves it cleared.
    bool getendflag() {
        const bool was_set = is_end;
        is_end = false;
        return was_set;
    }

    void addnode(char);
    void addnode(string);
    //  void addquitnode(int num,node *p) {quitnode[num] = p;}
};

int main() {
    deal d;
    d.receive("asf[\\n]a*s[\\a][s jakf]*f");
    return 0;
}

// Parses the pattern `s` from its last character to its first and inserts the
// matching nodes, starting again from `head`.
//
// Recognised elements:
//   [...]  the text between the brackets is passed to addnode(string)
//   *      sets pack_flag, so the next element processed (the one to the
//          left of the '*') is inserted as a repetition
//   other  any other character is passed to addnode(char) as a literal
//
// The element that starts at index 0 is announced through is_end, which the
// addnode() overloads consume to record an entry in quitnode.
//
// A ']' with no '[' to its left is a malformed pattern: the message is written
// to stderr and the program terminates with EXIT_FAILURE (the previous code
// printed to stdout and exited with status 0, i.e. reported success).
void deal::receive(string s) {
    cur = head;
    is_end = false;
    pack_flag = false;
    if (DEBUG) cout << "get " << s << endl;

    int x = static_cast<int>(s.size()) - 1;
    while (x >= 0) {
        if (x == 0) is_end = true;
        const char c = s[x];
        if (c == ']') {
            // Nearest '[' to the left; s[x] itself is ']' so it cannot match.
            const string::size_type close = static_cast<string::size_type>(x);
            const string::size_type open = s.rfind('[', close);
            if (open == string::npos) {
                // Validate before touching the substring.
                cerr << "\n no match for \"]\"\n";
                exit(EXIT_FAILURE);
            }
            if (open == 0) is_end = true;
            const string content = s.substr(open + 1, close - open - 1);
            if (DEBUG) cout << "content = " << content << endl;
            addnode(content);
            x = static_cast<int>(open) - 1;
        } else if (c == '*') {
            pack_flag = true;
            --x;
        } else {
            addnode(c);
            --x;
        }
    }
}

void deal::addnode(char c) {
    if (true) {
        node *p = new node(count++);
        cur->val[c] = p;
        cur = p;
        if (DEBUG) cout << "count = " << count << ", char = " <<c ;
        if (getendflag()) quitnode[count - 1] = p;
        if (getpackflag()) {
            p->val[voidchar] = cur;    
            if (DEBUG) cout<< " packed";
        }
        if (DEBUG) cout << endl;
    }
}
void deal::addnode(string s) {
    node *pstart = new node(count++);
    node *pend = new node(count++);
    cur->val[voidchar] = pstart;
    cur = pstart;
        
    if (getendflag()) quitnode[count -1] = pend;
    if (true) {
        bool flag = false;
        int startchar, endchar;
        if (s == number) {
            flag = true;
            startchar = 0; endchar = 10;
        } else if (s == word) {
            flag = true;
            startchar = 10; endchar = 36 + 26;
        } else if (s == alnum) {
            flag = true;
            startchar = 0; endchar = 36 + 26;
        }
        if (DEBUG) cout << "add char "; // \'" << pstring[c] << "\'\n";
        if (flag) for (int c = startchar; c < endchar; c++) {
            node *p = new node(count++);
            cur->val[pstring[c]] = p;
            p->val[voidchar] = pend;
            if (DEBUG) cout << "\'" << pstring[c] <<"\'";
        }
        if (!flag) {
            for (int x = 0; x < s.size(); x++) {
                node *p = new node(count++);
                cur->val[s[x]] = p;
                p->val[voidchar] = pend;
                if (DEBUG) cout << "\'" << s[x] <<"\'";
            }
        }
    }
    if (getpackflag()) {
        pend->val[voidchar] = pstart;
        pstart->val[voidchar] = pend;
        if (DEBUG) cout<<" packed";
    }
    if (DEBUG) cout<< endl;
    node *plast = new node(count++);
    pend->val[voidchar] = plast;
    cur = plast;
}

 

posted @ 2016-09-23 14:39  backinfile  阅读(1331)  评论(0编辑  收藏  举报