【编译原理】正则式转NFA转DFA 代码实现(C/C++)

直接上代码:

#include<bits/stdc++.h>

using namespace std;

// One NFA state.
struct nst {
    // a[k] lists the states reached on reading the letter 'a' + k.
    std::vector<int> a[26];
    // States reached on an epsilon move.
    std::vector<int> e;
    // false means "accepting": print_nfa() flips this to true for every state
    // that has an outgoing edge and reports the remaining states as final.
    bool f = false;
};
vector<nst> nfa;   // NFA state table, indexed by state number
set<char>alp;      // letters a-z that occur in the current regex (filled in solve())
string str;        // regex currently being processed (read in main())
set<int>accepted;  // accepting NFA states; filled by print_nfa(), read by nfa_to_dfa()

// One DFA state.
struct dst {
    // a[k] holds the target state number(s) for the letter 'a' + k.
    std::vector<int> a[26];
    // true means the state is accepting.
    bool f = false;
};
vector<dst> dfa; // DFA state table, indexed by DFA state number

stack<int> st; // fragment stack used while building the NFA: each fragment is pushed as its start state, then its end state
int nfa_size = 0, dfa_size; // nfa_size: NFA states allocated so far; dfa_size: highest DFA state number assigned (print_dfa() increments it to obtain the count)
string dispregex; // copy of the input regex taken in solve() before the explicit '.' operators are inserted
struct nst init_nfa_state; // default-constructed state that is copied for every new NFA state
struct dst init_dfa_state; // default-constructed DFA state

// One NFA transition, as collected by print_nfa() for output.
struct production {
    int a;  // source state
    int b;  // letter index (0 stands for 'a'), or -1 for an epsilon move
    int c;  // target state
};

/***************************** regex to nfa ****************************/

// Makes concatenation explicit by inserting a '.' operator between adjacent
// tokens, e.g. "a(ab)*" becomes "a.(a.b)*".
//
// regexp: regular expression over a-z, '|', '*', '(' and ')'.
// Returns the expression with '.' inserted; an empty input yields an empty
// string (the original indexed regexp[size() - 1] on an empty string).
std::string insert_concat(std::string regexp) {
    std::string ret;
    if (regexp.empty()) return ret;

    for (std::size_t i = 0; i + 1 < regexp.size(); ++i) {
        const char cur = regexp[i];
        const char next = regexp[i + 1];
        ret += cur;
        // No '.' after an opening parenthesis or after '|', and none before
        // ')', '|' or the postfix '*'; every other adjacent pair is a
        // concatenation.
        if (cur != '(' && cur != '|' && next != ')' && next != '|' && next != '*') {
            ret += '.';
        }
    }
    // The last character never has a successor, so it is appended as is.
    ret += regexp.back();
    return ret;
}

//输出nfa
void print_nfa() {
//    cout << "------------------------------------------------------------------------\n";
    cout << str << ":\n";
    cout << "正则式 转 NFA: \n";
    cout << "K = {";
    for (int i = 0; i < nfa.size(); i++) {
        if (i)  cout << ", ";
        cout << char(i + 'A');
    }
    cout << "};\n";
    cout << "Σ = {";
    auto it = alp.begin();
    cout << *it, it++;
    for (; it != alp.end(); it++) {
        cout << ", " << *it;
    }
    cout << "};\n";

    vector<production> ans;
    set<int> in; //记录有入度的点
    for (int i = 0; i < nfa.size(); i++) {
        bool &f = nfa[i].f;
        for (int j = 0; j < 26; j++) {
            if (nfa[i].a[j].size() == 0)    continue;
            f |= 1;
            for (auto k : nfa[i].a[j])  ans.push_back({i, j, k}), in.insert(k);
        }
        if (nfa[i].e.size() == 0)   continue;
        for (auto j : nfa[i].e)     ans.push_back({i, -1, j}), in.insert(j);
        f |= 1;
    }

    for (int i = 0; i < ans.size(); i++) {
        if (i)  cout << ", ";
        cout << "f(";
        cout << char(ans[i].a + 'A');
        cout << ", ";
        if (ans[i].b == -1) cout << "ε";
        else    cout << char(ans[i].b + 'a');
        cout << ") = ";
        cout << char(ans[i].c + 'A');
    }
    cout << ";\n";

    //没有入度就是起点
    for (int i = 0; i < nfa.size(); i++) {
        if (!in.count(i)) {
            cout << char(i + 'A') << ";\n";
            break;
        }
    }

    cout << "Z = {";
    vector<int> final;
    for (int i = 0; i < nfa.size(); i++) {
        if (!nfa[i].f)    final.push_back(i), accepted.insert(i);
    }
    for (int i = 0; i < final.size(); i++) {
        if (i)  cout << ", ";
        cout << char(final[i] + 'A');
    }
    cout << "};\n";
    cout << "..........................................\n";
}

// Builds the fragment for a single letter: two fresh states joined by one
// edge labelled with letter index i (0 stands for 'a'), then pushes the
// fragment's start and end state onto the fragment stack.
void character(int i) {
    const int from = nfa_size;
    const int to = nfa_size + 1;

    nfa.push_back(init_nfa_state);
    nfa.push_back(init_nfa_state);
    nfa[from].a[i].push_back(to);

    st.push(from);
    st.push(to);
    nfa_size += 2;
}

// Handles '|': pops the two topmost fragments and wraps them between a new
// start state and a new end state using epsilon edges, then pushes the
// combined fragment.
void union_() {
    auto pop_state = [] {
        const int top = st.top();
        st.pop();
        return top;
    };

    // Fragments sit on the stack as (start, end), so they come off end first,
    // right operand before left operand.
    const int right_end = pop_state();
    const int right_start = pop_state();
    const int left_end = pop_state();
    const int left_start = pop_state();

    const int new_start = nfa_size;
    const int new_end = nfa_size + 1;
    nfa.push_back(init_nfa_state);
    nfa.push_back(init_nfa_state);

    // The new start branches into both operands ...
    nfa[new_start].e.push_back(left_start);
    nfa[new_start].e.push_back(right_start);
    // ... and both operands join in the new end.
    nfa[left_end].e.push_back(new_end);
    nfa[right_end].e.push_back(new_end);

    st.push(new_start);
    st.push(new_end);
    nfa_size += 2;
}

// Handles '.': pops the two topmost fragments, links the end of the first to
// the start of the second with an epsilon edge, and pushes the joined
// fragment. No new state is allocated.
void concatenation() {
    auto pop_state = [] {
        const int top = st.top();
        st.pop();
        return top;
    };

    // Fragments come off the stack end first, second operand before first.
    const int second_end = pop_state();
    const int second_start = pop_state();
    const int first_end = pop_state();
    const int first_start = pop_state();

    nfa[first_end].e.push_back(second_start);

    st.push(first_start);
    st.push(second_end);
}

// Handles '*': pops the topmost fragment and wraps it in a new start and a
// new end state, adding four epsilon edges (loop back, enter, skip, leave),
// then pushes the wrapped fragment.
void kleene_star() {
    const int inner_end = st.top();
    st.pop();
    const int inner_start = st.top();
    st.pop();

    const int new_start = nfa_size;
    const int new_end = nfa_size + 1;
    nfa.push_back(init_nfa_state);
    nfa.push_back(init_nfa_state);

    // From the inner end: repeat the operand, or leave.
    nfa[inner_end].e.push_back(inner_start);
    nfa[inner_end].e.push_back(new_end);
    // From the new start: enter the operand, or skip it entirely.
    nfa[new_start].e.push_back(inner_start);
    nfa[new_start].e.push_back(new_end);

    st.push(new_start);
    st.push(new_end);
    nfa_size += 2;
}

// Builds the NFA from a postfix expression by dispatching every character to
// the matching fragment builder. Characters other than a-z, '*', '.' and '|'
// only produce an error message.
void postfix_to_nfa(std::string postfix) {
    for (const char ch : postfix) {
        if (ch >= 'a' && ch <= 'z') {
            character(ch - 'a');
            continue;
        }
        switch (ch) {
            case '*':
                kleene_star();
                break;
            case '.':
                concatenation();
                break;
            case '|':
                union_();
                break;
            default:
                std::cout << "输入为非法字符!读入只能是 a-z, |, *, ()" << std::endl;
                break;
        }
    }
}

// Operator precedence used by the infix-to-postfix conversion:
// '*' binds tightest (3), then '.' (2), then '|' (1); anything else is 0.
int priority(char c) {
    if (c == '*') return 3;
    if (c == '.') return 2;
    if (c == '|') return 1;
    return 0;
}

// Converts an infix regular expression (with explicit '.' concatenation) to
// postfix using an operator stack and priority().
//
// regexp: expression over a-z, '.', '|', '*', '(' and ')'.
// Returns the postfix string. Unbalanced parentheses are not dropped: an
// unmatched '(' or ')' ends up in the result, where check() rejects it as an
// illegal character. (The original popped from an empty stack on an
// unmatched ')'.)
std::string regexp_to_postfix(std::string regexp) {
    std::string postfix;
    std::stack<char> op;

    for (const char ch : regexp) {
        if (ch >= 'a' && ch <= 'z') {
            postfix += ch;
        } else if (ch == '(') {
            op.push(ch);
        } else if (ch == ')') {
            // Flush operators back to the matching '('.
            while (!op.empty() && op.top() != '(') {
                postfix += op.top();
                op.pop();
            }
            if (op.empty()) {
                // No matching '(': keep the ')' visible in the output so the
                // caller's validation reports the malformed expression.
                postfix += ch;
            } else {
                op.pop();  // discard the '('
            }
        } else {
            // Operators of equal or higher priority already on the stack are
            // emitted first; '(' has priority 0 and therefore stops the loop
            // for every real operator.
            while (!op.empty() && priority(op.top()) >= priority(ch)) {
                postfix += op.top();
                op.pop();
            }
            op.push(ch);
        }
    }

    while (!op.empty()) {
        postfix += op.top();
        op.pop();
    }
    return postfix;
}

/***************************** nfa to dfa ****************************/

// Prints the DFA held in the global `dfa`: the state set K, every transition
// and the accepting-state set Z.
// Side effect: increments the global dfa_size, turning the highest state
// number left behind by nfa_to_dfa() into the number of states.
void print_dfa() {
    ++dfa_size;

    std::cout << "NFA 转 DFA: " << std::endl;

    std::cout << "K = {";
    for (int s = 0; s < dfa_size; ++s) {
        std::cout << (s ? ", " : "") << s;
    }
    std::cout << "};\n";

    // Transitions, comma separated, ordered by state and then by letter.
    const char *sep = "";
    for (int s = 0; s < dfa_size; ++s) {
        for (int j = 0; j < 26; ++j) {
            for (const int target : dfa[s].a[j]) {
                std::cout << sep << "f(" << s << ", " << char(j + 'a') << ") = " << target;
                sep = ", ";
            }
        }
    }
    std::cout << ";\n";

    std::cout << "Z = {";
    sep = "";
    for (int s = 0; s < dfa_size; ++s) {
        if (!dfa[s].f)  continue;
        std::cout << sep << s;
        sep = ", ";
    }
    std::cout << "};\n";
    std::cout << "------------------------------------------------------------------------\n";
}

// Adds to `si` every NFA state reachable from `state` through one or more
// epsilon edges. `state` itself is only added if an epsilon path leads back
// to it. States already present in `si` are treated as visited and are not
// expanded again.
void epsilon_closure(int state, std::set<int> &si) {
    std::vector<int> pending;
    pending.push_back(state);

    while (!pending.empty()) {
        const int current = pending.back();
        pending.pop_back();
        for (const int next : nfa[current].e) {
            // insert() reports whether the state is new; only new states
            // still need their own epsilon edges followed.
            if (si.insert(next).second) {
                pending.push_back(next);
            }
        }
    }
}

// Debug helper: prints the first n epsilon closures, one line per state in
// the form "index: member member ...", framed by two separator lines.
void print_epsilon(int n, std::set<int> eps[]) {
    const char *const bar = "===========================\n";

    std::cout << bar;
    for (int state = 0; state < n; ++state) {
        std::cout << state << ": ";
        for (const int member : eps[state]) {
            std::cout << member << ' ';
        }
        std::cout << std::endl;
    }
    std::cout << bar;
}

//nfa转dfa
void nfa_to_dfa(int start_state, int n) {
    dfa.resize(n);
    map<set<int>, int> mp, idx; //记录所有出现过的状态, state对应的状态数字
    set<int>si; //起始状态集

    //求所有状态的空闭包
    set<int> eps[n];
    for (int i = 0; i < n; i++)     epsilon_closure(i, eps[i]);
    si = eps[start_state];
    si.insert(start_state); //至少得有一个起始状态哦
    idx[si] = 0;
    //print_epsilon(n, eps);

    queue<set<int>> q;
    q.push(si);
    while (!q.empty()) {
        auto ss = q.front();
        q.pop();
        mp[ss]++;
        if (mp[ss] > 1) continue;

//        cout << "ss: ";
//        for (auto j : ss)   cout << j << ' ';
//        cout << endl;

        //接下来算第一行:起始状态先读入一个对应字符再做eps闭包
        for (auto ch: alp) { //第几个字符
            set<int> state; //记录状态
            int i = ch - 'a';
            for (auto st: ss) { //对应起点
                //st通过一个i边能到达的集合
                for (auto j: nfa[st].a[i]) {
                    state.insert(j); //首先是自己
                    //加上j的空闭包
                    for (auto k: eps[j]) state.insert(k);
                }
            }
            if (state.size() && mp[state] == 0) q.push(state);
            if (state.size()) {
                //cout << i << ": ";
                if (!idx.count(state))  dfa_size++, idx[state] = dfa_size;
                dfa[idx[ss]].a[i].push_back(idx[state]);
                //cout << "ATTENTION!!! " << idx[ss] << ' ' << idx[state] << endl;
                for (auto j : state) {
                    //cout << j << ' ';
                    if (accepted.count(j)) {
                        dfa[idx[state]].f = 1;
                        //cout << "& " << idx[state] << "\n";
                        break;
                    }
                }
                //cout << endl;
            }
        }
//        cout << "+++++++++++++++++++++++\n";
    }
}

/***************************** solve ****************************/

//每组处理前的清空
void clear() {
    nfa_size = dfa_size = 0;
    while(!st.empty())  st.pop();
    alp.clear();
    nfa.clear();
    dfa.clear();
}

// Validates a postfix expression: only a-z, '*', '.' and '|' are allowed.
// Prints an error message and returns false at the first other character;
// returns true when every character is legal (including for an empty string).
bool check (std::string postfix) {
    for (const char ch : postfix) {
        const bool is_letter = ch >= 'a' && ch <= 'z';
        const bool is_operator = ch == '*' || ch == '.' || ch == '|';
        if (is_letter || is_operator)  continue;

        std::cout << "输入为非法字符!读入只能是: 小写英文字母, |, *, ()" << std::endl;
        return false;
    }
    return true;
}

// Processes the regex in the global `str`: prints the expression with
// explicit concatenation, its postfix form, the NFA and the DFA.
//
// Invalid input no longer reaches the NFA/DFA construction. The original
// carried on after a failed check() and popped from the empty fragment
// stack; expressions with missing operands (such as "*", "a|" or "()")
// underflowed the stack inside the fragment builders.
void solve() {
    clear();
    std::string regexp = str;
    for (const char ch : regexp) {
        if (ch >= 'a' && ch <= 'z')   alp.insert(ch);
    }

    dispregex = regexp;
    regexp = insert_concat(regexp);
    std::cout << regexp << std::endl;
    const std::string postfix = regexp_to_postfix(regexp);
    std::cout << "Postfix Expression: " << postfix << std::endl;

    // check() prints its own message for illegal characters.
    if (!check(postfix)) {
        std::cout << std::endl << std::endl;
        return;
    }

    // Dry run of the postfix evaluation, counting fragments instead of
    // building them: a letter adds one, '*' needs one, '.' and '|' need two
    // and leave one. Exactly one fragment must remain at the end.
    int depth = 0;
    bool well_formed = true;
    for (const char ch : postfix) {
        if (ch >= 'a' && ch <= 'z') {
            ++depth;
        } else if (ch == '*') {
            well_formed = depth >= 1;
        } else { // '.' or '|', guaranteed by check()
            well_formed = depth >= 2;
            --depth;
        }
        if (!well_formed)  break;
    }
    if (!well_formed || depth != 1) {
        std::cout << "正则式不合法: 运算符缺少操作数或表达式为空" << std::endl;
        std::cout << std::endl << std::endl;
        return;
    }

    postfix_to_nfa(postfix);
    print_nfa(); // also fills `accepted`, which nfa_to_dfa() relies on

    // The single remaining fragment is the whole NFA: end state on top,
    // start state below it.
    if (st.size() < 2)  return;
    const int final_state = st.top();
    st.pop();
    const int start_state = st.top();
    st.pop();

    // Kept from the original; nothing visible reads nfa[].f after this point.
    nfa[final_state].f = 1;
    nfa_to_dfa(start_state, nfa.size());
    print_dfa();

    std::cout << std::endl << std::endl;
}

// Reads whitespace-separated regular expressions from in3.txt and writes the
// report for each of them to out3.txt.
// Returns 1 (with a message on stderr) if either file cannot be opened;
// the original ignored freopen() failures.
int main() {
    if (freopen("in3.txt", "r", stdin) == nullptr) {
        perror("in3.txt");
        return 1;
    }
    if (freopen("out3.txt", "w", stdout) == nullptr) {
        perror("out3.txt");
        return 1;
    }
    while (std::cin >> str) {
        solve();
    }
    return 0;
}

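//Full pipeline: regex -> NFA with ε-moves -> ε-free NFA -> DFA -> minimized DFA.
//The code above covers regex -> NFA -> DFA: ε-closures are applied inside the subset construction, and no DFA minimization step is implemented.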

输入文件in3.txt

a*b
a(ab)*
a|b*
ab*
a*

输出文件out3.txt

a*.b
Postfix Expression: a*b.
a*b:
正则式 转 NFA: 
K = {A, B, C, D, E, F};
Σ = {a, b};
f(A, a) = B, f(B, ε) = A, f(B, ε) = D, f(C, ε) = A, f(C, ε) = D, f(D, ε) = E, f(E, b) = F;
C;
Z = {F};
..........................................
NFA 转 DFA: 
K = {0, 1, 2};
f(0, a) = 1, f(0, b) = 2, f(1, a) = 1, f(1, b) = 2;
Z = {2};
------------------------------------------------------------------------


a.(a.b)*
Postfix Expression: aab.*.
a(ab)*:
正则式 转 NFA: 
K = {A, B, C, D, E, F, G, H};
Σ = {a, b};
f(A, a) = B, f(B, ε) = G, f(C, a) = D, f(D, ε) = E, f(E, b) = F, f(F, ε) = C, f(F, ε) = H, f(G, ε) = C, f(G, ε) = H;
A;
Z = {H};
..........................................
NFA 转 DFA: 
K = {0, 1, 2, 3};
f(0, a) = 1, f(1, a) = 2, f(2, b) = 3, f(3, a) = 2;
Z = {1, 3};
------------------------------------------------------------------------


a|b*
Postfix Expression: ab*|
a|b*:
正则式 转 NFA: 
K = {A, B, C, D, E, F, G, H};
Σ = {a, b};
f(A, a) = B, f(B, ε) = H, f(C, b) = D, f(D, ε) = C, f(D, ε) = F, f(E, ε) = C, f(E, ε) = F, f(F, ε) = H, f(G, ε) = A, f(G, ε) = E;
G;
Z = {H};
..........................................
NFA 转 DFA: 
K = {0, 1, 2};
f(0, a) = 1, f(0, b) = 2, f(2, b) = 2;
Z = {1, 2};
------------------------------------------------------------------------


a.b*
Postfix Expression: ab*.
ab*:
正则式 转 NFA: 
K = {A, B, C, D, E, F};
Σ = {a, b};
f(A, a) = B, f(B, ε) = E, f(C, b) = D, f(D, ε) = C, f(D, ε) = F, f(E, ε) = C, f(E, ε) = F;
A;
Z = {F};
..........................................
NFA 转 DFA: 
K = {0, 1, 2};
f(0, a) = 1, f(1, b) = 2, f(2, b) = 2;
Z = {1, 2};
------------------------------------------------------------------------


a*
Postfix Expression: a*
a*:
正则式 转 NFA: 
K = {A, B, C, D};
Σ = {a};
f(A, a) = B, f(B, ε) = A, f(B, ε) = D, f(C, ε) = A, f(C, ε) = D;
C;
Z = {D};
..........................................
NFA 转 DFA: 
K = {0, 1};
f(0, a) = 1, f(1, a) = 1;
Z = {1};
------------------------------------------------------------------------



posted @ 2024-04-16 20:09  Sakana~  阅读(830)  评论(0)  编辑  收藏  举报