编译原理习题4.4.8关于构造CNF

一、CNF的格式是:

1、A->BC

2、D->a

从上面的CNF的格式可以看出:不存在:A->B 、A->BCD 或者A->bC之类的形式

 

所以从普通文法转CNF文法需要将上述的A->B 、A->BCD 或者A->bC转为CNF严格要求的范式。

 

二、通过上面对CNF格式的分析在编写转换CNF算法的时候需要考虑上面的几个不合格的样式

顺序:

1、先将A->B 转换为A->c 或者A->CD

2、将A->bC 转换为A->BC 、B->b之类的

3、将A->BCD 转换为A->ED 、E->BC之类的(如果不止三个非终结符也是一样的,先将前两个提取出来)

 

则CNF的转换过程有如下的形式

GramManager.h

复制代码
#pragma once

#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <utility>


using namespace std;

// Converts a context-free grammar into Chomsky normal form (CNF).
//
// The input productions are first translated into fixed-width internal symbol
// IDs produced by GramTransfer, then rewritten by three passes (unit
// productions, terminals inside longer bodies, bodies longer than two
// symbols), and finally translated back to the original symbol spellings.
class GramManager
{
public:
    // Copies baseGrams and translates them into the internal ID form.
    // Each pair is (head, body); the body's symbols are separated by spaces.
    // A body symbol whose first character is in 'A'..'Z' is treated as a
    // non-terminal, anything else as a terminal. The head of the first
    // production becomes the start symbol.
    GramManager(const vector<pair<string, string>> &baseGrams);
    ~GramManager();

    // Runs the three rewriting passes and returns the resulting productions as
    // (head, body) pairs with body symbols separated by single spaces.
    // Productions of the start symbol are listed first.
    vector<pair<string, string>> FormatCNFGrams();

private:
    // Builds the internal ID tables from m_baseGrams.
    void TransferBaseGrams();
    // Splits str on delim and returns the non-empty pieces.
    vector<string> SplitByDelim(const string &str, const string &delim);

    // Pass 1: replaces bodies that consist of one single non-terminal.
    void TryMoveSingle();
    // Pass 2: replaces terminals that occur inside multi-symbol bodies by
    // non-terminals that produce them.
    void TryUpdateMulTerminalID();
    // Pass 3: shortens bodies with more than two symbols by folding their
    // first two symbols into one non-terminal.
    void TryUpdateMulToTwoID();

private:
    // Replaces production key -> oldGram by key -> g for every g in newGramList,
    // keeping both index maps in step.
    void updateDestGram(string key, string oldGram, const unordered_set<string> &newGramList);
    // Removes gram from grams[key]; drops the key when its set becomes empty.
    void deleteGram(unordered_map<string, unordered_set<string>> &grams, const string &key, const string &gram);
    // Adds gram to grams[key], creating the entry if necessary.
    void addGram(unordered_map<string, unordered_set<string>> &grams, const string &key, const string &gram);

    // Returns a non-terminal whose only production is terminalID, creating a
    // new one when none exists. Despite the parameter name it is also called
    // with a two-symbol body.
    string assureGetNoneTerminalID(const string &terminalID);
    // Renames body oldKey to newKey for every head that produces it.
    void updateDestSrcKey(string oldKey, const string &newKey);

    // Translates a body of internal IDs back to space-separated source symbols.
    string formatGram(const string &gramID);

private:
    string m_startGramID; // internal ID of the start symbol
    vector<pair<string, string>> m_baseGrams; // productions as passed to the constructor
    unordered_map<string, string> m_terminalDestSrc; // internal terminal ID -> source spelling
    unordered_map<string, unordered_set<string>> m_gramsSrcDest; // head ID -> bodies (concatenated IDs)
    unordered_map<string, unordered_set<string>> m_gramsDestSrc; // body -> head IDs that produce it
    unordered_map<string, string> m_destBaseMap; // internal ID -> source spelling (all input symbols)
};
View Code
复制代码

GramManager.cpp

复制代码
#include "GramManager.h"

#include "GramTransfer.h"


// Keeps a copy of the input productions and immediately translates them into
// the internal ID representation used by the rewriting passes.
GramManager::GramManager(const vector<pair<string, string>> &baseGrams)
    : m_baseGrams(baseGrams)
{
    TransferBaseGrams();
}


// Nothing to release by hand: every member cleans up after itself.
GramManager::~GramManager() = default;

std::vector<std::pair<std::string, std::string>> GramManager::FormatCNFGrams()
{
    vector<pair<string, string>> result;

    TryMoveSingle();
    TryUpdateMulTerminalID();
    TryUpdateMulToTwoID();

    // start
    string keyGram = formatGram(m_startGramID);
    for (auto &subGramId : m_gramsSrcDest[m_startGramID]) {
        result.push_back(pair<string, string>(keyGram, formatGram(subGramId)));
    }

    for (auto &srcDestGramID : m_gramsSrcDest) {
        if (srcDestGramID.first == m_startGramID) {
            continue;
        }

        keyGram = formatGram(srcDestGramID.first);
        for (auto &subGramId : srcDestGramID.second) {
            result.push_back(pair<string, string>(keyGram, formatGram(subGramId)));
        }
    }

    return result;
}

void GramManager::TransferBaseGrams()
{
    if (m_baseGrams.empty()) {
        return;
    }

    unordered_map<string, string> umBaseDestMap;

    for (auto &gramKeyValue : m_baseGrams) {
        string &key = gramKeyValue.first;
        vector<string> grams = SplitByDelim(gramKeyValue.second, " ");

        if (umBaseDestMap.find(key) == umBaseDestMap.end()) {
            umBaseDestMap[key] = GramTransfer::GenNonTerminalGramID();
        }
        string destKey = umBaseDestMap[key];
        string destGram = "";

        for (auto &srcGram : grams) {
            if (umBaseDestMap.find(srcGram) == umBaseDestMap.end()) {
                if ('A' <= srcGram[0] && srcGram[0] <= 'Z') {
                    umBaseDestMap[srcGram] = GramTransfer::GenNonTerminalGramID();
                }
                else {
                    umBaseDestMap[srcGram] = GramTransfer::GenTerminalGramID();
                    m_terminalDestSrc[umBaseDestMap[srcGram]] = srcGram;
                }
            }

            destGram += umBaseDestMap[srcGram];
        }
        addGram(m_gramsSrcDest, destKey, destGram);
        addGram(m_gramsDestSrc, destGram, destKey);
    }

    m_startGramID = umBaseDestMap[m_baseGrams[0].first];

    for (auto &baseDestGram : umBaseDestMap) {
        m_destBaseMap[baseDestGram.second] = baseDestGram.first;
    }
}

// Splits str at every occurrence of delim and returns the non-empty pieces in
// order of appearance.
//
// str   - text to split; an empty string yields an empty result.
// delim - separator. An empty separator cannot split anything, so the whole
//         string is returned as a single piece (searching for an empty
//         separator would otherwise never advance).
//
// The search runs on str itself, so a multi-character separator is only
// recognised where it occurs completely inside str.
vector<string> GramManager::SplitByDelim(const string &str, const string &delim)
{
    vector<string> result;
    if (str.empty()) {
        return result;
    }
    if (delim.empty()) {
        result.push_back(str);
        return result;
    }

    string::size_type begin = 0;
    while (true) {
        const string::size_type hit = str.find(delim, begin);
        const string::size_type end = (hit == string::npos) ? str.length() : hit;

        // Consecutive, leading and trailing separators produce no piece.
        if (end > begin) {
            result.push_back(str.substr(begin, end - begin));
        }
        if (hit == string::npos) {
            break;
        }
        begin = hit + delim.length();
    }

    return result;
}



void GramManager::TryMoveSingle()
{
    bool haveSingleGramID = true;
    while (haveSingleGramID) {
        haveSingleGramID = false;
        for (auto &gramSrcDest : m_gramsSrcDest) {
            for (auto &gramDest : gramSrcDest.second) {
                if (GramTransfer::IsOneNonTerminalGramID(gramDest)) {
                    haveSingleGramID = true;

                    updateDestGram(gramSrcDest.first, gramDest, m_gramsSrcDest[gramDest]);

                    break;
                }
            }
            if (haveSingleGramID) {
                break;
            }
        }
    }
}

void GramManager::TryUpdateMulTerminalID()
{
    bool haveUpdateTerminalID = true;
    while (haveUpdateTerminalID) {
        haveUpdateTerminalID = false;
        for (auto &gramSrcDest : m_gramsSrcDest) {
            for (auto &gramDest : gramSrcDest.second) {
                string terminalGramID = GramTransfer::GetTerminalGramIDInMulGrams(gramDest);
                if (!terminalGramID.empty()) {
                    haveUpdateTerminalID = true;

                    string nonTerminalGramID = assureGetNoneTerminalID(terminalGramID);
                    string newDestGramID = gramDest;
                    size_t index = newDestGramID.find(terminalGramID);
                    while (index != string::npos) {
                        newDestGramID = newDestGramID.replace(index, terminalGramID.length(), nonTerminalGramID);
                        index = newDestGramID.find(terminalGramID);
                    }

                    updateDestSrcKey(gramDest, newDestGramID);

                    break;
                }
            }
            if (haveUpdateTerminalID) {
                break;
            }
        }
    }
}

void GramManager::TryUpdateMulToTwoID()
{
    bool haveUpdateGramID = true;
    while (haveUpdateGramID) {
        haveUpdateGramID = false;
        for (auto &gramSrcDest : m_gramsSrcDest) {
            for (auto &gramDest : gramSrcDest.second) {
                if (GramTransfer::GetSubGramIDCount(gramDest) > 2) {
                    haveUpdateGramID = true;

                    string wouldUpdateSubGramID = GramTransfer::GetWouldMoveSubGramID(gramDest);
                    string newWouldReplaceGramID = assureGetNoneTerminalID(wouldUpdateSubGramID);

                    string newDestGramID = gramDest;
                    newDestGramID = newDestGramID.replace(0, wouldUpdateSubGramID.length(), newWouldReplaceGramID);
                    updateDestSrcKey(gramDest, newDestGramID);

                    break;
                }
            }
            if (haveUpdateGramID) {
                break;
            }
        }
    }
}

// Replaces the production key -> oldGram by key -> g for every g in
// newGramList and keeps m_gramsSrcDest and m_gramsDestSrc in step.
//
// key and oldGram are taken by value because callers may pass strings that
// live inside the maps modified here.
//
// newGramList may be the very set stored under key (this happens for a
// production key -> key). Removing oldGram can empty that set and erase it
// from the map, which would leave newGramList dangling, so the replacement
// bodies are copied before anything is removed. In that aliased case oldGram
// is dropped from the copy, matching what iterating the live set after the
// removal would have seen.
void GramManager::updateDestGram(string key, string oldGram, const unordered_set<string> &newGramList)
{
    const auto keyPos = m_gramsSrcDest.find(key);
    const bool aliasesKeySet = keyPos != m_gramsSrcDest.end() && &keyPos->second == &newGramList;

    unordered_set<string> replacements(newGramList);
    if (aliasesKeySet) {
        replacements.erase(oldGram);
    }

    deleteGram(m_gramsSrcDest, key, oldGram);
    deleteGram(m_gramsDestSrc, oldGram, key);

    for (const auto &newGram : replacements) {
        addGram(m_gramsSrcDest, key, newGram);
        addGram(m_gramsDestSrc, newGram, key);
    }
}

// Removes gram from the set stored under key and drops the key once its set
// is empty. Does nothing when key is not present.
//
// The entry is located with find() instead of operator[]: operator[] would
// insert a temporary empty entry for a missing key, which can rehash the map
// and invalidate iterators held by callers.
void GramManager::deleteGram(unordered_map<string, unordered_set<string>> &grams, const string &key, const string &gram)
{
    const auto entry = grams.find(key);
    if (entry == grams.end()) {
        return;
    }

    entry->second.erase(gram);
    if (entry->second.empty()) {
        grams.erase(entry);
    }
}

// Records gram under key, creating the entry for key when it does not exist.
// Inserting into a set is a no-op for an element that is already present, so
// no separate membership test is needed.
void GramManager::addGram(unordered_map<string, unordered_set<string>> &grams, const string &key, const string &gram)
{
    grams[key].insert(gram);
}

std::string GramManager::assureGetNoneTerminalID(const string &terminalID)
{
    string noneTerminalID = "";
    auto iter = m_gramsDestSrc.find(terminalID);
    if (iter != m_gramsDestSrc.end()) {
        for (auto& tempNoneTerminalID : iter->second) {
            if (m_gramsSrcDest[tempNoneTerminalID].size() == 1) {
                noneTerminalID = tempNoneTerminalID;
                break;
            }
        }
    }

    if (noneTerminalID.empty()) {
        noneTerminalID = GramTransfer::GenNonTerminalGramID();
        m_gramsSrcDest[noneTerminalID].insert(terminalID);
        m_gramsDestSrc[terminalID].insert(noneTerminalID);
    }

    return noneTerminalID;
}

// Renames the body oldKey to newKey everywhere: every head that produced
// oldKey produces newKey afterwards, and the body -> heads index is moved
// from oldKey to newKey (merging with heads already listed under newKey).
//
// oldKey is taken by value because callers may pass a string that is stored
// inside the sets modified here.
void GramManager::updateDestSrcKey(string oldKey, const string &newKey)
{
    // References into an unordered_map stay valid when operator[] inserts
    // another element, so both can be held at the same time.
    unordered_set<string> &oldHeads = m_gramsDestSrc[oldKey];
    unordered_set<string> &newHeads = m_gramsDestSrc[newKey];
    newHeads.insert(oldHeads.begin(), oldHeads.end());

    for (const string &headID : oldHeads) {
        unordered_set<string> &bodies = m_gramsSrcDest[headID];
        bodies.erase(oldKey);
        bodies.insert(newKey);
    }

    m_gramsDestSrc.erase(oldKey);
}

std::string GramManager::formatGram(const string &gramID)
{
    string result;
    vector<string> grams = GramTransfer::Split(gramID);

    for (int i = 0; i < grams.size(); ++i) {
        if (i > 0) {
            result.push_back(' ');
        }

        if (m_destBaseMap.find(grams[i]) != m_destBaseMap.end()) {
            result.append(m_destBaseMap[grams[i]]);
        }
        else {
            result.append(grams[i]);
        }
    }

    return result;
}
View Code
复制代码

GramTransfer.h

复制代码
#pragma once

#include <string>
#include <vector>
#include <utility>
#include <unordered_map>

using namespace std;

// Helper functions for the fixed-width internal symbol IDs used by
// GramManager. An ID is one prefix letter (m_kNonTerminal or m_kTerminal)
// followed by a zero-padded decimal counter of m_kGramLen digits; a production
// body is the plain concatenation of such IDs.
class GramTransfer
{
public:
    GramTransfer();
    ~GramTransfer();

    // Return a new non-terminal / terminal ID. Each kind has its own counter
    // that starts at 0 and is shared by the whole program.
    static string GenNonTerminalGramID();
    static string GenTerminalGramID();

    // Cuts a body into its individual IDs.
    static vector<string> Split(const string &gramID);
    // Returns gramID with the first occurrence of src replaced by dest.
    static string Replace(string gramID, const string &src, const string &dest);

    // True when gramID is exactly one ID and that ID is a non-terminal.
    static bool IsOneNonTerminalGramID(const string &gramID);
    // Returns the first terminal ID of a body longer than one ID, or "" when
    // the body is not longer than one ID or contains no terminal.
    static string GetTerminalGramIDInMulGrams(const string &gramID);
    // Number of complete IDs in gramID.
    static int GetSubGramIDCount(const string &gramID);
    // Returns the first two IDs of a body with more than two IDs, "" otherwise.
    static string GetWouldMoveSubGramID(const string &gramID);

private:
    const static string m_kNonTerminal; // prefix letter of non-terminal IDs
    const static string m_kTerminal;    // prefix letter of terminal IDs
    const static int m_kGramLen;        // number of digits after the prefix letter
};
View Code
复制代码

GramTransfer.cpp

复制代码
#include "GramTransfer.h"

// Prefix letter that marks a non-terminal ID.
const string GramTransfer::m_kNonTerminal = "A";
// Prefix letter that marks a terminal ID.
const string GramTransfer::m_kTerminal = "a";
// Number of digits after the prefix letter; a complete ID is m_kGramLen + 1
// characters long.
const int GramTransfer::m_kGramLen = 3;

// The class only offers static helpers; there is nothing to set up.
GramTransfer::GramTransfer() = default;


// Nothing to release.
GramTransfer::~GramTransfer() = default;

std::string GramTransfer::GenNonTerminalGramID()
{
    static int index = -1;
    ++index;
    string strIndex = to_string(index);

    return m_kNonTerminal + string(m_kGramLen - strIndex.length(), '0') + strIndex;
}

std::string GramTransfer::GenTerminalGramID()
{
    static int index = -1;
    ++index;
    string strIndex = to_string(index);

    return m_kTerminal + string(m_kGramLen - strIndex.length(), '0') + strIndex;
}

std::vector<std::string> GramTransfer::Split(const string &gramID)
{
    vector<string> result;
    int realGramLen = m_kGramLen + 1;
    for (int start = 0; start + realGramLen <= gramID.length(); start += realGramLen) {
        result.push_back(gramID.substr(start, realGramLen));
    }

    return result;
}

// Returns gramID with the first occurrence of src replaced by dest. When src
// does not occur, gramID is returned unchanged.
std::string GramTransfer::Replace(string gramID, const string &src, const string &dest)
{
    const string::size_type at = gramID.find(src);
    if (at == string::npos) {
        return gramID;
    }

    gramID.replace(at, src.length(), dest);
    return gramID;
}

bool GramTransfer::IsOneNonTerminalGramID(const string &gramID)
{
    return gramID.length() == m_kGramLen + 1 && gramID[0] == m_kNonTerminal[0];
}

std::string GramTransfer::GetTerminalGramIDInMulGrams(const string &gramID)
{
    int realGramLen = m_kGramLen + 1;
    if (gramID.length() <= realGramLen) {
        return "";
    }

    for (int start = 0; start + realGramLen <= gramID.length(); start += realGramLen) {
        if (gramID[start] == m_kTerminal[0]) {
            return gramID.substr(start, realGramLen);
        }
    }

    return "";
}

int GramTransfer::GetSubGramIDCount(const string &gramID)
{
    int realGramLen = m_kGramLen + 1;
    return gramID.length() / realGramLen;
}

// Returns the first two IDs of a body that has more than two IDs; returns ""
// for shorter bodies.
std::string GramTransfer::GetWouldMoveSubGramID(const string &gramID)
{
    const bool hasMoreThanTwoIDs = GetSubGramIDCount(gramID) > 2;
    if (!hasMoreThanTwoIDs) {
        return "";
    }

    const int pairWidth = (m_kGramLen + 1) * 2;
    return gramID.substr(0, pairWidth);
}
View Code
复制代码

测试代码

复制代码
#include "GramManager.h"

#include <iostream>
#include <string>
#include <vector>
#include <utility>

using namespace std;

// Builds the sample grammar used by the demo: the usual expression grammar
// with heads E, T and F. Each entry is (head, body) with body symbols
// separated by spaces; the first entry's head is the start symbol.
vector<pair<string, string>> genBaseGram() {
    static const char *const kProductions[][2] = {
        { "E", "E + T" },
        { "E", "T" },
        { "T", "T * F" },
        { "T", "F" },
        { "F", "( E )" },
        { "F", "id" },
    };

    vector<pair<string, string>> grams;
    for (const auto &production : kProductions) {
        grams.push_back(pair<string, string>(production[0], production[1]));
    }

    return grams;
}

// Prints one production per line in the form "head -> body". Every line is
// flushed as it is written.
void printCNFGrams(vector<pair<string, string>> result) {
    typedef vector<pair<string, string>>::const_iterator ProductionIter;

    for (ProductionIter production = result.begin(); production != result.end(); ++production) {
        cout << production->first << " -> " << production->second << endl;
    }
}

int main() {
    GramManager *pGramManager = new GramManager(genBaseGram());
    vector<pair<string, string>> result = pGramManager->FormatCNFGrams();
    printCNFGrams(result);

    system("pause");
    
    return 0;
}
View Code
复制代码

 

如上,程序先将文法中的终结符和非终结符全部转换为固定长度为4的内部ID(一个前缀字母加3位数字:非终结符形如A000,终结符形如a000),这样在处理过程中便于按固定宽度切分和解析。

 

 

后续:

将普通文法转换为CNF文法之后,使用CYK算法识别串就很方便。因为从CNF的特性可以看出,CNF文法的语法分析树其实是一棵二叉树,终结符是它的叶子节点。

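CYK算法其实是动态规划:设 $F(A, x, y)$ 表示非终结符 $A$ 能否推导出输入串中第 $x$ 到第 $y$ 个符号组成的子串,则当 $x < y$ 时 $F(A, x, y) = \bigvee_{A \to BC}\ \bigvee_{x \le i < y} \big( F(B, x, i) \land F(C, i+1, y) \big)$;当 $x = y$ 时,$F(A, x, x)$ 为真当且仅当存在产生式 $A \to a_x$($a_x$ 为输入串的第 $x$ 个符号)。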

 

posted @   LCAC  阅读(273)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· 提示词工程——AI应用必不可少的技术
· Open-Sora 2.0 重磅开源!
点击右上角即可分享
微信分享提示