编译原理习题4.4.8关于构造CNF
一、CNF的格式是:
1、A->BC
2、D->a
从上面的CNF的格式可以看出:不存在:A->B 、A->BCD 或者A->bC之类的形式
所以从普通文法转CNF文法需要将上述的A->B 、A->BCD 或者A->bC转为CNF严格要求的范式。
二、通过上面对CNF格式的分析在编写转换CNF算法的时候需要考虑上面的几个不合格的样式
顺序:
1、先将A->B 转换为A->c 或者A->CD
2、将A->bC 转换为A->BC 、B->b之类的
3、将A->BCD 转换为A->ED 、E->BC之类的(如果不止三个非终结符也是一样的,先将前两个提取出来)
则CNF的转换过程有如下的形式
GramManager.h

#pragma once

#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <utility>

using namespace std;

/// Converts a context-free grammar, given as (head, body) string pairs, into
/// Chomsky Normal Form.
///
/// The body of each input production is a space-separated list of symbols.
/// Symbols are re-encoded internally as fixed-width IDs (see GramTransfer) so
/// that a production body can be stored as one concatenated string.
class GramManager
{
public:
    /// Stores a copy of @p baseGrams and encodes it into the internal tables.
    /// The head of the first production becomes the start symbol.
    GramManager(const vector<pair<string, string>> &baseGrams);
    ~GramManager();

    /// Runs the three rewriting passes (unit productions, terminals inside
    /// longer bodies, bodies longer than two symbols) and returns the
    /// resulting productions as (head, body) pairs, with the start symbol's
    /// productions listed first.
    vector<pair<string, string>> FormatCNFGrams();

private:
    // ---- rewriting passes -------------------------------------------------

    /// Encodes m_baseGrams into m_gramsSrcDest / m_gramsDestSrc.
    void TransferBaseGrams();
    /// Splits @p str on @p delim, dropping empty pieces.
    vector<string> SplitByDelim(const string &str, const string &delim);
    /// Removes bodies that consist of a single non-terminal (A -> B).
    void TryMoveSingle();
    /// Replaces terminals that occur inside multi-symbol bodies (A -> bC).
    void TryUpdateMulTerminalID();
    /// Shortens bodies with more than two symbols (A -> BCD).
    void TryUpdateMulToTwoID();

    // ---- table maintenance helpers ----------------------------------------

    /// Replaces body @p oldGram of head @p key by every body in @p newGramList.
    void updateDestGram(string key, string oldGram, const unordered_set<string> &newGramList);
    /// Removes @p gram from grams[key]; drops the key once its set is empty.
    void deleteGram(unordered_map<string, unordered_set<string>> &grams, const string &key, const string &gram);
    /// Adds @p gram to grams[key].
    void addGram(unordered_map<string, unordered_set<string>> &grams, const string &key, const string &gram);
    /// Returns a non-terminal whose only production is @p terminalID,
    /// creating one when none exists.
    string assureGetNoneTerminalID(const string &terminalID);
    /// Renames body @p oldKey to @p newKey for every head that produces it.
    void updateDestSrcKey(string oldKey, const string &newKey);
    /// Converts an internal body back to space-separated original symbols.
    string formatGram(const string &gramID);

    // ---- state --------------------------------------------------------------

    string m_startGramID;                                        // internal ID of the start symbol
    vector<pair<string, string>> m_baseGrams;                    // productions as supplied by the caller
    unordered_map<string, string> m_terminalDestSrc;             // terminal ID -> original terminal text
    unordered_map<string, unordered_set<string>> m_gramsSrcDest; // head ID -> encoded bodies
    unordered_map<string, unordered_set<string>> m_gramsDestSrc; // encoded body -> head IDs
    unordered_map<string, string> m_destBaseMap;                 // internal ID -> original symbol text
};
GramManager.cpp

#include "GramManager.h"
#include "GramTransfer.h"

// Internal representation used throughout this file:
//  * every grammar symbol is replaced by a fixed-width ID from GramTransfer;
//  * a production body is the concatenation of its symbols' IDs;
//  * m_gramsSrcDest maps a head to its bodies and m_gramsDestSrc is the exact
//    inverse (body -> heads). Both tables must always be updated together.

/// Copies the caller's productions and encodes them into the internal tables.
GramManager::GramManager(const vector<pair<string, string>> &baseGrams)
    : m_baseGrams(baseGrams)
{
    TransferBaseGrams();
}

GramManager::~GramManager()
{
}

/// Converts the grammar to CNF and returns it as (head, body) pairs in the
/// original symbol spelling. The start symbol's productions come first; the
/// order of everything else follows hash-table iteration order.
std::vector<std::pair<std::string, std::string>> GramManager::FormatCNFGrams()
{
    vector<pair<string, string>> result;
    TryMoveSingle();
    TryUpdateMulTerminalID();
    TryUpdateMulToTwoID();

    // find() rather than operator[]: an empty grammar must not gain a phantom
    // entry for the (empty) start symbol.
    const auto startIter = m_gramsSrcDest.find(m_startGramID);
    if (startIter != m_gramsSrcDest.end()) {
        const string head = formatGram(startIter->first);
        for (const auto &body : startIter->second) {
            result.emplace_back(head, formatGram(body));
        }
    }

    for (const auto &entry : m_gramsSrcDest) {
        if (entry.first == m_startGramID) {
            continue;
        }
        const string head = formatGram(entry.first);
        for (const auto &body : entry.second) {
            result.emplace_back(head, formatGram(body));
        }
    }
    return result;
}

/// Encodes m_baseGrams into the internal tables.
///
/// Heads always receive a non-terminal ID. A body symbol seen for the first
/// time is treated as a non-terminal when it starts with 'A'..'Z' and as a
/// terminal otherwise. The head of the first production is the start symbol.
void GramManager::TransferBaseGrams()
{
    if (m_baseGrams.empty()) {
        return;
    }

    unordered_map<string, string> baseToInternal;
    for (const auto &production : m_baseGrams) {
        const string &head = production.first;
        const vector<string> symbols = SplitByDelim(production.second, " ");

        if (baseToInternal.find(head) == baseToInternal.end()) {
            baseToInternal[head] = GramTransfer::GenNonTerminalGramID();
        }
        const string internalHead = baseToInternal[head];

        string internalBody;
        for (const auto &symbol : symbols) {
            auto known = baseToInternal.find(symbol);
            if (known == baseToInternal.end()) {
                string id;
                if ('A' <= symbol[0] && symbol[0] <= 'Z') {
                    id = GramTransfer::GenNonTerminalGramID();
                } else {
                    id = GramTransfer::GenTerminalGramID();
                    m_terminalDestSrc[id] = symbol;
                }
                known = baseToInternal.emplace(symbol, id).first;
            }
            internalBody += known->second;
        }

        addGram(m_gramsSrcDest, internalHead, internalBody);
        addGram(m_gramsDestSrc, internalBody, internalHead);
    }

    m_startGramID = baseToInternal[m_baseGrams[0].first];
    for (const auto &mapping : baseToInternal) {
        m_destBaseMap[mapping.second] = mapping.first;
    }
}

/// Splits @p str on @p delim and returns the non-empty pieces in order.
/// An empty @p delim yields @p str as a single piece (the previous
/// implementation never terminated in that case).
vector<string> GramManager::SplitByDelim(const string &str, const string &delim)
{
    vector<string> result;
    if (str.empty()) {
        return result;
    }
    if (delim.empty()) {
        result.push_back(str);
        return result;
    }

    string::size_type start = 0;
    while (start < str.length()) {
        string::size_type pos = str.find(delim, start);
        if (pos == string::npos) {
            pos = str.length();
        }
        if (pos > start) {
            result.push_back(str.substr(start, pos - start));
        }
        start = pos + delim.length();
    }
    return result;
}

/// Eliminates unit productions (A -> B).
///
/// For every head the set of non-terminals reachable through unit productions
/// is computed, and the head receives every non-unit body of that set. Unlike
/// repeated textual substitution this terminates on unit cycles
/// (A -> B, B -> A) and never reads a set while it is being modified.
/// Heads left without any body are dropped.
void GramManager::TryMoveSingle()
{
    unordered_map<string, unordered_set<string>> resolved;
    for (const auto &entry : m_gramsSrcDest) {
        const string &head = entry.first;

        unordered_set<string> visited;
        visited.insert(head);
        vector<string> pending(1, head);
        unordered_set<string> bodies;

        while (!pending.empty()) {
            const string current = pending.back();
            pending.pop_back();

            const auto found = m_gramsSrcDest.find(current);
            if (found == m_gramsSrcDest.end()) {
                continue; // non-terminal without productions contributes nothing
            }
            for (const auto &body : found->second) {
                if (GramTransfer::IsOneNonTerminalGramID(body)) {
                    if (visited.insert(body).second) {
                        pending.push_back(body);
                    }
                } else {
                    bodies.insert(body);
                }
            }
        }

        if (!bodies.empty()) {
            resolved.emplace(head, std::move(bodies));
        }
    }

    m_gramsSrcDest = std::move(resolved);

    // The inverse index is rebuilt from scratch so both tables stay in sync.
    m_gramsDestSrc.clear();
    for (const auto &entry : m_gramsSrcDest) {
        for (const auto &body : entry.second) {
            m_gramsDestSrc[body].insert(entry.first);
        }
    }
}

/// Replaces every terminal that appears inside a multi-symbol body by a
/// non-terminal that produces exactly that terminal (A -> bC becomes
/// A -> BC, B -> b).
void GramManager::TryUpdateMulTerminalID()
{
    while (true) {
        // Phase 1: locate one offending body without touching the tables.
        string targetBody;
        string terminalGramID;
        bool found = false;
        for (const auto &entry : m_gramsSrcDest) {
            for (const auto &body : entry.second) {
                terminalGramID = GramTransfer::GetTerminalGramIDInMulGrams(body);
                if (!terminalGramID.empty()) {
                    targetBody = body;
                    found = true;
                    break;
                }
            }
            if (found) {
                break;
            }
        }
        if (!found) {
            return;
        }

        // Phase 2: rewrite it. Working on copies keeps this safe even though
        // the helpers insert into and erase from the tables.
        const string nonTerminalGramID = assureGetNoneTerminalID(terminalGramID);
        string newBody = targetBody;
        size_t index = newBody.find(terminalGramID);
        while (index != string::npos) {
            newBody.replace(index, terminalGramID.length(), nonTerminalGramID);
            index = newBody.find(terminalGramID, index + nonTerminalGramID.length());
        }
        updateDestSrcKey(targetBody, newBody);
    }
}

/// Shortens every body with more than two symbols by folding its first two
/// symbols into one non-terminal (A -> BCD becomes A -> ED, E -> BC), and
/// repeats until no body has more than two symbols.
void GramManager::TryUpdateMulToTwoID()
{
    while (true) {
        // Phase 1: locate one body that is still too long.
        string targetBody;
        bool found = false;
        for (const auto &entry : m_gramsSrcDest) {
            for (const auto &body : entry.second) {
                if (GramTransfer::GetSubGramIDCount(body) > 2) {
                    targetBody = body;
                    found = true;
                    break;
                }
            }
            if (found) {
                break;
            }
        }
        if (!found) {
            return;
        }

        // Phase 2: replace its two leading symbols by a single non-terminal.
        const string leadingPair = GramTransfer::GetWouldMoveSubGramID(targetBody);
        const string pairHead = assureGetNoneTerminalID(leadingPair);
        string newBody = targetBody;
        newBody.replace(0, leadingPair.length(), pairHead);
        updateDestSrcKey(targetBody, newBody);
    }
}

/// Replaces body @p oldGram of head @p key by every body in @p newGramList,
/// keeping both tables in sync.
void GramManager::updateDestGram(string key, string oldGram, const unordered_set<string> &newGramList)
{
    // Copy first: newGramList may alias a set stored in m_gramsSrcDest that
    // the deletions below modify or destroy.
    const unordered_set<string> replacements(newGramList);

    deleteGram(m_gramsSrcDest, key, oldGram);
    deleteGram(m_gramsDestSrc, oldGram, key);
    for (const auto &newGram : replacements) {
        addGram(m_gramsSrcDest, key, newGram);
        addGram(m_gramsDestSrc, newGram, key);
    }
}

/// Removes @p gram from grams[key] and drops @p key once its set is empty.
/// Does nothing when @p key is absent.
void GramManager::deleteGram(unordered_map<string, unordered_set<string>> &grams, const string &key, const string &gram)
{
    const auto iter = grams.find(key);
    if (iter == grams.end()) {
        return;
    }
    iter->second.erase(gram);
    if (iter->second.empty()) {
        grams.erase(iter);
    }
}

/// Adds @p gram to grams[key], creating the entry when needed.
void GramManager::addGram(unordered_map<string, unordered_set<string>> &grams, const string &key, const string &gram)
{
    grams[key].insert(gram);
}

/// Returns a non-terminal whose one and only production is @p terminalID.
///
/// Despite the parameter name, @p terminalID may be any encoded body: the
/// callers pass either a single terminal or a pair of symbols. An existing
/// head is reused only when it has no other production; otherwise a fresh
/// non-terminal is generated and registered in both tables.
std::string GramManager::assureGetNoneTerminalID(const string &terminalID)
{
    const auto heads = m_gramsDestSrc.find(terminalID);
    if (heads != m_gramsDestSrc.end()) {
        for (const auto &candidate : heads->second) {
            const auto produced = m_gramsSrcDest.find(candidate);
            if (produced != m_gramsSrcDest.end() && produced->second.size() == 1) {
                return candidate;
            }
        }
    }

    const string freshID = GramTransfer::GenNonTerminalGramID();
    m_gramsSrcDest[freshID].insert(terminalID);
    m_gramsDestSrc[terminalID].insert(freshID);
    return freshID;
}

/// Renames body @p oldKey to @p newKey for every head that produces it.
/// Does nothing when the keys are equal or @p oldKey is unknown.
void GramManager::updateDestSrcKey(string oldKey, const string &newKey)
{
    if (oldKey == newKey) {
        return; // merging into itself and then erasing would lose the entry
    }
    const auto oldIter = m_gramsDestSrc.find(oldKey);
    if (oldIter == m_gramsDestSrc.end()) {
        return;
    }

    // Take the heads out before touching the map again.
    const unordered_set<string> heads = std::move(oldIter->second);
    m_gramsDestSrc.erase(oldIter);

    for (const auto &head : heads) {
        unordered_set<string> &bodies = m_gramsSrcDest[head];
        bodies.erase(oldKey);
        bodies.insert(newKey);
    }
    m_gramsDestSrc[newKey].insert(heads.begin(), heads.end());
}

/// Converts an encoded body back to the caller's spelling: the symbols are
/// separated by single spaces, and IDs without an original spelling
/// (non-terminals introduced during conversion) are printed as the ID itself.
std::string GramManager::formatGram(const string &gramID)
{
    string result;
    const vector<string> grams = GramTransfer::Split(gramID);
    for (size_t i = 0; i < grams.size(); ++i) {
        if (i > 0) {
            result.push_back(' ');
        }
        const auto original = m_destBaseMap.find(grams[i]);
        if (original != m_destBaseMap.end()) {
            result.append(original->second);
        } else {
            result.append(grams[i]);
        }
    }
    return result;
}
GramTransfer.h

#pragma once

#include <string>
#include <vector>
#include <utility>
#include <unordered_map>

using namespace std;

/// Stateless helpers for the fixed-width symbol IDs used to encode a grammar.
///
/// An ID is one prefix character (the non-terminal or terminal prefix)
/// followed by m_kGramLen digits; an encoded production body is a
/// concatenation of such IDs.
class GramTransfer
{
public:
    GramTransfer();
    ~GramTransfer();

    // ---- ID generation ------------------------------------------------------

    /// Returns the next unused non-terminal ID.
    static string GenNonTerminalGramID();
    /// Returns the next unused terminal ID.
    static string GenTerminalGramID();

    // ---- encoded-body utilities ---------------------------------------------

    /// Cuts an encoded body into its individual IDs.
    static vector<string> Split(const string &gramID);
    /// Returns @p gramID with the first occurrence of @p src replaced by @p dest.
    static string Replace(string gramID, const string &src, const string &dest);
    /// True when @p gramID is exactly one non-terminal ID.
    static bool IsOneNonTerminalGramID(const string &gramID);
    /// Returns the first terminal ID inside a body of more than one symbol,
    /// or an empty string when there is none.
    static string GetTerminalGramIDInMulGrams(const string &gramID);
    /// Number of IDs contained in @p gramID.
    static int GetSubGramIDCount(const string &gramID);
    /// Returns the two leading IDs of a body with more than two symbols,
    /// or an empty string for shorter bodies.
    static string GetWouldMoveSubGramID(const string &gramID);

private:
    static const string m_kNonTerminal; // prefix of non-terminal IDs
    static const string m_kTerminal;    // prefix of terminal IDs
    static const int m_kGramLen;        // number of digits following the prefix
};
GramTransfer.cpp

#include "GramTransfer.h"

#include <stdexcept>

const string GramTransfer::m_kNonTerminal = "A";
const string GramTransfer::m_kTerminal = "a";
const int GramTransfer::m_kGramLen = 3;

namespace {

/// Builds prefix + index, zero-padded to exactly @p digits digits.
///
/// Every parser in this file assumes fixed-width IDs, so an index that no
/// longer fits cannot be represented. The previous code let the padding
/// length wrap around as an unsigned value; this reports the condition
/// explicitly with the same exception type (std::length_error).
string makeGramID(const string &prefix, int index, int digits)
{
    const string number = to_string(index);
    if (number.length() > static_cast<size_t>(digits)) {
        throw std::length_error("GramTransfer: no fixed-width grammar symbol IDs left");
    }
    return prefix + string(static_cast<size_t>(digits) - number.length(), '0') + number;
}

} // namespace

GramTransfer::GramTransfer()
{
}

GramTransfer::~GramTransfer()
{
}

/// Returns the next non-terminal ID ("A000", "A001", ...). The counter is
/// shared by the whole program.
std::string GramTransfer::GenNonTerminalGramID()
{
    static int nextIndex = 0;
    return makeGramID(m_kNonTerminal, nextIndex++, m_kGramLen);
}

/// Returns the next terminal ID ("a000", "a001", ...). The counter is
/// independent of the non-terminal counter.
std::string GramTransfer::GenTerminalGramID()
{
    static int nextIndex = 0;
    return makeGramID(m_kTerminal, nextIndex++, m_kGramLen);
}

/// Cuts an encoded body into its IDs. Trailing characters that do not form a
/// complete ID are ignored.
std::vector<std::string> GramTransfer::Split(const string &gramID)
{
    const size_t width = static_cast<size_t>(m_kGramLen) + 1;
    vector<string> result;
    for (size_t start = 0; start + width <= gramID.length(); start += width) {
        result.push_back(gramID.substr(start, width));
    }
    return result;
}

/// Returns @p gramID with the first occurrence of @p src replaced by
/// @p dest; returns it unchanged when @p src does not occur.
std::string GramTransfer::Replace(string gramID, const string &src, const string &dest)
{
    const string::size_type pos = gramID.find(src);
    if (pos != string::npos) {
        gramID.replace(pos, src.length(), dest);
    }
    return gramID;
}

/// True when @p gramID is exactly one ID and that ID is a non-terminal.
bool GramTransfer::IsOneNonTerminalGramID(const string &gramID)
{
    const size_t width = static_cast<size_t>(m_kGramLen) + 1;
    return gramID.length() == width && gramID[0] == m_kNonTerminal[0];
}

/// Returns the first terminal ID found in a body that is longer than one
/// symbol. Returns an empty string for single-symbol bodies and for bodies
/// without terminals.
std::string GramTransfer::GetTerminalGramIDInMulGrams(const string &gramID)
{
    const size_t width = static_cast<size_t>(m_kGramLen) + 1;
    if (gramID.length() <= width) {
        return "";
    }
    for (size_t start = 0; start + width <= gramID.length(); start += width) {
        if (gramID[start] == m_kTerminal[0]) {
            return gramID.substr(start, width);
        }
    }
    return "";
}

/// Number of complete IDs contained in @p gramID.
int GramTransfer::GetSubGramIDCount(const string &gramID)
{
    const size_t width = static_cast<size_t>(m_kGramLen) + 1;
    return static_cast<int>(gramID.length() / width);
}

/// Returns the two leading IDs of a body with more than two symbols, or an
/// empty string when the body has two symbols or fewer.
std::string GramTransfer::GetWouldMoveSubGramID(const string &gramID)
{
    if (GetSubGramIDCount(gramID) <= 2) {
        return "";
    }
    const size_t width = static_cast<size_t>(m_kGramLen) + 1;
    return gramID.substr(0, width * 2);
}
测试代码

#include "GramManager.h"

#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include <utility>

using namespace std;

/// Returns the classic expression grammar
///   E -> E + T | T,  T -> T * F | F,  F -> ( E ) | id
/// as (head, space-separated body) pairs; the first head is the start symbol.
vector<pair<string, string>> genBaseGram()
{
    vector<pair<string, string>> result;
    result.emplace_back("E", "E + T");
    result.emplace_back("E", "T");
    result.emplace_back("T", "T * F");
    result.emplace_back("T", "F");
    result.emplace_back("F", "( E )");
    result.emplace_back("F", "id");
    return result;
}

/// Prints one production per line as "head -> body".
/// Takes the list by const reference so it is not copied.
void printCNFGrams(const vector<pair<string, string>> &result)
{
    for (const auto &gramKeyVal : result) {
        cout << gramKeyVal.first << " -> " << gramKeyVal.second << '\n';
    }
    // One flush at the end instead of one per line; it must happen before
    // main() hands the console to the child process started by system().
    cout.flush();
}

int main()
{
    // Automatic storage: the manager was previously allocated with new and
    // never deleted.
    GramManager gramManager(genBaseGram());
    const vector<pair<string, string>> result = gramManager.FormatCNFGrams();
    printCNFGrams(result);

    // NOTE(review): "pause" is a Windows shell command; on other platforms
    // this call only prints a shell error. Kept to preserve the original
    // behaviour of holding the console window open.
    system("pause");
    return 0;
}
如上,先将文法中的符号(终结符和非终结符)全部转换为长度为4的内部ID(一个前缀字母加3位数字,例如 A000、a000),这样产生式右部可以直接拼接成一个字符串,程序在处理过程中便于按固定长度切分和解析。
后续:
将普通文法转换为CNF文法之后,使用CYK算法识别一个串就很方便。因为从CNF的特性可以看出,CNF文法的语法分析树其实是一棵二叉树,终结符是它的叶子节点。
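CYK算法其实是动态规划。设输入串为 $a_1 a_2 \dots a_n$,$F(x,y)$ 表示能够推导出子串 $a_x \dots a_y$ 的非终结符集合,则:$F(x,x) = \{A \mid A \to a_x\}$,$F(x,y) = \{A \mid A \to BC,\ B \in F(x,i),\ C \in F(i+1,y),\ x \le i < y\}$。当开始符号属于 $F(1,n)$ 时,该串属于文法的语言。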
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· 提示词工程——AI应用必不可少的技术
· Open-Sora 2.0 重磅开源!