一个 C/C++ 声明器的实现
// Declarer.h
//
#ifndef DECLARER_H
#define DECLARER_H
#include <vector>
#include <string>
// Direction in which the parser looks for the next token that has not been
// visited yet: LEFT is negative, RIGHT is positive.
enum DIRECTION
{
    LEFT = -1,
    RIGHT = 1
};
// Translates a C/C++ declaration (for example "const int *y") into a
// human-readable description.
//
// The work is done in three passes:
//   1. preParse()     splits the text into tokens            -> m_preParsed
//   2. parseToQueue() orders the token indexes for reading   -> m_inorder
//   3. translate()    turns every ordered token into text    -> m_afterParsed
class Declarer
{
public:
    Declarer() ;
    ~Declarer() ;

public:
    // Interface
    // Runs the three passes on 'declare' and returns the resulting description.
    std::string operator()(const std::string &declare) ;

private:
    // logic
    void setDeclare(const std::string &declare) ;   // Store the text to be parsed in 'm_unParsed'
    std::string getParseResult() ;                  // Return a copy of 'm_afterParsed'
    void translate() ;                              // Translate the 'm_inorder' sequence into a humanized statement
    void preParse() ;                               // Split the declaration into independent tokens in 'm_preParsed'
    void parseToQueue() ;                           // Put the reading order of the tokens into 'm_inorder'
    void parseStatement(int begin, int end) ;       // Order the tokens in [begin, end); 'end' itself is excluded

    // NOTE(review): no definition of intoArray, intoFunction or nextLeftPos
    // is visible next to the other member definitions - confirm before
    // calling any of them.
    void intoArray(int begin, int end) ;
    void intoFunction(int begin, int end) ;

    bool isKeyWord(const std::string &str) ;        // true when 'str' is one of the KEY_WORD entries

    // 'pos' is a position in 'm_inorder'; the token it designates is
    // m_preParsed[m_inorder[pos]]. The translation of that token is appended
    // to 'm_afterParsed'.
    void parseWord(int pos) ;

    // Returns the index in 'm_preParsed' of the first token at or after
    // 'begin' that is not a keyword, or -1 when there is none.
    int nextIdentifier(int begin) ;

    // Searches 'm_passedInPreParsed' from 'pos' (exclusive) in 'direction'
    // and returns the index of the first element that is false, or -1 when
    // there is none.
    int nextPos(int pos, enum DIRECTION direction) ;
    int nextLeftPos(int pos) ;

    // Takes the position of the first token of an argument and returns the
    // position of the token that ends it (a top-level ',' or the closing ')'),
    // or the token count when no such token exists.
    int argumentEndPos(int begin) ;

    void setSequence(int pos) ;                     // Put 'pos' into 'm_inorder' and flag it as passed
    void unsetSequence(int pos) ;                   // Take 'pos' out of 'm_inorder' and clear its passed flag

public:
    // Debug
    void ShowInorderString() ;                      // Print every 'm_inorder' entry with its token to stdout

private:
    // data
    std::string m_unParsed ;                        // The declaration exactly as it was handed in
    std::vector<std::string> m_preParsed ;          // The declaration split into tokens
    std::string m_afterParsed ;                     // The description built by translate()
    std::vector<bool> m_passedInPreParsed ;         // One flag per token: already placed in 'm_inorder'?
    std::vector<size_t> m_inorder ;                 // The parse order (indexes into 'm_preParsed')

    static const char *DIVIDER ;                    // Characters that separate tokens
    // Declared without a 'Declarer::' prefix: a qualified name is not allowed
    // on a member declaration inside its own class and only some compilers
    // tolerate it. The out-of-class definition keeps the qualified form.
    static const char KEY_WORD[][10] ;              // Tokens recognised as keywords
};
// Returns true when 'a' equals one of the characters of the NUL-terminated
// string 'excludeChars', otherwise false.
bool isOf(const char a, const char *excludeChars) ;
// Returns the first position at or after 'begin' that does not hold a blank;
// when there is no such position, returns the length of 'str'.
int firstNonBlank(const std::string &str, int begin) ;
// Returns true when every character of 'str' is in 'a'..'z' or 'A'..'Z'
// (an empty string therefore also yields true).
bool isWord(const std::string &str) ;
// Returns true when 'str' is exactly "int".
bool isType(const std::string &str) ;
// Debug helper: prints every element of 'v' on its own line to stdout.
void showPreParsed(const std::vector<std::string> &v) ;
// Returns the opposite direction: RIGHT for LEFT, LEFT for anything else.
enum DIRECTION reverseDirection(enum DIRECTION direction) ;
#endif //DECLARER_H
// Declarer.cpp
//
#include "StdAfx.h"
#include "Declarer.h"
#include <iostream>
#include <cassert>
#include <algorithm>
#include <iomanip>
using namespace std ;
// Characters that end a word while preParse() splits the declaration. Every
// divider except the blank is also stored as a one-character token.
const char *Declarer::DIVIDER = " *()[]," ;
// Tokens treated as keywords. The order matters: parseWord() picks its
// translation by comparing the token against KEY_WORD[0] .. KEY_WORD[7].
const char Declarer::KEY_WORD[][10] = {
"int", // [0]
"const", // [1]
"*", // [2]
"[","]", // [3], [4]
"(", ")", // [5], [6]
"," // [7]
} ;
// const int * const (*fun1)(int (*fun2)(const int arg21[], int const arg22), int arg1)
// Creates a parser that holds no declaration yet: every member starts empty.
Declarer::Declarer()
    : m_unParsed()
    , m_preParsed()
    , m_afterParsed()
    , m_passedInPreParsed()
    , m_inorder()
{
}
// Nothing to release by hand: the string and vector members clean up after
// themselves.
Declarer::~Declarer()
{
}
// Interface
string Declarer::operator()(const std::string &declare)
{
setDeclare(declare) ;
// Split original line into single word
preParse() ;
// Resort the original sequence into parse-able sequence
parseToQueue() ;
// Translate the parse-able sequence into human language
translate() ;
return getParseResult() ;
}
// logic
// Remembers the raw declaration text that the following passes work on.
void Declarer::setDeclare(const std::string &declare)
{
    m_unParsed.assign(declare) ;
}
string Declarer::getParseResult()
{
return m_afterParsed ;
}
// Walks 'm_inorder' front to back and lets parseWord() append the text for
// each entry.
void Declarer::translate()
{
    const int total = static_cast<int>(m_inorder.size()) ;
    int index = 0 ;
    while (index < total)
    {
        parseWord(index) ;
        ++index ;
    }
}
// Splits 'm_unParsed' into tokens and appends them to 'm_preParsed'.
//
// Every character listed in DIVIDER ends the current word. A divider other
// than the blank is stored as a one-character token of its own; blanks are
// dropped. The run of characters between two dividers is stored as one word.
void Declarer::preParse()
{
    const int len = static_cast<int>(m_unParsed.length()) ;
    int i = 0 ;
    while (i < len)
    {
        if (isOf(m_unParsed[i], DIVIDER))
        {
            // We don't push Blank even though Blank is a divider
            if (' ' != m_unParsed[i])
            {
                m_preParsed.push_back(std::string(1, m_unParsed[i])) ;
            }//if
            ++i ;
        }
        else
        {
            // 'm_unParsed[i]' starts a word: extend 'j' to the next divider
            // (or to the end of the text) and store [i, j) as one token.
            int j = i ;
            while (j < len && !isOf(m_unParsed[j], DIVIDER))
            {
                ++j ;
            }//while
            m_preParsed.push_back(m_unParsed.substr(i, j - i)) ;
            i = j ;
        }//if
    }//while
    // Every word is flushed inside the loop, so nothing is left to push here.
}
// Orders the whole token list: the statement spans every entry of
// 'm_preParsed'.
void Declarer::parseToQueue()
{
    const int first = 0 ;
    const int onePastLast = static_cast<int>(m_preParsed.size()) ;
    parseStatement(first, onePastLast) ;
}
void Declarer::parseStatement(int begin, int end)
{
enum DIRECTION direction = RIGHT ;
m_passedInPreParsed.resize(m_preParsed.size(), false) ;
int pos = nextIdentifier(begin) ;
if (-1 == pos)
{
return ;
}//if
while (0 < count(m_passedInPreParsed.begin() + begin, m_passedInPreParsed.begin() + end, false))
{
string str = m_preParsed[pos] ;
setSequence(pos) ;
// Get the next direction
if ("(" == m_preParsed[pos])
{
direction = RIGHT ;
pos = nextPos(pos, direction) ;
// If '(' after an identifier, it seem to be a function pointer
// we call this funciton recursively
if (end > pos && "(" == m_preParsed[pos])
{
// We put '(' at the begin of function argument
// and then go into recursion
setSequence(pos++) ;
// We signed the argument begin and end position for future use
int argumentBegin = pos ;
int argumentEnd = argumentEndPos(argumentBegin) ;
// We travel all arguments, each argument is a recursion
while (true)
{
parseStatement(argumentBegin, argumentEnd) ;
setSequence(argumentEnd) ;
// If there are multiple arguments, we continue to recurse
if ("," == m_preParsed[argumentEnd])
{
argumentBegin = argumentEnd + 1 ;
argumentEnd = argumentEndPos(argumentBegin) ;
}
else if (")" == m_preParsed[argumentEnd])
{
break ;
}
else
{
cerr <<"Unknow argument end:" <<m_preParsed[argumentEnd] ;
}//if
}//while
direction = LEFT ;
}//if "("
}
else if (")" == m_preParsed[pos])
{
direction = LEFT ;
}//if "("
// If direction > 0, we go right, if < 0, we go left
if (begin > nextPos(pos, direction) || nextPos(pos, direction) >= end)
{
// Go reverse
direction = reverseDirection(direction) ;
}//if
pos = nextPos(pos, direction) ;
}//while
}
// Finds the identifier of a statement: the first token at or after 'begin'
// that is not a keyword. Returns its index in 'm_preParsed', or -1 when every
// remaining token is a keyword.
int Declarer::nextIdentifier(int begin)
{
    const int total = static_cast<int>(m_preParsed.size()) ;
    int cursor = begin ;
    // Skip over keywords until something else (or the end) shows up
    while (cursor < total && isKeyWord(m_preParsed[cursor]))
    {
        ++cursor ;
    }//while
    return cursor < total ? cursor : -1 ;
}
// Tells whether 'str' is one of the entries of KEY_WORD.
//
// returns: true when 'str' equals a KEY_WORD entry, false otherwise.
bool Declarer::isKeyWord(const std::string &str)
{
    const size_t keyWordCount = sizeof(KEY_WORD) / sizeof(KEY_WORD[0]) ;
    // Compare through std::string so no C string function (and therefore no
    // extra header) is needed, and the index has the same type as the count.
    for (size_t j = 0 ; j < keyWordCount ; ++j)
    {
        if (str == KEY_WORD[j])
        {
            return true ;
        }//if
    }//for j = 0
    return false ;
}
// Appends the description of one token to 'm_afterParsed'.
//
// pos: a position in 'm_inorder'; the token is m_preParsed[m_inorder[pos]].
// The tokens directly before and after it in reading order decide some of
// the wording (for example whether "const" applies).
void Declarer::parseWord(int pos)
{
    assert(m_preParsed.size() == m_inorder.size()) ; // both containers must describe the same tokens
    const int total = (int)m_preParsed.size() ;

    // Neighbours in reading order; empty when 'pos' is the first / last entry
    std::string previous ;
    if (0 <= pos - 1)
    {
        previous = m_preParsed[m_inorder[pos - 1]] ;
    }//if
    const std::string current = m_preParsed[m_inorder[pos]] ;
    std::string following ;
    if (total > pos + 1)
    {
        following = m_preParsed[m_inorder[pos + 1]] ;
    }//if

    // Index of 'current' in KEY_WORD; equals the entry count for an identifier
    const int keyWordCount = (int)(sizeof(KEY_WORD) / sizeof(KEY_WORD[0])) ;
    int which = 0 ;
    while (which < keyWordCount && current != KEY_WORD[which])
    {
        ++which ;
    }//while

    std::string text ;
    switch (which)
    {
    case 0 : // int
        text = " int 型" ;
        if ("const" == previous || "const" == following)
        {
            text += "常" ;
        }
        else
        {
            text += "变" ;
        }//if
        text += "量; " ;
        break ;
    case 1 : // const - expressed through its neighbours, no text of its own
        break ;
    case 2 : // *
        {
            const std::string qualifier = "const" == previous ? "常" : "" ;
            text = qualifier ;
            text += "指针, 这个" ;
            text += qualifier ;
            text += "指针指向一个" ;
        }
        break ;
    case 3 : // [
        text = "数组, 这个数组存放的是" ;
        break ;
    case 4 : // ]
        break ;
    case 5 : // (
        if ("(" == previous)
        {
            text = "函数, 这个函数有如下参数: " ;
        }//if
        break ;
    case 6 : // )
        {
            // If a type shows up between this ")" and the next "(" in
            // reading order, it is taken as a function return type
            bool functionReturn = false ;
            for (int scan = pos ; (int)m_inorder.size() > scan ; ++scan)
            {
                const std::string &token = m_preParsed[m_inorder[scan]] ;
                if ("(" == token)
                {
                    break ;
                }//if
                if (isType(token))
                {
                    functionReturn = true ;
                    break ;
                }//if
            }//for
            if (functionReturn)
            {
                text = "函数返回" ;
            }//if
        }
        break ;
    case 7 : // ,
        text = "另一个参数是" ;
        break ;
    default : // identifier
        text = " " + current + " 是一个" ;
        break ;
    }//switch
    m_afterParsed += text ;
}
// Looks for the nearest token next to 'pos' (exclusive) in 'direction' whose
// flag in 'm_passedInPreParsed' is still false.
// Returns its index, or -1 when there is none on that side.
int Declarer::nextPos(int pos, enum DIRECTION direction)
{
    if (LEFT != direction)
    {
        // Anything but LEFT is handled as RIGHT: walk towards the back
        const int total = (int)m_passedInPreParsed.size() ;
        int candidate = pos + 1 ;
        while (candidate < total)
        {
            if (!m_passedInPreParsed[candidate])
            {
                return candidate ;
            }//if
            ++candidate ;
        }//while
        return -1 ;
    }//if

    // LEFT: walk towards the front
    int candidate = pos - 1 ;
    while (candidate >= 0)
    {
        if (!m_passedInPreParsed[candidate])
        {
            return candidate ;
        }//if
        --candidate ;
    }//while
    return -1 ;
}
// Finds where the function argument starting at 'begin' ends.
//
// Returns the index of the first ',' met outside any nested parentheses, or
// of the ')' that has no matching '(' at or after 'begin'. When neither is
// found the scan stops at the token count and that value is returned.
int Declarer::argumentEndPos(int begin)
{
    assert(0 <= begin) ;
    const int total = (int)m_preParsed.size() ;
    int depth = 0 ; // number of '(' opened since 'begin' and not yet closed
    int cursor = begin ;
    for ( ; cursor < total ; ++cursor)
    {
        const std::string &token = m_preParsed[cursor] ;
        if ("(" == token)
        {
            ++depth ;
            continue ;
        }//if
        if (")" == token)
        {
            --depth ;
            if (0 > depth)
            {
                break ; // this ')' closes the argument list itself
            }//if
            continue ;
        }//if
        if ("," == token && 0 == depth)
        {
            break ; // separator between two arguments of this list
        }//if
    }//for
    return cursor ;
}
// Records token 'pos' (an index into 'm_preParsed') as the next one in
// reading order and flags it as passed.
void Declarer::setSequence(int pos)
{
    m_passedInPreParsed[pos] = true ;
    const size_t tokenIndex = pos ;
    m_inorder.push_back(tokenIndex) ;
}
// Undoes setSequence(pos): removes token 'pos' (an index into 'm_preParsed')
// from 'm_inorder' and clears its passed flag.
//
// 'pos' is a token index, not a position inside 'm_inorder', so the entry is
// located by value. The most recently added match is removed. Values that do
// not designate a token are ignored.
void Declarer::unsetSequence(int pos)
{
    if (0 > pos)
    {
        return ;
    }//if
    const size_t tokenIndex = static_cast<size_t>(pos) ;

    // Search from the back so the entry pushed last is the one taken out
    for (size_t i = m_inorder.size() ; i > 0 ; --i)
    {
        if (tokenIndex == m_inorder[i - 1])
        {
            m_inorder.erase(m_inorder.begin() + (i - 1)) ;
            break ;
        }//if
    }//for

    if (tokenIndex < m_passedInPreParsed.size())
    {
        m_passedInPreParsed[tokenIndex] = false ;
    }//if
}
// Debug
// Debug output: one line per 'm_inorder' entry, showing the token index
// (width 3) followed by the token itself.
void Declarer::ShowInorderString()
{
    for (std::vector<size_t>::const_iterator entry = m_inorder.begin() ; entry != m_inorder.end() ; ++entry)
    {
        const size_t tokenIndex = *entry ;
        std::cout <<std::setw(3) <<tokenIndex <<": " <<m_preParsed[tokenIndex] <<std::endl ;
    }//for
}
//
// Returns true when 'a' equals one of the characters of the NUL-terminated
// string 'excludeChars'; the terminator itself never matches.
bool isOf(const char a, const char *excludeChars)
{
    assert(NULL != excludeChars) ;
    for (const char *cursor = excludeChars ; '\0' != *cursor ; ++cursor)
    {
        if (*cursor == a)
        {
            return true ;
        }//if
    }//for
    return false ;
}
// Returns the first position at or after 'begin' whose character is not a
// blank, or the length of 'str' when only blanks (or nothing) follow.
int firstNonBlank(const std::string &str, int begin)
{
    assert(0 <= begin) ;
    const int total = (int)str.length() ;
    int cursor = begin ;
    // Step over blanks until another character or the end is reached
    while (cursor < total && isOf(str[cursor], " "))
    {
        ++cursor ;
    }//while
    return cursor < total ? cursor : total ;
}
// Returns true when every character of 'str' is a letter in 'a'..'z' or
// 'A'..'Z'. An empty string has no offending character and yields true.
bool isWord(const std::string &str)
{
    for (std::string::const_iterator it = str.begin() ; it != str.end() ; ++it)
    {
        const char ch = *it ;
        const bool lowerCase = 'a' <= ch && ch <= 'z' ;
        const bool upperCase = 'A' <= ch && ch <= 'Z' ;
        if (!lowerCase && !upperCase)
        {
            return false ;
        }//if
    }//for
    return true ;
}
// Returns true when 'str' names a type; "int" is the only one known here.
bool isType(const std::string &str)
{
    const bool matchesInt = (0 == str.compare("int")) ;
    return matchesInt ;
}
// Debug helper: writes every element of 'v' to stdout, one per line.
void showPreParsed(const std::vector<std::string> &v)
{
    for (std::vector<std::string>::const_iterator item = v.begin() ; item != v.end() ; ++item)
    {
        std::cout <<*item <<std::endl ;
    }//for
}
// Returns the opposite direction: RIGHT for LEFT, LEFT for anything else.
enum DIRECTION reverseDirection(enum DIRECTION direction)
{
    if (LEFT == direction)
    {
        return RIGHT ;
    }//if
    return LEFT ;
}
// main.cpp
//
#include "stdafx.h"
#include <iostream>
using namespace std ;
#include "Declarer.h"
// Demo driver: for every sample declaration, prints the declaration, its
// description and the reading order, then waits via the "pause" command.
int main (int argc, char **argv)
{
    string declare[] = {
        "int i",
        "int *j",
        "const int x",
        "const int *y",
        "const int * const z",
        "const int * const (*fun1)(int (*fun2)(const int arg21[], int const arg22), int arg1)"
    } ;
    const size_t declareCount = sizeof(declare) / sizeof(declare[0]) ;
    for (size_t index = 0 ; index != declareCount ; ++index)
    {
        // A fresh parser for every sample
        Declarer d ;
        const string &sample = declare[index] ;
        cout <<sample <<endl ;
        cout <<d(sample) <<endl ;
        d.ShowInorderString() ;
        system("pause") ;
    }//for
    return 0 ;
}
上图说明:
输出分为 6 个测试, 分别是:
- 简单变量声明 int i
- 简单指针声明 int *j
- 常量变量声明 const int x
- 常指针声明 const int *y
- 常指针常量声明 const int * const z
- 一个变态的复杂声明 const int * const (*fun1)(int (*fun2)(const int arg21[], int const arg22), int arg1)
其中, 每个测试分为三行输出, 分别是:
- 原始 C 语言声明
- 经过解析后的可读文字
- 分析原始声明后形成的栈, 通过 "右左法则" 实现, 主要用于调试输出. 此栈分为左右两列: 左边是符号在原始声明中的位置 (下标), 右边是该位置在原始声明中对应的符号.
拿最后一个变态声明来说明图例:
原始声明如下:
const int * const (*fun1)(int (*fun2)(const int arg21[], int const arg22), int arg1)
人类思维过程如下(因人而异):
- 首先我们人类看到 fun1 是一个标识符
- 然后发现 fun1 这个标识符是一个指针
- 再看到这个指针指向了一个函数, 这个函数有 n 多参数
- 这个函数的第一个参数是标识符 fun2
- 然而发现它又是一个指针
- fun2 这个指针又指向了一个函数, 这个函数又有 n 多参数
- fun2 指向的这个函数的第一个参数是标识符 arg21, 它是一个数组, 这个数组存放的是 const int
- fun2 指向的这个函数的第二个参数是标识符 arg22, 它是一个 int 型常量
- 现在返回到 fun1 这个指针所指向的函数, 它的第二个参数是一个简单 int 型变量 arg1
- fun1 这个指针所指向的函数返回一个 const int * const
至此, 整个分析结束.
当然, 计算机不能像人类一样的思维, 所以上述人类思维过程不代表程序实现逻辑.