一个 C/C++ 声明器的实现

// Declarer.h
//

#ifndef DECLARER_H
#define DECLARER_H

#include
<vector>
#include
<string>

// Direction in which the token list is scanned: LEFT moves towards lower
// indices, RIGHT towards higher ones.
enum DIRECTION {LEFT = -1, RIGHT = 1};

// Turns a C/C++ declaration given as text into a readable description.
// The work is done in three passes: split the text into tokens, reorder the
// tokens into reading order, then translate each token.
class Declarer
{
public:
    Declarer();
    ~Declarer();

public:
    // Interface
    // Runs all passes on 'declare' and returns the generated description.
    std::string operator()(const std::string &declare);

private:
    // logic
    void setDeclare(const std::string &declare);    // Stores the text to be parsed
    std::string getParseResult();                   // Returns the generated description

    void translate();                       // Translates the 'm_inorder' sequence into a humanized statement
    void preParse();                        // Splits the declaration into independent tokens in 'm_preParsed'
    void parseToQueue();                    // Puts the reading-order sequence into 'm_inorder'
    void parseStatement(int begin, int end);    // Orders the tokens from 'begin' to 'end' - 1
    void intoArray(int begin, int end);         // NOTE(review): no definition is visible in this source
    void intoFunction(int begin, int end);      // NOTE(review): no definition is visible in this source
    bool isKeyWord(const std::string &str);     // True when 'str' is one of the KEY_WORD entries

    void parseWord(int pos);                // 'pos' is a position in 'm_inorder'; the token itself is
                                            // m_preParsed[m_inorder[pos]]. Appends the description of
                                            // that token to 'm_afterParsed'.

    int nextIdentifier(int begin);          // Index in 'm_preParsed' of the first non-keyword token at or
                                            // after 'begin'; -1 when there is none
    int nextPos(int pos, enum DIRECTION direction);     // Searches 'm_passedInPreParsed' from 'pos' in
                                            // 'direction' for the first element that is false and
                                            // returns its index; -1 when not found
    int nextLeftPos(int pos);               // NOTE(review): no definition is visible in this source
    int argumentEndPos(int begin);          // Takes the first position of an argument and returns the
                                            // position just past its end
    void setSequence(int pos);              // Appends 'pos' to 'm_inorder' and marks it as passed
    void unsetSequence(int pos);            // Takes 'pos' out of 'm_inorder' and clears its passed mark

public:
    // Debug
    void ShowInorderString();               // Writes 'm_inorder' and the matching tokens to stdout

private:
    // data
    std::string m_unParsed;                     // The declaration as given
    std::vector<std::string> m_preParsed;       // The declaration split into tokens
    std::string m_afterParsed;                  // The description generated so far
    std::vector<bool> m_passedInPreParsed;      // One flag per token: already placed in 'm_inorder'?
    std::vector<size_t> m_inorder;              // The parse order (indices into 'm_preParsed')

    static const char *DIVIDER;
    // Declared without the 'Declarer::' prefix: a qualified name is not
    // allowed on a member declaration inside its own class.
    static const char KEY_WORD[][10];
};

// Returns true when 'a' equals any character of 'excludeChars',
// otherwise returns false.
bool isOf(const char a, const char *excludeChars) ;
// Returns the index of the first character of 'str', at or after 'begin',
// that is not a space; returns the length of 'str' when there is none.
int firstNonBlank(const std::string &str, int begin) ;
// Returns true when every character of 'str' is a letter in a-z or A-Z.
bool isWord(const std::string &str) ;
// Returns true when 'str' is the type name "int".
bool isType(const std::string &str) ;

// Writes each element of 'v' to stdout, one per line.
void showPreParsed(const std::vector<std::string> &v) ;
// Returns RIGHT for LEFT and LEFT otherwise.
enum DIRECTION reverseDirection(enum DIRECTION direction) ;

#endif //DECLARER_H

  

// Declarer.cpp
//

#include
"StdAfx.h"
#include
"Declarer.h"
#include
<iostream>
#include
<cassert>
#include
<algorithm>
#include
<iomanip>
using namespace std ;

// Characters that terminate a word while the declaration is split into
// tokens; each of them becomes a token of its own, except the blank.
const char *Declarer::DIVIDER = " *()[]," ;
// Tokens that are never treated as identifiers. parseWord() addresses these
// entries by index (0 to 7), so their order must stay as it is.
const char Declarer::KEY_WORD[][10] = {
"int",
"const",
"*",
"[","]",
"(", ")",
","
} ;

// const int * const (*fun1)(int (*fun2)(const int arg21[], int const arg22), int arg1)

// Creates a parser with no declaration loaded; every member starts empty.
Declarer::Declarer()
    : m_unParsed(),
      m_preParsed(),
      m_afterParsed(),
      m_passedInPreParsed(),
      m_inorder()
{
}

// Nothing to release: all members clean up after themselves.
Declarer::~Declarer()
{
}

// Interface
// Parses 'declare' and returns its description.
// The object may be called repeatedly; each call starts from a clean state
// and leaves its tokens and parse order in place afterwards so that
// ShowInorderString() can still print them.
string Declarer::operator()(const std::string &declare)
{
    // Drop whatever a previous call left behind. Without this the token
    // list, the passed flags, the parse order and the output text would all
    // keep growing from one call to the next.
    m_preParsed.clear();
    m_passedInPreParsed.clear();
    m_inorder.clear();
    m_afterParsed.clear();

    setDeclare(declare);

    // Split the original line into single words
    preParse();
    // Reorder the original sequence into a parse-able sequence
    parseToQueue();
    // Translate the parse-able sequence into human language
    translate();

    return getParseResult();
}

// logic
// Remembers 'declare' as the text the next parse will work on.
void Declarer::setDeclare(const std::string &declare)
{
    m_unParsed.assign(declare);
}

// Hands back a copy of the description built by translate().
string Declarer::getParseResult()
{
    const string &description = m_afterParsed;
    return description;
}

// Walks the parse order front to back and lets parseWord() describe each
// entry in turn.
void Declarer::translate()
{
    // parseWord() never changes 'm_inorder', so its size can be read once.
    const int orderedCount = static_cast<int>(m_inorder.size());

    int index = 0;
    while (index < orderedCount)
    {
        parseWord(index);
        ++index;
    }
}

void Declarer::preParse()
{
int len = m_unParsed.length() ;

int i = 0, j = 0 ;
while (i < (int)len && j < (int)len)
{
// If I meet an divider such as: *, (, ), or a blank
// we should treat the letters between 'm_unParsed[i]' and 'm_unParsed[j-1]' as a word
// and put it into 'm_preParsed'
// in this way, we split all part in declaration into single word in vector
string toPush ;
if (isOf(m_unParsed[i], DIVIDER))
{
toPush
= m_unParsed[i] ;
j
= ++i ;
}
else
{
while (j < (int)len && !isOf(m_unParsed[j], DIVIDER))
{
j
++ ;
}
//while
toPush = m_unParsed.substr(i, j - i) ;

i
= j ;
}
//if

// We don't push Blank even though Blank is key-word
if (" " != toPush)
{
m_preParsed.push_back(toPush) ;
}
//if
}//while
i == j ? NULL : (m_preParsed.push_back(m_unParsed.substr(i, j - i)), NULL) ;
}


// Orders the whole token list by parsing it as one statement.
void Declarer::parseToQueue()
{
    const int tokenCount = static_cast<int>(m_preParsed.size());
    parseStatement(0, tokenCount);
}

void Declarer::parseStatement(int begin, int end)
{
enum DIRECTION direction = RIGHT ;
m_passedInPreParsed.resize(m_preParsed.size(),
false) ;

int pos = nextIdentifier(begin) ;
if (-1 == pos)
{
return ;
}
//if

while (0 < count(m_passedInPreParsed.begin() + begin, m_passedInPreParsed.begin() + end, false))
{
string str = m_preParsed[pos] ;
setSequence(pos) ;

// Get the next direction
if ("(" == m_preParsed[pos])
{
direction
= RIGHT ;
pos
= nextPos(pos, direction) ;
// If '(' after an identifier, it seem to be a function pointer
// we call this funciton recursively
if (end > pos && "(" == m_preParsed[pos])
{
// We put '(' at the begin of function argument
// and then go into recursion
setSequence(pos++) ;

// We signed the argument begin and end position for future use
int argumentBegin = pos ;
int argumentEnd = argumentEndPos(argumentBegin) ;
// We travel all arguments, each argument is a recursion
while (true)
{
parseStatement(argumentBegin, argumentEnd) ;
setSequence(argumentEnd) ;

// If there are multiple arguments, we continue to recurse
if ("," == m_preParsed[argumentEnd])
{
argumentBegin
= argumentEnd + 1 ;
argumentEnd
= argumentEndPos(argumentBegin) ;
}
else if (")" == m_preParsed[argumentEnd])
{
break ;
}
else
{
cerr
<<"Unknow argument end:" <<m_preParsed[argumentEnd] ;
}
//if
}//while
direction = LEFT ;
}
//if "("
}
else if (")" == m_preParsed[pos])
{
direction
= LEFT ;
}
//if "("

// If direction > 0, we go right, if < 0, we go left
if (begin > nextPos(pos, direction) || nextPos(pos, direction) >= end)
{
// Go reverse
direction = reverseDirection(direction) ;
}
//if
pos = nextPos(pos, direction) ;
}
//while
}

// Returns the index of the first token at or after 'begin' that is not a
// key word; that token is taken to be the identifier. Returns -1 when every
// remaining token is a key word.
int Declarer::nextIdentifier(int begin)
{
    const int tokenCount = static_cast<int>(m_preParsed.size());

    int candidate = begin;
    while (candidate < tokenCount && isKeyWord(m_preParsed[candidate]))
    {
        ++candidate;
    }

    return candidate < tokenCount ? candidate : -1;
}

// Returns true when 'str' is exactly one of the KEY_WORD entries,
// otherwise returns false.
bool Declarer::isKeyWord(const string &str)
{
    const size_t keyWordCount = sizeof(KEY_WORD) / sizeof(KEY_WORD[0]);

    // std::string compares directly against a C string, so no <cstring>
    // function is needed here.
    for (size_t j = 0; j < keyWordCount; ++j)
    {
        if (str == KEY_WORD[j])
        {
            return true;
        }
    }

    return false;
}

void Declarer::parseWord(int pos)
{
assert(m_preParsed.size()
== m_inorder.size()) ; // m_preParsed.size() must equal to m_inorder.size()

int len = (int)m_preParsed.size() ;
string parsedString ;
string before_word = 0 <= pos - 1 ? m_preParsed[m_inorder[pos - 1]] : "" ; // The word in front of 'pos'
string word = m_preParsed[m_inorder[pos]] ;
string after_word = len > pos + 1 ? m_preParsed[m_inorder[pos + 1]] : "" ; // The word after 'pos'

if (KEY_WORD[0] == word) // int
{
parsedString
= " int 型" ;
parsedString
+= "const" == before_word || "const" == after_word ? "" : "" ;
parsedString
+= "量; " ;
}
else if (KEY_WORD[1] == word) // const
{
parsedString
= "" ;
}
else if (KEY_WORD[2] == word) // *
{
parsedString
= "const" == before_word ? "" : "" ;
parsedString
+= "指针, 这个" ;
parsedString
+= "const" == before_word ? "" : "" ;
parsedString
+= "指针指向一个" ;
}
else if (KEY_WORD[3] == word) // [
{
parsedString
= "数组, 这个数组存放的是" ;
}
else if (KEY_WORD[4] == word) // ]
{
parsedString
= "" ;
}
else if (KEY_WORD[5] == word) // (
{
parsedString
= "(" == before_word ? "函数, 这个函数有如下参数: " : "" ;
}
else if (KEY_WORD[6] == word) // )
{
// If an type exists between ")" and "(", we thought it a function return
bool functionReturn = false ;
while ((int)m_inorder.size() > pos && "(" != m_preParsed[m_inorder[pos]])
{
if (isType(m_preParsed[m_inorder[pos++]]))
{
functionReturn
= true ;
break ;
}
//if
}//while
parsedString = functionReturn ? "函数返回" : "" ;
}
else if (KEY_WORD[7] == word) // ,
{
parsedString
= "另一个参数是" ;
}
else // identifier
{
parsedString
= " " + word + " 是一个" ;
}
//if

m_afterParsed
+= parsedString ;
}

// Starting next to 'pos' and moving in 'direction', returns the index of the
// first token not yet placed in the parse order. Returns -1 when there is
// no such token on that side.
int Declarer::nextPos(int pos, enum DIRECTION direction)
{
    const int total = static_cast<int>(m_passedInPreParsed.size());
    const bool goingLeft = (LEFT == direction);
    const int step = goingLeft ? -1 : 1;

    // Moving left ends below index 0, moving right ends at 'total'.
    for (int idx = pos + step; goingLeft ? (idx >= 0) : (idx < total); idx += step)
    {
        if (!m_passedInPreParsed[idx])
        {
            return idx;
        }
    }

    return -1;
}

int Declarer::argumentEndPos(int begin)
{
assert(
0 <= begin) ;

int nest = 0 ;
int end = begin ;
while (end < (int)m_preParsed.size() && 0 <= nest)
{
if ("(" == m_preParsed[end])
{
nest
++ ;
}
else if (")" == m_preParsed[end])
{
if (0 > --nest)
{
break ;
}
//if 0 > --nest
}
else if ("," == m_preParsed[end] && 0 == nest)
{
break ;
}
//if
end++ ;
}
//for

return end ;
}

// Marks token 'pos' as passed and appends it to the parse order.
void Declarer::setSequence(int pos)
{
    m_passedInPreParsed[pos] = true;
    m_inorder.push_back(pos);
}

// Undoes setSequence(pos): removes token index 'pos' from the parse order
// and clears its passed mark. 'pos' is an index into 'm_preParsed', exactly
// as in setSequence(). Does nothing when 'pos' is not a valid token index.
void Declarer::unsetSequence(int pos)
{
    if (pos < 0 || pos >= static_cast<int>(m_passedInPreParsed.size()))
    {
        return;
    }

    // 'm_inorder' stores token indices as values, so the entry has to be
    // looked up by value. Search from the back so that the most recently
    // added occurrence is the one taken out.
    const size_t wanted = static_cast<size_t>(pos);
    for (size_t i = m_inorder.size(); i > 0; --i)
    {
        if (m_inorder[i - 1] == wanted)
        {
            m_inorder.erase(m_inorder.begin() + (i - 1));
            break;
        }
    }

    m_passedInPreParsed[pos] = false;
}

// Debug
void Declarer::ShowInorderString()
{
for (int i = 0 ;i < (int)m_inorder.size() ;i++)
{
cout
<<setw(3) <<m_inorder[i] <<": " <<m_preParsed[m_inorder[i]] <<endl ;
}
//for
}

//
// Returns true when 'a' occurs in the NUL-terminated string 'excludeChars'.
// The terminator itself never counts as a match.
bool isOf(const char a, const char *excludeChars)
{
    assert(excludeChars != NULL);

    for (const char *cursor = excludeChars; *cursor != '\0'; ++cursor)
    {
        if (*cursor == a)
        {
            return true;
        }
    }

    return false;
}

// Returns the index of the first character of 'str', at or after 'begin',
// that is not a space. Returns the length of 'str' when there is none.
int firstNonBlank(const std::string &str, int begin)
{
    assert(begin >= 0);

    const int len = static_cast<int>(str.length());

    int index = begin;
    while (index < len && isOf(str[index], " "))
    {
        ++index;
    }

    return index < len ? index : len;
}

// Returns true when 'str' consists only of the letters a-z and A-Z.
// An empty string counts as a word.
bool isWord(const std::string &str)
{
    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        const char ch = *it;
        const bool isLower = ('a' <= ch && ch <= 'z');
        const bool isUpper = ('A' <= ch && ch <= 'Z');
        if (!isLower && !isUpper)
        {
            return false;
        }
    }

    return true;
}

// Returns true when 'str' names a type; "int" is the only type known here.
bool isType(const std::string &str)
{
    static const char *const kIntType = "int";
    return str.compare(kIntType) == 0;
}

// Writes every element of 'v' to stdout, one per line.
void showPreParsed(const std::vector<std::string> &v)
{
    typedef std::vector<std::string>::const_iterator TokenIter;

    for (TokenIter token = v.begin(); token != v.end(); ++token)
    {
        std::cout << *token << std::endl;
    }
}

// Returns the opposite scan direction: RIGHT for LEFT, LEFT for anything else.
enum DIRECTION reverseDirection(enum DIRECTION direction)
{
    if (LEFT == direction)
    {
        return RIGHT;
    }

    return LEFT;
}

  

// main.cpp
//

#include
"stdafx.h"
#include
<iostream>
using namespace std ;
#include
"Declarer.h"

int main (int argc, char **argv)
{
string declare[] = {"int i",
"int *j",
"const int x",
"const int *y",
"const int * const z",
"const int * const (*fun1)(int (*fun2)(const int arg21[], int const arg22), int arg1)" } ;

for (int i = 0 ;i < sizeof(declare)/sizeof(declare[0]) ;i++)
{
Declarer d ;

cout
<<declare[i] <<endl ;
cout
<<d(declare[i]) <<endl ;
d.ShowInorderString() ;

system(
"pause") ;
}
//for

return 0 ;
}

  

上图说明:
   输出分为 6 个测试, 分别是:

  1. 简单变量声明                             int i
  2. 简单指针声明                             int *j
  3. 常量变量声明                             const int x
  4. 指向常量的指针声明                       const int *y
  5. 指向常量的常指针声明                    const int * const z
  6. 一个变态的复杂声明                    const int * const (*fun1)(int (*fun2)(const int arg21[], int const arg22), int arg1)


其中, 每个测试分为三行输出, 分别是:

  1. 原始 C 语言声明
  2. 经过解析后的可读文字
  3. 分析原始声明后形成的栈, 通过 "右左法则" 实现, 主要用于调试输出. 此栈分为左右两部分: 左边是符号在原始声明栈中的位置, 右边是此数字在原始声明栈中对应的字符. 

拿最后一个变态声明来说明图例:

原始声明如下: 

const int * const (*fun1)(int (*fun2)(const int arg21[], int const arg22), int arg1)
人类思维过程如下(因人而异):
  1. 首先我们人类看到 fun1 是一个标识符
  2. 然后发现 fun1 这个标识符是一个指针
  3. 再看到这个指针指向了一个函数, 这个函数有 n 多参数
  4. 这个函数的第一个参数是标识符 fun2
  5. 然而发现它又是一个指针
  6. fun2 这个指针又指向了一个函数, 这个函数又有 n 多参数
  7. fun2 指向的这个函数的第一个参数是标识符 arg21, 它是一个数组, 这个数组存放的是 const int
  8. fun2 指向的这个函数的第二个参数是标识符 arg22, 它是一个常量变量
  9. 现在返回到 fun1 这个指针所指向的函数, 它的第二个参数是一个简单 int 型变量 arg1
  10. fun1 这个指针所指向的函数返回一个 const int * const

至此, 整个分析结束. 

当然, 计算机不能像人类一样思维, 所以上述人类思维过程不代表程序实现逻辑. 


                    

 

posted @ 2011-07-29 21:23  walfud  阅读(938)  评论(2编辑  收藏  举报