标准纯C++实现简单的词法分析器(二)
说明:此词法分析器所支持的关键字和操作符都是 C-(mini C)的;关于 C-,可以参阅《编译原理及实践》的附录。
以下为关键字和操作符定义:
// Token categories used by the scanner: reserved keywords, operators,
// punctuation, two-character operators, and bookkeeping kinds.
// The enumerators are implicitly numbered from k_INT = 0, so their order
// must not change.
enum tokenType {
    // reserved keywords
    k_INT = 0,  // int
    k_ELSE,     // else
    k_RETURN,   // return
    k_VOID,     // void
    k_IF,       // if
    k_WHILE,    // while
    k_READ,     // read
    k_WRITE,    // write

    // operations
    ASSIGN,     // =
    PLUS,       // +
    MINUS,      // -
    TIMES,      // *
    DIV,        // '/'
    MOD,        // %
    LT,         // <
    GT,         // >

    // interpunctions
    LPARAN,     // (
    RPARAN,     // )
    LBRACE,     // {
    RBRACE,     // }
    LSQUARE,    // [
    RSQUARE,    // ]
    COMMA,      // ,
    SEMI,       // ;

    // complex operations
    EQ,         // ==
    NEQ,        // !=
    NGT,        // <=
    NLT,        // >=

    // others
    k_EOF,
    k_ID,
    k_NUM,
    k_ERROR,
    k_NONE
};
Token 定义:
// A single token: a tokenType category together with a string.
class Token{
public:
// Copy assignment; only declared here, the definition is not part of this excerpt.
Token &operator=(const Token& rh);
tokenType type; // category of this token (one of the tokenType enumerators)
string str; // presumably the token's text as read from the source -- TODO confirm
};
重载了 = 操作符,因为在很多时候需要使用 赋值 操作。这样可以简化实现。
以下为词法扫描器的定义:
// Lexical scanner layered on Tokenizer (Tokenizer is declared outside this
// excerpt). nextToken() is the primary interface; push() hands the current
// token back so that it is returned again.
class Scanner: public Tokenizer {
public:
// filename is forwarded to the Tokenizer base by the constructor definition.
// NOTE(review): the parameter is a non-const reference, so a temporary
// string cannot be passed.
Scanner(string& filename);
virtual ~Scanner();
void build_key_map(); // build the keyword map
Token& nextToken(); // return the next token -- primary interface
void push(); // push back the current token; the next call returns it
int errCount() const { return err_count; } // current value of the error counter
int warnCount()const { return warn_count; } // current value of the warning counter
bool getListFile(); // create log file sourcename.log
// if word is in key_word, return its tokenType value, else return k_NONE
tokenType reservedLookup(const string& word);
std::vector<Token> key_word; // keyword table referred to by the reservedLookup() comment
protected:
void add_warn() { ++warn_count; } // increment the warning counter
void add_err() { ++err_count;} // increment the error counter
Token m_token; // stores the current token
bool m_pushed; // flag: the current token has been pushed back
bool TraceScan; // NOTE(review): presumably toggles scan tracing -- confirm against its uses
int warn_count; // warning counter
int err_count; // error counter
};
实现:
/**
 * Scanner constructor.
 *
 * Passes filename on to the Tokenizer base, then sets the initial state:
 * TraceScan true, m_pushed false, both counters zero, the current token's
 * type k_NONE, and finally builds the keyword map.
 *
 * author: lonelyforest
 * date: 2006.03.16
 */
Scanner::Scanner(string &filename)
    : Tokenizer(filename),
      m_pushed(false),
      TraceScan(true),
      warn_count(0),
      err_count(0)
{
    // m_token is default-constructed; only its type is set here.
    m_token.type = k_NONE;

    // Build the keyword map last, once every member has its initial value.
    build_key_map();
}
--------------------------------------------
主要部分,状态机实现:
/*
 * s_state: the individual states of the scanner's state machine.
 * They are auxiliary states used by nextToken().
 */
enum stateType {
    s_START,
    s_INID,
    s_INNUM,
    s_INCOMMENT,
    s_INASSIGN,
    s_INL,
    s_ING,
    s_DONE
};
主要的状态机实现待续。如果一篇文章太长,阅读起来很麻烦,所以不得不分开。
以下为关键字和操作符定义:
// Token categories used by the scanner: reserved keywords, operators,
// punctuation, two-character operators, and bookkeeping kinds.
// The enumerators are implicitly numbered from k_INT = 0, so their order
// must not change.
enum tokenType {
    // reserved keywords
    k_INT = 0,  // int
    k_ELSE,     // else
    k_RETURN,   // return
    k_VOID,     // void
    k_IF,       // if
    k_WHILE,    // while
    k_READ,     // read
    k_WRITE,    // write

    // operations
    ASSIGN,     // =
    PLUS,       // +
    MINUS,      // -
    TIMES,      // *
    DIV,        // '/'
    MOD,        // %
    LT,         // <
    GT,         // >

    // interpunctions
    LPARAN,     // (
    RPARAN,     // )
    LBRACE,     // {
    RBRACE,     // }
    LSQUARE,    // [
    RSQUARE,    // ]
    COMMA,      // ,
    SEMI,       // ;

    // complex operations
    EQ,         // ==
    NEQ,        // !=
    NGT,        // <=
    NLT,        // >=

    // others
    k_EOF,
    k_ID,
    k_NUM,
    k_ERROR,
    k_NONE
};
Token 定义:
// A single token: a tokenType category together with a string.
class Token{
public:
// Copy assignment; only declared here, the definition is not part of this excerpt.
Token &operator=(const Token& rh);
tokenType type; // category of this token (one of the tokenType enumerators)
string str; // presumably the token's text as read from the source -- TODO confirm
};
重载了 = 操作符,因为在很多时候需要使用 赋值 操作。这样可以简化实现。
以下为词法扫描器的定义:
// Lexical scanner layered on Tokenizer (Tokenizer is declared outside this
// excerpt). nextToken() is the primary interface; push() hands the current
// token back so that it is returned again.
class Scanner: public Tokenizer {
public:
// filename is forwarded to the Tokenizer base by the constructor definition.
// NOTE(review): the parameter is a non-const reference, so a temporary
// string cannot be passed.
Scanner(string& filename);
virtual ~Scanner();
void build_key_map(); // build the keyword map
Token& nextToken(); // return the next token -- primary interface
void push(); // push back the current token; the next call returns it
int errCount() const { return err_count; } // current value of the error counter
int warnCount()const { return warn_count; } // current value of the warning counter
bool getListFile(); // create log file sourcename.log
// if word is in key_word, return its tokenType value, else return k_NONE
tokenType reservedLookup(const string& word);
std::vector<Token> key_word; // keyword table referred to by the reservedLookup() comment
protected:
void add_warn() { ++warn_count; } // increment the warning counter
void add_err() { ++err_count;} // increment the error counter
Token m_token; // stores the current token
bool m_pushed; // flag: the current token has been pushed back
bool TraceScan; // NOTE(review): presumably toggles scan tracing -- confirm against its uses
int warn_count; // warning counter
int err_count; // error counter
};
实现:
/**
 * Scanner constructor.
 *
 * Passes filename on to the Tokenizer base, then sets the initial state:
 * TraceScan true, m_pushed false, both counters zero, the current token's
 * type k_NONE, and finally builds the keyword map.
 *
 * author: lonelyforest
 * date: 2006.03.16
 */
Scanner::Scanner(string &filename)
    : Tokenizer(filename),
      m_pushed(false),
      TraceScan(true),
      warn_count(0),
      err_count(0)
{
    // m_token is default-constructed; only its type is set here.
    m_token.type = k_NONE;

    // Build the keyword map last, once every member has its initial value.
    build_key_map();
}
--------------------------------------------
主要部分,状态机实现:
/*
 * s_state: the individual states of the scanner's state machine.
 * They are auxiliary states used by nextToken().
 */
enum stateType {
    s_START,
    s_INID,
    s_INNUM,
    s_INCOMMENT,
    s_INASSIGN,
    s_INL,
    s_ING,
    s_DONE
};
主要的状态机实现待续。如果一篇文章太长,阅读起来很麻烦,所以不得不分开。