<转>LUA语法分析
本文选自:http://www.cnblogs.com/nazhizq/p/6516561.html
一步步调试,在lparser.c文件中luaY_parser函数是语法分析的重点函数,词法分析也是在这个过程中调用的。在这个过程中,用到一些数据结构,下面会详细说。
Proto *luaY_parser (lua_State *L, ZIO *z, Mbuffer *buff, const char *name) { struct LexState lexstate; struct FuncState funcstate; lexstate.buff = buff; luaX_setinput(L, &lexstate, z, luaS_new(L, name)); open_func(&lexstate, &funcstate);//初始化funcstate funcstate.f->is_vararg = VARARG_ISVARARG; /* main func. is always vararg */ luaX_next(&lexstate); //Luax_next用于获取下一个字符 chunk(&lexstate);//代码块分析 check(&lexstate, TK_EOS);//判断lua程序文件是否到达末尾 close_func(&lexstate);//关闭程序 lua_assert(funcstate.prev == NULL); lua_assert(funcstate.f->nups == 0); lua_assert(lexstate.fs == NULL); return funcstate.f; }
好,不着急,一步一步来看。lua_State ,LexState ,FuncState 是啥玩意呢?
lua_state是lua程序运行过程中一直存在的,并且一个运行程序只有一个lua_State实例。
struct lua_State { CommonHeader; lu_byte status; StkId top; /* first free slot in the stack */ StkId base; /* base of current function */ global_State *l_G;//全局状态的指针 CallInfo *ci; /* call info for current function */当前函数的调用信息 const Instruction *savedpc; /* `savedpc' of current function */记录上一个函数的pc位置 StkId stack_last; /* last free slot in the stack */ StkId stack; /* stack base */ CallInfo *end_ci; /* points after end of ci array*/函数调用栈的栈顶 CallInfo *base_ci; /* array of CallInfo's */函数调用栈的栈底 int stacksize; int size_ci; /* size of array `base_ci' */ unsigned short nCcalls; /* number of nested C calls */ lu_byte hookmask; lu_byte allowhook; int basehookcount; int hookcount; lua_Hook hook; TValue l_gt; /* table of globals */ TValue env; /* temporary place for environments */ GCObject *openupval; /* list of open upvalues in this stack */ GCObject *gclist; struct lua_longjmp *errorJmp; /* current error recover point */ ptrdiff_t errfunc; /* current error handling function (stack index) */ };
LexState是用于存储词法分析时的上下文数据。
typedef struct LexState { int current; /* current character (charint) */指向下一个要读取的字符 int linenumber; /* input line counter */行号 int lastline; /* line of last token `consumed' */ Token t; /* current token */ Token lookahead; /* look ahead token */ 预读的下一个token struct FuncState *fs; /* `FuncState' is private to the parser */函数状态的数据结构 struct lua_State *L; ZIO *z; /* input stream */ 输入流 Mbuffer *buff; /* buffer for tokens */ 临时缓冲区 TString *source; /* current source name */ 源文件名 char decpoint; /* locale decimal point */ } LexState;
FuncState是用于存储函数状态的数据结构。
typedef struct FuncState { Proto *f; /* current function header */函数头信息 Table *h; /* table to find (and reuse) elements in `k' */ struct FuncState *prev; /* enclosing function */指向函数链表的上一个函数 struct LexState *ls; /* lexical state */ struct lua_State *L; /* copy of the Lua state */ struct BlockCnt *bl; /* chain of current blocks */ int pc; /* next position to code (equivalent to `ncode') */ int lasttarget; /* `pc' of last `jump target' */ int jpc; /* list of pending jumps to `pc' */ int freereg; /* first free register */ int nk; /* number of elements in `k' */ int np; /* number of elements in `p' */ short nlocvars; /* number of elements in `locvars' */local变量个数 lu_byte nactvar; /* number of active local variables */ upvaldesc upvalues[LUAI_MAXUPVALUES]; /* upvalues */ unsigned short actvar[LUAI_MAXVARS]; /* declared-variable stack */ } FuncState;
初始化完成后,就要进行词法分析,即读取下一个token,调用luaX_next(&lexstate); 下面进入llex.c文件的源代码中
void luaX_next (LexState *ls) { ls->lastline = ls->linenumber; if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ ls->t = ls->lookahead; /* use this one */ ls->lookahead.token = TK_EOS; /* and discharge it */ } else ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */调用llex函数读取下一个token }
llex函数:里面是一大串的switch...case...语句,对各种可能的情况进行处理,正常的变量名或者保留字会进入default语句,分别处理空格,数字或者变量名。
for (;;) { switch (ls->current) { case '\n': case '\r': case '-': case '[': case '=': case '<': case '>': case '~': case '"': case '\'': case '.': case EOZ: default: { if (isspace(ls->current)) { lua_assert(!currIsNewline(ls)); next(ls); continue; } else if (isdigit(ls->current)) { read_numeral(ls, seminfo); return TK_NUMBER; } else if (isalpha(ls->current) || ls->current == '_') { /* identifier or reserved word */ TString *ts; do { save_and_next(ls); } while (isalnum(ls->current) || ls->current == '_'); ts = luaX_newstring(ls, luaZ_buffer(ls->buff), luaZ_bufflen(ls->buff)); if (ts->tsv.reserved > 0) /* reserved word? */ return ts->tsv.reserved - 1 + FIRST_RESERVED; else { seminfo->ts = ts; return TK_NAME; } } else { int c = ls->current; next(ls); return c; /* single-char tokens (+ - / ...) */ } } } }
luaX_newstring用于生成变量名,如果全局变量表中没有该变量的字符串,则会创建新的变量字符串。对每个token,如果是保留字段,都会预先加载在全局变量表中,因此,如果不是保留字段,就会生成TK_NAME。保留字段的判定来自于if (ts->tsv.reserved > 0),关于Token的种类,定义在llex.h头文件中。
获取token字符串后,进入chunk代码:
static void chunk (LexState *ls) { /* chunk -> { stat [`;'] } */ int islast = 0; enterlevel(ls);//内嵌调用层数 while (!islast && !block_follow(ls->t.token)) {//当前token既不是block的开始也不是结束 islast = statement(ls);//代码语句分析 testnext(ls, ';'); lua_assert(ls->fs->f->maxstacksize >= ls->fs->freereg && ls->fs->freereg >= ls->fs->nactvar); ls->fs->freereg = ls->fs->nactvar; /* free registers */ } leavelevel(ls); }
statement函数用于分析语义,里面是也是大大的switch...case...语句。如果是if, while, do, for, function等等关键字,都会进入相应的处理函数中,在default语句中处理赋值和函数调用的分析。
static int statement (LexState *ls) { int line = ls->linenumber; /* may be needed for error messages */ switch (ls->t.token) { case TK_IF: { /* stat -> ifstat */ ifstat(ls, line); return 0; } case TK_WHILE: { /* stat -> whilestat */ whilestat(ls, line); return 0; } case TK_DO: { /* stat -> DO block END */ luaX_next(ls); /* skip DO */ block(ls); check_match(ls, TK_END, TK_DO, line); return 0; } case TK_FOR: { /* stat -> forstat */ forstat(ls, line); return 0; } case TK_REPEAT: { /* stat -> repeatstat */ repeatstat(ls, line); return 0; } case TK_FUNCTION: { funcstat(ls, line); /* stat -> funcstat */ return 0; } case TK_LOCAL: { /* stat -> localstat */ luaX_next(ls); /* skip LOCAL */ if (testnext(ls, TK_FUNCTION)) /* local function? */ localfunc(ls); else localstat(ls); return 0; } case TK_RETURN: { /* stat -> retstat */ retstat(ls); return 1; /* must be last statement */ } case TK_BREAK: { /* stat -> breakstat */ luaX_next(ls); /* skip BREAK */ breakstat(ls); return 1; /* must be last statement */ } default: { exprstat(ls); return 0; /* to avoid warnings */ } } }
语句中的表达式通过exprstat(ls)函数处理,还有lua代码指令的生成,有时间再写。
本博客注有“转”字样的为转载文章,其余为本人原创文章,转载请务必注明出处或保存此段。c++/lua/windows逆向交流群:69148232