自制编译器 青木峰郎 笔记 Ch8 AST生成

8.1 表达式的抽象语法树

Literal

Type表示类型的定义,如struct pr{int first; int second;};TypeRef则是类型的名称,如struct pr。
有了TypeRef,在类型定义出现之前就能编写用到该类型的代码。

// #@@range/primary{
// Parses a primary expression: an integer, character or string literal,
// an identifier, or a parenthesized expression.
ExprNode primary():
{
    Token tok;
    ExprNode inner;
}
{
      // Integer literal: built by integerNode() from the raw token text.
      tok=<INTEGER>
        { return integerNode(location(tok), tok.image); }

      // Character literal: an integer literal whose type reference is char.
    | tok=<CHARACTER>
        {
            return new IntegerLiteralNode(
                location(tok),
                IntegerTypeRef.charRef(),
                characterCode(tok.image));
        }

      // String literal: typed as pointer to char.
    | tok=<STRING>
        {
            return new StringLiteralNode(
                location(tok),
                new PointerTypeRef(IntegerTypeRef.charRef()),
                stringValue(tok.image));
        }

      // Identifier: a variable reference by name.
    | tok=<IDENTIFIER>
        { return new VariableNode(location(tok), tok.image); }

      // Parenthesized expression: the inner node is returned unchanged.
    | "(" inner=expr() ")"
        { return inner; }
}
// #@@}

Unary


// #@@range/unary{
// Parses a unary expression: prefix ++/--, the operators + - ! ~ * &,
// the two sizeof forms, or (last alternative) a postfix expression.
ExprNode unary():
{
    ExprNode n;
    TypeNode t;
}
{
      // Prefix increment/decrement take another unary() as operand.
      "++" n=unary()    { return new PrefixOpNode("++", n); }
    | "--" n=unary()    { return new PrefixOpNode("--", n); }
      // The remaining operators take a term(), so the operand may be a cast.
    | "+" n=term()      { return new UnaryOpNode("+", n); }
    | "-" n=term()      { return new UnaryOpNode("-", n); }
    | "!" n=term()      { return new UnaryOpNode("!", n); }
    | "~" n=term()      { return new UnaryOpNode("~", n); }
    | "*" n=term()      { return new DereferenceNode(n); }
    | "&" n=term()      { return new AddressNode(n); }
      // sizeof(type): listed before the expression form and guarded by a
      // 3-token lookahead, since both forms begin with <SIZEOF>.
      // size_t() is defined outside this excerpt; presumably it supplies
      // the result type of sizeof -- confirm.
    | LOOKAHEAD(3) <SIZEOF> "(" t=type() ")"
        {
            return new SizeofTypeNode(t, size_t());
        }
      // sizeof applied to an expression.
    | <SIZEOF> n=unary()
        {
            return new SizeofExprNode(n, size_t());
        }
      // No prefix operator: fall through to a postfix expression.
    | n=postfix()       { return n; }
}
// #@@}

注意sizeof也是unary的。

// #@@range/term{
// Parses a term: either a cast "(type) term" or a unary expression.
ExprNode term():
{
    TypeNode castType;
    ExprNode operand;
}
{
      // Syntactic lookahead: take the cast branch only when "(" is
      // followed by something that parses as a type.
      LOOKAHEAD("(" type())
      "(" castType=type() ")" operand=term()
        { return new CastNode(castType, operand); }

    | operand=unary()
        { return operand; }
}
// #@@}

类型转换也是一元运算。term要么是unary表达式,要么是类型转换。


// #@@range/postfix{
// Parses a primary expression followed by any number of postfix operators.
// Each operator wraps the node built so far, so operators can be chained.
ExprNode postfix():
{
    ExprNode target, index;
    String member;
    List<ExprNode> callArgs;
}
{
    target=primary()
    (
      // Suffix increment / decrement.
      "++"
        { target = new SuffixOpNode("++", target); }
    | "--"
        { target = new SuffixOpNode("--", target); }
      // Array reference.
    | "[" index=expr() "]"
        { target = new ArefNode(target, index); }
      // Member access by value and through a pointer.
    | "." member=name()
        { target = new MemberNode(target, member); }
    | "->" member=name()
        { target = new PtrMemberNode(target, member); }
      // Function call.
    | "(" callArgs=args() ")"
        { target = new FuncallNode(target, callArgs); }
    )*
        { return target; }
}
// #@@}

后缀运算符可以叠加

二元运算符

// #@@range/expr2{
// Parses a chain of "+" / "-" over expr1() operands. The accumulated node
// becomes the left operand of the next operator, giving a left-leaning tree.
ExprNode expr2():
{
    ExprNode lhs, rhs;
}
{
    lhs=expr1()
    (
      "+" rhs=expr1()
        { lhs = new BinaryOpNode(lhs, "+", rhs); }
    | "-" rhs=expr1()
        { lhs = new BinaryOpNode(lhs, "-", rhs); }
    )*
        { return lhs; }
}
// #@@}

// #@@range/expr1{
// Parses a chain of "*", "/" and "%" over term() operands. The accumulated
// node becomes the left operand of the next operator.
ExprNode expr1():
{
    ExprNode lhs, rhs;
}
{
    lhs=term()
    (
      "*" rhs=term()
        { lhs = new BinaryOpNode(lhs, "*", rhs); }
    | "/" rhs=term()
        { lhs = new BinaryOpNode(lhs, "/", rhs); }
    | "%" rhs=term()
        { lhs = new BinaryOpNode(lhs, "%", rhs); }
    )*
        { return lhs; }
}
// #@@}

赋值表达式

二元运算符有无结合、左结合和右结合之分,cbc中只有赋值运算符=是右结合的。无结合运算符不允许出现x OP y OP z这样的写法,例如x==y==z。
一般来说,以下两种方法都能用于写可结合的二元运算符。

expr1 ("+" expr1())*

或者

expr1 :{}
{
      term() "=" expr()
}

左结合的操作符左子树可以形成新的表达式,所以用expr1 ("+" expr1())*这种方式比较好。同理,右结合用第二种方式比较好。

8.2 语句的AST


// #@@range/if_stmt{
// Parses "if (cond) stmt [else stmt]". The else body stays null when the
// else clause is absent.
IfNode if_stmt():
{
    Token ifTok;
    ExprNode test;
    StmtNode onTrue;
    StmtNode onFalse = null;
}
{
    ifTok=<IF> "(" test=expr() ")"
    onTrue=stmt()
    // Explicit LOOKAHEAD(1) on the optional else clause (dangling else).
    [ LOOKAHEAD(1) <ELSE> onFalse=stmt() ]
        {
            return new IfNode(location(ifTok), test, onTrue, onFalse);
        }
}
// #@@}

// #@@range/while_stmt{
// Parses "while (cond) stmt"; the node is located at the while keyword.
WhileNode while_stmt():
{
    Token whileTok;
    ExprNode test;
    StmtNode loopBody;
}
{
    whileTok=<WHILE>
    "(" test=expr() ")"
    loopBody=stmt()
        { return new WhileNode(location(whileTok), test, loopBody); }
}
// #@@}

// Parses "do stmt while (cond);"; the node is located at the do keyword.
// Note the constructor takes the body before the condition.
DoWhileNode dowhile_stmt():
{
    Token doTok;
    StmtNode loopBody;
    ExprNode test;
}
{
    doTok=<DO>
    loopBody=stmt()
    <WHILE> "(" test=expr() ")" ";"
        { return new DoWhileNode(location(doTok), loopBody, test); }
}

// Parses "for ([init]; [cond]; [incr]) stmt". Each of the three header
// expressions is optional and stays null when omitted.
ForNode for_stmt():
{
    Token forTok;
    ExprNode initExpr = null;
    ExprNode testExpr = null;
    ExprNode stepExpr = null;
    StmtNode loopBody;
}
{
    forTok=<FOR>
    "("
        [ initExpr=expr() ] ";"
        [ testExpr=expr() ] ";"
        [ stepExpr=expr() ]
    ")"
    loopBody=stmt()
        {
            return new ForNode(location(forTok), initExpr, testExpr, stepExpr, loopBody);
        }
}


AST示例

root@cf43f429204e:/# cat if_test.cb
import stdio;
int main(int argc, char **argv)
{
   if(1) if (2) { puts("OK"); }
else if(3) {puts("NO");}
else{puts("ERR");}
    return 0;}

root@cf43f429204e:/# cbc --dump-ast if_test.cb
<<AST>> (if_test.cb:1)
variables:
functions:
    <<DefinedFunction>> (if_test.cb:2)
    name: "main"
    isPrivate: false
    params:
        parameters:
            <<CBCParameter>> (if_test.cb:2)
            name: "argc"
            typeNode: int
            <<CBCParameter>> (if_test.cb:2)
            name: "argv"
            typeNode: char**
    body:
        <<BlockNode>> (if_test.cb:3)
        variables:
        stmts:
            <<IfNode>> (if_test.cb:4)
            cond:
                <<IntegerLiteralNode>> (if_test.cb:4)
                typeNode: int
                value: 1
            thenBody:
                <<IfNode>> (if_test.cb:4)
                cond:
                    <<IntegerLiteralNode>> (if_test.cb:4)
                    typeNode: int
                    value: 2
                thenBody:
                    <<BlockNode>> (if_test.cb:4)
                    variables:
                    stmts:
                        <<ExprStmtNode>> (if_test.cb:4)
                        expr:
                            <<FuncallNode>> (if_test.cb:4)
                            expr:
                                <<VariableNode>> (if_test.cb:4)
                                name: "puts"
                            args:
                                <<StringLiteralNode>> (if_test.cb:4)
                                value: "OK"
                elseBody:
                    <<IfNode>> (if_test.cb:5)
                    cond:
                        <<IntegerLiteralNode>> (if_test.cb:5)
                        typeNode: int
                        value: 3
                    thenBody:
                        <<BlockNode>> (if_test.cb:5)
                        variables:
                        stmts:
                            <<ExprStmtNode>> (if_test.cb:5)
                            expr:
                                <<FuncallNode>> (if_test.cb:5)
                                expr:
                                    <<VariableNode>> (if_test.cb:5)
                                    name: "puts"
                                args:
                                    <<StringLiteralNode>> (if_test.cb:5)
                                    value: "NO"
                    elseBody:
                        <<BlockNode>> (if_test.cb:6)
                        variables:
                        stmts:
                            <<ExprStmtNode>> (if_test.cb:6)
                            expr:
                                <<FuncallNode>> (if_test.cb:6)
                                expr:
                                    <<VariableNode>> (if_test.cb:6)
                                    name: "puts"
                                args:
                                    <<StringLiteralNode>> (if_test.cb:6)
                                    value: "ERR"
            elseBody: null
            <<ReturnNode>> (if_test.cb:7)
            expr:
                <<IntegerLiteralNode>> (if_test.cb:7)
                typeNode: int
                value: 0

stmts, block


// #@@range/stmts{
// Parses zero or more statements and returns them in source order.
List<StmtNode> stmts():
{
    StmtNode current;
    List<StmtNode> collected = new ArrayList<StmtNode>();
}
{
    (
        current=stmt()
            {
                // A null result from stmt() is not added to the list.
                if (current != null) {
                    collected.add(current);
                }
            }
    )*
        { return collected; }
}
// #@@}

// #@@range/stmt{
// Parses a single statement and returns its node.
// Returns null for the empty statement ";", because n keeps its initial
// null value on that branch.
StmtNode stmt():
{
    StmtNode n = null;
    ExprNode e = null;
}
{
    // Empty statement: nothing is assigned, so n stays null.
    ( ";"
    // labeled_stmt() is defined outside this excerpt; the 2-token lookahead
    // presumably separates "identifier :" from an expression that starts
    // with an identifier -- confirm.
    | LOOKAHEAD(2) n=labeled_stmt()
    // Expression statement: wrapped in ExprStmtNode at the expression's location.
    | e=expr() ";" { n = new ExprStmtNode(e.location(), e); }
    // All remaining alternatives delegate to their own production.
    | n=block()
    | n=if_stmt()
    | n=while_stmt()
    | n=dowhile_stmt()
    | n=for_stmt()
    | n=switch_stmt()
    | n=break_stmt()
    | n=continue_stmt()
    | n=goto_stmt()
    | n=return_stmt()
    )
        {
            return n;
        }
}
// #@@}
// #@@range/block{
// Parses "{ variable-definitions statements }". Variable definitions come
// before statements; the node is located at the opening brace.
BlockNode block():
{
    Token openBrace;
    List<DefinedVariable> locals;
    List<StmtNode> body;
}
{
    openBrace="{"
        locals=defvar_list()
        body=stmts()
    "}"
        { return new BlockNode(location(openBrace), locals, body); }
}
// #@@}

8.3 声明的AST

声明变量

// #@@range/defvar_list{
// Parses zero or more variable definition statements and flattens the
// variables they define into one list.
List<DefinedVariable> defvar_list():
{
    List<DefinedVariable> group;
    List<DefinedVariable> all = new ArrayList<DefinedVariable>();
}
{
    (
        group=defvars()
            { all.addAll(group); }
    )*
        { return all; }
}
// #@@}

// #@@range/defvars{
// Parses one variable definition statement such as
// "static int a = 1, b, c = 3;". All declarators share the same storage
// flag and type; one DefinedVariable is produced per declarator.
List<DefinedVariable> defvars():
{
    boolean isPrivate;
    TypeNode varType;
    String varName;
    ExprNode initializer = null;
    List<DefinedVariable> result = new ArrayList<DefinedVariable>();
}
{
    isPrivate=storage() varType=type()
    varName=name() [ "=" initializer=expr() ]
        {
            result.add(new DefinedVariable(isPrivate, varType, varName, initializer));
            // Reset so a later declarator without "=" does not reuse this initializer.
            initializer = null;
        }
    (
        "," varName=name() [ "=" initializer=expr() ]
            {
                result.add(new DefinedVariable(isPrivate, varType, varName, initializer));
                initializer = null;
            }
    )*
    ";"
        { return result; }
}
// #@@}

声明函数


// #@@range/defun{
// Parses a function definition: [static] return-type name(params) block.
// The function's type is a FunctionTypeRef built from the return type and
// the parameter type references, wrapped in a TypeNode.
DefinedFunction defun():
{
    boolean isPrivate;
    TypeRef returnType;
    String funcName;
    Params paramList;
    BlockNode funcBody;
}
{
    isPrivate=storage() returnType=typeref() funcName=name()
    "(" paramList=params() ")"
    funcBody=block()
        {
            TypeNode funcType = new TypeNode(
                new FunctionTypeRef(returnType, paramList.parametersTypeRef()));
            return new DefinedFunction(isPrivate, funcType, funcName, paramList, funcBody);
        }
}
// #@@}

// #@@range/storage{
// Parses the optional "static" keyword; returns true exactly when it was present.
boolean storage():
{
    Token staticTok = null;
}
{
    [ staticTok=<STATIC> ]
        { return staticTok != null; }
}
// #@@}

// #@@range/params{
// Parses a parameter list: either the single keyword "void" (an empty
// parameter list) or fixed parameters optionally followed by ", ...".
Params params():
{
    Token voidTok;
    Params fixed;
}
{
      // Only treat "void" as "no parameters" when ")" follows directly;
      // otherwise it is the start of a parameter type.
      LOOKAHEAD(<VOID> ")")
      voidTok=<VOID>
        { return new Params(location(voidTok), new ArrayList<CBCParameter>()); }

    | fixed=fixedparams()
      [ "," "..." { fixed.acceptVarargs(); } ]
        { return fixed; }
}
// #@@}

// #@@range/fixedparams{
// Parses one or more comma-separated parameters. The resulting Params is
// located at the first parameter.
Params fixedparams():
{
    CBCParameter first, next;
    List<CBCParameter> list = new ArrayList<CBCParameter>();
}
{
    first=param()
        { list.add(first); }
    (
        // Two-token lookahead: continue only when "," is followed by another
        // parameter, leaving a trailing ", ..." for params() to consume.
        LOOKAHEAD(2) "," next=param()
            { list.add(next); }
    )*
        { return new Params(first.location(), list); }
}
// #@@}

// #@@range/param{
// Parses a single parameter: a type followed by a name.
CBCParameter param():
{
    TypeNode paramType;
    String paramName;
}
{
    paramType=type() paramName=name()
        { return new CBCParameter(paramType, paramName); }
}
// #@@}

// #@@range/block{
// Parses "{ variable-definitions statements }". Variable definitions come
// before statements; the node is located at the opening brace.
BlockNode block():
{
    Token openBrace;
    List<DefinedVariable> locals;
    List<StmtNode> body;
}
{
    openBrace="{"
        locals=defvar_list()
        body=stmts()
    "}"
        { return new BlockNode(location(openBrace), locals, body); }
}
// #@@}

声明列表本身


// #@@range/top_defs{
// Parses zero or more top-level definitions (functions, variables,
// constants, structs, unions, typedefs) and collects them into a single
// Declarations object.
Declarations top_defs():
{
    Declarations decls = new Declarations();
    DefinedFunction defun;
    List<DefinedVariable> defvars;
    Constant defconst;
    StructNode defstruct;
    UnionNode defunion;
    TypedefNode typedef;
}
{
    // Function and variable definitions both start with storage() and a
    // type; the syntactic lookahead picks defun() only when an identifier
    // followed by "(" comes after the type.
    ( LOOKAHEAD(storage() typeref() <IDENTIFIER> "(")
      defun=defun()         { decls.addDefun(defun); }
    // 3-token lookahead before committing to a variable definition;
    // presumably needed to tell it apart from the definitions below that
    // may begin with the same tokens -- confirm.
    | LOOKAHEAD(3)
      defvars=defvars()     { decls.addDefvars(defvars); }
    | defconst=defconst()   { decls.addConstant(defconst); }
    | defstruct=defstruct() { decls.addDefstruct(defstruct); }
    | defunion=defunion()   { decls.addDefunion(defunion); }
    | typedef=typedef()     { decls.addTypedef(typedef); }
    )*
        {
            return decls;
        }
}
// #@@}

import

// #@@range/import_stmts{
// Parses zero or more import statements. Each named library is loaded
// through the loader; its declarations are merged into the result and its
// typedefs are registered via addKnownTypedefs().
Declarations import_stmts():
{
    Declarations imported = new Declarations();
    String libName;
}
{
    (
        libName=import_stmt()
            {
                try {
                    Declarations loaded = loader.loadLibrary(libName, errorHandler);
                    // A null result from the loader is skipped.
                    if (loaded != null) {
                        imported.add(loaded);
                        addKnownTypedefs(loaded.typedefs());
                    }
                }
                catch (CompileException e) {
                    // Re-raise as ParseException, keeping only the message.
                    throw new ParseException(e.getMessage());
                }
            }
    )*
        { return imported; }
}
// #@@}

// #@@range/import_stmt{
// Parses "import a.b.c;" and returns the dotted library id ("a.b.c").
// The buffer is local to this call, so the unsynchronized StringBuilder
// is used instead of StringBuffer.
String import_stmt():
{
    StringBuilder buf = new StringBuilder();
    String n;
}
{
    <IMPORT> n=name()   { buf.append(n); }
    // Each further component is appended with a "." separator.
    ("." n=name()       { buf.append(".").append(n); } )*
    ";"
        {
            return buf.toString();
        }
}
// #@@}

8.4 cbc解析器启动

  1. 使用--debug-parser选项或者调用enable_tracing能够输出解析过程的log
  2. 注意JavaCC可能会抛出LookaheadSuccess异常
posted @ 2021-02-06 00:04  雪溯  阅读(247)  评论(0编辑  收藏  举报