dm1299

[swarthmore cs75] Compiler 2 – Boa

课程回顾

Swarthmore学院16年开的编译系统课,总共10次大作业。本随笔记录了相关的课堂笔记以及第4次大作业。

  • A-Normal Form

在80年代,函数式语言编译器主要使用Continuation-passing style(CPS)作为中间代码表示形式。 1992年Sabry和Felleisen引入了另一种和CPS一样简单的表示形式:A-normal form(ANF),并且证明了:使用ANF作为中间代码表示形式能够和使用CPS一样容易生成汇编代码并进行代码优化。

  • Why:为什么要转化为ANF的形式?从下面的例子可以看到,在计算第二个表达式的时候,必须首先把(2 - 3)的计算结果存在某个地方。难道需要另外的逻辑,把中间结果存储到esp中?但是这样做的话扩展性就会很差,这样就有了ANF表示形式(let..in...的编译过程会对变量进行处理)。

    源码 x86汇编 ANF形式(参考:实现一) 简化的ANF(参考:实现二)
    (5 + 4) + (3 + 2) mov EAX, 5
    add EAX, 4
    add EAX, 3
    add EAX, 2
    let v1 = 5 + 4 in
    let v2 = 3 + 2 in
    let v3 = v1 + v2 in
    v3
    let v1 = 5 + 4 in
    let v2 = 3 + 2 in
    v1 + v2
    (2 - 3) + (4 * 5) mov EAX, 2
    sub EAX, 3
    ?????
    let v1 = 2 - 3 in
    let v2 = 4 * 5 in
    let v3 = v1 + v2 in
    v3
    let v1 = 2 - 3 in
    let v2 = 4 * 5 in
    v1 + v2
  • How:如何将一个算数表达式转换为ANF表达式?下面提供了两种实现:
    Input Language

    (* Source expressions of the toy input language: arbitrarily nested
       additions over integer literals and named variables. *)
    type expr =
      | Num of int            (* integer literal *)
      | Id of string          (* variable reference, by name *)
      | Plus of expr * expr   (* addition; both operands may be any expression *)
    

    Restricted Language

    (* Immediate expressions: the only forms allowed as operands. *)
    type immexpr =
      | ImmNum of int     (* integer literal *)
      | ImmId of string   (* variable reference, by name *)
    
    (* Compound expressions: at most one operation, applied to immediates. *)
    type cexpr =
      | CPlus of immexpr * immexpr   (* addition of two immediates *)
      | CImmExpr of immexpr          (* an immediate used as a compound expression *)
    
    (* A-normal-form expressions: a chain of let-bindings ending in a
       compound expression. *)
    type aexpr =
      | ALet of string * cexpr * aexpr   (* name, bound expression, body *)
      | ACExpr of cexpr                  (* final expression of the chain *)
    

    实现一:

    (* [anf e expr_with_hole] converts [e] to A-normal form in
       continuation-passing style.  [expr_with_hole] receives the immediate
       that stands for the value of [e] and builds the rest of the program
       around it.  Literals and identifiers are passed straight through;
       every [Plus] is bound to a fresh temporary.  The temporary is
       requested from [gen_temp] before either operand is converted. *)
    let rec anf (e : expr) (expr_with_hole : (immexpr -> aexpr)) =
      match e with
      | Num n -> expr_with_hole (ImmNum n)
      | Id x -> expr_with_hole (ImmId x)
      | Plus (lhs, rhs) ->
        let tmp = gen_temp "v" in
        (* Binds the sum of two already-immediate operands to [tmp] and
           continues with [tmp] filling the hole. *)
        let bind_sum l r =
          let rest = expr_with_hole (ImmId tmp) in
          ALet (tmp, CPlus (l, r), rest)
        in
        anf lhs (fun l -> anf rhs (fun r -> bind_sum l r))
    
    

    输入:

    anf (Plus(Plus(Num(5), Num(4)), Plus(Num(3), Num(2)))) (fun imm -> ACExpr(CImmExpr(imm)))

    ......

    => anf (Plus(Plus(Num(5), Num(4)), Plus(Num(3), Num(2)))) (fun imm -> ACExpr(CImmExpr(imm)))

    => anf Plus(Num(5), Num(4)) (fun limm ->
    anf Plus(Num(3), Num(2)) (fun rimm ->
    ALet("v1", CPlus(limm, rimm),
    ((fun imm -> ACExpr(CImmExpr(imm))) (ImmId("v1"))))))

    => anf Num(5) (fun limm ->
    anf Num(4) (fun rimm ->
    ALet("v2", CPlus(limm, rimm), ((fun limm ->
    anf Plus(Num(3), Num(2)) (fun rimm ->
    ALet("v1", CPlus(limm, rimm), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1")))))) (ImmId("v2"))))))

    => anf Num(4) (fun rimm ->
    ALet("v2", CPlus(ImmNum(5), rimm), ((fun limm ->
    anf Plus(Num(3), Num(2)) (fun rimm ->
    ALet("v1", CPlus(limm, rimm), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1")))))) (ImmId("v2")))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)), ((fun limm ->
    anf Plus(Num(3), Num(2)) (fun rimm ->
    ALet("v1", CPlus(limm, rimm), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1")))))) (ImmId("v2"))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)), (
    anf Plus(Num(3), Num(2)) (fun rimm ->
    ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1")))))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
    anf Num(3) (fun limm ->
    anf Num(2) (fun rimm ->
    ALet("v3", CPlus(limm, rimm),
    ((fun rimm ->
    ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3")))))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
    anf Num(3) (fun limm ->
    anf Num(2) (fun rimm ->
    ALet("v3", CPlus(ImmNum(3), rimm),
    ((fun rimm ->
    ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3")))))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
    anf Num(2) (fun rimm ->
    ALet("v3", CPlus(ImmNum(3), rimm),
    ((fun rimm ->
    ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3"))))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
    ALet("v3", CPlus(ImmNum(3), ImmNum(2)),
    ((fun rimm ->
    ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3")))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
    ALet("v3", CPlus(ImmNum(3), ImmNum(2)),
    ((fun rimm ->
    ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3")))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
    ALet("v3", CPlus(ImmNum(3), ImmNum(2)),
    ALet("v1", CPlus(ImmId("v2"), ImmId("v3")), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1"))))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
    ALet("v3", CPlus(ImmNum(3), ImmNum(2)),
    ALet("v1", CPlus(ImmId("v2"), ImmId("v3")), ((fun imm ->
    ACExpr(CImmExpr(imm))) (ImmId("v1"))))))

    => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
    ALet("v3", CPlus(ImmNum(3), ImmNum(2)),
    ALet("v1", CPlus(ImmId("v2"), ImmId("v3")),
    ACExpr(CImmExpr(ImmId("v1"))))))

    输出:

    ALet ("v2", CPlus (ImmNum(5), ImmNum(4)),
     ALet ("v3", CPlus (ImmNum(3), ImmNum(2)),
      ALet ("v1", CPlus (ImmId("v2"), ImmId("v3")), ACExpr (CImmExpr (ImmId ("v1"))))))

    实现二:

    (* [anf_c e expr_with_c_hole] converts [e] to A-normal form when the
       surrounding context accepts a compound expression.  A top-level
       [Plus] is handed to the hole directly as a [CPlus], so no temporary
       is introduced for it; only its operands are forced to immediates. *)
    let rec anf_c (e : expr) (expr_with_c_hole : cexpr -> aexpr) : aexpr =
      match e with
      | Num _ | Id _ ->
        (* Atoms are already immediate; just wrap them as a cexpr. *)
        anf_imm e (fun imm -> expr_with_c_hole (CImmExpr imm))
      | Plus (lhs, rhs) ->
        anf_imm lhs (fun l ->
          anf_imm rhs (fun r -> expr_with_c_hole (CPlus (l, r))))

    (* [anf_imm e expr_with_imm_hole] converts [e] when the surrounding
       context needs an immediate.  Atoms are passed through; a [Plus] is
       bound to a fresh temporary, which is requested from [gen_temp]
       before either operand is converted. *)
    and anf_imm (e : expr) (expr_with_imm_hole : immexpr -> aexpr) : aexpr =
      match e with
      | Num n -> expr_with_imm_hole (ImmNum n)
      | Id x -> expr_with_imm_hole (ImmId x)
      | Plus (lhs, rhs) ->
        let tmp = gen_temp "v" in
        let bind_sum l r =
          ALet (tmp, CPlus (l, r), expr_with_imm_hole (ImmId tmp))
        in
        anf_imm lhs (fun l -> anf_imm rhs (fun r -> bind_sum l r))
    

    输入:

    anf_c (Plus(Plus(Num(5), Num(4)), Plus(Num(3), Num(2)))) (fun c -> ACExpr(c))

    ......
        anf_c (Plus(Plus(Num(5), Num(4)), Plus(Num(3), Num(2)))) (fun c -> ACExpr(c))
    

    => anf_imm Plus(Num(5), Num(4)) (fun limm ->
    anf_imm Plus(Num(3), Num(2)) (fun rimm ->
    ((fun c -> ACExpr(c)) (CPlus(limm, rimm)))))

    => anf_imm Num(5) (fun limm ->
    anf_imm Num(4) (fun rimm ->
    ALet("v1", CPlus(limm, rimm), ((fun limm ->
    anf_imm Plus(Num(3), Num(2)) (fun rimm ->
    ((fun c -> ACExpr(c)) (CPlus(limm, rimm))))) (ImmId("v1"))))))

    => anf_imm Num(4) (fun rimm ->
    ALet("v1", CPlus(ImmNum(5), rimm), ((fun limm ->
    anf_imm Plus(Num(3), Num(2)) (fun rimm ->
    ((fun c -> ACExpr(c)) (CPlus(limm, rimm))))) (ImmId("v1")))))

    => ALet("v1", CPlus(ImmNum(5), ImmNum(4)), ((fun limm ->
    anf_imm Plus(Num(3), Num(2)) (fun rimm ->
    ((fun c -> ACExpr(c)) (CPlus(limm, rimm))))) (ImmId("v1"))))

    => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
    anf_imm Plus(Num(3), Num(2)) (fun rimm ->
    ((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), rimm)))))

    => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
    anf_imm Num(3) (fun limm ->
    anf_imm Num(2) (fun rimm ->
    ALet("v2", CPlus(limm, rimm), ((fun rimm ->
    ((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), rimm)))) (ImmId("v2")))))))

    => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
    anf_imm Num(2) (fun rimm ->
    ALet("v2", CPlus(ImmNum(3), rimm), ((fun rimm ->
    ((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), rimm)))) (ImmId("v2"))))))

    => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
    ALet("v2", CPlus(ImmNum(3), ImmNum(2)), ((fun rimm ->
    ((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), rimm)))) (ImmId("v2")))))

    => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
    ALet("v2", CPlus(ImmNum(3), ImmNum(2)),
    ((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), ImmId("v2"))))))

    => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
    ALet("v2", CPlus(ImmNum(3), ImmNum(2)),
    ACExpr(CPlus(ImmId("v1"), ImmId("v2")))))

    输出: > ALet ("v1", CPlus (ImmNum(5), ImmNum(4)),  ALet ("v2", CPlus (ImmNum(3), ImmNum(2)),   ACExpr (CPlus (ImmId("v1"), ImmId("v2")))))

编程作业

本次大作业是为Boa编程语言实现一个小型编译器,其编译过程为:boa源代码 -> expr(user-facing) -> aexpr(compiler-facing) -> instruction list(x86_32汇编)。

  • 具体语法
    boa源代码

    <expr> :=
      | let <bindings> in <expr>
      | if <expr>: <expr> else: <expr>
      | <binop-expr>
    
    <binop-expr> :=
      | <number>
      | <identifier>
      | add1(<expr>)
      | sub1(<expr>)
      | <expr> + <expr>
      | <expr> - <expr>
      | <expr> * <expr>
      | ( <expr> )
    
    <bindings> :=
      | <identifier> = <expr>
      | <identifier> = <expr>, <bindings> 
    
  • 抽象语法
    expr(user-facing)

    (* Unary primitive operators of Boa. *)
    type prim1 =
      | Add1
      | Sub1
    
    (* Binary arithmetic operators of Boa. *)
    type prim2 =
      | Plus
      | Minus
      | Times
    
    (* User-facing abstract syntax: operands may be arbitrary expressions. *)
    type expr =
      | ELet of (string * expr) list * expr   (* list of (name, value) bindings, then the body *)
      | EPrim1 of prim1 * expr                (* unary operator and its operand *)
      | EPrim2 of prim2 * expr * expr         (* binary operator, left operand, right operand *)
      | EIf of expr * expr * expr             (* condition, then-branch, else-branch *)
      | ENumber of int                        (* integer literal *)
      | EId of string                         (* variable reference, by name *)
    

    aexpr(compiler-facing)

    (* Immediate expressions: the only forms allowed as operator operands
       and as the condition of an if. *)
    type immexpr =
      | ImmNumber of int   (* integer literal *)
      | ImmId of string    (* variable reference, by name *)
    
    (* Compound expressions: one operation over immediates.  The types are
       mutually recursive because the branches of [CIf] are full [aexpr]s. *)
    and cexpr =
      | CPrim1 of prim1 * immexpr             (* unary operator on an immediate *)
      | CPrim2 of prim2 * immexpr * immexpr   (* binary operator on two immediates *)
      | CIf of immexpr * aexpr * aexpr        (* immediate condition, then-branch, else-branch *)
      | CImmExpr of immexpr                   (* an immediate used as a compound expression *)
    
    (* Compiler-facing A-normal form: a chain of single-name let-bindings
       ending in a compound expression. *)
    and aexpr =
      | ALet of string * cexpr * aexpr   (* name, bound expression, body *)
      | ACExpr of cexpr                  (* final expression of the chain *)
    

* 程序例子(每行分别表示boa/expr/aexpr或pretty-print)
    
    + 例1:
      ```text
      # 输出 41
      41 
    
      ENumber(41)
   
      ACExpr(CImmExpr(ImmNumber(41))) 
      ```
    + 例2:
      ```text
       # 输出4
       sub1(5) 

       EPrim1(Sub1, ENumber(5))  

       ALet("temp_unary_1", CPrim1(Sub1, ImmNumber(5)),  
         ACExpr(CImmExpr(ImmId("temp_unary_1")))) 
       ```

    + 例3:
      ```text
        # 输出8
        if 5 - 5: 6 else: 8 

        EIf(EPrim2(Minus, ENumber(5), ENumber(5)), ENumber(6), ENumber(8))

        ALet("temp_binary_2", CPrim2(Minus, ImmNumber(5), ImmNumber(5)), 
          ALet("temp_if_1", CIf(ImmId("temp_binary_2"), ACExpr(CImmExpr(ImmNumber(6))), ACExpr(CImmExpr(ImmNumber(8)))),         
            ACExpr(CImmExpr(ImmId("temp_if_1")))))
      ```
    
    + 例4:
      ```text
        # 输出14
        (5 + 4) + (3 + 2) 

        EPrim2(Plus, EPrim2(Plus, ENumber(5), ENumber(4)), EPrim2(Plus, ENumber(3), ENumber(2)))

        ALet("temp_binary_2", CPrim2(Plus, ImmNumber(5), ImmNumber(4)), 
          ALet("temp_binary_3", CPrim2(Plus, ImmNumber(3), ImmNumber(2)), 
            ALet("temp_binary_1", CPrim2(Plus, ImmId("temp_binary_2"), ImmId("temp_binary_3")), ACExpr(CImmExpr(ImmId("temp_binary_1"))))))
       ```

     + 例5:
       ```text
         # 输出10
         let x = (let y=10 in y), z=9 in x 
     
         ELet([("x", ELet([("y", ENumber(10))], EId("y"))); ("z", ENumber(9));], EId("x"))

         ALet("y", CImmExpr(ImmNumber(10)), 
           ALet("x", CImmExpr(ImmId("y")),  
             ALet("z", CImmExpr(ImmNumber(9)), ACExpr(CImmExpr(ImmId("x"))))))
        ```

    + 例6:
      ```text
      # 输出10
      let x = 10, y = 9 in 
      if (x - y) * 2: x else: y 

      ELet([("x", ENumber(10)); ("y", ENumber(9))],
        EIf(EPrim2(Times, EPrim2(Minus, EId("x"), EId("y")), ENumber(2)),
          EId("x"),
          EId("y")))

      # pretty-print
      (let x = 10 in 
      (let y = 9 in 
      (let temp_binary_3 = (x - y) in 
      (let temp_binary_2 = (temp_binary_3 * 2) in 
      (let temp_if_1 = (if temp_binary_2: x else: y) 
      in temp_if_1)))))     
      ```    

    + 例7:
      ```text
      # 输出25
      let c1 = 1 in 
      let c2 = 0 in
      (let x = (if c1: 5 + 5 else: 6 * 2) in
      (let y = (if c2: x * 3 else: x + 5) in
      (x + y))) 

      ELet([("c1", ENumber(1));], ELet([("c2", ENumber(0));], 
        ELet([("x", EIf(EId("c1"), EPrim2(Plus, ENumber(5), ENumber(5)), EPrim2(Times, ENumber(6), ENumber(2))))], 
        ELet([("y", EIf(EId("c2"), EPrim2(Times, EId("x"), ENumber(3)), EPrim2(Plus, EId("x"), ENumber(5))))], 
        EPrim2(Plus, EId("x"), EId("y"))))))

      # pretty-print
      (let c1 = 1 in 
      (let c2 = 0 in 
      (let temp_if_1 = (if c1: (let temp_binary_7 = (5 + 5) in temp_binary_7) else: (let temp_binary_6 = (6 * 2) in temp_binary_6)) in 
      (let x = temp_if_1 in 
      (let temp_if_2 = (if c2: (let temp_binary_5 = (x * 3) in temp_binary_5) else: (let temp_binary_4 = (x + 5) in temp_binary_4)) in 
      (let y = temp_if_2 in 
      (let temp_binary_3 = (x + y) in 
      temp_binary_3)))))))
      ```

* 将expr类型编译为aexpr类型
   输出可以参考上述程序例子生成的aexpr格式。
  ```ocaml
  (* [anf_k e k] converts the user-facing expression [e] into A-normal form
     using continuation-passing style.  [k] receives the immediate that
     stands for the value of [e] and returns the rest of the program.

     NOTE(review): [gen_temp] is defined elsewhere and appears to hand out
     numbered names.  Some constructor applications below have more than one
     argument that can reach [gen_temp] (e.g. both branches of [CIf] and the
     call to [k]), so the numbering of the generated temporaries depends on
     OCaml's argument evaluation order, which the language leaves
     unspecified.  Do not reorder these expressions without checking the
     expected output. *)
  let rec anf_k (e : expr) (k : immexpr -> aexpr) : aexpr =
    match e with
      | EPrim1(op, e) ->
        (* The temporary is requested before the operand is converted. *)
        let tmp = gen_temp "unary" in
        anf_k e (fun imm -> ALet(tmp, CPrim1(op, imm), k (ImmId(tmp))))
      | ELet(binds, body) ->
      (* Walk the bindings left to right.  Each value is converted and then
         bound under the user's own name; the remaining bindings and finally
         the body are nested inside that ALet, so later bindings are
         converted inside the scope of earlier ones. *)
      let rec helper binds =
        match binds with
          | [] -> anf_k body k
          | (id, e)::rest -> anf_k e (fun imm -> ALet(id, CImmExpr(imm), (helper rest)))
      in 
      helper binds
      | EPrim2(op, left, right) ->
        (* Left operand is converted first, then the right one; the result
           is bound to a temporary requested before either conversion. *)
        let tmp = gen_temp "binary" in
        anf_k left (fun limm ->
          anf_k right (fun rimm -> 
            ALet(tmp, CPrim2(op, limm, rimm), k (ImmId(tmp)))))
      | EIf(cond, thn, els) ->
        let tmp = gen_temp "if" in
        (* Each branch is converted with a fresh "return the value"
           continuation, so it becomes a self-contained aexpr.  The outer
           continuation [k] is applied once, to the temporary that names
           the result of the whole if. *)
        let ret = (fun imm -> ACExpr(CImmExpr(imm))) in
        anf_k cond (fun immcond ->
          ALet(tmp, CIf(immcond, anf_k thn ret, anf_k els ret), (k (ImmId(tmp)))))
      | ENumber(n) ->
        (* Literals and identifiers are already immediate. *)
        (k (ImmNumber(n)))
      | EId(name) ->
        (k (ImmId(name)))
  ```
  • 将cexpr类型编译为instruction list(生成汇编代码)
    根据不同子类型,需要执行不同的操作:

    • CImmExpr:只需要把相应的数字或id变量值移动到eax寄存器即可。
    • CPrim1:先把操作数(立即数或变量)移动到eax寄存器,然后根据Add1/Sub1,对eax寄存器进行+1/-1操作。
    • CPrim2:把左操作数移动到eax寄存器中,然后根据Plus/Minus/Times,用右表达式值对eax寄存器进行+/-/*的操作。
    • CIf:条件语句生成的汇编代码结构如下所示,只需要按照格式拼接就行。

      cmp eax, 0 ; check if eax is equal to 0
      je else_branch
      ; commands for then branch go here
      jmp end_of_if
      else_branch:
      ; commands for else branch go here
      end_of_if:

    (* [acompile_imm_arg i _ env] turns an immediate into an instruction
       operand.  A number becomes a constant.  An identifier is looked up in
       [env] (name -> stack index) and becomes an ESP-relative operand at
       offset [-4 * index].  The second argument is ignored.
       Fails with [Failure] when the identifier is not bound in [env]. *)
    let acompile_imm_arg (i : immexpr) _ (env : (string * int) list) : arg =
      match i with
      | ImmNumber n -> Const n
      | ImmId name ->
        begin match find env name with
          | None ->
            failwith (sprintf "An identifier is unbound (there is no surrounding let binding for %s)" name)
          | Some slot -> RegOffset ((-4) * slot, ESP)
        end
    
    (* [acompile_imm i si env] emits the single instruction that loads the
       immediate [i] into EAX. *)
    let acompile_imm (i : immexpr) (si : int) (env : (string * int) list) : instruction list =
      let operand = acompile_imm_arg i si env in
      [ IMov (Reg EAX, operand) ]
    
    (* [acompile_step s si env] compiles one compound expression.  In every
       case the emitted code leaves the result in EAX.
       - [si] is the next free stack index, passed on to nested compilation.
       - [env] maps variable names to the stack index that holds them. *)
    let rec acompile_step (s : cexpr) (si : int) (env : (string * int) list) : instruction list =
      match s with
      | CImmExpr imm -> acompile_imm imm si env
      | CPrim1 (op, operand) ->
        let load = acompile_imm operand si env in
        (* Both unary operators are emitted as an add of +1 or -1. *)
        let delta =
          match op with
          | Add1 -> 1
          | Sub1 -> -1
        in
        load @ [ IAdd (Reg EAX, Const delta) ]
      | CPrim2 (op, lhs, rhs) ->
        (* Left operand goes into EAX; the right one is used in place as
           the second operand of the arithmetic instruction. *)
        let load = acompile_imm lhs si env in
        let rhs_arg = acompile_imm_arg rhs si env in
        let apply =
          match op with
          | Plus -> IAdd (Reg EAX, rhs_arg)
          | Minus -> ISub (Reg EAX, rhs_arg)
          | Times -> IMul (Reg EAX, rhs_arg)
        in
        load @ [ apply ]
      | CIf (cond, thn, els) ->
        let else_label = gen_temp "else" in
        let done_label = gen_temp "endif" in
        (* A condition equal to 0 jumps to the else branch; anything else
           falls through into the then branch and skips the else branch. *)
        acompile_imm cond si env
        @ [ ICmp (Reg EAX, Const 0); IJe else_label ]
        @ acompile_expr thn si env
        @ [ IJmp done_label; ILabel else_label ]
        @ acompile_expr els si env
        @ [ ILabel done_label ]

    (* [acompile_expr e si env] compiles a chain of let-bindings.  Each
       bound value is computed into EAX, stored in stack slot [si] (offset
       [-4 * si] from ESP), and the body is compiled with the name mapped to
       that slot.  The bound expression is compiled with [si + 1], so its
       own temporaries are placed above slot [si]. *)
    and acompile_expr (e : aexpr) (si : int) (env : (string * int) list) : instruction list =
      match e with
      | ACExpr step -> acompile_step step si env
      | ALet (id, bound, rest) ->
        let next = si + 1 in
        let bound_code = acompile_step bound next env in
        let rest_code = acompile_expr rest next ((id, si) :: env) in
        let store = IMov (RegOffset (-4 * si, ESP), Reg EAX) in
        bound_code @ (store :: rest_code)
    

参考资料

starter-boa
cs75-anf
cs4410-anf
A-Normalization: Why and How

posted on 2019-02-12 10:25  dm1299  阅读(233)  评论(0编辑  收藏  举报

导航