中间代码生成器-5-编译原理
中间代码生成器
一、实验目的
掌握中间代码生成器的构造原理和编程方法。
二、实验内容
用自顶向下方法或Yacc进行语法分析的基础上,编写一个中间代码生成程序。(见教材附录 A.1,p394)
program → block
block → { decls stmts }
decls → decls decl | e
decl → type id ;
type → type [num] //数组可以不做
| basic //四种基本数据类型 int | float | char | bool
stmts → stmts stmt | e
stmt → id= expr ;
| if ( bool ) stmt
| if ( bool) stmt else stmt
| while (bool) stmt
| do stmt while (bool ) ;
| break ; //break可以不做
| block
bool → bool || join | join
join → join && equality | equality
equality → equality == rel | equality != rel | rel
rel → expr < expr
| expr <= expr
| expr > expr
| expr >= expr
| expr
expr → expr + term
| expr - term
| term
term → term * unary
| term / unary
| unary
unary → !unary | - unary | factor
factor → ( expr ) | id| num
。
三、实验要求
1.个人完成,提交实验报告。
2.实验的结果为:生成中间代码 (三地址码)
{
a=10;
x = b - c;
y = a * x;
z = x * d;
abcd = a + x +y;
if(a>10)
a=a+1;
a=a+2;
}
实验结果生成的代码片段
0: a = 10
1: t0 = b - c
2: x = t0
3: t1 = a * x
4: y = t1
5: t2 = x * d
6: z = t2
7: t3 = a + x
8: t4 = t3 + y
9: abcd = t4
10: if a > 10 goto 12
11: goto 14
12: t5 = a + 1
13: a = t5
14: t6 = a + 2
15: a = t6
个人对于本次中间代码生成器-编译实验的心得:
#define YYSTYPE node:定义符号栈属性结点类型 typedef struct node { instrlist *truelist, *falselist, *nextlist;//相当于有一个 正确链表与 错误链表 每个链表结点都有个值 char addr[256];// 地址字段 存放具体结点的内容比如 a 8 10 等 char lexeme[256];//字符串指端 char oper[3];//操作符字段 具体存放操作符对应的字符串 int instr; //是否要回填指示 具体内容是行号 }node; int main() { list = newcodelist(); //申请一个内存空间给list // int linecnt, capacity; 行数 与 最大行数 // int temp_index; // char **code; 总代码 freopen("test.in", "rt+", stdin); freopen("test.out", "wt+", stdout); //打开文件 yyparse(); print(list); fclose(stdin); //关闭文件 fclose(stdout); //关闭文件 return 0; } int gen_if(codelist *dst, node left, char* op, node right) { sprintf(tmp, "if %s %s %s goto", left.addr, op, right.addr);//第一个字符串 操作符 第二个字符串 Gen(dst, tmp); //将tmp的内容存放到 总字符串 list的code字段当中 return 0; } int Gen(codelist *dst, char *str) //输入 第一个参数为 list 第二个参数是 总字符串 tmp[1024] { //目标是 将tmp的内容存放到 总字符串 list的code字段当中 if (dst->linecnt >= dst->capacity) { dst->capacity += 1024; dst->code = (char**)realloc(dst->code, sizeof(char*)*dst->capacity); if (dst->code == NULL) { printf("short of memeory\n"); return 0; } } dst->code[dst->linecnt] = (char*)malloc(strlen(str)+20); //申请一个内存空间存放中间字符串 strcpy(dst->code[dst->linecnt], str); //字符串复制 dst->linecnt++; //行号加1 return 0; } int backpatch(codelist *dst, instrlist *list, int instrno) //总代码 dst 错误列表list 错误的指向行数 { if (list!=NULL) //如果错误的列表不为空 { listele *p=list->first; char tmp[20]; sprintf(tmp, " %d", instrno); 取出一个 while (p!=NULL) { if (p->instrno<dst->linecnt) strcat(dst->code[p->instrno], tmp); //错误列表:1 2 3 4 假设都指向instrno=6 那么将6转化为字符串tmp //再添加到每个字符串的末尾 p=p->next; } } return 0; } // int linecnt, capacity; 行数 与 最大行数 // int temp_index; // char **code;
重点是对课上的回填技术,还有
栈中的可能属性类别也就是node结点的属性种类,还有的是全局变量list使用的结点除了行号,最大行号,还有就是三地址码存放的chor **类型的code。
jia.l
%{ #include "jia.tab.h" %} delim [ \t\n\r] ws {delim}+ letter [A-Za-z] digit [0-9] id {letter}({letter}|{digit})* integer {digit}+ exponent E[+-]?{integer} number {integer}{exponent}? real integer(\.integer)?{exponent}? %option noyywrap %% if { return( IF ); } else { return( ELSE ); } while { return( WHILE ); } do { return( DO ); } for { return( FOR ); } switch { return( SWITCH ); } case { return( CASE ); } default { return( DEFAULT ); } break { return( BREAK ); } true { return( TRUE ); } false { return( FALSE ); } int { return( INT ); } long { return( LONG ); } char { return( CHAR ); } bool { return( BOOL ); } float { return( FLOAT ); } double { return( DOUBLE ); } "&&" { return( AND ); } "||" { return( OR ); } "!" { return( '!'); } "<"|"<="|">"|">="|"!="|"==" { filloperator(&yylval, yytext); return( REL); } "++" { return( INC ); } "--" { return( DEC ); } "+" { return( '+' ); } "-" { return( '-' ); } "*" { return( '*' ); } "/" { return( '/' ); } "=" { return( '=' ); } "{" { return( '{' ); } "}" { return( '}' ); } "[" { return( '[' ); } "]" { return( ']' ); } "(" { return( '(' ); } ")" { return( ')' ); } ";" { return( ';' ); } {ws} { } {id} { filllexeme(&yylval, yytext); return( ID ); } {number} { filllexeme(&yylval, yytext); return( NUMBER ); } {real} { filllexeme(&yylval, yytext); return( REAL ); } %%
jia.y
%{ #include "xu.h" #define YYSTYPE node #include "jia.tab.h" int yyerror(); int yyerror(char* msg); extern int yylex(); codelist* list; %} %token BASIC NUMBER REAL ID TRUE FALSE %token INT LONG CHAR BOOL FLOAT DOUBLE %token REL %token IF ELSE WHILE DO BREAK FOR SWITCH CASE DEFAULT %token OR AND %left OR %left AND %right '!' %left '+' '-' %left '*' '/' %right UMINUS %right INC DEC %% program : block { } ; block : '{' decls statementlist '}' { } ; decls : decls decl { } | { } ; decl : type ID ';' { } ; type : type '[' NUMBER ']' { } | BASIC { } ; statementlist : statementlist M statement { backpatch(list, $1.nextlist, $2.instr); $$.nextlist = $3.nextlist; } | statement { $$.nextlist = $1.nextlist; } ; statement : IF '(' boolean ')' M statement ELSE N M statement { backpatch(list, $3.truelist, $5.instr); backpatch(list, $3.falselist, $9.instr); $6.nextlist = merge($6.nextlist, $8.nextlist); $$.nextlist = merge($6.nextlist, $10.nextlist); } | IF '(' boolean ')' M statement { backpatch(list, $3.truelist, $5.instr); $$.nextlist = merge($3.falselist, $6.nextlist); } | WHILE M '(' boolean ')' M statement { backpatch(list, $7.nextlist, $2.instr); backpatch(list, $4.truelist, $6.instr); $$.nextlist = $4.falselist; gen_goto(list, $2.instr); } | DO M statement M WHILE '(' boolean ')' M ';' { backpatch(list, $3.nextlist, $4.instr); backpatch(list, $7.truelist, $9.instr); $$.nextlist = $7.falselist; gen_goto(list, $2.instr); } | FOR '(' assignment ';' M boolean ';' M assignment ')' N M statement { backpatch(list, $6.truelist, $12.instr); backpatch(list, $11.nextlist, $5.instr); backpatch(list, $13.nextlist, $8.instr); $$.nextlist = $6.falselist; gen_goto(list, $8.instr); } | BREAK ';' { } | '{' statementlist '}' { $$.nextlist = $2.nextlist; } | assignment ';' { $$.nextlist = NULL; } ; assignment : ID '=' boolean { copyaddr(&$1, $1.lexeme); gen_assignment(list, $1, $3); } ; loc : loc '[' boolean ']' { } | ID { copyaddr(&$$, $1.lexeme); } ; boolean : boolean OR M boolean { backpatch(list, $1.falselist, $3.instr); $$.truelist = merge($1.truelist, $4.truelist); $$.falselist = $4.falselist; } | boolean AND M boolean { backpatch(list, $1.truelist, $3.instr); $$.truelist = $4.truelist; $$.falselist = merge($1.falselist, $4.falselist); } | '!' boolean { $$.truelist = $1.falselist; $$.falselist = $1.truelist; } | '(' boolean ')' { $$.truelist = $1.truelist; $$.falselist = $1.falselist; } | expression REL expression { $$.truelist = new_instrlist(nextinstr(list)); $$.falselist = new_instrlist(nextinstr(list)+1); gen_if(list, $1, $2.oper, $3); gen_goto_blank(list); } | TRUE { copyaddr(&$$, "TRUE"); gen_goto_blank(list); } | FALSE { copyaddr(&$$, "FALSE"); gen_goto_blank(list); } | expression { copyaddr_fromnode(&$$, $1); } ; M : { $$.instr = nextinstr(list); } ; N : { $$.nextlist = new_instrlist(nextinstr(list)); gen_goto_blank(list); } ; expression : expression '+' expression { new_temp(&$$, get_temp_index(list)); gen_3addr(list, $$, $1, "+", $3); } | expression '-' expression { new_temp(&$$, get_temp_index(list)); gen_3addr(list, $$, $1, "-", $3); } | expression '*' expression { new_temp(&$$, get_temp_index(list)); gen_3addr(list, $$, $1, "*", $3); } | expression '/' expression { new_temp(&$$, get_temp_index(list)); gen_3addr(list, $$, $1, "/", $3); } | '-' expression %prec UMINUS { new_temp(&$$, get_temp_index(list)); gen_2addr(list, $$, "-", $2); } | loc { copyaddr_fromnode(&$$, $1); } | NUMBER { copyaddr(&$$, $1.lexeme); } | REAL { copyaddr(&$$, $1.lexeme); } ; %% int yyerror(char* msg) { printf("\nERROR with message: %s\n", msg); return 0; } int main() { list = newcodelist(); //申请一个内存空间给list // int linecnt, capacity; // int temp_index; // char **code; freopen("test.in", "rt+", stdin); freopen("test.out", "wt+", stdout); //打开文件 yyparse(); print(list); fclose(stdin); //关闭文件 fclose(stdout); //关闭文件 return 0; }
xu.h
#ifndef CP_H #define CP_H #include <stdio.h> #include <string.h> #include <malloc.h> typedef struct listele { int instrno; struct listele *next; }listele; listele* new_listele(int no) { listele* p = (listele*)malloc(sizeof(listele)); p->instrno = no; p->next = NULL; return p; } typedef struct instrlist { listele *first,*last; }instrlist; instrlist* new_instrlist(int instrno) { instrlist* p = (instrlist*)malloc(sizeof(instrlist)); p->first = p->last = new_listele(instrno); return p; } instrlist* merge(instrlist *list1, instrlist *list2) { instrlist *p; if (list1 == NULL) p = list2; else { if (list2!=NULL) { if (list1->last == NULL) { list1->first = list2->first; list1->last =list2->last; }else list1->last->next = list2->first; list2->first = list2->last = NULL; free(list2); } p = list1; } return p; } typedef struct node { instrlist *truelist, *falselist, *nextlist; char addr[256]; char lexeme[256]; char oper[3]; int instr; }node; int filloperator(node *dst, char *src) { strcpy(dst->oper, src); return 0; } int filllexeme(node *dst, char *yytext) { strcpy(dst->lexeme, yytext); return 0; } int copyaddr(node *dst, char *src) { strcpy(dst->addr, src); return 0; } int new_temp(node *dst, int index) { sprintf(dst->addr, "t%d", index); return 0; } int copyaddr_fromnode(node *dst, node src) { strcpy(dst->addr, src.addr); return 0; } typedef struct codelist { int linecnt, capacity; int temp_index; char **code; }codelist; codelist* newcodelist() { codelist* p = (codelist*)malloc(sizeof(codelist)); p->linecnt = 0; p->capacity = 1024; p->temp_index = 0; p->code = (char**)malloc(sizeof(char*)*1024); return p; } int get_temp_index(codelist* dst) { return dst->temp_index++; } int nextinstr(codelist *dst) { return dst->linecnt; } int Gen(codelist *dst, char *str) { if (dst->linecnt >= dst->capacity) { dst->capacity += 1024; dst->code = (char**)realloc(dst->code, sizeof(char*)*dst->capacity); if (dst->code == NULL) { printf("short of memeory\n"); return 0; } } dst->code[dst->linecnt] = (char*)malloc(strlen(str)+20); strcpy(dst->code[dst->linecnt], str); dst->linecnt++; return 0; } char tmp[1024]; int gen_goto_blank(codelist *dst) { sprintf(tmp, "goto"); Gen(dst, tmp); return 0; } int gen_goto(codelist *dst, int instrno) { sprintf(tmp, "goto %d", instrno); Gen(dst, tmp); return 0; } int gen_if(codelist *dst, node left, char* op, node right) { sprintf(tmp, "if %s %s %s goto", left.addr, op, right.addr); Gen(dst, tmp); return 0; } int gen_1addr(codelist *dst, node left, char* op) { sprintf(tmp, "%s %s", left.addr, op); Gen(dst, tmp); return 0; } int gen_2addr(codelist *dst, node left, char* op, node right) { sprintf(tmp, "%s = %s %s", left.addr, op, right.addr); Gen(dst, tmp); return 0; } int gen_3addr(codelist *dst, node left, node op1, char* op, node op2) { sprintf(tmp, "%s = %s %s %s", left.addr, op1.addr, op, op2.addr); Gen(dst, tmp); return 0; } int gen_assignment(codelist *dst, node left, node right) { gen_2addr(dst, left, "", right); return 0; } int backpatch(codelist *dst, instrlist *list, int instrno) { if (list!=NULL) { listele *p=list->first; char tmp[20]; sprintf(tmp, " %d", instrno); while (p!=NULL) { if (p->instrno<dst->linecnt) strcat(dst->code[p->instrno], tmp); p=p->next; } } return 0; } int print(codelist* dst) { int i; for (i=0; i < dst->linecnt; i++) printf("%5d: %s\n", i, dst->code[i]); return 0; } #endif
test.in
{ a=10; x = b - c; y = a * x; z = x * d; abcd = a + x +y; if(a>10) a=a+1; a=a+2; while(a>0) a=a-1; b=b+1; }