承接上一篇日志, 我对原来的解释器做一点小小的改动.
大致增加了如下特性:
1, 允许for(int i = 0; i < 23; i++) 这样使用for循环, 即在for的括号内部定义循环控制变量.
2,增加了continue关键字.
3, 增加了对一些新的数据类型的支持, bool, char, short, int, long, float, double都给予了支持.
4, 增加了true/false关键字, 直接对bool变量赋值.
关于增加新的数据类型这件事, 一开始我的想法是:
定义一个结构体, 用来表达上面支持的所有类型, 该结构体会根据实际的类型返回正确的值.
// First design sketch (not the structure finally used): one struct meant to
// express every supported type, with `type` saying which kind of value is held
// and the union holding that value.
struct var
{
    int type;
    union
    {
        char char_val;
        bool bool_val;
        int int_val;
        double double_val;
        float float_val;
        long long_val;
    }value;
};
当做加法运算的时候, c = a + b; 这里a 和b可以利用某种运算直接返回正确的值, 这样就可以依赖C++语言自身来处理这个加法了.
不过至于如何能让一个结构体根据实际类型返回正确的值这个问题, 其实就是在构造一种动态类型, 仔细想想python里面的动态类型应该就是用C这样写出来的.
关于如何实现动态类型, 可以参考《Python源码剖析》一书, 不过这里就不深入了, 我直接用一些类型判断把问题解决了~
// This is an anonymous var structure: it stores a variable's type and value
// but has no name of its own,
// similar to the notion of a variable reference in Python.
struct anonymous_var
{
    token_ireps var_type; // data type
    long int_value;       // holds the value of every integer type
    double float_value;   // holds the value of every floating-point type
};
// This structure encapsulates the info
// associated with variables.
struct var
{
    char var_name[MAX_ID_LEN + 1]; // name
    anonymous_var value;
};
上面就是我现在使用的结构体的代码, 用long值存储所有的整型数据, double值存储所有的浮点数.
做各种运算的时候, 会将低级别的类型统一成高级别的类型来操作, 也就是做了一下算术转换, 这样实现起来比较简单.
所有的整型之间的运算使用long值来计算, 所有浮点之间的运算使用double值来计算, 整型和浮点之间的运算会先把整型数据转化成浮点.
结构体里面只存了long和double, 而在需要使用相应的值的时候(比如要打印一个char)会再把对应的高级类型转化回去, 可以参看exec_cout函数体.
下面是我改进后的代码:
敬告, 代码依然很脆弱, 请使用简单而且语法正确的程序测试, 而且可能我的改动会引入更多的bug, 所以仅供参考.
/************************************************************************/
/* mccommon.h: header declaring the functions and types used by Minicpp.
 * Last modified 2012-9-15, mainly to add declarations for the functions in var.cpp */
/************************************************************************/

const int MAX_T_LEN = 128; // max token length
const int MAX_ID_LEN = 31; // max identifier length
const int PROG_SIZE = 10000; // max program size
const int NUM_PARAMS = 31; // max number of parameters

// Enumeration of token types.
enum tok_types { UNDEFTT, DELIMITER, IDENTIFIER,
                 NUMBER, KEYWORD, TEMP, STRING, BLOCK, TYPE
               };

// Enumeration of internal representation of tokens.
// Most of these are C++ keywords; cout and cin have been added as well.
// The numeric types BOOL..DOUBLE are contiguous and ordered: is_int_type(),
// is_float_type() and is_valid_type() test ranges of this enumeration, and the
// arithmetic helpers pick the larger enumerator as the result type.
enum token_ireps { UNDEFTOK, ARG, BOOL, CHAR, SHORT, INT, LONG, FLOAT, DOUBLE, FALSE, TRUE, SWITCH,
                   CASE, IF, ELSE, FOR, DO, WHILE, BREAK,
                   RETURN, COUT, CIN, END, ENDL, DEFAULT, CONTINUE, NONE
                 };


// The difference between the two enumerations above: the first is broader and
// denotes the kind of token (for example BLOCK, which is never a keyword);
// the second can be understood as the keywords themselves.

// This is an anonymous var structure: it stores a variable's type and value
// but has no name of its own,
// similar to the notion of a variable reference in Python.
struct anonymous_var
{
    token_ireps var_type; // data type
    long int_value;       // value when var_type is an integer type
    double float_value;   // value when var_type is a floating-point type
};
// This structure encapsulates the info
// associated with variables.
struct var
{
    char var_name[MAX_ID_LEN + 1]; // name
    anonymous_var value;
};


// This structure encapsulates function info.
struct func_type
{
    char func_name[MAX_ID_LEN + 1]; // name
    token_ireps ret_type; // return type
    char *loc; // location of entry point in program
};

// Enumeration of two-character operators, such as <=.
// Note that these start at 1 so that 0 is never used as an operator code:
// get_token() stores these codes as characters in the token string, where a
// 0 would be read as the string terminator.
enum double_ops { LT = 1, LE, GT, GE, EQ, NE, LS, RS, INC, DEC };

// These are the constants used when throwing a
// syntax error exception.
//
// NOTE: SYNTAX is a generic error message used when
// nothing else seems appropriate.
// The enumerators are used as indices into the message table in sntx_err(),
// so their order must stay in step with that table.
enum error_msg
{
    SYNTAX, NO_EXP, NOT_VAR, DUP_VAR, DUP_FUNC,
    SEMI_EXPECTED, UNBAL_BRACES, FUNC_UNDEF,
    TYPE_EXPECTED, RET_NOCALL, PAREN_EXPECTED,
    WHILE_EXPECTED, QUOTE_EXPECTED, DIV_BY_ZERO,
    BRACE_EXPECTED, COLON_EXPECTED, UNSUPPORTED_TYPE
    // UNSUPPORTED_TYPE was added last, because for now only a few simple
    // built-in types are supported.
};





// The following are global variables that drive parsing; if the code is ever
// refactored they would probably become the main members of a parser class.
extern char *prog; // current location in source code
extern char *p_buf; // points to start of program buffer

extern char token[MAX_T_LEN + 1]; // string version of token
extern tok_types token_type; // contains type of token
extern token_ireps tok; // internal representation of token

extern anonymous_var ret_value; // function return value

extern bool breakfound; // true if break encountered
extern bool continuefound;//true if continue encountered

// Exception class for Mini C++.
// Carries the error_msg code that sntx_err() turns into a message.
class InterpExc
{
    error_msg err;
public:
    InterpExc(error_msg e)
    {
        err = e;
    }
    error_msg get_err()
    {
        return err;
    }
};

// Interpreter prototypes.
void prescan();
void decl_global();
void call();
void putback();
void decl_local();
void exec_if();
void find_eob();
void exec_for();
void exec_switch();
void get_params();
void get_args();
void exec_while();
void exec_do();
void exec_cout();
void exec_cin();
void assign_var(char *var_name, anonymous_var value);
bool load_program(char *p, char *fname);
anonymous_var find_var(char *s);
void interp();
void func_ret();
char *find_func(char *name);
bool is_var(char *s);
token_ireps find_var_type(char *s);
void find_eol();


// Parser prototypes; these functions are mainly used to parse expressions.
131 void eval_exp(anonymous_var &value); 132 void eval_exp0(anonymous_var &value); 133 void eval_exp1(anonymous_var &value); 134 void eval_exp2(anonymous_var &value); 135 void eval_exp3(anonymous_var &value); 136 void eval_exp4(anonymous_var &value); 137 void eval_exp5(anonymous_var &value); 138 void atom(anonymous_var &value); 139 void sntx_err(error_msg error); 140 void putback(); 141 bool isdelim(char c); 142 token_ireps look_up(char *s); 143 anonymous_var find_var(char *s); 144 tok_types get_token(); 145 int internal_func(char *s); 146 bool is_var(char *s); 147 148 // "Standard library" prototypes, 这几个函数里面是调用了一下C++的库函数, 封装了一下. 149 anonymous_var call_getchar(); 150 anonymous_var call_putchar(); 151 anonymous_var call_abs(); 152 anonymous_var call_rand(); 153 154 155 156 //下面这些是为了支持多种类型二增加的一些函数. 157 //在var.cpp里面实现. 158 anonymous_var add(anonymous_var &a, anonymous_var &b); 159 anonymous_var sub(anonymous_var &a, anonymous_var &b); 160 anonymous_var mul(anonymous_var &a, anonymous_var &b); 161 anonymous_var div(anonymous_var &a, anonymous_var &b); 162 void cin_var(anonymous_var &v); 163 void cout_var(anonymous_var &v); 164 bool is_valid_type(token_ireps ti); 165 void init_var(anonymous_var &v); 166 void neg_var(anonymous_var &v); 167 bool zero(double x); 168 void abs_var(anonymous_var &v); 169 int cmp(anonymous_var &a, anonymous_var &b); 170 bool is_float_type(token_ireps type); 171 bool is_int_type(token_ireps type); 172 bool get_bool_val(anonymous_var &v); 173 void adaptive_assign_var(anonymous_var &a, anonymous_var &b); 174 175 176 /******************************************************************** 177 var.cpp, 主要是为了增加对多种数值类型的支持而添加的 178 目前支持bool, char, short, int, long, float, double 179 基本只使用long和double进行结算. 
180 **********************************************************************/ 181 182 #include <iostream> 183 #include <cmath> 184 #include "mccommon.h" 185 using namespace std; 186 187 inline bool zero(double x) 188 { 189 const double EPS = 1e-9; 190 return abs(x) < EPS; 191 } 192 bool is_int_type(token_ireps type) 193 { 194 if(type >= BOOL && type <= LONG) return true; 195 return false; 196 } 197 bool is_float_type(token_ireps type) 198 { 199 if(type >= FLOAT && type <= DOUBLE) return true; 200 else return false; 201 } 202 bool check_valid_type(anonymous_var &val) 203 { 204 if(!(is_int_type(val.var_type) || is_float_type(val.var_type))) return false; 205 return true; 206 } 207 anonymous_var add(anonymous_var &a, anonymous_var &b) 208 { 209 if(!check_valid_type(a) || !check_valid_type(b)) throw InterpExc(UNSUPPORTED_TYPE); 210 anonymous_var res; 211 if(a.var_type > b.var_type) res.var_type = a.var_type; 212 else res.var_type = b.var_type; 213 214 if(is_int_type(res.var_type)) 215 { 216 res.int_value = a.int_value + b.int_value; 217 } 218 else 219 { 220 if(is_int_type(a.var_type)) 221 { 222 res.float_value = double(a.int_value) + b.float_value; 223 } 224 else if(is_int_type(b.var_type)) 225 { 226 res.float_value = a.float_value + double(b.int_value); 227 } 228 else 229 { 230 res.float_value = a.float_value + b.float_value; 231 } 232 } 233 return res; 234 } 235 236 anonymous_var sub(anonymous_var &a, anonymous_var &b) 237 { 238 if(!check_valid_type(a) || !check_valid_type(b)) throw InterpExc(UNSUPPORTED_TYPE); 239 anonymous_var res; 240 if(a.var_type > b.var_type) res.var_type = a.var_type; 241 else res.var_type = b.var_type; 242 243 if(is_int_type(res.var_type)) 244 { 245 res.int_value = a.int_value - b.int_value; 246 } 247 else 248 { 249 if(is_int_type(a.var_type)) 250 { 251 res.float_value = double(a.int_value) - b.float_value; 252 } 253 else if(is_int_type(b.var_type)) 254 { 255 res.float_value = a.float_value - double(b.int_value); 256 } 257 else 258 { 259 
res.float_value = a.float_value - b.float_value; 260 } 261 } 262 return res; 263 264 } 265 266 anonymous_var mul(anonymous_var &a, anonymous_var &b) 267 { 268 if(!check_valid_type(a) || !check_valid_type(b)) throw InterpExc(UNSUPPORTED_TYPE); 269 anonymous_var res; 270 271 if(a.var_type > b.var_type) res.var_type = a.var_type; 272 else res.var_type = b.var_type; 273 274 if(is_int_type(res.var_type)) 275 { 276 res.int_value = a.int_value * b.int_value; 277 } 278 else 279 { 280 if(is_int_type(a.var_type)) 281 { 282 res.float_value = double(a.int_value) * b.float_value; 283 } 284 else if(is_int_type(b.var_type)) 285 { 286 res.float_value = a.float_value * double(b.int_value); 287 } 288 else 289 { 290 res.float_value = a.float_value * b.float_value; 291 } 292 } 293 return res; 294 } 295 296 anonymous_var div(anonymous_var &a, anonymous_var &b) 297 { 298 if(!check_valid_type(a) || !check_valid_type(b)) throw InterpExc(UNSUPPORTED_TYPE); 299 anonymous_var res; 300 if(a.var_type > b.var_type) res.var_type = a.var_type; 301 else res.var_type = b.var_type; 302 303 if(is_int_type(b.var_type)) 304 { 305 if(0 == a.int_value) throw InterpExc(DIV_BY_ZERO); 306 } 307 else 308 { 309 if(zero(b.float_value)) throw InterpExc(DIV_BY_ZERO); 310 } 311 312 if(is_int_type(res.var_type)) 313 { 314 res.int_value = a.int_value / b.int_value; 315 } 316 else 317 { 318 if(is_int_type(a.var_type)) 319 { 320 res.float_value = double(a.int_value) / b.float_value; 321 } 322 else if(is_int_type(b.var_type)) 323 { 324 res.float_value = a.float_value / double(b.int_value); 325 } 326 else 327 { 328 res.float_value = a.float_value / b.float_value; 329 } 330 } 331 return res; 332 } 333 334 335 //因为现在我只用long 和double来存储表示所有的数值类型, 336 //在从控制台读取内容的时候会不方便, 现在先用 337 void cin_var(anonymous_var &v) 338 { 339 switch (v.var_type) 340 { 341 case BOOL: 342 { 343 bool tmp_var; 344 cin >> tmp_var; 345 v.int_value = tmp_var; 346 break; 347 } 348 case CHAR: 349 { 350 char tmp_var; 351 cin >> tmp_var; 352 v.int_value = 
tmp_var; 353 break; 354 } 355 case SHORT: 356 { 357 short tmp_var; 358 cin >> tmp_var; 359 v.int_value = tmp_var; 360 break; 361 } 362 case INT: 363 { 364 int tmp_var; 365 cin >> tmp_var; 366 v.int_value = tmp_var; 367 break; 368 } 369 case LONG: 370 { 371 long tmp_var; 372 cin >> tmp_var; 373 v.int_value = tmp_var; 374 break; 375 } 376 case FLOAT: 377 { 378 float tmp_var; 379 cin >> tmp_var; 380 v.float_value = tmp_var; 381 break; 382 } 383 case DOUBLE: 384 { 385 double tmp_var; 386 cin >> tmp_var; 387 v.float_value = tmp_var; 388 break; 389 } 390 default: 391 throw InterpExc(UNSUPPORTED_TYPE); 392 break; 393 } 394 } 395 396 397 //输出的时候, 要先转化成相应的类型, 然后再打印 398 void cout_var(anonymous_var &v) 399 { 400 switch(v.var_type) 401 { 402 case BOOL: 403 cout << bool(v.int_value != 0); 404 break; 405 case CHAR: 406 cout << char(v.int_value); 407 break; 408 case SHORT: 409 cout << short(v.int_value); 410 break; 411 case INT: 412 cout << int(v.int_value); 413 break; 414 case LONG: 415 cout << long(v.int_value); 416 break; 417 case FLOAT: 418 cout << float(v.float_value); 419 break; 420 case DOUBLE: 421 cout << double(v.float_value); 422 break; 423 default: 424 throw InterpExc(UNSUPPORTED_TYPE); 425 break; 426 } 427 } 428 429 bool is_valid_type(token_ireps ti) 430 { 431 return ti >= BOOL && ti <= DOUBLE; 432 } 433 434 void init_var(anonymous_var &v) 435 { 436 v.int_value = 0; 437 v.float_value = 0.0; 438 } 439 440 void neg_var(anonymous_var &v) 441 { 442 if(is_int_type(v.var_type)) 443 { 444 v.int_value = -v.int_value; 445 } 446 else if(is_float_type(v.var_type)) 447 { 448 v.float_value = v.float_value; 449 } 450 else 451 { 452 throw InterpExc(UNSUPPORTED_TYPE); 453 } 454 } 455 456 void abs_var(anonymous_var &v) 457 { 458 if(is_int_type(v.var_type)) 459 { 460 v.int_value = abs(v.int_value); 461 } 462 else if(is_float_type(v.var_type)) 463 { 464 v.float_value = abs(v.float_value); 465 } 466 else 467 { 468 throw InterpExc(UNSUPPORTED_TYPE); 469 } 470 } 471 472 473 474 int 
cmp(anonymous_var &a, anonymous_var &b) 475 { 476 if(!check_valid_type(a) || !check_valid_type(b)) throw InterpExc(UNSUPPORTED_TYPE); 477 if(is_int_type(a.var_type)) 478 { 479 if(is_int_type(b.var_type)) 480 { 481 if(a.int_value == b.int_value) return 0; 482 if(a.int_value < b.int_value) return -1; 483 return 1; 484 } 485 else if(is_float_type(a.var_type)) 486 { 487 if(zero(a.int_value - b.float_value)) return 0; 488 if(a.int_value < b.float_value) return -1; 489 return 1; 490 } 491 } 492 else 493 { 494 if(is_int_type(b.var_type)) 495 { 496 if(zero(a.float_value - b.int_value)) return 0; 497 if(a.float_value < b.int_value) return -1; 498 return 1; 499 } 500 else 501 { 502 if(zero(a.float_value - b.float_value)) return 0; 503 if(a.float_value < b.float_value) return -1; 504 return 1; 505 } 506 } 507 } 508 509 bool get_bool_val(anonymous_var &v) 510 { 511 bool bool_val = false; 512 if(is_int_type(v.var_type)) 513 { 514 bool_val = v.int_value != 0; 515 } 516 else if(is_float_type(v.var_type)) 517 { 518 bool_val = !zero(v.float_value); 519 } 520 else 521 { 522 throw InterpExc(UNSUPPORTED_TYPE); 523 } 524 return bool_val; 525 } 526 527 528 //这个函数适配性地进行了赋值. 529 void adaptive_assign_var(anonymous_var &a, anonymous_var &b) 530 { 531 if(!check_valid_type(a) || !check_valid_type(b)) throw InterpExc(UNSUPPORTED_TYPE); 532 533 if(is_int_type(a.var_type)) 534 { 535 if(is_int_type(b.var_type)) 536 { 537 a.int_value = b.int_value; 538 } 539 else 540 { 541 a.int_value = int(b.float_value); 542 } 543 } 544 else 545 { 546 if(is_int_type(b.var_type)) 547 { 548 a.float_value = double(b.int_value); 549 } 550 else 551 { 552 a.float_value = b.float_value; 553 } 554 } 555 } 556 557 558 559 /********************************************************************* 560 parser.cpp 用来递归下降地解析表达式, 使用anonymous_var类型传递中间结果 561 *********************************************************************/ 562 563 564 // Recursive descent parser for integer expressions. 
565 // 566 #include <iostream> 567 #include <cstring> 568 #include <cstdlib> 569 #include <cctype> 570 #include "mccommon.h" 571 572 using namespace std; 573 574 // This structure links a library function name 575 // with a pointer to that function. 576 struct intern_func_type 577 { 578 char *f_name; // function name 579 anonymous_var (*p)(); // pointer to the function 580 } intern_func[] = 581 { 582 "getchar", call_getchar, 583 "putchar", call_putchar, 584 "abs", call_abs, 585 "rand", call_rand, 586 "", 0 // null terminate the list 587 }; 588 589 // Keyword lookup table. 590 // Keywords must be entered lowercase. 591 // 定义关键字, 对应一个tok的id, 能提高些效率 592 struct commands 593 { 594 char command[20]; 595 token_ireps tok; 596 } com_table[] = 597 { 598 "if", IF, 599 "else", ELSE, 600 "for", FOR, 601 "do", DO, 602 "while", WHILE, 603 "bool", BOOL, 604 "char", CHAR, 605 "short", SHORT, 606 "int", INT, 607 "long", LONG, 608 "float", FLOAT, 609 "double", DOUBLE, 610 "return", RETURN, 611 "switch", SWITCH, 612 "break", BREAK, 613 "case", CASE, 614 "cout", COUT, 615 "cin", CIN, 616 "endl", ENDL, 617 "default", DEFAULT, 618 "continue", CONTINUE, 619 "true", TRUE, 620 "false", FALSE, 621 "", END // mark end of table 622 }; 623 624 //eval_exp不根据之前读到的信息来操作, 是在eval_exp函数里面, 读到什么就调用相应的处理过程. 625 // Entry point into parser. 626 void eval_exp(anonymous_var &value) 627 { 628 get_token(); 629 630 if(!*token) 631 { 632 throw InterpExc(NO_EXP); 633 } 634 635 //对于空语句给予一个默认的设置. 636 if(*token == ';') 637 { 638 value.var_type = BOOL; // empty expression 639 value.int_value = false; 640 return; 641 } 642 643 eval_exp0(value); 644 645 646 //这里会把处理完之后读出来的token再返回去. 647 putback(); // return last token read to input stream 648 } 649 650 651 // Process an assignment expression. 652 void eval_exp0(anonymous_var &value) 653 { 654 // temp holds name of var receiving the assignment. 
655 char temp[MAX_ID_LEN + 1]; 656 657 tok_types temp_tok; 658 659 if(token_type == IDENTIFIER) 660 { 661 if(is_var(token)) // if a var, see if assignment 662 { 663 strcpy(temp, token); 664 temp_tok = token_type; 665 get_token(); 666 if(*token == '=') // is an assignment 667 { 668 get_token(); 669 eval_exp0(value); // get value to assign 670 671 assign_var(temp, value); // assign the value 672 673 return; 674 } 675 else // not an assignment 676 { 677 putback(); // restore original token 678 strcpy(token, temp); 679 token_type = temp_tok; 680 } 681 } 682 } 683 eval_exp1(value); 684 } 685 686 // Process relational operators. 687 void eval_exp1(anonymous_var &value) 688 { 689 anonymous_var partial_value; 690 char op; 691 char relops[] = 692 { 693 LT, LE, GT, GE, EQ, NE, 0 694 }; 695 696 eval_exp2(value); 697 698 op = *token; 699 if(strchr(relops, op)) 700 { 701 get_token(); 702 eval_exp2(partial_value); 703 704 switch(op) // perform the relational operation 705 { 706 case LT: 707 { 708 int res = cmp(value, partial_value); 709 value.var_type = BOOL; 710 value.int_value = res < 0; 711 break; 712 } 713 case LE: 714 { 715 int res = cmp(value, partial_value); 716 value.var_type = BOOL; 717 value.int_value = res <= 0; 718 break; 719 } 720 case GT: 721 { 722 int res = cmp(value, partial_value); 723 value.var_type = BOOL; 724 value.int_value = res > 0; 725 break; 726 } 727 case GE: 728 { 729 int res = cmp(value, partial_value); 730 value.var_type = BOOL; 731 value.int_value = res >= 0; 732 break; 733 } 734 case EQ: 735 { 736 int res = cmp(value, partial_value); 737 738 value.var_type = BOOL; 739 value.int_value = (res == 0); 740 break; 741 } 742 case NE: 743 { 744 int res = cmp(value, partial_value); 745 value.var_type = BOOL; 746 value.int_value = !(res == 0); 747 break; 748 } 749 } 750 } 751 } 752 753 // Add or subtract two terms. 
754 void eval_exp2(anonymous_var &value) 755 { 756 char op; 757 anonymous_var partial_value; 758 char okops[] = 759 { 760 '(', INC, DEC, '-', '+', 0 761 }; 762 763 eval_exp3(value); 764 765 while((op = *token) == '+' || op == '-') 766 { 767 get_token(); 768 769 if(token_type == DELIMITER && 770 !strchr(okops, *token)) 771 throw InterpExc(SYNTAX); 772 773 eval_exp3(partial_value); 774 775 776 777 switch(op) // add or subtract 778 { 779 case '-': 780 { 781 value = sub(value, partial_value); 782 break; 783 } 784 case '+': 785 { 786 value = add(value, partial_value); 787 break; 788 } 789 } 790 } 791 } 792 793 // Multiply or divide two factors. 794 void eval_exp3(anonymous_var &value) 795 { 796 char op; 797 anonymous_var partial_value; 798 char okops[] = 799 { 800 '(', INC, DEC, '-', '+', 0 801 }; 802 803 eval_exp4(value); 804 805 while((op = *token) == '*' || op == '/' 806 || op == '%') 807 { 808 get_token(); 809 810 if(token_type == DELIMITER && 811 !strchr(okops, *token)) 812 throw InterpExc(SYNTAX); 813 814 eval_exp4(partial_value); 815 816 switch(op) // mul, div, or modulus 817 { 818 case '*': 819 { 820 value = mul(value, partial_value); 821 break; 822 } 823 824 case '/': 825 { 826 //判断除零异常在程序里面做了 827 value = div(value, partial_value); 828 break; 829 } 830 case '%': 831 { 832 anonymous_var tmp = div(value, partial_value); 833 tmp = mul(partial_value, tmp); 834 value = sub(value, tmp); 835 break; 836 } 837 } 838 } 839 } 840 841 // Is a unary +, -, ++, or --. 842 void eval_exp4(anonymous_var &value) 843 { 844 char op; 845 char temp; 846 847 op = '\0'; 848 if(*token == '+' || *token == '-' || 849 *token == INC || *token == DEC) 850 { 851 temp = *token; 852 op = *token; 853 get_token(); 854 value = find_var(token); 855 //处理前缀++, --要把变化反应到变量身上. 
856 if(temp == INC) 857 { 858 anonymous_var tmp_var; 859 tmp_var.int_value = 1; 860 tmp_var.var_type = value.var_type; 861 value = add(value, tmp_var); 862 assign_var(token, value); 863 get_token(); 864 return; 865 } 866 if(temp == DEC) 867 { 868 anonymous_var tmp_var; 869 tmp_var.int_value = 1; 870 tmp_var.var_type = value.var_type; 871 value = add(value, tmp_var); 872 assign_var(token, value); 873 get_token(); 874 return; 875 } 876 } 877 878 eval_exp5(value); 879 if(op == '-') 880 { 881 neg_var(value); 882 } 883 } 884 885 // Process parenthesized expression. 886 void eval_exp5(anonymous_var &value) 887 { 888 if((*token == '(')) 889 { 890 get_token(); 891 892 eval_exp0(value); // get subexpression 893 894 if(*token != ')') 895 throw InterpExc(PAREN_EXPECTED); 896 get_token(); 897 } 898 else 899 atom(value); 900 } 901 902 // Find value of number, variable, or function. 903 //增加一个处理功能, 处理浮点数, 但目前只允许***.***的形式, 不允许科学技术法. 904 void atom(anonymous_var &value) 905 { 906 int i; 907 char temp[MAX_ID_LEN + 1]; 908 909 switch(token_type) 910 { 911 case IDENTIFIER: 912 i = internal_func(token); 913 if(i != -1) 914 { 915 // Call "standard library" function. 916 value = ((*intern_func[i].p)()); 917 } 918 else if(find_func(token)) 919 { 920 // Call programmer-created function. 921 call(); 922 value = ret_value;//目前函数还只支持int返回值 923 } 924 else 925 { 926 927 //在这里处理了后缀++, -- 928 value = find_var(token); // get var's value 929 strcpy(temp, token); // save variable name 930 931 // Check for ++ or --. 
932 get_token(); 933 if(*token == INC || *token == DEC) 934 { 935 anonymous_var tmp_val = find_var(temp); 936 value = tmp_val; 937 if(*token == INC) 938 { 939 anonymous_var one; 940 one.int_value = 1; 941 one.var_type = tmp_val.var_type; 942 tmp_val = add(tmp_val, one); 943 assign_var(temp, tmp_val); 944 } 945 else 946 { 947 anonymous_var one; 948 one.int_value = 1; 949 one.var_type = tmp_val.var_type; 950 tmp_val = sub(tmp_val, one); 951 assign_var(temp, tmp_val); 952 } 953 954 } 955 else putback(); 956 } 957 958 get_token(); 959 return; 960 case NUMBER: // is numeric constant 961 //这里对浮点和整型类型做个判断 962 if(strchr(token, '.')) 963 { 964 965 value.var_type = DOUBLE; 966 value.float_value = atof(token); 967 } 968 else 969 { 970 value.var_type = INT; 971 value.int_value = atoi(token); 972 } 973 get_token(); 974 975 return; 976 977 //char constant 978 case DELIMITER: // see if character constant 979 if(*token == '\'') 980 { 981 value.var_type = CHAR; 982 value.int_value = *prog; 983 prog++; 984 if(*prog != '\'') 985 throw InterpExc(QUOTE_EXPECTED); 986 987 prog++; 988 get_token(); 989 990 return ; 991 } 992 if(*token == ')') return; // process empty expression 993 else throw InterpExc(SYNTAX); // otherwise, syntax error 994 case KEYWORD: 995 { 996 if(0 == strcmp(token, "true")) 997 { 998 //cout << "jackieddddd" << endl; 999 value.var_type = BOOL; 1000 value.int_value = 1; 1001 } 1002 else if(0 == strcmp(token, "false")) 1003 { 1004 value.var_type = BOOL; 1005 value.int_value = 0; 1006 } 1007 else 1008 { 1009 throw InterpExc(SYNTAX); 1010 } 1011 get_token(); 1012 break; 1013 } 1014 default: 1015 throw InterpExc(SYNTAX); // syntax error 1016 } 1017 } 1018 1019 // Display an error message. 
1020 void sntx_err(error_msg error) 1021 { 1022 char *p, *temp; 1023 int linecount = 0; 1024 1025 static char *e[] = //这里的显示信息, 是跟头文件里面定义的错误flag顺序一致的 1026 { 1027 "Syntax error", 1028 "No expression present", 1029 "Not a variable", 1030 "Duplicate variable name", 1031 "Duplicate function name", 1032 "Semicolon expected", 1033 "Unbalanced braces", 1034 "Function undefined", 1035 "Type specifier expected", 1036 "Return without call", 1037 "Parentheses expected", 1038 "While expected", 1039 "Closing quote expected", 1040 "Division by zero", 1041 "{ expected (control statements must use blocks)", 1042 "Colon expected", 1043 "Unsupported type yet", 1044 }; 1045 1046 // Display error and line number. 1047 cout << "\n" << e[error]; 1048 p = p_buf; 1049 while(p != prog) // find line number of error 1050 { 1051 p++; 1052 if(*p == '\r') 1053 { 1054 linecount++; 1055 } 1056 } 1057 cout << " in line " << linecount << endl; 1058 1059 temp = p; 1060 while(p > p_buf && *p != '\n') p--; 1061 1062 // Display offending line. 1063 while(p <= temp) 1064 cout << *p++; 1065 1066 cout << endl; 1067 } 1068 1069 // Get a token. 1070 tok_types get_token() 1071 { 1072 char *temp; 1073 1074 token_type = UNDEFTT; 1075 tok = UNDEFTOK; 1076 1077 temp = token; 1078 *temp = '\0'; 1079 1080 // Skip over white space. 1081 while(isspace(*prog) && *prog) ++prog; 1082 1083 // Skip over newline. 1084 while(*prog == '\r') 1085 { 1086 ++prog; 1087 ++prog; 1088 // Again, skip over white space. 1089 while(*prog && isspace(*prog)) ++prog; 1090 } 1091 1092 // Check for end of program. 1093 if(*prog == '\0') 1094 { 1095 *token = '\0'; 1096 tok = END; 1097 return (token_type = DELIMITER); 1098 } 1099 1100 // Check for block delimiters. 1101 if(strchr("{}", *prog)) 1102 { 1103 *temp = *prog; 1104 temp++; 1105 *temp = '\0'; 1106 prog++; 1107 return (token_type = BLOCK); 1108 } 1109 1110 // Look for comments. 
1111 if(*prog == '/') 1112 if(*(prog + 1) == '*') // is a /* comment 1113 { 1114 prog += 2; 1115 1116 //这个循环很给力 1117 do // find end of comment 1118 { 1119 while(*prog != '*') prog++; 1120 prog++; 1121 } 1122 while (*prog != '/'); 1123 prog++; 1124 return (token_type = DELIMITER); 1125 } 1126 else if(*(prog + 1) == '/') // is a // comment 1127 { 1128 prog += 2; 1129 // Find end of comment. 1130 while(*prog != '\r' && *prog != '\0') prog++; 1131 if(*prog == '\r') prog += 2; 1132 return (token_type = DELIMITER); 1133 } 1134 1135 // Check for double-ops. 1136 if(strchr("!<>=+-", *prog)) 1137 { 1138 switch(*prog) 1139 { 1140 case '=': 1141 if(*(prog + 1) == '=') 1142 { 1143 prog++; 1144 prog++; 1145 *temp = EQ; 1146 temp++; 1147 *temp = EQ; 1148 temp++; 1149 *temp = '\0'; 1150 } 1151 break; 1152 case '!': 1153 if(*(prog + 1) == '=') 1154 { 1155 prog++; 1156 prog++; 1157 *temp = NE; 1158 temp++; 1159 *temp = NE; 1160 temp++; 1161 *temp = '\0'; 1162 } 1163 break; 1164 case '<': 1165 if(*(prog + 1) == '=') 1166 { 1167 prog++; 1168 prog++; 1169 *temp = LE; 1170 temp++; 1171 *temp = LE; 1172 } 1173 else if(*(prog + 1) == '<') 1174 { 1175 prog++; 1176 prog++; 1177 *temp = LS; 1178 temp++; 1179 *temp = LS; 1180 } 1181 else 1182 { 1183 prog++; 1184 *temp = LT; 1185 } 1186 temp++; 1187 *temp = '\0'; 1188 break; 1189 case '>': 1190 if(*(prog + 1) == '=') 1191 { 1192 prog++; 1193 prog++; 1194 *temp = GE; 1195 temp++; 1196 *temp = GE; 1197 } 1198 else if(*(prog + 1) == '>') 1199 { 1200 prog++; 1201 prog++; 1202 *temp = RS; 1203 temp++; 1204 *temp = RS; 1205 } 1206 else 1207 { 1208 prog++; 1209 *temp = GT; 1210 } 1211 temp++; 1212 *temp = '\0'; 1213 break; 1214 case '+': 1215 if(*(prog + 1) == '+') 1216 { 1217 prog++; 1218 prog++; 1219 *temp = INC; 1220 temp++; 1221 *temp = INC; 1222 temp++; 1223 *temp = '\0'; 1224 } 1225 break; 1226 case '-': 1227 if(*(prog + 1) == '-') 1228 { 1229 prog++; 1230 prog++; 1231 *temp = DEC; 1232 temp++; 1233 *temp = DEC; 1234 temp++; 1235 *temp = '\0'; 
1236 } 1237 break; 1238 } 1239 1240 if(*token) return(token_type = DELIMITER); 1241 } 1242 1243 // Check for other delimiters. 1244 if(strchr("+-*^/%=;:(),'", *prog)) 1245 { 1246 *temp = *prog; 1247 prog++; 1248 temp++; 1249 *temp = '\0'; 1250 return (token_type = DELIMITER); 1251 } 1252 1253 // Read a quoted string. 1254 if(*prog == '"') 1255 { 1256 prog++; 1257 while(*prog != '"' && *prog != '\r' && *prog) 1258 { 1259 // Check for \n escape sequence. 1260 if(*prog == '\\') 1261 { 1262 if(*(prog + 1) == 'n') 1263 { 1264 prog++; 1265 *temp++ = '\n'; 1266 } 1267 } 1268 else if((temp - token) < MAX_T_LEN) 1269 *temp++ = *prog; 1270 1271 prog++; 1272 } 1273 if(*prog == '\r' || *prog == 0) 1274 throw InterpExc(SYNTAX); 1275 prog++; 1276 *temp = '\0'; 1277 return (token_type = STRING); 1278 } 1279 1280 // Read an integer number, or float 1281 //由于现在还没加入结构体和类, 所以直接这样判断'.'还是可以的, 不过会有隐患, 要记得~ 1282 if(isdigit(*prog) || *prog == '.') 1283 { 1284 while(!isdelim(*prog)) //这样判断安全吗? 1285 { 1286 if((temp - token) < MAX_ID_LEN) 1287 *temp++ = *prog; 1288 prog++; 1289 } 1290 *temp = '\0'; 1291 return (token_type = NUMBER); 1292 } 1293 1294 // Read identifier or keyword. 1295 if(isalpha(*prog)) 1296 { 1297 while(!isdelim(*prog)) 1298 { 1299 if((temp - token) < MAX_ID_LEN) 1300 *temp++ = *prog; 1301 prog++; 1302 } 1303 token_type = TEMP; 1304 } 1305 1306 *temp = '\0'; 1307 1308 // Determine if token is a keyword or identifier. 1309 if(token_type == TEMP) 1310 { 1311 tok = look_up(token); // convert to internal form 1312 if(tok) token_type = KEYWORD; // is a keyword 1313 else token_type = IDENTIFIER; 1314 } 1315 1316 // Check for unidentified character in file. 1317 if(token_type == UNDEFTT) 1318 throw InterpExc(SYNTAX); 1319 1320 return token_type; 1321 } 1322 1323 // Return a token to input stream. 1324 void putback() 1325 { 1326 char *t; 1327 1328 t = token; 1329 for(; *t; t++) prog--; 1330 } 1331 1332 // Look up a token's internal representation in the 1333 // token table. 
1334 token_ireps look_up(char *s) 1335 { 1336 int i; 1337 1338 //C++本来就区分大小写的, 为什么还给程序转化.. 1339 //char *p; 1340 1341 //// Convert to lowercase. 1342 //p = s; 1343 //while(*p) 1344 //{ 1345 // *p = tolower(*p); 1346 // p++; 1347 //} 1348 1349 // See if token is in table. 1350 for(i = 0; *com_table[i].command; i++) 1351 { 1352 if(!strcmp(com_table[i].command, s)) 1353 return com_table[i].tok; 1354 } 1355 1356 return UNDEFTOK; // unknown command 1357 } 1358 1359 // Return index of internal library function or -1 if 1360 // not found. 1361 int internal_func(char *s) 1362 { 1363 int i; 1364 1365 for(i = 0; intern_func[i].f_name[0]; i++) 1366 { 1367 if(!strcmp(intern_func[i].f_name, s)) return i; 1368 } 1369 return -1; 1370 } 1371 1372 // Return true if c is a delimiter. 1373 bool isdelim(char c) 1374 { 1375 if(strchr(" !:;,+-<>'/*%^=()", c) || c == 9 || 1376 c == '\r' || c == 0) return true; 1377 return false; 1378 } 1379 1380 1381 1382 1383 /*********************************************************************** 1384 minicpp.cpp 主函数在这里, 对于for, if while switch等的实现也写在了这里. 1385 ************************************************************************/ 1386 #include <iostream> 1387 #include <fstream> 1388 #include <new> 1389 #include <stack> 1390 #include <vector> 1391 #include <cstring> 1392 #include <cstdlib> 1393 #include <cctype> 1394 #include "mccommon.h" 1395 1396 using namespace std; 1397 1398 char *prog; // current execution point in source code 1399 char *p_buf; // points to start of program buffer 1400 1401 // This vector holds info for global variables. 1402 vector<var> global_vars; 1403 1404 // This vector holds info for local variables 1405 // and parameters. 1406 vector<var> local_var_stack; 1407 1408 // This vector holds info about functions. 1409 vector<func_type> func_table; 1410 1411 // Stack for managing function scope. 1412 1413 stack<int> func_call_stack; 1414 1415 // Stack for managing nested scopes. 1416 //整形的栈, 存储的是本函数压栈之前栈的大小. 
1417 stack<int> nest_scope_stack; 1418 1419 char token[MAX_T_LEN + 1]; // current token 1420 tok_types token_type; // token type 1421 token_ireps tok; // internal representation 1422 1423 anonymous_var ret_value; // function return value 1424 1425 bool breakfound = false; // true if break encountered 1426 bool continuefound = false; 1427 1428 1429 1430 1431 int main(int argc, char *argv[]) 1432 { 1433 if(argc != 2) 1434 { 1435 cout << "Usage: minicpp <filename>\n"; 1436 return 1; 1437 } 1438 1439 // Allocate memory for the program. 1440 try 1441 { 1442 p_buf = new char[PROG_SIZE]; 1443 } 1444 catch (bad_alloc exc) 1445 { 1446 cout << "Could Not Allocate Program Buffer\n"; 1447 return 1; 1448 } 1449 1450 // Load the program to execute. 1451 if(!load_program(p_buf, argv[1])) return 1; 1452 1453 // Set program pointer to start of program buffer. 1454 prog = p_buf; 1455 1456 try 1457 { 1458 // Find the location of all functions 1459 // and global variables in the program. 1460 prescan(); 1461 1462 // Next, set up the call to main(). 1463 1464 // Find program starting point. 1465 prog = find_func("main"); 1466 1467 // Check for incorrect or missing main() function. 1468 if(!prog) 1469 { 1470 cout << "main() Not Found\n"; 1471 return 1; 1472 } 1473 1474 // Back up to opening (. 1475 prog--; 1476 1477 // Set the first token to main 1478 strcpy(token, "main"); 1479 1480 // Call main() to start interpreting. 1481 call(); 1482 } 1483 catch(InterpExc exc) 1484 { 1485 sntx_err(exc.get_err()); 1486 return 1; 1487 } 1488 catch(bad_alloc exc) 1489 { 1490 cout << "Out Of Memory\n"; 1491 return 1; 1492 } 1493 1494 return ret_value.int_value; 1495 } 1496 1497 // Load a program. 
1498 bool load_program(char *p, char *fname) 1499 { 1500 int i = 0; 1501 1502 ifstream in(fname, ios::in | ios::binary); 1503 if(!in) 1504 { 1505 cout << "Cannot Open file.\n"; 1506 return false; 1507 } 1508 1509 do 1510 { 1511 *p = in.get(); 1512 p++; 1513 i++; 1514 } 1515 while(!in.eof() && i < PROG_SIZE); 1516 1517 if(i == PROG_SIZE) 1518 { 1519 cout << "Program Too Big\n"; 1520 return false; 1521 } 1522 1523 // Null terminate the program. Skip any EOF 1524 // mark if present in the file. 1525 if(*(p - 2) == 0x1a) *(p - 2) = '\0'; 1526 else *(p - 1) = '\0'; 1527 1528 in.close(); 1529 1530 return true; 1531 } 1532 1533 // Find the location of all functions in the program 1534 // and store global variables. 1535 void prescan() 1536 { 1537 char *p, *tp; 1538 char temp[MAX_ID_LEN + 1]; 1539 token_ireps datatype; 1540 func_type ft; 1541 1542 // When brace is 0, the current source position 1543 // is outside of any function. 1544 int brace = 0; 1545 1546 p = prog; 1547 1548 do 1549 { 1550 // Bypass code inside functions, brace==0, 保证了现在是在全局作用域 1551 while(brace) 1552 { 1553 get_token(); 1554 if(tok == END) throw InterpExc(UNBAL_BRACES); 1555 if(*token == '{') brace++; 1556 if(*token == '}') brace--; 1557 } 1558 1559 tp = prog; // save current position 1560 get_token(); 1561 1562 // See if global var type or function return type. 1563 if(is_valid_type(tok)) 1564 { 1565 datatype = tok; // save data type 1566 get_token(); 1567 1568 if(token_type == IDENTIFIER) 1569 { 1570 strcpy(temp, token); 1571 get_token(); 1572 1573 if(*token != '(') // must be global var 1574 { 1575 prog = tp; // return to start of declaration 1576 decl_global(); 1577 } 1578 else if(*token == '(') // must be a function 1579 { 1580 1581 // See if function already defined. 
1582 for(unsigned i = 0; i < func_table.size(); i++) 1583 if(!strcmp(func_table[i].func_name, temp)) 1584 throw InterpExc(DUP_FUNC); 1585 1586 ft.loc = prog; 1587 ft.ret_type = datatype; 1588 strcpy(ft.func_name, temp); 1589 func_table.push_back(ft); 1590 1591 do 1592 { 1593 get_token(); 1594 } 1595 while(*token != ')'); 1596 // Next token will now be opening curly 1597 // brace of function. 1598 } 1599 else putback(); 1600 } 1601 } 1602 else 1603 { 1604 if(*token == '{') brace++; 1605 if(*token == '}') brace--; 1606 } 1607 } 1608 while(tok != END); 1609 if(brace) throw InterpExc(UNBAL_BRACES); 1610 prog = p; 1611 } 1612 1613 // Interpret a single statement or block of code. When 1614 // interp() returns from its initial call, the final 1615 // brace (or a return) in main() has been encountered. 1616 1617 //对于interp我做了一个小改动, 如果执行语句里面有break, 那么就在推出interp之前让程序把整个block的代码都走一遍, 但是不执行了 1618 //这样, 以后调用interp的程序就不用再为break后面的语句做清理工作了. 1619 //在interp里面, 遇到{}会产生一个新的名字空间, 遇到int 和char还会declare一个local变量 1620 void interp() 1621 { 1622 anonymous_var value; 1623 int block = 0; 1624 char *tmp_prog = NULL; 1625 //break语句会对外面的控制流程造成影响, 但是continue不会, 它只会不让本次循环后面的语句不执行. 1626 //但是还是要维护一个全局的continue, 因为本block需要知道子block里面是不是有continue; 1627 do 1628 { 1629 if(breakfound || continuefound) 1630 { 1631 1632 //如果这是个{}包含的块, 那么就用find_eob把整个块吃掉 1633 if(block && tmp_prog) 1634 { 1635 1636 prog = tmp_prog; 1637 find_eob(); 1638 } 1639 else 1640 { 1641 //对于知识一条语句的块, 在break跳出之前吃掉这个分号 1642 get_token(); 1643 1644 } 1645 return; 1646 } 1647 1648 token_type = get_token(); 1649 //对于那些exec_while, exec_while那个向前看的token是在这里读出来的 1650 //跟eval_exp没有关系. 1651 1652 // See what kind of token is up. 1653 if(token_type == IDENTIFIER || 1654 *token == INC || *token == DEC) 1655 { 1656 // Not a keyword, so process expression. 
1657 putback(); // restore token to input stream for 1658 // further processing by eval_exp() 1659 eval_exp(value); // process the expression 1660 //eval_exp和exec_while是相同的层次, 在interp看到向前看字符的时候, 就会递归调用相应的过程. 1661 if(*token != ';') throw InterpExc(SEMI_EXPECTED); 1662 } 1663 else if(token_type == BLOCK) // block delimiter? 1664 { 1665 if(*token == '{') // is a block 1666 { 1667 putback(); 1668 tmp_prog = prog; 1669 get_token(); 1670 block = 1; // interpreting block, not statement 1671 // Record nested scope. 1672 nest_scope_stack.push(local_var_stack.size()); 1673 //nest_scope_stack里面存的是上一个block的stack的位置, 1674 //用户恢复栈. 1675 } 1676 else // is a }, so reset scope and return 1677 { 1678 // Reset nested scope. 1679 local_var_stack.resize(nest_scope_stack.top()); 1680 nest_scope_stack.pop(); 1681 return; 1682 } 1683 } 1684 else if(is_valid_type(tok)) 1685 { 1686 putback(); 1687 decl_local(); 1688 } 1689 else // is keyword 1690 switch(tok) 1691 { 1692 case RETURN: // return from function call, 不要在这里清理局部作用域了, call里面做了处理. 1693 /*if(block) 1694 { 1695 local_var_stack.resize(nest_scope_stack.top()); 1696 nest_scope_stack.pop(); 1697 }*/ 1698 func_ret(); 1699 return; 1700 case IF: // process an if statement 1701 exec_if(); 1702 break; 1703 case ELSE: // process an else statement 1704 find_eob(); // find end of else block 1705 // and continue execution 1706 break; 1707 case WHILE: // process a while loop 1708 exec_while(); 1709 break; 1710 case DO: // process a do-while loop 1711 exec_do(); 1712 break; 1713 case FOR: // process a for loop 1714 exec_for(); 1715 1716 break; 1717 case BREAK: // handle break 1718 breakfound = true; 1719 // Reset nested scope. 
1720 //这里要特判一下是不是从一个block里面的break, 因为在我修改之后, for while的循环体现在可以是 1721 //一个单个的语句了 1722 if(block) 1723 { 1724 local_var_stack.resize(nest_scope_stack.top()); 1725 nest_scope_stack.pop(); 1726 } 1727 break; 1728 case CONTINUE: 1729 { 1730 continuefound = true; 1731 if(block) 1732 { 1733 local_var_stack.resize(nest_scope_stack.top()); 1734 nest_scope_stack.pop(); 1735 } 1736 break; 1737 } 1738 case SWITCH: // handle a switch statement 1739 exec_switch(); 1740 break; 1741 case COUT: // handle console output 1742 exec_cout(); 1743 //cout << "breakfuond :" << breakfound << endl; 1744 break; 1745 case CIN: // handle console input 1746 exec_cin(); 1747 break; 1748 case END: 1749 exit(0); 1750 1751 } 1752 } 1753 while (tok != END && block); 1754 return; 1755 } 1756 1757 1758 //可以使用map优化. 1759 // Return the entry point of the specified function. 1760 // Return NULL if not found. 1761 char *find_func(char *name) 1762 { 1763 unsigned i; 1764 1765 for(i = 0; i < func_table.size(); i++) 1766 if(!strcmp(name, func_table[i].func_name)) 1767 return func_table[i].loc; 1768 1769 return NULL; 1770 } 1771 1772 // Declare a global variable. 1773 void decl_global() 1774 { 1775 token_ireps vartype; 1776 var v; 1777 1778 get_token(); // get type 1779 1780 vartype = tok; // save var type 1781 1782 anonymous_var value; 1783 1784 // Process comma-separated list. 1785 do 1786 { 1787 v.value.var_type = vartype; 1788 init_var(v.value); // init to 0 1789 get_token(); // get name 1790 1791 // See if variable is a duplicate. 1792 for(unsigned i = 0; i < global_vars.size(); i++) 1793 if(!strcmp(global_vars[i].var_name, token)) 1794 throw InterpExc(DUP_VAR); 1795 1796 strcpy(v.var_name, token); 1797 global_vars.push_back(v); 1798 1799 putback(); 1800 eval_exp(value); //这个eval_exp会实现赋值, 这里value只是个哑元, 我们不用 1801 1802 get_token(); 1803 1804 } 1805 while(*token == ','); 1806 1807 if(*token != ';') throw InterpExc(SEMI_EXPECTED); 1808 } 1809 1810 // Declare a local variable. 
1811 void decl_local() 1812 { 1813 var v; 1814 1815 get_token(); // get var type 1816 v.value.var_type = tok; // store type 1817 1818 init_var(v.value); // init var to 0, 对局部变量也直接初始化成0了.. 1819 anonymous_var value; 1820 1821 // Process comma-separated list. 1822 do 1823 { 1824 get_token(); // get var name 1825 1826 // See if variable is already the name 1827 // of a local variable in this scope. 1828 if(!local_var_stack.empty()) 1829 for(int i = local_var_stack.size() - 1; 1830 i >= nest_scope_stack.top(); i--) 1831 { 1832 if(!strcmp(local_var_stack[i].var_name, token)) 1833 throw InterpExc(DUP_VAR); 1834 } 1835 1836 strcpy(v.var_name, token); 1837 local_var_stack.push_back(v); 1838 putback(); 1839 eval_exp(value);//这个eval_exp会实现赋值, 这里value只是个哑元, 我们不用 1840 get_token(); 1841 } 1842 while(*token == ','); 1843 1844 if(*token != ';') throw InterpExc(SEMI_EXPECTED); 1845 } 1846 1847 // Call a function. 1848 void call() 1849 { 1850 char *loc, *temp; 1851 int lvartemp; 1852 1853 // First, find entry point of function. 1854 loc = find_func(token); 1855 1856 if(loc == NULL) 1857 throw InterpExc(FUNC_UNDEF); // function not defined 1858 else 1859 { 1860 // Save local var stack index. 1861 lvartemp = local_var_stack.size(); 1862 1863 //get_args 和get_params先后调用 , 进行了一下替换 1864 get_args(); // get function arguments 1865 temp = prog; // save return location 1866 1867 func_call_stack.push(lvartemp); // push local var index 1868 1869 prog = loc; // reset prog to start of function 1870 get_params(); // load the function's parameters with 1871 // the values of the arguments 1872 1873 interp(); // interpret the function 1874 1875 prog = temp; // reset the program pointer 1876 1877 if(func_call_stack.empty()) throw InterpExc(RET_NOCALL); 1878 1879 // Reset local_var_stack to its previous state. 1880 1881 //这里的resize会把后面的刚刚压入栈的变量删掉. 
1882 local_var_stack.resize(func_call_stack.top()); 1883 func_call_stack.pop(); 1884 } 1885 } 1886 1887 // Push the arguments to a function onto the local 1888 // variable stack. 1889 void get_args() 1890 { 1891 anonymous_var value, temp[NUM_PARAMS]; 1892 int count = 0; 1893 var vt; 1894 1895 count = 0; 1896 get_token(); 1897 if(*token != '(') throw InterpExc(PAREN_EXPECTED); 1898 1899 // Process a comma-separated list of values. 1900 do 1901 { 1902 eval_exp(value); 1903 temp[count] = value; // save temporarily 1904 get_token(); 1905 count++; 1906 } 1907 while(*token == ','); 1908 count--; 1909 1910 // Now, push on local_var_stack in reverse order. 1911 for(; count >= 0; count--) 1912 { 1913 vt.value = temp[count]; 1914 local_var_stack.push_back(vt); 1915 } 1916 } 1917 1918 // Get function parameters. 1919 1920 //在这个函数里面实现了从实参到形参的转化工作, 不错. 1921 void get_params() 1922 { 1923 var *p; 1924 int i; 1925 1926 i = local_var_stack.size() - 1; 1927 1928 // Process comma-separated list of parameters. 1929 do 1930 { 1931 get_token(); 1932 p = &local_var_stack[i]; 1933 if(*token != ')' ) 1934 { 1935 if(is_valid_type(tok)) 1936 throw InterpExc(TYPE_EXPECTED); 1937 1938 p->value.var_type = tok; 1939 get_token(); 1940 1941 // Link parameter name with argument already on 1942 // local var stack. 1943 strcpy(p->var_name, token); 1944 get_token(); 1945 i--; 1946 } 1947 else break; 1948 } 1949 while(*token == ','); 1950 1951 //在这里判了一下, 看最后一个读到的是不是')' 1952 if(*token != ')') throw InterpExc(PAREN_EXPECTED); 1953 } 1954 1955 // Return from a function. 1956 void func_ret() 1957 { 1958 anonymous_var value; 1959 1960 //value = 0; 1961 1962 // Get return value, if any. 1963 //目前设定是只支持int返回值. 1964 eval_exp(value); 1965 1966 ret_value = value; 1967 } 1968 1969 // Assign a value to a variable. 1970 void assign_var(char *vname, anonymous_var value) 1971 { 1972 //cout << "assign_var" << endl; 1973 // First, see if it's a local variable. 
1974 if(!local_var_stack.empty()) 1975 for(int i = local_var_stack.size() - 1; 1976 i >= func_call_stack.top(); i--) 1977 { 1978 if(!strcmp(local_var_stack[i].var_name, 1979 vname)) 1980 { 1981 adaptive_assign_var(local_var_stack[i].value, value); 1982 return; 1983 } 1984 } 1985 1986 // Otherwise, try global vars. 1987 for(unsigned i = 0; i < global_vars.size(); i++) 1988 if(!strcmp(global_vars[i].var_name, vname)) 1989 { 1990 adaptive_assign_var(global_vars[i].value, value); 1991 //cout << value.float_value << " >>>" << endl; 1992 return; 1993 } 1994 1995 throw InterpExc(NOT_VAR); // variable not found 1996 } 1997 1998 // Find the value of a variable. 1999 anonymous_var find_var(char *vname) 2000 { 2001 // First, see if it's a local variable. 2002 if(!local_var_stack.empty()) 2003 for(int i = local_var_stack.size() - 1; 2004 i >= func_call_stack.top(); i--) 2005 { 2006 if(!strcmp(local_var_stack[i].var_name, vname)) 2007 return local_var_stack[i].value; 2008 } 2009 2010 // Otherwise, try global vars. 2011 for(unsigned i = 0; i < global_vars.size(); i++) 2012 if(!strcmp(global_vars[i].var_name, vname)) 2013 return global_vars[i].value; 2014 2015 throw InterpExc(NOT_VAR); // variable not found 2016 } 2017 2018 2019 //在处理if的时候也处理了else的模块 2020 // Execute an if statement. 2021 void exec_if() 2022 { 2023 anonymous_var cond; 2024 2025 eval_exp(cond); // get if expression. 2026 2027 if(get_bool_val(cond)) // if true, process target of IF 2028 { 2029 // Confirm start of block. 2030 2031 interp(); 2032 } 2033 else 2034 { 2035 // Otherwise skip around IF block and 2036 // process the ELSE, if present. 2037 2038 find_eob(); // find start of next line 2039 get_token(); 2040 2041 if(tok != ELSE) 2042 { 2043 // Restore token if no ELSE is present. 2044 putback(); 2045 return; 2046 } 2047 // Confirm start of block. 
2048 get_token(); 2049 2050 if(tok == IF) 2051 { 2052 exec_if(); 2053 return; 2054 } 2055 putback(); 2056 interp(); 2057 } 2058 } 2059 2060 // Execute a switch statement. 2061 void exec_switch() 2062 { 2063 anonymous_var sval, cval; 2064 int brace; 2065 2066 eval_exp(sval); // Get switch expression. 2067 2068 // Check for start of block. 2069 if(*token != '{') 2070 throw InterpExc(BRACE_EXPECTED); 2071 2072 // Record new scope. 2073 nest_scope_stack.push(local_var_stack.size()); 2074 2075 // Now, check case statements. 2076 for(;;) 2077 { 2078 brace = 1; 2079 // Find a case statement. 2080 do 2081 { 2082 get_token(); 2083 if(*token == '{') brace++; 2084 else if(*token == '}') brace--; 2085 } 2086 while(tok != CASE && tok != END && brace && tok != DEFAULT); 2087 2088 // If no matching case found, then skip. 2089 if(!brace) break; 2090 2091 2092 if(tok == END) throw InterpExc(SYNTAX); 2093 if(tok == DEFAULT) 2094 { 2095 get_token(); 2096 if(*token != ':') 2097 throw InterpExc(COLON_EXPECTED); 2098 do 2099 { 2100 interp(); 2101 get_token(); 2102 if(*token == '}') 2103 { 2104 putback(); 2105 break; 2106 } 2107 putback(); 2108 //if(*token == '{') brace++; 2109 //else if(*token == '}') brace--; 2110 } 2111 while(!breakfound && tok != END); 2112 2113 brace = 1; 2114 2115 // Find end of switch statement. 2116 while(brace) 2117 { 2118 get_token(); 2119 if(*token == '{') brace++; 2120 else if(*token == '}') brace--; 2121 } 2122 breakfound = false; 2123 2124 break; 2125 2126 } 2127 2128 // Get value of the case statement. 2129 eval_exp(cval); 2130 2131 // Read and discard the : 2132 get_token(); 2133 2134 if(*token != ':') 2135 throw InterpExc(COLON_EXPECTED); 2136 2137 // If values match, then interpret. 
2138 if(0 == cmp(cval, sval)) 2139 { 2140 2141 do 2142 { 2143 interp(); 2144 2145 get_token(); 2146 if(*token == '}') 2147 { 2148 putback(); 2149 break; 2150 } 2151 putback(); 2152 } 2153 while(!breakfound && tok != END && brace); 2154 2155 brace = 1; 2156 2157 // Find end of switch statement. 2158 while(brace) 2159 { 2160 get_token(); 2161 if(*token == '{') brace++; 2162 else if(*token == '}') brace--; 2163 } 2164 breakfound = false; 2165 2166 break; 2167 } 2168 } 2169 } 2170 2171 // Execute a while loop. 2172 //同下面的do while, 这个也会putback while 2173 void exec_while() 2174 { 2175 anonymous_var cond; 2176 char *temp; 2177 2178 putback(); // put back the while 2179 temp = prog; // save location of top of while loop 2180 2181 get_token(); 2182 eval_exp(cond); // check the conditional expression 2183 2184 if(get_bool_val(cond)) 2185 interp(); // if true, interpret 2186 else // otherwise, skip to end of loop 2187 { 2188 find_eob(); 2189 return; 2190 } 2191 continuefound = false; 2192 if(!breakfound) 2193 prog = temp; // loop back to top 2194 else 2195 { 2196 breakfound = false; 2197 return; 2198 } 2199 } 2200 2201 // Execute a do loop. 2202 2203 //解释: exec_do是在主函数读到了do的时候才会调用, 因此 2204 //在exec_do调用的时候, do这个token已经被读出来了, 2205 //而exec_do还想要在需要继续执行的时候是prog复位到do, 那么就得在程序开始putback一下. 2206 void exec_do() 2207 { 2208 anonymous_var cond; 2209 char *temp; 2210 2211 // Save location of top of do loop. 2212 putback(); // put back do 2213 temp = prog; 2214 2215 get_token(); // get start of loop block 2216 2217 // Confirm start of block. 2218 get_token(); 2219 if(*token != '{') 2220 throw InterpExc(BRACE_EXPECTED); 2221 putback(); 2222 2223 interp(); // interpret loop 2224 2225 // Check for break in loop. 
2226 if(breakfound) 2227 { 2228 breakfound = false; 2229 get_token(); 2230 if(tok != WHILE) throw InterpExc(WHILE_EXPECTED); 2231 eval_exp(cond); // check the loop condition 2232 return; 2233 } 2234 if(continuefound) 2235 { 2236 continuefound = false; 2237 prog = temp; 2238 return; 2239 } 2240 2241 get_token(); 2242 if(tok != WHILE) throw InterpExc(WHILE_EXPECTED); 2243 2244 eval_exp(cond); // check the loop condition 2245 2246 // If true loop; otherwise, continue on. 2247 2248 if(get_bool_val(cond)) prog = temp; 2249 } 2250 2251 // Execute a for loop. 2252 //但是for就不能像while和do while那样, 在需要继续循环的时候复位prog指针了, 因为for 2253 //复位的话, 初始点也跟着复位了, 就是for(int i= 0; i< 12; i++)里面的i也会变成0 2254 void exec_for() 2255 { 2256 anonymous_var cond; 2257 char *temp, *temp2; 2258 int paren ; 2259 2260 //for_local用来标记是不是在for()内部定义了新变量, 如果是, 就会产生新的作用域 2261 bool for_local = false; 2262 2263 get_token(); // skip opening ( 2264 get_token(); 2265 2266 if(is_valid_type(tok))//当前读入的token是个类型关键字, 这样就会触发一个局部作用域 2267 { 2268 putback(); 2269 nest_scope_stack.push(local_var_stack.size()); 2270 for_local = true; 2271 decl_local(); 2272 2273 } 2274 else 2275 { 2276 eval_exp(cond); // initialization expression 2277 } 2278 2279 //这个是decl_local和eval_exp最后读到的token, 已经被读出来了 2280 if(*token != ';') throw InterpExc(SEMI_EXPECTED); 2281 2282 prog++; // get past the ; 2283 temp = prog; 2284 2285 for(;;) 2286 { 2287 // Get the value of the conditional expression. 2288 eval_exp(cond); 2289 2290 if(*token != ';') throw InterpExc(SEMI_EXPECTED); 2291 prog++; // get past the ; 2292 temp2 = prog; 2293 2294 // Find start of for block. 2295 paren = 1; 2296 while(paren) 2297 { 2298 get_token(); 2299 if(*token == '(') paren++; 2300 if(*token == ')') paren--; 2301 } 2302 2303 2304 // If condition is true, interpret 2305 //现在从for()后面开始interpret 2306 // 2307 if(get_bool_val(cond)) 2308 { 2309 //continue只对interp里面的执行起作用, 不会对外面有影响. 
2310 interp(); 2311 //cout << prog << endl; 2312 2313 } 2314 else // otherwise, skip to end of loop 2315 { 2316 find_eob(); 2317 if(for_local) 2318 { 2319 local_var_stack.resize(nest_scope_stack.top()); 2320 nest_scope_stack.pop(); 2321 } 2322 return; 2323 } 2324 if(breakfound) 2325 { 2326 breakfound = false; 2327 if(for_local) 2328 { 2329 local_var_stack.resize(nest_scope_stack.top()); 2330 nest_scope_stack.pop(); 2331 } 2332 return; 2333 } 2334 if(continuefound) 2335 { 2336 continuefound = false; 2337 } 2338 2339 2340 prog = temp2; // go to increment expression 2341 2342 // Check for break in loop. 2343 2344 2345 2346 // Evaluate the increment expression. 2347 eval_exp(cond); 2348 2349 prog = temp; // loop back to top 2350 } 2351 2352 } 2353 2354 // Execute a cout statement. 2355 void exec_cout() 2356 { 2357 anonymous_var val; 2358 2359 get_token(); 2360 if(*token != LS) throw InterpExc(SYNTAX); 2361 do 2362 { 2363 get_token(); 2364 2365 if(token_type == STRING) 2366 { 2367 // Output a string. 2368 cout << token; 2369 } 2370 else if(tok == ENDL) 2371 { 2372 cout << endl; 2373 } 2374 else 2375 { 2376 //cout << token << " :---" << endl; 2377 putback(); 2378 eval_exp(val); 2379 //cout << val.float_value << "<<<" << endl; 2380 cout_var(val); 2381 } 2382 2383 get_token(); 2384 } 2385 while(*token == LS); //<< 2386 2387 if(*token != ';') throw InterpExc(SEMI_EXPECTED); 2388 } 2389 2390 // Execute a cin statement. 2391 void exec_cin() 2392 { 2393 token_ireps vtype; 2394 2395 get_token(); 2396 if(*token != RS) throw InterpExc(SYNTAX); 2397 2398 do 2399 { 2400 get_token(); 2401 if(token_type != IDENTIFIER) 2402 throw InterpExc(NOT_VAR); 2403 2404 vtype = find_var_type(token); 2405 anonymous_var tmp; 2406 tmp.var_type = vtype; 2407 2408 cin_var(tmp); 2409 assign_var(token, tmp); 2410 get_token(); 2411 } 2412 while(*token == RS); //RS 是>> 2413 2414 if(*token != ';') throw InterpExc(SEMI_EXPECTED); 2415 } 2416 2417 2418 // Find the end of a block. 
2419 //#这里find_eob在逻辑上做了一点修改, 由外部保证调用的正确 2420 //如果开始的是{, 那么就处理一个block, 否则就调用find_eol处理一个;语句. 2421 void find_eob() 2422 { 2423 int brace; 2424 2425 get_token(); 2426 //cout << token << " find_eob" <<endl; 2427 if(*token != '{') 2428 { 2429 putback(); 2430 find_eol(); 2431 return ; 2432 } 2433 2434 brace = 1; 2435 2436 do 2437 { 2438 get_token(); 2439 //cout << token << " find_eob" <<endl; 2440 if(*token == '{') brace++; 2441 else if(*token == '}') brace--; 2442 } 2443 while(brace && tok != END); 2444 2445 if(tok == END) throw InterpExc(UNBAL_BRACES); 2446 } 2447 2448 void find_eol() 2449 { 2450 do 2451 { 2452 get_token(); 2453 } 2454 while (*token != ';' && tok != END); 2455 2456 if(tok == END) throw InterpExc(SYNTAX); 2457 } 2458 2459 // Determine if an identifier is a variable. Return 2460 // true if variable is found; false otherwise. 2461 bool is_var(char *vname) 2462 { 2463 // See if vname a local variable. 2464 if(!local_var_stack.empty()) 2465 for(int i = local_var_stack.size() - 1; 2466 i >= func_call_stack.top(); i--) 2467 { 2468 if(!strcmp(local_var_stack[i].var_name, vname)) 2469 return true; 2470 } 2471 2472 // See if vname is a global variable. 2473 for(unsigned i = 0; i < global_vars.size(); i++) 2474 if(!strcmp(global_vars[i].var_name, vname)) 2475 return true; 2476 2477 return false; 2478 } 2479 2480 // Return the type of variable. 2481 token_ireps find_var_type(char *vname) 2482 { 2483 // First, see if it's a local variable. 2484 if(!local_var_stack.empty()) 2485 for(int i = local_var_stack.size() - 1; 2486 i >= func_call_stack.top(); i--) 2487 { 2488 if(!strcmp(local_var_stack[i].var_name, vname)) 2489 return local_var_stack[i].value.var_type; 2490 } 2491 2492 // Otherwise, try global vars. 
2493 for(unsigned i = 0; i < global_vars.size(); i++) 2494 if(!strcmp(global_vars[i].var_name, vname)) 2495 return local_var_stack[i].value.var_type; 2496 2497 return UNDEFTOK; 2498 } 2499 2500 2501 /*********************************************************************** 2502 libcpp.cpp, 主要是对库函数的封装 2503 ************************************************************************/ 2504 2505 // Add more of your own, here. 2506 2507 #include <iostream> 2508 #include <cstdlib> 2509 #include <cstdio> 2510 #include "mccommon.h" 2511 2512 using namespace std; 2513 2514 // Read a character from the console. 2515 // If your compiler supplies an unbuffered 2516 // character intput function, feel free to 2517 // substitute it for the call to cin.get(). 2518 anonymous_var call_getchar() 2519 { 2520 char ch; 2521 2522 ch = getchar(); 2523 2524 // Advance past () 2525 get_token(); 2526 if(*token != '(') 2527 throw InterpExc(PAREN_EXPECTED); 2528 2529 get_token(); 2530 if(*token != ')') 2531 throw InterpExc(PAREN_EXPECTED); 2532 anonymous_var val; 2533 val.var_type = CHAR; 2534 val.int_value = ch; 2535 return val; 2536 } 2537 2538 // Write a character to the display. 2539 anonymous_var call_putchar() 2540 { 2541 anonymous_var value; 2542 2543 eval_exp(value); 2544 2545 putchar(char(value.int_value)); 2546 2547 return value; 2548 } 2549 2550 // Return absolute value. 2551 anonymous_var call_abs() 2552 { 2553 anonymous_var val; 2554 2555 eval_exp(val); 2556 abs_var(val); 2557 return val; 2558 } 2559 2560 // Return a randome integer. 2561 anonymous_var call_rand() 2562 { 2563 2564 // Advance past () 2565 get_token(); 2566 if(*token != '(') 2567 throw InterpExc(PAREN_EXPECTED); 2568 2569 get_token(); 2570 if(*token != ')') 2571 throw InterpExc(PAREN_EXPECTED); 2572 2573 anonymous_var val; 2574 val.var_type = INT; 2575 val.int_value = rand(); 2576 return val; 2577 }
~~