简单的词法分析和语法分析(C++实现,CodeBlocks+GCC编译)
说明:
分析的语言是SNL语言,详见《编译程序的设计与实现》( 刘磊、金英、张晶、张荷花、单郸编著)
词法分析就是实现了词法分析的自动机
语法分析使用递归下降法
运行结果:
词法分析 得到TokenList
语法分析 输出语法树
运行输出:
代码:
main.cpp
1 #include<stdio.h> 2 #include<stdlib.h> 3 #include<vector> 4 #include<string.h> 5 using namespace std; 6 #define NUMOFRESERVED 22 7 #define NUMOFSYMBOLS 20 8 #define RES 0 9 #define ID 1 10 #define NUM 2 11 #define SYM 3 12 #define STR 4 13 14 #define PLUS 100 15 #define SUB 101 16 #define MUL 102 17 #define DIV 103 18 #define LT 104 19 #define LBRACK 105 20 #define RBRACK 106 21 #define LSQUBRACK 107 22 #define RSQUBRACK 108 23 #define POINT 109 24 #define SEMI 110 25 #define LBRACE 111 26 #define RBRACE 112 27 #define EOFF 113 28 #define BLANK 114 29 #define QUO 115 30 #define EQU 116 31 #define INDEX 117 32 #define ASSI 118 33 #define COM 119 34 35 36 #define uint unsigned 37 38 typedef struct Token { 39 int id; 40 int ptr; 41 int linenum; 42 } Token; 43 typedef struct Node { 44 char *desc; 45 vector<Node*> children; 46 Node(const char* str) { 47 desc=new char[30]; 48 strcpy(desc,str); 49 } 50 } Node; 51 52 vector<Token>* tokens; 53 int Token_n=0; 54 const char* reserved_words[NUMOFRESERVED]= {"begin","end","program","var", 55 "type","procedure","while","endwh", 56 "integer","char","array","of", 57 "intc","record","if","then", 58 "else","fi","do","write", 59 "read","return" 60 }; 61 62 static const char* symbol_table[NUMOFSYMBOLS]= {"+","-","*","/","<", 63 "(",")","[","]",".", 64 ";","{","}","EOF","BLANK", 65 "\'","=","..",":=","," 66 }; 67 void syntaxError(const char* message); 68 Node* MatchRES(const char* expected); 69 Node* MatchSYM(const int expected); 70 Node* MatchID(); 71 Node* MatchNUM(); 72 bool isRES(const char* res); 73 bool isSYM(const int res); 74 bool isID(); 75 bool isNUM(); 76 bool isLineEnd(); 77 Node* Program(); 78 Node* ProgramHead(); 79 Node* DeclarePart(); 80 Node* TypeDec(); 81 Node* TypeDeclaration(); 82 Node* TypeDecList(); 83 Node* TypeDecMore(); 84 Node* TypeId(); 85 Node* TypeName(); 86 Node* BaseType(); 87 Node* StructureType(); 88 Node* ArrayType(); 89 Node* RecType(); 90 Node* FieldDecList(); 91 Node* FieldDecMore(); 92 Node* IdList(); 93 Node* IdMore(); 94 Node* VarDec(); 95 Node* VarDeclaration(); 96 Node* VarDecList(); 97 Node* VarDecMore(); 98 Node* VarIdList(); 99 Node* VarIdMore(); 100 Node* ProcDec(); 101 Node* ProcDeclaration(); 102 Node* ParamList(); 103 Node* ParamDecList(); 104 Node* ParamMore(); 105 Node* Param(); 106 Node* FormList(); 107 Node* FidMore(); 108 Node* ProcDecPart(); 109 Node* ProcBody(); 110 Node* ProgramBody(); 111 Node* StmList(); 112 Node* StmMore(); 113 Node* Stm(); 114 Node* AssCall(); 115 Node* AssignmentRest(); 116 Node* ConditionalStm(); 117 Node* LoopStm(); 118 Node* InputStm(); 119 Node* OutputStm(); 120 Node* ReturnStm(); 121 Node* CallStmRest(); 122 Node* ActParamList(); 123 Node* ActParamMore(); 124 Node* Exp(); 125 Node* Term(); 126 Node* OtherFactor(); 127 Node* OtherTerm(); 128 Node* Factor(); 129 Node* Variable(); 130 Node* VariMore(); 131 Node* FieldVar(); 132 Node* FieldVarMore(); 133 Node* Parse(); 134 Node* CmpOp(); 135 Node* AddOp(); 136 Node* MultOp(); 137 138 void show_tree(Node* root,int depth,vector<int>* v,bool islast); 139 140 int add(vector<Node* >& children,Node* node) { 141 if(node!=NULL) { 142 children.push_back(node); 143 } 144 return 0; 145 } 146 #define COMMENT_ERROR -1000 147 #define STR_ERROR -1001 148 #define SYM_ERROR -1002 149 #define NOT_REC_SYMBOL -1003 150 vector<char*> ID_table; 151 vector<uint> NUM_table; 152 vector<char*> STR_table; 153 int ID_n=0,NUM_n=0,STR_n=0; 154 int num_of_lines=1; 155 FILE * file; 156 157 158 159 /***************************************************************************************/ 160 /***************** 词法分析 *****************/ 161 /***************************************************************************************/ 162 163 void handle_error(int error_num,const char* str) { 164 printf("行号:%d \t",num_of_lines); 165 switch(error_num) { 166 case COMMENT_ERROR: 167 printf("注释未结束 \t"); 168 break; 169 case SYM_ERROR: 170 printf("出现未定义的符号,您想输入的是\":=\"吗? \t"); 171 break; 172 case STR_ERROR: 173 printf("字符串未结束 \t"); 174 break; 175 case NOT_REC_SYMBOL: 176 printf("出现未定义的符号 \t"); 177 break; 178 } 179 printf("%s\n",str); 180 exit(-1); 181 } 182 char getNextChar() { 183 char c; 184 c=fgetc(file); 185 return c; 186 } 187 188 void ungetNextChar() { 189 fseek(file,-1,SEEK_CUR); 190 } 191 192 int reservedLookup(char* str) { 193 for(int i=0; i<NUMOFRESERVED; i++) { 194 if(strcmp(str,reserved_words[i])==0)return i; 195 } 196 return -1; 197 } 198 int str_to_num(char* str) { 199 int len=strlen(str); 200 int n=0; 201 for(int i=0; i<len; i++) { 202 n=n*10+str[i]-'0'; 203 } 204 return n; 205 } 206 int addIDTable(char* str) { 207 ID_table.push_back(str); 208 return ID_n++; 209 } 210 int addNUMTable(int num) { 211 NUM_table.push_back(num); 212 return NUM_n++; 213 } 214 int addSTRTable(char* str ) { 215 STR_table.push_back(str); 216 return STR_n++; 217 } 218 Token scan() { 219 static const char symbols[12]= {'+','-','*','/','<','(',')','[',']',';','=',','}; 220 static const int symbols_n[12]= {PLUS,SUB,MUL,DIV,LT,LBRACK,RBRACK,LSQUBRACK,RSQUBRACK,SEMI,EQU,COM}; 221 Token t; 222 t.linenum=num_of_lines; 223 char* str=new char[256]; 224 strcpy(str,"");//字符串要初始化,不然有乱码 225 char c[2]= {'a','\0'}; 226 c[0]=getNextChar(); 227 LS0: 228 if(c[0]>='a'&&c[0]<='z')goto LS1; 229 if(c[0]>='A'&&c[0]<='Z')goto LS1; 230 if(c[0]>='0'&&c[0]<='9')goto LS2; 231 int indexofsym; 232 for(indexofsym=0; indexofsym<12; indexofsym++) { 233 if(c[0]==symbols[indexofsym]) { 234 goto LS3; 235 } 236 } 237 if(c[0]=='.')goto LS4; 238 if(c[0]=='{')goto LS5; 239 if(c[0]==':')goto LS6; 240 if(c[0]=='\'')goto LS7; 241 if(c[0]=='\n')goto LS8; 242 if(c[0]==EOF)goto LS9; 243 if(c[0]=='\t'||c[0]==' ')goto LS10; 244 goto OTHER; 245 LS1: 246 strcat(str,c); 247 c[0]=getNextChar(); 248 if(c[0]>='a'&&c[0]<='z')goto LS1; 249 else if(c[0]>='A'&&c[0]<='Z')goto LS1; 250 else if(c[0]>='0'&&c[0]<='9')goto LS1; 251 ungetNextChar(); 252 int nres; 253 if(nres=reservedLookup(str),nres!=-1) { 254 t.id=RES; 255 t.ptr=nres; 256 return t; 257 } else { 258 t.id=ID; 259 t.ptr=addIDTable(str); 260 return t; 261 } 262 LS2: 263 strcat(str,c); 264 c[0]=getNextChar(); 265 if(c[0]>='0'&&c[0]<='9')goto LS2; 266 ungetNextChar(); 267 t.id=NUM; 268 t.ptr=addNUMTable(str_to_num(str)); 269 return t; 270 LS3: 271 t.ptr=symbols_n[indexofsym]; 272 t.id=SYM; 273 return t; 274 LS4: 275 strcat(str,c); 276 c[0]=getNextChar(); 277 if(c[0]=='.') { 278 t.id=SYM; 279 t.ptr=INDEX; 280 return t; 281 } 282 else { 283 t.id=SYM; 284 t.ptr=POINT; 285 ungetNextChar(); 286 return t; 287 } 288 LS5: 289 while(c[0]=getNextChar(),c[0]!=EOF&&c[0]!='}') { 290 if(c[0]=='\n'||c[0]=='\r')num_of_lines++; 291 } 292 if(c[0]=='}') { 293 c[0]=getNextChar(); 294 goto LS0; 295 } else { 296 handle_error(COMMENT_ERROR,""); 297 } 298 LS6: 299 strcat(str,c); 300 c[0]=getNextChar(); 301 if(c[0]=='=') { 302 t.id=SYM; 303 t.ptr=ASSI; 304 return t; 305 } else { 306 handle_error(SYM_ERROR,"符号为 \':\'"); 307 } 308 LS7: 309 while(c[0]=getNextChar(),c[0]!=EOF&&c[0]!='\'') { 310 strcat(str,c); 311 } 312 if(c[0]=='\'') { 313 t.id=STR; 314 t.ptr=addSTRTable(str); 315 return t; 316 } else { 317 handle_error(STR_ERROR,""); 318 } 319 LS8: 320 t.id=-1; 321 num_of_lines++; 322 return t; 323 LS9: 324 t.id=SYM; 325 t.ptr=EOFF; 326 return t; 327 LS10: 328 t.id=-1; 329 return t; 330 OTHER: 331 handle_error(NOT_REC_SYMBOL,c); 332 t.id=-1; 333 return t; 334 } 335 void show_token(Token t) { 336 if(t.id==STR) { 337 printf("<%d %s %s> \n",t.linenum,"STR",STR_table[t.ptr]); 338 return ; 339 } 340 if(t.id==NUM) { 341 printf("<%d %s %d> \n",t.linenum,"NUM",NUM_table[t.ptr]); 342 return ; 343 } 344 if(t.id==ID) { 345 printf("<%d %s %s> \n",t.linenum,"ID",ID_table[t.ptr]); 346 return ; 347 } 348 if(t.id==RES) { 349 printf("<%d %s %s> \n",t.linenum,"RES",reserved_words[t.ptr]); 350 return ; 351 } 352 if(t.id==SYM) { 353 printf("<%d %s %s> \n",t.linenum,"SYM",symbol_table[t.ptr-100]); 354 return ; 355 } 356 return ; 357 } 358 vector<Token>* getTokenlist(const char* filename) { 359 tokens->clear(); 360 file=fopen(filename,"r"); 361 Token temp; 362 while(temp=scan(),!(temp.id==SYM&&temp.ptr==EOFF)) { 363 if(temp.id!=-1) { 364 tokens->push_back(temp); 365 if(temp.id==RES&&strcmp("end",reserved_words[temp.ptr])==0) { 366 char c; 367 if(c=getNextChar(),c=='.') { 368 break; 369 } else { 370 ungetNextChar(); 371 } 372 } 373 } 374 } 375 fclose(file); 376 return tokens; 377 } 378 void show_Token_list() { 379 int len=tokens->size(); 380 for(int i=0; i<len; i++) { 381 show_token(tokens->at(i)); 382 } 383 } 384 385 void write_token(FILE* file,Token t) { 386 if(t.id==STR) { 387 fprintf(file,"<%s %s> \n","STR",STR_table[t.ptr]); 388 return ; 389 } 390 if(t.id==NUM) { 391 fprintf(file,"<%s %d> \n","NUM",NUM_table[t.ptr]); 392 return ; 393 } 394 if(t.id==ID) { 395 fprintf(file,"<%s %s> \n","ID",ID_table[t.ptr]); 396 return ; 397 } 398 if(t.id==RES) { 399 fprintf(file,"<%s %s> \n","RES",reserved_words[t.ptr]); 400 return ; 401 } 402 if(t.id==SYM) { 403 fprintf(file,"<%s %s> \n","SYM",symbol_table[t.ptr-100]); 404 return ; 405 } 406 return ; 407 } 408 void save_Token_list(const char* filename) { 409 FILE* f=fopen(filename,"w"); 410 int len=tokens->size(); 411 for(int i=0; i<len; i++) { 412 write_token(f,tokens->at(i)); 413 } 414 fclose(f); 415 } 416 417 418 419 420 421 422 /***************************************************************************************/ 423 /***************** 语法分析 *****************/ 424 /***************************************************************************************/ 425 426 void syntaxError(const char* message) { 427 if(strcmp(message,"outofrange")==0) { 428 printf("出现越界错误"); 429 printf(" Token_n:%d tokens->size:%d\n",Token_n,tokens->size()); 430 exit(-1); 431 } 432 printf("行号:%d %s",tokens->at(Token_n).linenum,message); 433 printf("当前token:"); 434 show_token(tokens->at(Token_n)); 435 exit(-1); 436 } 437 Node* MatchRES(const char* expected) { 438 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 439 int id=tokens->at(Token_n).id,ptr=tokens->at(Token_n).ptr; 440 if(id==RES&&strcmp(expected,reserved_words[ptr])==0) { 441 Node*t =new Node(expected); 442 Token_n++; 443 return t; 444 } 445 char* message=new char[100]; 446 strcpy(message,"match res error.\nexpect: "); 447 strcat(message,expected); 448 strcat(message,"\n"); 449 syntaxError(message); 450 delete message; 451 return NULL; 452 } 453 Node* MatchNUM() { 454 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 455 int id=tokens->at(Token_n).id; 456 if(id==NUM) { 457 Node* t= new Node("NUM"); 458 Token_n++; 459 return t; 460 } 461 syntaxError("match num error\n"); 462 return NULL; 463 } 464 Node* MatchSTR() { 465 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 466 int id=tokens->at(Token_n).id; 467 if(id==STR) { 468 Node* t= new Node("STR"); 469 Token_n++; 470 return t; 471 } 472 syntaxError("match str error\n"); 473 return NULL; 474 } 475 Node* MatchID() { 476 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 477 int id=tokens->at(Token_n).id; 478 if(id==ID) { 479 Node* t =new Node("ID"); 480 Token_n++; 481 return t; 482 } 483 syntaxError("match id error\n"); 484 return NULL; 485 } 486 Node* MatchSYM(const int expected) { 487 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 488 int id=tokens->at(Token_n).id,ptr=tokens->at(Token_n).ptr; 489 if(id==SYM&&ptr==expected) { 490 Node* t =new Node(symbol_table[ptr-100]); 491 Token_n++; 492 return t; 493 } 494 char* message=new char[100]; 495 strcpy(message,"match symbol error.\nexpect: "); 496 strcat(message,symbol_table[expected-100]); 497 strcat(message,"\n"); 498 syntaxError(message); 499 delete message; 500 return NULL; 501 } 502 bool isRES(const char* res) { 503 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 504 int id=tokens->at(Token_n).id,ptr=tokens->at(Token_n).ptr; 505 if(id==RES&&strcmp(reserved_words[ptr],res)==0)return true; 506 return false; 507 } 508 bool isSYM(const int res) { 509 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 510 int id=tokens->at(Token_n).id,ptr=tokens->at(Token_n).ptr; 511 if(id==SYM&&ptr==res)return true; 512 return false; 513 } 514 bool isID() { 515 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 516 int id=tokens->at(Token_n).id; 517 if(id==ID)return true; 518 return false; 519 } 520 bool isNUM() { 521 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 522 int id=tokens->at(Token_n).id; 523 if(id==NUM)return true; 524 return false; 525 } 526 bool isSTR() { 527 if(Token_n>=(int)tokens->size())syntaxError("outofrange"); 528 int id=tokens->at(Token_n).id; 529 if(id==STR)return true; 530 return false; 531 } 532 Node* Program() { 533 Node* t=new Node("Program"); 534 add(t->children,ProgramHead()); 535 add(t->children,DeclarePart()); 536 add(t->children,ProgramBody()); 537 return t; 538 } 539 Node* ProgramHead() { 540 Node* t=new Node("ProgramHead"); 541 add(t->children,MatchRES("program")); 542 add(t->children,MatchID()); 543 return t; 544 } 545 546 Node* DeclarePart() { 547 Node* t=new Node("DeclarePart"); 548 add(t->children,TypeDeclaration()); 549 add(t->children,VarDeclaration()); 550 add(t->children,ProcDeclaration()); 551 return t; 552 } 553 Node* TypeDec() { 554 Node* t=new Node("TypeDec"); 555 add(t->children,MatchRES("type")); 556 add(t->children,TypeDecList()); 557 return t; 558 } 559 Node* TypeDeclaration() { 560 Node*t=NULL; 561 if(isRES("type")) { 562 t=new Node("TypeDeclaration"); 563 add(t->children,TypeDec()); 564 } 565 return t; 566 } 567 Node* TypeDecList() { 568 Node* t=new Node("TypeDecList"); 569 add(t->children,TypeId()); 570 add(t->children,MatchSYM(EQU)); 571 add(t->children,TypeName()); 572 add(t->children,MatchSYM(SEMI)); 573 add(t->children,TypeDecMore()); 574 return t; 575 } 576 Node* TypeDecMore() { 577 Node*t=NULL; 578 if(isID()) { 579 t=new Node("TypeDecMore"); 580 add(t->children,TypeDecList()); 581 } 582 return t; 583 } 584 Node* TypeId() { 585 Node* t=new Node("TypeId"); 586 add(t->children,MatchID()); 587 return t; 588 } 589 Node* TypeName() { 590 Node* t=new Node("TypeName"); 591 if(isRES("integer")||isRES("char")) { 592 add(t->children,BaseType()); 593 return t; 594 } 595 if(isRES("array")||isRES("record")) { 596 add(t->children,StructureType()); 597 return t; 598 } 599 add(t->children,MatchID()); 600 return t; 601 } 602 Node* BaseType() { 603 Node*t=NULL; 604 if(isRES("integer")) { 605 t=new Node("BaseType"); 606 add(t->children,MatchRES("integer")); 607 return t; 608 } 609 if(isRES("char")) { 610 t=new Node("BaseType"); 611 add(t->children,MatchRES("char")); 612 return t; 613 } 614 syntaxError("not a base type\n"); 615 return t; 616 } 617 Node* StructureType() { 618 Node*t=NULL; 619 if(isRES("array")) { 620 t=new Node("StructureType"); 621 add(t->children,ArrayType()); 622 return t; 623 } 624 if(isRES("record")) { 625 t=new Node("StructureType"); 626 add(t->children,RecType()); 627 return t; 628 } 629 return t; 630 } 631 Node* ArrayType() { 632 Node* t=new Node("ArrayType"); 633 add(t->children,MatchRES("array")); 634 add(t->children,MatchSYM(LSQUBRACK)); 635 add(t->children,MatchNUM()); 636 add(t->children,MatchSYM(INDEX)); 637 add(t->children,MatchNUM()); 638 add(t->children,MatchSYM(RSQUBRACK)); 639 add(t->children,MatchRES("of")); 640 add(t->children,BaseType()); 641 return t; 642 } 643 Node* RecType() { 644 Node* t=new Node("RecType"); 645 add(t->children,MatchRES("record")); 646 add(t->children,FieldDecList()); 647 add(t->children,MatchRES("end")); 648 return t; 649 } 650 Node* FieldDecList() { 651 Node*t=NULL; 652 if(isRES("integer")||isRES("char")) { 653 t=new Node("FieldDecList"); 654 add(t->children,BaseType()); 655 add(t->children,IdList()); 656 add(t->children,MatchSYM(SEMI)); 657 add(t->children,FieldDecMore()); 658 return t; 659 } 660 if(isRES("array")) { 661 t=new Node("FieldDecList"); 662 add(t->children,ArrayType()); 663 add(t->children,IdList()); 664 add(t->children,MatchSYM(SEMI)); 665 add(t->children,FieldDecMore()); 666 return t; 667 } 668 syntaxError("field declare list error\n"); 669 return t; 670 } 671 Node* FieldDecMore() { 672 Node*t=NULL; 673 if(isRES("integer")||isRES("char")||isRES("array")) { 674 t=new Node("FieldDecMore"); 675 add(t->children,FieldDecList()); 676 } 677 return t; 678 } 679 Node* IdList() { 680 Node* t=new Node("IdList"); 681 add(t->children,MatchID()); 682 add(t->children,IdMore()); 683 return t; 684 } 685 Node* IdMore() { 686 Node*t=NULL; 687 if(isSYM(COM)) { 688 t=new Node("IdMore"); 689 add(t->children,MatchSYM(COM)); 690 add(t->children,IdList()); 691 } 692 return t; 693 } 694 Node* VarDec() { 695 Node* t=new Node("VarDec"); 696 add(t->children,MatchRES("var")); 697 add(t->children,VarDecList()); 698 return t; 699 } 700 Node* VarDeclaration() { 701 Node*t=NULL; 702 if(isRES("var")) { 703 t=new Node("VarDeclaration"); 704 add(t->children,VarDec()); 705 } 706 return t; 707 } 708 Node* VarDecList() { 709 Node* t=new Node("VarDecList"); 710 add(t->children,TypeName()); 711 add(t->children,VarIdList()); 712 add(t->children,MatchSYM(SEMI)); 713 add(t->children,VarDecMore()); 714 return t; 715 } 716 Node* VarDecMore() { 717 Node*t=NULL; 718 if(isRES("integer")||isRES("char")||isRES("array")||isRES("record")||isID()) { 719 t=new Node("VarDecMore"); 720 add(t->children,VarDecList()); 721 } 722 return t; 723 } 724 Node* VarIdList() { 725 Node* t=new Node("VarIdList"); 726 add(t->children,MatchID()); 727 add(t->children,VarIdMore()); 728 return t; 729 } 730 Node* VarIdMore() { 731 Node*t=NULL; 732 if(isSYM(COM)) { 733 t=new Node("VarIdMore"); 734 add(t->children,MatchSYM(COM)); 735 add(t->children,VarIdList()); 736 } 737 return t; 738 } 739 Node* ProcDec() { 740 Node* t=new Node("ProcDec"); 741 add(t->children,MatchRES("procedure")); 742 add(t->children,MatchID()); 743 add(t->children,MatchSYM(LBRACK)); 744 add(t->children,ParamList()); 745 add(t->children,MatchSYM(RBRACK)); 746 add(t->children,MatchSYM(SEMI)); 747 add(t->children,DeclarePart()); 748 add(t->children,ProcBody()); 749 add(t->children,ProcDeclaration()); 750 return t; 751 } 752 Node* ProcDeclaration() { 753 Node*t=NULL; 754 if(isRES("procedure")) { 755 t=new Node("ProcDeclaration"); 756 add(t->children,ProcDec()); 757 } 758 return t; 759 } 760 Node* ParamList() { 761 Node*t=NULL; 762 if(isRES("integer")||isRES("char")||isRES("array")||isRES("record")||isID()||isRES("var")) { 763 t=new Node("ParamList"); 764 add(t->children,ParamDecList()); 765 } 766 return t; 767 } 768 Node* ParamDecList() { 769 Node* t=new Node("ParamDecList"); 770 add(t->children,Param()); 771 add(t->children,ParamMore()); 772 return t; 773 } 774 Node* ParamMore() { 775 Node*t=NULL; 776 if(isSYM(SEMI)) { 777 t=new Node("ParamMore"); 778 add(t->children,MatchSYM(SEMI)); 779 add(t->children,ParamDecList()); 780 } 781 return t; 782 } 783 Node* Param() { 784 Node*t=NULL; 785 if(isRES("integer")||isRES("char")||isRES("array")||isRES("record")||isID()) { 786 t=new Node("Param"); 787 add(t->children,TypeName()); 788 add(t->children,FormList()); 789 return t; 790 } 791 if(isRES("var")) { 792 t=new Node("Param"); 793 add(t->children,MatchRES("var")); 794 add(t->children,TypeName()); 795 add(t->children,FormList()); 796 return t; 797 } 798 syntaxError("param error\n"); 799 return t; 800 } 801 Node* FormList() { 802 Node* t=new Node("FormList"); 803 add(t->children,MatchID()); 804 add(t->children,FidMore()); 805 return t; 806 } 807 Node* FidMore() { 808 Node*t=NULL; 809 if(isSYM(COM)) { 810 t=new Node("FidMore"); 811 add(t->children,MatchSYM(COM)); 812 add(t->children,FormList()); 813 } 814 return t; 815 } 816 Node* ProcDecPart() { 817 Node* t=new Node("ProcDecPart"); 818 add(t->children,DeclarePart()); 819 return t; 820 } 821 Node* ProcBody() { 822 Node* t=new Node("ProcBody"); 823 add(t->children,ProgramBody()); 824 return t; 825 } 826 Node* ProgramBody() { 827 Node* t=new Node("ProgramBody"); 828 add(t->children,MatchRES("begin")); 829 add(t->children,StmList()); 830 add(t->children,MatchRES("end")); 831 return t; 832 } 833 Node* StmList() { 834 Node* t=new Node("StmList"); 835 add(t->children,Stm()); 836 add(t->children,StmMore()); 837 return t; 838 } 839 Node* StmMore() { 840 Node*t=NULL; 841 if(isSYM(SEMI)) { 842 t=new Node("StmMore"); 843 add(t->children,MatchSYM(SEMI)); 844 add(t->children,StmList()); 845 } 846 return t; 847 } 848 Node* Stm() { 849 Node* t=new Node("Stm"); 850 if(isRES("if")) { 851 add(t->children,ConditionalStm()); 852 return t; 853 } 854 if(isRES("while")) { 855 add(t->children,LoopStm()); 856 return t; 857 } 858 if(isRES("read")) { 859 add(t->children,InputStm()); 860 return t; 861 } 862 if(isRES("write")) { 863 add(t->children,OutputStm()); 864 return t; 865 } 866 if(isRES("return")) { 867 add(t->children,ReturnStm()); 868 return t; 869 } 870 if(isID()) { 871 add(t->children,MatchID()); 872 add(t->children,AssCall()); 873 return t; 874 } 875 delete t; 876 return NULL; 877 } 878 Node* AssCall() { 879 Node*t=NULL; 880 if(isSYM(LSQUBRACK)||isSYM(POINT)||isSYM(ASSI)) { 881 t=new Node("AssCall"); 882 add(t->children,AssignmentRest()); 883 return t; 884 } 885 if(isSYM(LBRACK)) { 886 t=new Node("AssCall"); 887 add(t->children,CallStmRest()); 888 return t; 889 } 890 syntaxError("ass call error\n"); 891 return t; 892 } 893 Node* AssignmentRest() { 894 Node* t=new Node("AssignmentRest"); 895 if(isSYM(LSQUBRACK)||isSYM(POINT)) { 896 add(t->children,VariMore()); 897 } 898 add(t->children,MatchSYM(ASSI)); 899 add(t->children,Exp()); 900 return t; 901 } 902 Node* ConditionalStm() { 903 Node* t=new Node("ConditionalStm"); 904 add(t->children,MatchRES("if")); 905 add(t->children,Exp()); 906 if(isSYM(LT))add(t->children,MatchSYM(LT)); 907 else if(isSYM(EQU))add(t->children,MatchSYM(EQU)); 908 else syntaxError("condition error\n"); 909 add(t->children,Exp()); 910 add(t->children,MatchRES("then")); 911 add(t->children,StmList()); 912 add(t->children,MatchRES("else")); 913 add(t->children,StmList()); 914 add(t->children,MatchRES("fi")); 915 return t; 916 } 917 Node* LoopStm() { 918 Node* t=new Node("LoopStm"); 919 add(t->children,MatchRES("while")); 920 add(t->children,Exp()); 921 if(isSYM(LT))add(t->children,MatchSYM(LT)); 922 else if(isSYM(EQU))add(t->children,MatchSYM(EQU)); 923 else syntaxError("condition error\n"); 924 add(t->children,Exp()); 925 add(t->children,MatchRES("do")); 926 add(t->children,StmList()); 927 add(t->children,MatchRES("endwh")); 928 return t; 929 } 930 Node* InputStm() { 931 Node* t=new Node("InputStm"); 932 add(t->children,MatchRES("read")); 933 add(t->children,MatchSYM(LBRACK)); 934 add(t->children,MatchID()); 935 add(t->children,MatchSYM(RBRACK)); 936 return t; 937 } 938 Node* OutputStm() { 939 Node* t=new Node("OutputStm"); 940 add(t->children,MatchRES("write")); 941 add(t->children,MatchSYM(LBRACK)); 942 add(t->children,Exp()); 943 add(t->children,MatchSYM(RBRACK)); 944 return t; 945 } 946 Node* ReturnStm() { 947 Node* t=new Node("ReturnStm"); 948 add(t->children,MatchRES("return")); 949 return t; 950 } 951 Node* CallStmRest() { 952 Node* t=new Node("CallStmRest"); 953 add(t->children,MatchSYM(LBRACK)); 954 add(t->children,ActParamList()); 955 add(t->children,MatchSYM(RBRACK)); 956 return t; 957 } 958 Node* ActParamList() { 959 Node*t=NULL; 960 if(isSYM(LBRACK)||isNUM()||isID()) { 961 t=new Node("ActParamList"); 962 add(t->children,Exp()); 963 add(t->children,ActParamMore()); 964 } 965 return t; 966 } 967 Node* ActParamMore() { 968 Node*t=NULL; 969 if(isSYM(COM)) { 970 t=new Node("ActParamMore"); 971 add(t->children,MatchSYM(COM)); 972 add(t->children,ActParamList()); 973 } 974 return t; 975 } 976 Node* Exp() { 977 Node* t=new Node("Exp"); 978 add(t->children,Term()); 979 add(t->children,OtherTerm()); 980 return t; 981 } 982 Node* OtherTerm() { 983 Node*t=NULL; 984 if(isSYM(PLUS)||isSYM(SUB)) { 985 t=new Node("OtherTerm"); 986 add(t->children,AddOp()); 987 add(t->children,Exp()); 988 } 989 return t; 990 } 991 Node* Term() { 992 Node* t=new Node("Term"); 993 add(t->children,Factor()); 994 add(t->children,OtherFactor()); 995 return t; 996 } 997 Node* OtherFactor() { 998 Node*t=NULL; 999 if(isSYM(MUL)||isSYM(DIV)) { 1000 t=new Node("OtherFactor"); 1001 add(t->children,MultOp()); 1002 add(t->children,Term()); 1003 } 1004 return t; 1005 } 1006 Node* Factor() { 1007 Node* t=new Node("Factor"); 1008 if(isNUM()) { 1009 add(t->children,MatchNUM()); 1010 return t; 1011 } 1012 if(isSTR()) { 1013 add(t->children,MatchSTR()); 1014 return t; 1015 } 1016 if(isSYM(LBRACK)) { 1017 add(t->children,MatchSYM(LBRACK)); 1018 add(t->children,Exp()); 1019 add(t->children,MatchSYM(RBRACK)); 1020 return t; 1021 } 1022 if(isID()) { 1023 add(t->children,Variable()); 1024 return t; 1025 } 1026 delete t; 1027 syntaxError("factor error\n"); 1028 return NULL; 1029 } 1030 Node* Variable() { 1031 Node* t=new Node("Variable"); 1032 add(t->children,MatchID()); 1033 add(t->children,VariMore()); 1034 return t; 1035 } 1036 Node* VariMore() { 1037 Node*t=NULL; 1038 if(isSYM(LSQUBRACK)) { 1039 t=new Node("VariMore"); 1040 add(t->children,MatchSYM(LSQUBRACK)); 1041 add(t->children,Exp()); 1042 add(t->children,MatchSYM(RSQUBRACK)); 1043 } 1044 if(isSYM(POINT)) { 1045 t=new Node("VariMore"); 1046 add(t->children,MatchSYM(POINT)); 1047 add(t->children,FieldVar()); 1048 } 1049 return t; 1050 } 1051 Node* FieldVar() { 1052 Node* t=new Node("FieldVar"); 1053 add(t->children,MatchID()); 1054 add(t->children,FieldVarMore()); 1055 return t; 1056 } 1057 Node* FieldVarMore() { 1058 Node*t=NULL; 1059 if(isSYM(LSQUBRACK)) { 1060 t=new Node("FieldVarMore"); 1061 add(t->children,MatchSYM(LSQUBRACK)); 1062 add(t->children,Exp()); 1063 add(t->children,MatchSYM(RSQUBRACK)); 1064 } 1065 return t; 1066 } 1067 Node* CmpOp() { 1068 Node*t=NULL; 1069 if(isSYM(LT)) { 1070 t=new Node("CmpOp"); 1071 add(t->children,MatchSYM(LT)); 1072 return t; 1073 } 1074 if(isSYM(EQU)) { 1075 t=new Node("CmpOp"); 1076 add(t->children,MatchSYM(EQU)); 1077 return t; 1078 } 1079 syntaxError("cmpop error\n"); 1080 return t; 1081 } 1082 Node* AddOp() { 1083 Node*t=NULL; 1084 if(isSYM(PLUS)) { 1085 t=new Node("AddOp"); 1086 add(t->children,MatchSYM(PLUS)); 1087 return t; 1088 } 1089 if(isSYM(SUB)) { 1090 t=new Node("AddOp"); 1091 add(t->children,MatchSYM(SUB)); 1092 return t; 1093 } 1094 syntaxError("cmpop error\n"); 1095 return t; 1096 } 1097 Node* MultOp() { 1098 Node*t=NULL; 1099 if(isSYM(MUL)) { 1100 t=new Node("MultOp"); 1101 add(t->children,MatchSYM(MUL)); 1102 return t; 1103 } 1104 if(isSYM(DIV)) { 1105 t=new Node("MultOp"); 1106 add(t->children,MatchSYM(DIV)); 1107 return t; 1108 } 1109 syntaxError("cmpop error\n"); 1110 return t; 1111 } 1112 Node* Parse() { 1113 num_of_lines=1; 1114 Node* root=Program(); 1115 return root; 1116 } 1117 void show_tree(Node* root,int depth,vector<int>* v,bool islast) { 1118 if(root==NULL)return ; 1119 printf("\n"); 1120 for(int i=0; i<depth; i++) { 1121 if(v->at(i)==1)printf(" │"); 1122 else printf(" "); 1123 } 1124 if(islast) { 1125 printf(" └─%s",root->desc); 1126 v->at(depth)=0; 1127 } else printf(" ├─%s",root->desc); 1128 if(depth+1==(int)v->size())v->push_back(1); 1129 v->at(depth+1)=1; 1130 int len=root->children.size(); 1131 for(int i=0; i<len; i++) { 1132 if(i==len-1)show_tree(root->children.at(i),depth+1,v,true); 1133 else show_tree(root->children.at(i),depth+1,v,false); 1134 } 1135 } 1136 1137 int main() { 1138 tokens=new vector<Token>(); 1139 getTokenlist("cs1.txt"); 1140 save_Token_list("tokens.txt"); 1141 //打印显示 1142 printf("TOKENS:\n\n"); 1143 show_Token_list(); 1144 printf("\n\n\n\n语法树:\n\n"); 1145 vector<int>* v=new vector<int>(); 1146 v->push_back(0); 1147 show_tree(Parse(),0,v,true); 1148 return 0; 1149 }
cs1.txt
1 {冒泡排序} 2 {输入m,表示要排序的数的个数;接着输入m个整数; 3 输出从小到大排序后的结果} 4 5 program p 6 var integer i,j,num; 7 array[1..20] of integer a; 8 9 procedure q(integer num); 10 var integer i,j,k; 11 integer t; 12 begin 13 i:='lklklk'; 14 j:=1; 15 while i < num do 16 j:=num-i+1; 17 k:=1; 18 while k<j do 19 if a[k+1] < a[k] 20 then t:=a[k]; 21 a[k]:=a[k+1]; 22 a[k+1]:=t 23 else t:=0 24 fi; 25 k:=k+1 26 endwh; 27 i:=i+1 28 endwh 29 end 30 31 begin 32 read(num); 33 i:=1; 34 while i<(num+1) do 35 read(j); 36 a[i]:=j; 37 i:=i+1 38 endwh; 39 q(num); 40 i:=1; 41 while i<(num+1) do 42 write(a[i]); 43 i:=i+1 44 endwh 45 end. 46
END
代码写于大三下6月份,编译原理课程设计
随笔写于2016.7.13