【编译原理】c++实现词法分析器
写在前面:本博客为本人原创,严禁任何形式的转载!本博客只允许放在博客园(.cnblogs.com),如果您在其他网站看到这篇博文,请通过下面这个唯一的合法链接转到原文!
本博客全网唯一合法URL:http://www.cnblogs.com/acm-icpcer/p/8867199.html
Talk is cheap; here is my source code:
/*
    this code was first initiated by TZ
    contact email:xmb028@163.com
    personal website:wnm1503303791.github.io
    personal blogs:www.cnblogs.com/acm-icpcer/
    this code has been posted on my personal blog,checking url:www.cnblogs.com/acm-icpcer/p/8867199.html
    Copyright © 2018 TZ.
    All Rights Reserved.
*/
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<iostream>
#include<string>
#include<vector>
#include<stack>
#include<bitset>
#include<cstdlib>
#include<cmath>
#include<set>
#include<list>
#include<deque>
#include<map>
#include<queue>
using namespace std;

// Holds the whitespace-delimited chunk of input currently being scanned.
// main() reads at most 1023 characters into it, so it is always NUL-terminated.
char buffer[1024];

// Reserved words recognised by letter().
char remains[13][1024]=
{
    "begin", "call", "const", "do", "end", "if", "odd",
    "procedure", "read", "then", "var", "while", "write"
};

// Token names printed for the reserved words; r_output[k] pairs with remains[k].
char r_output[13][1024]=
{
    "beginsym", "callsym", "constsym", "dosym", "endsym", "ifsym", "oddsym",
    "proceduresym", "readsym", "thensym", "varsym", "whilesym", "writesym"
};

// True for the ASCII digits '0'..'9'.
static bool is_digit(char c)
{
    return c>='0'&&c<='9';
}

// True for the ASCII letters 'A'..'Z' and 'a'..'z'.
static bool is_letter(char c)
{
    return (c>='A'&&c<='Z')||(c>='a'&&c<='z');
}

// True for the single-character delimiters handled by delimiter().
static bool is_delimiter(char c)
{
    return c=='('||c==')'||c==','||c==';'||c=='.';
}

// True for every character that can begin a token handled by operators().
static bool is_operator_start(char c)
{
    return c=='+'||c=='-'||c=='*'||c=='/'||c=='='||c=='<'||c=='>'||c==':';
}

/*
    Scans the run of digits starting at buffer[i] and prints it as
    "(number,<digits>)".

    i: index in buffer of the first digit.
    Returns the index of the first character after the digit run.
*/
int number(int i)
{
    char temp[1024];
    int t=0;
    // The bound keeps one slot free for the terminator.
    while(t<1023&&is_digit(buffer[i]))
    {
        temp[t++]=buffer[i++];
    }
    temp[t]='\0';// terminate before printing; the array starts uninitialised
    cout<<"(number,"<<temp<<")"<<endl;
    return i;
}

/*
    Scans the run of letters and digits starting at buffer[i]. Prints
    "(<name>sym,<word>)" when the run equals a reserved word from remains,
    otherwise "(ident,<word>)".

    i: index in buffer of the first character of the word.
    Returns the index of the first character after the word.
*/
int letter(int i)
{
    char temp[1024];
    int t=0;
    // Recognise keywords and identifiers.
    while(t<1023&&(is_letter(buffer[i])||is_digit(buffer[i])))
    {
        temp[t++]=buffer[i++];
    }
    temp[t]='\0';// strcmp and operator<< both need a terminated string

    for(int a=0;a<13;a++)
    {
        if(strcmp(temp,remains[a])==0)
        {
            cout<<"("<<r_output[a]<<","<<remains[a]<<")"<<endl;
            return i;
        }
    }
    // Assumes identifiers in this grammar cannot start with a digit;
    // otherwise this code would need to be restructured.
    cout<<"(ident,"<<temp<<")"<<endl;
    return i;
}

/*
    Prints the token for the delimiter at buffer[i]; a character that is not
    one of ( ) , ; . prints nothing.

    i: index in buffer of the delimiter.
    Returns i+1 in every case.
*/
int delimiter(int i)
{
    const char temp=buffer[i];
    switch (temp)
    {
        case '(':
            cout<<"(lparen, "<<temp<<" )"<<endl;
            break;
        case ')':
            cout<<"(rparen, "<<temp<<" )"<<endl;
            break;
        case ',':
            cout<<"(comma, "<<temp<<" )"<<endl;
            break;
        case ';':
            cout<<"(semicolon, "<<temp<<" )"<<endl;
            break;
        case '.':
            cout<<"(period, "<<temp<<" )"<<endl;
            break;
    }
    return i+1;
}

/*
    Prints the token for the one- or two-character operator starting at
    buffer[i].

    i: index in buffer of the operator's first character.
    Returns the index just past the operator. Returns i unchanged, printing
    nothing, when no operator starts at i (a ':' not followed by '=', or any
    other character); the caller must treat "no progress" as an error.
*/
int operators(int i)
{
    const char temp=buffer[i];
    // Reading buffer[i+1] is safe: buffer[i] is not the terminator here,
    // so i+1 is at most the terminator's index.
    const char next=buffer[i+1];
    switch (temp)
    {
        case '+':
            cout<<"(plus, "<<temp<<" )"<<endl;
            return i+1;
        case '-':
            cout<<"(minus, "<<temp<<" )"<<endl;
            return i+1;
        case '*':
            cout<<"(times, "<<temp<<" )"<<endl;
            return i+1;
        case '/':
            cout<<"(slash, "<<temp<<" )"<<endl;
            return i+1;
        case '=':
            cout<<"(eql, "<<temp<<" )"<<endl;
            return i+1;
        case '<':
            if(next=='>')
            {
                cout<<"(neq, "<<temp<<next<<" )"<<endl;
                return i+2;
            }
            if(next=='=')
            {
                cout<<"(leq, "<<temp<<next<<" )"<<endl;
                return i+2;
            }
            cout<<"(less, "<<temp<<" )"<<endl;
            return i+1;
        case '>':
            if(next=='=')
            {
                cout<<"(geq, "<<temp<<next<<" )"<<endl;
                return i+2;
            }
            cout<<"(gtr, "<<temp<<" )"<<endl;
            return i+1;
        case ':':
            if(next=='=')
            {
                cout<<"(becomes, "<<temp<<next<<" )"<<endl;
                return i+2;
            }
            break;// a lone ':' is not a token
    }
    return i;
}

/*
    Reads whitespace-delimited chunks from standard input until end of input
    and prints one "(kind,lexeme)" line per token found in each chunk. An
    unrecognised character prints "ERROR!" and abandons the rest of that
    chunk. Chunks longer than 1023 characters are read in several pieces.
*/
int main()
{
    // "== 1" matters: scanf returns EOF (non-zero) at end of input.
    // The width limit keeps a long chunk from overflowing buffer.
    while(scanf("%1023s",buffer)==1)
    {
        const int length=static_cast<int>(strlen(buffer));
        int pointer=0;

        while(pointer<length)
        {
            const char c=buffer[pointer];
            int next=pointer;

            if(is_digit(c))                 // 1: number
                next=number(pointer);
            else if(is_letter(c))           // 2: keyword or identifier
                next=letter(pointer);
            else if(is_delimiter(c))        // 3: delimiter
                next=delimiter(pointer);
            else if(is_operator_start(c))   // 4: operator
                next=operators(pointer);

            // No progress means the character starts no token: either it is
            // unknown, or operators() rejected it (a lone ':').
            if(next<=pointer)
            {
                cout<<"ERROR!"<<endl;
                break;
            }
            pointer=next;
        }
    }
    return 0;
}
源码运行结果:
将源代码改为使用文件输出后的结果:
tz@COI HZAU
2018/4/17