【编译原理】c++实现词法分析器

写在前面:本博客为本人原创,严禁任何形式的转载!本博客只允许放在博客园(.cnblogs.com),如果您在其他网站看到这篇博文,请通过下面这个唯一的合法链接转到原文!

本博客全网唯一合法URL:http://www.cnblogs.com/acm-icpcer/p/8867199.html

 

 

Talk is cheap; here is my source code:

/*
this code was first initiated by TZ
contact email:xmb028@163.com
personal website:wnm1503303791.github.io
personal blogs:www.cnblogs.com/acm-icpcer/
this code has been posted on my personal blog,checking url:www.cnblogs.com/acm-icpcer/p/8867199.html
Copyright © 2018 TZ.
All Rights Reserved.
*/

#include<cstdio>  
#include<cstring>  
#include<algorithm>  
#include<iostream>  
#include<string>  
#include<vector>  
#include<stack>  
#include<bitset>  
#include<cstdlib>  
#include<cmath>  
#include<set>  
#include<list>  
#include<deque>  
#include<map>  
#include<queue>  
using namespace std;  

// Scratch area for the chunk of input currently being scanned; main() fills it
// via scanf("%s") and the scanner functions index into it.
char buffer[1024];

// Reserved words of the language, listed alphabetically.
// Entry k here pairs with entry k of r_output.
char remains[13][1024]=
{
    "begin",     "call", "const", "do",  "end",   "if",    "odd",
    "procedure", "read", "then",  "var", "while", "write"
};

// Token-class name printed for the reserved word stored at the same index
// in remains.
char r_output[13][1024]=
{
    "beginsym",     "callsym", "constsym", "dosym",  "endsym",   "ifsym",    "oddsym",
    "proceduresym", "readsym", "thensym",  "varsym", "whilesym", "writesym"
};


/*
 * Scans the run of decimal digits that starts at buffer[i] and prints it as a
 * "(number,<digits>)" token on its own line.
 *
 * i: index into the global buffer at which scanning starts.
 * Returns the index of the first character after the digit run (i itself when
 * buffer[i] is not a digit).
 */
int number(int i)
{
    // Collect the lexeme in a std::string so it always has a well-defined end.
    // A raw char array that is filled but never terminated would make the
    // stream insertion below read past the collected digits.
    std::string digits;
    while(buffer[i]>='0'&&buffer[i]<='9')
    {
        digits+=buffer[i++];
    }
    std::cout<<"(number,"<<digits<<")"<<std::endl;
    return i;
}

/*
 * Scans the run of ASCII letters and digits that starts at buffer[i] and
 * prints it either as a reserved-word token "(<name>sym,<word>)" or, when the
 * word is not in remains, as "(ident,<word>)".
 *
 * i: index into the global buffer at which scanning starts.
 * Returns the index of the first character after the scanned word.
 */
int letter(int i)
{
    // A std::string keeps the lexeme properly delimited; clearing a raw array
    // with a length taken from its own uninitialized contents does not.
    std::string word;
    // Recognize reserved words and identifiers: letters followed by letters/digits.
    while((buffer[i]>='A'&&buffer[i]<='Z')||(buffer[i]>='a'&&buffer[i]<='z')||(buffer[i]>='0'&&buffer[i]<='9'))
    {
        word+=buffer[i++];
    }
    // Reserved words take priority over identifiers.
    for(int a=0;a<13;a++)
    {
        if(word==remains[a])
        {
            std::cout<<"("<<r_output[a]<<","<<remains[a]<<")"<<std::endl;
            return i;
        }
    }
    // Assumes identifiers in this grammar never start with a digit (main()
    // only dispatches here on a letter); otherwise this code needs reworking.
    std::cout<<"(ident,"<<word<<")"<<std::endl;
    return i;
}

/*
 * Prints the token for the single-character delimiter at buffer[i]
 * ("(", ")", ",", ";" or ".") and steps past it.
 *
 * i: index into the global buffer of the character to classify.
 * Returns i+1 in every case; a character that is not one of the five
 * delimiters produces no output.
 */
int delimiter(int i)
{
    const char symbol=buffer[i];
    const char* label;

    // Pick the token-class name first, then print through one shared statement.
    if(symbol=='(')
        label="lparen";
    else if(symbol==')')
        label="rparen";
    else if(symbol==',')
        label="comma";
    else if(symbol==';')
        label="semicolon";
    else if(symbol=='.')
        label="period";
    else
        return i+1;   // not a delimiter: emit nothing, still step past it

    std::cout<<"("<<label<<",  "<<symbol<<"  )"<<std::endl;
    return i+1;
}

/*
 * Prints the token for the operator that starts at buffer[i] and steps past it.
 * One-character operators: + - * / = < >. Two-character operators: <> <= >= :=.
 *
 * i: index into the global buffer of the operator's first character.
 * Returns the index just past the consumed operator (i+1 or i+2).
 *
 * A ':' that is not followed by '=' (or any other character that is not an
 * operator) is reported as "ERROR!" and skipped. The returned index is always
 * greater than i, so a caller looping on the result cannot get stuck on the
 * same character.
 */
int operators(int i)
{
    const char first=buffer[i];
    switch (first)
    {
        case '+':
            std::cout<<"(plus,  "<<first<<"  )"<<std::endl;
            return i+1;
        case '-':
            std::cout<<"(minus,  "<<first<<"  )"<<std::endl;
            return i+1;
        case '*':
            std::cout<<"(times,  "<<first<<"  )"<<std::endl;
            return i+1;
        case '/':
            std::cout<<"(slash,  "<<first<<"  )"<<std::endl;
            return i+1;
        case '=':
            std::cout<<"(eql,  "<<first<<"  )"<<std::endl;
            return i+1;

        case '<':
            // buffer[i+1] is at worst the terminating '\0', which matches nothing.
            if(buffer[i+1]=='>')
            {
                std::cout<<"(neq,  "<<first<<buffer[i+1]<<"  )"<<std::endl;
                return i+2;
            }
            if(buffer[i+1]=='=')
            {
                std::cout<<"(leq,  "<<first<<buffer[i+1]<<"  )"<<std::endl;
                return i+2;
            }
            std::cout<<"(less,  "<<first<<"  )"<<std::endl;
            return i+1;

        case '>':
            if(buffer[i+1]=='=')
            {
                std::cout<<"(geq,  "<<first<<buffer[i+1]<<"  )"<<std::endl;
                return i+2;
            }
            std::cout<<"(gtr,  "<<first<<"  )"<<std::endl;
            return i+1;

        case ':':
            if(buffer[i+1]=='=')
            {
                std::cout<<"(becomes,  "<<first<<buffer[i+1]<<"  )"<<std::endl;
                return i+2;
            }
            break;   // lone ':' is not a token; handled below

        default:
            break;
    }

    // Unrecognized input: report it and consume one character so that every
    // path returns a value and makes forward progress.
    std::cout<<"ERROR!"<<std::endl;
    return i+1;
}

/*
 * Reads whitespace-separated chunks from standard input until end of input
 * and prints one token per line for each chunk, dispatching on the first
 * character of every lexeme to number(), letter(), delimiter() or operators().
 * A character that belongs to none of these classes prints "ERROR!" and
 * abandons the rest of the current chunk.
 */
int main()
{
    // buffer is a global, so it starts out zero-filled and needs no clearing.
    // The field width 1023 matches buffer's 1024 bytes (one is reserved for
    // the terminator); a longer run of non-blank characters is therefore
    // processed in several pieces instead of overflowing the array.
    // Comparing against 1 stops the loop at end of input, where scanf
    // returns EOF (a non-zero value).
    while(std::scanf("%1023s",buffer)==1)
    {
        int pointer=0;
        // Walk the chunk until its terminator; each handler returns the index
        // at which scanning should resume.
        while(buffer[pointer]!='\0')
        {
            const char c=buffer[pointer];
            //1:number
            if(c>='0'&&c<='9')
                pointer=number(pointer);
            //2:letter (reserved word or identifier)
            else if((c>='A'&&c<='Z')||(c>='a'&&c<='z'))
                pointer=letter(pointer);
            //3:delimiter
            else if(c=='('||c==')'||c==','||c==';'||c=='.')
                pointer=delimiter(pointer);
            //4:operators
            else if(c=='+'||c=='-'||c=='*'||c=='/'||c=='='
                  ||c=='<'||c=='>'||c==':')
                pointer=operators(pointer);
            else
            {
                std::cout<<"ERROR!"<<std::endl;
                break;
            }
        }
    }
    return 0;
}

 

源码运行结果:

 

将源代码改为使用文件输出后的结果:

 

 

 

tz@COI HZAU

2018/4/17

posted on 2018-04-17 15:50  tuzhuo  阅读(7843)  评论(1)  编辑  收藏  举报