// C语言词法分析器 (lexical analyzer for the C language)

#include<iostream>
#include<cstring>
#include<cstdio>
#include<fstream>
#include<map>
#include<string>
#include<cstdlib>
#include<set>
#include<fstream>
using namespace std;
// Forward declarations of the lexer helpers defined further down in this file.
// Each check* function that takes (filewriter, i) reads a token starting at filewriter[i],
// returns its text and advances i past it.
string checkstring(string filewriter,int &i);
string checkchar(string filewriter,int &i);
string checkdight(string filewriter,int &i);
string checkoperator(string filewriter,int &i);
// Character classification predicates.
bool checkdeadline(char ch);
bool checkletterchar(char ch);
// Prints an error message and terminates the program.
void error();
// NOTE(review): this declares a zero-argument scanner(); the definition visible in this file
// is scanner(string filename), so this prototype matches no visible definition — confirm and align.
string scanner();
// Table of reserved words, delimiters and operators.
// The position of an entry in this table is its category code: index 0 is an empty
// placeholder so that the first keyword ("auto") gets code 1. The layout is relied on by
// main(): indices 35..42 are the delimiters { } ( ) [ ] " ' and index 43 is '#'; codes from
// 81 upward are used for identifiers and constants, so the table must keep exactly 81 entries.
// The division operator is stored as "/" (checkoperator() yields "/" for a lone slash;
// "//" can never reach the tokenizer because comments are stripped beforehand).
string reseve[]= {"","auto","break","case","char","const","continue","default","do","double","else","enum","extern",
                  "float","for","goto","if","int","long","register","return","short","signed","sizeof","static","struct","switch","typedef",
                  "unsigned","union","void","volatile","while","main","include","{","}",
                  "(",")","[","]","\"","\'",
                  "#",":",";","<","<=",">",">=","!=","==","+","-","*","/","%","++","--"
                  ,"&&","&","||","|","^","!","~",">>","<<","=","+=","-=","%=","*=","<<=",">>=","&=","|=","/=","^=",",","."
                 };// reserved words and all operators
// Characters that may start a delimiter or an operator token.
char deadline[]= {'?','!','%','&','(',')','*','+',',','-','.','/',':',';','<','=','>','^','{','|','}','~',']','[','\'','\"'};

// Returns true when ch is one of the characters listed in deadline, i.e. when it can be
// the first character of a delimiter or operator; returns false otherwise.
bool checkdeadline(char ch)
{
    const size_t count=sizeof(deadline)/sizeof(deadline[0]);
    size_t pos=0;
    // Linear scan: stop at the first match or at the end of the table.
    while(pos<count&&deadline[pos]!=ch)
    {
        pos++;
    }
    return pos<count;
}
// Extracts the operator or delimiter that starts at filewriter[i].
// The longest matching multi-character operator wins ("<<=" before "<<" before "<");
// when none matches, the single character at position i is returned.
// On return i has been advanced past the returned token.
string checkoperator(string filewriter,int &i)
{
    // Three-character operators come first so that they are preferred over their prefixes.
    static const char* const multiCharOps[]=
    {
        "<<=",">>=",
        "<=",">=","!=","==","++","--","&&","||",">>","<<",
        "+=","-=","%=","*=","&=","|=","/=","^="
    };
    for(const char* op:multiCharOps)
    {
        const size_t opLen=strlen(op);
        // Only compare when the whole operator fits into the remaining text.
        if(filewriter.length()>i+(opLen-1)&&filewriter.compare(i,opLen,op)==0)
        {
            i+=static_cast<int>(opLen);
            return string(op);
        }
    }
    // Fallback: a one-character token.
    string single=filewriter.substr(i,1);
    i++;
    return single;
}
// Reads a numeric constant starting at filewriter[i]: a run of decimal digits, optionally
// followed by '.' and a fractional digit run. A '.' that is not followed by a digit is
// reported through error(). On return i points just past the number.
string checkdight(string filewriter,int &i)
{
    // True when pos is inside the text and refers to a decimal digit.
    auto digitAt=[&filewriter](int pos)
    {
        return static_cast<size_t>(pos)<filewriter.length()
               &&filewriter[pos]>='0'&&filewriter[pos]<='9';
    };

    string number;
    // Integer part.
    for(; digitAt(i); ++i)
    {
        number+=filewriter[i];
    }
    // Optional fractional part.
    if(static_cast<size_t>(i)<filewriter.length()&&filewriter[i]=='.')
    {
        number+='.';
        ++i;
        if(!digitAt(i))
        {
            error();
        }
        for(; digitAt(i); ++i)
        {
            number+=filewriter[i];
        }
    }
    return number;
}
// Returns true when ch is an ASCII letter ('a'..'z' or 'A'..'Z').
bool checkletterchar(char ch)
{
    const bool isLower=(ch>='a'&&ch<='z');
    const bool isUpper=(ch>='A'&&ch<='Z');
    return isLower||isUpper;
}
// Reads one source character starting at filewriter[i], treating a C escape sequence as a
// single unit, and advances i past it.
// Recognized escapes:
//   - simple:  \n \a \b \f \r \t \v \' \" \\ \?
//   - octal:   a backslash followed by one to three octal digits (e.g. \0, \101)
//   - hex:     \x followed by one or more hexadecimal digits (e.g. \x41)
// A backslash that does not start one of these sequences (or is the last character) is
// returned on its own, as is any ordinary character. At the end of the text an empty
// string is returned and i is still advanced by one.
string checkchar(string filewriter,int &i)
{
    auto isOctal=[](char c)
    {
        return c>='0'&&c<='7';
    };
    auto isHex=[](char c)
    {
        return (c>='0'&&c<='9')||(c>='a'&&c<='f')||(c>='A'&&c<='F');
    };

    const size_t start=static_cast<size_t>(i);
    const size_t len=filewriter.length();
    size_t consumed=1;// default: a single character

    if(filewriter[i]=='\\'&&start+1<len)
    {
        const size_t first=start+1;
        const char next=filewriter[first];
        if(isOctal(next))
        {
            // Octal escape: at most three digits belong to the sequence.
            size_t end=first;
            while(end<len&&end<first+3&&isOctal(filewriter[end]))
            {
                end++;
            }
            consumed=end-start;
        }
        else if(next=='x')
        {
            // Hex escape: needs at least one digit, otherwise the backslash stands alone.
            size_t end=first+1;
            while(end<len&&isHex(filewriter[end]))
            {
                end++;
            }
            if(end>first+1)
            {
                consumed=end-start;
            }
        }
        else
        {
            switch(next)
            {
            case 'n':
            case 'a':
            case 'b':
            case 'f':
            case 'r':
            case 't':
            case 'v':
            case '\'':
            case '\"':
            case '\\':
            case '?':
                consumed=2;
                break;
            default:
                break;// unknown escape: return only the backslash
            }
        }
    }

    string token=filewriter.substr(i,consumed);
    i+=static_cast<int>(consumed);
    return token;
}
// Collects the body of a string literal: starting at filewriter[i], characters (escape
// sequences are read as units by checkchar) are gathered until a double quote or the end
// of the text is reached. The terminating quote is not consumed; i is left pointing at it.
string checkstring(string filewriter,int &i)
{
    string content;
    for(;;)
    {
        if(!(i<filewriter.length()))
        {
            break;// ran out of input
        }
        if(filewriter[i]=='\"')
        {
            break;// closing quote found
        }
        content+=checkchar(filewriter,i);
    }
    return content;
}
// Reads an identifier-like word starting at filewriter[i]. The first character is taken
// unconditionally (the caller has already classified it); the word then continues while
// the characters are letters, digits or underscores. i is advanced past the word.
string checkkey(string filewriter,int &i)
{
    // True for characters that may appear after the first character of a word.
    auto isWordChar=[](char c)
    {
        if(c=='_')
            return true;
        if(c>='a'&&c<='z')
            return true;
        if(c>='A'&&c<='Z')
            return true;
        return c>='0'&&c<='9';
    };

    string word(1,filewriter[i]);
    ++i;
    for(; i<filewriter.length()&&isWordChar(filewriter[i]); ++i)
    {
        word+=filewriter[i];
    }
    return word;
}
map<string,int>p;// maps a reserved word / operator / delimiter to its category code

// Fills p so that every entry of reseve maps to its index in that table.
// The index is the category code: the empty placeholder at index 0 keeps code 0 ("unknown"),
// "auto" gets 1, the delimiters { } ( ) [ ] " ' get 35..42 and '#' gets 43, which are the
// values main() uses when it prints delimiter and directive tokens.
void init()
{
    const int count=static_cast<int>(sizeof(reseve)/sizeof(reseve[0]));
    for(int code=0; code<count; code++)
    {
        p[reseve[code]]=code;
    }
}
// Returns true when the given file name looks like a C source file: it must be at least
// three characters long and end in ".c".
bool checkfile(string filewriter)
{
    const size_t len=filewriter.length();
    // Names shorter than "x.c" cannot have both a base name and the extension.
    return len>=3&&filewriter.compare(len-2,2,".c")==0;
}
// Reads the C source file `filename` and returns its text with comments removed.
//
// - The name must pass checkfile(); otherwise a message is printed and error() is called.
// - A file that cannot be opened is reported and error() is called.
// - A "//" comment is dropped up to the end of its line.
// - A "/* ... */" comment (possibly spanning several lines) is replaced by a single space,
//   so the tokens on either side of it stay separated.
// - Comment markers inside string or character literals are left untouched; a backslash
//   inside a literal protects the following character. Literal state is reset at the end
//   of every line.
// - Every kept line ends with '\n'. Lines that contained a comment and have nothing but
//   blanks left are omitted; blank lines of the original source are kept.
string scanner(string filename)
{
    if(!checkfile(filename))
    {
        cout<<"这不是c文件"<<endl;
        error();
    }
    ifstream inf(filename);
    if(!inf)
    {
        cout<<"无法打开文件"<<endl;
        error();
    }

    string line;
    string result;
    bool inBlockComment=false;// carries over from one line to the next
    while(getline(inf,line))
    {
        string cleaned;
        bool sawComment=inBlockComment;// the line starts inside a comment
        char quote='\0';// currently open literal delimiter, or '\0' when outside literals
        for(size_t k=0; k<line.length(); k++)
        {
            const char cur=line[k];
            const char next=(k+1<line.length())?line[k+1]:'\0';
            if(inBlockComment)
            {
                if(cur=='*'&&next=='/')
                {
                    inBlockComment=false;
                    k++;// skip the '/'
                }
            }
            else if(quote!='\0')
            {
                cleaned+=cur;
                if(cur=='\\'&&k+1<line.length())
                {
                    // Escaped character: copy it so an escaped quote does not end the literal.
                    cleaned+=next;
                    k++;
                }
                else if(cur==quote)
                {
                    quote='\0';
                }
            }
            else if(cur=='\"'||cur=='\'')
            {
                quote=cur;
                cleaned+=cur;
            }
            else if(cur=='/'&&next=='/')
            {
                sawComment=true;
                break;// the rest of the line is a comment
            }
            else if(cur=='/'&&next=='*')
            {
                sawComment=true;
                inBlockComment=true;
                cleaned+=' ';// a comment separates tokens like a blank does
                k++;// skip the '*', so "/*/" is not taken as a complete comment
            }
            else
            {
                cleaned+=cur;
            }
        }
        // Drop lines that consisted of comment text only.
        if(sawComment&&cleaned.find_first_not_of(" \t")==string::npos)
            continue;
        result+=cleaned;
        result+='\n';
    }
    inf.close();
    return result;
}
// Reports a lexical/usage error on standard output and terminates the program.
// The process exits with a failure status so that callers (shells, build scripts) can
// tell an aborted run from a successful one.
void error()
{
    cout<<"error"<<endl;
    exit(EXIT_FAILURE);
}
int main()
{

    init();
    string filewriter,processstring;
    filewriter=scanner("inii.c");
    ofstream onf;
    onf.open("Resultfile.c");
    int i=0;
   cout<<"---扫描程序之后的程序---"<<endl;
    cout<<filewriter<<endl;
     cout<<"---单词---种类编码---单词种类---"<<endl;
     onf<<"---扫描程序之后的程序---"<<endl;
    onf<<filewriter<<endl;
     onf<<"---单词---种类编码---单词种类---"<<endl;
    while(i<filewriter.length())
    {
        while(i<filewriter.length()&&(filewriter[i]=='\n'||filewriter[i]==' '||filewriter[i]=='\t'))//过滤空白符
            i++;
        if(i>=filewriter.length())//判断过滤完空白符后已经到文件末尾
            break;
        if(filewriter[i]=='\"')//判断为字符串常量
        {
            cout<<"("<<'\"'<<","<<41<<","<<"界符)"<<endl;
             onf<<"("<<'\"'<<","<<41<<","<<"界符)"<<endl;
            i++;
            processstring=checkstring(filewriter,i);
            cout<<"("<<processstring<<","<<83<<","<<"字符串常量)"<<endl;
           onf<<"("<<processstring<<","<<83<<","<<"字符串常量)"<<endl;
            if(i<filewriter.length()&&filewriter[i]=='\"')
            {
                cout<<"("<<'\"'<<","<<41<<","<<"界符)"<<endl;
                 onf<<"("<<'\"'<<","<<41<<","<<"界符)"<<endl;
                i++;
            }
            else
            {
                error();
            }
        }
        else if(filewriter[i]=='\'')//判断是否为字符常量
        {
            cout<<"("<<'\''<<","<<42<<","<<"界符)"<<endl;
               onf<<"("<<'\''<<","<<42<<","<<"界符)"<<endl;
            i++;
            processstring=checkchar(filewriter,i);
            cout<<"("<<processstring<<","<<84<<","<<"字符常量)"<<endl;
            onf<<"("<<processstring<<","<<84<<","<<"字符常量)"<<endl;
            if(i<filewriter.length()&&filewriter[i]=='\'')
            {
                cout<<"("<<'\''<<","<<42<<","<<"界符)"<<endl;
                onf<<"("<<'\''<<","<<42<<","<<"界符)"<<endl;
                i++;
            }
            else
            {
                error();
            }
        }
        else if(filewriter[i]<='9'&&filewriter[i]>='0')//判断是否为数字常量
        {
            processstring=checkdight(filewriter,i);
            cout<<"("<<processstring<<","<<82<<","<<"数字常量)"<<endl;
                   onf<<"("<<processstring<<","<<82<<","<<"数字常量)"<<endl;
        }
        else if(checkdeadline(filewriter[i]))//判断是否为运算符或者界符
        {
            processstring=checkoperator(filewriter,i);
            if(p[processstring]<=42&&p[processstring]>=35)
               {
                   cout<<"("<<processstring<<","<<p[processstring]<<","<<"界符)"<<endl;
                   onf<<"("<<processstring<<","<<p[processstring]<<","<<"界符)"<<endl;
               }
            else
               {
                   cout<<"("<<processstring<<","<<p[processstring]<<","<<"运算符)"<<endl;
                   onf<<"("<<processstring<<","<<p[processstring]<<","<<"运算符)"<<endl;
               }
        }
        else if(filewriter[i]=='_'||checkletterchar(filewriter[i]))//判断是否为标识符或者关键字
        {
            processstring=checkkey(filewriter,i);
            if(p[processstring]!=0)
            {
                cout<<"("<<processstring<<","<<p[processstring]<<","<<"关键字)"<<endl;
                onf<<"("<<processstring<<","<<p[processstring]<<","<<"关键字)"<<endl;
            }
            else
               {
                   cout<<"("<<processstring<<","<<81<<","<<"标识符)"<<endl;
                   onf<<"("<<processstring<<","<<81<<","<<"标识符)"<<endl;
               }
        }
        else if(filewriter[i]=='#')//特殊字符
        {
            processstring="";
              while(i<filewriter.length()&&(filewriter[i]!='\n'))
                   processstring+=filewriter[i],i++;
            cout<<"("<<processstring<<","<<43<<","<<"宏定义)"<<endl;
              onf<<"("<<processstring<<","<<43<<","<<"宏定义)"<<endl;
            i++;
        }
        else
        {
            error();
        }
    }
    return 0;
}

  

// posted on 2019-12-02 19:19  Carits  阅读(551)  评论(0)  编辑  收藏  举报