词法分析器

单词状态转换图

算法描述

0、单词以字母开头(保留字、标识符),通过查保留字表可以确定是哪种
1、单词以数字开头时,继续向后扫描,判断它是正整数、小数还是科学计数法表示的数
2、遇到运算符、界符直接存到相关表
3、遇到>得判断后面的符号是不是和>是一个整体(>=),如果是将整体存到界符运算符表中,如果不是就只将>存到表中。(<、&、|类似)
4、上图中其它符号会作为一个新单词的开始继续扫描
5、遇到空格会直接跳过寻找下一个单词。

代码实现


import java.io.*;
import java.util.LinkedHashMap;
import java.util.Map;

public class Word1 {

    public static String [] retainWords = new String[]{//c语言的32个保留字
                "auto", "break", "case", "char", "const", "continue",
                "default", "do", "double", "else", "enum", "extern",
               "float", "for", "goto", "if", "int", "long",
                "register", "return", "short", "signed", "sizeof", "static",
                "struct", "switch", "typedef", "union", "unsigned", "void",
                "volatile", "while"
             };
    public static String [] operatorWords = new String []{//界符
            "+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
            "!=", ";", "(", ")", "^", ",", "\"", "\'", "#", "&",
            "&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
            "}", "\\", ".", ":", "!"};
    public String headFile="";
    public String define="";
    public Map<String,Integer> remainWordMap = new LinkedHashMap<String, Integer>();//保留字
    public Map<String,Integer> identifierMap = new LinkedHashMap<String, Integer>();//标识符
    public Map<String,String> numMap = new LinkedHashMap<String, String>();//整数
    public Map<String,Integer> operatorMap = new LinkedHashMap<String, Integer>();//界符
    public String strToken = "";//存放构成单词符号的字符串
    //判断是否是字母
    public boolean isLetter(char ch){
        return Character.isLetter(ch);
    }
    //判断是否是数字
    public boolean isDigit(char ch){
        return Character.isDigit(ch);
    }
    //判断是否是空格
    public boolean isBC(char ch){
        return " ".equals(ch+"");
    }
    //连接字符
    public void concat(char ch){
        strToken += ch+"";
    }
    //清清除strToken
    public void clearStrToken(){
        strToken = "";
    }
    //判断是否是保留字(返回保留字对应的code)
    public int reserve(){
        for(int i = 0;i < retainWords.length;i++){
            if(strToken.equals(retainWords[i])){
                return i+1;//是保留字
            }
        }

        return -1;//标识符
    }

    public String getHeadFile() {
        return headFile;
    }

    public String getDefine() {
        return define;
    }

    //返回界符对应的code

    public int isOperator(){
        for (int i = 0; i <operatorWords.length ; i++) {
            if(strToken.equals(operatorWords[i])){
                return 33+i;
            }
        }return -1;
    }

    public String filter() throws IOException {//过滤器,过滤注释,去掉无用换行符
        BufferedReader br;
        String all = "";
        int ch;
        br = new BufferedReader(new FileReader("d:\\test.txt"));
        while((ch = br.read()) != -1){
            all += (char)ch + "";
        }
        //头文件和简单宏定义的处理
       String []File = all.split("\n");
       int k = 0;
       while(File[k].contains("include")){
         headFile += File[k].replaceAll("#( |\t|'')*include( |\t|'')*","")+"\n";
         k++;
       }
       while(File[k].contains("define")){
           define += File[k].replaceAll("#( |\t|'')*define( |\t|'')*","")+"\n";
           k++;

       }
       int len = 0;
        for (int i = 0; i < k; i++) {
            len += File[i].length();
        }


       all = new StringBuffer(all).replace(0,len+k,"").toString();//删去宏定义和头文件部分

        for (int i = 0; i <all.length()-1 ; i++) {

            if(all.charAt(i)==all.charAt(i+1)&&all.charAt(i)=='/'){
                for(int j=i+2;j<all.length();j++){
                    if(all.charAt(j)=='\n'){
                        all=all.replaceAll(all.substring(i,j),"");
                        break;
                    }
                }
            }
            else if(all.charAt(i)=='/'&&all.charAt(i+1)=='*'){
                for (int j = i+2; j <all.length()-1 ; j++) {
                    if(all.charAt(j)=='*'&&all.charAt(j+1)=='/'){
                        all = new StringBuffer(all).replace(i,j+1+1,"").toString();
                        break;
                    }
                }
            }
        }
        all = all.replaceAll("\r|\n|\t", "");//去除无用字符
        all = all.trim();//去除前后无用空格


        return all;//返回处理过的源程序
    }


    //扫描程序
    //i扫描完一个单词后就会立刻单词结尾的下一个字符
    public void scanner(String all) throws IOException {
        int i=0;
       while(i<all.length()) {

            if(isLetter(all.charAt(i))){//开头是字母的情况
               concat(all.charAt(i));
               i++;
               while(isLetter(all.charAt(i))||isDigit(all.charAt(i))){
                   concat(all.charAt(i));
                   i++;
               }

               if(reserve()!=-1){//保留字
                remainWordMap.put(strToken,reserve());
               }else{//标识符
                   identifierMap.put(strToken,99);
               }
               clearStrToken();
            }
            if(isDigit(all.charAt(i))){//开头是数字的情况
                concat(all.charAt(i));
                i++;
                while(isDigit(all.charAt(i))){
                    concat(all.charAt(i));
                    i++;
                }

                if(all.charAt(i)=='.'){//浮点数判断
                    concat('.');
                    i++;
                    while(isDigit(all.charAt(i))){
                        concat(all.charAt(i));
                        i++;
                    }
                }
                if(all.charAt(i)=='e'||all.charAt(i)=='E'){//科学计数的判断
                    concat(all.charAt(i));
                    if(all.charAt(i+1)=='+'||all.charAt(i+1)=='-'){//类似于8e+5,8.4e-5
                        concat(all.charAt(i+1));
                        i += 2;
                    }else{//类似于8e9,8.12e9
                        i++;
                    }
                    while(isDigit(all.charAt(i))){
                        concat(all.charAt(i));
                        i++;
                    }
                }
                numMap.put(strToken,"100");
                clearStrToken();
            }

            if (all.charAt(i) == '+' || all.charAt(i) == '-' || all.charAt(i) == '*' || all.charAt(i) == '/'
                    || all.charAt(i) == ';' || all.charAt(i) == '(' || all.charAt(i) == ')' || all.charAt(i) == '^'
                    || all.charAt(i) == ',' || all.charAt(i) == '~' || all.charAt(i) == '#' || all.charAt(i) == '%'
                    || all.charAt(i) == '[' || all.charAt(i) == ']' || all.charAt(i) == '{' || all.charAt(i) == '}'){
                concat(all.charAt(i));
                operatorMap.put(strToken,isOperator());
                clearStrToken();
                if(i==all.length()-1){//因为结束是},在这里必须判断,否则数组越界(i++)
                    break;
                }
                i++;
            }
            if(all.charAt(i)=='='){//=和==
                if(all.charAt(i+1)=='='){
                    strToken += "==";
                    operatorMap.put(strToken,isOperator());
                    i += 2;
                }else{
                    concat('=');
                    operatorMap.put(strToken,isOperator());
                    i++;
                }
                clearStrToken();
            }
            if(all.charAt(i)=='<'){//<和<=和<<
                if(all.charAt(i+1)=='='){
                    strToken += "<=";
                    operatorMap.put(strToken,isOperator());
                    i += 2;
                }else if(all.charAt(i+1)=='<'){
                    strToken += "<<";
                    operatorMap.put(strToken,isOperator());
                    i += 2;
                }else{
                    concat('<');
                    operatorMap.put(strToken,isOperator());
                    i++;
                }
                clearStrToken();
            }
           if(all.charAt(i)=='>'){//> 和>=和 >>
               if(all.charAt(i+1)=='='){
                   strToken += ">=";
                   operatorMap.put(strToken,isOperator());
                   i += 2;
               }else if(all.charAt(i+1)=='>'){
                   strToken += ">>";
                   operatorMap.put(strToken,isOperator());
                   i += 2;
               } else{
                   concat('>');
                   operatorMap.put(strToken,isOperator());
                   i++;
               }
               clearStrToken();
           }
            if(all.charAt(i)=='!'){//!和!=
                if(all.charAt(i+1)=='='){
                    strToken += "!=";
                    operatorMap.put(strToken,isOperator());
                    i += 2;
                }else{
                    concat('!');
                    operatorMap.put(strToken,isOperator());
                    i++;
                }
                clearStrToken();
            }

           if(all.charAt(i)=='&'){//&和&&
               if(all.charAt(i+1)=='&'){
                   strToken += "&&";
                   operatorMap.put(strToken,isOperator());
                   i += 2;
               }else{
                   concat('&');
                   operatorMap.put(strToken,isOperator());
                   i++;
               }
               clearStrToken();
           }

            if(isBC(all.charAt(i))){//判断是否是空格
                i++;
                continue;
            }



        }
    }
    public static void main(String[] args) throws IOException {
        Word1 w = new Word1();
        System.out.println(w.filter());

        System.out.println("包含头文件如下:");
        System.out.println(w.getHeadFile());
        System.out.println("包含宏定义如下:");
        System.out.println(w.getDefine());
        System.out.println("过滤后字符串长度:"+w.filter().length());
        w.scanner(w.filter());
        System.out.println("保留字如下:");

        BufferedWriter out = new BufferedWriter(new FileWriter("d:\\remainWord.txt"));
        //保留字
        for(Map.Entry<String, Integer> mEntry:w.remainWordMap.entrySet()){
            String key=mEntry.getKey();
            Integer value =mEntry.getValue();
            out.write("("+key+","+value+")");
            out.newLine();
            System.out.println("("+key+","+value+")");
        }out.close();
        out = new BufferedWriter(new FileWriter("d:\\operatorWord.txt"));

        //界符
        System.out.println("界符和运算符如下:");
        for(Map.Entry<String, Integer> mEntry:w.operatorMap.entrySet()){
            String key=mEntry.getKey();
            Integer value =mEntry.getValue();
            out.write("("+key+","+value+")");
            out.newLine();
            System.out.println("("+key+","+value+")");
        }out.close();

        out = new BufferedWriter(new FileWriter("d:\\numWord.txt"));
        System.out.println("数字如下:");
        //数字
        for(Map.Entry<String, String> mEntry:w.numMap.entrySet()){
            String key=mEntry.getKey();
            String value =mEntry.getValue();
            out.write("("+key+","+value+")");
            out.newLine();
            System.out.println("("+key+","+value+")");
        }out.close();

        out = new BufferedWriter(new FileWriter("d:\\identifierWord.txt"));
        System.out.println("标识符如下:");
        //标识符
        for(Map.Entry<String, Integer> mEntry:w.identifierMap.entrySet()){
            String key=mEntry.getKey();
            Integer value =mEntry.getValue();
            out.write("("+key+","+value+")");
            out.newLine();
            System.out.println("("+key+","+value+")");
        }out.close();


    }
}


posted @   浅滩浅  阅读(306)  评论(0)  编辑  收藏  举报
编辑推荐:
· 从二进制到误差:逐行拆解C语言浮点运算中的4008175468544之谜
· .NET制作智能桌面机器人:结合BotSharp智能体框架开发语音交互
· 软件产品开发中常见的10个问题及处理方法
· .NET 原生驾驭 AI 新基建实战系列:向量数据库的应用与畅想
· 从问题排查到源码分析:ActiveMQ消费端频繁日志刷屏的秘密
阅读排行:
· Windows桌面应用自动更新解决方案SharpUpdater5发布
· 我的家庭实验室服务器集群硬件清单
· C# 13 中的新增功能实操
· Supergateway:MCP服务器的远程调试与集成工具
· Vue3封装支持Base64导出的电子签名组件
点击右上角即可分享
微信分享提示