编译原理 #01# 简易词法分析器(JavaScript实现)

// 实验存档

输入示例

main()
{
int a, b;
a = 10;
b = a + 20; 
}

效果图

 全部代码

编辑一份.html文件,将代码拷入,作为网页打开即可使用。

<!DOCTYPE html>
<html>

    <head>
        <meta charset="UTF-8">
        <title>Lexical_Analysis</title>
        <link href="https://fonts.googleapis.com/css?family=Noto+Serif+SC" rel="stylesheet">
        <style>
            main {
                /* Lay out the child elements with flexbox */
                display: flex;
                /* Let flex items wrap onto a new line when necessary */
                flex-wrap: wrap;
                /* Center the flex items along the main axis */
                justify-content: center;
            }
            
            textarea,
            button {
                font-family: 'Noto Serif SC', STFangSong, serif;
                font-size: 17px;
            }
        </style>
    </head>

    <body>
        <main>
            <textarea name="input" rows="20" cols="40"></textarea>
            <textarea name="output" rows="20" cols="40"></textarea>
            <button name="compile">Lexical Analysis</button>
        </main>

        <script>
            // Locate the source text area, the result text area and the trigger
            // button through their name attributes.
            const inputBox = document.querySelector("textarea[name=input]");
            const outputBox = document.querySelector("textarea[name=output]");
            const btnCompile = document.querySelector("button[name=compile]");

            // On click, analyse whatever is in the input box and show the result
            // (token array or error message) serialised as JSON in the output box.
            btnCompile.addEventListener("click", () => {
                const analysis = Lexical_Analysis(inputBox.value);
                outputBox.value = JSON.stringify(analysis);
            });
            /*
             * Token classes and their type codes:
             *   1 - reserved words: if, int, for, while, do, return, break, continue
             *   2 - identifiers: every other word
             *   3 - constants: unsigned integers
             *   4 - operators: +, -, *, /, =, >, <, >=, <=, !=
             *   5 - separators: , ; { } ( )
             */

            // Reserved words, listed space-separated and split into an array.
            const reservedWords = "if int for while do return break continue".split(" ");

            // Single-character entries come from spreading the string; the list also
            // holds a bare '!' entry next to the two-character operators.
            const operators = [..."+-*/=<>!", ">=", "<=", "!="];

            // Every separator is a single character.
            const separators = [...",;{}()"];

            /**
             * Split source text into a flat list of tokens.
             *
             * Token type codes: 1 = reserved word, 2 = identifier, 3 = unsigned
             * integer, 4 = operator, 5 = separator. Classification relies on the
             * `reservedWords`, `operators` and `separators` lists declared alongside
             * this function.
             *
             * @param {string} str - Source text to scan.
             * @returns {Array<{type: number, value: string}>|string} The tokens in
             *   source order, or an error message string naming the first character
             *   that cannot start any token.
             */
            function Lexical_Analysis(str) {
                // Index of the character currently being examined; str[cur] is that character.
                let cur = 0;
                // Accumulates the final result of the analysis.
                const tokens = [];

                // Consume the longest run of characters matching `pattern` starting at
                // cur and return it. The bounds check prevents reading past the end.
                const readRun = pattern => {
                    const start = cur;
                    while(cur < str.length && pattern.test(str[cur])) {
                        cur++;
                    }
                    return str.slice(start, cur);
                };

                while(cur < str.length) {
                    const ch = str[cur];

                    if(/\s/.test(ch)) {
                        // Whitespace only separates tokens.
                        cur++;
                    } else if(/[a-z]/i.test(ch)) {
                        // A word is a run of ASCII letters: reserved word or identifier.
                        const word = readRun(/[a-z]/i);
                        tokens.push({
                            type: reservedWords.includes(word) ? 1 : 2,
                            value: word,
                        });
                    } else if(separators.includes(ch)) {
                        tokens.push({
                            type: 5,
                            value: ch,
                        });
                        cur++;
                    } else if(operators.includes(ch)) {
                        let operator = ch;
                        cur++;
                        // '>', '<' and '!' form a two-character operator only when the
                        // very next character is '='. This must be a comparison: an
                        // assignment here is always truthy and would swallow whatever
                        // character follows (or read past the end of the input).
                        if(['>', '<', '!'].includes(operator) && cur < str.length && str[cur] === '=') {
                            operator += str[cur++];
                        }
                        tokens.push({
                            type: 4,
                            value: operator,
                        });
                    } else if(/[0-9]/.test(ch)) {
                        // Unsigned integer: a run of decimal digits, kept as text.
                        tokens.push({
                            type: 3,
                            value: readRun(/[0-9]/),
                        });
                    } else {
                        // Nothing can start with this character; report it to the caller.
                        return "包含非法字符:" + ch;
                    }
                }
                return tokens;
            }
        </script>
    </body>
</html>

 

附件,龙书2.6节练习:

<!DOCTYPE html>
<html>

<head>
    <meta charset="UTF-8">
    <title></title>
    <link href="https://fonts.googleapis.com/css?family=Noto+Serif+SC" rel="stylesheet">
    <style>
        main {
            /* Lay out the child elements with flexbox */
            display: flex;
            /* Let flex items wrap onto a new line when necessary */
            flex-wrap: wrap;
            /* Center the flex items along the main axis */
            justify-content: center;
        }

        textarea,
        button {
            font-family: 'Noto Serif SC', STFangSong, serif;
            font-size: 17px;
        }
    </style>
</head>

<body>
    <main>
        <textarea name="input" rows="20" cols="40"></textarea>
        <textarea name="output" rows="20" cols="40"></textarea>
        <button name="execute">Execute</button>
    </main>

    <script>
        // Locate the two text areas and the trigger button through their name
        // attributes.
        const inputBox = document.querySelector("textarea[name=input]");
        const outputBox = document.querySelector("textarea[name=output]");
        const btnExecute = document.querySelector("button[name=execute]");

        // On click, tokenize the input box contents and print the token list to
        // the developer console.
        btnExecute.addEventListener("click", () => {
            const tokens = tokenizer(inputBox.value);
            console.log(tokens);
        });

        /**
         * Tokenize source text: skips whitespace and comments and recognises
         * numbers, identifiers and relational operators.
         *
         * Token shapes:
         *   { tag: 'NUM', value: number }       - digits with at most one '.'
         *   { tag: 'ID', lexeme: string }       - run of ASCII letters; repeated
         *                                         identifiers share one object
         *   { tag: '逻辑运算符', value: string } - >, <, !, >=, <=, ==, !=
         *   { tag: char }                       - any other single character,
         *                                         including a lone '='
         *
         * Scanning stops early, returning the tokens collected so far and logging
         * a message to the console, on an unterminated block comment, on a '/'
         * that does not start a comment, or on a number with more than one
         * decimal point.
         *
         * @param {string} input - Source text to tokenize.
         * @returns {Array<Object>} Tokens in source order.
         */
        function tokenizer(input) {
            const s = input;
            let cur = 0;
            // Lookahead character. It deliberately survives between scan() calls:
            // a token that ends by reading one character too far leaves that
            // character here for the next call.
            let peek = ' ';
            // Current line number; maintained while scanning, not attached to tokens.
            let line = 1;
            // Identifier table: lexeme -> token object already handed out for it.
            const words = new Map();

            // Returns the next character, or undefined once the input is exhausted.
            const readChar = () => s[cur++];
            const undo = () => cur--;

            // Advance peek past spaces, tabs and newlines (counting lines).
            // Returns false when the input ends first.
            const skipBlanks = () => {
                for (;; peek = readChar()) {
                    if (peek === undefined) {
                        return false;
                    }
                    if (peek === ' ' || peek === '\t') {
                        continue;
                    }
                    if (peek === '\n') {
                        line++;
                        continue;
                    }
                    return true;
                }
            };

            // Called with peek === '/'. Skips one "//" or "/* */" comment.
            // Returns false when scanning must stop (input ended, or syntax error).
            const skipComment = () => {
                peek = readChar();
                if (peek === '/') {
                    // Line comment: runs up to the newline, which is left in peek
                    // so that skipBlanks counts it.
                    for (peek = readChar(); peek !== '\n'; peek = readChar()) {
                        if (peek === undefined) {
                            return false; // input ended inside the comment
                        }
                    }
                    return true;
                }
                if (peek === '*') {
                    // Block comment: ends at the first '*' immediately followed by '/'.
                    let lastAsterisk = false;
                    for (peek = readChar();; peek = readChar()) {
                        if (peek === undefined) {
                            console.log("注释语法错误01");
                            return false; // unterminated comment
                        }
                        if (lastAsterisk && peek === '/') {
                            peek = readChar();
                            return true;
                        }
                        if (peek === '\n') {
                            line++;
                        }
                        lastAsterisk = peek === '*';
                    }
                }
                // A '/' that opens neither comment form is treated as an error.
                console.log("注释语法错误02");
                return false;
            };

            // Each call returns one token, or null when scanning is over.
            const scan = () => {
                // Alternate between skipping blanks and skipping comments until
                // neither applies, so any number of consecutive comments is
                // consumed before a token is read.
                for (;;) {
                    if (!skipBlanks()) {
                        return null;
                    }
                    if (peek !== '/') {
                        break;
                    }
                    if (!skipComment()) {
                        return null;
                    }
                }

                if (/[0-9.]/.test(peek)) {
                    let text = peek;
                    let hasPoint = peek === '.';
                    // The explicit undefined check stops at end of input without
                    // relying on how the regex coerces undefined.
                    while ((peek = readChar()) !== undefined && /[0-9.]/.test(peek)) {
                        if (peek === '.') {
                            if (hasPoint) {
                                console.log("语法错误3,包含多个小数点");
                                return null;
                            }
                            hasPoint = true;
                        }
                        text += peek;
                    }
                    return {
                        tag: 'NUM',
                        value: Number(text),
                    };
                }

                // ASCII letters only. The range must be A-Z: "A-z" would also match
                // the punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
                if (/[a-zA-Z]/.test(peek)) {
                    let lexeme = peek;
                    while ((peek = readChar()) !== undefined && /[a-zA-Z]/.test(peek)) {
                        lexeme += peek;
                    }
                    let word = words.get(lexeme);
                    if (word === undefined) {
                        word = {
                            tag: 'ID',
                            lexeme,
                        };
                        words.set(lexeme, word);
                    }
                    return word;
                }

                if (/[><=!]/.test(peek)) {
                    const first = peek;
                    peek = readChar();
                    if (peek === '=') {
                        peek = readChar(); // move past the '=' so it is not scanned again
                        return {
                            tag: '逻辑运算符',
                            value: first + '=',
                        };
                    }
                    if (first !== '=') {
                        return {
                            tag: '逻辑运算符',
                            value: first,
                        };
                    }
                    // A lone '=': step back so the character just read is scanned
                    // again, and fall through to emit '=' as a plain token.
                    undo();
                    peek = first;
                }

                const res = {
                    tag: peek,
                };
                peek = ' '; // the character has been consumed
                return res;
            };

            const tokens = [];
            let token;
            while (token = scan()) {
                tokens.push(token);
            }
            return tokens;
        }
    </script>
</body>

</html>
View Code

 

posted @ 2019-03-15 18:10  xkfx  阅读(1361)  评论(7)  编辑  收藏  举报