编译原理 #04# 中缀表达式转化为四元式(JavaScript实现)
// 实验存档
运行截图:
代码中的总体转化流程:中缀表达式字符串→tokens→逆波兰tokens(即后缀表达式)→四元式。
由后缀表达式写出四元式非常容易,比较繁琐的地方在于中缀转逆波兰,这里采用的方法如下↓
通过维护一个符号栈(或者说运算符栈)来处理运算符间的优先级关系。从左至右读入元素:
- 该元素是数字,则直接输出该数字
- 该元素是算数运算符:
- 直接压入符号栈的情况:符号栈为空,或者该运算符优先级大于栈顶运算符
- 不断弹出(同时输出该运算符)再压入的情况:符号栈不为空,或者该运算符优先级小于等于栈顶运算符
- 该元素是左括号,则直接将左括号压入符号栈,并赋予最小的优先级,避免被弹出。
- 该元素是右括号,则不断弹出(同时输出该运算符)符号栈中的元素,直到找到左括号,将左括号弹出但不输出(后缀表达式中是没有括号的)。
- 该元素是输入终止符号,则弹出(同时输出该运算符)符号栈中所有元素。
代码:
<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title></title> </head> <body> <script> let str = '4*(28+81*6-75)/8'; let tokens = tokenizer(str); let inversePolishNotation = getInversePolishNotation(tokens); let threeAddressCode = getThreeAddressCode(inversePolishNotation); console.log("输入:" + str); console.log("逆波兰式:" + inversePolishNotation.map(x => x.value)); console.log("四元式:" + threeAddressCode.map(x => x + '\n')); // 获取逆波兰式相应的四元式 function getThreeAddressCode(inversePolishNotation) { let result = []; let stack = []; let index = 0; // 临时变量序号 for (let i = 0; i != inversePolishNotation.length; ++i) { if (inversePolishNotation[i].tag == '数字') { stack.push(inversePolishNotation[i]); } else if (inversePolishNotation[i].tag == '算数运算符') { let right = stack.pop(); // 右操作数应该是后入栈的那个 let left = stack.pop(); let temp = { tag: '临时变量', value: 't' + index++, }; stack.push(temp); if (left && right) { // 如果左右操作数都不为空 result.push(`(${inversePolishNotation[i].value}, ${left.value}, ${right.value}, ${temp.value})`); } else { throw new Error("缺少操作数,非法运算!"); } } else { throw new Error("无法处理的token类型:" + tokens[i].tag); } } return result; } // 输入中缀形式的tokens,输出逆波兰形式的tokens function getInversePolishNotation(tokens) { let result = []; let symbols = []; // 维护一个符号栈,以便处理运算符间的优先级关系 for (let i = 0; i != tokens.length; ++i) { if (tokens[i].tag == '数字') { result.push(tokens[i]); } else if (tokens[i].tag == '算数运算符') { if (symbols.length == 0 || symbols[symbols.length - 1].priority < tokens[i].priority) { symbols.push(tokens[i]); } else { while (symbols.length != 0 && symbols[symbols.length - 1].priority >= tokens[i].priority) { result.push(symbols.pop()); } symbols.push(tokens[i]); } } else if (tokens[i].value == '(') { symbols.push(tokens[i]); } else if (tokens[i].value == ')') { let find = false; while (symbols.length != 0) { let temp = symbols.pop(); if (temp.value == '(') { find = true; break; } else { result.push(temp); } } if (!find) throw new Error("左括号缺失"); } else { throw new Error("无法处理的token类型:" + tokens[i].tag); } } while (symbols.length != 0) { let temp = symbols.pop(); if (temp.value == '(') { throw new Error("右括号缺失"); } else { result.push(temp); } } return result; } // 重用之前的词法分析程序 function tokenizer(input) { let s = input; let cur = 0; let peek = ' '; let line = 1; let readChar = () => s[cur++]; let undo = () => cur--; let scan = () => { // 每次scan返回一个Token // 略过空格,上次设置的peek值并不会被清空 for (;; peek = readChar()) { if (peek == undefined) { return null; // 读完了 } else if (peek == ' ' || peek == '\t') { continue; // 略过空格和Tab } else if (peek == '\n') { line++; // 记录当前行 } else { break; } } if (/[0-9.]/.test(peek)) { let temp = peek; let hasPoint = false; if (peek == '.') hasPoint = true; while (/[0-9.]/.test(peek = readChar())) { if (peek == '.' && hasPoint) { console.log("第" + line + "行存在语法错误,数字中包含多个小数点"); return null; } else if (peek == '.') { hasPoint = true; temp += peek; } else { temp += peek; } } return { tag: '数字', value: Number(temp), }; } if (/[+*/-]/.test(peek)) { let result = { tag: '算数运算符', value: peek, }; if (peek == '+' || peek == '-') { result.priority = 1; // 加减号的优先级较低 } else if (peek == '*' || peek == '/') { result.priority = 2; // 乘除号的优先级较高 } peek = ' '; return result; } if (peek == '(') { peek = ' '; return { tag: '括号', value: '(', priority: -99, // 左括号的优先级设置为最小, // 不会因为除读到右括号外的情况而出栈 }; } if (peek == ')') { peek = ' '; return { tag: '括号', value: ')', }; } throw new Error("读入非法字符: " + peek); }; let tokens = []; let token; while (token = scan()) { tokens.push(token); } return tokens; } </script> </body> </html>