前端编译原理 parser.js源码解读
前面已经介绍了一个jison的使用,在正常开发中其实已经够用下,下面主要是看了下parser.js代码解读下,作为一些了解。
下面以最简单的文法产生的parser做一些代码注释
下面是一些注释,标示了编译过程,能够了解jison产生的编译器有哪些处理
var a = (function() { var o = function(k, v, o, l) { for (o = o || {}, l = k.length; l--; o[k[l]] = v); return o; }; var parser = { trace: function trace() {}, //编译期共享对象 yy: {}, //标示符,$accept: 0, $end: 1,error: 2是默认的符号,E: 3, NAT: 4是分析文法获得的 symbols_: { error: 2, E: 3, NAT: 4, $accept: 0, $end: 1 }, //终结符 terminals_: { 2: 'error', 4: 'NAT' }, //对应文法规则 productions_: [ 0, // e` => E $end [ 3, //规约成符号2(E) 1 //堆栈移除一个状态即移除NAT,压入E ] // E => NAT ], //规约的时候执行的代码 performAction: function anonymous( yytext, yyleng, yylineno, yy, yystate /* action[1] */, $$ /* vstack */, _$ /* lstack */ ) { var $0 = $$.length - 1; switch (yystate) { case 1: this.$ = $$[$0] * 1; break; } }, //移入规约自动机 lalr(1)分析表,每一值表示一个状态 table: [ /*状态0 I0*/{ 3: 1/**goto 遇到符号3(E),跳转到状态1 */, 4: [1, 2] /**移入,遇到符号4(NAT),移入进入状态2 */}, /*状态1 I1*/{ 1: [3] /**接受 遇到符号1($end),结束程序,完成编译 */}, /*状态2 I2*/{ 1: [2, 1] /**规约 遇到符号1($end),按照productions[1],进行规则 */ } ], defaultActions: { 2: [2, 1] //状态2进行规则规约,不用向前看符号是什么(逻辑优化) }, //错误提示 parseError: function parseError(str, hash) { if (hash.recoverable) { this.trace(str); } else { var error = new Error(str); error.hash = hash; throw error; } }, //编译 input为处理的字符串 parse: function parse(input) { var self = this, stack = [0], tstack = [], vstack = [null], lstack = [], table = this.table, yytext = '', yylineno = 0, yyleng = 0, recovering = 0, TERROR = 2, EOF = 1; var args = lstack.slice.call(arguments, 1); var lexer = Object.create(this.lexer); var sharedState = { yy: {} }; for (var k in this.yy) { if (Object.prototype.hasOwnProperty.call(this.yy, k)) { sharedState.yy[k] = this.yy[k]; } } lexer.setInput(input, sharedState.yy); sharedState.yy.lexer = lexer; sharedState.yy.parser = this; if (typeof lexer.yylloc === 'undefined') { lexer.yylloc = {}; } var yyloc = lexer.yylloc; lstack.push(yyloc); //位置信息采用区间的形式显示 var ranges = lexer.options && lexer.options.ranges; if (typeof sharedState.yy.parseError === 'function') { this.parseError = sharedState.yy.parseError; } else { this.parseError = Object.getPrototypeOf(this).parseError; } //规约的时候需要弹出堆栈 function popStack(n) { //stack [0,标识符,状态,标识符,状态...],所以出栈是2倍 stack.length = stack.length - 2 * n; vstack.length = vstack.length - n; lstack.length = lstack.length - n; } //词法分析,返回token即标示符 var lex = function() { var token; token = lexer.lex() || EOF; if (typeof token !== 'number') { token = self.symbols_[token] || token; } return token; }; var symbol, preErrorSymbol, state, action, a, r, yyval = {}, p, len, newState, expected; while (true) { state = stack[stack.length - 1]; if (this.defaultActions[state]) { action = this.defaultActions[state]; } else { if (symbol === null || typeof symbol === 'undefined') { //通过正则获取下一个标示符 symbol = lex(); } //action对应的动作 action = table[state] && table[state][symbol]; } //动作错误,语法分析错误 if (typeof action === 'undefined' || !action.length || !action[0]) { var errStr = ''; expected = []; for (p in table[state]) { if (this.terminals_[p] && p > TERROR) { expected.push("'" + this.terminals_[p] + "'"); } } if (lexer.showPosition) { errStr = 'Parse error on line ' + (yylineno + 1) + ':\n' + lexer.showPosition() + '\nExpecting ' + expected.join(', ') + ", got '" + (this.terminals_[symbol] || symbol) + "'"; } else { errStr = 'Parse error on line ' + (yylineno + 1) + ': Unexpected ' + (symbol == EOF ? 'end of input' : "'" + (this.terminals_[symbol] || symbol) + "'"); } this.parseError(errStr, { text: lexer.match, token: this.terminals_[symbol] || symbol, line: lexer.yylineno, loc: yyloc, expected: expected }); } //规约-规约冲突,移入规约冲突 if (action[0] instanceof Array && action.length > 1) { throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol); } switch (action[0]) { //移入 case 1: stack.push(symbol); vstack.push(lexer.yytext); lstack.push(lexer.yylloc); stack.push(action[1]); symbol = null; if (!preErrorSymbol) { yyleng = lexer.yyleng; yytext = lexer.yytext; yylineno = lexer.yylineno; yyloc = lexer.yylloc; if (recovering > 0) { recovering--; } } else { symbol = preErrorSymbol; preErrorSymbol = null; } break; //规约 case 2: len = this.productions_[action[1]][1]; yyval.$ = vstack[vstack.length - len]; yyval._$ = { first_line: lstack[lstack.length - (len || 1)].first_line, last_line: lstack[lstack.length - 1].last_line, first_column: lstack[lstack.length - (len || 1)].first_column, last_column: lstack[lstack.length - 1].last_column }; if (ranges) { yyval._$.range = [ lstack[lstack.length - (len || 1)].range[0], lstack[lstack.length - 1].range[1] ]; } //执行文法定义的代码 r = this.performAction.apply( yyval, [yytext, yyleng, yylineno, sharedState.yy, action[1], vstack, lstack].concat(args) ); if (typeof r !== 'undefined') { return r; } if (len) { stack = stack.slice(0, -1 * len * 2); vstack = vstack.slice(0, -1 * len); lstack = lstack.slice(0, -1 * len); } stack.push(this.productions_[action[1]][0]); vstack.push(yyval.$); lstack.push(yyval._$); newState = table[stack[stack.length - 2]][stack[stack.length - 1]]; stack.push(newState); break; //接受 case 3: return true; } } return true; } }; /* 词法分析 */ var lexer = (function() { var lexer = { EOF: 1, parseError: function parseError(str, hash) { if (this.yy.parser) { this.yy.parser.parseError(str, hash); } else { throw new Error(str); } }, // resets the lexer, sets new input setInput: function(input, yy) { this.yy = yy || this.yy || {}; this._input = input; this._more = this._backtrack = this.done = false; this.yylineno = this.yyleng = 0; this.yytext = this.matched = this.match = ''; this.conditionStack = ['INITIAL']; this.yylloc = { first_line: 1, first_column: 0, last_line: 1, last_column: 0 }; if (this.options.ranges) { this.yylloc.range = [0, 0]; } this.offset = 0; return this; }, input: function() { var ch = this._input[0]; this.yytext += ch; this.yyleng++; this.offset++; this.match += ch; this.matched += ch; var lines = ch.match(/(?:\r\n?|\n).*/g); if (lines) { this.yylineno++; this.yylloc.last_line++; } else { this.yylloc.last_column++; } if (this.options.ranges) { this.yylloc.range[1]++; } this._input = this._input.slice(1); return ch; }, unput: function(ch) { var len = ch.length; var lines = ch.split(/(?:\r\n?|\n)/g); this._input = ch + this._input; this.yytext = this.yytext.substr(0, this.yytext.length - len); //this.yyleng -= len; this.offset -= len; var oldLines = this.match.split(/(?:\r\n?|\n)/g); this.match = this.match.substr(0, this.match.length - 1); this.matched = this.matched.substr(0, this.matched.length - 1); if (lines.length - 1) { this.yylineno -= lines.length - 1; } var r = this.yylloc.range; this.yylloc = { first_line: this.yylloc.first_line, last_line: this.yylineno + 1, first_column: this.yylloc.first_column, last_column: lines ? (lines.length === oldLines.length ? this.yylloc.first_column : 0) + oldLines[oldLines.length - lines.length].length - lines[0].length : this.yylloc.first_column - len }; if (this.options.ranges) { this.yylloc.range = [r[0], r[0] + this.yyleng - len]; } this.yyleng = this.yytext.length; return this; }, more: function() { this._more = true; return this; }, reject: function() { if (this.options.backtrack_lexer) { this._backtrack = true; } else { return this.parseError( 'Lexical error on line ' + (this.yylineno + 1) + '. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n' + this.showPosition(), { text: '', token: null, line: this.yylineno } ); } return this; }, less: function(n) { this.unput(this.match.slice(n)); }, pastInput: function() { var past = this.matched.substr(0, this.matched.length - this.match.length); return (past.length > 20 ? '...' : '') + past.substr(-20).replace(/\n/g, ''); }, upcomingInput: function() { var next = this.match; if (next.length < 20) { next += this._input.substr(0, 20 - next.length); } return (next.substr(0, 20) + (next.length > 20 ? '...' : '')).replace(/\n/g, ''); }, showPosition: function() { var pre = this.pastInput(); var c = new Array(pre.length + 1).join('-'); return pre + this.upcomingInput() + '\n' + c + '^'; }, test_match: function(match, indexed_rule) { var token, lines, backup; if (this.options.backtrack_lexer) { // save context backup = { yylineno: this.yylineno, yylloc: { first_line: this.yylloc.first_line, last_line: this.last_line, first_column: this.yylloc.first_column, last_column: this.yylloc.last_column }, yytext: this.yytext, match: this.match, matches: this.matches, matched: this.matched, yyleng: this.yyleng, offset: this.offset, _more: this._more, _input: this._input, yy: this.yy, conditionStack: this.conditionStack.slice(0), done: this.done }; if (this.options.ranges) { backup.yylloc.range = this.yylloc.range.slice(0); } } lines = match[0].match(/(?:\r\n?|\n).*/g); if (lines) { this.yylineno += lines.length; } this.yylloc = { first_line: this.yylloc.last_line, last_line: this.yylineno + 1, first_column: this.yylloc.last_column, last_column: lines ? lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length : this.yylloc.last_column + match[0].length }; this.yytext += match[0]; this.match += match[0]; this.matches = match; this.yyleng = this.yytext.length; if (this.options.ranges) { this.yylloc.range = [this.offset, (this.offset += this.yyleng)]; } this._more = false; this._backtrack = false; this._input = this._input.slice(match[0].length); this.matched += match[0]; token = this.performAction.call( this, this.yy, this, indexed_rule, this.conditionStack[this.conditionStack.length - 1] ); if (this.done && this._input) { this.done = false; } if (token) { return token; } else if (this._backtrack) { for (var k in backup) { this[k] = backup[k]; } return false; } return false; }, next: function() { if (this.done) { return this.EOF; } if (!this._input) { this.done = true; } var token, match, tempMatch, index; if (!this._more) { this.yytext = ''; this.match = ''; } //获取词法规则 //没看出来conditionStack有什么用,可能跟我的文法定义比较简单,没有使用一些内置的begin,pushState, popState之类的有点关系 var rules = this._currentRules(); for (var i = 0; i < rules.length; i++) { tempMatch = this._input.match(this.rules[rules[i]]); if (tempMatch && (!match || tempMatch[0].length > match[0].length)) { match = tempMatch; index = i; //backtrack_lexer,会先尝试返回token,如果想尝试下面的规则的话,通过reject()方法 if (this.options.backtrack_lexer) { token = this.test_match(tempMatch, rules[i]); if (token !== false) { return token; } else if (this._backtrack) { match = false; continue; } else { return false; } //flex 设置true 会匹配符合正则多的文本 //flex 设置false 根据文法顺序返回结果 } else if (!this.options.flex) { break; } } } if (match) { token = this.test_match(match, rules[index]); if (token !== false) { return token; } return false; } if (this._input === '') { return this.EOF; } else { //输入字符串不存在词法对应 return this.parseError( 'Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(), { text: '', token: null, line: this.yylineno } ); } }, lex: function lex() { var r = this.next(); if (r) { return r; } else { return this.lex(); } }, begin: function begin(condition) { this.conditionStack.push(condition); }, popState: function popState() { var n = this.conditionStack.length - 1; if (n > 0) { return this.conditionStack.pop(); } else { return this.conditionStack[0]; } }, _currentRules: function _currentRules() { if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) { return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules; } else { return this.conditions['INITIAL'].rules; } }, topState: function topState(n) { n = this.conditionStack.length - 1 - Math.abs(n || 0); if (n >= 0) { return this.conditionStack[n]; } else { return 'INITIAL'; } }, pushState: function pushState(condition) { this.begin(condition); }, stateStackSize: function stateStackSize() { return this.conditionStack.length; }, options: {}, performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) { var YYSTATE = YY_START; switch ($avoiding_name_collisions) { case 0 /* skip whitespace */: break; case 1: return 4; break; case 2: return '+'; break; } }, //词法中的规则 rules: [ /^(?:\s+)/, /^(?:[0-9]+)/, /^(?:\+)/ ], conditions: { INITIAL: { rules: [0, 1, 2], inclusive: true } } }; return lexer; })(); parser.lexer = lexer; function Parser() { this.yy = {}; } Parser.prototype = parser; parser.Parser = Parser; return new Parser(); })();