loader/lib/Parser.js (241 lines of code) (raw):

/* Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /*jslint sloppy: true */ /* This is a simple top-down recursive descent parser. When the grammar defines a set of alternatives, the parser iterates through them, and chooses the first one which matches the input. It cannot handle left recursion in the grammar, where a NonTerminal LHS appears first in its own RHS expansion. */ /////////////// NonTerminal ////////////// function NonTerminal(parser, elements) { var i; this.parser = parser; if(elements) { this.members = new Array(elements.length); for(i = 0 ; i < elements.length ; i++) { this.members[i] = elements[i]; } } } NonTerminal.prototype = { isNonTerminal : true, strict : false, parser : null, ebnf : null, members : null, innerEval : null, isRepeat : null, visitorFunc : null, evaluate : function unset() { throw new Error("UNDEF EVAL"); } }; NonTerminal.prototype.set = function(that) { this.strict = that.strict; this.ebnf = that.ebnf; this.members = that.members; this.innerEval = that.innerEval; this.isRepeat = that.isRepeat; this.evaluate = that.evaluate; }; NonTerminal.prototype.describe = function() { var i, m, str; str = this.ebnf[0] + " "; for(i = 0 ; i < this.members.length ; i++) { m = this.members[i]; if(i) { str += this.ebnf[1]; } str += this.parser.describe(m) + " "; } str += this.ebnf[2]; return str; }; /////////////// Node of parsed tree /////////////// var nextNodeId = 0; function Node(nonTerminal) { this.nonTerminal = nonTerminal; this.children = []; this.id = ++nextNodeId; } Node.prototype.isNode = true; /* Node.visit() : takes a visitor and an object that holds the semantic "transform" of the parsed tree. If a node has a defined visitor function, call it; otherwise call the generic visitor. */ Node.prototype.visit = function(visitor, transform) { if(typeof visitor[this.nonTerminal.visitorFunc] === 'function') { visitor[this.nonTerminal.visitorFunc](this, transform); } else { this.visitChildNodes(visitor, transform); } }; /* This is the generic visitor. It will do two things: (a) visit all NonTerminal child Nodes. (b) if the transform is actually an array (a "collector"), push terminal token values onto the collector array. */ Node.prototype.visitChildNodes = function(visitor, collector) { var i; for(i = 0 ; i < this.children.length ; i++) { if(this.children[i]) { if(this.children[i].isNode) { this.children[i].visit(visitor, collector); } else if(Array.isArray(collector)) { collector.push(this.children[i].value); } } } }; function _getTerminal(array, type, index) { var i, j; j = 0; for(i = 0 ; i < array.length ; i++) { if(array[i] !== null && !(array[i].isNode) && ((array[i].type === type) || !type) && j++ === index) { return array[i].value; } } return null; } Node.prototype.getString = function(index) { return _getTerminal(this.children, 'string', index); }; Node.prototype.getNumber = function(index) { return _getTerminal(this.children, 'number', index); }; Node.prototype.getName = function(index) { return _getTerminal(this.children, 'name', index); }; Node.prototype.getVariable = function(index) { return _getTerminal(this.children, 'variable', index); }; Node.prototype.getToken = function(index) { return _getTerminal(this.children, null, index); }; /////////////// Parser ////////////// function Parser() { } /* The "text" here is the raw input text for the scanner; it will be used to produce error messages. */ Parser.prototype.setText = function(txt) { this.text = txt; }; Parser.prototype.Nonterminal = function(visitor) { var nt = new NonTerminal(this); if(typeof visitor === 'string' ) { nt.visitorFunc = visitor; } return nt; }; Parser.prototype.begin = function(tokens) { this.index = 0; // index of the current token this.tokens = tokens; // array of tokens created by the tokenizer }; Parser.prototype.fail = function(msg) { var start_line = 0, lines = [], token = this.tokens[this.index], i = 0, ctx = ""; /* Excerpt original text into the error message */ if(this.text) { start_line = (token.line > 2 ? token.line-1 : 1); lines = this.text.split('\n'); ctx += lines[start_line-1] + "\n"; if(token.line > start_line) { ctx += lines[token.line-1] + "\n"; } /* Now indent up to a caret at the errant token */ while(++i < token.column) { ctx += ' '; } ctx += "^^^\n"; } var err = new Error("\n" + ctx + msg); err.token = token; throw err; }; Parser.prototype.done = function() { if(this.index < this.tokens.length) { this.fail("Extra text after parsed statement."); } this.final_line = this.tokens[this.tokens.length-1].line; }; /* def(lhs, rhs) Sets the stub NonTerminal on the left hand side to behave as the fully formed one from the right hand side */ Parser.prototype.def = function(lhs, rhs) { lhs.set(rhs); }; /* Call def() on a list of RHS, LHS pairs */ Parser.prototype.defineProductions = function() { var i; for(i = 0 ; i < arguments.length ; i += 2) { this.def(arguments[i], arguments[ i+1 ]); } }; /* Describe Terminals, and let NonTerminals describe themselves */ Parser.prototype.describe = function(sym) { if(typeof sym === 'string') { return sym; } if(sym === undefined) { return "undefined"; } if(sym === null) { return "null"; } if(sym.isNonTerminal) { return sym.describe(); } }; /* evalSeries() Evaluate a series of tokens. If any strict token fails to match, return null. After one match, any subsequent strict token that fails to match should cause the parser to fail. */ function evalSeries() { var i = 0, /* Iterator */ sym, /* The current element */ r = null, /* Result of evaluating sym */ s = 0, /* Number of succesful evaluations */ strict_sym, /* Strictness of sym */ node = new Node(this); do { sym = this.members[i]; strict_sym = sym.isNonTerminal ? sym.strict : true; node.children[i] = r = this.parser.evaluate(sym, (this.strict && strict_sym && (s > 0) )); if(r !== null) { s += 1; } // Enable strictness i += 1; } while(i < this.members.length && (! (r === null && strict_sym))); return (s == 0 ? null : node); } /* evalAlternates() Evaluate a list of alternatives. Return the first match found in the grammar, or null if none match. */ function evalAlternates() { var r, i, node ; node = new Node(this); for(i = 0 ; i < this.members.length ; i++) { r = this.parser.evaluate(this.members[i]); if(r !== null) { node.children[0] = r; return node; } } return null; } /* evalSeveral() Evaluate a series of tokens for 0 or more matches against an inner eval. Return null if zero matches; return a Node if 1 or more matches. */ function evalSeveral() { var a = null; var r = this.innerEval(); if(r !== null) { a = new Node(this); while(r !== null) { a.children.push(r); r = this.innerEval(); } } return a; } /////// NonTerminal Factories: Series, Several, Alts, Option /////// /* Series: A sequence of tokens, e.g. "LOAD" "DATA" "INFILE" */ Parser.prototype.Series = function() { var nt = new NonTerminal(this, arguments); nt.strict = true; nt.ebnf = [ '','','' ]; nt.evaluate = evalSeries; return nt; }; /* Alternatives: ( a | b | c ) */ Parser.prototype.Alts = function() { var nt = new NonTerminal(this, arguments); nt.strict = true; nt.ebnf = [ '(' , '| ' , ')' ]; nt.evaluate = evalAlternates; return nt; }; /* "Option" means 0 or 1. "[a] b" could be either ab or b. Option is like Series, but non-strict. */ Parser.prototype.Option = function() { var nt = new NonTerminal(this, arguments); nt.ebnf = [ '[' , '', ']' ]; nt.evaluate = evalSeries; return nt; }; /* "Several" means 0 or more, as in the EBNF production "list := item { , item }" */ Parser.prototype.Several = function() { var nt = new NonTerminal(this, arguments); nt.ebnf = [ '{', '', '}' ]; nt.isRepeat = true; nt.innerEval = evalSeries; nt.evaluate = evalSeveral; return nt; }; /* Parser.evaluate() Evaluate the current token against a parser symbol. If the token matches and is a Terminal, advance the token stream, and return the matching token. If the symbol is a NonTerminal, delegate to its own evaluate() method. Return null if no match. If strict mode is set, and no match, fail the parser. */ Parser.prototype.evaluate = function(sym, strict) { if(debug) { console.log("Parser evaluate", this.describe(sym)); } var r = null; var t = this.tokens[this.index]; if(t) { if(typeof sym === 'string') { /* Terminal */ if( ('{' + t.type + '}' === sym) || (t.value === sym) || (typeof t.value === 'string' && (t.value.toUpperCase() === sym.toUpperCase()))) { r = t; } if(r !== null) { if(debug) { console.log("Parser Advance", t.value); } this.index += 1; // advance to the next token } } else if(sym.isNonTerminal) { r = sym.evaluate(); } if(strict === true && r === null) { this.fail("Expected " + this.describe(sym)); } } return r; }; module.exports.Parser = Parser;