From 199e1504a977c1f890ae8921332ddfcadd0d1dcf Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Fri, 20 Apr 2018 20:45:13 +0200 Subject: [PATCH 01/18] refactor(parser): merge lexer with parser & optimize parser --- src/lexer.js | 425 ---------------------- src/parser.js | 838 ++++++++++++++++++++++++++++++++------------ test/parser.spec.js | 53 ++- 3 files changed, 674 insertions(+), 642 deletions(-) delete mode 100644 src/lexer.js diff --git a/src/lexer.js b/src/lexer.js deleted file mode 100644 index 447c1228..00000000 --- a/src/lexer.js +++ /dev/null @@ -1,425 +0,0 @@ -export class Token { - constructor(index, text) { - this.index = index; - this.text = text; - } - - withOp(op) { - this.opKey = op; - return this; - } - - withGetterSetter(key) { - this.key = key; - return this; - } - - withValue(value) { - this.value = value; - return this; - } - - toString() { - return `Token(${this.text})`; - } -} - -export class Lexer { - lex(text) { - let scanner = new Scanner(text); - let tokens = []; - let token = scanner.scanToken(); - - while (token) { - tokens.push(token); - token = scanner.scanToken(); - } - - return tokens; - } -} - -export class Scanner { - constructor(input) { - this.input = input; - this.length = input.length; - this.peek = 0; - this.index = -1; - - this.advance(); - } - - scanToken() { - // Skip whitespace. - while (this.peek <= $SPACE) { - if (++this.index >= this.length) { - this.peek = $EOF; - return null; - } - - this.peek = this.input.charCodeAt(this.index); - } - - // Handle identifiers and numbers. - if (isIdentifierStart(this.peek)) { - return this.scanIdentifier(); - } - - if (isDigit(this.peek)) { - return this.scanNumber(this.index); - } - - let start = this.index; - - switch (this.peek) { - case $PERIOD: - this.advance(); - return isDigit(this.peek) ? 
this.scanNumber(start) : new Token(start, '.'); - case $LPAREN: - case $RPAREN: - case $LBRACE: - case $RBRACE: - case $LBRACKET: - case $RBRACKET: - case $COMMA: - case $COLON: - case $SEMICOLON: - return this.scanCharacter(start, String.fromCharCode(this.peek)); - case $SQ: - case $DQ: - return this.scanString(); - case $PLUS: - case $MINUS: - case $STAR: - case $SLASH: - case $PERCENT: - case $CARET: - case $QUESTION: - return this.scanOperator(start, String.fromCharCode(this.peek)); - case $LT: - case $GT: - case $BANG: - case $EQ: - return this.scanComplexOperator(start, $EQ, String.fromCharCode(this.peek), '='); - case $AMPERSAND: - return this.scanComplexOperator(start, $AMPERSAND, '&', '&'); - case $BAR: - return this.scanComplexOperator(start, $BAR, '|', '|'); - case $NBSP: - while (isWhitespace(this.peek)) { - this.advance(); - } - - return this.scanToken(); - // no default - } - - let character = String.fromCharCode(this.peek); - this.error(`Unexpected character [${character}]`); - return null; - } - - scanCharacter(start, text) { - assert(this.peek === text.charCodeAt(0)); - this.advance(); - return new Token(start, text); - } - - scanOperator(start, text) { - assert(this.peek === text.charCodeAt(0)); - assert(OPERATORS[text] === 1); - this.advance(); - return new Token(start, text).withOp(text); - } - - scanComplexOperator(start, code, one, two) { - assert(this.peek === one.charCodeAt(0)); - this.advance(); - - let text = one; - - if (this.peek === code) { - this.advance(); - text += two; - } - - if (this.peek === code) { - this.advance(); - text += two; - } - - assert(OPERATORS[text] === 1); - - return new Token(start, text).withOp(text); - } - - scanIdentifier() { - assert(isIdentifierStart(this.peek)); - let start = this.index; - - this.advance(); - - while (isIdentifierPart(this.peek)) { - this.advance(); - } - - let text = this.input.substring(start, this.index); - let result = new Token(start, text); - - // TODO(kasperl): Deal with null, 
undefined, true, and false in - // a cleaner and faster way. - if (OPERATORS[text] === 1) { - result.withOp(text); - } else { - result.withGetterSetter(text); - } - - return result; - } - - scanNumber(start) { - assert(isDigit(this.peek)); - let simple = (this.index === start); - this.advance(); // Skip initial digit. - - while (true) { // eslint-disable-line no-constant-condition - if (!isDigit(this.peek)) { - if (this.peek === $PERIOD) { - simple = false; - } else if (isExponentStart(this.peek)) { - this.advance(); - - if (isExponentSign(this.peek)) { - this.advance(); - } - - if (!isDigit(this.peek)) { - this.error('Invalid exponent', -1); - } - - simple = false; - } else { - break; - } - } - - this.advance(); - } - - let text = this.input.substring(start, this.index); - let value = simple ? parseInt(text, 10) : parseFloat(text); - return new Token(start, text).withValue(value); - } - - scanString() { - assert(this.peek === $SQ || this.peek === $DQ); - - let start = this.index; - let quote = this.peek; - - this.advance(); // Skip initial quote. - - let buffer; - let marker = this.index; - - while (this.peek !== quote) { - if (this.peek === $BACKSLASH) { - if (!buffer) { - buffer = []; - } - - buffer.push(this.input.substring(marker, this.index)); - this.advance(); - - let unescaped; - - if (this.peek === $u) { - // TODO(kasperl): Check bounds? Make sure we have test - // coverage for this. 
- let hex = this.input.substring(this.index + 1, this.index + 5); - - if (!/[A-Z0-9]{4}/.test(hex)) { - this.error(`Invalid unicode escape [\\u${hex}]`); - } - - unescaped = parseInt(hex, 16); - - for (let i = 0; i < 5; ++i) { - this.advance(); - } - } else { - unescaped = unescape(this.peek); - this.advance(); - } - - buffer.push(String.fromCharCode(unescaped)); - marker = this.index; - } else if (this.peek === $EOF) { - this.error('Unterminated quote'); - } else { - this.advance(); - } - } - - let last = this.input.substring(marker, this.index); - this.advance(); // Skip terminating quote. - let text = this.input.substring(start, this.index); - - // Compute the unescaped string value. - let unescaped = last; - - if (buffer !== null && buffer !== undefined) { - buffer.push(last); - unescaped = buffer.join(''); - } - - return new Token(start, text).withValue(unescaped); - } - - advance() { - if (++this.index >= this.length) { - this.peek = $EOF; - } else { - this.peek = this.input.charCodeAt(this.index); - } - } - - error(message, offset = 0) { - // TODO(kasperl): Try to get rid of the offset. It is only used to match - // the error expectations in the lexer tests for numbers with exponents. 
- let position = this.index + offset; - throw new Error(`Lexer Error: ${message} at column ${position} in expression [${this.input}]`); - } -} - -const OPERATORS = { - 'undefined': 1, - 'null': 1, - 'true': 1, - 'false': 1, - '+': 1, - '-': 1, - '*': 1, - '/': 1, - '%': 1, - '^': 1, - '=': 1, - '==': 1, - '===': 1, - '!=': 1, - '!==': 1, - '<': 1, - '>': 1, - '<=': 1, - '>=': 1, - '&&': 1, - '||': 1, - '&': 1, - '|': 1, - '!': 1, - '?': 1 -}; - -const $EOF = 0; -const $TAB = 9; -const $LF = 10; -const $VTAB = 11; -const $FF = 12; -const $CR = 13; -const $SPACE = 32; -const $BANG = 33; -const $DQ = 34; -const $$ = 36; -const $PERCENT = 37; -const $AMPERSAND = 38; -const $SQ = 39; -const $LPAREN = 40; -const $RPAREN = 41; -const $STAR = 42; -const $PLUS = 43; -const $COMMA = 44; -const $MINUS = 45; -const $PERIOD = 46; -const $SLASH = 47; -const $COLON = 58; -const $SEMICOLON = 59; -const $LT = 60; -const $EQ = 61; -const $GT = 62; -const $QUESTION = 63; - -const $0 = 48; -const $9 = 57; - -const $A = 65; -const $E = 69; -const $Z = 90; - -const $LBRACKET = 91; -const $BACKSLASH = 92; -const $RBRACKET = 93; -const $CARET = 94; -const $_ = 95; - -const $a = 97; -const $e = 101; -const $f = 102; -const $n = 110; -const $r = 114; -const $t = 116; -const $u = 117; -const $v = 118; -const $z = 122; - -const $LBRACE = 123; -const $BAR = 124; -const $RBRACE = 125; -const $NBSP = 160; - -function isWhitespace(code) { - return (code >= $TAB && code <= $SPACE) || (code === $NBSP); -} - -function isIdentifierStart(code) { - return ($a <= code && code <= $z) - || ($A <= code && code <= $Z) - || (code === $_) - || (code === $$); -} - -function isIdentifierPart(code) { - return ($a <= code && code <= $z) - || ($A <= code && code <= $Z) - || ($0 <= code && code <= $9) - || (code === $_) - || (code === $$); -} - -function isDigit(code) { - return ($0 <= code && code <= $9); -} - -function isExponentStart(code) { - return (code === $e || code === $E); -} - -function 
isExponentSign(code) { - return (code === $MINUS || code === $PLUS); -} - -function unescape(code) { - switch (code) { - case $n: return $LF; - case $f: return $FF; - case $r: return $CR; - case $t: return $TAB; - case $v: return $VTAB; - default: return code; - } -} - -function assert(condition, message) { - if (!condition) { - throw message || 'Assertion failed'; - } -} diff --git a/src/parser.js b/src/parser.js index b1f05b09..4510c11c 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,4 +1,3 @@ -import {Lexer, Token} from './lexer'; import { Chain, ValueConverter, Assign, Conditional, AccessThis, AccessScope, AccessMember, AccessKeyed, @@ -7,50 +6,52 @@ import { LiteralPrimitive, LiteralArray, LiteralObject, LiteralString } from './ast'; -let EOF = new Token(-1, null); - export class Parser { + cache; constructor() { - this.cache = {}; - this.lexer = new Lexer(); + this.cache = Object.create(null); } parse(input) { input = input || ''; return this.cache[input] - || (this.cache[input] = new ParserImplementation(this.lexer, input).parseChain()); + || (this.cache[input] = new ParserImplementation(input).parseChain()); } } export class ParserImplementation { - constructor(lexer, input) { + constructor(input) { this.index = 0; + this.startIndex = 0; + this.lastIndex = 0; this.input = input; - this.tokens = lexer.lex(input); - } - - get peek() { - return (this.index < this.tokens.length) ? 
this.tokens[this.index] : EOF; + this.length = input.length; + this.token = T_EndOfSource; + this.tokenValue = undefined; + this.tokenRaw = ''; + this.lastValue = 0; } parseChain() { + this.nextToken(); + let isChain = false; let expressions = []; - while (this.optional(';')) { - isChain = true; - } + while (this.token !== T_EndOfSource) { + while (this.optional(T_Semicolon)) { + isChain = true; + } - while (this.index < this.tokens.length) { - if (this.peek.text === ')' || this.peek.text === '}' || this.peek.text === ']') { - this.error(`Unconsumed token ${this.peek.text}`); + if ((this.token & T_ClosingToken) === T_ClosingToken) { + this.error(`Unconsumed token ${String.fromCharCode(this.tokenValue)}`); } - let expr = this.parseBindingBehavior(); + const expr = this.parseBindingBehavior(); expressions.push(expr); - while (this.optional(';')) { + while (this.optional(T_Semicolon)) { isChain = true; } @@ -65,13 +66,13 @@ export class ParserImplementation { parseBindingBehavior() { let result = this.parseValueConverter(); - while (this.optional('&')) { - let name = this.peek.text; + while (this.optional(T_BindingBehavior)) { + let name = this.tokenValue; let args = []; - this.advance(); + this.nextToken(); - while (this.optional(':')) { + while (this.optional(T_Colon)) { args.push(this.parseExpression()); } @@ -84,14 +85,14 @@ export class ParserImplementation { parseValueConverter() { let result = this.parseExpression(); - while (this.optional('|')) { - let name = this.peek.text; // TODO(kasperl): Restrict to identifier? + while (this.optional(T_ValueConverter)) { + let name = this.tokenValue; let args = []; - this.advance(); + this.nextToken(); - while (this.optional(':')) { - // TODO(kasperl): Is this really supposed to be expressions? + while (this.optional(T_Colon)) { + // todo(kasperl): Is this really supposed to be expressions? 
args.push(this.parseExpression()); } @@ -102,18 +103,18 @@ export class ParserImplementation { } parseExpression() { - let start = this.peek.index; + let start = this.index; let result = this.parseConditional(); - while (this.peek.text === '=') { + while (this.token === T_Assign) { if (!result.isAssignable) { - let end = (this.index < this.tokens.length) ? this.peek.index : this.input.length; - let expression = this.input.substring(start, end); + let end = (this.index < this.length) ? this.index : this.length; + let expression = this.input.slice(start, end); this.error(`Expression ${expression} is not assignable`); } - this.expect('='); + this.expect(T_Assign); result = new Assign(result, this.parseConditional()); } @@ -121,15 +122,15 @@ export class ParserImplementation { } parseConditional() { - let start = this.peek.index; - let result = this.parseLogicalOr(); + let start = this.index; + let result = this.parseBinary(0); - if (this.optional('?')) { + if (this.optional(T_QuestionMark)) { let yes = this.parseExpression(); - if (!this.optional(':')) { - let end = (this.index < this.tokens.length) ? this.peek.index : this.input.length; - let expression = this.input.substring(start, end); + if (!this.optional(T_Colon)) { + let end = (this.index < this.length) ? 
this.index : this.length; + let expression = this.input.slice(start, end); this.error(`Conditional expression ${expression} requires all 3 expressions`); } @@ -141,101 +142,38 @@ export class ParserImplementation { return result; } - parseLogicalOr() { - let result = this.parseLogicalAnd(); + parseBinary(minPrecedence) { + let left = this.parseUnary(); - while (this.optional('||')) { - result = new Binary('||', result, this.parseLogicalAnd()); + if ((this.token & T_BinaryOperator) !== T_BinaryOperator) { + return left; } - return result; - } - - parseLogicalAnd() { - let result = this.parseEquality(); - - while (this.optional('&&')) { - result = new Binary('&&', result, this.parseEquality()); - } - - return result; - } - - parseEquality() { - let result = this.parseRelational(); - - while (true) { // eslint-disable-line no-constant-condition - if (this.optional('==')) { - result = new Binary('==', result, this.parseRelational()); - } else if (this.optional('!=')) { - result = new Binary('!=', result, this.parseRelational()); - } else if (this.optional('===')) { - result = new Binary('===', result, this.parseRelational()); - } else if (this.optional('!==')) { - result = new Binary('!==', result, this.parseRelational()); - } else { - return result; - } - } - } - - parseRelational() { - let result = this.parseAdditive(); - - while (true) { // eslint-disable-line no-constant-condition - if (this.optional('<')) { - result = new Binary('<', result, this.parseAdditive()); - } else if (this.optional('>')) { - result = new Binary('>', result, this.parseAdditive()); - } else if (this.optional('<=')) { - result = new Binary('<=', result, this.parseAdditive()); - } else if (this.optional('>=')) { - result = new Binary('>=', result, this.parseAdditive()); - } else { - return result; + while ((this.token & T_BinaryOperator) === T_BinaryOperator) { + const opToken = this.token; + const precedence = opToken & T_Precedence; + if (precedence < minPrecedence) { + break; } + 
this.nextToken(); + left = new Binary(TokenValues[opToken & T_TokenMask], left, this.parseBinary(precedence)); } + return left; } - parseAdditive() { - let result = this.parseMultiplicative(); - - while (true) { // eslint-disable-line no-constant-condition - if (this.optional('+')) { - result = new Binary('+', result, this.parseMultiplicative()); - } else if (this.optional('-')) { - result = new Binary('-', result, this.parseMultiplicative()); - } else { - return result; + parseUnary() { + const opToken = this.token; + if ((opToken & T_UnaryOperator) === T_UnaryOperator) { + this.nextToken(); + switch(opToken) { + case T_Add: + return this.parseUnary(); + case T_Subtract: + return new Binary('-', new LiteralPrimitive(0), this.parseUnary()); + case T_LogicalNot: + return new PrefixNot('!', this.parseUnary()); } } - } - - parseMultiplicative() { - let result = this.parsePrefix(); - - while (true) { // eslint-disable-line no-constant-condition - if (this.optional('*')) { - result = new Binary('*', result, this.parsePrefix()); - } else if (this.optional('%')) { - result = new Binary('%', result, this.parsePrefix()); - } else if (this.optional('/')) { - result = new Binary('/', result, this.parsePrefix()); - } else { - return result; - } - } - } - - parsePrefix() { - if (this.optional('+')) { - return this.parsePrefix(); // TODO(kasperl): This is different than the original parser. - } else if (this.optional('-')) { - return new Binary('-', new LiteralPrimitive(0), this.parsePrefix()); - } else if (this.optional('!')) { - return new PrefixNot('!', this.parsePrefix()); - } - return this.parseAccessOrCallMember(); } @@ -243,14 +181,14 @@ export class ParserImplementation { let result = this.parsePrimary(); while (true) { // eslint-disable-line no-constant-condition - if (this.optional('.')) { - let name = this.peek.text; // TODO(kasperl): Check that this is an identifier. Are keywords okay? 
+ if (this.optional(T_Period)) { + let name = this.tokenValue; // todo(kasperl): Check that this is an identifier. Are keywords okay? - this.advance(); + this.nextToken(); - if (this.optional('(')) { - let args = this.parseExpressionList(')'); - this.expect(')'); + if (this.optional(T_LeftParen)) { + let args = this.parseExpressionList(T_RightParen); + this.expect(T_RightParen); if (result instanceof AccessThis) { result = new CallScope(name, args, result.ancestor); } else { @@ -263,13 +201,13 @@ export class ParserImplementation { result = new AccessMember(result, name); } } - } else if (this.optional('[')) { + } else if (this.optional(T_LeftBracket)) { let key = this.parseExpression(); - this.expect(']'); + this.expect(T_RightBracket); result = new AccessKeyed(result, key); - } else if (this.optional('(')) { - let args = this.parseExpressionList(')'); - this.expect(')'); + } else if (this.optional(T_LeftParen)) { + let args = this.parseExpressionList(T_RightParen); + this.expect(T_RightParen); result = new CallFunction(result, args); } else { return result; @@ -278,70 +216,78 @@ export class ParserImplementation { } parsePrimary() { - if (this.optional('(')) { - let result = this.parseExpression(); - this.expect(')'); - return result; - } else if (this.optional('null')) { - return new LiteralPrimitive(null); - } else if (this.optional('undefined')) { - return new LiteralPrimitive(undefined); - } else if (this.optional('true')) { - return new LiteralPrimitive(true); - } else if (this.optional('false')) { - return new LiteralPrimitive(false); - } else if (this.optional('[')) { - let elements = this.parseExpressionList(']'); - this.expect(']'); - return new LiteralArray(elements); - } else if (this.peek.text === '{') { - return this.parseObject(); - } else if (this.peek.key !== null && this.peek.key !== undefined) { - return this.parseAccessOrCallScope(); - } else if (this.peek.value !== null && this.peek.value !== undefined) { - let value = this.peek.value; - 
this.advance(); - return value instanceof String || typeof value === 'string' ? new LiteralString(value) : new LiteralPrimitive(value); - } else if (this.index >= this.tokens.length) { - throw new Error(`Unexpected end of expression: ${this.input}`); - } else { - this.error(`Unexpected token ${this.peek.text}`); + const token = this.token; + switch (token) { + case T_Identifier: + case T_ParentScope: + return this.parseAccessOrCallScope(); + case T_ThisScope: + this.nextToken(); + return new AccessThis(0); + case T_LeftParen: + this.nextToken(); + const result = this.parseExpression(); + this.expect(T_RightParen); + return result; + case T_LeftBracket: + this.nextToken(); + const elements = this.parseExpressionList(T_RightBracket); + this.expect(T_RightBracket); + return new LiteralArray(elements); + case T_LeftBrace: + return this.parseObject(); + case T_StringLiteral: + { + const value = this.tokenValue; + this.nextToken(); + return new LiteralString(value); + } + case T_NumericLiteral: + { + const value = this.tokenValue; + this.nextToken(); + return new LiteralPrimitive(value); + } + case T_NullKeyword: + case T_UndefinedKeyword: + case T_TrueKeyword: + case T_FalseKeyword: + this.nextToken(); + return new LiteralPrimitive(TokenValues[token & T_TokenMask]); + default: + if (this.index >= this.length) { + throw new Error(`Unexpected end of expression at column ${this.index} of ${this.input}`); + } else { + const expression = this.input.slice(this.lastIndex, this.index); + this.error(`Unexpected token ${expression}`); + } } } parseAccessOrCallScope() { - let name = this.peek.key; - - this.advance(); + let name = this.tokenValue; + let token = this.token; - if (name === '$this') { - return new AccessThis(0); - } + this.nextToken(); let ancestor = 0; - while (name === '$parent') { + while (token === T_ParentScope) { ancestor++; - if (this.optional('.')) { - name = this.peek.key; - this.advance(); - } else if (this.peek === EOF - || this.peek.text === '(' - || 
this.peek.text === ')' - || this.peek.text === '[' - || this.peek.text === '}' - || this.peek.text === ',' - || this.peek.text === '|' - || this.peek.text === '&' - ) { + if (this.optional(T_Period)) { + name = this.tokenValue; + token = this.token; + this.nextToken(); + } else if ((this.token & T_AccessScopeTerminal) === T_AccessScopeTerminal) { return new AccessThis(ancestor); } else { - this.error(`Unexpected token ${this.peek.text}`); + const expression = this.input.slice(this.lastIndex, this.index); + this.error(`Unexpected token ${expression}`); } } - if (this.optional('(')) { - let args = this.parseExpressionList(')'); - this.expect(')'); + if (this.optional(T_LeftParen)) { + let args = this.parseExpressionList(T_RightParen); + this.expect(T_RightParen); return new CallScope(name, args, ancestor); } @@ -352,28 +298,28 @@ export class ParserImplementation { let keys = []; let values = []; - this.expect('{'); + this.expect(T_LeftBrace); - if (this.peek.text !== '}') { + if (this.token ^ T_RightBrace) { do { - // TODO(kasperl): Stricter checking. Only allow identifiers + // todo(kasperl): Stricter checking. Only allow identifiers // and strings as keys. Maybe also keywords? - let peek = this.peek; - let value = peek.value; - keys.push(typeof value === 'string' ? 
value : peek.text); - this.advance(); - if (peek.key && (this.peek.text === ',' || this.peek.text === '}')) { + let token = this.token; + keys.push(this.tokenValue); + + this.nextToken(); + if (token === T_Identifier && (this.token === T_Comma || this.token === T_RightBrace)) { --this.index; values.push(this.parseAccessOrCallScope()); } else { - this.expect(':'); + this.expect(T_Colon); values.push(this.parseExpression()); } - } while (this.optional(',')); + } while (this.optional(T_Comma)); } - this.expect('}'); + this.expect(T_RightBrace); return new LiteralObject(keys, values); } @@ -381,41 +327,501 @@ export class ParserImplementation { parseExpressionList(terminator) { let result = []; - if (this.peek.text !== terminator) { + if (this.token ^ terminator) { do { result.push(this.parseExpression()); - } while (this.optional(',')); + } while (this.optional(T_Comma)); } return result; } - optional(text) { - if (this.peek.text === text) { - this.advance(); + nextToken() { + this.lastIndex = this.index; + + return this.token = this.scanToken(); + } + + scanToken() { + while (this.index < this.length) { + this.startIndex = this.index; + let current = this.input.charCodeAt(this.index); + // skip whitespace. + if (current <= $SPACE) { + this.index++; + continue; + } + + // handle identifiers and numbers. 
+ if (isIdentifierStart(current)) { + return this.scanIdentifier(); + } + + if (isDigit(current)) { + return this.scanNumber(false); + } + + let start = this.index; + + switch (current) { + case $PERIOD: + { + if (this.index < this.length) { + const next = this.input.charCodeAt(this.index + 1); + if (next >= $0 && next <= $9) { + return this.scanNumber(true); + } + this.index++; + } + return T_Period; + } + case $LPAREN: + this.index++; + return T_LeftParen; + case $RPAREN: + this.index++; + return T_RightParen; + case $LBRACE: + this.index++; + return T_LeftBrace; + case $RBRACE: + this.index++; + return T_RightBrace; + case $LBRACKET: + this.index++; + return T_LeftBracket; + case $RBRACKET: + this.index++; + return T_RightBracket; + case $COMMA: + this.index++; + return T_Comma; + case $COLON: + this.index++; + return T_Colon; + case $SEMICOLON: + this.index++; + return T_Semicolon; + case $SQ: + case $DQ: + return this.scanString(); + case $PLUS: + this.index++; + return T_Add; + case $MINUS: + this.index++; + return T_Subtract; + case $STAR: + this.index++; + return T_Multiply; + case $SLASH: + this.index++; + return T_Divide; + case $PERCENT: + this.index++; + return T_Modulo; + case $CARET: + this.index++; + return T_BitwiseXor; + case $QUESTION: + this.index++; + return T_QuestionMark; + case $LT: + { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + this.index++; + return T_LessThanOrEqual; + } + return T_LessThan; + } + case $GT: + { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + this.index++; + return T_GreaterThanOrEqual; + } + return T_GreaterThan; + } + case $BANG: + { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + this.index++; + return T_StrictNotEqual; + } + return T_LooseNotEqual; + } + return T_LogicalNot; + } + case $EQ: + { + let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + 
let next = this.input.charCodeAt(++this.index); + if (next === $EQ) { + this.index++; + return T_StrictEqual; + } + return T_LooseEqual; + } + return T_Assign; + } + case $AMPERSAND: + { + let next = this.input.charCodeAt(++this.index); + if (next === $AMPERSAND) { + this.index++; + return T_LogicalAnd; + } + return T_BindingBehavior; + } + case $BAR: + { + let next = this.input.charCodeAt(++this.index); + if (next === $BAR) { + this.index++; + return T_LogicalOr; + } + return T_ValueConverter; + } + case $NBSP: + this.index++; + continue; + // no default + } + + let character = String.fromCharCode(this.input.charCodeAt(this.index)); + this.error(`Unexpected character [${character}]`); + return null; + } + + return T_EndOfSource; + } + + scanIdentifier() { + const start = this.index; + let char = this.input.charCodeAt(++this.index); + + while (isIdentifierPart(char)) { + char = this.input.charCodeAt(++this.index); + } + + let text = this.input.slice(start, this.index); + this.tokenValue = text; + + let len = text.length; + if (len >= 4 && len <= 9) { + const token = KeywordLookup[text]; + if (token !== undefined) { + return token; + } + } + + return T_Identifier; + } + + scanNumber(isFloat) { + let start = this.index; + this.index++; + let char = this.input.charCodeAt(this.index); + loop: while (true) { + switch(char) { + case $PERIOD: + // todo(fkleuver): Should deal with spread operator elsewhere, + // and throw here when seeing more than one period + isFloat = true; + break; + case $e: + case $E: + char = this.input.charCodeAt(++this.index); + if (char === $PLUS || char === $MINUS) { + char = this.input.charCodeAt(++this.index); + } + if (char < $0 || char > $9) { + this.error('Invalid exponent', -1); + } + isFloat = true; + break; + default: + if (char < $0 || char > $9 || this.index === this.length) { + break loop; + } + } + char = this.input.charCodeAt(++this.index); + } + + const text = this.input.slice(start, this.index); + this.tokenValue = isFloat ? 
parseFloat(text) : parseInt(text, 10); + return T_NumericLiteral; + } + + scanString() { + let start = this.index; + let quote = this.input.charCodeAt(this.index++); // Skip initial quote. + + let buffer; + let marker = this.index; + let char = this.input.charCodeAt(this.index); + + while (char !== quote) { + if (char === $BACKSLASH) { + if (!buffer) { + buffer = []; + } + + buffer.push(this.input.slice(marker, this.index)); + char = this.input.charCodeAt(++this.index) + + let unescaped; + + if (char === $u) { + // todo(kasperl): Check bounds? Make sure we have test + // coverage for this. + let hex = this.input.slice(this.index + 1, this.index + 5); + + if (!/[A-Z0-9]{4}/.test(hex)) { + this.error(`Invalid unicode escape [\\u${hex}]`); + } + + unescaped = parseInt(hex, 16); + this.index += 5; + } else { + unescaped = unescape(this.input.charCodeAt(this.index)); + this.index++; + } + + buffer.push(String.fromCharCode(unescaped)); + marker = this.index; + } else if (char === $EOF) { + this.error('Unterminated quote'); + } else { + this.index++; + } + + char = this.input.charCodeAt(this.index) + } + + let last = this.input.slice(marker, this.index); + this.index++; // Skip terminating quote. + let text = this.input.slice(start, this.index); + + // Compute the unescaped string value. + let unescaped = last; + + if (buffer !== null && buffer !== undefined) { + buffer.push(last); + unescaped = buffer.join(''); + } + + this.tokenValue = unescaped; + this.tokenRaw = text; + return T_StringLiteral; + } + + error(message, offset = 0) { + // todo(kasperl): Try to get rid of the offset. It is only used to match + // the error expectations in the lexer tests for numbers with exponents. 
+ let position = this.index + offset; + throw new Error(`Lexer Error: ${message} at column ${position} in expression [${this.input}]`); + } + + optional(type) { + if (this.token === type) { + this.nextToken(); return true; } return false; } - expect(text) { - if (this.peek.text === text) { - this.advance(); + expect(type) { + if (this.token === type) { + this.nextToken(); } else { - this.error(`Missing expected ${text}`); + this.error(`Missing expected token type ${type}`); } } +} - advance() { - this.index++; - } +const $EOF = 0; +const $TAB = 9; +const $LF = 10; +const $VTAB = 11; +const $FF = 12; +const $CR = 13; +const $SPACE = 32; +const $BANG = 33; +const $DQ = 34; +const $$ = 36; +const $PERCENT = 37; +const $AMPERSAND = 38; +const $SQ = 39; +const $LPAREN = 40; +const $RPAREN = 41; +const $STAR = 42; +const $PLUS = 43; +const $COMMA = 44; +const $MINUS = 45; +const $PERIOD = 46; +const $SLASH = 47; +const $COLON = 58; +const $SEMICOLON = 59; +const $LT = 60; +const $EQ = 61; +const $GT = 62; +const $QUESTION = 63; + +const $0 = 48; +const $9 = 57; + +const $A = 65; +const $E = 69; +const $Z = 90; + +const $LBRACKET = 91; +const $BACKSLASH = 92; +const $RBRACKET = 93; +const $CARET = 94; +const $_ = 95; + +const $a = 97; +const $e = 101; +const $f = 102; +const $n = 110; +const $r = 114; +const $t = 116; +const $u = 117; +const $v = 118; +const $z = 122; + +const $LBRACE = 123; +const $BAR = 124; +const $RBRACE = 125; +const $NBSP = 160; + +function isIdentifierStart(code) { + return ($a <= code && code <= $z) + || ($A <= code && code <= $Z) + || (code === $_) + || (code === $$); +} - error(message) { - let location = (this.index < this.tokens.length) - ? 
`at column ${this.tokens[this.index].index + 1} in` - : 'at the end of the expression'; +function isIdentifierPart(code) { + return ($a <= code && code <= $z) + || ($A <= code && code <= $Z) + || ($0 <= code && code <= $9) + || (code === $_) + || (code === $$); +} + +function isDigit(code) { + return ($0 <= code && code <= $9); +} - throw new Error(`Parser Error: ${message} ${location} [${this.input}]`); +function unescape(code) { + switch (code) { + case $n: return $LF; + case $f: return $FF; + case $r: return $CR; + case $t: return $TAB; + case $v: return $VTAB; + default: return code; } } + +/* Performing a bitwise and (&) with this value (63) will return only the + * token bit, which corresponds to the index of the token's value in the + * TokenValues array */ +const T_TokenMask = (1 << 6) - 1; + +/* Shifting 6 bits to the left gives us a step size of 64 in a range of + * 64 (1 << 6) to 448 (7 << 6) for our precedence bit + * This is the lowest value which does not overlap with the token bits 0-38. */ +const T_PrecedenceShift = 6; + +/* Performing a bitwise and (&) with this value will return only the + * precedence bits, which are used to determine the parsing order of binary + * expressions */ +const T_Precedence = 7 << T_PrecedenceShift; + +/** ')' | '}' | ']' */ +const T_ClosingToken = 1 << 9; +/** EndOfSource | '(' | '}' | ')' | ',' | '[' | '&' | '|' */ +const T_AccessScopeTerminal = 1 << 10; +const T_EndOfSource = 1 << 11 | T_AccessScopeTerminal; +const T_Identifier = 1 << 12; +const T_NumericLiteral = 1 << 13; +const T_StringLiteral = 1 << 14; +const T_BinaryOperator = 1 << 15; +const T_UnaryOperator = 1 << 16; + +/** false */ const T_FalseKeyword = 0; +/** true */ const T_TrueKeyword = 1; +/** null */ const T_NullKeyword = 2; +/** undefined */ const T_UndefinedKeyword = 3; +/** '$this' */ const T_ThisScope = 4; +/** '$parent' */ const T_ParentScope = 5; + +/** '(' */const T_LeftParen = 6 | T_AccessScopeTerminal; +/** '{' */const T_LeftBrace = 7; +/** '.' 
*/const T_Period = 8; +/** '}' */const T_RightBrace = 9 | T_ClosingToken | T_AccessScopeTerminal; +/** ')' */const T_RightParen = 10 | T_ClosingToken | T_AccessScopeTerminal; +/** ';' */const T_Semicolon = 11; +/** ',' */const T_Comma = 12 | T_AccessScopeTerminal; +/** '[' */const T_LeftBracket = 13 | T_AccessScopeTerminal; +/** ']' */const T_RightBracket = 14 | T_ClosingToken; +/** ':' */const T_Colon = 15; +/** '?' */const T_QuestionMark = 16; +/** ''' */const T_SingleQuote = 17; +/** '"' */const T_DoubleQuote = 18; + +/** '&' */ const T_BindingBehavior = 19 | T_AccessScopeTerminal; +/** '|' */ const T_ValueConverter = 20 | T_AccessScopeTerminal; +/** '||' */ const T_LogicalOr = 21 | T_BinaryOperator | 1 << T_PrecedenceShift; +/** '&&' */ const T_LogicalAnd = 22 | T_BinaryOperator | 2 << T_PrecedenceShift; +/** '^' */ const T_BitwiseXor = 23 | T_BinaryOperator | 3 << T_PrecedenceShift; +/** '==' */ const T_LooseEqual = 24 | T_BinaryOperator | 4 << T_PrecedenceShift; +/** '!=' */ const T_LooseNotEqual = 25 | T_BinaryOperator | 4 << T_PrecedenceShift; +/** '===' */const T_StrictEqual = 26 | T_BinaryOperator | 4 << T_PrecedenceShift; +/** '!==' */const T_StrictNotEqual = 27 | T_BinaryOperator | 4 << T_PrecedenceShift; +/** '<' */ const T_LessThan = 28 | T_BinaryOperator | 5 << T_PrecedenceShift; +/** '>' */ const T_GreaterThan = 29 | T_BinaryOperator | 5 << T_PrecedenceShift; +/** '<=' */ const T_LessThanOrEqual = 30 | T_BinaryOperator | 5 << T_PrecedenceShift; +/** '>=' */ const T_GreaterThanOrEqual = 31 | T_BinaryOperator | 5 << T_PrecedenceShift; +/** '+' */ const T_Add = 32 | T_UnaryOperator | T_BinaryOperator | 6 << T_PrecedenceShift; +/** '-' */ const T_Subtract = 33 | T_UnaryOperator | T_BinaryOperator | 6 << T_PrecedenceShift; +/** '*' */ const T_Multiply = 34 | T_BinaryOperator | 7 << T_PrecedenceShift; +/** '%' */ const T_Modulo = 35 | T_BinaryOperator | 7 << T_PrecedenceShift; +/** '/' */ const T_Divide = 36 | T_BinaryOperator | 7 << T_PrecedenceShift;
+/** '=' */ const T_Assign = 37; +/** '!' */ const T_LogicalNot = 38 | T_UnaryOperator; + +const KeywordLookup = Object.create(null, { + true: {value: T_TrueKeyword}, + null: {value: T_NullKeyword}, + false: {value: T_FalseKeyword}, + undefined: {value: T_UndefinedKeyword}, + $this: {value: T_ThisScope}, + $parent: {value: T_ParentScope} +}); + +/** + * Array for mapping tokens to token values. The indices of the values + * correspond to the token bits 0-38. + * For this to work properly, the values in the array must be kept in + * the same order as the token bits. + * Usage: TokenValues[token & T_TokenMask] + */ +const TokenValues = [ + false, true, null, undefined, '$this', '$parent', + + '(', '{', '.', '}', ')', ';', ',', '[', ']', ':', '?', '\'', '"', + + '&', '|', '||', '&&', '^', '==', '!=', '===', '!==', '<', '>', + '<=', '>=', '+', '-', '*', '%', '/', '=', '!' +]; diff --git a/test/parser.spec.js b/test/parser.spec.js index 7b7d6b1a..3a8cf59a 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -13,7 +13,9 @@ import { CallFunction, AccessThis, AccessAncestor, - Assign + Assign, + Conditional, + Binary } from '../src/ast'; describe('Parser', () => { @@ -52,6 +54,55 @@ describe('Parser', () => { } }); + it('parses conditional', () => { + let expression = parser.parse('foo ? bar : baz'); + expect(expression instanceof Conditional).toBe(true); + expect(expression.condition instanceof AccessScope).toBe(true); + expect(expression.condition.name).toBe('foo'); + expect(expression.yes instanceof AccessScope).toBe(true); + expect(expression.yes.name).toBe('bar'); + expect(expression.no instanceof AccessScope).toBe(true); + expect(expression.no.name).toBe('baz'); + }); + + it('parses nested conditional', () => { + let expression = parser.parse('foo ? bar : foo1 ? 
bar1 : baz'); + expect(expression instanceof Conditional).toBe(true); + expect(expression.condition instanceof AccessScope).toBe(true); + expect(expression.condition.name).toBe('foo'); + expect(expression.yes instanceof AccessScope).toBe(true); + expect(expression.yes.name).toBe('bar'); + expect(expression.no instanceof Conditional).toBe(true); + expect(expression.no.condition instanceof AccessScope).toBe(true); + expect(expression.no.condition.name).toBe('foo1'); + expect(expression.no.yes instanceof AccessScope).toBe(true); + expect(expression.no.yes.name).toBe('bar1'); + expect(expression.no.no instanceof AccessScope).toBe(true); + expect(expression.no.no.name).toBe('baz'); + }); + + describe('parses binary', () => { + const operators = [ + '&&', '||', + '==', '!=', '===', '!==', + '<', '>', '<=', '>=', + '+', '-', + '*', '%', '/' + ]; + + for (let op of operators) { + it(`\"${op}\"`, () => { + let expression = parser.parse(`foo ${op} bar`); + expect(expression instanceof Binary).toBe(true); + expect(expression.operation).toBe(op); + expect(expression.left instanceof AccessScope).toBe(true); + expect(expression.left.name).toBe('foo'); + expect(expression.right instanceof AccessScope).toBe(true); + expect(expression.right.name).toBe('bar'); + }); + } + }); + it('parses binding behaviors', () => { let expression = parser.parse('foo & bar'); expect(expression instanceof BindingBehavior).toBe(true); From b91a0e2a6b12ab0db352f94609300b99c6ff31ad Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Tue, 24 Apr 2018 12:17:56 +0200 Subject: [PATCH 02/18] refactor(value-converter): set AllArgs inside constructor --- src/ast.js | 4 ++-- src/parser.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ast.js b/src/ast.js index cdaad4ea..ce95227c 100644 --- a/src/ast.js +++ b/src/ast.js @@ -104,13 +104,13 @@ export class BindingBehavior extends Expression { } export class ValueConverter extends Expression { - constructor(expression, name, args, allArgs) { 
+ constructor(expression, name, args) { super(); this.expression = expression; this.name = name; this.args = args; - this.allArgs = allArgs; + this.allArgs = [expression].concat(args); } evaluate(scope, lookupFunctions) { diff --git a/src/parser.js b/src/parser.js index 4510c11c..89cd009d 100644 --- a/src/parser.js +++ b/src/parser.js @@ -96,7 +96,7 @@ export class ParserImplementation { args.push(this.parseExpression()); } - result = new ValueConverter(result, name, args, [result].concat(args)); + result = new ValueConverter(result, name, args); } return result; From c60e2401ee10caf87eaeaa1cb0c6cc84292c752b Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Tue, 24 Apr 2018 12:19:04 +0200 Subject: [PATCH 03/18] test(parser): cleanup / add more tests --- test/parser.spec.js | 461 +++++++++++++++++++++++++++----------------- 1 file changed, 280 insertions(+), 181 deletions(-) diff --git a/test/parser.spec.js b/test/parser.spec.js index 3a8cf59a..a581423a 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -15,7 +15,8 @@ import { AccessAncestor, Assign, Conditional, - Binary + Binary, + Expression } from '../src/ast'; describe('Parser', () => { @@ -25,60 +26,59 @@ describe('Parser', () => { parser = new Parser(); }); - it('parses literal primitives', () => { + describe('parses literal primitive', () => { // http://es5.github.io/x7.html#x7.8.4 - let tests = [ - { expression: '\'foo\'', value: 'foo', type: LiteralString }, - { expression: '\'\\\\\'', value: '\\', type: LiteralString }, - { expression: '\'\\\'\'', value: '\'', type: LiteralString }, - { expression: '\'"\'', value: '"', type: LiteralString }, - { expression: '\'\\f\'', value: '\f', type: LiteralString }, - { expression: '\'\\n\'', value: '\n', type: LiteralString }, - { expression: '\'\\r\'', value: '\r', type: LiteralString }, - { expression: '\'\\t\'', value: '\t', type: LiteralString }, - { expression: '\'\\v\'', value: '\v', type: LiteralString }, - { expression: 'true', value: true, type: 
LiteralPrimitive }, - { expression: 'false', value: false, type: LiteralPrimitive }, - { expression: 'null', value: null, type: LiteralPrimitive }, - { expression: 'undefined', value: undefined, type: LiteralPrimitive }, - { expression: '0', value: 0, type: LiteralPrimitive }, - { expression: '1', value: 1, type: LiteralPrimitive }, - { expression: '2.2', value: 2.2, type: LiteralPrimitive } + const tests = [ + { expression: '\'foo\'', expected: new LiteralString('foo') }, + { expression: '\'\\\\\'', expected: new LiteralString('\\') }, + { expression: '\'\\\'\'', expected: new LiteralString('\'') }, + { expression: '\'"\'', expected: new LiteralString('"') }, + { expression: '\'\\f\'', expected: new LiteralString('\f') }, + { expression: '\'\\n\'', expected: new LiteralString('\n') }, + { expression: '\'\\r\'', expected: new LiteralString('\r') }, + { expression: '\'\\t\'', expected: new LiteralString('\t') }, + { expression: '\'\\v\'', expected: new LiteralString('\v') }, + { expression: 'true', expected: new LiteralPrimitive(true) }, + { expression: 'false', expected: new LiteralPrimitive(false) }, + { expression: 'null', expected: new LiteralPrimitive(null) }, + { expression: 'undefined', expected: new LiteralPrimitive(undefined) }, + { expression: '0', expected: new LiteralPrimitive(0) }, + { expression: '1', expected: new LiteralPrimitive(1) }, + { expression: '2.2', expected: new LiteralPrimitive(2.2) } ]; - for (let i = 0; i < tests.length; i++) { - let test = tests[i]; - let expression = parser.parse(test.expression); - expect(expression instanceof test.type).toBe(true); - expect(expression.value).toEqual(test.value); + for (const test of tests) { + it(test.expression, () => { + let expression = parser.parse(test.expression); + verifyEqual(expression, test.expected); + }); } }); it('parses conditional', () => { let expression = parser.parse('foo ? 
bar : baz'); - expect(expression instanceof Conditional).toBe(true); - expect(expression.condition instanceof AccessScope).toBe(true); - expect(expression.condition.name).toBe('foo'); - expect(expression.yes instanceof AccessScope).toBe(true); - expect(expression.yes.name).toBe('bar'); - expect(expression.no instanceof AccessScope).toBe(true); - expect(expression.no.name).toBe('baz'); + verifyEqual(expression, + new Conditional( + new AccessScope('foo', 0), + new AccessScope('bar', 0), + new AccessScope('baz', 0) + ) + ); }); it('parses nested conditional', () => { let expression = parser.parse('foo ? bar : foo1 ? bar1 : baz'); - expect(expression instanceof Conditional).toBe(true); - expect(expression.condition instanceof AccessScope).toBe(true); - expect(expression.condition.name).toBe('foo'); - expect(expression.yes instanceof AccessScope).toBe(true); - expect(expression.yes.name).toBe('bar'); - expect(expression.no instanceof Conditional).toBe(true); - expect(expression.no.condition instanceof AccessScope).toBe(true); - expect(expression.no.condition.name).toBe('foo1'); - expect(expression.no.yes instanceof AccessScope).toBe(true); - expect(expression.no.yes.name).toBe('bar1'); - expect(expression.no.no instanceof AccessScope).toBe(true); - expect(expression.no.no.name).toBe('baz'); + verifyEqual(expression, + new Conditional( + new AccessScope('foo', 0), + new AccessScope('bar', 0), + new Conditional( + new AccessScope('foo1', 0), + new AccessScope('bar1', 0), + new AccessScope('baz', 0) + ) + ) + ); }); describe('parses binary', () => { @@ -93,138 +93,212 @@ describe('Parser', () => { for (let op of operators) { it(`\"${op}\"`, () => { let expression = parser.parse(`foo ${op} bar`); - expect(expression instanceof Binary).toBe(true); - expect(expression.operation).toBe(op); - expect(expression.left instanceof AccessScope).toBe(true); - expect(expression.left.name).toBe('foo'); - expect(expression.right instanceof AccessScope).toBe(true); - 
expect(expression.right.name).toBe('bar'); + verifyEqual(expression, + new Binary( + op, + new AccessScope('foo', 0), + new AccessScope('bar', 0) + ) + ); }); } }); - it('parses binding behaviors', () => { + it('parses binding behavior', () => { let expression = parser.parse('foo & bar'); - expect(expression instanceof BindingBehavior).toBe(true); - expect(expression.name).toBe('bar'); - expect(expression.expression instanceof AccessScope).toBe(true); + verifyEqual(expression, + new BindingBehavior(new AccessScope('foo', 0), 'bar', []) + ); + }); - expression = parser.parse('foo & bar:x:y:z & baz:a:b:c'); - expect(expression instanceof BindingBehavior).toBe(true); - expect(expression.name).toBe('baz'); - expect(expression.args).toEqual([new AccessScope('a', 0), new AccessScope('b', 0), new AccessScope('c', 0)]) - expect(expression.expression instanceof BindingBehavior).toBe(true); - expect(expression.expression.name).toBe('bar'); - expect(expression.expression.args).toEqual([new AccessScope('x', 0), new AccessScope('y', 0), new AccessScope('z', 0)]); - expect(expression.expression.expression instanceof AccessScope).toBe(true); + it('parses chained binding behaviors', () => { + let expression = parser.parse('foo & bar:x:y:z & baz:a:b:c'); + verifyEqual(expression, + new BindingBehavior( + new BindingBehavior( + new AccessScope('foo', 0), + 'bar', + [ + new AccessScope('x', 0), + new AccessScope('y', 0), + new AccessScope('z', 0) + ] + ), + 'baz', + [ + new AccessScope('a', 0), + new AccessScope('b', 0), + new AccessScope('c', 0) + ] + ) + ); }); - it('parses value converters', () => { + it('parses value converter', () => { let expression = parser.parse('foo | bar'); - expect(expression instanceof ValueConverter).toBe(true); - expect(expression.name).toBe('bar'); - expect(expression.expression instanceof AccessScope).toBe(true); + verifyEqual(expression, + new ValueConverter(new AccessScope('foo', 0), 'bar', []) + ); + }); - expression = parser.parse('foo | bar:x:y:z 
| baz:a:b:c'); - expect(expression instanceof ValueConverter).toBe(true); - expect(expression.name).toBe('baz'); - expect(expression.args).toEqual([new AccessScope('a', 0), new AccessScope('b', 0), new AccessScope('c', 0)]); - expect(expression.expression instanceof ValueConverter).toBe(true); - expect(expression.expression.name).toBe('bar'); - expect(expression.expression.args).toEqual([new AccessScope('x', 0), new AccessScope('y', 0), new AccessScope('z', 0)]); - expect(expression.expression.expression instanceof AccessScope).toBe(true); + it('parses chained value converters', () => { + let expression = parser.parse('foo | bar:x:y:z | baz:a:b:c'); + verifyEqual(expression, + new ValueConverter( + new ValueConverter( + new AccessScope('foo', 0), + 'bar', + [ + new AccessScope('x', 0), + new AccessScope('y', 0), + new AccessScope('z', 0) + ] + ), + 'baz', + [ + new AccessScope('a', 0), + new AccessScope('b', 0), + new AccessScope('c', 0) + ] + ) + ); }); - it('parses value converters and binding behaviors', () => { + it('parses chained value converters and binding behaviors', () => { let expression = parser.parse('foo | bar:x:y:z & baz:a:b:c'); - expect(expression instanceof BindingBehavior).toBe(true); - expect(expression.name).toBe('baz'); - expect(expression.args).toEqual([new AccessScope('a', 0), new AccessScope('b', 0), new AccessScope('c', 0)]) - expect(expression.expression instanceof ValueConverter).toBe(true); - expect(expression.expression.name).toBe('bar'); - expect(expression.expression.args).toEqual([new AccessScope('x', 0), new AccessScope('y', 0), new AccessScope('z', 0)]); - expect(expression.expression.expression instanceof AccessScope).toBe(true); + verifyEqual(expression, + new BindingBehavior( + new ValueConverter( + new AccessScope('foo', 0), + 'bar', + [ + new AccessScope('x', 0), + new AccessScope('y', 0), + new AccessScope('z', 0) + ] + ), + 'baz', + [ + new AccessScope('a', 0), + new AccessScope('b', 0), + new AccessScope('c', 0) + ] + ) + 
); }); it('parses AccessScope', () => { let expression = parser.parse('foo'); - expect(expression instanceof AccessScope).toBe(true); - expect(expression.name).toBe('foo'); + verifyEqual(expression, new AccessScope('foo', 0)); }); it('parses AccessMember', () => { let expression = parser.parse('foo.bar'); - expect(expression instanceof AccessMember).toBe(true); - expect(expression.name).toBe('bar'); - expect(expression.object instanceof AccessScope).toBe(true); - expect(expression.object.name).toBe('foo'); + verifyEqual(expression, + new AccessMember(new AccessScope('foo', 0), 'bar') + ); }); it('parses Assign', () => { let expression = parser.parse('foo = bar'); - expect(expression instanceof Assign).toBe(true); - expect(expression.target instanceof AccessScope).toBe(true); - expect(expression.target.name).toBe('foo'); - expect(expression.value instanceof AccessScope).toBe(true); - expect(expression.value.name).toBe('bar'); - - expression = parser.parse('foo = bar = baz'); - expect(expression instanceof Assign).toBe(true); - expect(expression.target instanceof Assign).toBe(true); - expect(expression.target.target instanceof AccessScope).toBe(true); - expect(expression.target.target.name).toBe('foo'); - expect(expression.target.value instanceof AccessScope).toBe(true); - expect(expression.target.value.name).toBe('bar'); - expect(expression.value instanceof AccessScope).toBe(true); - expect(expression.value.name).toBe('baz'); + verifyEqual(expression, + new Assign( + new AccessScope('foo', 0), + new AccessScope('bar', 0) + ) + ); + }); + + it('parses chained Assign', () => { + let expression = parser.parse('foo = bar = baz'); + verifyEqual(expression, + new Assign( + new Assign( + new AccessScope('foo', 0), + new AccessScope('bar', 0) + ), + new AccessScope('baz', 0) + ) + ); }); it('parses CallScope', () => { let expression = parser.parse('foo(x)'); - expect(expression instanceof CallScope).toBe(true); - expect(expression.name).toBe('foo'); - 
expect(expression.args).toEqual([new AccessScope('x', 0)]); + verifyEqual(expression, + new CallScope('foo', [new AccessScope('x', 0)], 0) + ); + }); + + it('parses nested CallScope', () => { + let expression = parser.parse('foo(bar(x), y)'); + verifyEqual(expression, + new CallScope( + 'foo', + [ + new CallScope( + 'bar', + [new AccessScope('x', 0)], + 0), + new AccessScope('y', 0) + ], 0) + ); }); it('parses CallMember', () => { let expression = parser.parse('foo.bar(x)'); - expect(expression instanceof CallMember).toBe(true); - expect(expression.name).toBe('bar'); - expect(expression.args).toEqual([new AccessScope('x', 0)]); - expect(expression.object instanceof AccessScope).toBe(true); - expect(expression.object.name).toBe('foo'); + verifyEqual(expression, + new CallMember( + new AccessScope('foo', 0), + 'bar', + [new AccessScope('x', 0)] + ) + ); + }); + + it('parses nested CallMember', () => { + let expression = parser.parse('foo.bar.baz(x)'); + verifyEqual(expression, + new CallMember( + new AccessMember( + new AccessScope('foo', 0), + 'bar' + ), + 'baz', + [new AccessScope('x', 0)] + ) + ); }); it('parses $this', () => { let expression = parser.parse('$this'); - expect(expression instanceof AccessThis).toBe(true); + verifyEqual(expression, new AccessThis(0)); }); it('translates $this.member to AccessScope', () => { let expression = parser.parse('$this.foo'); - expect(expression instanceof AccessScope).toBe(true); - expect(expression.name).toBe('foo'); + verifyEqual(expression, + new AccessScope('foo', 0) + ); }); it('translates $this() to CallFunction', () => { let expression = parser.parse('$this()'); - expect(expression instanceof CallFunction).toBe(true); - expect(expression.func instanceof AccessThis).toBe(true); + verifyEqual(expression, + new CallFunction(new AccessThis(0), [])); }); it('translates $this.member() to CallScope', () => { let expression = parser.parse('$this.foo(x)'); - expect(expression instanceof CallScope).toBe(true); - 
expect(expression.name).toBe('foo'); - expect(expression.args).toEqual([new AccessScope('x', 0)]); + verifyEqual(expression, + new CallScope('foo', [new AccessScope('x', 0)], 0) + ); }); it('parses $parent', () => { let s = '$parent'; for (let i = 1; i < 10; i++) { let expression = parser.parse(s); - expect(expression instanceof AccessThis).toBe(true); - expect(expression.ancestor).toBe(i); + verifyEqual(expression, new AccessThis(i)); s += '.$parent'; } }); @@ -234,10 +308,9 @@ describe('Parser', () => { for (let i = 1; i < 10; i++) { let s = `$parent${child} | foo`; let expression = parser.parse(s); - expect(expression instanceof ValueConverter).toBe(true); - expect(expression.name).toBe('foo'); - expect(expression.expression instanceof AccessThis).toBe(true); - expect(expression.expression.ancestor).toBe(i); + verifyEqual(expression, + new ValueConverter(new AccessThis(i), 'foo', []) + ); child += '.$parent'; } }); @@ -247,11 +320,9 @@ describe('Parser', () => { for (let i = 1; i < 10; i++) { let s = `$parent${child}.bar | foo`; let expression = parser.parse(s); - expect(expression instanceof ValueConverter).toBe(true); - expect(expression.name).toBe('foo'); - expect(expression.expression instanceof AccessScope).toBe(true); - expect(expression.expression.name).toBe('bar'); - expect(expression.expression.ancestor).toBe(i); + verifyEqual(expression, + new ValueConverter(new AccessScope('bar', i), 'foo', []) + ); child += '.$parent'; } }); @@ -261,10 +332,9 @@ describe('Parser', () => { for (let i = 1; i < 10; i++) { let s = `$parent${child} & foo`; let expression = parser.parse(s); - expect(expression instanceof BindingBehavior).toBe(true); - expect(expression.name).toBe('foo'); - expect(expression.expression instanceof AccessThis).toBe(true); - expect(expression.expression.ancestor).toBe(i); + verifyEqual(expression, + new BindingBehavior(new AccessThis(i), 'foo', []) + ); child += '.$parent'; } }); @@ -274,11 +344,9 @@ describe('Parser', () => { for (let i = 1; 
i < 10; i++) { let s = `$parent${child}.bar & foo`; let expression = parser.parse(s); - expect(expression instanceof BindingBehavior).toBe(true); - expect(expression.name).toBe('foo'); - expect(expression.expression instanceof AccessScope).toBe(true); - expect(expression.expression.name).toBe('bar'); - expect(expression.expression.ancestor).toBe(i); + verifyEqual(expression, + new BindingBehavior(new AccessScope('bar', i), 'foo', []) + ); child += '.$parent'; } }); @@ -287,9 +355,9 @@ describe('Parser', () => { let s = '$parent.foo'; for (let i = 1; i < 10; i++) { let expression = parser.parse(s); - expect(expression instanceof AccessScope).toBe(true); - expect(expression.name).toBe('foo'); - expect(expression.ancestor).toBe(i); + verifyEqual(expression, + new AccessScope('foo', i) + ); s = '$parent.' + s; } }); @@ -298,9 +366,9 @@ describe('Parser', () => { let s = '$parent.foo()'; for (let i = 1; i < 10; i++) { let expression = parser.parse(s); - expect(expression instanceof CallScope).toBe(true); - expect(expression.name).toBe('foo'); - expect(expression.ancestor).toBe(i); + verifyEqual(expression, + new CallScope('foo', [], i) + ); s = '$parent.' + s; } }); @@ -309,9 +377,9 @@ describe('Parser', () => { let s = '$parent()'; for (let i = 1; i < 10; i++) { let expression = parser.parse(s); - expect(expression instanceof CallFunction).toBe(true); - expect(expression.func instanceof AccessThis).toBe(true); - expect(expression.func.ancestor).toBe(i); + verifyEqual(expression, + new CallFunction(new AccessThis(i), []) + ); s = '$parent.' 
+ s; } }); @@ -320,54 +388,67 @@ describe('Parser', () => { let s = '$parent[0]'; for (let i = 1; i < 10; i++) { let expression = parser.parse(s); - expect(expression instanceof AccessKeyed).toBe(true); - expect(expression.object instanceof AccessThis).toBe(true); - expect(expression.object.ancestor).toBe(i); - expect(expression.key instanceof LiteralPrimitive).toBe(true); - expect(expression.key.value).toBe(0); + verifyEqual(expression, + new AccessKeyed( + new AccessThis(i), + new LiteralPrimitive(0) + ) + ); s = '$parent.' + s; } }); it('handles $parent inside CallMember', () => { let expression = parser.parse('matcher.bind($parent)'); - expect(expression instanceof CallMember).toBe(true); - expect(expression.name).toBe('bind'); - expect(expression.args.length).toBe(1); - expect(expression.args[0] instanceof AccessThis).toBe(true); - expect(expression.args[0].ancestor).toBe(1); + verifyEqual(expression, + new CallMember( + new AccessScope('matcher', 0), + 'bind', + [new AccessThis(1)] + ) + ); }); it('parses $parent in LiteralObject', () => { let expression = parser.parse('{parent: $parent}'); - expect(expression instanceof LiteralObject).toBe(true); - expect(expression.keys.length).toBe(1); - expect(expression.keys).toEqual(['parent']); - expect(expression.values.length).toBe(1); - expect(expression.values[0] instanceof AccessThis).toBe(true); - expect(expression.values[0].ancestor).toBe(1); - - expression = parser.parse('{parent: $parent, foo: bar}'); - expect(expression instanceof LiteralObject).toBe(true); - expect(expression.keys.length).toBe(2); - expect(expression.keys).toEqual(['parent', 'foo']); - expect(expression.values.length).toBe(2); - expect(expression.values[0] instanceof AccessThis).toBe(true); - expect(expression.values[0].ancestor).toBe(1); - expect(expression.values[1] instanceof AccessScope).toBe(true); - expect(expression.values[1].name).toBe('bar'); + verifyEqual(expression, + new LiteralObject( + ['parent'], + [new AccessThis(1)] + ) + ); 
+ }); + + it('parses $parent and foo in LiteralObject', () => { + let expression = parser.parse('{parent: $parent, foo: bar}'); + verifyEqual(expression, + new LiteralObject( + [ + 'parent', + 'foo' + ], + [ + new AccessThis(1), + new AccessScope('bar', 0) + ] + ) + ); }); it('parses es6 shorthand LiteralObject', () => { let expression = parser.parse('{ foo, bar }'); - expect(expression instanceof LiteralObject).toBe(true); - expect(expression.keys.length).toBe(2); - expect(expression.values.length).toBe(2); - - expect(expression.values[0] instanceof AccessScope).toBe(true); - expect(expression.values[0].name).toBe('foo'); - expect(expression.values[1] instanceof AccessScope).toBe(true); - expect(expression.values[1].name).toBe('bar'); + verifyEqual(expression, + new LiteralObject( + [ + 'foo', + 'bar' + ], + [ + new AccessScope('foo', 0), + new AccessScope('bar', 0) + ] + ) + ); }); it('does not parse invalid shorthand properties', () => { @@ -385,3 +466,21 @@ describe('Parser', () => { expect(pass).toBe(false); }); }); + +function verifyEqual(actual, expected) { + if (typeof expected !== 'object' || expected === null || expected === undefined) { + expect(actual).toEqual(expected); + return; + } + if (expected instanceof Array) { + for (let i = 0; i < expected.length; i++) { + verifyEqual(actual[i], expected[i]); + } + return; + } + + expect(actual).toEqual(jasmine.any(expected.constructor)); + for (const prop of Object.keys(expected)) { + verifyEqual(actual[prop], expected[prop]); + } +} From 384aeee25d543f3d3812f0018d46ba45ae67b64b Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Tue, 24 Apr 2018 12:23:56 +0200 Subject: [PATCH 04/18] fix(parser): make unicode escape case insensitive --- src/parser.js | 2 +- test/parser.spec.js | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parser.js b/src/parser.js index 89cd009d..b096f806 100644 --- a/src/parser.js +++ b/src/parser.js @@ -583,7 +583,7 @@ export class ParserImplementation { // 
coverage for this. let hex = this.input.slice(this.index + 1, this.index + 5); - if (!/[A-Z0-9]{4}/.test(hex)) { + if (!/[A-Z0-9]{4}/i.test(hex)) { this.error(`Invalid unicode escape [\\u${hex}]`); } diff --git a/test/parser.spec.js b/test/parser.spec.js index a581423a..04fc5a3f 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -30,6 +30,7 @@ describe('Parser', () => { // http://es5.github.io/x7.html#x7.8.4 const tests = [ { expression: '\'foo\'', expected: new LiteralString('foo') }, + { expression: `\'${unicodeEscape('äöüÄÖÜß')}\'`, expected: new LiteralString('äöüÄÖÜß') }, { expression: '\'\\\\\'', expected: new LiteralString('\\') }, { expression: '\'\\\'\'', expected: new LiteralString('\'') }, { expression: '\'"\'', expected: new LiteralString('"') }, @@ -38,6 +39,7 @@ describe('Parser', () => { { expression: '\'\\r\'', expected: new LiteralString('\r') }, { expression: '\'\\t\'', expected: new LiteralString('\t') }, { expression: '\'\\v\'', expected: new LiteralString('\v') }, + { expression: '\'\\v\'', expected: new LiteralString('\v') }, { expression: 'true', expected: new LiteralPrimitive(true) }, { expression: 'false', expected: new LiteralPrimitive(false) }, { expression: 'null', expected: new LiteralPrimitive(null) }, @@ -484,3 +486,7 @@ function verifyEqual(actual, expected) { verifyEqual(actual[prop], expected[prop]); } } + +function unicodeEscape(str) { + return str.replace(/[\s\S]/g, c => `\\u${('0000' + c.charCodeAt().toString(16)).slice(-4)}`); +} From 4d0c97d0c8b6074e41938135f6e581baf4b3eea8 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Tue, 24 Apr 2018 13:10:21 +0200 Subject: [PATCH 05/18] fix(parser): check bounds for unicode escapes --- src/parser.js | 24 +++++++++++++++--------- test/parser.spec.js | 1 + 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/parser.js b/src/parser.js index b096f806..ea47300b 100644 --- a/src/parser.js +++ b/src/parser.js @@ -574,21 +574,27 @@ export class ParserImplementation { } 
buffer.push(this.input.slice(marker, this.index)); - char = this.input.charCodeAt(++this.index) + char = this.input.charCodeAt(++this.index); let unescaped; if (char === $u) { - // todo(kasperl): Check bounds? Make sure we have test - // coverage for this. - let hex = this.input.slice(this.index + 1, this.index + 5); + char = this.input.charCodeAt(++this.index); + const index = this.index; - if (!/[A-Z0-9]{4}/i.test(hex)) { - this.error(`Invalid unicode escape [\\u${hex}]`); + if (index + 4 < this.length) { + let hex = this.input.slice(index, index + 4); + + if (!/[A-Z0-9]{4}/i.test(hex)) { + this.error(`Invalid unicode escape [\\u${hex}]`); + } + + unescaped = parseInt(hex, 16); + this.index += 4; + } else { + const expression = this.input.slice(this.lastIndex, this.index); + this.error(`Unexpected token ${expression}`); } - - unescaped = parseInt(hex, 16); - this.index += 5; } else { unescaped = unescape(this.input.charCodeAt(this.index)); this.index++; diff --git a/test/parser.spec.js b/test/parser.spec.js index 04fc5a3f..578c85d2 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -31,6 +31,7 @@ describe('Parser', () => { const tests = [ { expression: '\'foo\'', expected: new LiteralString('foo') }, { expression: `\'${unicodeEscape('äöüÄÖÜß')}\'`, expected: new LiteralString('äöüÄÖÜß') }, + { expression: `\'${unicodeEscape('ಠ_ಠ')}\'`, expected: new LiteralString('ಠ_ಠ') }, { expression: '\'\\\\\'', expected: new LiteralString('\\') }, { expression: '\'\\\'\'', expected: new LiteralString('\'') }, { expression: '\'"\'', expected: new LiteralString('"') }, From d7fb6d559bcf80422f032eda406dd09504477e82 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Tue, 24 Apr 2018 14:29:10 +0200 Subject: [PATCH 06/18] tests(parser): add unit tests for binary expression precedence --- test/parser.spec.js | 110 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 109 insertions(+), 1 deletion(-) diff --git a/test/parser.spec.js b/test/parser.spec.js index 
578c85d2..bcc46c4d 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -16,7 +16,8 @@ import { Assign, Conditional, Binary, - Expression + Expression, + PrefixNot } from '../src/ast'; describe('Parser', () => { @@ -107,6 +108,113 @@ describe('Parser', () => { } }); + it('parses binary in the correct order', () => { + const expression = parser.parse('a || b && c ^ d == e != f === g !== h < i > j <= k >= l + m - n * o % p / !q'); + verifyEqual( + expression, + new Binary( + '||', + new AccessScope('a', 0), + new Binary( + '&&', + new AccessScope('b', 0), + new Binary( + '^', + new AccessScope('c', 0), + new Binary( + '==', + new AccessScope('d', 0), + new Binary( + '!=', + new AccessScope('e', 0), + new Binary( + '===', + new AccessScope('f', 0), + new Binary( + '!==', + new AccessScope('g', 0), + new Binary( + '<', + new AccessScope('h', 0), + new Binary( + '>', + new AccessScope('i', 0), + new Binary( + '<=', + new AccessScope('j', 0), + new Binary( + '>=', + new AccessScope('k', 0), + new Binary( + '+', + new AccessScope('l', 0), + new Binary( + '-', + new AccessScope('m', 0), + new Binary( + '*', + new AccessScope('n', 0), + new Binary( + '%', + new AccessScope('o', 0), + new Binary( + '/', + new AccessScope('p', 0), + new PrefixNot( + '!', + new AccessScope('q', 0) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + }); + + it('reorders binary expression', () => { + const expression = parser.parse('a * b || c === d / e + f && g'); + verifyEqual( + expression, + new Binary( + '||', + new Binary( + '*', + new AccessScope('a', 0), + new AccessScope('b', 0) + ), + new Binary( + '&&', + new Binary( + '===', + new AccessScope('c', 0), + new Binary( + '+', + new Binary( + '/', + new AccessScope('d', 0), + new AccessScope('e', 0) + ), + new AccessScope('f', 0) + ), + ), + new AccessScope('g', 0) + ) + ) + ) + }); + it('parses binding behavior', () => { let expression = parser.parse('foo & bar'); verifyEqual(expression, From 
8048949f2da7438441bc1b0b26ea60b7e27cb0ce Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Tue, 24 Apr 2018 14:55:34 +0200 Subject: [PATCH 07/18] tests(parser): add unit tests for parsing numbers --- test/parser.spec.js | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/test/parser.spec.js b/test/parser.spec.js index bcc46c4d..21219eea 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -48,7 +48,23 @@ describe('Parser', () => { { expression: 'undefined', expected: new LiteralPrimitive(undefined) }, { expression: '0', expected: new LiteralPrimitive(0) }, { expression: '1', expected: new LiteralPrimitive(1) }, - { expression: '2.2', expected: new LiteralPrimitive(2.2) } + { expression: '-1', expected: new Binary('-', new LiteralPrimitive(0), new LiteralPrimitive(1)) }, + { expression: '(-1)', expected: new Binary('-', new LiteralPrimitive(0), new LiteralPrimitive(1)) }, + { expression: '-(-1)', expected: new Binary('-', new LiteralPrimitive(0), new Binary('-', new LiteralPrimitive(0), new LiteralPrimitive(1))) }, + { expression: '+(-1)', expected: new Binary('-', new LiteralPrimitive(0), new LiteralPrimitive(1)) }, + { expression: '-(+1)', expected: new Binary('-', new LiteralPrimitive(0), new LiteralPrimitive(1)) }, + { expression: '+(+1)', expected: new LiteralPrimitive(1) }, + { expression: '9007199254740992', expected: new LiteralPrimitive(9007199254740992) }, // Number.MAX_SAFE_INTEGER + 1 + { expression: '1.7976931348623157e+308', expected: new LiteralPrimitive(1.7976931348623157e+308) }, // Number.MAX_VALUE + { expression: '1.7976931348623157E+308', expected: new LiteralPrimitive(1.7976931348623157e+308) }, // Number.MAX_VALUE + { expression: '-9007199254740992', expected: new Binary('-', new LiteralPrimitive(0), new LiteralPrimitive(9007199254740992)) }, // Number.MIN_SAFE_INTEGER - 1 + { expression: '5e-324', expected: new LiteralPrimitive(5e-324) }, // Number.MIN_VALUE + { expression: '5E-324', expected: new 
LiteralPrimitive(5e-324) }, // Number.MIN_VALUE + { expression: '2.2', expected: new LiteralPrimitive(2.2) }, + { expression: '2.2e2', expected: new LiteralPrimitive(2.2e2) }, + { expression: '.42', expected: new LiteralPrimitive(.42) }, + { expression: '0.42', expected: new LiteralPrimitive(.42) }, + { expression: '.42E10', expected: new LiteralPrimitive(.42e10) } ]; for (const test of tests) { From 54c85b3e2560573c6c9c47f0ed7fd2763d29ba79 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Tue, 24 Apr 2018 15:05:14 +0200 Subject: [PATCH 08/18] docs(parser): add documentation link for operator precedence --- src/parser.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parser.js b/src/parser.js index ea47300b..d49b29e5 100644 --- a/src/parser.js +++ b/src/parser.js @@ -786,6 +786,8 @@ const T_UnaryOperator = 1 << 16; /** ''' */const T_SingleQuote = 17; /** '"' */const T_DoubleQuote = 18; +// Operator precedence: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence#Table + /** '&' */ const T_BindingBehavior = 19 | T_AccessScopeTerminal; /** '|' */ const T_ValueConverter = 20 | T_AccessScopeTerminal; /** '||' */ const T_LogicalOr = 21 | T_BinaryOperator | 1 << T_PrecedenceShift; From 01beaf313f00f9ca55f48ab3806db3c956791a03 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Tue, 24 Apr 2018 17:27:04 +0200 Subject: [PATCH 09/18] refactor(parser): optimize number scanning --- src/parser.js | 76 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/src/parser.js b/src/parser.js index d49b29e5..cd47756f 100644 --- a/src/parser.js +++ b/src/parser.js @@ -525,37 +525,54 @@ export class ParserImplementation { } scanNumber(isFloat) { - let start = this.index; - this.index++; + let value = 0; let char = this.input.charCodeAt(this.index); - loop: while (true) { - switch(char) { - case $PERIOD: - // todo(fkleuver): Should deal with spread operator elsewhere, - // and 
throw here when seeing more than one period - isFloat = true; - break; - case $e: - case $E: - char = this.input.charCodeAt(++this.index); - if (char === $PLUS || char === $MINUS) { - char = this.input.charCodeAt(++this.index); - } - if (char < $0 || char > $9) { - this.error('Invalid exponent', -1); - } - isFloat = true; - break; - default: - if (char < $0 || char > $9 || this.index === this.length) { - break loop; - } + if (!isFloat) { + // this is significantly faster than parseInt, however that + // gain is lost when the number turns out to be a float + while (isDigit(char)) { + value = value * 10 + (char - $0); + char = this.input.charCodeAt(++this.index) } + } + const start = this.index; + + if (char === $PERIOD) { + isFloat = true; + do { + char = this.input.charCodeAt(++this.index) + } while (isDigit(char)) + } + + if (char === $e || char === $E) { + isFloat = true; + // for error reporting in case the exponent is invalid + const startExp = this.index; char = this.input.charCodeAt(++this.index); + + if (char === $PLUS || char === $MINUS) { + char = this.input.charCodeAt(++this.index); + } + + if (!isDigit(char)) { + this.index = startExp; + this.error('Invalid exponent'); + } + } + + // we got nothing after the initial number scan, so just use + // the calculated integer + if (!isFloat) { + this.tokenValue = value; + return T_NumericLiteral; + } + + while (isDigit(char)) { + char = this.input.charCodeAt(++this.index) } - const text = this.input.slice(start, this.index); - this.tokenValue = isFloat ? parseFloat(text) : parseInt(text, 10); + const text = value + this.input.slice(start, this.index); + this.tokenValue = parseFloat(text); return T_NumericLiteral; } @@ -628,11 +645,8 @@ export class ParserImplementation { return T_StringLiteral; } - error(message, offset = 0) { - // todo(kasperl): Try to get rid of the offset. It is only used to match - // the error expectations in the lexer tests for numbers with exponents. 
- let position = this.index + offset; - throw new Error(`Lexer Error: ${message} at column ${position} in expression [${this.input}]`); + error(message) { + throw new Error(`Lexer Error: ${message} at column ${this.index} in expression [${this.input}]`); } optional(type) { From eec9b10183339bed3f624323de7832a298c61884 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Thu, 26 Apr 2018 22:19:51 +0200 Subject: [PATCH 10/18] refactor(parser): clean-up, improve readability --- src/parser.js | 301 ++++++++++++++++++++++++-------------------------- 1 file changed, 146 insertions(+), 155 deletions(-) diff --git a/src/parser.js b/src/parser.js index cd47756f..b4224924 100644 --- a/src/parser.js +++ b/src/parser.js @@ -21,16 +21,23 @@ export class Parser { } export class ParserImplementation { + get currentChar() { + return this.input.charCodeAt(this.index); + } + get hasNext() { + return this.index < this.length; + } + get tokenRaw() { + return this.input.slice(this.startIndex, this.index); + } + constructor(input) { this.index = 0; this.startIndex = 0; - this.lastIndex = 0; this.input = input; this.length = input.length; - this.token = T_EndOfSource; + this.currentToken = T_EndOfSource; this.tokenValue = undefined; - this.tokenRaw = ''; - this.lastValue = 0; } parseChain() { @@ -39,13 +46,13 @@ export class ParserImplementation { let isChain = false; let expressions = []; - while (this.token !== T_EndOfSource) { + while (this.currentToken !== T_EndOfSource) { while (this.optional(T_Semicolon)) { isChain = true; } - if ((this.token & T_ClosingToken) === T_ClosingToken) { - this.error(`Unconsumed token ${String.fromCharCode(this.tokenValue)}`); + if ((this.currentToken & T_ClosingToken) === T_ClosingToken) { + this.error(`Unconsumed token ${this.tokenRaw}`); } const expr = this.parseBindingBehavior(); @@ -106,7 +113,7 @@ export class ParserImplementation { let start = this.index; let result = this.parseConditional(); - while (this.token === T_Assign) { + while 
(this.currentToken === T_Assign) { if (!result.isAssignable) { let end = (this.index < this.length) ? this.index : this.length; let expression = this.input.slice(start, end); @@ -145,12 +152,12 @@ export class ParserImplementation { parseBinary(minPrecedence) { let left = this.parseUnary(); - if ((this.token & T_BinaryOperator) !== T_BinaryOperator) { + if ((this.currentToken & T_BinaryOperator) !== T_BinaryOperator) { return left; } - while ((this.token & T_BinaryOperator) === T_BinaryOperator) { - const opToken = this.token; + while ((this.currentToken & T_BinaryOperator) === T_BinaryOperator) { + const opToken = this.currentToken; const precedence = opToken & T_Precedence; if (precedence < minPrecedence) { break; @@ -162,7 +169,7 @@ export class ParserImplementation { } parseUnary() { - const opToken = this.token; + const opToken = this.currentToken; if ((opToken & T_UnaryOperator) === T_UnaryOperator) { this.nextToken(); switch(opToken) { @@ -216,7 +223,7 @@ export class ParserImplementation { } parsePrimary() { - const token = this.token; + const token = this.currentToken; switch (token) { case T_Identifier: case T_ParentScope: @@ -258,15 +265,14 @@ export class ParserImplementation { if (this.index >= this.length) { throw new Error(`Unexpected end of expression at column ${this.index} of ${this.input}`); } else { - const expression = this.input.slice(this.lastIndex, this.index); - this.error(`Unexpected token ${expression}`); + this.error(`Unexpected token ${this.tokenRaw}`); } } } parseAccessOrCallScope() { let name = this.tokenValue; - let token = this.token; + let token = this.currentToken; this.nextToken(); @@ -275,13 +281,12 @@ export class ParserImplementation { ancestor++; if (this.optional(T_Period)) { name = this.tokenValue; - token = this.token; + token = this.currentToken; this.nextToken(); - } else if ((this.token & T_AccessScopeTerminal) === T_AccessScopeTerminal) { + } else if ((this.currentToken & T_AccessScopeTerminal) === 
T_AccessScopeTerminal) { return new AccessThis(ancestor); } else { - const expression = this.input.slice(this.lastIndex, this.index); - this.error(`Unexpected token ${expression}`); + this.error(`Unexpected token ${this.tokenRaw}`); } } @@ -300,17 +305,16 @@ export class ParserImplementation { this.expect(T_LeftBrace); - if (this.token ^ T_RightBrace) { + if (this.currentToken !== T_RightBrace) { do { // todo(kasperl): Stricter checking. Only allow identifiers // and strings as keys. Maybe also keywords? - - let token = this.token; + const prevIndex = this.index; + const prevToken = this.currentToken; keys.push(this.tokenValue); - this.nextToken(); - if (token === T_Identifier && (this.token === T_Comma || this.token === T_RightBrace)) { - --this.index; + if (prevToken === T_Identifier && (this.currentToken === T_Comma || this.currentToken === T_RightBrace)) { + this.index = prevIndex; values.push(this.parseAccessOrCallScope()); } else { this.expect(T_Colon); @@ -327,7 +331,7 @@ export class ParserImplementation { parseExpressionList(terminator) { let result = []; - if (this.token ^ terminator) { + if (this.currentToken !== terminator) { do { result.push(this.parseExpression()); } while (this.optional(T_Comma)); @@ -337,120 +341,118 @@ export class ParserImplementation { } nextToken() { - this.lastIndex = this.index; + return this.currentToken = this.scanToken(); + } - return this.token = this.scanToken(); + nextChar() { + this.index++; } scanToken() { - while (this.index < this.length) { + while (this.hasNext) { this.startIndex = this.index; - let current = this.input.charCodeAt(this.index); + const char = this.currentChar; // skip whitespace. - if (current <= $SPACE) { - this.index++; + if (char <= $SPACE) { + this.nextChar(); continue; } // handle identifiers and numbers. 
- if (isIdentifierStart(current)) { + if (isIdentifierStart(char)) { return this.scanIdentifier(); } - if (isDigit(current)) { - return this.scanNumber(false); - } - - let start = this.index; + if (isDigit(char)) { + return this.scanNumber(); - switch (current) { + } + switch (char) { case $PERIOD: { - if (this.index < this.length) { - const next = this.input.charCodeAt(this.index + 1); - if (next >= $0 && next <= $9) { - return this.scanNumber(true); - } - this.index++; + const nextChar = this.input.charCodeAt(this.index + 1); + if (isDigit(nextChar)) { + return this.scanNumber(); } + this.nextChar(); return T_Period; } case $LPAREN: - this.index++; + this.nextChar(); return T_LeftParen; case $RPAREN: - this.index++; + this.nextChar(); return T_RightParen; case $LBRACE: - this.index++; + this.nextChar(); return T_LeftBrace; case $RBRACE: - this.index++; + this.nextChar(); return T_RightBrace; case $LBRACKET: - this.index++; + this.nextChar(); return T_LeftBracket; case $RBRACKET: - this.index++; + this.nextChar(); return T_RightBracket; case $COMMA: - this.index++; + this.nextChar(); return T_Comma; case $COLON: - this.index++; + this.nextChar(); return T_Colon; case $SEMICOLON: - this.index++; + this.nextChar(); return T_Semicolon; case $SQ: case $DQ: return this.scanString(); case $PLUS: - this.index++; + this.nextChar(); return T_Add; case $MINUS: - this.index++; + this.nextChar(); return T_Subtract; case $STAR: - this.index++; + this.nextChar(); return T_Multiply; case $SLASH: - this.index++; + this.nextChar(); return T_Divide; case $PERCENT: - this.index++; + this.nextChar(); return T_Modulo; case $CARET: - this.index++; + this.nextChar(); return T_BitwiseXor; case $QUESTION: - this.index++; + this.nextChar(); return T_QuestionMark; case $LT: { - let next = this.input.charCodeAt(++this.index); - if (next === $EQ) { - this.index++; + this.nextChar(); + if (this.currentChar === $EQ) { + this.nextChar(); return T_LessThanOrEqual; } return T_LessThan; } case $GT: 
{ - let next = this.input.charCodeAt(++this.index); - if (next === $EQ) { - this.index++; + this.nextChar(); + if (this.currentChar === $EQ) { + this.nextChar(); return T_GreaterThanOrEqual; } return T_GreaterThan; } case $BANG: { - let next = this.input.charCodeAt(++this.index); - if (next === $EQ) { - let next = this.input.charCodeAt(++this.index); - if (next === $EQ) { - this.index++; + this.nextChar(); + if (this.currentChar === $EQ) { + this.nextChar(); + if (this.currentChar === $EQ) { + this.nextChar(); return T_StrictNotEqual; } return T_LooseNotEqual; @@ -459,11 +461,11 @@ export class ParserImplementation { } case $EQ: { - let next = this.input.charCodeAt(++this.index); - if (next === $EQ) { - let next = this.input.charCodeAt(++this.index); - if (next === $EQ) { - this.index++; + this.nextChar(); + if (this.currentChar === $EQ) { + this.nextChar(); + if (this.currentChar === $EQ) { + this.nextChar(); return T_StrictEqual; } return T_LooseEqual; @@ -472,30 +474,29 @@ export class ParserImplementation { } case $AMPERSAND: { - let next = this.input.charCodeAt(++this.index); - if (next === $AMPERSAND) { - this.index++; + this.nextChar(); + if (this.currentChar === $AMPERSAND) { + this.nextChar(); return T_LogicalAnd; } return T_BindingBehavior; } case $BAR: { - let next = this.input.charCodeAt(++this.index); - if (next === $BAR) { - this.index++; + this.nextChar(); + if (this.currentChar === $BAR) { + this.nextChar(); return T_LogicalOr; } return T_ValueConverter; } case $NBSP: - this.index++; + this.nextChar(); continue; // no default } - let character = String.fromCharCode(this.input.charCodeAt(this.index)); - this.error(`Unexpected character [${character}]`); + this.error(`Unexpected character [${String.fromCharCode(this.currentChar)}]`); return null; } @@ -503,19 +504,17 @@ export class ParserImplementation { } scanIdentifier() { - const start = this.index; - let char = this.input.charCodeAt(++this.index); + this.nextChar(); - while 
(isIdentifierPart(char)) { - char = this.input.charCodeAt(++this.index); + while (isIdentifierPart(this.currentChar)) { + this.nextChar(); } - let text = this.input.slice(start, this.index); - this.tokenValue = text; + this.tokenValue = this.tokenRaw; - let len = text.length; - if (len >= 4 && len <= 9) { - const token = KeywordLookup[text]; + // true/null have length 4, undefined has length 9 + if (4 <= this.tokenValue.length && this.tokenValue.length <= 9) { + const token = KeywordLookup[this.tokenValue]; if (token !== undefined) { return token; } @@ -524,83 +523,79 @@ export class ParserImplementation { return T_Identifier; } - scanNumber(isFloat) { + scanNumber() { + let isFloat = false; let value = 0; - let char = this.input.charCodeAt(this.index); - if (!isFloat) { - // this is significantly faster than parseInt, however that - // gain is lost when the number turns out to be a float - while (isDigit(char)) { - value = value * 10 + (char - $0); - char = this.input.charCodeAt(++this.index) - } + let char = this.currentChar; + + while (isDigit(this.currentChar)) { + value = value * 10 + (this.currentChar - $0); + this.nextChar(); } - const start = this.index; - if (char === $PERIOD) { + const nonDigitStart = this.index; + if (this.currentChar === $PERIOD) { isFloat = true; - do { - char = this.input.charCodeAt(++this.index) - } while (isDigit(char)) + this.nextChar(); + + while (isDigit(this.currentChar)) { + this.nextChar(); + } } - if (char === $e || char === $E) { + if (this.currentChar === $e || this.currentChar === $E) { isFloat = true; - // for error reporting in case the exponent is invalid - const startExp = this.index; - char = this.input.charCodeAt(++this.index); + const exponentStart = this.index; // for error reporting in case the exponent is invalid + this.nextChar(); - if (char === $PLUS || char === $MINUS) { - char = this.input.charCodeAt(++this.index); + if (this.currentChar === $PLUS || this.currentChar === $MINUS) { + this.nextChar(); } - if 
(!isDigit(char)) { - this.index = startExp; + if (!isDigit(this.currentChar)) { + this.index = exponentStart; this.error('Invalid exponent'); } + + while (isDigit(this.currentChar)) { + this.nextChar(); + } } - // we got nothing after the initial number scan, so just use - // the calculated integer if (!isFloat) { this.tokenValue = value; return T_NumericLiteral; } - while (isDigit(char)) { - char = this.input.charCodeAt(++this.index) - } - - const text = value + this.input.slice(start, this.index); + const text = value + this.input.slice(nonDigitStart, this.index); this.tokenValue = parseFloat(text); return T_NumericLiteral; } scanString() { - let start = this.index; - let quote = this.input.charCodeAt(this.index++); // Skip initial quote. + let quote = this.currentChar; + this.nextChar(); // Skip initial quote. let buffer; let marker = this.index; - let char = this.input.charCodeAt(this.index); - while (char !== quote) { - if (char === $BACKSLASH) { + while (this.currentChar !== quote) { + if (this.currentChar === $BACKSLASH) { if (!buffer) { buffer = []; } buffer.push(this.input.slice(marker, this.index)); - char = this.input.charCodeAt(++this.index); + + this.nextChar(); let unescaped; - if (char === $u) { - char = this.input.charCodeAt(++this.index); - const index = this.index; + if (this.currentChar === $u) { + this.nextChar(); - if (index + 4 < this.length) { - let hex = this.input.slice(index, index + 4); + if (this.index + 4 < this.length) { + let hex = this.input.slice(this.index, this.index + 4); if (!/[A-Z0-9]{4}/i.test(hex)) { this.error(`Invalid unicode escape [\\u${hex}]`); @@ -609,28 +604,24 @@ export class ParserImplementation { unescaped = parseInt(hex, 16); this.index += 4; } else { - const expression = this.input.slice(this.lastIndex, this.index); - this.error(`Unexpected token ${expression}`); + this.error(`Unexpected token ${this.tokenRaw}`); } } else { - unescaped = unescape(this.input.charCodeAt(this.index)); - this.index++; + unescaped = 
unescape(this.currentChar); + this.nextChar(); } buffer.push(String.fromCharCode(unescaped)); marker = this.index; - } else if (char === $EOF) { + } else if (this.currentChar === $EOF) { this.error('Unterminated quote'); } else { - this.index++; + this.nextChar(); } - - char = this.input.charCodeAt(this.index) } let last = this.input.slice(marker, this.index); - this.index++; // Skip terminating quote. - let text = this.input.slice(start, this.index); + this.nextChar(); // Skip terminating quote. // Compute the unescaped string value. let unescaped = last; @@ -641,7 +632,6 @@ export class ParserImplementation { } this.tokenValue = unescaped; - this.tokenRaw = text; return T_StringLiteral; } @@ -650,7 +640,7 @@ export class ParserImplementation { } optional(type) { - if (this.token === type) { + if (this.currentToken === type) { this.nextToken(); return true; } @@ -659,9 +649,10 @@ export class ParserImplementation { } expect(type) { - if (this.token === type) { + if (this.currentToken === type) { this.nextToken(); } else { + // todo(fkleuver): translate to string value for readable error messages this.error(`Missing expected token type ${type}`); } } @@ -744,12 +735,12 @@ function isDigit(code) { function unescape(code) { switch (code) { - case $n: return $LF; - case $f: return $FF; - case $r: return $CR; - case $t: return $TAB; - case $v: return $VTAB; - default: return code; + case $n: return $LF; + case $f: return $FF; + case $r: return $CR; + case $t: return $TAB; + case $v: return $VTAB; + default: return code; } } From 61033c2a2baedd222963e66b54e971f6883f4cac Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Thu, 26 Apr 2018 23:08:33 +0200 Subject: [PATCH 11/18] refactor(parser): align token names with char names --- src/parser.js | 178 +++++++++++++++++++++++++------------------------- 1 file changed, 89 insertions(+), 89 deletions(-) diff --git a/src/parser.js b/src/parser.js index b4224924..39478afc 100644 --- a/src/parser.js +++ b/src/parser.js @@ -36,7 
+36,7 @@ export class ParserImplementation { this.startIndex = 0; this.input = input; this.length = input.length; - this.currentToken = T_EndOfSource; + this.currentToken = T_EOF; this.tokenValue = undefined; } @@ -46,7 +46,7 @@ export class ParserImplementation { let isChain = false; let expressions = []; - while (this.currentToken !== T_EndOfSource) { + while (this.currentToken !== T_EOF) { while (this.optional(T_Semicolon)) { isChain = true; } @@ -73,7 +73,7 @@ export class ParserImplementation { parseBindingBehavior() { let result = this.parseValueConverter(); - while (this.optional(T_BindingBehavior)) { + while (this.optional(T_Ampersand)) { let name = this.tokenValue; let args = []; @@ -92,7 +92,7 @@ export class ParserImplementation { parseValueConverter() { let result = this.parseExpression(); - while (this.optional(T_ValueConverter)) { + while (this.optional(T_Bar)) { let name = this.tokenValue; let args = []; @@ -113,7 +113,7 @@ export class ParserImplementation { let start = this.index; let result = this.parseConditional(); - while (this.currentToken === T_Assign) { + while (this.currentToken === T_Eq) { if (!result.isAssignable) { let end = (this.index < this.length) ? 
this.index : this.length; let expression = this.input.slice(start, end); @@ -121,7 +121,7 @@ export class ParserImplementation { this.error(`Expression ${expression} is not assignable`); } - this.expect(T_Assign); + this.expect(T_Eq); result = new Assign(result, this.parseConditional()); } @@ -132,7 +132,7 @@ export class ParserImplementation { let start = this.index; let result = this.parseBinary(0); - if (this.optional(T_QuestionMark)) { + if (this.optional(T_Question)) { let yes = this.parseExpression(); if (!this.optional(T_Colon)) { @@ -173,11 +173,11 @@ export class ParserImplementation { if ((opToken & T_UnaryOperator) === T_UnaryOperator) { this.nextToken(); switch(opToken) { - case T_Add: + case T_Plus: return this.parseUnary(); - case T_Subtract: + case T_Minus: return new Binary('-', new LiteralPrimitive(0), this.parseUnary()); - case T_LogicalNot: + case T_Bang: return new PrefixNot('!', this.parseUnary()); } } @@ -193,9 +193,9 @@ export class ParserImplementation { this.nextToken(); - if (this.optional(T_LeftParen)) { - let args = this.parseExpressionList(T_RightParen); - this.expect(T_RightParen); + if (this.optional(T_LParen)) { + let args = this.parseExpressionList(T_RParen); + this.expect(T_RParen); if (result instanceof AccessThis) { result = new CallScope(name, args, result.ancestor); } else { @@ -208,13 +208,13 @@ export class ParserImplementation { result = new AccessMember(result, name); } } - } else if (this.optional(T_LeftBracket)) { + } else if (this.optional(T_LBracket)) { let key = this.parseExpression(); - this.expect(T_RightBracket); + this.expect(T_RBracket); result = new AccessKeyed(result, key); - } else if (this.optional(T_LeftParen)) { - let args = this.parseExpressionList(T_RightParen); - this.expect(T_RightParen); + } else if (this.optional(T_LParen)) { + let args = this.parseExpressionList(T_RParen); + this.expect(T_RParen); result = new CallFunction(result, args); } else { return result; @@ -231,17 +231,17 @@ export class 
ParserImplementation { case T_ThisScope: this.nextToken(); return new AccessThis(0); - case T_LeftParen: + case T_LParen: this.nextToken(); const result = this.parseExpression(); - this.expect(T_RightParen); + this.expect(T_RParen); return result; - case T_LeftBracket: + case T_LBracket: this.nextToken(); - const elements = this.parseExpressionList(T_RightBracket); - this.expect(T_RightBracket); + const elements = this.parseExpressionList(T_RBracket); + this.expect(T_RBracket); return new LiteralArray(elements); - case T_LeftBrace: + case T_LBrace : return this.parseObject(); case T_StringLiteral: { @@ -290,9 +290,9 @@ export class ParserImplementation { } } - if (this.optional(T_LeftParen)) { - let args = this.parseExpressionList(T_RightParen); - this.expect(T_RightParen); + if (this.optional(T_LParen)) { + let args = this.parseExpressionList(T_RParen); + this.expect(T_RParen); return new CallScope(name, args, ancestor); } @@ -303,9 +303,9 @@ export class ParserImplementation { let keys = []; let values = []; - this.expect(T_LeftBrace); + this.expect(T_LBrace); - if (this.currentToken !== T_RightBrace) { + if (this.currentToken !== T_RBrace) { do { // todo(kasperl): Stricter checking. Only allow identifiers // and strings as keys. Maybe also keywords? 
@@ -313,7 +313,7 @@ export class ParserImplementation { const prevToken = this.currentToken; keys.push(this.tokenValue); this.nextToken(); - if (prevToken === T_Identifier && (this.currentToken === T_Comma || this.currentToken === T_RightBrace)) { + if (prevToken === T_Identifier && (this.currentToken === T_Comma || this.currentToken === T_RBrace)) { this.index = prevIndex; values.push(this.parseAccessOrCallScope()); } else { @@ -323,7 +323,7 @@ export class ParserImplementation { } while (this.optional(T_Comma)); } - this.expect(T_RightBrace); + this.expect(T_RBrace); return new LiteralObject(keys, values); } @@ -379,22 +379,22 @@ export class ParserImplementation { } case $LPAREN: this.nextChar(); - return T_LeftParen; + return T_LParen; case $RPAREN: this.nextChar(); - return T_RightParen; + return T_RParen; case $LBRACE: this.nextChar(); - return T_LeftBrace; + return T_LBrace; case $RBRACE: this.nextChar(); - return T_RightBrace; + return T_RBrace; case $LBRACKET: this.nextChar(); - return T_LeftBracket; + return T_LBracket; case $RBRACKET: this.nextChar(); - return T_RightBracket; + return T_RBracket; case $COMMA: this.nextChar(); return T_Comma; @@ -409,42 +409,42 @@ export class ParserImplementation { return this.scanString(); case $PLUS: this.nextChar(); - return T_Add; + return T_Plus; case $MINUS: this.nextChar(); - return T_Subtract; + return T_Minus; case $STAR: this.nextChar(); - return T_Multiply; + return T_Star; case $SLASH: this.nextChar(); - return T_Divide; + return T_Slash; case $PERCENT: this.nextChar(); - return T_Modulo; + return T_Percent; case $CARET: this.nextChar(); - return T_BitwiseXor; + return T_Caret; case $QUESTION: this.nextChar(); - return T_QuestionMark; + return T_Question; case $LT: { this.nextChar(); if (this.currentChar === $EQ) { this.nextChar(); - return T_LessThanOrEqual; + return T_LtEq; } - return T_LessThan; + return T_Lt; } case $GT: { this.nextChar(); if (this.currentChar === $EQ) { this.nextChar(); - return 
T_GreaterThanOrEqual; + return T_GtEq; } - return T_GreaterThan; + return T_Gt; } case $BANG: { @@ -453,11 +453,11 @@ export class ParserImplementation { this.nextChar(); if (this.currentChar === $EQ) { this.nextChar(); - return T_StrictNotEqual; + return T_BangEqEq; } - return T_LooseNotEqual; + return T_BangEq; } - return T_LogicalNot; + return T_Bang; } case $EQ: { @@ -466,29 +466,29 @@ export class ParserImplementation { this.nextChar(); if (this.currentChar === $EQ) { this.nextChar(); - return T_StrictEqual; + return T_EqEqEq; } - return T_LooseEqual; + return T_EqEq; } - return T_Assign; + return T_Eq; } case $AMPERSAND: { this.nextChar(); if (this.currentChar === $AMPERSAND) { this.nextChar(); - return T_LogicalAnd; + return T_AmpersandAmpersand; } - return T_BindingBehavior; + return T_Ampersand; } case $BAR: { this.nextChar(); if (this.currentChar === $BAR) { this.nextChar(); - return T_LogicalOr; + return T_BarBar; } - return T_ValueConverter; + return T_Bar; } case $NBSP: this.nextChar(); @@ -500,7 +500,7 @@ export class ParserImplementation { return null; } - return T_EndOfSource; + return T_EOF; } scanIdentifier() { @@ -763,7 +763,7 @@ const T_Precedence = 7 << T_PrecedenceShift; const T_ClosingToken = 1 << 9; /** EndOfSource | '(' | '}' | ')' | ',' | '[' | '&' | '|' */ const T_AccessScopeTerminal = 1 << 10; -const T_EndOfSource = 1 << 11 | T_AccessScopeTerminal; +const T_EOF = 1 << 11 | T_AccessScopeTerminal; const T_Identifier = 1 << 12; const T_NumericLiteral = 1 << 13; const T_StringLiteral = 1 << 14; @@ -777,42 +777,42 @@ const T_UnaryOperator = 1 << 16; /** '$this' */ const T_ThisScope = 4; /** '$parent' */ const T_ParentScope = 5; -/** '(' */const T_LeftParen = 6 | T_AccessScopeTerminal; -/** '{' */const T_LeftBrace = 7; +/** '(' */const T_LParen = 6 | T_AccessScopeTerminal; +/** '{' */const T_LBrace = 7; /** '.' 
*/const T_Period = 8; -/** '}' */const T_RightBrace = 9 | T_ClosingToken | T_AccessScopeTerminal; -/** ')' */const T_RightParen = 10 | T_ClosingToken | T_AccessScopeTerminal; +/** '}' */const T_RBrace = 9 | T_ClosingToken | T_AccessScopeTerminal; +/** ')' */const T_RParen = 10 | T_ClosingToken | T_AccessScopeTerminal; /** ';' */const T_Semicolon = 11; /** ',' */const T_Comma = 12 | T_AccessScopeTerminal; -/** '[' */const T_LeftBracket = 13 | T_AccessScopeTerminal; -/** ']' */const T_RightBracket = 14 | T_ClosingToken; +/** '[' */const T_LBracket = 13 | T_AccessScopeTerminal; +/** ']' */const T_RBracket = 14 | T_ClosingToken; /** ':' */const T_Colon = 15; -/** '?' */const T_QuestionMark = 16; -/** ''' */const T_SingleQuote = 17; -/** '"' */const T_DoubleQuote = 18; +/** '?' */const T_Question = 16; +/** ''' */const T_SQ = 17; +/** '"' */const T_DQ = 18; // Operator precedence: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence#Table -/** '&' */ const T_BindingBehavior = 19 | T_AccessScopeTerminal; -/** '|' */ const T_ValueConverter = 20 | T_AccessScopeTerminal; -/** '||' */ const T_LogicalOr = 21 | T_BinaryOperator | 1 << T_PrecedenceShift; -/** '&&' */ const T_LogicalAnd = 22 | T_BinaryOperator | 2 << T_PrecedenceShift; -/** '^' */ const T_BitwiseXor = 23 | T_BinaryOperator | 3 << T_PrecedenceShift; -/** '==' */ const T_LooseEqual = 24 | T_BinaryOperator | 4 << T_PrecedenceShift; -/** '!=' */ const T_LooseNotEqual = 25 | T_BinaryOperator | 4 << T_PrecedenceShift; -/** '===' */const T_StrictEqual = 26 | T_BinaryOperator | 4 << T_PrecedenceShift; -/** '!== '*/const T_StrictNotEqual = 27 | T_BinaryOperator | 4 << T_PrecedenceShift; -/** '<' */ const T_LessThan = 28 | T_BinaryOperator | 5 << T_PrecedenceShift; -/** '>' */ const T_GreaterThan = 29 | T_BinaryOperator | 5 << T_PrecedenceShift; -/** '<=' */ const T_LessThanOrEqual = 30 | T_BinaryOperator | 5 << T_PrecedenceShift; -/** '>=' */ const T_GreaterThanOrEqual = 31 | 
T_BinaryOperator | 5 << T_PrecedenceShift; -/** '+' */ const T_Add = 32 | T_UnaryOperator | T_BinaryOperator | 6 << T_PrecedenceShift; -/** '-' */ const T_Subtract = 33 | T_UnaryOperator | T_BinaryOperator | 6 << T_PrecedenceShift; -/** '*' */ const T_Multiply = 34 | T_BinaryOperator | 7 << T_PrecedenceShift; -/** '%' */ const T_Modulo = 35 | T_BinaryOperator | 7 << T_PrecedenceShift; -/** '/' */ const T_Divide = 36 | T_BinaryOperator | 7 << T_PrecedenceShift; -/** '=' */ const T_Assign = 37; -/** '!' */ const T_LogicalNot = 38 | T_UnaryOperator; +/** '&' */ const T_Ampersand = 19 | T_AccessScopeTerminal; +/** '|' */ const T_Bar = 20 | T_AccessScopeTerminal; +/** '||' */ const T_BarBar = 21 | T_BinaryOperator | 1 << T_PrecedenceShift; +/** '&&' */ const T_AmpersandAmpersand = 22 | T_BinaryOperator | 2 << T_PrecedenceShift; +/** '^' */ const T_Caret = 23 | T_BinaryOperator | 3 << T_PrecedenceShift; +/** '==' */ const T_EqEq = 24 | T_BinaryOperator | 4 << T_PrecedenceShift; +/** '!=' */ const T_BangEq = 25 | T_BinaryOperator | 4 << T_PrecedenceShift; +/** '===' */const T_EqEqEq = 26 | T_BinaryOperator | 4 << T_PrecedenceShift; +/** '!== '*/const T_BangEqEq = 27 | T_BinaryOperator | 4 << T_PrecedenceShift; +/** '<' */ const T_Lt = 28 | T_BinaryOperator | 5 << T_PrecedenceShift; +/** '>' */ const T_Gt = 29 | T_BinaryOperator | 5 << T_PrecedenceShift; +/** '<=' */ const T_LtEq = 30 | T_BinaryOperator | 5 << T_PrecedenceShift; +/** '>=' */ const T_GtEq = 31 | T_BinaryOperator | 5 << T_PrecedenceShift; +/** '+' */ const T_Plus = 32 | T_UnaryOperator | T_BinaryOperator | 6 << T_PrecedenceShift; +/** '-' */ const T_Minus = 33 | T_UnaryOperator | T_BinaryOperator | 6 << T_PrecedenceShift; +/** '*' */ const T_Star = 34 | T_BinaryOperator | 7 << T_PrecedenceShift; +/** '%' */ const T_Percent = 35 | T_BinaryOperator | 7 << T_PrecedenceShift; +/** '/' */ const T_Slash = 36 | T_BinaryOperator | 7 << T_PrecedenceShift; +/** '=' */ const T_Eq = 37; +/** '!' 
*/ const T_Bang = 38 | T_UnaryOperator; const KeywordLookup = Object.create(null, { true: {value: T_TrueKeyword}, From e5ea1802c970e15daaca226c85b69eba07a405c8 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Fri, 27 Apr 2018 08:28:17 +0200 Subject: [PATCH 12/18] docs(parser): fix typo --- src/parser.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.js b/src/parser.js index 39478afc..bcfcf352 100644 --- a/src/parser.js +++ b/src/parser.js @@ -755,7 +755,7 @@ const T_TokenMask = (1 << 6) - 1; const T_PrecedenceShift = 6; /* Performing a bitwise and (&) with this value will return only the - * precedence bit, which is used to determine the parsing order of bitwise + * precedence bit, which is used to determine the parsing order of binary * expressions */ const T_Precedence = 7 << T_PrecedenceShift; From 59f34e89fcb6f44082ed64ebf1632f3eda714112 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Fri, 27 Apr 2018 13:18:49 +0200 Subject: [PATCH 13/18] refactor(parser): minor tweak to currentChar --- src/parser.js | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/parser.js b/src/parser.js index bcfcf352..dcab6ad2 100644 --- a/src/parser.js +++ b/src/parser.js @@ -21,9 +21,6 @@ export class Parser { } export class ParserImplementation { - get currentChar() { - return this.input.charCodeAt(this.index); - } get hasNext() { return this.index < this.length; } @@ -38,6 +35,7 @@ export class ParserImplementation { this.length = input.length; this.currentToken = T_EOF; this.tokenValue = undefined; + this.currentChar = input.charCodeAt(0); } parseChain() { @@ -315,6 +313,7 @@ export class ParserImplementation { this.nextToken(); if (prevToken === T_Identifier && (this.currentToken === T_Comma || this.currentToken === T_RBrace)) { this.index = prevIndex; + this.currentChar = this.input.charCodeAt(this.index); values.push(this.parseAccessOrCallScope()); } else { this.expect(T_Colon); @@ -345,29 +344,28 @@ export 
class ParserImplementation { } nextChar() { - this.index++; + return this.currentChar = this.input.charCodeAt(++this.index); } scanToken() { while (this.hasNext) { this.startIndex = this.index; - const char = this.currentChar; // skip whitespace. - if (char <= $SPACE) { + if (this.currentChar <= $SPACE) { this.nextChar(); continue; } // handle identifiers and numbers. - if (isIdentifierStart(char)) { + if (isIdentifierStart(this.currentChar)) { return this.scanIdentifier(); } - if (isDigit(char)) { + if (isDigit(this.currentChar)) { return this.scanNumber(); } - switch (char) { + switch (this.currentChar) { case $PERIOD: { const nextChar = this.input.charCodeAt(this.index + 1); @@ -526,7 +524,6 @@ export class ParserImplementation { scanNumber() { let isFloat = false; let value = 0; - let char = this.currentChar; while (isDigit(this.currentChar)) { value = value * 10 + (this.currentChar - $0); @@ -603,6 +600,7 @@ export class ParserImplementation { unescaped = parseInt(hex, 16); this.index += 4; + this.currentChar = this.input.charCodeAt(this.index); } else { this.error(`Unexpected token ${this.tokenRaw}`); } From fdbb49745b3f0c1445928cbe1a804c9db687ebd5 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Sat, 28 Apr 2018 14:15:11 +0200 Subject: [PATCH 14/18] chore(parser): fix formatting --- src/parser.js | 60 +++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/parser.js b/src/parser.js index dcab6ad2..b01f79a8 100644 --- a/src/parser.js +++ b/src/parser.js @@ -775,41 +775,41 @@ const T_UnaryOperator = 1 << 16; /** '$this' */ const T_ThisScope = 4; /** '$parent' */ const T_ParentScope = 5; -/** '(' */const T_LParen = 6 | T_AccessScopeTerminal; -/** '{' */const T_LBrace = 7; -/** '.' 
*/const T_Period = 8; -/** '}' */const T_RBrace = 9 | T_ClosingToken | T_AccessScopeTerminal; -/** ')' */const T_RParen = 10 | T_ClosingToken | T_AccessScopeTerminal; -/** ';' */const T_Semicolon = 11; -/** ',' */const T_Comma = 12 | T_AccessScopeTerminal; -/** '[' */const T_LBracket = 13 | T_AccessScopeTerminal; -/** ']' */const T_RBracket = 14 | T_ClosingToken; -/** ':' */const T_Colon = 15; -/** '?' */const T_Question = 16; -/** ''' */const T_SQ = 17; -/** '"' */const T_DQ = 18; +/** '(' */const T_LParen = 6 | T_AccessScopeTerminal; +/** '{' */const T_LBrace = 7; +/** '.' */const T_Period = 8; +/** '}' */const T_RBrace = 9 | T_AccessScopeTerminal | T_ClosingToken; +/** ')' */const T_RParen = 10 | T_AccessScopeTerminal | T_ClosingToken; +/** ';' */const T_Semicolon = 11; +/** ',' */const T_Comma = 12 | T_AccessScopeTerminal; +/** '[' */const T_LBracket = 13 | T_AccessScopeTerminal; +/** ']' */const T_RBracket = 14 | T_ClosingToken; +/** ':' */const T_Colon = 15; +/** '?' */const T_Question = 16; +/** ''' */const T_SQ = 17; +/** '"' */const T_DQ = 18; // Operator precedence: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence#Table /** '&' */ const T_Ampersand = 19 | T_AccessScopeTerminal; /** '|' */ const T_Bar = 20 | T_AccessScopeTerminal; -/** '||' */ const T_BarBar = 21 | T_BinaryOperator | 1 << T_PrecedenceShift; -/** '&&' */ const T_AmpersandAmpersand = 22 | T_BinaryOperator | 2 << T_PrecedenceShift; -/** '^' */ const T_Caret = 23 | T_BinaryOperator | 3 << T_PrecedenceShift; -/** '==' */ const T_EqEq = 24 | T_BinaryOperator | 4 << T_PrecedenceShift; -/** '!=' */ const T_BangEq = 25 | T_BinaryOperator | 4 << T_PrecedenceShift; -/** '===' */const T_EqEqEq = 26 | T_BinaryOperator | 4 << T_PrecedenceShift; -/** '!== '*/const T_BangEqEq = 27 | T_BinaryOperator | 4 << T_PrecedenceShift; -/** '<' */ const T_Lt = 28 | T_BinaryOperator | 5 << T_PrecedenceShift; -/** '>' */ const T_Gt = 29 | T_BinaryOperator | 5 << 
T_PrecedenceShift; -/** '<=' */ const T_LtEq = 30 | T_BinaryOperator | 5 << T_PrecedenceShift; -/** '>=' */ const T_GtEq = 31 | T_BinaryOperator | 5 << T_PrecedenceShift; -/** '+' */ const T_Plus = 32 | T_UnaryOperator | T_BinaryOperator | 6 << T_PrecedenceShift; -/** '-' */ const T_Minus = 33 | T_UnaryOperator | T_BinaryOperator | 6 << T_PrecedenceShift; -/** '*' */ const T_Star = 34 | T_BinaryOperator | 7 << T_PrecedenceShift; -/** '%' */ const T_Percent = 35 | T_BinaryOperator | 7 << T_PrecedenceShift; -/** '/' */ const T_Slash = 36 | T_BinaryOperator | 7 << T_PrecedenceShift; -/** '=' */ const T_Eq = 37; +/** '||' */ const T_BarBar = 21 | 1 << T_PrecedenceShift | T_BinaryOperator; +/** '&&' */ const T_AmpersandAmpersand = 22 | 2 << T_PrecedenceShift | T_BinaryOperator; +/** '^' */ const T_Caret = 23 | 3 << T_PrecedenceShift | T_BinaryOperator; +/** '==' */ const T_EqEq = 24 | 4 << T_PrecedenceShift | T_BinaryOperator; +/** '!=' */ const T_BangEq = 25 | 4 << T_PrecedenceShift | T_BinaryOperator; +/** '===' */const T_EqEqEq = 26 | 4 << T_PrecedenceShift | T_BinaryOperator; +/** '!== '*/const T_BangEqEq = 27 | 4 << T_PrecedenceShift | T_BinaryOperator; +/** '<' */ const T_Lt = 28 | 5 << T_PrecedenceShift | T_BinaryOperator; +/** '>' */ const T_Gt = 29 | 5 << T_PrecedenceShift | T_BinaryOperator; +/** '<=' */ const T_LtEq = 30 | 5 << T_PrecedenceShift | T_BinaryOperator; +/** '>=' */ const T_GtEq = 31 | 5 << T_PrecedenceShift | T_BinaryOperator; +/** '+' */ const T_Plus = 32 | 6 << T_PrecedenceShift | T_BinaryOperator | T_UnaryOperator; +/** '-' */ const T_Minus = 33 | 6 << T_PrecedenceShift | T_BinaryOperator | T_UnaryOperator; +/** '*' */ const T_Star = 34 | 7 << T_PrecedenceShift | T_BinaryOperator; +/** '%' */ const T_Percent = 35 | 7 << T_PrecedenceShift | T_BinaryOperator; +/** '/' */ const T_Slash = 36 | 7 << T_PrecedenceShift | T_BinaryOperator; +/** '=' */ const T_Eq = 37; /** '!' 
*/ const T_Bang = 38 | T_UnaryOperator; const KeywordLookup = Object.create(null, { From bb185e38493c0ea073fa1559f47a1565f824777b Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Sat, 28 Apr 2018 23:42:34 +0200 Subject: [PATCH 15/18] refactor(parser): improve decimal parsing --- src/parser.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/parser.js b/src/parser.js index b01f79a8..76867c28 100644 --- a/src/parser.js +++ b/src/parser.js @@ -530,16 +530,22 @@ export class ParserImplementation { this.nextChar(); } - const nonDigitStart = this.index; if (this.currentChar === $PERIOD) { - isFloat = true; this.nextChar(); + let decimalValue = 0; + let decimalPlaces = 0; + while (isDigit(this.currentChar)) { + decimalValue = decimalValue * 10 + (this.currentChar - $0); + decimalPlaces++; this.nextChar(); } + + value += (decimalValue / Math.pow(10, decimalPlaces)); } + const nonDigitStart = this.index; if (this.currentChar === $e || this.currentChar === $E) { isFloat = true; const exponentStart = this.index; // for error reporting in case the exponent is invalid From 2ffeb84a7cb678bf2bdaaeccb020835c5321e5f3 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Sun, 29 Apr 2018 04:28:10 +0200 Subject: [PATCH 16/18] refactor(parser): improve/test error reporting --- src/parser.js | 29 +++++------- test/parser.spec.js | 108 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 112 insertions(+), 25 deletions(-) diff --git a/src/parser.js b/src/parser.js index 76867c28..21588121 100644 --- a/src/parser.js +++ b/src/parser.js @@ -31,6 +31,7 @@ export class ParserImplementation { constructor(input) { this.index = 0; this.startIndex = 0; + this.lastIndex = 0; this.input = input; this.length = input.length; this.currentToken = T_EOF; @@ -41,12 +42,11 @@ export class ParserImplementation { parseChain() { this.nextToken(); - let isChain = false; let expressions = []; while (this.currentToken !== T_EOF) { - while (this.optional(T_Semicolon)) { - 
isChain = true; + if (this.optional(T_Semicolon)) { + this.error('Multiple expressions are not allowed.'); } if ((this.currentToken & T_ClosingToken) === T_ClosingToken) { @@ -56,11 +56,7 @@ export class ParserImplementation { const expr = this.parseBindingBehavior(); expressions.push(expr); - while (this.optional(T_Semicolon)) { - isChain = true; - } - - if (isChain) { + if (this.optional(T_Semicolon)) { this.error('Multiple expressions are not allowed.'); } } @@ -108,13 +104,11 @@ export class ParserImplementation { } parseExpression() { - let start = this.index; let result = this.parseConditional(); while (this.currentToken === T_Eq) { if (!result.isAssignable) { - let end = (this.index < this.length) ? this.index : this.length; - let expression = this.input.slice(start, end); + let expression = this.input.slice(this.lastIndex, this.startIndex); this.error(`Expression ${expression} is not assignable`); } @@ -349,12 +343,14 @@ export class ParserImplementation { scanToken() { while (this.hasNext) { - this.startIndex = this.index; // skip whitespace. if (this.currentChar <= $SPACE) { this.nextChar(); continue; } + + this.lastIndex = this.startIndex; + this.startIndex = this.index; // handle identifiers and numbers. 
if (isIdentifierStart(this.currentChar)) { @@ -640,7 +636,7 @@ export class ParserImplementation { } error(message) { - throw new Error(`Lexer Error: ${message} at column ${this.index} in expression [${this.input}]`); + throw new Error(`Parser Error: ${message} at column ${this.startIndex} in expression [${this.input}]`); } optional(type) { @@ -652,12 +648,11 @@ export class ParserImplementation { return false; } - expect(type) { - if (this.currentToken === type) { + expect(token) { + if (this.currentToken === token) { this.nextToken(); } else { - // todo(fkleuver): translate to string value for readable error messages - this.error(`Missing expected token type ${type}`); + this.error(`Missing expected token ${TokenValues[token & T_TokenMask]}`); } } } diff --git a/test/parser.spec.js b/test/parser.spec.js index 21219eea..320b525e 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -20,6 +20,14 @@ import { PrefixNot } from '../src/ast'; +const operators = [ + '&&', '||', + '==', '!=', '===', '!==', + '<', '>', '<=', '>=', + '+', '-', + '*', '%', '/' +]; + describe('Parser', () => { let parser; @@ -102,14 +110,6 @@ describe('Parser', () => { }); describe('parses binary', () => { - const operators = [ - '&&', '||', - '==', '!=', '===', '!==', - '<', '>', '<=', '>=', - '+', '-', - '*', '%', '/' - ]; - for (let op of operators) { it(`\"${op}\"`, () => { let expression = parser.parse(`foo ${op} bar`); @@ -592,6 +592,98 @@ describe('Parser', () => { } catch (e) { pass = false; } expect(pass).toBe(false); }); + + describe('does not parse multiple expressions', () => { + const expressions = [ + ';', + 'foo;', + ';foo', + 'foo&bar;baz|qux' + ]; + + for (const expr of expressions) { + it(expr, () => { + try { + parser.parse(expr); + } catch(e) { + expect(e.message).toContain('Multiple expressions are not allowed'); + } + }); + } + }); + + describe('throw on extra closing token', () => { + const expressions = [ + ')', + ']', + '}', + 'foo())', + 'foo[x]]', + '{foo}}' 
+ ]; + + for (const expr of expressions) { + it(expr, () => { + try { + parser.parse(expr); + } catch(e) { + expect(e.message).toContain('Unconsumed token'); + } + }); + } + }); + + describe('throw on assigning unassignable', () => { + const expressions = [ + 'foo ? bar : baz = qux', + '$this = foo', + 'foo() = bar', + 'foo.bar() = baz', + '!foo = bar', + '-foo = bar', + '-foo = bar', + '\'foo\' = bar', + '42 = foo', + '[] = foo', + '{} = foo' + ].concat(operators.map(op => `foo ${op} bar`)); + + for (const expr of expressions) { + it(expr, () => { + try { + parser.parse(expr); + } catch(e) { + expect(e.message).toContain('is not assignable'); + } + }); + } + }); + + it('throw on incomplete conditional', () => { + try { + parser.parse('foo ? bar'); + } catch(e) { + expect(e.message).toContain('requires all 3 expressions'); + } + }); + + describe('throw on invalid exponent', () => { + const expressions = [ + '1e', + '1ee', + '1e.' + ]; + + for (const expr of expressions) { + it(expr, () => { + try { + parser.parse(expr); + } catch(e) { + expect(e.message).toContain('Invalid exponent'); + } + }); + } + }); }); function verifyEqual(actual, expected) { From d31ee614524b33f7237b0a13f490f57c598f49d7 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Mon, 30 Apr 2018 00:08:08 +0200 Subject: [PATCH 17/18] fix(parser): ensure property accessors are identifiers, make invalid object literals explicit --- src/parser.js | 87 +++++++++------- test/parser.spec.js | 234 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 263 insertions(+), 58 deletions(-) diff --git a/src/parser.js b/src/parser.js index 21588121..b1b16b10 100644 --- a/src/parser.js +++ b/src/parser.js @@ -7,7 +7,6 @@ import { } from './ast'; export class Parser { - cache; constructor() { this.cache = Object.create(null); } @@ -93,7 +92,6 @@ export class ParserImplementation { this.nextToken(); while (this.optional(T_Colon)) { - // todo(kasperl): Is this really supposed to be expressions? 
args.push(this.parseExpression()); } @@ -181,7 +179,10 @@ export class ParserImplementation { while (true) { // eslint-disable-line no-constant-condition if (this.optional(T_Period)) { - let name = this.tokenValue; // todo(kasperl): Check that this is an identifier. Are keywords okay? + if ((this.currentToken ^ T_IdentifierOrKeyword) === T_IdentifierOrKeyword) { + this.error(`Unexpected token ${this.tokenRaw}`); + } + let name = this.tokenValue; this.nextToken(); @@ -262,11 +263,12 @@ export class ParserImplementation { } } - parseAccessOrCallScope() { - let name = this.tokenValue; - let token = this.currentToken; - - this.nextToken(); + parseAccessOrCallScope(name, token) { + if (!(name && token)) { + name = this.tokenValue; + token = this.currentToken; + this.nextToken(); + } let ancestor = 0; while (token === T_ParentScope) { @@ -296,26 +298,44 @@ export class ParserImplementation { let values = []; this.expect(T_LBrace); - - if (this.currentToken !== T_RBrace) { - do { - // todo(kasperl): Stricter checking. Only allow identifiers - // and strings as keys. Maybe also keywords? 
- const prevIndex = this.index; - const prevToken = this.currentToken; - keys.push(this.tokenValue); - this.nextToken(); - if (prevToken === T_Identifier && (this.currentToken === T_Comma || this.currentToken === T_RBrace)) { - this.index = prevIndex; - this.currentChar = this.input.charCodeAt(this.index); - values.push(this.parseAccessOrCallScope()); - } else { + let isComputed = false; + + while (this.currentToken !== T_RBrace) { + const token = this.currentToken; + const name = this.tokenValue; + + switch(token) { + case T_Identifier: + // Treat keywords and predefined strings like identifiers + case T_FalseKeyword: + case T_TrueKeyword: + case T_NullKeyword: + case T_UndefinedKeyword: + case T_ThisScope: + case T_ParentScope: + keys.push(name); + this.nextToken(); + if (this.optional(T_Colon)) { + values.push(this.parseExpression()); + } else { + values.push(this.parseAccessOrCallScope(name, token)); + } + break; + case T_StringLiteral: + case T_NumericLiteral: + keys.push(name); + this.nextToken(); this.expect(T_Colon); values.push(this.parseExpression()); - } - } while (this.optional(T_Comma)); + break; + default: + this.error(`Unexpected token ${this.tokenRaw}`); + } + if (this.currentToken !== T_RBrace) { + this.expect(T_Comma); + } } - + this.expect(T_RBrace); return new LiteralObject(keys, values); @@ -763,18 +783,19 @@ const T_ClosingToken = 1 << 9; /** EndOfSource | '(' | '}' | ')' | ',' | '[' | '&' | '|' */ const T_AccessScopeTerminal = 1 << 10; const T_EOF = 1 << 11 | T_AccessScopeTerminal; -const T_Identifier = 1 << 12; +const T_Identifier = 1 << 12 | T_IdentifierOrKeyword; const T_NumericLiteral = 1 << 13; const T_StringLiteral = 1 << 14; const T_BinaryOperator = 1 << 15; const T_UnaryOperator = 1 << 16; - -/** false */ const T_FalseKeyword = 0; -/** true */ const T_TrueKeyword = 1; -/** null */ const T_NullKeyword = 2; -/** undefined */ const T_UndefinedKeyword = 3; -/** '$this' */ const T_ThisScope = 4; -/** '$parent' */ const T_ParentScope = 5; 
+const T_IdentifierOrKeyword = 1 << 17; + +/** false */ const T_FalseKeyword = 0 | T_IdentifierOrKeyword; +/** true */ const T_TrueKeyword = 1 | T_IdentifierOrKeyword; +/** null */ const T_NullKeyword = 2 | T_IdentifierOrKeyword; +/** undefined */ const T_UndefinedKeyword = 3 | T_IdentifierOrKeyword; +/** '$this' */ const T_ThisScope = 4 | T_IdentifierOrKeyword; +/** '$parent' */ const T_ParentScope = 5 | T_IdentifierOrKeyword; /** '(' */const T_LParen = 6 | T_AccessScopeTerminal; /** '{' */const T_LBrace = 7; diff --git a/test/parser.spec.js b/test/parser.spec.js index 320b525e..286ba417 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -314,6 +314,57 @@ describe('Parser', () => { ); }); + it('parses value converter with Conditional argument', () => { + let expression = parser.parse('foo | bar : foo ? bar : baz'); + verifyEqual(expression, + new ValueConverter( + new AccessScope('foo', 0), + 'bar', + [ + new Conditional( + new AccessScope('foo', 0), + new AccessScope('bar', 0), + new AccessScope('baz', 0) + ) + ]) + ); + }); + + it('parses value converter with Assign argument', () => { + let expression = parser.parse('foo | bar : foo = bar'); + verifyEqual(expression, + new ValueConverter( + new AccessScope('foo', 0), + 'bar', + [ + new Assign( + new AccessScope('foo', 0), + new AccessScope('bar', 0) + ) + ]) + ); + }); + + describe('parses value converter with Binary argument', () => { + for (let op of operators) { + it(`\"${op}\"`, () => { + let expression = parser.parse(`foo | bar : foo ${op} bar`); + verifyEqual(expression, + new ValueConverter( + new AccessScope('foo', 0), + 'bar', + [ + new Binary( + op, + new AccessScope('foo', 0), + new AccessScope('bar', 0) + ) + ]) + ); + }); + } + }); + it('parses AccessScope', () => { let expression = parser.parse('foo'); verifyEqual(expression, new AccessScope('foo', 0)); @@ -326,6 +377,32 @@ describe('Parser', () => { ); }); + it('parses AccessMember with indexed string property', () => { + let expression 
= parser.parse('foo["bar"].baz'); + verifyEqual(expression, + new AccessMember( + new AccessKeyed( + new AccessScope('foo', 0), + new LiteralString('bar') + ), + 'baz' + ) + ); + }); + + it('parses AccessMember with indexed numeric property', () => { + let expression = parser.parse('foo[42].baz'); + verifyEqual(expression, + new AccessMember( + new AccessKeyed( + new AccessScope('foo', 0), + new LiteralPrimitive(42) + ), + 'baz' + ) + ); + }); + it('parses Assign', () => { let expression = parser.parse('foo = bar'); verifyEqual(expression, @@ -562,7 +639,17 @@ describe('Parser', () => { ); }); - it('parses es6 shorthand LiteralObject', () => { + it('parses es6 shorthand LiteralObject with one property', () => { + let expression = parser.parse('{foo}'); + verifyEqual(expression, + new LiteralObject( + ['foo'], + [new AccessScope('foo', 0)] + ) + ); + }); + + it('parses es6 shorthand LiteralObject with two properties', () => { let expression = parser.parse('{ foo, bar }'); verifyEqual(expression, new LiteralObject( @@ -578,19 +665,76 @@ describe('Parser', () => { ); }); - it('does not parse invalid shorthand properties', () => { - let pass = false; - try { - parser.parse('{ foo.bar, bar.baz }'); - pass = true; - } catch (e) { pass = false; } - expect(pass).toBe(false); + it('parses empty LiteralObject', () => { + let expression = parser.parse('{}'); + verifyEqual(expression, + new LiteralObject([], []) + ); + }); - try { - parser.parse('{ "foo.bar" }'); - pass = true; - } catch (e) { pass = false; } - expect(pass).toBe(false); + it('parses LiteralObject with string literal property', () => { + let expression = parser.parse('{"foo": "bar"}'); + verifyEqual(expression, + new LiteralObject( + ['foo'], + [new LiteralString('bar')] + ) + ); + }); + + it('parses LiteralObject with numeric literal property', () => { + let expression = parser.parse('{42: "foo"}'); + verifyEqual(expression, + new LiteralObject( + [42], + [new LiteralString('foo')] + ) + ); + }); + + 
describe('does not parse LiteralObject with computed property', () => { + const expressions = [ + '{ []: "foo" }', + '{ [42]: "foo" }', + '{ ["foo"]: "bar" }', + '{ [foo]: "bar" }' + ]; + + for (const expr of expressions) { + it(expr, () => { + try { + parser.parse(expr); + pass = true; + } catch (e) { + expect(e.message).toContain('Unexpected token ['); + } + }); + } + }); + + describe('does not parse invalid shorthand properties', () => { + const expressions = [ + '{ foo.bar }', + '{ foo.bar, bar.baz }', + '{ "foo" }', + '{ "foo.bar" }', + '{ 42 }', + '{ 42, 42 }', + '{ [foo] }', + '{ ["foo"] }', + '{ [42] }' + ]; + + for (const expr of expressions) { + it(expr, () => { + try { + parser.parse(expr); + pass = true; + } catch (e) { + expect(e.message).toContain('expected'); + } + }); + } }); describe('does not parse multiple expressions', () => { @@ -613,21 +757,43 @@ describe('Parser', () => { }); describe('throw on extra closing token', () => { - const expressions = [ - ')', - ']', - '}', - 'foo())', - 'foo[x]]', - '{foo}}' + const tests = [ + { expr: ')', token: ')' }, + { expr: ']', token: ']' }, + { expr: '}', token: '}' }, + { expr: 'foo())', token: ')' }, + { expr: 'foo[x]]', token: ']' }, + { expr: '{foo}}', token: '}' } ]; - for (const expr of expressions) { - it(expr, () => { + for (const test of tests) { + it(test.expr, () => { try { - parser.parse(expr); + parser.parse(test.expr); + } catch(e) { + expect(e.message).toContain(`Unconsumed token ${test.token}`); + } + }); + } + }); + + describe('throw on missing expected token', () => { + const tests = [ + { expr: '(foo', token: ')' }, + { expr: '[foo', token: ']' }, + { expr: '{foo', token: ',' }, + { expr: 'foo(bar', token: ')' }, + { expr: 'foo[bar', token: ']' }, + { expr: 'foo.bar(baz', token: ')' }, + { expr: 'foo.bar[baz', token: ']' } + ]; + + for (const test of tests) { + it(test.expr, () => { + try { + parser.parse(test.expr); } catch(e) { - expect(e.message).toContain('Unconsumed token'); + 
expect(e.message).toContain(`Missing expected token ${test.token}`); } }); } @@ -641,7 +807,7 @@ describe('Parser', () => { 'foo.bar() = baz', '!foo = bar', '-foo = bar', - '-foo = bar', + '+foo = bar', '\'foo\' = bar', '42 = foo', '[] = foo', @@ -667,6 +833,24 @@ describe('Parser', () => { } }); + describe('throw on invalid primary expression', () => { + const expressions = ['.', ',', '&', '|', '=', '<', '>', '*', '%', '/']; + expressions.push(...expressions.map(e => e + ' ')); + for (const expr of expressions) { + it(expr, () => { + try { + parser.parse(expr); + } catch(e) { + if (expr.length === 1) { + expect(e.message).toContain(`Unexpected end of expression`); + } else { + expect(e.message).toContain(`Unexpected token ${expr.slice(0, 0)}`); + } + } + }); + } + }); + describe('throw on invalid exponent', () => { const expressions = [ '1e', From 1e2270f7d34147ab968ae6c2c8e100cb8865d7c7 Mon Sep 17 00:00:00 2001 From: Fred Kleuver Date: Thu, 3 May 2018 19:23:48 +0200 Subject: [PATCH 18/18] tests(parser): remove duplicate tests, fix error verification, correct assign tests --- test/parser.spec.js | 105 +++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 54 deletions(-) diff --git a/test/parser.spec.js b/test/parser.spec.js index 286ba417..5756d41c 100644 --- a/test/parser.spec.js +++ b/test/parser.spec.js @@ -109,6 +109,20 @@ describe('Parser', () => { ); }); + it('parses conditional with assign', () => { + let expression = parser.parse('foo ? 
bar : baz = qux'); + verifyEqual(expression, + new Conditional( + new AccessScope('foo', 0), + new AccessScope('bar', 0), + new Assign( + new AccessScope('baz', 0), + new AccessScope('qux', 0) + ) + ) + ); + }); + describe('parses binary', () => { for (let op of operators) { it(`\"${op}\"`, () => { @@ -413,6 +427,16 @@ describe('Parser', () => { ); }); + it('parses Assign to ignored Unary', () => { + let expression = parser.parse('+foo = bar'); + verifyEqual(expression, + new Assign( + new AccessScope('foo', 0), + new AccessScope('bar', 0) + ) + ); + }); + it('parses chained Assign', () => { let expression = parser.parse('foo = bar = baz'); verifyEqual(expression, @@ -702,12 +726,7 @@ describe('Parser', () => { for (const expr of expressions) { it(expr, () => { - try { - parser.parse(expr); - pass = true; - } catch (e) { - expect(e.message).toContain('Unexpected token ['); - } + verifyError(expr, 'Unexpected token ['); }); } }); @@ -727,12 +746,7 @@ describe('Parser', () => { for (const expr of expressions) { it(expr, () => { - try { - parser.parse(expr); - pass = true; - } catch (e) { - expect(e.message).toContain('expected'); - } + verifyError(expr, 'expected'); }); } }); @@ -747,11 +761,7 @@ describe('Parser', () => { for (const expr of expressions) { it(expr, () => { - try { - parser.parse(expr); - } catch(e) { - expect(e.message).toContain('Multiple expressions are not allowed'); - } + verifyError(expr, 'Multiple expressions are not allowed'); }); } }); @@ -768,11 +778,7 @@ describe('Parser', () => { for (const test of tests) { it(test.expr, () => { - try { - parser.parse(test.expr); - } catch(e) { - expect(e.message).toContain(`Unconsumed token ${test.token}`); - } + verifyError(test.expr, `Unconsumed token ${test.token}`); }); } }); @@ -790,47 +796,34 @@ describe('Parser', () => { for (const test of tests) { it(test.expr, () => { - try { - parser.parse(test.expr); - } catch(e) { - expect(e.message).toContain(`Missing expected token ${test.token}`); - } + 
verifyError(test.expr, `Missing expected token ${test.token}`); }); } }); describe('throw on assigning unassignable', () => { const expressions = [ - 'foo ? bar : baz = qux', + '(foo ? bar : baz) = qux', '$this = foo', 'foo() = bar', 'foo.bar() = baz', '!foo = bar', '-foo = bar', - '+foo = bar', '\'foo\' = bar', '42 = foo', '[] = foo', '{} = foo' - ].concat(operators.map(op => `foo ${op} bar`)); + ].concat(operators.map(op => `foo ${op} bar = baz`)); for (const expr of expressions) { it(expr, () => { - try { - parser.parse(expr); - } catch(e) { - expect(e.message).toContain('is not assignable'); - } + verifyError(expr, 'is not assignable'); }); } }); it('throw on incomplete conditional', () => { - try { - parser.parse('foo ? bar'); - } catch(e) { - expect(e.message).toContain('requires all 3 expressions'); - } + verifyError('foo ? bar', 'requires all 3 expressions'); }); describe('throw on invalid primary expression', () => { @@ -838,14 +831,10 @@ describe('Parser', () => { expressions.push(...expressions.map(e => e + ' ')); for (const expr of expressions) { it(expr, () => { - try { - parser.parse(expr); - } catch(e) { - if (expr.length === 1) { - expect(e.message).toContain(`Unexpected end of expression`); - } else { - expect(e.message).toContain(`Unexpected token ${expr.slice(0, 0)}`); - } + if (expr.length === 1) { + verifyError(expr, `Unexpected end of expression`); + } else { + verifyError(expr, `Unexpected token ${expr.slice(0, 0)}`); } }); } @@ -860,14 +849,23 @@ describe('Parser', () => { for (const expr of expressions) { it(expr, () => { - try { - parser.parse(expr); - } catch(e) { - expect(e.message).toContain('Invalid exponent'); - } + verifyError(expr, 'Invalid exponent'); }); } }); + + function verifyError(expression, errorMessage) { + let error = null; + try { + parser.parse(expression); + } catch(e) { + error = e; + } + + expect(error).not.toBeNull(); + expect(error.message).toContain(errorMessage); + } + }); function verifyEqual(actual, expected) { 
@@ -887,7 +885,6 @@ function verifyEqual(actual, expected) { verifyEqual(actual[prop], expected[prop]); } } - function unicodeEscape(str) { return str.replace(/[\s\S]/g, c => `\\u${('0000' + c.charCodeAt().toString(16)).slice(-4)}`); }