feat(compiler): record end of expression Token (#33549)

In the past, only the starting index of an expression Token was
recorded, so a parser could demarcate the span of a token only by the
start locations of two tokens. This may lead to trailing whitespace
being included in the token span:

```html
{{ token1   + token2 }}
   ^^^^^^^^^             recorded span of `token1`
```

It's also not enough for a parser to determine the end of a token by
adding the length of the token value to the token's start location,
because lexed expression values may not exactly reflect the source code.
For example, `"d\\"e"` is lexed as a string token whose value is `d"e`.

Instead, this commit adds an `end` field to expression tokens. `end`
is one past the last index of the token source code. This will enable a
parser to determine the span of a token just by looking at that token.

This is a breaking change because the constructor interface of `Token`
has changed.

Part of #33477.

PR Close #33549
This commit is contained in:
ayazhafiz
2019-11-02 14:15:49 -05:00
committed by Alex Rickabaugh
parent 4414fce46e
commit 8a25cd4e96
2 changed files with 118 additions and 113 deletions

View File

@ -35,7 +35,7 @@ export class Lexer {
export class Token {
constructor(
public index: number, public type: TokenType, public numValue: number,
public index: number, public end: number, public type: TokenType, public numValue: number,
public strValue: string) {}
isCharacter(code: number): boolean {
@ -91,35 +91,35 @@ export class Token {
}
}
function newCharacterToken(index: number, code: number): Token {
return new Token(index, TokenType.Character, code, String.fromCharCode(code));
function newCharacterToken(index: number, end: number, code: number): Token {
return new Token(index, end, TokenType.Character, code, String.fromCharCode(code));
}
function newIdentifierToken(index: number, text: string): Token {
return new Token(index, TokenType.Identifier, 0, text);
function newIdentifierToken(index: number, end: number, text: string): Token {
return new Token(index, end, TokenType.Identifier, 0, text);
}
function newKeywordToken(index: number, text: string): Token {
return new Token(index, TokenType.Keyword, 0, text);
function newKeywordToken(index: number, end: number, text: string): Token {
return new Token(index, end, TokenType.Keyword, 0, text);
}
function newOperatorToken(index: number, text: string): Token {
return new Token(index, TokenType.Operator, 0, text);
function newOperatorToken(index: number, end: number, text: string): Token {
return new Token(index, end, TokenType.Operator, 0, text);
}
function newStringToken(index: number, text: string): Token {
return new Token(index, TokenType.String, 0, text);
function newStringToken(index: number, end: number, text: string): Token {
return new Token(index, end, TokenType.String, 0, text);
}
function newNumberToken(index: number, n: number): Token {
return new Token(index, TokenType.Number, n, '');
function newNumberToken(index: number, end: number, n: number): Token {
return new Token(index, end, TokenType.Number, n, '');
}
function newErrorToken(index: number, message: string): Token {
return new Token(index, TokenType.Error, 0, message);
function newErrorToken(index: number, end: number, message: string): Token {
return new Token(index, end, TokenType.Error, 0, message);
}
export const EOF: Token = new Token(-1, TokenType.Character, 0, '');
export const EOF: Token = new Token(-1, -1, TokenType.Character, 0, '');
class _Scanner {
length: number;
@ -165,7 +165,7 @@ class _Scanner {
case chars.$PERIOD:
this.advance();
return chars.isDigit(this.peek) ? this.scanNumber(start) :
newCharacterToken(start, chars.$PERIOD);
newCharacterToken(start, this.index, chars.$PERIOD);
case chars.$LPAREN:
case chars.$RPAREN:
case chars.$LBRACE:
@ -211,13 +211,13 @@ class _Scanner {
scanCharacter(start: number, code: number): Token {
this.advance();
return newCharacterToken(start, code);
return newCharacterToken(start, this.index, code);
}
scanOperator(start: number, str: string): Token {
this.advance();
return newOperatorToken(start, str);
return newOperatorToken(start, this.index, str);
}
/**
@ -243,7 +243,7 @@ class _Scanner {
this.advance();
str += three;
}
return newOperatorToken(start, str);
return newOperatorToken(start, this.index, str);
}
scanIdentifier(): Token {
@ -251,8 +251,8 @@ class _Scanner {
this.advance();
while (isIdentifierPart(this.peek)) this.advance();
const str: string = this.input.substring(start, this.index);
return KEYWORDS.indexOf(str) > -1 ? newKeywordToken(start, str) :
newIdentifierToken(start, str);
return KEYWORDS.indexOf(str) > -1 ? newKeywordToken(start, this.index, str) :
newIdentifierToken(start, this.index, str);
}
scanNumber(start: number): Token {
@ -275,7 +275,7 @@ class _Scanner {
}
const str: string = this.input.substring(start, this.index);
const value: number = simple ? parseIntAutoRadix(str) : parseFloat(str);
return newNumberToken(start, value);
return newNumberToken(start, this.index, value);
}
scanString(): Token {
@ -321,13 +321,14 @@ class _Scanner {
const last: string = input.substring(marker, this.index);
this.advance(); // Skip terminating quote.
return newStringToken(start, buffer + last);
return newStringToken(start, this.index, buffer + last);
}
error(message: string, offset: number): Token {
const position: number = this.index + offset;
return newErrorToken(
position, `Lexer Error: ${message} at column ${position} in expression [${this.input}]`);
position, this.index,
`Lexer Error: ${message} at column ${position} in expression [${this.input}]`);
}
}