fix(compiler): markup lexer should not capture quotes in attribute value (#28055)

When tokenizing markup (e.g. HTML), element attributes
can have quoted or unquoted values (e.g. `a=b` or `a="b"`).
The `ATTR_VALUE` tokens were capturing the quotes, which
was inconsistent and also affected source-mapping.

Now the tokenizer captures additional `ATTR_QUOTE` tokens,
which the HTML-related parsers understand and factor into their
token parsing.

PR Close #28055
This commit is contained in:
Pete Bacon Darwin
2019-02-08 22:10:20 +00:00
committed by Misko Hevery
parent e6a00be014
commit c0dac184cd
7 changed files with 52 additions and 8 deletions

View File

@ -25,6 +25,7 @@ export enum TokenType {
CDATA_START,
CDATA_END,
ATTR_NAME,
ATTR_QUOTE,
ATTR_VALUE,
DOC_TYPE,
EXPANSION_FORM_START,
@ -709,23 +710,29 @@ class _Tokenizer {
}
/**
 * Tokenizes an attribute value starting at the current character.
 *
 * - Quoted value (`a="b"` / `a='b'`): emits an `ATTR_QUOTE` token for the opening quote, an
 *   `ATTR_VALUE` token for the text between the quotes, and an `ATTR_QUOTE` token for the
 *   closing quote. The quote characters are never part of the `ATTR_VALUE` token.
 * - Unquoted value (`a=b`): emits a single `ATTR_VALUE` token.
 *
 * In both cases the value text is passed through `_processCarriageReturns` before being
 * stored as the token's only part.
 */
private _consumeAttributeValue() {
  if (this._peek === chars.$SQ || this._peek === chars.$DQ) {
    // Opening quote gets its own token so that the value token's span excludes it.
    this._beginToken(TokenType.ATTR_QUOTE);
    const quoteChar = this._peek;
    this._advance();
    this._endToken([String.fromCodePoint(quoteChar)]);

    // Everything up to (but not including) the matching quote is the value.
    this._beginToken(TokenType.ATTR_VALUE);
    const parts: string[] = [];
    // NOTE(review): termination on unterminated input presumably relies on `_readChar`
    // failing at end of input -- confirm against its implementation.
    while (this._peek !== quoteChar) {
      parts.push(this._readChar(true));
    }
    const value = parts.join('');
    this._endToken([this._processCarriageReturns(value)]);

    // Closing quote: same character as the opening one, emitted as a separate token.
    this._beginToken(TokenType.ATTR_QUOTE);
    this._advance();
    this._endToken([String.fromCodePoint(quoteChar)]);
  } else {
    // Unquoted value: consume characters until `isNameEnd` matches and slice them
    // straight out of the input.
    this._beginToken(TokenType.ATTR_VALUE);
    const valueStart = this._index;
    this._requireCharCodeUntilFn(isNameEnd, 1);
    const value = this._input.substring(valueStart, this._index);
    this._endToken([this._processCarriageReturns(value)]);
  }
}
private _consumeTagOpenEnd() {

View File

@ -326,12 +326,19 @@ class _TreeBuilder {
let end = attrName.sourceSpan.end;
let value = '';
let valueSpan: ParseSourceSpan = undefined !;
if (this._peek.type === lex.TokenType.ATTR_QUOTE) {
this._advance();
}
if (this._peek.type === lex.TokenType.ATTR_VALUE) {
const valueToken = this._advance();
value = valueToken.parts[0];
end = valueToken.sourceSpan.end;
valueSpan = valueToken.sourceSpan;
}
if (this._peek.type === lex.TokenType.ATTR_QUOTE) {
const quoteToken = this._advance();
end = quoteToken.sourceSpan.end;
}
return new html.Attribute(
fullName, value, new ParseSourceSpan(attrName.sourceSpan.start, end), valueSpan);
}