feat(HtmlLexer): allow "<" in text tokens

fixes #5550
This commit is contained in:
Victor Berchet
2015-12-06 13:11:00 -08:00
parent 3a438615c3
commit aecf68117a
2 changed files with 85 additions and 36 deletions

View File

@ -6,6 +6,7 @@ import {
CONST_EXPR,
serializeEnum
} from 'angular2/src/facade/lang';
import {ListWrapper} from 'angular2/src/facade/collection';
import {ParseLocation, ParseError, ParseSourceFile, ParseSourceSpan} from './parse_util';
import {getHtmlTagDefinition, HtmlTagContentType, NAMED_ENTITIES} from './html_tags';
@ -161,7 +162,7 @@ class _HtmlTokenizer {
}
this._beginToken(HtmlTokenType.EOF);
this._endToken([]);
return new HtmlTokenizeResult(this.tokens, this.errors);
return new HtmlTokenizeResult(mergeTextTokens(this.tokens), this.errors);
}
private _getLocation(): ParseLocation {
@ -374,21 +375,37 @@ class _HtmlTokenizer {
}
private _consumeTagOpen(start: ParseLocation) {
this._attemptUntilFn(isNotWhitespace);
var nameStart = this.index;
this._consumeTagOpenStart(start);
var lowercaseTagName = this.inputLowercase.substring(nameStart, this.index);
this._attemptUntilFn(isNotWhitespace);
while (this.peek !== $SLASH && this.peek !== $GT) {
this._consumeAttributeName();
let savedPos = this._savePosition();
let lowercaseTagName;
try {
this._attemptUntilFn(isNotWhitespace);
if (this._attemptChar($EQ)) {
var nameStart = this.index;
this._consumeTagOpenStart(start);
lowercaseTagName = this.inputLowercase.substring(nameStart, this.index);
this._attemptUntilFn(isNotWhitespace);
while (this.peek !== $SLASH && this.peek !== $GT) {
this._consumeAttributeName();
this._attemptUntilFn(isNotWhitespace);
if (this._attemptChar($EQ)) {
this._attemptUntilFn(isNotWhitespace);
this._consumeAttributeValue();
}
this._attemptUntilFn(isNotWhitespace);
this._consumeAttributeValue();
}
this._attemptUntilFn(isNotWhitespace);
this._consumeTagOpenEnd();
} catch (e) {
if (e instanceof ControlFlowError) {
// When the start tag is invalid, assume we want a "<"
this._restorePosition(savedPos);
// Back to back text tokens are merged at the end
this._beginToken(HtmlTokenType.TEXT, start);
this._endToken(['<']);
return;
}
throw e;
}
this._consumeTagOpenEnd();
var contentTokenType = getHtmlTagDefinition(lowercaseTagName).contentType;
if (contentTokenType === HtmlTagContentType.RAW_TEXT) {
this._consumeRawTextWithTagClose(lowercaseTagName, false);
@ -470,13 +487,20 @@ class _HtmlTokenizer {
this._endToken([this._processCarriageReturns(parts.join(''))]);
}
private _savePosition(): number[] { return [this.peek, this.index, this.column, this.line]; }
/**
 * Captures the current cursor state so that it can be handed back to
 * `_restorePosition` later.
 *
 * @returns a snapshot laid out as
 *     `[peek, index, column, line, number of tokens emitted so far]`.
 */
private _savePosition(): number[] {
  // The token count is recorded last so that a restore can drop any tokens
  // emitted after this snapshot was taken.
  let emittedTokenCount = this.tokens.length;
  return [this.peek, this.index, this.column, this.line, emittedTokenCount];
}
/**
 * Rewinds the tokenizer to a snapshot previously produced by `_savePosition`.
 *
 * @param position snapshot laid out as
 *     `[peek, index, column, line, number of tokens at snapshot time]`.
 */
private _restorePosition(position: number[]): void {
  // Cursor state is copied back field by field, in snapshot order.
  this.peek = position[0];
  this.index = position[1];
  this.column = position[2];
  this.line = position[3];

  // Any token emitted after the snapshot was taken is discarded.
  let savedTokenCount = position[4];
  if (this.tokens.length > savedTokenCount) {
    this.tokens = ListWrapper.slice(this.tokens, 0, savedTokenCount);
  }
}
}
@ -516,3 +540,21 @@ function isAsciiLetter(code: number): boolean {
/**
 * Tells whether a character code falls in the `$a`..`$f` range or in the
 * `$0`..`$9` range (both bounds inclusive).
 *
 * NOTE(review): only those two ranges are tested, so an upper-case hex letter
 * is presumably rejected unless callers lower-case their input first - confirm
 * against the call sites.
 *
 * @param code character code to classify.
 * @returns `true` when `code` lies in either range.
 */
function isAsciiHexDigit(code: number): boolean {
  let inLetterRange = $a <= code && code <= $f;
  let inDigitRange = $0 <= code && code <= $9;
  return inLetterRange || inDigitRange;
}
/**
 * Collapses every run of consecutive TEXT tokens into a single TEXT token.
 *
 * The first token of a run is kept and updated in place: the first part of
 * each following TEXT token is appended to its `parts[0]`, and its
 * `sourceSpan.end` is moved to the end of the token being absorbed. Tokens of
 * any other type are passed through untouched and in their original order.
 *
 * @param srcTokens tokens in the order they were emitted.
 * @returns a new array holding the merged token sequence; the token objects
 *     themselves are shared with `srcTokens`.
 */
function mergeTextTokens(srcTokens: HtmlToken[]): HtmlToken[] {
  let merged = [];
  let previous: HtmlToken;
  for (let idx = 0; idx < srcTokens.length; idx++) {
    let current = srcTokens[idx];
    let continuesTextRun = isPresent(previous) && previous.type == HtmlTokenType.TEXT &&
                           current.type == HtmlTokenType.TEXT;
    if (!continuesTextRun) {
      // Start of a new run (or a non-text token): keep it as-is.
      previous = current;
      merged.push(current);
      continue;
    }
    // Fold the current text into the token that opened the run.
    previous.parts[0] += current.parts[0];
    previous.sourceSpan.end = current.sourceSpan.end;
  }
  return merged;
}