feat(Compiler): case sensitive html parser

close #4417
Closes #5264
This commit is contained in:
Victor Berchet
2015-11-10 15:56:25 -08:00
parent adb87562bb
commit 36a423fac8
13 changed files with 834 additions and 381 deletions

View File

@ -6,7 +6,6 @@ import {
CONST_EXPR,
serializeEnum
} from 'angular2/src/facade/lang';
import {BaseException} from 'angular2/src/facade/exceptions';
import {ParseLocation, ParseError, ParseSourceFile, ParseSourceSpan} from './parse_util';
import {getHtmlTagDefinition, HtmlTagContentType, NAMED_ENTITIES} from './html_tags';
@ -50,12 +49,14 @@ export function tokenizeHtml(sourceContent: string, sourceUrl: string): HtmlToke
const $EOF = 0;
const $TAB = 9;
const $LF = 10;
const $FF = 12;
const $CR = 13;
const $SPACE = 32;
const $BANG = 33;
const $DQ = 34;
const $HASH = 35;
const $$ = 36;
const $AMPERSAND = 38;
const $SQ = 39;
@ -76,7 +77,9 @@ const $Z = 90;
const $LBRACKET = 91;
const $RBRACKET = 93;
const $a = 97;
const $f = 102;
const $z = 122;
const $x = 120;
const $NBSP = 160;
@ -86,7 +89,7 @@ function unexpectedCharacterErrorMsg(charCode: number): string {
}
function unknownEntityErrorMsg(entitySrc: string): string {
return `Unknown entity "${entitySrc}"`;
return `Unknown entity "${entitySrc}" - use the "&#<decimal>;" or "&#x<hex>;" syntax`;
}
class ControlFlowError {
@ -249,16 +252,7 @@ class _HtmlTokenizer {
private _readChar(decodeEntities: boolean): string {
if (decodeEntities && this.peek === $AMPERSAND) {
var start = this._getLocation();
this._attemptUntilChar($SEMICOLON);
this._advance();
var entitySrc = this.input.substring(start.offset + 1, this.index - 1);
var decodedEntity = decodeEntity(entitySrc);
if (isPresent(decodedEntity)) {
return decodedEntity;
} else {
throw this._createError(unknownEntityErrorMsg(entitySrc), start);
}
return this._decodeEntity();
} else {
var index = this.index;
this._advance();
@ -266,6 +260,42 @@ class _HtmlTokenizer {
}
}
private _decodeEntity(): string {
var start = this._getLocation();
this._advance();
if (this._attemptChar($HASH)) {
let isHex = this._attemptChar($x);
let numberStart = this._getLocation().offset;
this._attemptUntilFn(isDigitEntityEnd);
if (this.peek != $SEMICOLON) {
throw this._createError(unexpectedCharacterErrorMsg(this.peek), this._getLocation());
}
this._advance();
let strNum = this.input.substring(numberStart, this.index - 1);
try {
let charCode = NumberWrapper.parseInt(strNum, isHex ? 16 : 10);
return StringWrapper.fromCharCode(charCode);
} catch (e) {
let entity = this.input.substring(start.offset + 1, this.index - 1);
throw this._createError(unknownEntityErrorMsg(entity), start);
}
} else {
let startPosition = this._savePosition();
this._attemptUntilFn(isNamedEntityEnd);
if (this.peek != $SEMICOLON) {
this._restorePosition(startPosition);
return '&';
}
this._advance();
let name = this.input.substring(start.offset + 1, this.index - 1);
let char = NAMED_ENTITIES[name];
if (isBlank(char)) {
throw this._createError(unknownEntityErrorMsg(name), start);
}
return char;
}
}
private _consumeRawText(decodeEntities: boolean, firstCharOfEnd: number,
attemptEndRest: Function): HtmlToken {
var tagCloseStart;
@ -428,6 +458,15 @@ class _HtmlTokenizer {
}
this._endToken([parts.join('')]);
}
private _savePosition(): number[] { return [this.peek, this.index, this.column, this.line]; }
private _restorePosition(position: number[]): void {
this.peek = position[0];
this.index = position[1];
this.column = position[2];
this.line = position[3];
}
}
function isNotWhitespace(code: number): boolean {
@ -440,39 +479,29 @@ function isWhitespace(code: number): boolean {
function isNameEnd(code: number): boolean {
return isWhitespace(code) || code === $GT || code === $SLASH || code === $SQ || code === $DQ ||
code === $EQ
code === $EQ;
}
function isPrefixEnd(code: number): boolean {
return (code < $a || $z < code) && (code < $A || $Z < code) && (code < $0 || code > $9);
}
function isDigitEntityEnd(code: number): boolean {
return code == $SEMICOLON || code == $EOF || !isAsciiHexDigit(code);
}
function isNamedEntityEnd(code: number): boolean {
return code == $SEMICOLON || code == $EOF || !isAsciiLetter(code);
}
function isTextEnd(code: number): boolean {
return code === $LT || code === $EOF;
}
function decodeEntity(entity: string): string {
var i = 0;
var isNumber = entity.length > i && entity[i] == '#';
if (isNumber) i++;
var isHex = entity.length > i && entity[i] == 'x';
if (isHex) i++;
var value = entity.substring(i);
var result = null;
if (isNumber) {
var charCode;
try {
charCode = NumberWrapper.parseInt(value, isHex ? 16 : 10);
} catch (e) {
return null;
}
result = StringWrapper.fromCharCode(charCode);
} else {
result = NAMED_ENTITIES[value];
}
if (isPresent(result)) {
return result;
} else {
return null;
}
function isAsciiLetter(code: number): boolean {
return code >= $a && code <= $z;
}
function isAsciiHexDigit(code: number): boolean {
return code >= $a && code <= $f || code >= $0 && code <= $9;
}