diff --git a/modules/angular2/src/compiler/html_lexer.ts b/modules/angular2/src/compiler/html_lexer.ts
index 9c604d9acc..243a5e69b4 100644
--- a/modules/angular2/src/compiler/html_lexer.ts
+++ b/modules/angular2/src/compiler/html_lexer.ts
@@ -83,6 +83,9 @@ const $x = 120;
 
 const $NBSP = 160;
 
+var CRLF_REGEXP = /\r\n/g;
+var CR_REGEXP = /\r/g;
+
 function unexpectedCharacterErrorMsg(charCode: number): string {
   var char = charCode === $EOF ? 'EOF' : StringWrapper.fromCharCode(charCode);
   return `Unexpected character "${char}"`;
@@ -119,6 +122,14 @@ class _HtmlTokenizer {
     this._advance();
   }
 
+  private _processCarriageReturns(content: string): string {
+    // http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
+    // In order to keep the original position in the source, we can not pre-process it.
+    // Instead CRs are processed right before instantiating the tokens.
+    content = StringWrapper.replaceAll(content, CRLF_REGEXP, '\r');
+    return StringWrapper.replaceAll(content, CR_REGEXP, '\n');
+  }
+
   tokenize(): HtmlTokenizeResult {
     while (this.peek !== $EOF) {
       var start = this._getLocation();
@@ -315,7 +326,7 @@ class _HtmlTokenizer {
         parts.push(this._readChar(decodeEntities));
      }
    }
-    return this._endToken([parts.join('')], tagCloseStart);
+    return this._endToken([this._processCarriageReturns(parts.join(''))], tagCloseStart);
  }
 
  private _consumeComment(start: ParseLocation) {
@@ -428,7 +439,7 @@ class _HtmlTokenizer {
      this._requireUntilFn(isNameEnd, 1);
      value = this.input.substring(valueStart, this.index);
    }
-    this._endToken([value]);
+    this._endToken([this._processCarriageReturns(value)]);
  }
 
  private _consumeTagOpenEnd() {
@@ -456,7 +467,7 @@ class _HtmlTokenizer {
    while (!isTextEnd(this.peek)) {
      parts.push(this._readChar(true));
    }
-    this._endToken([parts.join('')]);
+    this._endToken([this._processCarriageReturns(parts.join(''))]);
  }
 
  private _savePosition(): number[] { return [this.peek, this.index, this.column, this.line]; }
diff --git a/modules/angular2/test/compiler/html_lexer_spec.ts b/modules/angular2/test/compiler/html_lexer_spec.ts
index 2caba1af75..35b0f5676c 100644
--- a/modules/angular2/test/compiler/html_lexer_spec.ts
+++ b/modules/angular2/test/compiler/html_lexer_spec.ts
@@ -53,26 +53,38 @@ export function main() {
               [HtmlTokenType.EOF, '2:5']
             ]);
       });
+
+      it('should work with CR and LF', () => {
+        expect(tokenizeAndHumanizeLineColumn('<t\n>\r\na\r</t>'))
+            .toEqual([
+              [HtmlTokenType.TAG_OPEN_START, '0:0'],
+              [HtmlTokenType.TAG_OPEN_END, '1:0'],
+              [HtmlTokenType.TEXT, '1:1'],
+              [HtmlTokenType.TAG_CLOSE, '2:1'],
+              [HtmlTokenType.EOF, '2:5']
+            ]);
+      });
     });
 
     describe('comments', () => {
       it('should parse comments', () => {
-        expect(tokenizeAndHumanizeParts('<!--test-->'))
+        expect(tokenizeAndHumanizeParts('<!--t\ne\rs\r\nt-->'))
             .toEqual([
               [HtmlTokenType.COMMENT_START],
-              [HtmlTokenType.RAW_TEXT, 'test'],
+              [HtmlTokenType.RAW_TEXT, 't\ne\ns\nt'],
               [HtmlTokenType.COMMENT_END],
               [HtmlTokenType.EOF]
             ]);
       });
 
-      it('should store the locations', () => {expect(tokenizeAndHumanizeSourceSpans('<!--test-->'))
-          .toEqual([
-            [HtmlTokenType.COMMENT_START, '<!--'],
-            [HtmlTokenType.RAW_TEXT, 'test'],
-            [HtmlTokenType.COMMENT_END, '-->'],
-            [HtmlTokenType.EOF, '']
-          ])});
+      it('should store the locations',
+         () => {expect(tokenizeAndHumanizeSourceSpans('<!--test-->'))
+                    .toEqual([
+                      [HtmlTokenType.COMMENT_START, '<!--'],
+                      [HtmlTokenType.RAW_TEXT, 'test'],
+                      [HtmlTokenType.COMMENT_END, '-->'],
+                      [HtmlTokenType.EOF, '']
+                    ])});
 
       it('should report <!- without -', () => {
         expect(tokenizeAndHumanizeErrors('<!-a'))
 
     describe('cdata', () => {
       it('should parse cdata', () => {
-        expect(tokenizeAndHumanizeParts('<![CDATA[test]]>'))
+        expect(tokenizeAndHumanizeParts('<![CDATA[t\ne\rs\r\nt]]>'))
            .toEqual([
              [HtmlTokenType.CDATA_START],
-              [HtmlTokenType.RAW_TEXT, 'test'],
+              [HtmlTokenType.RAW_TEXT, 't\ne\ns\nt'],
              [HtmlTokenType.CDATA_END],
              [HtmlTokenType.EOF]
            ]);
      });
 
      it('should store the locations', () => {
-        expect(tokenizeAndHumanizeSourceSpans('<![CDATA[test]]>'))
+        expect(tokenizeAndHumanizeSourceSpans('<![CDATA[t\ne\rs\r\nt]]>'))
            .toEqual([
              [HtmlTokenType.CDATA_START, '<![CDATA['],
-              [HtmlTokenType.RAW_TEXT, 'test'],
+              [HtmlTokenType.RAW_TEXT, 't\ne\rs\r\nt'],
              [HtmlTokenType.CDATA_END, ']]>'],
              [HtmlTokenType.EOF, '']
            ]);
@@ -301,6 +313,17 @@ export function main() {
            ]);
      });
+      it('should parse values with CR and LF', () => {
+        expect(tokenizeAndHumanizeParts("<t a='t\ne\rs\r\nt'>"))
+            .toEqual([
+              [HtmlTokenType.TAG_OPEN_START, null, 't'],
+              [HtmlTokenType.ATTR_NAME, null, 'a'],
+              [HtmlTokenType.ATTR_VALUE, 't\ne\ns\nt'],
+              [HtmlTokenType.TAG_OPEN_END],
+              [HtmlTokenType.EOF]
+            ]);
+      });
+
      it('should store the locations', () => {
        expect(tokenizeAndHumanizeSourceSpans('<t a=b>'))
            .toEqual([
              [HtmlTokenType.TAG_OPEN_START, '<t'],
@@ -406,6 +429,11 @@ export function main() {
            .toEqual([[HtmlTokenType.TEXT, 'a'], [HtmlTokenType.EOF]]);
      });
 
+      it('should handle CR & LF', () => {
+        expect(tokenizeAndHumanizeParts('t\ne\rs\r\nt'))
+            .toEqual([[HtmlTokenType.TEXT, 't\ne\ns\nt'], [HtmlTokenType.EOF]]);
+      });
+
      it('should parse entities', () => {
        expect(tokenizeAndHumanizeParts('a&amp;b'))
            .toEqual([[HtmlTokenType.TEXT, 'a&b'], [HtmlTokenType.EOF]]);
@@ -424,11 +452,11 @@ export function main() {
    describe('raw text', () => {
      it('should parse text', () => {
-        expect(tokenizeAndHumanizeParts(`<script>a</script>`))
+        expect(tokenizeAndHumanizeParts(`<script>t\ne\rs\r\nt</script>`))
            .toEqual([
              [HtmlTokenType.TAG_OPEN_START, null, 'script'],
              [HtmlTokenType.TAG_OPEN_END],
-              [HtmlTokenType.RAW_TEXT, 'a'],
+              [HtmlTokenType.RAW_TEXT, 't\ne\ns\nt'],
              [HtmlTokenType.TAG_CLOSE, null, 'script'],
              [HtmlTokenType.EOF]
            ]);
      });
@@ -482,11 +510,11 @@ export function main() {
    describe('escapable raw text', () => {
      it('should parse text', () => {
-        expect(tokenizeAndHumanizeParts(`<title>a</title>`))
+        expect(tokenizeAndHumanizeParts(`<title>t\ne\rs\r\nt</title>`))
            .toEqual([
              [HtmlTokenType.TAG_OPEN_START, null, 'title'],
              [HtmlTokenType.TAG_OPEN_END],
-              [HtmlTokenType.ESCAPABLE_RAW_TEXT, 'a'],
+              [HtmlTokenType.ESCAPABLE_RAW_TEXT, 't\ne\ns\nt'],
              [HtmlTokenType.TAG_CLOSE, null, 'title'],
              [HtmlTokenType.EOF]
            ]);