From e0aa39929b3120acfc6fed83fba697eb3696c4ba Mon Sep 17 00:00:00 2001 From: Pete Bacon Darwin Date: Sun, 26 Apr 2020 18:15:43 +0100 Subject: [PATCH] refactor(compiler): simplify tokenizer and parser results (#36741) Move the creation of the results objects into the wrapper functions. This makes it easier to reason about what the parser and lexer classes are responsible for - you create a new object for each tokenization or parsing activity and they hold the state of the activity. PR Close #36741 --- packages/compiler/src/ml_parser/lexer.ts | 7 +-- packages/compiler/src/ml_parser/parser.ts | 53 +++++++++++------------ 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/packages/compiler/src/ml_parser/lexer.ts b/packages/compiler/src/ml_parser/lexer.ts index e499f5994c..c9e6e33497 100644 --- a/packages/compiler/src/ml_parser/lexer.ts +++ b/packages/compiler/src/ml_parser/lexer.ts @@ -110,7 +110,9 @@ export interface TokenizeOptions { export function tokenize( source: string, url: string, getTagDefinition: (tagName: string) => TagDefinition, options: TokenizeOptions = {}): TokenizeResult { - return new _Tokenizer(new ParseSourceFile(source, url), getTagDefinition, options).tokenize(); + const tokenizer = new _Tokenizer(new ParseSourceFile(source, url), getTagDefinition, options); + tokenizer.tokenize(); + return new TokenizeResult(mergeTextTokens(tokenizer.tokens), tokenizer.errors); } const _CR_OR_CRLF_REGEXP = /\r\n?/g; @@ -177,7 +179,7 @@ class _Tokenizer { return content.replace(_CR_OR_CRLF_REGEXP, '\n'); } - tokenize(): TokenizeResult { + tokenize(): void { while (this._cursor.peek() !== chars.$EOF) { const start = this._cursor.clone(); try { @@ -204,7 +206,6 @@ class _Tokenizer { } this._beginToken(TokenType.EOF); this._endToken([]); - return new TokenizeResult(mergeTextTokens(this.tokens), this.errors); } /** diff --git a/packages/compiler/src/ml_parser/parser.ts b/packages/compiler/src/ml_parser/parser.ts index cbb46723ed..eb9564a407 100644 --- a/packages/compiler/src/ml_parser/parser.ts +++ b/packages/compiler/src/ml_parser/parser.ts @@ -30,32 +30,29 @@ export class Parser { constructor(public getTagDefinition: (tagName: string) => TagDefinition) {} parse(source: string, url: string, options?: lex.TokenizeOptions): ParseTreeResult { - const tokensAndErrors = lex.tokenize(source, url, this.getTagDefinition, options); - - const treeAndErrors = new _TreeBuilder(tokensAndErrors.tokens, this.getTagDefinition).build(); - + const tokenizeResult = lex.tokenize(source, url, this.getTagDefinition, options); + const parser = new _TreeBuilder(tokenizeResult.tokens, this.getTagDefinition); + parser.build(); return new ParseTreeResult( - treeAndErrors.rootNodes, - (tokensAndErrors.errors).concat(treeAndErrors.errors)); + parser.rootNodes, (tokenizeResult.errors as ParseError[]).concat(parser.errors)); } } class _TreeBuilder { private _index: number = -1; - // TODO(issue/24571): remove '!'. + // `_peek` will be initialized by the call to `advance()` in the constructor. private _peek!: lex.Token; - - private _rootNodes: html.Node[] = []; - private _errors: TreeError[] = []; - private _elementStack: html.Element[] = []; + rootNodes: html.Node[] = []; + errors: TreeError[] = []; + constructor( private tokens: lex.Token[], private getTagDefinition: (tagName: string) => TagDefinition) { this._advance(); } - build(): ParseTreeResult { + build(): void { while (this._peek.type !== lex.TokenType.EOF) { if (this._peek.type === lex.TokenType.TAG_OPEN_START) { this._consumeStartTag(this._advance()); @@ -79,7 +76,6 @@ class _TreeBuilder { this._advance(); } } - return new ParseTreeResult(this._rootNodes, this._errors); } private _advance(): lex.Token { @@ -99,7 +95,7 @@ class _TreeBuilder { return null; } - private _consumeCdata(startToken: lex.Token) { + private _consumeCdata(_startToken: lex.Token) { this._consumeText(this._advance()); this._advanceIf(lex.TokenType.CDATA_END); } @@ -126,7 +122,7 @@ class _TreeBuilder { // read the final } if (this._peek.type !== lex.TokenType.EXPANSION_FORM_END) { - this._errors.push( + this.errors.push( TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '}'.`)); return; } @@ -142,7 +138,7 @@ class _TreeBuilder { // read { if (this._peek.type !== lex.TokenType.EXPANSION_CASE_EXP_START) { - this._errors.push( + this.errors.push( TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '{'.`)); return null; } @@ -157,16 +153,17 @@ class _TreeBuilder { exp.push(new lex.Token(lex.TokenType.EOF, [], end.sourceSpan)); // parse everything in between { and } - const parsedExp = new _TreeBuilder(exp, this.getTagDefinition).build(); - if (parsedExp.errors.length > 0) { - this._errors = this._errors.concat(parsedExp.errors); + const expansionCaseParser = new _TreeBuilder(exp, this.getTagDefinition); + expansionCaseParser.build(); + if (expansionCaseParser.errors.length > 0) { + this.errors = this.errors.concat(expansionCaseParser.errors); return null; } const sourceSpan = new ParseSourceSpan(value.sourceSpan.start, end.sourceSpan.end); const expSourceSpan = new ParseSourceSpan(start.sourceSpan.start, end.sourceSpan.end); return new html.ExpansionCase( - value.parts[0], parsedExp.rootNodes, sourceSpan, value.sourceSpan, expSourceSpan); + value.parts[0], expansionCaseParser.rootNodes, sourceSpan, value.sourceSpan, expSourceSpan); } private _collectExpansionExpTokens(start: lex.Token): lex.Token[]|null { @@ -185,7 +182,7 @@ class _TreeBuilder { if (expansionFormStack.length == 0) return exp; } else { - this._errors.push( + this.errors.push( TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`)); return null; } @@ -195,14 +192,14 @@ class _TreeBuilder { if (lastOnStack(expansionFormStack, lex.TokenType.EXPANSION_FORM_START)) { expansionFormStack.pop(); } else { - this._errors.push( + this.errors.push( TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`)); return null; } } if (this._peek.type === lex.TokenType.EOF) { - this._errors.push( + this.errors.push( TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`)); return null; } @@ -249,7 +246,7 @@ class _TreeBuilder { selfClosing = true; const tagDef = this.getTagDefinition(fullName); if (!(tagDef.canSelfClose || getNsPrefix(fullName) !== null || tagDef.isVoid)) { - this._errors.push(TreeError.create( + this.errors.push(TreeError.create( fullName, startTagToken.sourceSpan, `Only void and foreign elements can be self closed "${startTagToken.parts[1]}"`)); } @@ -287,13 +284,13 @@ class _TreeBuilder { } if (this.getTagDefinition(fullName).isVoid) { - this._errors.push(TreeError.create( + this.errors.push(TreeError.create( fullName, endTagToken.sourceSpan, `Void elements do not have end tags "${endTagToken.parts[1]}"`)); } else if (!this._popElement(fullName)) { const errMsg = `Unexpected closing tag "${ fullName}". It may happen when the tag has already been closed by another tag. For more info see https://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags`; - this._errors.push(TreeError.create(fullName, endTagToken.sourceSpan, errMsg)); + this.errors.push(TreeError.create(fullName, endTagToken.sourceSpan, errMsg)); } } @@ -362,7 +359,7 @@ class _TreeBuilder { if (parent != null) { parent.children.push(node); } else { - this._rootNodes.push(node); + this.rootNodes.push(node); } } @@ -384,7 +381,7 @@ class _TreeBuilder { const index = parent.children.indexOf(container); parent.children[index] = node; } else { - this._rootNodes.push(node); + this.rootNodes.push(node); } node.children.push(container); this._elementStack.splice(this._elementStack.indexOf(container), 0, node);