From d24ade91b881780572214f700f891382cab08647 Mon Sep 17 00:00:00 2001 From: Pete Bacon Darwin Date: Wed, 2 Oct 2019 18:17:56 +0100 Subject: [PATCH] fix(ivy): i18n - support colons in $localize metadata (#32867) Metadata blocks are delimited by colons. Previously the code naively just looked for the next colon in the string as the end marker. This commit supports escaping colons within the metadata content. The Angular compiler has been updated to add escaping as required. PR Close #32867 --- .../src/ngtsc/translator/src/translator.ts | 16 +++--- .../compliance/r3_view_compiler_i18n_spec.ts | 10 ++-- .../compiler/src/render3/view/i18n/meta.ts | 41 +++++++++----- .../compiler/test/render3/view/i18n_spec.ts | 38 +++++++++---- .../localize/src/localize/src/localize.ts | 44 ++++++++++++--- .../src/localize/test/localize_spec.ts | 5 ++ packages/localize/src/utils/messages.ts | 56 ++++++++++++++----- packages/localize/test/utils/messages_spec.ts | 31 +++++++++- 8 files changed, 183 insertions(+), 58 deletions(-) diff --git a/packages/compiler-cli/src/ngtsc/translator/src/translator.ts b/packages/compiler-cli/src/ngtsc/translator/src/translator.ts index 7ccd7abdb1..947f6bd3a6 100644 --- a/packages/compiler-cli/src/ngtsc/translator/src/translator.ts +++ b/packages/compiler-cli/src/ngtsc/translator/src/translator.ts @@ -8,7 +8,7 @@ import {ArrayType, AssertNotNull, BinaryOperator, BinaryOperatorExpr, BuiltinType, BuiltinTypeName, CastExpr, ClassStmt, CommaExpr, CommentStmt, ConditionalExpr, DeclareFunctionStmt, DeclareVarStmt, Expression, ExpressionStatement, ExpressionType, ExpressionVisitor, ExternalExpr, ExternalReference, FunctionExpr, IfStmt, InstantiateExpr, InvokeFunctionExpr, InvokeMethodExpr, JSDocCommentStmt, LiteralArrayExpr, LiteralExpr, LiteralMapExpr, MapType, NotExpr, ReadKeyExpr, ReadPropExpr, ReadVarExpr, ReturnStatement, Statement, StatementVisitor, StmtModifier, ThrowStmt, TryCatchStmt, Type, TypeVisitor, TypeofExpr, WrappedNodeExpr, WriteKeyExpr, WritePropExpr, WriteVarExpr} from '@angular/compiler'; import {LocalizedString} from '@angular/compiler/src/output/output_ast'; -import {serializeI18nMetaBlock, serializeI18nPlaceholderBlock} from '@angular/compiler/src/render3/view/i18n/meta'; +import {serializeI18nHead, serializeI18nTemplatePart} from '@angular/compiler/src/render3/view/i18n/meta'; import * as ts from 'typescript'; import {DefaultImportRecorder, ImportRewriter, NOOP_DEFAULT_IMPORT_RECORDER, NoopImportRewriter} from '../../imports'; @@ -529,18 +529,18 @@ export class TypeTranslatorVisitor implements ExpressionVisitor, TypeVisitor { */ function visitLocalizedString(ast: LocalizedString, context: Context, visitor: ExpressionVisitor) { let template: ts.TemplateLiteral; - const headPart = `${serializeI18nMetaBlock(ast.metaBlock)}${ast.messageParts[0]}`; + const metaBlock = serializeI18nHead(ast.metaBlock, ast.messageParts[0]); if (ast.messageParts.length === 1) { - template = ts.createNoSubstitutionTemplateLiteral(headPart); + template = ts.createNoSubstitutionTemplateLiteral(metaBlock); } else { - const head = ts.createTemplateHead(headPart); + const head = ts.createTemplateHead(metaBlock); const spans: ts.TemplateSpan[] = []; for (let i = 1; i < ast.messageParts.length; i++) { const resolvedExpression = ast.expressions[i - 1].visitExpression(visitor, context); - spans.push(ts.createTemplateSpan( - resolvedExpression, - ts.createTemplateMiddle( - serializeI18nPlaceholderBlock(ast.placeHolderNames[i - 1]) + ast.messageParts[i]))); + const templatePart = + serializeI18nTemplatePart(ast.placeHolderNames[i - 1], ast.messageParts[i]); + const templateMiddle = ts.createTemplateMiddle(templatePart); + spans.push(ts.createTemplateSpan(resolvedExpression, templateMiddle)); } if (spans.length > 0) { // The last span is supposed to have a tail rather than a middle diff --git a/packages/compiler-cli/test/compliance/r3_view_compiler_i18n_spec.ts b/packages/compiler-cli/test/compliance/r3_view_compiler_i18n_spec.ts index 228320d1c9..5b331533ee 100644 --- a/packages/compiler-cli/test/compliance/r3_view_compiler_i18n_spec.ts +++ b/packages/compiler-cli/test/compliance/r3_view_compiler_i18n_spec.ts @@ -172,7 +172,7 @@ const verify = (input: string, output: string, extra: any = {}): void => { describe('i18n support in the template compiler', () => { describe('element attributes', () => { - it('should add the meaning and description as JsDoc comments', () => { + it('should add the meaning and description as JsDoc comments and metadata blocks', () => { const input = `
Content A
Content B
@@ -265,7 +265,7 @@ describe('i18n support in the template compiler', () => { $I18N_23$ = $MSG_EXTERNAL_idG$$APP_SPEC_TS_24$; } else { - $I18N_23$ = $localize \`:[BACKUP_MESSAGE_ID:idH]desc@@idG:Title G\`; + $I18N_23$ = $localize \`:[BACKUP_MESSAGE_ID\\:idH]desc@@idG:Title G\`; } const $_c25$ = ["title", $I18N_23$]; … @@ -2361,9 +2361,9 @@ describe('i18n support in the template compiler', () => {
Test
`; - // TODO(FW-635): currently we generate unique consts for each i18n block even though it might - // contain the same content. This should be optimized by translation statements caching, that - // can be implemented in the future within FW-635. + // TODO(FW-635): currently we generate unique consts for each i18n block even though it + // might contain the same content. This should be optimized by translation statements caching, + // that can be implemented in the future within FW-635. const output = String.raw ` var $I18N_0$; if (ngI18nClosureMode) { diff --git a/packages/compiler/src/render3/view/i18n/meta.ts b/packages/compiler/src/render3/view/i18n/meta.ts index 3d45bb1ac8..f627d846b0 100644 --- a/packages/compiler/src/render3/view/i18n/meta.ts +++ b/packages/compiler/src/render3/view/i18n/meta.ts @@ -179,12 +179,13 @@ export function parseI18nMeta(meta?: string): I18nMeta { } /** - * Serialize the given `meta` into a string that can be used in a `$localize` tagged string metadata - * block. The format is the same as that parsed by `parseI18nMeta()`. + * Serialize the given `meta` and `messagePart` a string that can be used in a `$localize` + * tagged string. The format of the metadata is the same as that parsed by `parseI18nMeta()`. * * @param meta The metadata to serialize + * @param messagePart The first part of the tagged string */ -export function serializeI18nMetaBlock(meta: I18nMeta): string { +export function serializeI18nHead(meta: I18nMeta, messagePart: string): string { let metaBlock = meta.description || ''; if (meta.meaning) { metaBlock = `${meta.meaning}|${metaBlock}`; @@ -192,22 +193,28 @@ export function serializeI18nMetaBlock(meta: I18nMeta): string { if (meta.id) { metaBlock = `${metaBlock}@@${meta.id}`; } - return metaBlock !== '' ? `:${metaBlock}:` : ''; + if (metaBlock === '') { + // There is no metaBlock, so we must ensure that any starting colon is escaped. + return escapeStartingColon(messagePart); + } else { + return `:${escapeColons(metaBlock)}:${messagePart}`; + } } /** - * Convert a placeholder into marked block for rendering. - * - * We want our tagged literals to include placeholder name information to aid runtime translation. - * - * The expressions are marked with placeholder names by postfixing the expression with - * `:placeHolderName:`. To achieve this, we actually "prefix" the message part that follows the - * expression. + * Serialize the given `placeholderName` and `messagePart` into strings that can be used in a + * `$localize` tagged string. * * @param placeholderName The placeholder name to serialize + * @param messagePart The following message string after this placeholder */ -export function serializeI18nPlaceholderBlock(placeholderName: string): string { - return placeholderName !== '' ? `:${placeholderName}:` : ''; +export function serializeI18nTemplatePart(placeholderName: string, messagePart: string): string { + if (placeholderName === '') { + // There is no placeholder name block, so we must ensure that any starting colon is escaped. + return escapeStartingColon(messagePart); + } else { + return `:${placeholderName}:${messagePart}`; + } } // Converts i18n meta information for a message (id, description, meaning) @@ -222,3 +229,11 @@ export function i18nMetaToDocStmt(meta: I18nMeta): o.JSDocCommentStmt|null { } return tags.length == 0 ? null : new o.JSDocCommentStmt(tags); } + +export function escapeStartingColon(str: string): string { + return str.replace(/^:/, '\\:'); +} + +export function escapeColons(str: string): string { + return str.replace(/:/g, '\\:'); +} \ No newline at end of file diff --git a/packages/compiler/test/render3/view/i18n_spec.ts b/packages/compiler/test/render3/view/i18n_spec.ts index c40228d241..6e06ef9e50 100644 --- a/packages/compiler/test/render3/view/i18n_spec.ts +++ b/packages/compiler/test/render3/view/i18n_spec.ts @@ -15,7 +15,7 @@ import {I18nContext} from '../../../src/render3/view/i18n/context'; import {serializeI18nMessageForGetMsg} from '../../../src/render3/view/i18n/get_msg_utils'; import {serializeIcuNode} from '../../../src/render3/view/i18n/icu_serializer'; import {serializeI18nMessageForLocalize} from '../../../src/render3/view/i18n/localize_utils'; -import {I18nMeta, parseI18nMeta, serializeI18nMetaBlock, serializeI18nPlaceholderBlock} from '../../../src/render3/view/i18n/meta'; +import {I18nMeta, parseI18nMeta, serializeI18nHead, serializeI18nTemplatePart} from '../../../src/render3/view/i18n/meta'; import {formatI18nPlaceholderName} from '../../../src/render3/view/i18n/util'; import {parseR3 as parse} from './util'; @@ -209,18 +209,36 @@ describe('Utils', () => { expect(parseI18nMeta('@@id')).toEqual(meta('id', '', '')); }); - it('serializeI18nMetaBlock()', () => { - expect(serializeI18nMetaBlock(meta())).toEqual(''); - expect(serializeI18nMetaBlock(meta('', '', 'desc'))).toEqual(':desc:'); - expect(serializeI18nMetaBlock(meta('id', '', 'desc'))).toEqual(':desc@@id:'); - expect(serializeI18nMetaBlock(meta('', 'meaning', 'desc'))).toEqual(':meaning|desc:'); - expect(serializeI18nMetaBlock(meta('id', 'meaning', 'desc'))).toEqual(':meaning|desc@@id:'); - expect(serializeI18nMetaBlock(meta('id', '', ''))).toEqual(':@@id:'); + it('serializeI18nHead()', () => { + expect(serializeI18nHead(meta(), '')).toEqual(''); + expect(serializeI18nHead(meta('', '', 'desc'), '')).toEqual(':desc:'); + expect(serializeI18nHead(meta('id', '', 'desc'), '')).toEqual(':desc@@id:'); + expect(serializeI18nHead(meta('', 'meaning', 'desc'), '')).toEqual(':meaning|desc:'); + expect(serializeI18nHead(meta('id', 'meaning', 'desc'), '')).toEqual(':meaning|desc@@id:'); + expect(serializeI18nHead(meta('id', '', ''), '')).toEqual(':@@id:'); + + // Escaping colons (block markers) + expect(serializeI18nHead(meta('id:sub_id', 'meaning', 'desc'), '')) + .toEqual(':meaning|desc@@id\\:sub_id:'); + expect(serializeI18nHead(meta('id', 'meaning:sub_meaning', 'desc'), '')) + .toEqual(':meaning\\:sub_meaning|desc@@id:'); + expect(serializeI18nHead(meta('id', 'meaning', 'desc:sub_desc'), '')) + .toEqual(':meaning|desc\\:sub_desc@@id:'); + expect(serializeI18nHead(meta('id', 'meaning', 'desc'), 'message source')) + .toEqual(':meaning|desc@@id:message source'); + expect(serializeI18nHead(meta('id', 'meaning', 'desc'), ':message source')) + .toEqual(':meaning|desc@@id::message source'); + expect(serializeI18nHead(meta('', '', ''), 'message source')).toEqual('message source'); + expect(serializeI18nHead(meta('', '', ''), ':message source')).toEqual('\\:message source'); }); it('serializeI18nPlaceholderBlock()', () => { - expect(serializeI18nPlaceholderBlock('')).toEqual(''); - expect(serializeI18nPlaceholderBlock('abc')).toEqual(':abc:'); + expect(serializeI18nTemplatePart('', '')).toEqual(''); + expect(serializeI18nTemplatePart('abc', '')).toEqual(':abc:'); + expect(serializeI18nTemplatePart('', 'message')).toEqual('message'); + expect(serializeI18nTemplatePart('abc', 'message')).toEqual(':abc:message'); + expect(serializeI18nTemplatePart('', ':message')).toEqual('\\:message'); + expect(serializeI18nTemplatePart('abc', ':message')).toEqual(':abc::message'); }); function meta(id?: string, meaning?: string, description?: string): I18nMeta { diff --git a/packages/localize/src/localize/src/localize.ts b/packages/localize/src/localize/src/localize.ts index 043bc4b51c..ad0f31cd8e 100644 --- a/packages/localize/src/localize/src/localize.ts +++ b/packages/localize/src/localize/src/localize.ts @@ -143,22 +143,52 @@ const BLOCK_MARKER = ':'; * escaped with a backslash, `\:`. This function checks for this by looking at the `raw` * messagePart, which should still contain the backslash. * - * If the template literal was synthesized, rather than appearing in original source code, then its - * raw array will only contain empty strings. This is because the current TypeScript compiler use + * --- + * + * If the template literal was synthesized and downleveled by TypeScript to ES5 then its + * raw array will only contain empty strings. This is because the current TypeScript compiler uses * the original source code to find the raw text and in the case of synthesized AST nodes, there is * no source code to draw upon. * * The workaround in this function is to assume that the template literal did not contain an escaped - * placeholder name, and fall back on checking the cooked array instead. This should be OK because - * synthesized nodes (from the Angular template compiler) will always provide explicit delimited - * blocks and so will never need to escape placeholder name markers. + * placeholder name, and fall back on checking the cooked array instead. + * This is a limitation if compiling to ES5 in TypeScript but is not a problem if the TypeScript + * output is ES2015 and the code is downleveled by a separate tool as happens in the Angular CLI. * * @param messagePart The cooked message part to process. * @param rawMessagePart The raw message part to check. * @returns the message part with the placeholder name stripped, if found. + * @throws an error if the block is unterminated */ function stripBlock(messagePart: string, rawMessagePart: string) { - return (rawMessagePart || messagePart).charAt(0) === BLOCK_MARKER ? - messagePart.substring(messagePart.indexOf(BLOCK_MARKER, 1) + 1) : + rawMessagePart = rawMessagePart || messagePart; + return rawMessagePart.charAt(0) === BLOCK_MARKER ? + messagePart.substring(findEndOfBlock(messagePart, rawMessagePart) + 1) : messagePart; } + +/** + * Find the end of a "marked block" indicated by the first non-escaped colon. + * + * @param cooked The cooked string (where escaped chars have been processed) + * @param raw The raw string (where escape sequences are still in place) + * + * @returns the index of the end of block marker + * @throws an error if the block is unterminated + */ +function findEndOfBlock(cooked: string, raw: string): number { + /*********************************************************************************************** + * This function is repeated in `src/utils/messages.ts` and the two should be kept in sync. + * The reason is that this file is marked as having side-effects, and if we import `messages.ts` + * into it, the whole of `src/utils` will be included in this bundle and none of the functions + * will be tree shaken. + ***********************************************************************************************/ + for (let cookedIndex = 1, rawIndex = 1; cookedIndex < cooked.length; cookedIndex++, rawIndex++) { + if (raw[rawIndex] === '\\') { + rawIndex++; + } else if (cooked[cookedIndex] === BLOCK_MARKER) { + return cookedIndex; + } + } + throw new Error(`Unterminated $localize metadata block in "${raw}".`); +} \ No newline at end of file diff --git a/packages/localize/src/localize/test/localize_spec.ts b/packages/localize/src/localize/test/localize_spec.ts index c910a177d4..53c2d039be 100644 --- a/packages/localize/src/localize/test/localize_spec.ts +++ b/packages/localize/src/localize/test/localize_spec.ts @@ -29,6 +29,11 @@ describe('$localize tag', () => { expect($localize `\:abc:def`).toEqual(':abc:def'); }); + it('should strip metadata block containing escaped block markers', () => { + expect($localize.translate).toBeUndefined(); + expect($localize `:abc\:def:content`).toEqual('content'); + }); + it('should strip placeholder names from message parts', () => { expect($localize.translate).toBeUndefined(); expect($localize `abc${1 + 2 + 3}:ph1:def${4 + 5 + 6}:ph2:`).toEqual('abc6def15'); diff --git a/packages/localize/src/utils/messages.ts b/packages/localize/src/utils/messages.ts index 6762c96ead..79b7b79356 100644 --- a/packages/localize/src/utils/messages.ts +++ b/packages/localize/src/utils/messages.ts @@ -53,6 +53,7 @@ export type MessageId = string; * { * messageId: '6998194507597730591', * substitutions: { title: 'Jo Bloggs' }, + * messageString: 'Hello {$title}!', * } * ``` */ @@ -158,28 +159,30 @@ export function parseMetadata(cooked: string, raw: string): MessageMetadata { * Since blocks are optional, it is possible that the content of a message block actually starts * with a block marker. In this case the marker must be escaped `\:`. * + * --- + * + * If the template literal was synthesized and downleveled by TypeScript to ES5 then its + * raw array will only contain empty strings. This is because the current TypeScript compiler uses + * the original source code to find the raw text and in the case of synthesized AST nodes, there is + * no source code to draw upon. + * + * The workaround in this function is to assume that the template literal did not contain an escaped + * placeholder name, and fall back on checking the cooked array instead. + * This is a limitation if compiling to ES5 in TypeScript but is not a problem if the TypeScript + * output is ES2015 and the code is downlevelled by a separate tool as happens in the Angular CLI. + * * @param cooked The cooked version of the message part to parse. * @param raw The raw version of the message part to parse. * @returns An object containing the `text` of the message part and the text of the `block`, if it * exists. + * @throws an error if the `block` is unterminated */ export function splitBlock(cooked: string, raw: string): {text: string, block?: string} { - // Synthesizing AST nodes that represent template literals using the TypeScript API is problematic - // because it doesn't allow for the raw value of messageParts to be programmatically set. - // The result is that synthesized AST nodes have empty `raw` values. - - // Normally we rely upon checking the `raw` value to check whether the `BLOCK_MARKER` was escaped - // in the original source. If the `raw` value is missing then we cannot do this. - // In such a case we fall back on the `cooked` version and assume that the `BLOCK_MARKER` was not - // escaped. - - // This should be OK because synthesized nodes only come from the Angular template compiler, which - // always provides full id and placeholder name information so it will never escape `BLOCK_MARKER` - // characters. - if ((raw || cooked).charAt(0) !== BLOCK_MARKER) { + raw = raw || cooked; + if (raw.charAt(0) !== BLOCK_MARKER) { return {text: cooked}; } else { - const endOfBlock = cooked.indexOf(BLOCK_MARKER, 1); + const endOfBlock = findEndOfBlock(cooked, raw); return { block: cooked.substring(1, endOfBlock), text: cooked.substring(endOfBlock + 1), @@ -187,6 +190,31 @@ export function splitBlock(cooked: string, raw: string): {text: string, block?: } } + function computePlaceholderName(index: number) { return index === 1 ? 'PH' : `PH_${index - 1}`; } + +/** + * Find the end of a "marked block" indicated by the first non-escaped colon. + * + * @param cooked The cooked string (where escaped chars have been processed) + * @param raw The raw string (where escape sequences are still in place) + * + * @returns the index of the end of block marker + * @throws an error if the block is unterminated + */ +export function findEndOfBlock(cooked: string, raw: string): number { + /************************************************************************************************ + * This function is repeated in `src/localize/src/localize.ts` and the two should be kept in sync. + * (See that file for more explanation of why.) + ************************************************************************************************/ + for (let cookedIndex = 1, rawIndex = 1; cookedIndex < cooked.length; cookedIndex++, rawIndex++) { + if (raw[rawIndex] === '\\') { + rawIndex++; + } else if (cooked[cookedIndex] === BLOCK_MARKER) { + return cookedIndex; + } + } + throw new Error(`Unterminated $localize metadata block in "${raw}".`); +} \ No newline at end of file diff --git a/packages/localize/test/utils/messages_spec.ts b/packages/localize/test/utils/messages_spec.ts index d2bad923f4..83e09625fd 100644 --- a/packages/localize/test/utils/messages_spec.ts +++ b/packages/localize/test/utils/messages_spec.ts @@ -5,7 +5,7 @@ * Use of this source code is governed by an MIT-style license that can be * found in the LICENSE file at https://angular.io/license */ -import {parseMessage, parseMetadata, splitBlock} from '../../src/utils/messages'; +import {findEndOfBlock, parseMessage, parseMetadata, splitBlock} from '../../src/utils/messages'; import {makeTemplateObject} from '../../src/utils/translations'; describe('messages utils', () => { @@ -85,10 +85,18 @@ describe('messages utils', () => { expect(splitBlock('::abc def', '::abc def')).toEqual({text: 'abc def', block: ''}); }); + it('should error on an unterminated block', () => { + expect(() => splitBlock(':abc def', ':abc def')) + .toThrowError('Unterminated $localize metadata block in ":abc def".'); + }); + it('should handle escaped block markers', () => { expect(splitBlock(':part of the message:abc def', '\\:part of the message:abc def')).toEqual({ text: ':part of the message:abc def' }); + expect(splitBlock( + ':block with escaped : in it:abc def', ':block with escaped \\: in it:abc def')) + .toEqual({text: 'abc def', block: 'block with escaped : in it'}); }); it('should handle the empty raw part', () => { @@ -96,6 +104,27 @@ describe('messages utils', () => { }); }); + describe('findEndOfBlock()', () => { + it('should throw error if there is no end of block marker', () => { + expect(() => findEndOfBlock(':some text', ':some text')) + .toThrowError('Unterminated $localize metadata block in ":some text".'); + expect(() => findEndOfBlock(':escaped colon:', ':escaped colon\\:')) + .toThrowError('Unterminated $localize metadata block in ":escaped colon\\:".'); + }); + + it('should return index of the end of block marker', () => { + expect(findEndOfBlock(':block:', ':block:')).toEqual(6); + expect(findEndOfBlock(':block::', ':block::')).toEqual(6); + expect(findEndOfBlock(':block:some text', ':block:some text')).toEqual(6); + expect(findEndOfBlock(':block:some text:more text', ':block:some text:more text')).toEqual(6); + expect(findEndOfBlock('::::', ':\\:\\::')).toEqual(3); + expect(findEndOfBlock(':block::', ':block\\::')).toEqual(7); + expect(findEndOfBlock(':block:more:some text', ':block\\:more:some text')).toEqual(11); + expect(findEndOfBlock(':block:more:and-more:some text', ':block\\:more\\:and-more:some text')) + .toEqual(20); + }); + }); + describe('parseMetadata()', () => { it('should return just the text if there is no block', () => { expect(parseMetadata('abc def', 'abc def'))