fix(ivy): i18n - support colons in $localize metadata (#32867)

Metadata blocks are delimited by colons. Previously the code naively just
looked for the next colon in the string as the end marker.

This commit supports escaping colons within the metadata content.
The Angular compiler has been updated to add escaping as required.

PR Close #32867
This commit is contained in:
Pete Bacon Darwin
2019-10-02 18:17:56 +01:00
committed by atscott
parent 9b15588188
commit d24ade91b8
8 changed files with 183 additions and 58 deletions

View File

@ -143,22 +143,52 @@ const BLOCK_MARKER = ':';
* escaped with a backslash, `\:`. This function checks for this by looking at the `raw`
* messagePart, which should still contain the backslash.
*
* If the template literal was synthesized, rather than appearing in original source code, then its
* raw array will only contain empty strings. This is because the current TypeScript compiler use
* ---
*
* If the template literal was synthesized and downleveled by TypeScript to ES5 then its
* raw array will only contain empty strings. This is because the current TypeScript compiler uses
* the original source code to find the raw text and in the case of synthesized AST nodes, there is
* no source code to draw upon.
*
* The workaround in this function is to assume that the template literal did not contain an escaped
* placeholder name, and fall back on checking the cooked array instead. This should be OK because
* synthesized nodes (from the Angular template compiler) will always provide explicit delimited
* blocks and so will never need to escape placeholder name markers.
* placeholder name, and fall back on checking the cooked array instead.
* This is a limitation if compiling to ES5 in TypeScript but is not a problem if the TypeScript
* output is ES2015 and the code is downleveled by a separate tool as happens in the Angular CLI.
*
* @param messagePart The cooked message part to process.
* @param rawMessagePart The raw message part to check.
* @returns the message part with the placeholder name stripped, if found.
* @throws an error if the block is unterminated
*/
function stripBlock(messagePart: string, rawMessagePart: string) {
// NOTE(review): the next two lines appear to be the pre-change implementation shown by this diff.
// It ended the block at the next colon in the cooked text, whether or not that colon was escaped.
return (rawMessagePart || messagePart).charAt(0) === BLOCK_MARKER ?
messagePart.substring(messagePart.indexOf(BLOCK_MARKER, 1) + 1) :
// An empty raw part falls back to the cooked text before the leading-marker check.
rawMessagePart = rawMessagePart || messagePart;
// When the part starts with a block marker, drop everything up to and including the end-of-block
// marker located by `findEndOfBlock()`; otherwise return the cooked part unchanged.
return rawMessagePart.charAt(0) === BLOCK_MARKER ?
messagePart.substring(findEndOfBlock(messagePart, rawMessagePart) + 1) :
messagePart;
}
/**
 * Locate the marker that terminates a "marked block", i.e. the first colon after the opening
 * marker that is not preceded by a backslash in the raw text.
 *
 * The cooked and raw strings are walked side by side, starting just after the opening marker.
 *
 * @param cooked The cooked string (where escaped chars have been processed)
 * @param raw The raw string (where escape sequences are still in place)
 *
 * @returns the index, within `cooked`, of the end of block marker
 * @throws an error if the block is unterminated
 */
function findEndOfBlock(cooked: string, raw: string): number {
  /***********************************************************************************************
   * A copy of this function lives in `src/utils/messages.ts`; keep the two in sync.
   * It is duplicated because this file is marked as having side-effects: importing `messages.ts`
   * here would pull the whole of `src/utils` into this bundle and prevent any of its functions
   * from being tree shaken.
   ***********************************************************************************************/
  let rawPos = 1;
  let cookedPos = 1;
  while (cookedPos < cooked.length) {
    if (raw[rawPos] === '\\') {
      // A backslash in the raw text takes two raw characters for one cooked character, and the
      // cooked character it produces can never close the block.
      rawPos += 2;
    } else if (cooked[cookedPos] === BLOCK_MARKER) {
      return cookedPos;
    } else {
      rawPos += 1;
    }
    cookedPos += 1;
  }
  throw new Error(`Unterminated $localize metadata block in "${raw}".`);
}

View File

@ -29,6 +29,11 @@ describe('$localize tag', () => {
expect($localize `\:abc:def`).toEqual(':abc:def');
});
it('should strip metadata block containing escaped block markers', () => {
  // With no translate function installed, the tag is expected to yield the original message text.
  expect($localize.translate).toBeUndefined();
  // The `\:` inside the metadata block must not be treated as the end of the block.
  const stripped = $localize `:abc\:def:content`;
  expect(stripped).toEqual('content');
});
it('should strip placeholder names from message parts', () => {
expect($localize.translate).toBeUndefined();
expect($localize `abc${1 + 2 + 3}:ph1:def${4 + 5 + 6}:ph2:`).toEqual('abc6def15');

View File

@ -53,6 +53,7 @@ export type MessageId = string;
* {
* messageId: '6998194507597730591',
* substitutions: { title: 'Jo Bloggs' },
* messageString: 'Hello {$title}!',
* }
* ```
*/
@ -158,28 +159,30 @@ export function parseMetadata(cooked: string, raw: string): MessageMetadata {
* Since blocks are optional, it is possible that the content of a message block actually starts
* with a block marker. In this case the marker must be escaped `\:`.
*
* ---
*
* If the template literal was synthesized and downleveled by TypeScript to ES5 then its
* raw array will only contain empty strings. This is because the current TypeScript compiler uses
* the original source code to find the raw text and in the case of synthesized AST nodes, there is
* no source code to draw upon.
*
* The workaround in this function is to assume that the template literal did not contain an escaped
* placeholder name, and fall back on checking the cooked array instead.
* This is a limitation if compiling to ES5 in TypeScript but is not a problem if the TypeScript
* output is ES2015 and the code is downleveled by a separate tool as happens in the Angular CLI.
*
* @param cooked The cooked version of the message part to parse.
* @param raw The raw version of the message part to parse.
* @returns An object containing the `text` of the message part and the text of the `block`, if it
* exists.
* @throws an error if the `block` is unterminated
*/
export function splitBlock(cooked: string, raw: string): {text: string, block?: string} {
// Synthesizing AST nodes that represent template literals using the TypeScript API is problematic
// because it doesn't allow for the raw value of messageParts to be programmatically set.
// The result is that synthesized AST nodes have empty `raw` values.
// Normally we rely upon checking the `raw` value to check whether the `BLOCK_MARKER` was escaped
// in the original source. If the `raw` value is missing then we cannot do this.
// In such a case we fall back on the `cooked` version and assume that the `BLOCK_MARKER` was not
// escaped.
// This should be OK because synthesized nodes only come from the Angular template compiler, which
// always provides full id and placeholder name information so it will never escape `BLOCK_MARKER`
// characters.
if ((raw || cooked).charAt(0) !== BLOCK_MARKER) {
raw = raw || cooked;
if (raw.charAt(0) !== BLOCK_MARKER) {
return {text: cooked};
} else {
const endOfBlock = cooked.indexOf(BLOCK_MARKER, 1);
const endOfBlock = findEndOfBlock(cooked, raw);
return {
block: cooked.substring(1, endOfBlock),
text: cooked.substring(endOfBlock + 1),
@ -187,6 +190,31 @@ export function splitBlock(cooked: string, raw: string): {text: string, block?:
}
}
/**
 * Build a placeholder name from a numeric index.
 *
 * @param index The index to build the name from.
 * @returns `PH` when `index` is 1, otherwise `PH_` followed by `index - 1`.
 */
function computePlaceholderName(index: number) {
  if (index === 1) {
    return 'PH';
  }
  const suffix = index - 1;
  return `PH_${suffix}`;
}
/**
 * Locate the marker that terminates a "marked block", i.e. the first colon after the opening
 * marker that is not preceded by a backslash in the raw text.
 *
 * The cooked and raw strings are walked side by side, starting just after the opening marker.
 *
 * @param cooked The cooked string (where escaped chars have been processed)
 * @param raw The raw string (where escape sequences are still in place)
 *
 * @returns the index, within `cooked`, of the end of block marker
 * @throws an error if the block is unterminated
 */
export function findEndOfBlock(cooked: string, raw: string): number {
  /************************************************************************************************
   * A copy of this function lives in `src/localize/src/localize.ts`; keep the two in sync.
   * (That file explains why the duplication is needed.)
   ************************************************************************************************/
  let rawPos = 1;
  let cookedPos = 1;
  while (cookedPos < cooked.length) {
    if (raw[rawPos] === '\\') {
      // A backslash in the raw text takes two raw characters for one cooked character, and the
      // cooked character it produces can never close the block.
      rawPos += 2;
    } else if (cooked[cookedPos] === BLOCK_MARKER) {
      return cookedPos;
    } else {
      rawPos += 1;
    }
    cookedPos += 1;
  }
  throw new Error(`Unterminated $localize metadata block in "${raw}".`);
}

View File

@ -5,7 +5,7 @@
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
import {parseMessage, parseMetadata, splitBlock} from '../../src/utils/messages';
import {findEndOfBlock, parseMessage, parseMetadata, splitBlock} from '../../src/utils/messages';
import {makeTemplateObject} from '../../src/utils/translations';
describe('messages utils', () => {
@ -85,10 +85,18 @@ describe('messages utils', () => {
expect(splitBlock('::abc def', '::abc def')).toEqual({text: 'abc def', block: ''});
});
it('should error on an unterminated block', () => {
  // The part opens with a block marker but never closes it.
  const splitUnterminated = () => splitBlock(':abc def', ':abc def');
  expect(splitUnterminated)
      .toThrowError('Unterminated $localize metadata block in ":abc def".');
});
it('should handle escaped block markers', () => {
  // An escaped leading marker means there is no block at all: the whole part is message text.
  const escapedStart =
      splitBlock(':part of the message:abc def', '\\:part of the message:abc def');
  expect(escapedStart).toEqual({text: ':part of the message:abc def'});

  // An escaped marker inside the block is kept as part of the block content.
  const escapedInside =
      splitBlock(':block with escaped : in it:abc def', ':block with escaped \\: in it:abc def');
  expect(escapedInside).toEqual({text: 'abc def', block: 'block with escaped : in it'});
});
it('should handle the empty raw part', () => {
@ -96,6 +104,27 @@ describe('messages utils', () => {
});
});
describe('findEndOfBlock()', () => {
  it('should throw error if there is no end of block marker', () => {
    // Neither input has an unescaped colon after the opening marker.
    const unterminated = [
      {
        cooked: ':some text',
        raw: ':some text',
        message: 'Unterminated $localize metadata block in ":some text".',
      },
      {
        cooked: ':escaped colon:',
        raw: ':escaped colon\\:',
        message: 'Unterminated $localize metadata block in ":escaped colon\\:".',
      },
    ];
    unterminated.forEach(({cooked, raw, message}) => {
      expect(() => findEndOfBlock(cooked, raw)).toThrowError(message);
    });
  });

  it('should return index of the end of block marker', () => {
    // `end` is the expected index, in the cooked string, of the first unescaped closing marker.
    const terminated = [
      {cooked: ':block:', raw: ':block:', end: 6},
      {cooked: ':block::', raw: ':block::', end: 6},
      {cooked: ':block:some text', raw: ':block:some text', end: 6},
      {cooked: ':block:some text:more text', raw: ':block:some text:more text', end: 6},
      {cooked: '::::', raw: ':\\:\\::', end: 3},
      {cooked: ':block::', raw: ':block\\::', end: 7},
      {cooked: ':block:more:some text', raw: ':block\\:more:some text', end: 11},
      {
        cooked: ':block:more:and-more:some text',
        raw: ':block\\:more\\:and-more:some text',
        end: 20,
      },
    ];
    terminated.forEach(({cooked, raw, end}) => {
      expect(findEndOfBlock(cooked, raw)).toEqual(end);
    });
  });
});
describe('parseMetadata()', () => {
it('should return just the text if there is no block', () => {
expect(parseMetadata('abc def', 'abc def'))