From 72bb38f83bf188eeb6bea2a2f789446b8530e0cb Mon Sep 17 00:00:00 2001
From: Victor Berchet
Date: Fri, 12 Aug 2016 20:14:52 -0700
Subject: [PATCH] feat(i18n): xliff

---
 .../compiler/src/i18n/serializers/xliff.ts    | 326 ++++++++++++++++++
 .../compiler/src/i18n/serializers/xtb.ts      |   4 +-
 .../test/i18n/serializers/xliff_spec.ts       | 110 +++++++
 3 files changed, 438 insertions(+), 2 deletions(-)
 create mode 100644 modules/@angular/compiler/src/i18n/serializers/xliff.ts
 create mode 100644 modules/@angular/compiler/test/i18n/serializers/xliff_spec.ts

diff --git a/modules/@angular/compiler/src/i18n/serializers/xliff.ts b/modules/@angular/compiler/src/i18n/serializers/xliff.ts
new file mode 100644
index 0000000000..c1fc9540bc
--- /dev/null
+++ b/modules/@angular/compiler/src/i18n/serializers/xliff.ts
@@ -0,0 +1,326 @@
+/**
+ * @license
+ * Copyright Google Inc. All Rights Reserved.
+ *
+ * Use of this source code is governed by an MIT-style license that can be
+ * found in the LICENSE file at https://angular.io/license
+ */
+
+import {ListWrapper} from '../../facade/collection';
+import * as ml from '../../ml_parser/ast';
+import {HtmlParser} from '../../ml_parser/html_parser';
+import {InterpolationConfig} from '../../ml_parser/interpolation_config';
+import {XmlParser} from '../../ml_parser/xml_parser';
+import {ParseError} from '../../parse_util';
+import * as i18n from '../i18n_ast';
+import {MessageBundle} from '../message_bundle';
+import {I18nError} from '../parse_util';
+
+import {Serializer, extractPlaceholderToIds, extractPlaceholders} from './serializer';
+import * as xml from './xml_helper';
+
+const _VERSION = '1.2';
+const _XMLNS = 'urn:oasis:names:tc:xliff:document:1.2';
+// TODO(vicb): make this a param (s/_/-/)
+const _SOURCE_LANG = 'en';
+const _PLACEHOLDER_TAG = 'x';
+const _SOURCE_TAG = 'source';
+const _TARGET_TAG = 'target';
+const _UNIT_TAG = 'trans-unit';
+// Text node made of a line break followed by the indentation.
+// Note: `new Array(ws).join(' ')` produces `ws - 1` spaces (none when `ws` is 0 or 1).
+const _CR = (ws: number = 0) => new xml.Text(`\n${new Array(ws).join(' ')}`);
+
+// Serializer for the XLIFF 1.2 format:
+// http://docs.oasis-open.org/xliff/v1.2/os/xliff-core.html
+// http://docs.oasis-open.org/xliff/v1.2/xliff-profile-html/xliff-profile-html-1.2.html
+export class Xliff implements Serializer {
+  constructor(private _htmlParser: HtmlParser, private _interpolationConfig: InterpolationConfig) {}
+
+  /**
+   * Serializes the messages to an XLIFF document.
+   *
+   * Each message becomes a `<trans-unit>` holding its `<source>`, an empty `<target>` and one
+   * `<note>` for each of the description and the meaning, when present.
+   */
+  write(messageMap: {[id: string]: i18n.Message}): string {
+    const visitor = new _WriteVisitor();
+
+    const transUnits: xml.Node[] = [];
+
+    Object.keys(messageMap).forEach((id) => {
+      const message = messageMap[id];
+
+      const transUnit = new xml.Tag(_UNIT_TAG, {id: id, datatype: 'html'});
+      transUnit.children.push(
+          _CR(8), new xml.Tag(_SOURCE_TAG, {}, visitor.serialize(message.nodes)), _CR(8),
+          new xml.Tag(_TARGET_TAG));
+
+      if (message.description) {
+        transUnit.children.push(
+            _CR(8),
+            new xml.Tag(
+                'note', {priority: '1', from: 'description'}, [new xml.Text(message.description)]));
+      }
+
+      if (message.meaning) {
+        transUnit.children.push(
+            _CR(8),
+            new xml.Tag('note', {priority: '1', from: 'meaning'}, [new xml.Text(message.meaning)]));
+      }
+
+      transUnit.children.push(_CR(6));
+
+      transUnits.push(_CR(6), transUnit);
+    });
+
+    const body = new xml.Tag('body', {}, [...transUnits, _CR(4)]);
+    const file = new xml.Tag(
+        'file', {'source-language': _SOURCE_LANG, datatype: 'plaintext', original: 'ng2.template'},
+        [_CR(4), body, _CR(2)]);
+    const xliff = new xml.Tag('xliff', {version: _VERSION, xmlns: _XMLNS}, [_CR(2), file, _CR()]);
+
+    return xml.serialize([new xml.Declaration({version: '1.0', encoding: 'UTF-8'}), _CR(), xliff]);
+  }
+
+  /**
+   * Loads the translations of an XLIFF document as html ast, keyed by message id.
+   *
+   * Only the messages present in `messageBundle` are returned. An `Error` listing every problem
+   * is thrown when the xml, the placeholders or the translated html can not be processed.
+   */
+  load(content: string, url: string, messageBundle: MessageBundle): {[id: string]: ml.Node[]} {
+    // Parse the xliff file into xml nodes
+    const result = new XmlParser().parse(content, url);
+
+    if (result.errors.length) {
+      throw new Error(`xliff parse errors:\n${result.errors.join('\n')}`);
+    }
+
+    // Replace the placeholders, messages are now string
+    const {messages, errors} = new _LoadVisitor().parse(result.rootNodes, messageBundle);
+
+    if (errors.length) {
+      throw new Error(`xliff parse errors:\n${errors.join('\n')}`);
+    }
+
+    // Convert the string messages to html ast
+    // TODO(vicb): map error message back to the original message in xliff
+    const messageMap: {[id: string]: ml.Node[]} = {};
+    const parseErrors: ParseError[] = [];
+
+    Object.keys(messages).forEach((id) => {
+      const res = this._htmlParser.parse(messages[id], url, true, this._interpolationConfig);
+      parseErrors.push(...res.errors);
+      messageMap[id] = res.rootNodes;
+    });
+
+    if (parseErrors.length) {
+      throw new Error(`xliff parse errors:\n${parseErrors.join('\n')}`);
+    }
+
+    return messageMap;
+  }
+}
+
+// Converts the i18n ast of a message to the xml nodes of its `<source>` element.
+class _WriteVisitor implements i18n.Visitor {
+  private _isInIcu: boolean;
+
+  visitText(text: i18n.Text, context?: any): xml.Node[] { return [new xml.Text(text.value)]; }
+
+  visitContainer(container: i18n.Container, context?: any): xml.Node[] {
+    const nodes: xml.Node[] = [];
+    container.children.forEach((node: i18n.Node) => nodes.push(...node.visit(this)));
+    return nodes;
+  }
+
+  // ICU messages are not serialized yet, an empty list of nodes is returned.
+  visitIcu(icu: i18n.Icu, context?: any): xml.Node[] {
+    if (this._isInIcu) {
+      // nested ICU is not supported
+      throw new Error('xliff does not support nested ICU messages');
+    }
+    this._isInIcu = true;
+
+    // TODO(vicb): support ICU messages
+    // https://lists.oasis-open.org/archives/xliff/201201/msg00028.html
+    // http://docs.oasis-open.org/xliff/v1.2/xliff-profile-po/xliff-profile-po-1.2-cd02.html
+    const nodes: xml.Node[] = [];
+
+    this._isInIcu = false;
+
+    return nodes;
+  }
+
+  // A void tag is a single `<x>`, any other tag is a start `<x>`, its children and a close `<x>`.
+  visitTagPlaceholder(ph: i18n.TagPlaceholder, context?: any): xml.Node[] {
+    const startTagPh = new xml.Tag(_PLACEHOLDER_TAG, {id: ph.startName, ctype: ph.tag});
+    if (ph.isVoid) {
+      // void tags have no children nor closing tags
+      return [startTagPh];
+    }
+
+    const closeTagPh = new xml.Tag(_PLACEHOLDER_TAG, {id: ph.closeName, ctype: ph.tag});
+
+    return [startTagPh, ...this.serialize(ph.children), closeTagPh];
+  }
+
+  visitPlaceholder(ph: i18n.Placeholder, context?: any): xml.Node[] {
+    return [new xml.Tag(_PLACEHOLDER_TAG, {id: ph.name})];
+  }
+
+  visitIcuPlaceholder(ph: i18n.IcuPlaceholder, context?: any): xml.Node[] {
+    return [new xml.Tag(_PLACEHOLDER_TAG, {id: ph.name})];
+  }
+
+  // Entry point: returns the flattened xml nodes for `nodes`.
+  serialize(nodes: i18n.Node[]): xml.Node[] {
+    this._isInIcu = false;
+    return ListWrapper.flatten(nodes.map(node => node.visit(this)));
+  }
+}
+
+// Extracts the translated messages from the xml nodes of an XLIFF document.
+// TODO(vicb): add error management (structure)
+// TODO(vicb): factorize (xtb) ?
+class _LoadVisitor implements ml.Visitor {
+  private _messageNodes: [string, ml.Node[]][];
+  private _translatedMessages: {[id: string]: string};
+  private _msgId: string;
+  private _target: ml.Node[];
+  private _errors: I18nError[];
+  private _missingTargets: {[id: string]: I18nError};
+  private _placeholders: {[name: string]: string};
+  private _placeholderToIds: {[name: string]: string};
+
+  /**
+   * Returns the translation of every message of `messageBundle` found in `nodes`, as a string
+   * where the placeholders are replaced by their content, along with the errors encountered.
+   */
+  parse(nodes: ml.Node[], messageBundle: MessageBundle):
+      {messages: {[k: string]: string}, errors: I18nError[]} {
+    this._messageNodes = [];
+    this._translatedMessages = {};
+    this._msgId = null;
+    this._target = null;
+    this._errors = [];
+    this._missingTargets = {};
+    // No placeholder is known while the messages are collected
+    this._placeholders = {};
+    this._placeholderToIds = {};
+
+    // Find all messages
+    ml.visitAll(this, nodes, null);
+
+    const messageMap = messageBundle.getMessageMap();
+    const placeholders = extractPlaceholders(messageBundle);
+    const placeholderToIds = extractPlaceholderToIds(messageBundle);
+
+    // A message of the bundle without a `<target>` can not be translated
+    Object.keys(this._missingTargets)
+        .filter(id => messageMap.hasOwnProperty(id))
+        .forEach(id => this._errors.push(this._missingTargets[id]));
+
+    // Remove any messages that is not present in the source message bundle.
+    const messageNodes =
+        this._messageNodes.filter(message => messageMap.hasOwnProperty(message[0]));
+
+    // A placeholder can be replaced by the translation of another message (see `visitElement`),
+    // the messages without such references are translated first.
+    // Because there could be no ICU placeholders inside an ICU message,
+    // we do not need to take into account the `placeholderToMsgIds` of the referenced
+    // messages, those would always be empty
+    const hasDeps = (message: [string, ml.Node[]]) =>
+        Object.keys(messageMap[message[0]].placeholderToMsgIds).length > 0;
+    const woDeps = messageNodes.filter(message => !hasDeps(message));
+    const wDeps = messageNodes.filter(message => hasDeps(message));
+
+    [...woDeps, ...wDeps].forEach(message => {
+      const id = message[0];
+      this._placeholders = placeholders[id] || {};
+      this._placeholderToIds = placeholderToIds[id] || {};
+      // TODO(vicb): make sure there is no `_TRANSLATIONS_TAG` nor `_TRANSLATION_TAG`
+      this._translatedMessages[id] = ml.visitAll(this, message[1]).join('');
+    });
+
+    return {messages: this._translatedMessages, errors: this._errors};
+  }
+
+  // Called both to collect the `<trans-unit>`s and to translate the content of a `<target>`
+  visitElement(element: ml.Element, context: any): any {
+    switch (element.name) {
+      case _UNIT_TAG:
+        // Reset the state so that nothing leaks from the previous unit
+        this._msgId = null;
+        this._target = null;
+        const msgId = element.attrs.find((attr) => attr.name === 'id');
+        if (!msgId) {
+          this._addError(element, `<${_UNIT_TAG}> misses the "id" attribute`);
+        } else {
+          this._msgId = msgId.value;
+        }
+        ml.visitAll(this, element.children, null);
+        if (this._msgId !== null) {
+          if (this._target !== null) {
+            this._messageNodes.push([this._msgId, this._target]);
+          } else {
+            // Only reported by `parse()` when the message is part of the bundle
+            this._missingTargets[this._msgId] = new I18nError(
+                element.sourceSpan, `Message ${this._msgId} misses a translation`);
+          }
+        }
+        break;
+
+      case _SOURCE_TAG:
+        // ignore source message
+        break;
+
+      case _TARGET_TAG:
+        this._target = element.children;
+        break;
+
+      case _PLACEHOLDER_TAG:
+        const idAttr = element.attrs.find((attr) => attr.name === 'id');
+        if (!idAttr) {
+          this._addError(element, `<${_PLACEHOLDER_TAG}> misses the "id" attribute`);
+        } else {
+          const id = idAttr.value;
+          if (this._placeholders.hasOwnProperty(id)) {
+            return this._placeholders[id];
+          }
+          if (this._placeholderToIds.hasOwnProperty(id) &&
+              this._translatedMessages.hasOwnProperty(this._placeholderToIds[id])) {
+            return this._translatedMessages[this._placeholderToIds[id]];
+          }
+          // TODO(vicb): better error message for when
+          // !this._translatedMessages.hasOwnProperty(this._placeholderToIds[id])
+          this._addError(element, `The placeholder "${id}" does not exists in the source message`);
+        }
+        break;
+
+      default:
+        ml.visitAll(this, element.children, null);
+    }
+  }
+
+  visitAttribute(attribute: ml.Attribute, context: any): any {
+    throw new Error('unreachable code');
+  }
+
+  visitText(text: ml.Text, context: any): any { return text.value; }
+
+  visitComment(comment: ml.Comment, context: any): any { return ''; }
+
+  visitExpansion(expansion: ml.Expansion, context: any): any {
+    throw new Error('unreachable code');
+  }
+
+  visitExpansionCase(expansionCase: ml.ExpansionCase, context: any): any {
+    throw new Error('unreachable code');
+  }
+
+  private _addError(node: ml.Node, message: string): void {
+    this._errors.push(new I18nError(node.sourceSpan, message));
+  }
+}
diff --git a/modules/@angular/compiler/src/i18n/serializers/xtb.ts b/modules/@angular/compiler/src/i18n/serializers/xtb.ts
index 3c12f95c04..612c0bd1e8 100644
--- a/modules/@angular/compiler/src/i18n/serializers/xtb.ts
+++ b/modules/@angular/compiler/src/i18n/serializers/xtb.ts
@@ -35,7 +35,7 @@ export class Xtb implements Serializer {
     }
 
     // Replace the placeholders, messages are now string
-    const {messages, errors} = new _Serializer().parse(result.rootNodes, messageBundle);
+    const {messages, errors} = new _Visitor().parse(result.rootNodes, messageBundle);
 
     if (errors.length) {
       throw new Error(`xtb parse errors:\n${errors.join('\n')}`);
@@ -60,7 +60,7 @@ export class Xtb implements Serializer {
   }
 }
 
-class _Serializer implements ml.Visitor {
+class _Visitor implements ml.Visitor {
   private _messageNodes: [string, ml.Node[]][];
   private _translatedMessages: {[id: string]: string};
   private _bundleDepth: number;
diff --git a/modules/@angular/compiler/test/i18n/serializers/xliff_spec.ts
b/modules/@angular/compiler/test/i18n/serializers/xliff_spec.ts new file mode 100644 index 0000000000..93457db3f0 --- /dev/null +++ b/modules/@angular/compiler/test/i18n/serializers/xliff_spec.ts @@ -0,0 +1,110 @@ +/** + * @license + * Copyright Google Inc. All Rights Reserved. + * + * Use of this source code is governed by an MIT-style license that can be + * found in the LICENSE file at https://angular.io/license + */ + +import {Xliff} from '@angular/compiler/src/i18n/serializers/xliff'; +import {beforeEach, ddescribe, describe, expect, iit, inject, it, xdescribe, xit} from '@angular/core/testing/testing_internal'; +import {MessageBundle} from '../../../src/i18n/message_bundle'; +import {HtmlParser} from '../../../src/ml_parser/html_parser'; +import {DEFAULT_INTERPOLATION_CONFIG} from '../../../src/ml_parser/interpolation_config'; +import {serializeNodes} from '../../ml_parser/ast_serializer_spec'; + +const HTML = ` +

not translatable

+

translatable element with placeholders {{ interpolation}}

+

foo

+`; + +const WRITE_XLIFF = ` + + + + + translatable attribute + + + + translatable element with placeholders + + + + foo + + d + m + + + +`; + +const LOAD_XLIFF = ` + + + + + translatable attribute + etubirtta elbatalsnart + + + translatable element with placeholders + footnemele elbatalsnart sredlohecalp htiw + + + foo + oof + d + m + + + +`; + +/** Registers the specs of the XLIFF serializer (`write` and `load`). */ export function main(): void { + let serializer: Xliff; + let htmlParser: HtmlParser; + + /** Extracts the messages of `html` into a new bundle and writes it with the serializer under test. */ function toXliff(html: string): string { + let catalog = new MessageBundle(new HtmlParser, [], {}); + catalog.updateFromTemplate(html, '', DEFAULT_INTERPOLATION_CONFIG); + return catalog.write(serializer); + } + + /** Loads `xliff` against the messages extracted from `template`; each translation is serialized back to a string. */ function loadAsText(template: string, xliff: string): {[id: string]: string} { + let messageBundle = new MessageBundle(htmlParser, [], {}); + messageBundle.updateFromTemplate(template, 'url', DEFAULT_INTERPOLATION_CONFIG); + + const asAst = serializer.load(xliff, 'url', messageBundle); + let asText: {[id: string]: string} = {}; + Object.keys(asAst).forEach(id => { asText[id] = serializeNodes(asAst[id]).join(''); }); + + return asText; + } + + describe('XLIFF serializer', () => { + + /* A fresh parser and serializer are created before each spec. */ beforeEach(() => { + htmlParser = new HtmlParser(); + serializer = new Xliff(htmlParser, DEFAULT_INTERPOLATION_CONFIG); + }); + + + describe('write', () => { + it('should write a valid xliff file', () => { expect(toXliff(HTML)).toEqual(WRITE_XLIFF); }); + }); + + describe('load', () => { + it('should load XLIFF files', () => { + expect(loadAsText(HTML, LOAD_XLIFF)).toEqual({ + '983775b9a51ce14b036be72d4cfd65d68d64e231': 'etubirtta elbatalsnart', + 'ec1d033f2436133c14ab038286c4f5df4697484a': + '{{ interpolation}} footnemele elbatalsnart sredlohecalp htiw', + 'db3e0a6a5a96481f60aec61d98c3eecddef5ac23': 'oof', + }); + }); + }); + }); +} \ No newline at end of file