From 582550a90da567ab5ca5436b4e87e6c497a2f5fb Mon Sep 17 00:00:00 2001 From: Victor Berchet Date: Fri, 28 Oct 2016 18:00:31 -0700 Subject: [PATCH] feat(core): implements a decimal fingerprint for i18n --- modules/@angular/compiler/src/i18n/digest.ts | 228 ++++++++++++++---- .../compiler/test/i18n/digest_spec.ts | 116 ++++++--- 2 files changed, 257 insertions(+), 87 deletions(-) diff --git a/modules/@angular/compiler/src/i18n/digest.ts b/modules/@angular/compiler/src/i18n/digest.ts index 549357d584..d104fa74e1 100644 --- a/modules/@angular/compiler/src/i18n/digest.ts +++ b/modules/@angular/compiler/src/i18n/digest.ts @@ -63,7 +63,7 @@ export function serializeNodes(nodes: i18n.Node[]): string[] { */ export function sha1(str: string): string { const utf8 = utf8Encode(str); - const words32 = stringToWords32(utf8); + const words32 = stringToWords32(utf8, Endian.Big); const len = utf8.length * 8; const w = new Array(80); @@ -90,15 +90,90 @@ export function sha1(str: string): string { [a, b, c, d, e] = [add32(a, h0), add32(b, h1), add32(c, h2), add32(d, h3), add32(e, h4)]; } - const sha1 = words32ToString([a, b, c, d, e]); + return byteStringToHexString(words32ToByteString([a, b, c, d, e])); +} - let hex: string = ''; - for (let i = 0; i < sha1.length; i++) { - const b = sha1.charCodeAt(i); - hex += (b >>> 4 & 0x0f).toString(16) + (b & 0x0f).toString(16); +function fk(index: number, b: number, c: number, d: number): [number, number] { + if (index < 20) { + return [(b & c) | (~b & d), 0x5a827999]; } - return hex.toLowerCase(); + if (index < 40) { + return [b ^ c ^ d, 0x6ed9eba1]; + } + + if (index < 60) { + return [(b & c) | (b & d) | (c & d), 0x8f1bbcdc]; + } + + return [b ^ c ^ d, 0xca62c1d6]; +} + +/** + * Compute the fingerprint of the given string + * + * The output is a 64-bit number encoded as a decimal string + * + * based on: + * https://github.com/google/closure-compiler/blob/master/src/com/google/javascript/jscomp/GoogleJsMessageIdGenerator.java + */ +export 
function fingerprint(str: string): string { + const utf8 = utf8Encode(str); + + let [hi, lo] = [hash32(utf8, 0), hash32(utf8, 102072)]; + + if (hi == 0 && (lo == 0 || lo == 1)) { + hi = hi ^ 0x130f9bef; + lo = lo ^ -0x6b5f56d8; + } + + hi = hi & 0x7fffffff; + + return byteStringToDecString(words32ToByteString([hi, lo])); +} + +function hash32(str: string, c: number): number { + let [a, b] = [0x9e3779b9, 0x9e3779b9]; + let i: number; + + const len = str.length; + + for (i = 0; i + 12 <= len; i += 12) { + a = add32(a, wordAt(str, i, Endian.Little)); + b = add32(b, wordAt(str, i + 4, Endian.Little)); + c = add32(c, wordAt(str, i + 8, Endian.Little)); + [a, b, c] = mix([a, b, c]); + } + + a = add32(a, wordAt(str, i, Endian.Little)); + b = add32(b, wordAt(str, i + 4, Endian.Little)); + // the first byte of c is reserved for the length + c = add32(c, len); + c = add32(c, wordAt(str, i + 8, Endian.Little) << 8); + + return mix([a, b, c])[2]; +} + +// clang-format off +function mix([a, b, c]: [number, number, number]): [number, number, number] { + a = sub32(a, b); a = sub32(a, c); a ^= c >>> 13; + b = sub32(b, c); b = sub32(b, a); b ^= a << 8; + c = sub32(c, a); c = sub32(c, b); c ^= b >>> 13; + a = sub32(a, b); a = sub32(a, c); a ^= c >>> 12; + b = sub32(b, c); b = sub32(b, a); b ^= a << 16; + c = sub32(c, a); c = sub32(c, b); c ^= b >>> 5; + a = sub32(a, b); a = sub32(a, c); a ^= c >>> 3; + b = sub32(b, c); b = sub32(b, a); b ^= a << 10; + c = sub32(c, a); c = sub32(c, b); c ^= b >>> 15; + return [a, b, c]; +} +// clang-format on + +// Utils + +enum Endian { + Little, + Big, } function utf8Encode(str: string): string { @@ -131,10 +206,9 @@ function decodeSurrogatePairs(str: string, index: number): number { } const high = str.charCodeAt(index); - let low: number; if (high >= 0xd800 && high <= 0xdfff && str.length > index + 1) { - low = str.charCodeAt(index + 1); + const low = byteAt(str, index + 1); if (low >= 0xdc00 && low <= 0xdfff) { return (high - 0xd800) * 0x400 + 
low - 0xdc00 + 0x10000; } @@ -143,50 +217,108 @@ function decodeSurrogatePairs(str: string, index: number): number { return high; } -function stringToWords32(str: string): number[] { - const words32 = Array(str.length >>> 2); - - for (let i = 0; i < words32.length; i++) { - words32[i] = 0; - } - - for (let i = 0; i < str.length; i++) { - words32[i >>> 2] |= (str.charCodeAt(i) & 0xff) << 8 * (3 - i & 0x3); - } - - return words32; -} - -function words32ToString(words32: number[]): string { - let str = ''; - for (let i = 0; i < words32.length * 4; i++) { - str += String.fromCharCode((words32[i >>> 2] >>> 8 * (3 - i & 0x3)) & 0xff); - } - return str; -} - -function fk(index: number, b: number, c: number, d: number): [number, number] { - if (index < 20) { - return [(b & c) | (~b & d), 0x5a827999]; - } - - if (index < 40) { - return [b ^ c ^ d, 0x6ed9eba1]; - } - - if (index < 60) { - return [(b & c) | (b & d) | (c & d), 0x8f1bbcdc]; - } - - return [b ^ c ^ d, 0xca62c1d6]; -} - function add32(a: number, b: number): number { const low = (a & 0xffff) + (b & 0xffff); const high = (a >> 16) + (b >> 16) + (low >> 16); return (high << 16) | (low & 0xffff); } +function sub32(a: number, b: number): number { + const low = (a & 0xffff) - (b & 0xffff); + const high = (a >> 16) - (b >> 16) + (low >> 16); + return (high << 16) | (low & 0xffff); +} + +// Rotate a 32b number left by `count` positions function rol32(a: number, count: number): number { return (a << count) | (a >>> (32 - count)); -} \ No newline at end of file +} + +function stringToWords32(str: string, endian: Endian): number[] { + const words32 = Array((str.length + 3) >>> 2); + + for (let i = 0; i < words32.length; i++) { + words32[i] = wordAt(str, i * 4, endian); + } + + return words32; +} + +function byteAt(str: string, index: number): number { + return index >= str.length ? 
0 : str.charCodeAt(index) & 0xff; +} + +function wordAt(str: string, index: number, endian: Endian): number { + let word = 0; + if (endian === Endian.Big) { + for (let i = 0; i < 4; i++) { + word += byteAt(str, index + i) << (24 - 8 * i); + } + } else { + for (let i = 0; i < 4; i++) { + word += byteAt(str, index + i) << 8 * i; + } + } + return word; +} + +function words32ToByteString(words32: number[]): string { + return words32.reduce((str, word) => str + word32ToByteString(word), ''); +} + +function word32ToByteString(word: number): string { + let str = ''; + for (let i = 0; i < 4; i++) { + str += String.fromCharCode((word >>> 8 * (3 - i)) & 0xff); + } + return str; +} + +function byteStringToHexString(str: string): string { + let hex: string = ''; + for (let i = 0; i < str.length; i++) { + const b = byteAt(str, i); + hex += (b >>> 4).toString(16) + (b & 0x0f).toString(16); + } + return hex.toLowerCase(); +} + +// based on http://www.danvk.org/hex2dec.html (JS numbers cannot represent integers wider than 53 bits exactly) +function byteStringToDecString(str: string): string { + let decimal = ''; + let toThePower = '1'; + + for (let i = str.length - 1; i >= 0; i--) { + decimal = addBigInt(decimal, numberTimesBigInt(byteAt(str, i), toThePower)); + toThePower = numberTimesBigInt(256, toThePower); + } + + return decimal.split('').reverse().join(''); +} + +function addBigInt(x: string, y: string): string { + let sum = ''; + const len = Math.max(x.length, y.length); + for (let i = 0, carry = 0; i < len || carry; i++) { + const tmpSum = carry + +(x[i] || 0) + +(y[i] || 0); + if (tmpSum >= 10) { + carry = 1; + sum += tmpSum - 10; + } else { + carry = 0; + sum += tmpSum; + } + } + + return sum; +} + +function numberTimesBigInt(num: number, b: string): string { + let product = ''; + let bToThePower = b; + for (; num !== 0; num = num >>> 1) { + if (num & 1) product = addBigInt(product, bToThePower); + bToThePower = addBigInt(bToThePower, bToThePower); + } + return product; +} diff --git 
a/modules/@angular/compiler/test/i18n/digest_spec.ts b/modules/@angular/compiler/test/i18n/digest_spec.ts index b6c7c80551..bd5e579632 100644 --- a/modules/@angular/compiler/test/i18n/digest_spec.ts +++ b/modules/@angular/compiler/test/i18n/digest_spec.ts @@ -8,51 +8,89 @@ import {describe, expect, it} from '@angular/core/testing/testing_internal'; -import {sha1} from '../../src/i18n/digest'; +import {fingerprint, sha1} from '../../src/i18n/digest'; export function main(): void { - describe('sha1', () => { - it('should work on emnpty strings', - () => { expect(sha1('')).toEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709'); }); + describe('digest', () => { + describe('sha1', () => { + it('should work on empty strings', + () => { expect(sha1('')).toEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709'); }); - it('should returns the sha1 of "hello world"', - () => { expect(sha1('abc')).toEqual('a9993e364706816aba3e25717850c26c9cd0d89d'); }); + it('should returns the sha1 of "hello world"', + () => { expect(sha1('abc')).toEqual('a9993e364706816aba3e25717850c26c9cd0d89d'); }); - it('should returns the sha1 of unicode strings', - () => { expect(sha1('你好,世界')).toEqual('3becb03b015ed48050611c8d7afe4b88f70d5a20'); }); + it('should returns the sha1 of unicode strings', + () => { expect(sha1('你好,世界')).toEqual('3becb03b015ed48050611c8d7afe4b88f70d5a20'); }); - it('should support arbitrary string size', () => { - // node.js reference code: - // - // var crypto = require('crypto'); - // - // function sha1(string) { - // var shasum = crypto.createHash('sha1'); - // shasum.update(string, 'utf8'); - // return shasum.digest('hex', 'utf8'); - // } - // - // var prefix = `你好,世界`; - // var result = sha1(prefix); - // for (var size = prefix.length; size < 5000; size += 101) { - // result = prefix + sha1(result); - // while (result.length < size) { - // result += result; - // } - // result = result.slice(-size); - // } - // - // console.log(sha1(result)); - const prefix = `你好,世界`; - let result 
= sha1(prefix); - for (let size = prefix.length; size < 5000; size += 101) { - result = prefix + sha1(result); - while (result.length < size) { - result += result; + it('should support arbitrary string size', () => { + // node.js reference code: + // + // var crypto = require('crypto'); + // + // function sha1(string) { + // var shasum = crypto.createHash('sha1'); + // shasum.update(string, 'utf8'); + // return shasum.digest('hex', 'utf8'); + // } + // + // var prefix = `你好,世界`; + // var result = sha1(prefix); + // for (var size = prefix.length; size < 5000; size += 101) { + // result = prefix + sha1(result); + // while (result.length < size) { + // result += result; + // } + // result = result.slice(-size); + // } + // + // console.log(sha1(result)); + const prefix = `你好,世界`; + let result = sha1(prefix); + for (let size = prefix.length; size < 5000; size += 101) { + result = prefix + sha1(result); + while (result.length < size) { + result += result; + } + result = result.slice(-size); } - result = result.slice(-size); - } - expect(sha1(result)).toEqual('24c2dae5c1ac6f604dbe670a60290d7ce6320b45'); + expect(sha1(result)).toEqual('24c2dae5c1ac6f604dbe670a60290d7ce6320b45'); + }); + }); + + describe('decimal fingerprint', () => { + const fixtures: {[msg: string]: string} = { + ' Spaced Out ': '3976450302996657536', + 'Last Name': '4407559560004943843', + 'First Name': '6028371114637047813', + 'View': '2509141182388535183', + 'START_BOLDNUMEND_BOLD of START_BOLDmillionsEND_BOLD': '29997634073898638', + 'The customer\'s credit card was authorized for AMOUNT and passed all risk checks.': + '6836487644149622036', + 'Hello world!': '3022994926184248873', + 'Jalape\u00f1o': '8054366208386598941', + 'The set of SET_NAME is {XXX, ...}.': '135956960462609535', + 'NAME took a trip to DESTINATION.': '768490705511913603', + 'by AUTHOR (YEAR)': '7036633296476174078', + '': '4416290763660062288', + }; + + it('should work on well known inputs', () => { + 
Object.keys(fixtures).forEach(msg => { expect(fingerprint(msg)).toEqual(fixtures[msg]); }); + }); + + it('should support arbitrary string size', () => { + const prefix = `你好,世界`; + let result = fingerprint(prefix); + for (let size = prefix.length; size < 5000; size += 101) { + result = prefix + fingerprint(result); + while (result.length < size) { + result += result; + } + result = result.slice(-size); + } + expect(fingerprint(result)).toEqual('2122606631351252558'); + }); + }); }); }