crisbeto 0100a39e21 fix(ivy): i18n instructions thrown off by sanitizer in IE11 (#34305)
While sanitizing on browsers that don't support the `template` element (pretty much only IE), we create an inert document and we insert content into it via `document.body.innerHTML = unsafeHTML`. The problem is that IE appears to parse the HTML passed to `innerHTML` differently, depending on whether the element has been inserted into a document or not. In particular, it seems to split some strings into multiple text nodes, which would've otherwise been a single node. This ended up throwing off some of the i18n code down the line and causing a handful of failures. I've worked around it by creating a new inert `body` element into which the HTML would be inserted.

PR Close #34305
2019-12-13 14:19:56 -08:00

182 lines
7.0 KiB
TypeScript

/**
* @license
* Copyright Google Inc. All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
/**
* This helper class is used to get hold of an inert tree of DOM elements containing dirty HTML
* that needs sanitizing.
* Depending upon browser support we must use one of three strategies for doing this.
* Support: Safari 10.x -> XHR strategy
* Support: Firefox -> DomParser strategy
* Default: InertDocument strategy
*/
export class InertBodyHelper {
private inertDocument: Document;
constructor(private defaultDoc: Document) {
this.inertDocument = this.defaultDoc.implementation.createHTMLDocument('sanitization-inert');
let inertBodyElement = this.inertDocument.body;
if (inertBodyElement == null) {
// usually there should be only one body element in the document, but IE doesn't have any, so
// we need to create one.
const inertHtml = this.inertDocument.createElement('html');
this.inertDocument.appendChild(inertHtml);
inertBodyElement = this.inertDocument.createElement('body');
inertHtml.appendChild(inertBodyElement);
}
inertBodyElement.innerHTML = '<svg><g onload="this.parentNode.remove()"></g></svg>';
if (inertBodyElement.querySelector && !inertBodyElement.querySelector('svg')) {
// We just hit the Safari 10.1 bug - which allows JS to run inside the SVG G element
// so use the XHR strategy.
this.getInertBodyElement = this.getInertBodyElement_XHR;
return;
}
inertBodyElement.innerHTML = '<svg><p><style><img src="</style><img src=x onerror=alert(1)//">';
if (inertBodyElement.querySelector && inertBodyElement.querySelector('svg img')) {
// We just hit the Firefox bug - which prevents the inner img JS from being sanitized
// so use the DOMParser strategy, if it is available.
// If the DOMParser is not available then we are not in Firefox (Server/WebWorker?) so we
// fall through to the default strategy below.
if (isDOMParserAvailable()) {
this.getInertBodyElement = this.getInertBodyElement_DOMParser;
return;
}
}
// None of the bugs were hit so it is safe for us to use the default InertDocument strategy
this.getInertBodyElement = this.getInertBodyElement_InertDocument;
}
/**
* Get an inert DOM element containing DOM created from the dirty HTML string provided.
* The implementation of this is determined in the constructor, when the class is instantiated.
*/
getInertBodyElement: (html: string) => HTMLElement | null;
/**
* Use XHR to create and fill an inert body element (on Safari 10.1)
* See
* https://github.com/cure53/DOMPurify/blob/a992d3a75031cb8bb032e5ea8399ba972bdf9a65/src/purify.js#L439-L449
*/
private getInertBodyElement_XHR(html: string) {
// We add these extra elements to ensure that the rest of the content is parsed as expected
// e.g. leading whitespace is maintained and tags like `<meta>` do not get hoisted to the
// `<head>` tag.
html = '<body><remove></remove>' + html + '</body>';
try {
html = encodeURI(html);
} catch {
return null;
}
const xhr = new XMLHttpRequest();
xhr.responseType = 'document';
xhr.open('GET', 'data:text/html;charset=utf-8,' + html, false);
xhr.send(undefined);
const body: HTMLBodyElement = xhr.response.body;
body.removeChild(body.firstChild !);
return body;
}
/**
* Use DOMParser to create and fill an inert body element (on Firefox)
* See https://github.com/cure53/DOMPurify/releases/tag/0.6.7
*
*/
private getInertBodyElement_DOMParser(html: string) {
// We add these extra elements to ensure that the rest of the content is parsed as expected
// e.g. leading whitespace is maintained and tags like `<meta>` do not get hoisted to the
// `<head>` tag.
html = '<body><remove></remove>' + html + '</body>';
try {
const body = new (window as any)
.DOMParser()
.parseFromString(html, 'text/html')
.body as HTMLBodyElement;
body.removeChild(body.firstChild !);
return body;
} catch {
return null;
}
}
/**
* Use an HTML5 `template` element, if supported, or an inert body element created via
* `createHtmlDocument` to create and fill an inert DOM element.
* This is the default sane strategy to use if the browser does not require one of the specialised
* strategies above.
*/
private getInertBodyElement_InertDocument(html: string) {
// Prefer using <template> element if supported.
const templateEl = this.inertDocument.createElement('template');
if ('content' in templateEl) {
templateEl.innerHTML = html;
return templateEl;
}
// Note that previously we used to do something like `this.inertDocument.body.innerHTML = html`
// and we returned the inert `body` node. This was changed, because IE seems to treat setting
// `innerHTML` on an inserted element differently, compared to one that hasn't been inserted
// yet. In particular, IE appears to split some of the text into multiple text nodes rather
// than keeping them in a single one which ends up messing with Ivy's i18n parsing further
// down the line. This has been worked around by creating a new inert `body` and using it as
// the root node in which we insert the HTML.
const inertBody = this.inertDocument.createElement('body');
inertBody.innerHTML = html;
// Support: IE 9-11 only
// strip custom-namespaced attributes on IE<=11
if ((this.defaultDoc as any).documentMode) {
this.stripCustomNsAttrs(inertBody);
}
return inertBody;
}
/**
* When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'
* attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g.
* 'ns1:xlink:foo').
*
* This is undesirable since we don't want to allow any of these custom attributes. This method
* strips them all.
*/
private stripCustomNsAttrs(el: Element) {
const elAttrs = el.attributes;
// loop backwards so that we can support removals.
for (let i = elAttrs.length - 1; 0 < i; i--) {
const attrib = elAttrs.item(i);
const attrName = attrib !.name;
if (attrName === 'xmlns:ns1' || attrName.indexOf('ns1:') === 0) {
el.removeAttribute(attrName);
}
}
let childNode = el.firstChild as Node | null;
while (childNode) {
if (childNode.nodeType === Node.ELEMENT_NODE) this.stripCustomNsAttrs(childNode as Element);
childNode = childNode.nextSibling;
}
}
}
/**
* We need to determine whether the DOMParser exists in the global context.
* The try-catch is because, on some browsers, trying to access this property
* on window can actually throw an error.
*
* @suppress {uselessCode}
*/
function isDOMParserAvailable() {
try {
return !!(window as any).DOMParser;
} catch {
return false;
}
}