fix(core): traverse and sanitize content of unsafe elements (#28804)

In the past, the sanitizer would remove unsafe elements, but still
traverse and sanitize (and potentially preserve) their content. This was
problematic in the case of `<style></style>` tags, whose content would
be converted to HTML text nodes.

In order to fix this, the sanitizer's behavior was changed in #25879 to
ignore the content of _all_ unsafe elements. While this fixed the
problem with `<style></style>` tags, it unnecessarily removed the
contents for _any_ unsafe element. This was an unneeded breaking change.

This commit partially restores the old sanitizer behavior (namely
traversing content of unsafe elements), but introduces a list of
elements whose content should not be traversed if the elements
themselves are considered unsafe. Currently, this list contains `style`,
`script` and `template`.

Related to #25879 and #26007.

Fixes #28427

PR Close #28804
This commit is contained in:
George Kalpakas
2019-02-18 18:14:34 +02:00
committed by Ben Lesh
parent dc9f0af080
commit 262ba67525
2 changed files with 48 additions and 26 deletions

View File

@ -83,6 +83,13 @@ const HTML_ATTRS = tagSet(
export const VALID_ATTRS = merge(URI_ATTRS, SRCSET_ATTRS, HTML_ATTRS);
// Elements whose content should not be traversed/preserved, if the elements themselves are invalid.
//
// Typically, `<invalid>Some content</invalid>` would traverse (and in this case preserve)
// `Some content`, but strip `invalid-element` opening/closing tags. For some elements, though, we
// don't want to preserve the content, if the elements themselves are going to be removed.
const SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS = tagSet('script,style,template');
/**
* SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe
* attributes.
@ -98,17 +105,17 @@ class SanitizingHtmlSerializer {
// However this code never accesses properties off of `document` before deleting its contents
// again, so it shouldn't be vulnerable to DOM clobbering.
let current: Node = el.firstChild !;
let elementValid = true;
let traverseContent = true;
while (current) {
if (current.nodeType === Node.ELEMENT_NODE) {
elementValid = this.startElement(current as Element);
traverseContent = this.startElement(current as Element);
} else if (current.nodeType === Node.TEXT_NODE) {
this.chars(current.nodeValue !);
} else {
// Strip non-element, non-text nodes.
this.sanitizedSomething = true;
}
if (elementValid && current.firstChild) {
if (traverseContent && current.firstChild) {
current = current.firstChild !;
continue;
}
@ -132,18 +139,18 @@ class SanitizingHtmlSerializer {
}
/**
* Outputs only valid Elements.
* Sanitizes an opening element tag (if valid) and returns whether the element's contents should
* be traversed. Element content must always be traversed (even if the element itself is not
* valid/safe), unless the element is one of `SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS`.
*
* Invalid elements are skipped.
*
* @param element element to sanitize
* Returns true if the element is valid.
* @param element The element to sanitize.
* @return True if the element's contents should be traversed.
*/
private startElement(element: Element): boolean {
const tagName = element.nodeName.toLowerCase();
if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
this.sanitizedSomething = true;
return false;
return !SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
}
this.buf.push('<');
this.buf.push(tagName);