asdoc/library/closure/goog/html/sanitizer/csssanitizer.js (278 lines of code) (raw):

// Copyright 2016 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview
 * JavaScript support for client-side CSS sanitization.
 *
 * @author danesh@google.com (Danesh Irani)
 * @author mikesamuel@gmail.com (Mike Samuel)
 */

goog.provide('goog.html.sanitizer.CssSanitizer');

goog.require('goog.array');
goog.require('goog.dom');
goog.require('goog.dom.TagName');
goog.require('goog.html.CssSpecificity');
goog.require('goog.html.SafeStyle');
goog.require('goog.html.SafeStyleSheet');
goog.require('goog.html.SafeUrl');
goog.require('goog.html.sanitizer.noclobber');
goog.require('goog.html.uncheckedconversions');
goog.require('goog.object');
goog.require('goog.string');
goog.require('goog.string.Const');
goog.require('goog.userAgent');
goog.require('goog.userAgent.product');


/**
 * The set of characters that need to be normalized inside url("...").
 * We normalize newlines because they are not allowed inside quoted strings,
 * normalize quote characters, angle-brackets, and asterisks because they
 * could be used to break out of the URL or introduce targets for CSS
 * error recovery.  We normalize parentheses since they delimit unquoted
 * URLs and calls and could be a target for error recovery.
 * @const @private {!RegExp}
 */
goog.html.sanitizer.CssSanitizer.NORM_URL_REGEXP_ = /[\n\f\r\"\'()*<>]/g;


/**
 * The percent-encoded replacement for every character matched by
 * NORM_URL_REGEXP_.
 * @private @const {!Object<string, string>}
 */
goog.html.sanitizer.CssSanitizer.NORM_URL_REPLACEMENTS_ = {
  '\n': '%0a',
  '\f': '%0c',
  '\r': '%0d',
  '"': '%22',
  '\'': '%27',
  '(': '%28',
  ')': '%29',
  '*': '%2a',
  '<': '%3c',
  '>': '%3e'
};


/**
 * A regular expression to match each selector in a CSS rule. Selectors are
 * separated by commas, but can have strings within them (e.g. foo[name="bar"])
 * that can contain commas and escaped quotes.
 * The value is null on IE below version 10, where the pattern is never built.
 * @private {?RegExp}
 */
goog.html.sanitizer.CssSanitizer.SELECTOR_REGEX_ =
    // Don't even evaluate it on older browsers (IE8 and IE9), it throws a
    // syntax error and we don't use it anyway.
    !(goog.userAgent.IE && document.documentMode < 10) ?
    new RegExp(
        '\\s*' +              // Discard initial space
            '([^\\s\'",]+' +  // Beginning of the match. Anything but a comma,
                              // spaces or a string delimiter. This is the only
                              // non-optional component of the regex.
            '[^\'",]*' +      // Spaces are fine afterwards (e.g. "a > b").
            ('(' +  // A series of optional strings with matching delimiters
                    // that can contain anything, and optional non-quoted text
                    // without commas.
             '(\'([^\'\\r\\n\\f\\\\]|\\\\[^])*\')|' +  // Optional single-quoted
                                                       // string.
             '("([^"\\r\\n\\f\\\\]|\\\\[^])*")|' +     // Optional double-quoted
                                                       // string.
             '[^\'",]' +  // Optional non-string content.
             ')*') +      // String and non-string content can come in any
                          // order.
            ')',          // End of the match.
        'g') :
    null;


/**
 * Normalizes a character for use in a url() directive.
 * @param {string} ch Character to be normalized.
 * @return {?string} The replacement listed in NORM_URL_REPLACEMENTS_, or null
 *     when the character has no entry there.
 * @private
 */
goog.html.sanitizer.CssSanitizer.normalizeUrlChar_ = function(ch) {
  return goog.html.sanitizer.CssSanitizer.NORM_URL_REPLACEMENTS_[ch] || null;
};


/**
 * Constructs a safe URI from a given URI and prop using a given uriRewriter
 * function.
 * @param {string} uri URI to be sanitized.
 * @param {string} propName Property name which contained the URI.
 * @param {?function(string, string):?goog.html.SafeUrl} uriRewriter A URI
 *     rewriter that returns a goog.html.SafeUrl.
 * @return {?string} A `url("...")` token for use in CSS, or null when there is
 *     no rewriter, the rewriter rejects the URI, or it yields the innocuous
 *     placeholder URL.
 * @private
 */
goog.html.sanitizer.CssSanitizer.getSafeUri_ = function(
    uri, propName, uriRewriter) {
  if (!uriRewriter) {
    return null;
  }
  var safeUri = uriRewriter(uri, propName);
  if (safeUri &&
      goog.html.SafeUrl.unwrap(safeUri) !=
          goog.html.SafeUrl.INNOCUOUS_STRING) {
    // Percent-encode every character that could terminate the quoted URL or
    // serve as a CSS error-recovery target.
    return 'url("' +
        goog.html.SafeUrl.unwrap(safeUri).replace(
            goog.html.sanitizer.CssSanitizer.NORM_URL_REGEXP_,
            goog.html.sanitizer.CssSanitizer.normalizeUrlChar_) +
        '")';
  }
  return null;
};


/**
 * Used to detect the beginning of the argument list of a CSS property value
 * containing a CSS function call.
 * @private @const {string}
 */
goog.html.sanitizer.CssSanitizer.FUNCTION_ARGUMENTS_BEGIN_ = '(';


/**
 * Used to detect the end of the argument list of a CSS property value
 * containing a CSS function call.
 * @private @const {string}
 */
goog.html.sanitizer.CssSanitizer.FUNCTION_ARGUMENTS_END_ = ')';


/**
 * Allowed CSS functions, keyed by lower-case function name.
 * @private @const {!Object<string,boolean>}
 */
goog.html.sanitizer.CssSanitizer.ALLOWED_FUNCTIONS_ = goog.object.createSet(
    'rgb', 'rgba', 'alpha', 'rect', 'image', 'linear-gradient',
    'radial-gradient', 'repeating-linear-gradient',
    'repeating-radial-gradient', 'cubic-bezier', 'matrix', 'perspective',
    'rotate', 'rotate3d', 'rotatex', 'rotatey', 'steps', 'rotatez', 'scale',
    'scale3d', 'scalex', 'scaley', 'scalez', 'skew', 'skewx', 'skewy',
    'translate', 'translate3d', 'translatex', 'translatey', 'translatez');


/**
 * Removes a vendor prefix from a property name.
 * @param {string} propName A property name.
 * @return {string} A property name without vendor prefixes.
 * @private
 */
goog.html.sanitizer.CssSanitizer.withoutVendorPrefix_ = function(propName) {
  // http://stackoverflow.com/a/5411098/20394 has a fairly extensive list
  // of vendor prefixes. Blink has not declared a vendor prefix distinct from
  // -webkit- and http://css-tricks.com/tldr-on-vendor-prefix-drama/ discusses
  // how Mozilla recognizes some -webkit- prefixes.
  // http://wiki.csswg.org/spec/vendor-prefixes talks more about
  // cross-implementation, and lists other prefixes.
  return propName.replace(
      /^-(?:apple|css|epub|khtml|moz|mso?|o|rim|wap|webkit|xv)-(?=[a-z])/i, '');
};


/**
 * Sanitizes a browser-parsed CSS property value.
 * @param {string} propName A property name.
 * @param {string} propValue Value of the property as parsed by the browser.
 * @param {function(string, string):?goog.html.SafeUrl=} opt_uriRewriter A URI
 *     rewriter that returns a goog.html.SafeUrl.
 * @return {?string} Sanitized property value or null.
 * @private
 */
goog.html.sanitizer.CssSanitizer.sanitizeProperty_ = function(
    propName, propValue, opt_uriRewriter) {
  var outputPropValue = goog.string.trim(propValue);
  if (outputPropValue == '') {
    return null;
  }

  if (goog.string.caseInsensitiveStartsWith(outputPropValue, 'url(')) {
    // Urls are rewritten according to the policy implemented in
    // opt_uriRewriter.
    // TODO(pelizzi): use HtmlSanitizerUrlPolicy for opt_uriRewriter.
    if (!opt_uriRewriter) {
      return null;
    }
    // Drop the leading "url(" and the final character, then any surrounding
    // quotes.
    // TODO(danesh): Check if we need to resolve this URI.
    var uri = goog.string.stripQuotes(
        outputPropValue.substring(4, outputPropValue.length - 1), '"\'');

    return goog.html.sanitizer.CssSanitizer.getSafeUri_(
        uri, propName, opt_uriRewriter);
  } else if (outputPropValue.indexOf('(') > 0) {
    // Functions are filtered through a whitelist. Nesting whitelisted functions
    // is not supported.
    var functionName =
        outputPropValue
            .substring(
                0,
                outputPropValue.indexOf(
                    goog.html.sanitizer.CssSanitizer.FUNCTION_ARGUMENTS_BEGIN_))
            .toLowerCase();
    // Use an own-property check: a plain `in` test would also accept names
    // inherited from Object.prototype, such as "constructor".
    var isAllowedFunction = Object.prototype.hasOwnProperty.call(
        goog.html.sanitizer.CssSanitizer.ALLOWED_FUNCTIONS_, functionName);
    if (goog.string.countOf(
            outputPropValue,
            goog.html.sanitizer.CssSanitizer.FUNCTION_ARGUMENTS_BEGIN_) > 1 ||
        !(isAllowedFunction &&
          goog.string.endsWith(
              outputPropValue,
              goog.html.sanitizer.CssSanitizer.FUNCTION_ARGUMENTS_END_))) {
      // TODO(b/34222379): Handle functions that may need recursing or that may
      // appear in the middle of a string. For now, just allow functions which
      // aren't nested.
      return null;
    }
    return outputPropValue;
  } else {
    // Everything else is allowed.
    return outputPropValue;
  }
};


/**
 * Sanitizes a {@link CSSStyleSheet}.
 * @param {!CSSStyleSheet} cssStyleSheet
 * @param {?string} containerId An ID to restrict the scope of the rules being
 *     sanitized. If null, no restriction is applied.
 * @param {function(string, string):?goog.html.SafeUrl|undefined} uriRewriter A
 *     URI rewriter that returns a goog.html.SafeUrl.
 * @return {!goog.html.SafeStyleSheet}
 * @throws {Error} If a container ID is given and it is not a letter followed
 *     only by word characters, '-', ':' or '.'.
 * @private
 */
goog.html.sanitizer.CssSanitizer.sanitizeStyleSheet_ = function(
    cssStyleSheet, containerId, uriRewriter) {
  // Sanity check on the element ID that will confine the new CSS rules. The
  // pattern is anchored so that the whole ID has to match: the ID is spliced
  // verbatim into every selector (and into a String.replace replacement
  // pattern) below, so characters such as ',', ' ' or '$' must be rejected.
  // The check does not depend on the rules, so it runs once up front.
  // NOTE(review): ':' and '.' are accepted here but are not escaped when the
  // ID is written into the selector -- confirm this is intended.
  if (containerId && !/^[a-zA-Z][\w\-:.]*$/.test(containerId)) {
    throw new Error('Invalid container id');
  }

  var sanitizedRules = [];
  var cssRules = goog.html.sanitizer.CssSanitizer.getOnlyStyleRules_(
      goog.array.toArray(cssStyleSheet.cssRules));
  goog.array.forEach(cssRules, function(cssRule) {
    if (containerId && goog.userAgent.product.IE &&
        document.documentMode == 10 && /\\['"]/.test(cssRule.selectorText)) {
      // If a container ID was specified, drop selectors with escaped quotes in
      // strings on IE 10 due to a regex bug.
      return;
    }
    // If a container ID was specified, restrict all selectors in this rule to
    // be descendants of the node with such an ID. Use a regex to exclude commas
    // within selector strings.
    var scopedSelector = containerId ?
        cssRule.selectorText.replace(
            goog.html.sanitizer.CssSanitizer.SELECTOR_REGEX_,
            '#' + containerId + ' $1') :
        cssRule.selectorText;
    sanitizedRules.push(goog.html.SafeStyleSheet.createRule(
        scopedSelector,
        goog.html.sanitizer.CssSanitizer.sanitizeInlineStyle(
            cssRule.style, uriRewriter)));
  });
  return goog.html.SafeStyleSheet.concat(sanitizedRules);
};


// TODO(pelizzi): some of these at-rules are safe, consider adding partial
// support for them.
/**
 * Used to filter out at-rules like @media, @font, etc. Currently, none of these
 * are supported.
 * @param {!Array<!CSSRule>} cssRules
 * @return {!Array<!CSSStyleRule>}
 * @private
 */
goog.html.sanitizer.CssSanitizer.getOnlyStyleRules_ = function(cssRules) {
  return /** @type {!Array<!CSSStyleRule>} */ (
      goog.array.filter(cssRules, function(cssRule) {
        return cssRule instanceof CSSStyleRule ||
            cssRule.type == CSSRule.STYLE_RULE;
      }));
};


/**
 * Sanitizes the contents of a STYLE tag.
 * @param {string} textContent The textual content of the STYLE tag.
 * @param {?string=} opt_containerId The ID of a node that will contain the
 *     STYLE tag that includes the sanitized content, to restrict the effects of
 *     the rules being sanitized to descendants of this node.
 * @param {function(string, string):?goog.html.SafeUrl=} opt_uriRewriter A URI
 *     rewriter that returns a goog.html.SafeUrl.
 * @return {!goog.html.SafeStyleSheet}
 * @supported IE 10+, Chrome 26+, Firefox 22+, Safari 7.1+, Opera 15+. On IE10,
 *     support for escaped quotes inside quoted strings (e.g. `a[name="it\'s"]`)
 *     is unreliable, and some (but not all!) rules containing these are
 *     silently dropped.
 */
goog.html.sanitizer.CssSanitizer.sanitizeStyleSheetString = function(
    textContent, opt_containerId, opt_uriRewriter) {
  var styleTag =
      goog.html.sanitizer.CssSanitizer.safeParseHtmlAndGetInertElement(
          '<style>' + textContent + '</style>');
  if (styleTag == null) {
    // No inert parser is available; produce no rules at all.
    return goog.html.SafeStyleSheet.EMPTY;
  }
  var containerId = opt_containerId != undefined ? opt_containerId : null;
  return goog.html.sanitizer.CssSanitizer.sanitizeStyleSheet_(
      styleTag.sheet, containerId, opt_uriRewriter);
};


/**
 * Returns an inert DOM tree produced by parsing the provided html using
 * DOMParser. "Inert" here means that merely parsing the string won't execute
 * scripts or load images. If you attach this tree to a non-inert document, it
 * will execute these side effects! In this package we prefer using the TEMPLATE
 * tag over DOMParser to produce inert trees, but at least on Chrome the inert
 * STYLE tag does not have a CSSStyleSheet object attached to it.
 * @param {string} html
 * @return {?Element} The first element child of the parsed body, or null when
 *     DOMParser is unavailable or the body has no element children.
 */
goog.html.sanitizer.CssSanitizer.safeParseHtmlAndGetInertElement = function(
    html) {
  if ((goog.userAgent.IE && !goog.userAgent.isVersionOrHigher(10)) ||
      typeof goog.global.DOMParser != 'function') {
    return null;
  }

  var parser = new DOMParser();
  var firstElement =
      parser
          .parseFromString(
              '<html><head></head><body>' + html + '</body></html>',
              'text/html')
          .body.children[0];
  // children[0] is undefined for a body without element children; honor the
  // declared ?Element return type.
  return firstElement || null;
};


/**
 * Sanitizes an inline style attribute. Short-hand attributes are expanded to
 * their individual elements. Note: The sanitizer does not output vendor
 * prefixed styles.
 * @param {?CSSStyleDeclaration} cssStyle A CSS style object.
 * @param {function(string, string):?goog.html.SafeUrl=} opt_uriRewriter A URI
 *     rewriter that returns a goog.html.SafeUrl.
 * @return {!goog.html.SafeStyle} A sanitized inline cssText.
 */
goog.html.sanitizer.CssSanitizer.sanitizeInlineStyle = function(
    cssStyle, opt_uriRewriter) {
  if (!cssStyle) {
    return goog.html.SafeStyle.EMPTY;
  }

  // Accepted properties are copied onto the style object of a fresh element;
  // its cssText is the sanitized output.
  var cleanCssStyle = document.createElement('div').style;
  var cssPropNames =
      goog.html.sanitizer.CssSanitizer.getCssPropNames_(cssStyle);

  for (var i = 0; i < cssPropNames.length; i++) {
    var propName =
        goog.html.sanitizer.CssSanitizer.withoutVendorPrefix_(cssPropNames[i]);
    if (!goog.html.sanitizer.CssSanitizer.isDisallowedPropertyName_(propName)) {
      var propValue =
          goog.html.sanitizer.noclobber.getCssPropertyValue(cssStyle, propName);

      var sanitizedValue = goog.html.sanitizer.CssSanitizer.sanitizeProperty_(
          propName, propValue, opt_uriRewriter);
      if (sanitizedValue != null) {
        goog.html.sanitizer.noclobber.setCssProperty(
            cleanCssStyle, propName, sanitizedValue);
      }
    }
  }
  return goog.html.uncheckedconversions
      .safeStyleFromStringKnownToSatisfyTypeContract(
          goog.string.Const.from('Output of CSS sanitizer'),
          cleanCssStyle.cssText || '');
};


/**
 * Sanitizes inline CSS text and returns it as a SafeStyle object. When adequate
 * browser support is not available, such as for IE9 and below, a
 * SafeStyle-wrapped empty string is returned.
 * @param {string} cssText CSS text to be sanitized.
 * @param {function(string, string):?goog.html.SafeUrl=} opt_uriRewriter A URI
 *     rewriter that returns a goog.html.SafeUrl.
 * @return {!goog.html.SafeStyle} A sanitized inline cssText.
 */
goog.html.sanitizer.CssSanitizer.sanitizeInlineStyleString = function(
    cssText, opt_uriRewriter) {
  // same check as in goog.html.sanitizer.HTML_SANITIZER_SUPPORTED_
  if (goog.userAgent.IE && document.documentMode < 10) {
    // Use the shared empty constant, as sanitizeInlineStyle does, rather than
    // invoking the SafeStyle constructor directly.
    return goog.html.SafeStyle.EMPTY;
  }

  // Let the browser parse the text on an element of an inert document.
  var div = goog.html.sanitizer.CssSanitizer.createInertDocument_()
                .createElement('DIV');
  div.style.cssText = cssText;
  return goog.html.sanitizer.CssSanitizer.sanitizeInlineStyle(
      div.style, opt_uriRewriter);
};


/**
 * Converts rules in STYLE tags into style attributes on the tags they apply to.
 * Modifies the provided DOM subtree in-place.
 * @param {!Element} element
 * @package
 */
goog.html.sanitizer.CssSanitizer.inlineStyleRules = function(element) {
  // Note that Webkit used to offer the perfect function for the job:
  // getMatchedCSSRules. Unfortunately, it was never supported cross-browser and
  // is deprecated now. On the other hand, getComputedStyle cannot be used to
  // differentiate property values that are set by a style sheet from those set
  // by a style attribute or default values. This algorithm with
  // O(nr_of_elements * nr_of_rules) complexity that has to manually sort
  // selectors by specificity is the best we can do.

  // Extract all rules from STYLE tags found in the subtree.
  /** @type {!Array<!HTMLStyleElement>} */
  var styleTags =
      goog.html.sanitizer.noclobber.getElementsByTagName(element, 'STYLE');
  var cssRules = goog.array.concatMap(styleTags, function(styleTag) {
    return goog.array.toArray(
        goog.html.sanitizer.noclobber.getElementStyleSheet(styleTag).cssRules);
  });
  cssRules = goog.html.sanitizer.CssSanitizer.getOnlyStyleRules_(cssRules);
  // Sort the rules by descending specificity.
  cssRules.sort(function(a, b) {
    var aSpecificity = goog.html.CssSpecificity.getSpecificity(a.selectorText);
    var bSpecificity = goog.html.CssSpecificity.getSpecificity(b.selectorText);
    return -goog.array.compare3(aSpecificity, bSpecificity);
  });
  // For each element, apply the matching rules to the element style attribute.
  // If a property is already explicitly defined, do not update it. This
  // guarantees that the rule with selectors with the highest priority (or the
  // properties defined in the style attribute itself) have precedence over
  // lower priority ones.
  var subTreeWalker = document.createTreeWalker(
      element, NodeFilter.SHOW_ELEMENT, null /* filter */,
      false /* entityReferenceExpansion */);
  var currentElement;
  while (currentElement = /** @type {!Element} */ (subTreeWalker.nextNode())) {
    goog.array.forEach(cssRules, function(rule) {
      if (!goog.html.sanitizer.noclobber.elementMatches(
              currentElement, rule.selectorText)) {
        return;
      }
      if (!rule.style) {
        return;
      }
      goog.html.sanitizer.CssSanitizer.mergeStyleDeclarations_(
          currentElement, rule.style);
    });
  }
  // Delete the STYLE tags.
  goog.array.forEach(styleTags, goog.dom.removeNode);
};


/**
 * Merges style properties from `styleDeclaration` into
 * `element.style`. Properties already present on the element are left
 * untouched.
 * @param {!Element} element
 * @param {!CSSStyleDeclaration} styleDeclaration
 * @private
 */
goog.html.sanitizer.CssSanitizer.mergeStyleDeclarations_ = function(
    element, styleDeclaration) {
  var existingPropNames =
      goog.html.sanitizer.CssSanitizer.getCssPropNames_(element.style);
  var newPropNames =
      goog.html.sanitizer.CssSanitizer.getCssPropNames_(styleDeclaration);

  goog.array.forEach(newPropNames, function(propName) {
    if (existingPropNames.indexOf(propName) >= 0) {
      // This was either a property set by the style attribute or a stylesheet
      // rule with a higher priority. Leave the existing value.
      return;
    }
    var propValue = goog.html.sanitizer.noclobber.getCssPropertyValue(
        styleDeclaration, propName);
    goog.html.sanitizer.noclobber.setCssProperty(
        element.style, propName, propValue);
  });
};


/**
 * Creates a DOM Document object that will not execute scripts or make
 * network requests while parsing HTML.
 * @return {!Document}
 * @private
 */
goog.html.sanitizer.CssSanitizer.createInertDocument_ = function() {
  // Documents created using window.document.implementation.createHTMLDocument()
  // use the same custom component registry as their parent document. This means
  // that parsing arbitrary HTML can result in calls to user-defined JavaScript.
  // This is worked around by creating a template element and its content's
  // document. See https://github.com/cure53/DOMPurify/issues/47.
  var doc = document;
  if (typeof HTMLTemplateElement === 'function') {
    doc =
        goog.dom.createElement(goog.dom.TagName.TEMPLATE).content.ownerDocument;
  }
  return doc.implementation.createHTMLDocument('');
};


/**
 * Provides a cross-browser way to get CSS property names.
 * @param {!CSSStyleDeclaration} cssStyle A CSS style object.
 * @return {!Array<string>} CSS property names.
 * @private
 */
goog.html.sanitizer.CssSanitizer.getCssPropNames_ = function(cssStyle) {
  var propNames = [];
  if (goog.isArrayLike(cssStyle)) {
    // Gets property names via item().
    // https://drafts.csswg.org/cssom/#dom-cssstyledeclaration-item
    propNames = goog.array.toArray(cssStyle);
  } else {
    // In IE8 and other older browsers we have to iterate over all the property
    // names. We skip cssText because it contains the unsanitized CSS, which
    // defeats the purpose.
    propNames = goog.object.getKeys(cssStyle);
    goog.array.remove(propNames, 'cssText');
  }
  return propNames;
};


/**
 * Checks whether the property name specified should be disallowed.
 * @param {string} propName A property name.
 * @return {boolean} Whether the property name is disallowed.
 * @private
 */
goog.html.sanitizer.CssSanitizer.isDisallowedPropertyName_ = function(
    propName) {
  // getPropertyValue doesn't deal with custom variables properly and will NOT
  // decode CSS escapes (but the browser will do so silently). Simply disallow
  // custom variables (http://www.w3.org/TR/css-variables/#defining-variables).
  return goog.string.startsWith(propName, '--') ||
      goog.string.startsWith(propName, 'var');
};