export function sanitizeHTML()

in src/utils.ts [65:124]


/**
 * Removes every element whose tag name is not whitelisted from an HTML string.
 *
 * The markup is parsed into a scratch `<div>` with jQuery's `.html()`, walked
 * with a TreeWalker and serialized back to a string. A non-whitelisted element
 * is removed together with its entire subtree, so whitelisted tags nested
 * inside a disallowed tag are dropped as well. `removeScriptAttributes`
 * (defined elsewhere in this module) is invoked on every element the walker
 * inspects, whitelisted or not.
 *
 * The scratch `<div>` belongs to an inert document obtained from
 * `document.implementation.createHTMLDocument`, not to the live page, so the
 * untrusted markup is never parsed in the page's own document (where the
 * browser would start fetching images and other subresources before the
 * filtering below has run).
 *
 * @param html      Untrusted markup to sanitize.
 * @param whiteList Tag names to keep. Entries are compared against
 *                  `nodeName.toUpperCase()`, so they must be upper-case
 *                  (e.g. `'B'`, `'SPAN'`).
 * @returns The sanitized, trimmed markup. An empty string is returned when
 *          `html` or `whiteList` is empty/missing, or when the browser lacks
 *          the DOM APIs this function relies on.
 */
export function sanitizeHTML(html: string, whiteList: string[]): string {
    if (!html || !whiteList || !whiteList.length) {
        return '';
    }

    // Stack Overflow is all like NEVER PARSE HTML WITH REGEX
    // http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags/1732454#1732454
    // plus the C# whitelist regex I found didn't work in JS
    // http://stackoverflow.com/questions/307013/how-do-i-filter-all-html-tags-except-a-certain-whitelist#315851
    // So going with the innerHTML approach...
    // http://stackoverflow.com/questions/6659351/removing-all-script-tags-from-html-with-js-regular-expression

    // Fail closed: without these APIs the markup cannot be sanitized safely.
    const domImplementation = document.implementation;
    if (!document.createTreeWalker || !domImplementation || !domImplementation.createHTMLDocument) {
        return ''; // in case someone's hax0ring us?
    }

    // Parse inside an inert document rather than the live page.
    const inertDocument = domImplementation.createHTMLDocument('');
    const container = inertDocument.createElement('div');
    const div = $(container);

    // For the reasons above, we do need innerHTML, so suppress tslint
    // tslint:disable-next-line
    div.html(html);

    // ACCEPT means "expose this node to the walker", i.e. mark it for deletion;
    // whitelisted nodes are SKIPped, which still lets the walker descend into
    // their children. Typed as `any` because the function is also attached to
    // itself as `acceptNode`, so it can be consumed either as a plain callback
    // or as a NodeFilter object (presumably for legacy browser compatibility).
    const filter: any = function (node) {
        removeScriptAttributes(node);

        return whiteList.indexOf(node.nodeName.toUpperCase()) === -1
            ? NodeFilter.FILTER_ACCEPT
            : NodeFilter.FILTER_SKIP;
    };

    filter.acceptNode = filter;

    // Create a tree walker (hierarchical iterator) that only exposes non-whitelisted nodes, which we'll delete.
    const treeWalker = inertDocument.createTreeWalker(
        container,
        NodeFilter.SHOW_ELEMENT,
        filter,
        false
    );

    // Collect first and delete afterwards so the tree is not mutated while it
    // is still being traversed.
    const doomedNodes: Node[] = [];
    while (treeWalker.nextNode()) {
        doomedNodes.push(treeWalker.currentNode);
    }

    for (let i = 0; i < doomedNodes.length; i++) {
        const doomed = doomedNodes[i];
        const parent = doomed.parentNode;
        if (parent) {
            try {
                parent.removeChild(doomed);
            } catch (ex) {
                // Best effort: a node that cannot be detached is left in place
                // rather than aborting the whole sanitization pass.
            }
        }
    }

    // convert back to a string.
    return div.html().trim();
}