in src/utils.ts [65:124]
/**
 * Strips every element whose tag is not on a whitelist from an HTML fragment.
 *
 * The markup is parsed into a detached `<div>` through jQuery, every element
 * is passed to `removeScriptAttributes`, each non-whitelisted element is
 * removed together with its whole subtree (text included), and what is left
 * is serialized back to a string.
 *
 * @param html - Markup to sanitize. A falsy value yields `''`.
 * @param whiteList - Tag names allowed to remain (e.g. `['B', 'I']`); compared
 *   case-insensitively. A missing or empty list yields `''`.
 * @returns The trimmed, sanitized markup, or `''` when there is nothing to
 *   sanitize, when `document.createTreeWalker` is unavailable, or when a
 *   disallowed node could not be removed.
 */
export function sanitizeHTML(html: string, whiteList: string[]): string {
    if (!html || !whiteList || !whiteList.length) {
        return '';
    }

    // Stack Overflow is all like NEVER PARSE HTML WITH REGEX
    // http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags/1732454#1732454
    // plus the C# whitelist regex found there didn't work in JS
    // http://stackoverflow.com/questions/307013/how-do-i-filter-all-html-tags-except-a-certain-whitelist#315851
    // So going with the innerHTML approach...
    // http://stackoverflow.com/questions/6659351/removing-all-script-tags-from-html-with-js-regular-expression
    if (!document.createTreeWalker) {
        return ''; // no way to walk the tree safely, so refuse to return any markup
    }

    // Node names are upper-cased before the lookup, so normalize the whitelist
    // once up front; otherwise a lower-case entry such as 'b' would never match.
    const allowedTags = whiteList.map((tag) => String(tag).toUpperCase());

    const div = $('<div/>');
    // For the reasons above, we do need innerHTML, so suppress tslint
    // tslint:disable-next-line
    div.html(html);

    // ACCEPT marks a node for deletion; SKIP keeps a whitelisted node but still
    // descends into its children. Every visited element is scrubbed by
    // removeScriptAttributes regardless of the verdict.
    const filter: any = function (node) {
        removeScriptAttributes(node);
        return allowedTags.indexOf(node.nodeName.toUpperCase()) === -1
            ? NodeFilter.FILTER_ACCEPT
            : NodeFilter.FILTER_SKIP;
    };
    // Expose the same function as `acceptNode` too, so it works whether the
    // browser invokes the filter directly or calls `filter.acceptNode(...)`.
    filter.acceptNode = filter;

    // Create a tree walker (hierarchical iterator) that only exposes non-whitelisted nodes, which we'll delete.
    const treeWalker = document.createTreeWalker(
        div.get()[0],
        NodeFilter.SHOW_ELEMENT,
        filter,
        false // legacy entityReferenceExpansion argument, kept from the original call (presumably for old browsers)
    );

    // Collect first and delete afterwards so the walk is not disturbed by removals.
    const doomedNodes: Node[] = [];
    while (treeWalker.nextNode()) {
        doomedNodes.push(treeWalker.currentNode);
    }

    for (const doomed of doomedNodes) {
        const parent = doomed.parentNode;
        if (!parent) {
            continue;
        }
        try {
            parent.removeChild(doomed);
        } catch (ex) {
            // Fail closed: if a disallowed node cannot be removed, returning the
            // remaining markup would leak unsanitized content to the caller.
            return '';
        }
    }

    // convert back to a string.
    return div.html().trim();
}