// Mirror of https://github.com/advplyr/audiobookshelf.git
// Synced 2025-01-04 21:20:09 +01:00
// 874 lines, 30 KiB, JavaScript
|
/*
  sanitize-html (Apostrophe Technologies)
  SOURCE: https://github.com/apostrophecms/sanitize-html
  LICENSE: https://github.com/apostrophecms/sanitize-html/blob/main/LICENSE

  Modified for audiobookshelf
*/
|
|||
|
|
|||
|
const htmlparser = require('htmlparser2');
|
|||
|
// const escapeStringRegexp = require('escape-string-regexp');
// const { isPlainObject } = require('is-plain-object');
// const deepmerge = require('deepmerge');
// const parseSrcset = require('parse-srcset');
// const { parse: postcssParse } = require('postcss');
// Tags that can conceivably represent stand-alone media.

// ABS UPDATE: Packages not necessary
|
|||
|
// SOURCE: https://github.com/sindresorhus/escape-string-regexp/blob/main/index.js
|
|||
|
/**
 * Escapes a string so that it matches literally when embedded in a RegExp source.
 *
 * @param {string} string - Text to escape.
 * @returns {string} The text with regex metacharacters backslash-escaped and
 *   every `-` rewritten as `\x2d`.
 * @throws {TypeError} If `string` is not a string.
 */
function escapeStringRegexp(string) {
  if (typeof string !== 'string') {
    throw new TypeError('Expected a string');
  }

  // Escape characters with special meaning either inside or outside character sets.
  // Use a simple backslash escape when it’s always valid, and a `\xnn` escape when the simpler form would be disallowed by Unicode patterns’ stricter grammar.
  return string
    .replace(/[|\\{}()[\]^$+*?.]/g, '\\$&')
    .replace(/-/g, '\\x2d');
}
|
|||
|
|
|||
|
// SOURCE: https://github.com/jonschlinkert/is-plain-object/blob/master/is-plain-object.js
|
|||
|
/**
 * Reports whether a value's `Object.prototype.toString` tag is `[object Object]`.
 *
 * @param {*} o - Value to inspect.
 * @returns {boolean} `true` only for values tagged `[object Object]`
 *   (so arrays, `null`, functions and primitives yield `false`).
 */
function isObject(o) {
  return Object.prototype.toString.call(o) === '[object Object]';
}
|
|||
|
|
|||
|
/**
 * Reports whether a value looks like a plain object (an object literal,
 * `new Object()`, or an object without a constructor).
 *
 * @param {*} o - Value to inspect.
 * @returns {boolean} `true` when `o` is tagged `[object Object]` and either has
 *   no constructor or has a constructor whose prototype owns `isPrototypeOf`.
 */
function isPlainObject(o) {
  if (isObject(o) === false) return false;

  // If has modified constructor
  const ctor = o.constructor;
  if (ctor === undefined) return true;
  // A `null` constructor cannot be dereferenced below; treat it like any
  // other non-function constructor value instead of throwing.
  if (ctor === null) return false;

  // If has modified prototype
  const prot = ctor.prototype;
  if (isObject(prot) === false) return false;

  // If constructor does not have an Object-specific method.
  // Called through Object.prototype so a prototype created with
  // Object.create(null) (which has no hasOwnProperty of its own) cannot throw.
  if (Object.prototype.hasOwnProperty.call(prot, 'isPrototypeOf') === false) {
    return false;
  }

  // Most likely a plain Object
  return true;
}
|
|||
|
|
|||
|
|
|||
|
// Tags counted as media: when a frame with one of these tags closes,
// sanitizeHtml records the tag in its parent frame's `mediaChildren`.
const mediaTags = [
  'img', 'audio', 'video', 'picture', 'svg',
  'object', 'map', 'iframe', 'embed'
];
|
|||
|
// Tags that are inherently vulnerable to being used in XSS attacks.
|
|||
|
// sanitizeHtml logs a console warning when any of these appears in
// `allowedTags` and `allowVulnerableTags` is not set.
const vulnerableTags = ['script', 'style'];
|
|||
|
|
|||
|
/**
 * Invokes `cb(value, key)` for every own enumerable key of `obj`.
 *
 * @param {Object|Array|null|undefined} obj - Collection to walk; falsy values are ignored.
 * @param {function(*, string): void} cb - Receives the value first, then the key.
 */
function each(obj, cb) {
  if (obj) {
    Object.keys(obj).forEach(function (key) {
      cb(obj[key], key);
    });
  }
}
|
|||
|
|
|||
|
// Avoid false positives with .__proto__, .hasOwnProperty, etc.
|
|||
|
/**
 * Own-property check that is not fooled by inherited names such as
 * `__proto__` or `hasOwnProperty`.
 *
 * @param {*} obj - Object to inspect; `null`/`undefined` yield `false` rather than throwing.
 * @param {string} key - Property name to look for.
 * @returns {boolean} `true` when `key` is an own property of `obj`.
 */
function has(obj, key) {
  return obj != null && ({}).hasOwnProperty.call(obj, key);
}
|
|||
|
|
|||
|
// Returns those elements of `a` for which `cb(a)` returns truthy
|
|||
|
/**
 * Collects the values of `a` for which `cb(value)` is truthy.
 *
 * @param {Object|Array|null|undefined} a - Collection to filter; falsy input gives `[]`.
 * @param {function(*): *} cb - Predicate called with each value only.
 * @returns {Array} A new array of the accepted values, in key order.
 */
function filter(a, cb) {
  if (!a) {
    return [];
  }
  // Wrap the predicate so it receives the value alone (no index/array arguments).
  return Object.values(a).filter(function (v) {
    return cb(v);
  });
}
|
|||
|
|
|||
|
/**
 * Reports whether `obj` has no own enumerable properties.
 *
 * @param {*} obj - Value to inspect; `null`/`undefined` count as empty.
 * @returns {boolean} `true` when there is no own enumerable key.
 */
function isEmptyObject(obj) {
  return obj == null || Object.keys(obj).length === 0;
}
|
|||
|
|
|||
|
/**
 * Serializes parsed srcset candidates back into a `srcset` attribute value.
 *
 * @param {Array<{url: string, w?: number, h?: number, d?: number}>} parsedSrcset
 *   Candidates; each must carry a truthy `url`.
 * @returns {string} Candidates joined by `, `, each as the URL followed by any
 *   truthy `w`, `h` and `d` descriptors.
 * @throws {Error} If a candidate has no `url`.
 */
function stringifySrcset(parsedSrcset) {
  return parsedSrcset.map(function (part) {
    if (!part.url) {
      throw new Error('URL missing');
    }

    return (
      part.url +
      (part.w ? ` ${part.w}w` : '') +
      (part.h ? ` ${part.h}h` : '') +
      (part.d ? ` ${part.d}x` : '')
    );
  }).join(', ');
}
|
|||
|
|
|||
|
// Export the sanitizer function itself; `defaults` and `simpleTransform`
// are attached to it as properties further down in this file.
module.exports = sanitizeHtml;
|
|||
|
|
|||
|
// A valid attribute name.
// We use a tolerant definition based on the set of strings defined by
// html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
// and html.spec.whatwg.org/multipage/parsing.html#attribute-name-state .
// The characters accepted are ones which can be appended to the attribute
// name buffer without triggering a parse error:
// * unexpected-equals-sign-before-attribute-name
// * unexpected-null-character
// * unexpected-character-in-attribute-name
// We exclude the empty string because it's impossible to get to the after
// attribute name state with an empty attribute name buffer.
|
|||
|
// Anchored at both ends: the whole name must be one or more characters, none of
// which is NUL, tab, newline, form feed, carriage return, space, `/`, `<`, `=` or `>`.
const VALID_HTML_ATTRIBUTE_NAME = /^[^\0\t\n\f\r /<=>]+$/;
|
|||
|
|
|||
|
// Ignore the _recursing flag; it's there for recursive
// invocation as a guard against this exploit:
// https://github.com/fb55/htmlparser2/issues/105
|
|||
|
|
|||
|
/**
 * Sanitizes HTML by streaming it through htmlparser2 and re-emitting only the
 * tags, attributes, classes and URL schemes permitted by `options`.
 *
 * Options read by this function (merged over `sanitizeHtml.defaults`):
 * `allowedTags`, `allowVulnerableTags`, `nonTextTags`, `disallowedTagsMode`,
 * `nestingLimit`, `allowedAttributes`, `allowedClasses`, `transformTags`,
 * `selfClosing`, `allowedSchemes`, `allowedSchemesByTag`,
 * `allowedSchemesAppliedToAttributes`, `allowProtocolRelative`,
 * `allowedScriptHostnames`, `allowedScriptDomains`, `allowedIframeHostnames`,
 * `allowedIframeDomains`, `allowIframeRelativeUrls`, `enforceHtmlBoundary`,
 * `textFilter`, `exclusiveFilter` and `parser` (passed to htmlparser2, merged
 * over `htmlParserDefaults`).
 *
 * ABS UPDATE: `srcset` and `style` attributes are always dropped; the upstream
 * srcset/CSS filtering (parse-srcset, postcss, deepmerge) is not bundled.
 *
 * @param {string|null|undefined} html - Markup to sanitize; `null`/`undefined` yield `''`.
 * @param {Object} [options] - Overrides for `sanitizeHtml.defaults`.
 * @param {*} [_recursing] - Not read by this implementation.
 * @returns {string} The sanitized markup.
 */
function sanitizeHtml(html, options, _recursing) {
  if (html == null) {
    return '';
  }

  let result = '';
  // Used for hot swapping the result variable with an empty string in order to "capture" the text written to it.
  let tempResult = '';

  // One Frame per open tag; tracks the text and media children seen inside it
  // so that `exclusiveFilter` can inspect them when the tag closes.
  function Frame(tag, attribs) {
    const that = this;
    this.tag = tag;
    this.attribs = attribs || {};
    this.tagPosition = result.length;
    this.text = ''; // Node inner text
    this.mediaChildren = [];

    this.updateParentNodeText = function () {
      if (stack.length) {
        const parentFrame = stack[stack.length - 1];
        parentFrame.text += that.text;
      }
    };

    this.updateParentNodeMediaChildren = function () {
      if (stack.length && mediaTags.includes(this.tag)) {
        const parentFrame = stack[stack.length - 1];
        parentFrame.mediaChildren.push(this.tag);
      }
    };
  }

  options = Object.assign({}, sanitizeHtml.defaults, options);
  options.parser = Object.assign({}, htmlParserDefaults, options.parser);

  // vulnerableTags
  vulnerableTags.forEach(function (tag) {
    if (
      options.allowedTags && options.allowedTags.indexOf(tag) > -1 &&
      !options.allowVulnerableTags
    ) {
      console.warn(`\n\n⚠️ Your \`allowedTags\` option includes, \`${tag}\`, which is inherently\nvulnerable to XSS attacks. Please remove it from \`allowedTags\`.\nOr, to disable this warning, add the \`allowVulnerableTags\` option\nand ensure you are accounting for this risk.\n\n`);
    }
  });

  // Tags that contain something other than HTML, or where discarding
  // the text when the tag is disallowed makes sense for other reasons.
  // If we are not allowing these tags, we should drop their content too.
  // For other tags you would drop the tag but keep its content.
  const nonTextTagsArray = options.nonTextTags || [
    'script',
    'style',
    'textarea',
    'option'
  ];
  let allowedAttributesMap;
  let allowedAttributesGlobMap;
  if (options.allowedAttributes) {
    allowedAttributesMap = {};
    allowedAttributesGlobMap = {};
    each(options.allowedAttributes, function (attributes, tag) {
      allowedAttributesMap[tag] = [];
      const globRegex = [];
      attributes.forEach(function (obj) {
        if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
          globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
        } else {
          allowedAttributesMap[tag].push(obj);
        }
      });
      if (globRegex.length) {
        allowedAttributesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
      }
    });
  }
  const allowedClassesMap = {};
  const allowedClassesGlobMap = {};
  const allowedClassesRegexMap = {};
  each(options.allowedClasses, function (classes, tag) {
    // Implicitly allows the class attribute
    if (allowedAttributesMap) {
      if (!has(allowedAttributesMap, tag)) {
        allowedAttributesMap[tag] = [];
      }
      allowedAttributesMap[tag].push('class');
    }

    allowedClassesMap[tag] = [];
    allowedClassesRegexMap[tag] = [];
    const globRegex = [];
    classes.forEach(function (obj) {
      if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
        globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
      } else if (obj instanceof RegExp) {
        allowedClassesRegexMap[tag].push(obj);
      } else {
        allowedClassesMap[tag].push(obj);
      }
    });
    if (globRegex.length) {
      allowedClassesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
    }
  });

  const transformTagsMap = {};
  let transformTagsAll;
  each(options.transformTags, function (transform, tag) {
    let transFun;
    if (typeof transform === 'function') {
      transFun = transform;
    } else if (typeof transform === 'string') {
      transFun = sanitizeHtml.simpleTransform(transform);
    }
    if (tag === '*') {
      transformTagsAll = transFun;
    } else {
      transformTagsMap[tag] = transFun;
    }
  });

  let depth;
  let stack;
  let skipMap;
  let transformMap;
  let skipText;
  let skipTextDepth;
  let addedText = false;

  initializeState();

  const parser = new htmlparser.Parser({
    onopentag: function (name, attribs) {
      // If `enforceHtmlBoundary` is `true` and this has found the opening
      // `html` tag, reset the state.
      if (options.enforceHtmlBoundary && name === 'html') {
        initializeState();
      }

      if (skipText) {
        skipTextDepth++;
        return;
      }
      const frame = new Frame(name, attribs);
      stack.push(frame);

      let skip = false;
      const hasText = !!frame.text;
      let transformedTag;
      if (has(transformTagsMap, name)) {
        transformedTag = transformTagsMap[name](name, attribs);

        frame.attribs = attribs = transformedTag.attribs;

        if (transformedTag.text !== undefined) {
          frame.innerText = transformedTag.text;
        }

        if (name !== transformedTag.tagName) {
          frame.name = name = transformedTag.tagName;
          transformMap[depth] = transformedTag.tagName;
        }
      }
      if (transformTagsAll) {
        transformedTag = transformTagsAll(name, attribs);

        frame.attribs = attribs = transformedTag.attribs;
        if (name !== transformedTag.tagName) {
          frame.name = name = transformedTag.tagName;
          transformMap[depth] = transformedTag.tagName;
        }
      }

      if ((options.allowedTags && options.allowedTags.indexOf(name) === -1) || (options.disallowedTagsMode === 'recursiveEscape' && !isEmptyObject(skipMap)) || (options.nestingLimit != null && depth >= options.nestingLimit)) {
        skip = true;
        skipMap[depth] = true;
        if (options.disallowedTagsMode === 'discard') {
          if (nonTextTagsArray.indexOf(name) !== -1) {
            skipText = true;
            skipTextDepth = 1;
          }
        }
      }
      depth++;
      if (skip) {
        if (options.disallowedTagsMode === 'discard') {
          // We want the contents but not this tag
          return;
        }
        tempResult = result;
        result = '';
      }
      result += '<' + name;

      if (name === 'script') {
        if (options.allowedScriptHostnames || options.allowedScriptDomains) {
          frame.innerText = '';
        }
      }

      if (!allowedAttributesMap || has(allowedAttributesMap, name) || allowedAttributesMap['*']) {
        each(attribs, function (value, a) {
          if (!VALID_HTML_ATTRIBUTE_NAME.test(a)) {
            // This prevents part of an attribute name in the output from being
            // interpreted as the end of an attribute, or end of a tag.
            delete frame.attribs[a];
            return;
          }
          // check allowedAttributesMap for the element and attribute and modify the value
          // as necessary if there are specific values defined.
          let passedAllowedAttributesMapCheck = false;
          if (!allowedAttributesMap ||
            (has(allowedAttributesMap, name) && allowedAttributesMap[name].indexOf(a) !== -1) ||
            (allowedAttributesMap['*'] && allowedAttributesMap['*'].indexOf(a) !== -1) ||
            (has(allowedAttributesGlobMap, name) && allowedAttributesGlobMap[name].test(a)) ||
            (allowedAttributesGlobMap['*'] && allowedAttributesGlobMap['*'].test(a))) {
            passedAllowedAttributesMapCheck = true;
          } else if (allowedAttributesMap && allowedAttributesMap[name]) {
            for (const o of allowedAttributesMap[name]) {
              if (isPlainObject(o) && o.name && (o.name === a)) {
                passedAllowedAttributesMapCheck = true;
                let newValue = '';
                if (o.multiple === true) {
                  // verify the values that are allowed
                  const splitStrArray = value.split(' ');
                  for (const s of splitStrArray) {
                    if (o.values.indexOf(s) !== -1) {
                      if (newValue === '') {
                        newValue = s;
                      } else {
                        newValue += ' ' + s;
                      }
                    }
                  }
                } else if (o.values.indexOf(value) >= 0) {
                  // verified an allowed value matches the entire attribute value
                  newValue = value;
                }
                value = newValue;
              }
            }
          }
          if (passedAllowedAttributesMapCheck) {
            if (options.allowedSchemesAppliedToAttributes.indexOf(a) !== -1) {
              if (naughtyHref(name, value)) {
                delete frame.attribs[a];
                return;
              }
            }

            if (name === 'script' && a === 'src') {

              let allowed = true;

              try {
                const parsed = new URL(value);

                if (options.allowedScriptHostnames || options.allowedScriptDomains) {
                  const allowedHostname = (options.allowedScriptHostnames || []).find(function (hostname) {
                    return hostname === parsed.hostname;
                  });
                  const allowedDomain = (options.allowedScriptDomains || []).find(function (domain) {
                    return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`);
                  });
                  allowed = allowedHostname || allowedDomain;
                }
              } catch (e) {
                allowed = false;
              }

              if (!allowed) {
                delete frame.attribs[a];
                return;
              }
            }

            if (name === 'iframe' && a === 'src') {
              let allowed = true;
              try {
                // Chrome accepts \ as a substitute for / in the // at the
                // start of a URL, so rewrite accordingly to prevent exploit.
                // Also drop any whitespace at that point in the URL
                value = value.replace(/^(\w+:)?\s*[\\/]\s*[\\/]/, '$1//');
                if (value.startsWith('relative:')) {
                  // An attempt to exploit our workaround for base URLs being
                  // mandatory for relative URL validation in the WHATWG
                  // URL parser, reject it
                  throw new Error('relative: exploit attempt');
                }
                // naughtyHref is in charge of whether protocol relative URLs
                // are cool. Here we are concerned just with allowed hostnames and
                // whether to allow relative URLs.
                //
                // Build a placeholder "base URL" against which any reasonable
                // relative URL may be parsed successfully
                let base = 'relative://relative-site';
                for (let i = 0; (i < 100); i++) {
                  base += `/${i}`;
                }
                const parsed = new URL(value, base);
                const isRelativeUrl = parsed && parsed.hostname === 'relative-site' && parsed.protocol === 'relative:';
                if (isRelativeUrl) {
                  // default value of allowIframeRelativeUrls is true
                  // unless allowedIframeHostnames or allowedIframeDomains specified
                  allowed = has(options, 'allowIframeRelativeUrls')
                    ? options.allowIframeRelativeUrls
                    : (!options.allowedIframeHostnames && !options.allowedIframeDomains);
                } else if (options.allowedIframeHostnames || options.allowedIframeDomains) {
                  const allowedHostname = (options.allowedIframeHostnames || []).find(function (hostname) {
                    return hostname === parsed.hostname;
                  });
                  const allowedDomain = (options.allowedIframeDomains || []).find(function (domain) {
                    return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`);
                  });
                  allowed = allowedHostname || allowedDomain;
                }
              } catch (e) {
                // Unparseable iframe src
                allowed = false;
              }
              if (!allowed) {
                delete frame.attribs[a];
                return;
              }
            }
            if (a === 'srcset') {
              // ABS UPDATE: srcset not necessary. The upstream per-URL filtering
              // (parse-srcset) is not bundled, so the attribute is dropped.
              // Return here so the unfiltered value is not written to the output.
              delete frame.attribs[a];
              return;
            }
            if (a === 'class') {
              const allowedSpecificClasses = allowedClassesMap[name];
              const allowedWildcardClasses = allowedClassesMap['*'];
              const allowedSpecificClassesGlob = allowedClassesGlobMap[name];
              const allowedSpecificClassesRegex = allowedClassesRegexMap[name];
              const allowedWildcardClassesGlob = allowedClassesGlobMap['*'];
              const allowedClassesGlobs = [
                allowedSpecificClassesGlob,
                allowedWildcardClassesGlob
              ]
                .concat(allowedSpecificClassesRegex)
                .filter(function (t) {
                  return t;
                });
              if (allowedSpecificClasses && allowedWildcardClasses) {
                // ABS UPDATE: deepmerge is not bundled. Both lists are plain arrays,
                // so concatenating them gives the combined allowlist; without this
                // the class value would pass through unfiltered.
                value = filterClasses(value, allowedSpecificClasses.concat(allowedWildcardClasses), allowedClassesGlobs);
              } else {
                value = filterClasses(value, allowedSpecificClasses || allowedWildcardClasses, allowedClassesGlobs);
              }
              if (!value.length) {
                delete frame.attribs[a];
                return;
              }
            }
            if (a === 'style') {
              // ABS UPDATE: Styles not necessary. The upstream CSS filtering
              // (postcss) is not bundled, so the attribute is dropped.
              // Return here so the unfiltered value is not written to the output.
              delete frame.attribs[a];
              return;
            }
            result += ' ' + a;
            if (value && value.length) {
              result += '="' + escapeHtml(value, true) + '"';
            }
          } else {
            delete frame.attribs[a];
          }
        });
      }
      if (options.selfClosing.indexOf(name) !== -1) {
        result += ' />';
      } else {
        result += '>';
        if (frame.innerText && !hasText && !options.textFilter) {
          result += escapeHtml(frame.innerText);
          addedText = true;
        }
      }
      if (skip) {
        result = tempResult + escapeHtml(result);
        tempResult = '';
      }
    },
    ontext: function (text) {
      if (skipText) {
        return;
      }
      const lastFrame = stack[stack.length - 1];
      let tag;

      if (lastFrame) {
        tag = lastFrame.tag;
        // If inner text was set by transform function then let's use it
        text = lastFrame.innerText !== undefined ? lastFrame.innerText : text;
      }

      if (options.disallowedTagsMode === 'discard' && ((tag === 'script') || (tag === 'style'))) {
        // htmlparser2 gives us these as-is. Escaping them ruins the content. Allowing
        // script tags is, by definition, game over for XSS protection, so if that's
        // your concern, don't allow them. The same is essentially true for style tags
        // which have their own collection of XSS vectors.
        result += text;
      } else {
        const escaped = escapeHtml(text, false);
        if (options.textFilter && !addedText) {
          result += options.textFilter(escaped, tag);
        } else if (!addedText) {
          result += escaped;
        }
      }
      if (stack.length) {
        const frame = stack[stack.length - 1];
        frame.text += text;
      }
    },
    onclosetag: function (name) {

      if (skipText) {
        skipTextDepth--;
        if (!skipTextDepth) {
          skipText = false;
        } else {
          return;
        }
      }

      const frame = stack.pop();
      if (!frame) {
        // Do not crash on bad markup
        return;
      }
      skipText = options.enforceHtmlBoundary ? name === 'html' : false;
      depth--;
      const skip = skipMap[depth];
      if (skip) {
        delete skipMap[depth];
        if (options.disallowedTagsMode === 'discard') {
          frame.updateParentNodeText();
          return;
        }
        tempResult = result;
        result = '';
      }

      if (transformMap[depth]) {
        name = transformMap[depth];
        delete transformMap[depth];
      }

      if (options.exclusiveFilter && options.exclusiveFilter(frame)) {
        // Discard everything emitted since this tag was opened.
        result = result.slice(0, frame.tagPosition);
        return;
      }

      frame.updateParentNodeMediaChildren();
      frame.updateParentNodeText();

      if (options.selfClosing.indexOf(name) !== -1) {
        // Already output />
        if (skip) {
          result = tempResult;
          tempResult = '';
        }
        return;
      }

      result += '</' + name + '>';
      if (skip) {
        result = tempResult + escapeHtml(result);
        tempResult = '';
      }
      addedText = false;
    }
  }, options.parser);
  parser.write(html);
  parser.end();

  return result;

  // Resets all per-document parsing state (also used when an `html` tag opens
  // under `enforceHtmlBoundary`).
  function initializeState() {
    result = '';
    depth = 0;
    stack = [];
    skipMap = {};
    transformMap = {};
    skipText = false;
    skipTextDepth = 0;
  }

  // Escapes text for output. With `quote` set, double quotes are escaped too so
  // the result is safe inside a double-quoted attribute value.
  function escapeHtml(s, quote) {
    if (typeof (s) !== 'string') {
      s = s + '';
    }
    if (options.parser.decodeEntities) {
      // The parser handed us decoded text, so every special character is literal
      // and must be re-encoded. `&` goes first so the entities produced for
      // `<` and `>` are not escaped a second time.
      s = s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
      if (quote) {
        s = s.replace(/"/g, '&quot;');
      }
    }
    // TODO: this is inadequate because it will pass `&0;`. This approach
    // will not work, each & must be considered with regard to whether it
    // is followed by a 100% syntactically valid entity or not, and escaped
    // if it is not. If this bothers you, don't set parser.decodeEntities
    // to false. (The default is true.)
    s = s.replace(/&(?![a-zA-Z0-9#]{1,20};)/g, '&amp;') // Match ampersands not part of existing HTML entity
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
    if (quote) {
      s = s.replace(/"/g, '&quot;');
    }
    return s;
  }

  // Returns true when `href` uses a scheme (or protocol-relative form) that is
  // not permitted for tag `name`.
  function naughtyHref(name, href) {
    // Browsers ignore character codes of 32 (space) and below in a surprising
    // number of situations. Start reading here:
    // https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Embedded_tab
    // eslint-disable-next-line no-control-regex
    href = href.replace(/[\x00-\x20]+/g, '');
    // Clobber any comments in URLs, which the browser might
    // interpret inside an XML data island, allowing
    // a javascript: URL to be snuck through
    href = href.replace(/<!--.*?-->/g, '');
    // Case insensitive so we don't get faked out by JAVASCRIPT #1
    // Allow more characters after the first so we don't get faked
    // out by certain schemes browsers accept
    const matches = href.match(/^([a-zA-Z][a-zA-Z0-9.\-+]*):/);
    if (!matches) {
      // Protocol-relative URL starting with any combination of '/' and '\'
      if (href.match(/^[/\\]{2}/)) {
        return !options.allowProtocolRelative;
      }

      // No scheme
      return false;
    }
    const scheme = matches[1].toLowerCase();

    if (has(options.allowedSchemesByTag, name)) {
      return options.allowedSchemesByTag[name].indexOf(scheme) === -1;
    }

    return !options.allowedSchemes || options.allowedSchemes.indexOf(scheme) === -1;
  }

  // Keeps only the whitespace-separated class names that appear in `allowed`
  // or match one of `allowedGlobs`; a falsy `allowed` means "no filtering".
  function filterClasses(classes, allowed, allowedGlobs) {
    if (!allowed) {
      // The class attribute is allowed without filtering on this tag
      return classes;
    }
    classes = classes.split(/\s+/);
    return classes.filter(function (clss) {
      return allowed.indexOf(clss) !== -1 || allowedGlobs.some(function (glob) {
        return glob.test(clss);
      });
    }).join(' ');
  }
}
|
|||
|
|
|||
|
// Defaults are accessible to you so that you can use them as a starting point
|
|||
|
// programmatically if you wish
|
|||
|
|
|||
|
// Baseline htmlparser2 options; `options.parser` passed to sanitizeHtml is
// merged over these. escapeHtml re-encodes every `&`, `<`, `>` only while
// `decodeEntities` is true.
const htmlParserDefaults = {
  decodeEntities: true
};
|
|||
|
sanitizeHtml.defaults = {
|
|||
|
allowedTags: [
|
|||
|
// Sections derived from MDN element categories and limited to the more
|
|||
|
// benign categories.
|
|||
|
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element
|
|||
|
// Content sectioning
|
|||
|
'address', 'article', 'aside', 'footer', 'header',
|
|||
|
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hgroup',
|
|||
|
'main', 'nav', 'section',
|
|||
|
// Text content
|
|||
|
'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure',
|
|||
|
'hr', 'li', 'main', 'ol', 'p', 'pre', 'ul',
|
|||
|
// Inline text semantics
|
|||
|
'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn',
|
|||
|
'em', 'i', 'kbd', 'mark', 'q',
|
|||
|
'rb', 'rp', 'rt', 'rtc', 'ruby',
|
|||
|
's', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr',
|
|||
|
// Table content
|
|||
|
'caption', 'col', 'colgroup', 'table', 'tbody', 'td', 'tfoot', 'th',
|
|||
|
'thead', 'tr'
|
|||
|
],
|
|||
|
disallowedTagsMode: 'discard',
|
|||
|
allowedAttributes: {
|
|||
|
a: ['href', 'name', 'target'],
|
|||
|
// We don't currently allow img itself by default, but
|
|||
|
// these attributes would make sense if we did.
|
|||
|
img: ['src', 'srcset', 'alt', 'title', 'width', 'height', 'loading']
|
|||
|
},
|
|||
|
// Lots of these won't come up by default because we don't allow them
|
|||
|
selfClosing: ['img', 'br', 'hr', 'area', 'base', 'basefont', 'input', 'link', 'meta'],
|
|||
|
// URL schemes we permit
|
|||
|
allowedSchemes: ['http', 'https', 'ftp', 'mailto', 'tel'],
|
|||
|
allowedSchemesByTag: {},
|
|||
|
allowedSchemesAppliedToAttributes: ['href', 'src', 'cite'],
|
|||
|
allowProtocolRelative: true,
|
|||
|
enforceHtmlBoundary: false
|
|||
|
};
|
|||
|
|
|||
|
/**
 * Builds a `transformTags` function that renames a tag and optionally sets attributes.
 *
 * @param {string} newTagName - Tag name the transform reports.
 * @param {Object} [newAttribs={}] - Attributes to apply.
 * @param {boolean} [merge=true] - When truthy, `newAttribs` are copied onto the
 *   tag's existing attributes (the passed-in object is modified); otherwise
 *   `newAttribs` replaces them.
 * @returns {function(string, Object): {tagName: string, attribs: Object}}
 *   Transform suitable for `options.transformTags`.
 */
sanitizeHtml.simpleTransform = function (newTagName, newAttribs, merge) {
  merge = (merge === undefined) ? true : merge;
  newAttribs = newAttribs || {};

  return function (tagName, attribs) {
    if (merge) {
      for (const attrib in newAttribs) {
        attribs[attrib] = newAttribs[attrib];
      }
    } else {
      attribs = newAttribs;
    }

    return {
      tagName: newTagName,
      attribs: attribs
    };
  };
};
|