mirror of
https://github.com/EGroupware/egroupware.git
synced 2024-11-26 01:43:47 +01:00
upgrade HTML Purifier to Version 4.4.0 (with merged trunk patches -c37721 -c34417 -c35216 -c35219)
This commit is contained in:
parent
3874ec6084
commit
5511abc5bf
@ -26,6 +26,10 @@ These optional extensions can enhance the capabilities of HTML Purifier:
|
|||||||
* bcmath : Used for unit conversion and imagecrash protection
|
* bcmath : Used for unit conversion and imagecrash protection
|
||||||
* tidy : Used for pretty-printing HTML
|
* tidy : Used for pretty-printing HTML
|
||||||
|
|
||||||
|
These optional libraries can enhance the capabilities of HTML Purifier:
|
||||||
|
|
||||||
|
* CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks
|
||||||
|
* Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA
|
||||||
|
|
||||||
---------------------------------------------------------------------------
|
---------------------------------------------------------------------------
|
||||||
2. Reconnaissance
|
2. Reconnaissance
|
||||||
@ -331,11 +335,6 @@ Or move the cache directory somewhere else (no trailing slash):
|
|||||||
|
|
||||||
The interface is mind-numbingly simple:
|
The interface is mind-numbingly simple:
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
|
||||||
$clean_html = $purifier->purify( $dirty_html );
|
|
||||||
|
|
||||||
...or, if you're using the configuration object:
|
|
||||||
|
|
||||||
$purifier = new HTMLPurifier($config);
|
$purifier = new HTMLPurifier($config);
|
||||||
$clean_html = $purifier->purify( $dirty_html );
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
@ -354,7 +353,8 @@ If your website is in UTF-8 and XHTML Transitional, use this code:
|
|||||||
<?php
|
<?php
|
||||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
?>
|
?>
|
||||||
|
|
||||||
|
@ -9,6 +9,49 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. Internal change
|
. Internal change
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
|
4.4.0, released 2012-01-18
|
||||||
|
# Removed PEARSax3 handler.
|
||||||
|
# URI.Munge now munges URIs inside the same host that go from https
|
||||||
|
to http. Reported by Neike Taika-Tessaro.
|
||||||
|
# Core.EscapeNonASCIICharacters now always transforms non-ASCII characters to
|
||||||
|
entities, even if target encoding is UTF-8.
|
||||||
|
# Tighten up selector validation in ExtractStyleBlocks.
|
||||||
|
Non-syntactically valid selectors are now rejected, along with
|
||||||
|
some of the more obscure ones such as attribute selectors, the
|
||||||
|
:lang pseudoselector, and anything not in CSS2.1. Furthermore,
|
||||||
|
ID and class selectors now work properly with the relevant
|
||||||
|
configuration attributes. Also, mute errors when parsing CSS
|
||||||
|
with CSS Tidy.
|
||||||
|
! Added support for 'scope' attribute on tables.
|
||||||
|
! Added %HTML.TargetBlank, which adds target="_blank" to all outgoing links.
|
||||||
|
! Properly handle sub-lists directly nested inside of lists in
|
||||||
|
a standards compliant way, by moving them into the preceding <li>.
|
||||||
|
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
|
||||||
|
limited allowed comments in untrusted situations.
|
||||||
|
! Implement iframes, and allow them to be used in untrusted mode with
|
||||||
|
%HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle
|
||||||
|
<brad.froehle@gmail.com> for submitting an initial version of the patch.
|
||||||
|
! The Forms module now works properly for transitional doctypes.
|
||||||
|
! Added support for internationalized domain names. You need the PEAR
|
||||||
|
Net_IDNA2 module to be in your path; if it is installed, ensure the
|
||||||
|
class can be loaded and then set %Core.EnableIDNA to true.
|
||||||
|
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
|
||||||
|
<yramirez-htmlpurifier@adicio.com> for reporting.
|
||||||
|
- Explicitly initialize anonModule variable to null.
|
||||||
|
- Do not duplicate nofollow if already present. Thanks 178
|
||||||
|
for reporting.
|
||||||
|
- Do not add nofollow if hostname matches our current host. Thanks 178
|
||||||
|
for reporting, and Neike Taika-Tessaro for helping diagnose.
|
||||||
|
- Do not unset parser variable; this fixes intermittent serialization
|
||||||
|
problems. Thanks Neike Taika-Tessaro for reporting, bill
|
||||||
|
<10010tiger@gmail.com> for diagnosing.
|
||||||
|
- Fix iconv truncation bug, where non-UTF-8 target encodings see
|
||||||
|
output truncated after around 8000 characters. Thanks Jörg Ludwig
|
||||||
|
<joerg.ludwig@iserv.eu> for reporting.
|
||||||
|
- Fix broken table content model for XHTML1.1 (and also earlier
|
||||||
|
versions, although the W3C validator doesn't catch those violations).
|
||||||
|
Thanks GlitchMr <glitch.mr@gmail.com> for reporting.
|
||||||
|
|
||||||
4.3.0, released 2011-03-27
|
4.3.0, released 2011-03-27
|
||||||
# Fixed broken caching of customized raw definitions, but requires an
|
# Fixed broken caching of customized raw definitions, but requires an
|
||||||
API change. The old API still works but will emit a warning,
|
API change. The old API still works but will emit a warning,
|
||||||
|
@ -1 +1 @@
|
|||||||
4.3.0
|
4.4.0
|
@ -7,7 +7,7 @@
|
|||||||
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
|
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
|
||||||
* FILE, changes will be overwritten the next time the script is run.
|
* FILE, changes will be overwritten the next time the script is run.
|
||||||
*
|
*
|
||||||
* @version 4.3.0
|
* @version 4.4.0
|
||||||
*
|
*
|
||||||
* @warning
|
* @warning
|
||||||
* You must *not* include any other HTML Purifier files before this file,
|
* You must *not* include any other HTML Purifier files before this file,
|
||||||
@ -73,6 +73,7 @@ require 'HTMLPurifier/UnitConverter.php';
|
|||||||
require 'HTMLPurifier/VarParser.php';
|
require 'HTMLPurifier/VarParser.php';
|
||||||
require 'HTMLPurifier/VarParserException.php';
|
require 'HTMLPurifier/VarParserException.php';
|
||||||
require 'HTMLPurifier/AttrDef/CSS.php';
|
require 'HTMLPurifier/AttrDef/CSS.php';
|
||||||
|
require 'HTMLPurifier/AttrDef/Clone.php';
|
||||||
require 'HTMLPurifier/AttrDef/Enum.php';
|
require 'HTMLPurifier/AttrDef/Enum.php';
|
||||||
require 'HTMLPurifier/AttrDef/Integer.php';
|
require 'HTMLPurifier/AttrDef/Integer.php';
|
||||||
require 'HTMLPurifier/AttrDef/Lang.php';
|
require 'HTMLPurifier/AttrDef/Lang.php';
|
||||||
@ -90,6 +91,7 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
|||||||
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
|
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Font.php';
|
require 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||||
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||||
|
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
|
||||||
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Length.php';
|
require 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||||
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||||
@ -130,10 +132,12 @@ require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
|
|||||||
require 'HTMLPurifier/AttrTransform/SafeObject.php';
|
require 'HTMLPurifier/AttrTransform/SafeObject.php';
|
||||||
require 'HTMLPurifier/AttrTransform/SafeParam.php';
|
require 'HTMLPurifier/AttrTransform/SafeParam.php';
|
||||||
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
|
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||||
|
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
|
||||||
require 'HTMLPurifier/AttrTransform/Textarea.php';
|
require 'HTMLPurifier/AttrTransform/Textarea.php';
|
||||||
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||||
require 'HTMLPurifier/ChildDef/Custom.php';
|
require 'HTMLPurifier/ChildDef/Custom.php';
|
||||||
require 'HTMLPurifier/ChildDef/Empty.php';
|
require 'HTMLPurifier/ChildDef/Empty.php';
|
||||||
|
require 'HTMLPurifier/ChildDef/List.php';
|
||||||
require 'HTMLPurifier/ChildDef/Required.php';
|
require 'HTMLPurifier/ChildDef/Required.php';
|
||||||
require 'HTMLPurifier/ChildDef/Optional.php';
|
require 'HTMLPurifier/ChildDef/Optional.php';
|
||||||
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||||
@ -148,6 +152,7 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
|
|||||||
require 'HTMLPurifier/HTMLModule/Edit.php';
|
require 'HTMLPurifier/HTMLModule/Edit.php';
|
||||||
require 'HTMLPurifier/HTMLModule/Forms.php';
|
require 'HTMLPurifier/HTMLModule/Forms.php';
|
||||||
require 'HTMLPurifier/HTMLModule/Hypertext.php';
|
require 'HTMLPurifier/HTMLModule/Hypertext.php';
|
||||||
|
require 'HTMLPurifier/HTMLModule/Iframe.php';
|
||||||
require 'HTMLPurifier/HTMLModule/Image.php';
|
require 'HTMLPurifier/HTMLModule/Image.php';
|
||||||
require 'HTMLPurifier/HTMLModule/Legacy.php';
|
require 'HTMLPurifier/HTMLModule/Legacy.php';
|
||||||
require 'HTMLPurifier/HTMLModule/List.php';
|
require 'HTMLPurifier/HTMLModule/List.php';
|
||||||
@ -164,6 +169,7 @@ require 'HTMLPurifier/HTMLModule/Scripting.php';
|
|||||||
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
|
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||||
require 'HTMLPurifier/HTMLModule/Tables.php';
|
require 'HTMLPurifier/HTMLModule/Tables.php';
|
||||||
require 'HTMLPurifier/HTMLModule/Target.php';
|
require 'HTMLPurifier/HTMLModule/Target.php';
|
||||||
|
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
|
||||||
require 'HTMLPurifier/HTMLModule/Text.php';
|
require 'HTMLPurifier/HTMLModule/Text.php';
|
||||||
require 'HTMLPurifier/HTMLModule/Tidy.php';
|
require 'HTMLPurifier/HTMLModule/Tidy.php';
|
||||||
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||||
@ -202,6 +208,7 @@ require 'HTMLPurifier/URIFilter/DisableResources.php';
|
|||||||
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
|
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||||
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
|
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||||
require 'HTMLPurifier/URIFilter/Munge.php';
|
require 'HTMLPurifier/URIFilter/Munge.php';
|
||||||
|
require 'HTMLPurifier/URIFilter/SafeIframe.php';
|
||||||
require 'HTMLPurifier/URIScheme/data.php';
|
require 'HTMLPurifier/URIScheme/data.php';
|
||||||
require 'HTMLPurifier/URIScheme/file.php';
|
require 'HTMLPurifier/URIScheme/file.php';
|
||||||
require 'HTMLPurifier/URIScheme/ftp.php';
|
require 'HTMLPurifier/URIScheme/ftp.php';
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
HTML Purifier 4.3.0 - Standards Compliant HTML Filtering
|
HTML Purifier 4.4.0 - Standards Compliant HTML Filtering
|
||||||
Copyright (C) 2006-2008 Edward Z. Yang
|
Copyright (C) 2006-2008 Edward Z. Yang
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
@ -55,10 +55,10 @@ class HTMLPurifier
|
|||||||
{
|
{
|
||||||
|
|
||||||
/** Version of HTML Purifier */
|
/** Version of HTML Purifier */
|
||||||
public $version = '4.3.0';
|
public $version = '4.4.0';
|
||||||
|
|
||||||
/** Constant with version of HTML Purifier */
|
/** Constant with version of HTML Purifier */
|
||||||
const VERSION = '4.3.0';
|
const VERSION = '4.4.0';
|
||||||
|
|
||||||
/** Global configuration object */
|
/** Global configuration object */
|
||||||
public $config;
|
public $config;
|
||||||
|
@ -67,6 +67,7 @@ require_once $__dir . '/HTMLPurifier/UnitConverter.php';
|
|||||||
require_once $__dir . '/HTMLPurifier/VarParser.php';
|
require_once $__dir . '/HTMLPurifier/VarParser.php';
|
||||||
require_once $__dir . '/HTMLPurifier/VarParserException.php';
|
require_once $__dir . '/HTMLPurifier/VarParserException.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
|
||||||
|
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
|
||||||
@ -84,6 +85,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
|||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||||
|
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||||
@ -124,10 +126,12 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
|
|||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
|
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
|
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
|
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||||
|
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
|
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
||||||
|
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||||
@ -142,6 +146,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
|
|||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
|
||||||
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
|
||||||
@ -158,6 +163,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
|
|||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
|
||||||
|
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||||
@ -196,6 +202,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
|
|||||||
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
|
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
|
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
|
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
|
||||||
|
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
|
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
|
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
|
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
|
||||||
|
@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
|
|||||||
$string = trim($string);
|
$string = trim($string);
|
||||||
|
|
||||||
if (empty($string)) return false;
|
if (empty($string)) return false;
|
||||||
if (isset($colors[$string])) return $colors[$string];
|
if (isset($colors[strtolower($string)])) return $colors[$string];
|
||||||
if ($string[0] === '#') $hex = substr($string, 1);
|
if ($string[0] === '#') $hex = substr($string, 1);
|
||||||
else $hex = $string;
|
else $hex = $string;
|
||||||
|
|
||||||
|
@ -12,12 +12,22 @@
|
|||||||
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
// ref functionality disabled, since we also have to verify
|
// selector is NOT a valid thing to use for IDREFs, because IDREFs
|
||||||
// whether or not the ID it refers to exists
|
// *must* target IDs that exist, whereas selector #ids do not.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines whether or not we're validating an ID in a CSS
|
||||||
|
* selector context.
|
||||||
|
*/
|
||||||
|
protected $selector;
|
||||||
|
|
||||||
|
public function __construct($selector = false) {
|
||||||
|
$this->selector = $selector;
|
||||||
|
}
|
||||||
|
|
||||||
public function validate($id, $config, $context) {
|
public function validate($id, $config, $context) {
|
||||||
|
|
||||||
if (!$config->get('Attr.EnableID')) return false;
|
if (!$this->selector && !$config->get('Attr.EnableID')) return false;
|
||||||
|
|
||||||
$id = trim($id); // trim it first
|
$id = trim($id); // trim it first
|
||||||
|
|
||||||
@ -33,10 +43,10 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
|||||||
'%Attr.IDPrefix is set', E_USER_WARNING);
|
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||||
}
|
}
|
||||||
|
|
||||||
//if (!$this->ref) {
|
if (!$this->selector) {
|
||||||
$id_accumulator =& $context->get('IDAccumulator');
|
$id_accumulator =& $context->get('IDAccumulator');
|
||||||
if (isset($id_accumulator->ids[$id])) return false;
|
if (isset($id_accumulator->ids[$id])) return false;
|
||||||
//}
|
}
|
||||||
|
|
||||||
// we purposely avoid using regex, hopefully this is faster
|
// we purposely avoid using regex, hopefully this is faster
|
||||||
|
|
||||||
@ -56,7 +66,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (/*!$this->ref && */$result) $id_accumulator->add($id);
|
if (!$this->selector && $result) $id_accumulator->add($id);
|
||||||
|
|
||||||
// if no change was made to the ID, return the result
|
// if no change was made to the ID, return the result
|
||||||
// else, return the new id if stripping whitespace made it
|
// else, return the new id if stripping whitespace made it
|
||||||
|
@ -19,7 +19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
}
|
}
|
||||||
|
|
||||||
public function make($string) {
|
public function make($string) {
|
||||||
$embeds = (bool) $string;
|
$embeds = ($string === 'embedded');
|
||||||
return new HTMLPurifier_AttrDef_URI($embeds);
|
return new HTMLPurifier_AttrDef_URI($embeds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,9 +44,8 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
// A regular domain name.
|
// A regular domain name.
|
||||||
|
|
||||||
// This breaks I18N domain names, but we don't have proper IRI support,
|
// This doesn't match I18N domain names, but we don't have proper IRI support,
|
||||||
// so force users to insert Punycode. If there's complaining we'll
|
// so force users to insert Punycode.
|
||||||
// try to fix things into an international friendly form.
|
|
||||||
|
|
||||||
// The productions describing this are:
|
// The productions describing this are:
|
||||||
$a = '[a-z]'; // alpha
|
$a = '[a-z]'; // alpha
|
||||||
@ -57,10 +56,44 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
|||||||
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
||||||
$toplabel = "$a($and*$an)?";
|
$toplabel = "$a($and*$an)?";
|
||||||
// hostname = *( domainlabel "." ) toplabel [ "." ]
|
// hostname = *( domainlabel "." ) toplabel [ "." ]
|
||||||
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
|
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
|
||||||
if (!$match) return false;
|
return $string;
|
||||||
|
}
|
||||||
|
|
||||||
return $string;
|
// If we have Net_IDNA2 support, we can support IRIs by
|
||||||
|
// punycoding them. (This is the most portable thing to do,
|
||||||
|
// since otherwise we have to assume browsers support IDNs natively.)
|
||||||
|
|
||||||
|
if ($config->get('Core.EnableIDNA')) {
|
||||||
|
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
|
||||||
|
// we need to encode each period separately
|
||||||
|
$parts = explode('.', $string);
|
||||||
|
try {
|
||||||
|
$new_parts = array();
|
||||||
|
foreach ($parts as $part) {
|
||||||
|
$encodable = false;
|
||||||
|
for ($i = 0, $c = strlen($part); $i < $c; $i++) {
|
||||||
|
if (ord($part[$i]) > 0x7a) {
|
||||||
|
$encodable = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!$encodable) {
|
||||||
|
$new_parts[] = $part;
|
||||||
|
} else {
|
||||||
|
$new_parts[] = $idna->encode($part);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$string = implode('.', $new_parts);
|
||||||
|
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
|
||||||
|
return $string;
|
||||||
|
}
|
||||||
|
} catch (Exception $e) {
|
||||||
|
// XXX error reporting
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -24,9 +24,13 @@ class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
|
|||||||
$url = $this->parser->parse($attr['href']);
|
$url = $this->parser->parse($attr['href']);
|
||||||
$scheme = $url->getSchemeObj($config, $context);
|
$scheme = $url->getSchemeObj($config, $context);
|
||||||
|
|
||||||
if (!is_null($url->host) && $scheme !== false && $scheme->browsable) {
|
if ($scheme->browsable && !$url->isLocal($config, $context)) {
|
||||||
if (isset($attr['rel'])) {
|
if (isset($attr['rel'])) {
|
||||||
$attr['rel'] .= ' nofollow';
|
$rels = explode(' ', $attr);
|
||||||
|
if (!in_array('nofollow', $rels)) {
|
||||||
|
$rels[] = 'nofollow';
|
||||||
|
}
|
||||||
|
$attr['rel'] = implode(' ', $rels);
|
||||||
} else {
|
} else {
|
||||||
$attr['rel'] = 'nofollow';
|
$attr['rel'] = 'nofollow';
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,13 @@ class HTMLPurifier_AttrTypes
|
|||||||
* types.
|
* types.
|
||||||
*/
|
*/
|
||||||
public function __construct() {
|
public function __construct() {
|
||||||
|
// XXX This is kind of poor, since we don't actually /clone/
|
||||||
|
// instances; instead, we use the supplied make() attribute. So,
|
||||||
|
// the underlying class must know how to deal with arguments.
|
||||||
|
// With the old implementation of Enum, that ignored its
|
||||||
|
// arguments when handling a make dispatch, the IAlign
|
||||||
|
// definition wouldn't work.
|
||||||
|
|
||||||
// pseudo-types, must be instantiated via shorthand
|
// pseudo-types, must be instantiated via shorthand
|
||||||
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
|
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
|
||||||
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
|
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
|
||||||
@ -29,6 +36,9 @@ class HTMLPurifier_AttrTypes
|
|||||||
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
|
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
|
||||||
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
|
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
|
||||||
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
|
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
|
||||||
|
$this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right');
|
||||||
|
$this->info['LAlign'] = self::makeEnum('top,bottom,left,right');
|
||||||
|
$this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
|
||||||
|
|
||||||
// unimplemented aliases
|
// unimplemented aliases
|
||||||
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
|
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
|
||||||
@ -44,6 +54,10 @@ class HTMLPurifier_AttrTypes
|
|||||||
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static function makeEnum($in) {
|
||||||
|
return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves a type
|
* Retrieves a type
|
||||||
* @param $type String type name
|
* @param $type String type name
|
||||||
|
@ -1,7 +1,33 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Definition for tables
|
* Definition for tables. The general idea is to extract out all of the
|
||||||
|
* essential bits, and then reconstruct it later.
|
||||||
|
*
|
||||||
|
* This is a bit confusing, because the DTDs and the W3C
|
||||||
|
* validators seem to disagree on the appropriate definition. The
|
||||||
|
* DTD claims:
|
||||||
|
*
|
||||||
|
* (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
|
||||||
|
*
|
||||||
|
* But actually, the HTML4 spec then has this to say:
|
||||||
|
*
|
||||||
|
* The TBODY start tag is always required except when the table
|
||||||
|
* contains only one table body and no table head or foot sections.
|
||||||
|
* The TBODY end tag may always be safely omitted.
|
||||||
|
*
|
||||||
|
* So the DTD is kind of wrong. The validator is, unfortunately, kind
|
||||||
|
* of on crack.
|
||||||
|
*
|
||||||
|
* The definition changed again in XHTML1.1; and in my opinion, this
|
||||||
|
* formulation makes the most sense.
|
||||||
|
*
|
||||||
|
* caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
|
||||||
|
*
|
||||||
|
* Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
|
||||||
|
* If we encounter a thead, tfoot or tbody, we are placed in the former
|
||||||
|
* mode, and we *must* wrap any stray tr segments with a tbody. But if
|
||||||
|
* we don't run into any of them, just having tr tags is OK.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||||
{
|
{
|
||||||
@ -33,6 +59,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
$collection = array(); // collected nodes
|
$collection = array(); // collected nodes
|
||||||
$tag_index = 0; // the first node might be whitespace,
|
$tag_index = 0; // the first node might be whitespace,
|
||||||
// so this tells us where the start tag is
|
// so this tells us where the start tag is
|
||||||
|
$tbody_mode = false; // if true, then we need to wrap any stray
|
||||||
|
// <tr>s with a <tbody>.
|
||||||
|
|
||||||
foreach ($tokens_of_children as $token) {
|
foreach ($tokens_of_children as $token) {
|
||||||
$is_child = ($nesting == 0);
|
$is_child = ($nesting == 0);
|
||||||
@ -51,8 +79,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
// okay, let's stash the tokens away
|
// okay, let's stash the tokens away
|
||||||
// first token tells us the type of the collection
|
// first token tells us the type of the collection
|
||||||
switch ($collection[$tag_index]->name) {
|
switch ($collection[$tag_index]->name) {
|
||||||
case 'tr':
|
|
||||||
case 'tbody':
|
case 'tbody':
|
||||||
|
$tbody_mode = true;
|
||||||
|
case 'tr':
|
||||||
$content[] = $collection;
|
$content[] = $collection;
|
||||||
break;
|
break;
|
||||||
case 'caption':
|
case 'caption':
|
||||||
@ -61,13 +90,28 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
break;
|
break;
|
||||||
case 'thead':
|
case 'thead':
|
||||||
case 'tfoot':
|
case 'tfoot':
|
||||||
|
$tbody_mode = true;
|
||||||
|
// XXX This breaks rendering properties with
|
||||||
|
// Firefox, which never floats a <thead> to
|
||||||
|
// the top. Ever. (Our scheme will float the
|
||||||
|
// first <thead> to the top.) So maybe
|
||||||
|
// <thead>s that are not first should be
|
||||||
|
// turned into <tbody>? Very tricky, indeed.
|
||||||
|
|
||||||
// access the appropriate variable, $thead or $tfoot
|
// access the appropriate variable, $thead or $tfoot
|
||||||
$var = $collection[$tag_index]->name;
|
$var = $collection[$tag_index]->name;
|
||||||
if ($$var === false) {
|
if ($$var === false) {
|
||||||
$$var = $collection;
|
$$var = $collection;
|
||||||
} else {
|
} else {
|
||||||
// transmutate the first and less entries into
|
// Oops, there's a second one! What
|
||||||
// tbody tags, and then put into content
|
// should we do? Current behavior is to
|
||||||
|
// transmutate the first and last entries into
|
||||||
|
// tbody tags, and then put into content.
|
||||||
|
// Maybe a better idea is to *attach
|
||||||
|
// it* to the existing thead or tfoot?
|
||||||
|
// We don't do this, because Firefox
|
||||||
|
// doesn't float an extra tfoot to the
|
||||||
|
// bottom like it does for the first one.
|
||||||
$collection[$tag_index]->name = 'tbody';
|
$collection[$tag_index]->name = 'tbody';
|
||||||
$collection[count($collection)-1]->name = 'tbody';
|
$collection[count($collection)-1]->name = 'tbody';
|
||||||
$content[] = $collection;
|
$content[] = $collection;
|
||||||
@ -126,7 +170,48 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||||
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
|
||||||
|
if ($tbody_mode) {
|
||||||
|
// a little tricky, since the start of the collection may be
|
||||||
|
// whitespace
|
||||||
|
$inside_tbody = false;
|
||||||
|
foreach ($content as $token_array) {
|
||||||
|
// find the starting token
|
||||||
|
foreach ($token_array as $t) {
|
||||||
|
if ($t->name === 'tr' || $t->name === 'tbody') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} // iterator variable carries over
|
||||||
|
if ($t->name === 'tr') {
|
||||||
|
if ($inside_tbody) {
|
||||||
|
$ret = array_merge($ret, $token_array);
|
||||||
|
} else {
|
||||||
|
$ret[] = new HTMLPurifier_Token_Start('tbody');
|
||||||
|
$ret = array_merge($ret, $token_array);
|
||||||
|
$inside_tbody = true;
|
||||||
|
}
|
||||||
|
} elseif ($t->name === 'tbody') {
|
||||||
|
if ($inside_tbody) {
|
||||||
|
$ret[] = new HTMLPurifier_Token_End('tbody');
|
||||||
|
$inside_tbody = false;
|
||||||
|
$ret = array_merge($ret, $token_array);
|
||||||
|
} else {
|
||||||
|
$ret = array_merge($ret, $token_array);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($inside_tbody) {
|
||||||
|
$ret[] = new HTMLPurifier_Token_End('tbody');
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
foreach ($content as $token_array) {
|
||||||
|
// invariant: everything in here is <tr>s
|
||||||
|
$ret = array_merge($ret, $token_array);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!empty($collection) && $is_collecting == false){
|
if (!empty($collection) && $is_collecting == false){
|
||||||
// grab the trailing space
|
// grab the trailing space
|
||||||
$ret = array_merge($ret, $collection);
|
$ret = array_merge($ret, $collection);
|
||||||
|
@ -20,7 +20,7 @@ class HTMLPurifier_Config
|
|||||||
/**
|
/**
|
||||||
* HTML Purifier's version
|
* HTML Purifier's version
|
||||||
*/
|
*/
|
||||||
public $version = '4.3.0';
|
public $version = '4.4.0';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bool indicator whether or not to automatically finalize
|
* Bool indicator whether or not to automatically finalize
|
||||||
@ -44,7 +44,7 @@ class HTMLPurifier_Config
|
|||||||
/**
|
/**
|
||||||
* Parser for variables
|
* Parser for variables
|
||||||
*/
|
*/
|
||||||
protected $parser;
|
protected $parser = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reference HTMLPurifier_ConfigSchema for value checking
|
* Reference HTMLPurifier_ConfigSchema for value checking
|
||||||
@ -668,7 +668,7 @@ class HTMLPurifier_Config
|
|||||||
*/
|
*/
|
||||||
public function finalize() {
|
public function finalize() {
|
||||||
$this->finalized = true;
|
$this->finalized = true;
|
||||||
unset($this->parser);
|
$this->parser = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Binary file not shown.
@ -24,5 +24,6 @@ array (
|
|||||||
--DESCRIPTION--
|
--DESCRIPTION--
|
||||||
|
|
||||||
Lookup array of color names to six digit hexadecimal number corresponding
|
Lookup array of color names to six digit hexadecimal number corresponding
|
||||||
to color, with preceding hash mark. Used when parsing colors.
|
to color, with preceding hash mark. Used when parsing colors. The lookup
|
||||||
|
is done in a case-insensitive manner.
|
||||||
--# vim: et sw=4 sts=4
|
--# vim: et sw=4 sts=4
|
||||||
|
@ -19,6 +19,68 @@ class HTMLPurifier_Encoder
|
|||||||
*/
|
*/
|
||||||
public static function muteErrorHandler() {}
|
public static function muteErrorHandler() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* iconv wrapper which mutes errors, but doesn't work around bugs.
|
||||||
|
*/
|
||||||
|
public static function unsafeIconv($in, $out, $text) {
|
||||||
|
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
||||||
|
$r = iconv($in, $out, $text);
|
||||||
|
restore_error_handler();
|
||||||
|
return $r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* iconv wrapper which mutes errors and works around bugs.
|
||||||
|
*/
|
||||||
|
public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
|
||||||
|
$code = self::testIconvTruncateBug();
|
||||||
|
if ($code == self::ICONV_OK) {
|
||||||
|
return self::unsafeIconv($in, $out, $text);
|
||||||
|
} elseif ($code == self::ICONV_TRUNCATES) {
|
||||||
|
// we can only work around this if the input character set
|
||||||
|
// is utf-8
|
||||||
|
if ($in == 'utf-8') {
|
||||||
|
if ($max_chunk_size < 4) {
|
||||||
|
trigger_error('max_chunk_size is too small', E_USER_WARNING);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// split into chunks of at most $max_chunk_size bytes, but be careful to handle
|
||||||
|
// multibyte boundaries properly
|
||||||
|
if (($c = strlen($text)) <= $max_chunk_size) {
|
||||||
|
return self::unsafeIconv($in, $out, $text);
|
||||||
|
}
|
||||||
|
$r = '';
|
||||||
|
$i = 0;
|
||||||
|
while (true) {
|
||||||
|
if ($i + $max_chunk_size >= $c) {
|
||||||
|
$r .= self::unsafeIconv($in, $out, substr($text, $i));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// wibble the boundary
|
||||||
|
if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
|
||||||
|
$chunk_size = $max_chunk_size;
|
||||||
|
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
|
||||||
|
$chunk_size = $max_chunk_size - 1;
|
||||||
|
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
|
||||||
|
$chunk_size = $max_chunk_size - 2;
|
||||||
|
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
|
||||||
|
$chunk_size = $max_chunk_size - 3;
|
||||||
|
} else {
|
||||||
|
return false; // rather confusing UTF-8...
|
||||||
|
}
|
||||||
|
$chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
|
||||||
|
$r .= self::unsafeIconv($in, $out, $chunk);
|
||||||
|
$i += $chunk_size;
|
||||||
|
}
|
||||||
|
return $r;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleans a UTF-8 string for well-formedness and SGML validity
|
* Cleans a UTF-8 string for well-formedness and SGML validity
|
||||||
*
|
*
|
||||||
@ -260,6 +322,14 @@ class HTMLPurifier_Encoder
|
|||||||
return $ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static function iconvAvailable() {
|
||||||
|
static $iconv = null;
|
||||||
|
if ($iconv === null) {
|
||||||
|
$iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
|
||||||
|
}
|
||||||
|
return $iconv;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a string to UTF-8 based on configuration.
|
* Converts a string to UTF-8 based on configuration.
|
||||||
*/
|
*/
|
||||||
@ -267,25 +337,22 @@ class HTMLPurifier_Encoder
|
|||||||
$encoding = $config->get('Core.Encoding');
|
$encoding = $config->get('Core.Encoding');
|
||||||
if ($encoding === 'utf-8') return $str;
|
if ($encoding === 'utf-8') return $str;
|
||||||
static $iconv = null;
|
static $iconv = null;
|
||||||
if ($iconv === null) $iconv = function_exists('iconv');
|
if ($iconv === null) $iconv = self::iconvAvailable();
|
||||||
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
|
||||||
if ($iconv && !$config->get('Test.ForceNoIconv')) {
|
if ($iconv && !$config->get('Test.ForceNoIconv')) {
|
||||||
$str = iconv($encoding, 'utf-8//IGNORE', $str);
|
// unaffected by bugs, since UTF-8 supports all characters
|
||||||
|
$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
|
||||||
if ($str === false) {
|
if ($str === false) {
|
||||||
// $encoding is not a valid encoding
|
// $encoding is not a valid encoding
|
||||||
restore_error_handler();
|
|
||||||
trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
|
trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
// If the string is bjorked by Shift_JIS or a similar encoding
|
// If the string is bjorked by Shift_JIS or a similar encoding
|
||||||
// that doesn't support all of ASCII, convert the naughty
|
// that doesn't support all of ASCII, convert the naughty
|
||||||
// characters to their true byte-wise ASCII/UTF-8 equivalents.
|
// characters to their true byte-wise ASCII/UTF-8 equivalents.
|
||||||
$str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding));
|
$str = strtr($str, self::testEncodingSupportsASCII($encoding));
|
||||||
restore_error_handler();
|
|
||||||
return $str;
|
return $str;
|
||||||
} elseif ($encoding === 'iso-8859-1') {
|
} elseif ($encoding === 'iso-8859-1') {
|
||||||
$str = utf8_encode($str);
|
$str = utf8_encode($str);
|
||||||
restore_error_handler();
|
|
||||||
return $str;
|
return $str;
|
||||||
}
|
}
|
||||||
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
|
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
|
||||||
@ -298,16 +365,15 @@ class HTMLPurifier_Encoder
|
|||||||
*/
|
*/
|
||||||
public static function convertFromUTF8($str, $config, $context) {
|
public static function convertFromUTF8($str, $config, $context) {
|
||||||
$encoding = $config->get('Core.Encoding');
|
$encoding = $config->get('Core.Encoding');
|
||||||
|
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
|
||||||
|
$str = self::convertToASCIIDumbLossless($str);
|
||||||
|
}
|
||||||
if ($encoding === 'utf-8') return $str;
|
if ($encoding === 'utf-8') return $str;
|
||||||
static $iconv = null;
|
static $iconv = null;
|
||||||
if ($iconv === null) $iconv = function_exists('iconv');
|
if ($iconv === null) $iconv = self::iconvAvailable();
|
||||||
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
|
|
||||||
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
|
|
||||||
}
|
|
||||||
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
|
||||||
if ($iconv && !$config->get('Test.ForceNoIconv')) {
|
if ($iconv && !$config->get('Test.ForceNoIconv')) {
|
||||||
// Undo our previous fix in convertToUTF8, otherwise iconv will barf
|
// Undo our previous fix in convertToUTF8, otherwise iconv will barf
|
||||||
$ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
|
$ascii_fix = self::testEncodingSupportsASCII($encoding);
|
||||||
if (!$escape && !empty($ascii_fix)) {
|
if (!$escape && !empty($ascii_fix)) {
|
||||||
$clear_fix = array();
|
$clear_fix = array();
|
||||||
foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
|
foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
|
||||||
@ -315,15 +381,17 @@ class HTMLPurifier_Encoder
|
|||||||
}
|
}
|
||||||
$str = strtr($str, array_flip($ascii_fix));
|
$str = strtr($str, array_flip($ascii_fix));
|
||||||
// Normal stuff
|
// Normal stuff
|
||||||
$str = iconv('utf-8', $encoding . '//IGNORE', $str);
|
$str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
|
||||||
restore_error_handler();
|
|
||||||
return $str;
|
return $str;
|
||||||
} elseif ($encoding === 'iso-8859-1') {
|
} elseif ($encoding === 'iso-8859-1') {
|
||||||
$str = utf8_decode($str);
|
$str = utf8_decode($str);
|
||||||
restore_error_handler();
|
|
||||||
return $str;
|
return $str;
|
||||||
}
|
}
|
||||||
trigger_error('Encoding not supported', E_USER_ERROR);
|
trigger_error('Encoding not supported', E_USER_ERROR);
|
||||||
|
// You might be tempted to assume that the ASCII representation
|
||||||
|
// might be OK, however, this is *not* universally true over all
|
||||||
|
// encodings. So we take the conservative route here, rather
|
||||||
|
// than forcibly turn on %Core.EscapeNonASCIICharacters
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -373,6 +441,49 @@ class HTMLPurifier_Encoder
|
|||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** No bugs detected in iconv. */
|
||||||
|
const ICONV_OK = 0;
|
||||||
|
|
||||||
|
/** Iconv truncates output if converting from UTF-8 to another
|
||||||
|
* character set with //IGNORE, and a non-encodable character is found */
|
||||||
|
const ICONV_TRUNCATES = 1;
|
||||||
|
|
||||||
|
/** Iconv does not support //IGNORE, making it unusable for
|
||||||
|
* transcoding purposes */
|
||||||
|
const ICONV_UNUSABLE = 2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* glibc iconv has a known bug where it doesn't handle the magic
|
||||||
|
* //IGNORE stanza correctly. In particular, rather than ignore
|
||||||
|
* characters, it will return an EILSEQ after consuming some number
|
||||||
|
* of characters, and expect you to restart iconv as if it were
|
||||||
|
* an E2BIG. Old versions of PHP did not respect the errno, and
|
||||||
|
* returned the fragment, so as a result you would see iconv
|
||||||
|
* mysteriously truncating output. We can work around this by
|
||||||
|
* manually chopping our input into segments of about 8000
|
||||||
|
* characters, as long as PHP ignores the error code. If PHP starts
|
||||||
|
* paying attention to the error code, iconv becomes unusable.
|
||||||
|
*
|
||||||
|
* @return Error code indicating severity of bug.
|
||||||
|
*/
|
||||||
|
public static function testIconvTruncateBug() {
|
||||||
|
static $code = null;
|
||||||
|
if ($code === null) {
|
||||||
|
// better not use iconv, otherwise infinite loop!
|
||||||
|
$r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
|
||||||
|
if ($r === false) {
|
||||||
|
$code = self::ICONV_UNUSABLE;
|
||||||
|
} elseif (($c = strlen($r)) < 9000) {
|
||||||
|
$code = self::ICONV_TRUNCATES;
|
||||||
|
} elseif ($c > 9000) {
|
||||||
|
trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
|
||||||
|
} else {
|
||||||
|
$code = self::ICONV_OK;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $code;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This expensive function tests whether or not a given character
|
* This expensive function tests whether or not a given character
|
||||||
* encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
|
* encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
|
||||||
@ -385,6 +496,11 @@ class HTMLPurifier_Encoder
|
|||||||
* which can be used to "undo" any overzealous iconv action.
|
* which can be used to "undo" any overzealous iconv action.
|
||||||
*/
|
*/
|
||||||
public static function testEncodingSupportsASCII($encoding, $bypass = false) {
|
public static function testEncodingSupportsASCII($encoding, $bypass = false) {
|
||||||
|
// All calls to iconv here go through unsafeIconv; this is safe, proof by case analysis:
|
||||||
|
// If ICONV_OK, no difference.
|
||||||
|
// If ICONV_TRUNCATES, all calls involve one-character inputs,
|
||||||
|
// so bug is not triggered.
|
||||||
|
// If ICONV_UNUSABLE, this call is irrelevant
|
||||||
static $encodings = array();
|
static $encodings = array();
|
||||||
if (!$bypass) {
|
if (!$bypass) {
|
||||||
if (isset($encodings[$encoding])) return $encodings[$encoding];
|
if (isset($encodings[$encoding])) return $encodings[$encoding];
|
||||||
@ -398,24 +514,22 @@ class HTMLPurifier_Encoder
|
|||||||
if (strpos($lenc, 'iso-8859-') === 0) return array();
|
if (strpos($lenc, 'iso-8859-') === 0) return array();
|
||||||
}
|
}
|
||||||
$ret = array();
|
$ret = array();
|
||||||
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
|
||||||
if (iconv('UTF-8', $encoding, 'a') === false) return false;
|
|
||||||
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
|
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
|
||||||
$c = chr($i); // UTF-8 char
|
$c = chr($i); // UTF-8 char
|
||||||
$r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
|
$r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
|
||||||
if (
|
if (
|
||||||
$r === '' ||
|
$r === '' ||
|
||||||
// This line is needed for iconv implementations that do not
|
// This line is needed for iconv implementations that do not
|
||||||
// omit characters that do not exist in the target character set
|
// omit characters that do not exist in the target character set
|
||||||
($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
|
($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
|
||||||
) {
|
) {
|
||||||
// Reverse engineer: what's the UTF-8 equiv of this byte
|
// Reverse engineer: what's the UTF-8 equiv of this byte
|
||||||
// sequence? This assumes that there's no variable width
|
// sequence? This assumes that there's no variable width
|
||||||
// encoding that doesn't support ASCII.
|
// encoding that doesn't support ASCII.
|
||||||
$ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
|
$ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
restore_error_handler();
|
|
||||||
$encodings[$encoding] = $ret;
|
$encodings[$encoding] = $ret;
|
||||||
return $ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,11 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
// why is this a top level function? Because PHP 5.2.0 doesn't seem to
|
||||||
|
// understand how to interpret this filter if it's a static method.
|
||||||
|
// It's all really silly, but if we go this route it might be reasonable
|
||||||
|
// to coalesce all of these methods into one.
|
||||||
|
function htmlpurifier_filter_extractstyleblocks_muteerrorhandler() {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This filter extracts <style> blocks from input HTML, cleans them up
|
* This filter extracts <style> blocks from input HTML, cleans them up
|
||||||
* using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
|
* using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
|
||||||
@ -21,8 +27,15 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
|||||||
private $_styleMatches = array();
|
private $_styleMatches = array();
|
||||||
private $_tidy;
|
private $_tidy;
|
||||||
|
|
||||||
|
private $_id_attrdef;
|
||||||
|
private $_class_attrdef;
|
||||||
|
private $_enum_attrdef;
|
||||||
|
|
||||||
public function __construct() {
|
public function __construct() {
|
||||||
$this->_tidy = new csstidy();
|
$this->_tidy = new csstidy();
|
||||||
|
$this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
|
||||||
|
$this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
|
||||||
|
$this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus'));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -77,27 +90,166 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
|||||||
$css = substr($css, 0, -3);
|
$css = substr($css, 0, -3);
|
||||||
}
|
}
|
||||||
$css = trim($css);
|
$css = trim($css);
|
||||||
|
set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
|
||||||
$this->_tidy->parse($css);
|
$this->_tidy->parse($css);
|
||||||
|
restore_error_handler();
|
||||||
$css_definition = $config->getDefinition('CSS');
|
$css_definition = $config->getDefinition('CSS');
|
||||||
|
$html_definition = $config->getDefinition('HTML');
|
||||||
|
$new_css = array();
|
||||||
foreach ($this->_tidy->css as $k => $decls) {
|
foreach ($this->_tidy->css as $k => $decls) {
|
||||||
// $decls are all CSS declarations inside an @ selector
|
// $decls are all CSS declarations inside an @ selector
|
||||||
$new_decls = array();
|
$new_decls = array();
|
||||||
foreach ($decls as $selector => $style) {
|
foreach ($decls as $selector => $style) {
|
||||||
$selector = trim($selector);
|
$selector = trim($selector);
|
||||||
if ($selector === '') continue; // should not happen
|
if ($selector === '') continue; // should not happen
|
||||||
if ($selector[0] === '+') {
|
// Parse the selector
|
||||||
if ($selector !== '' && $selector[0] === '+') continue;
|
// Here is the relevant part of the CSS grammar:
|
||||||
}
|
//
|
||||||
if (!empty($scopes)) {
|
// ruleset
|
||||||
$new_selector = array(); // because multiple ones are possible
|
// : selector [ ',' S* selector ]* '{' ...
|
||||||
$selectors = array_map('trim', explode(',', $selector));
|
// selector
|
||||||
foreach ($scopes as $s1) {
|
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
|
||||||
foreach ($selectors as $s2) {
|
// combinator
|
||||||
$new_selector[] = "$s1 $s2";
|
// : '+' S*
|
||||||
|
// : '>' S*
|
||||||
|
// simple_selector
|
||||||
|
// : element_name [ HASH | class | attrib | pseudo ]*
|
||||||
|
// | [ HASH | class | attrib | pseudo ]+
|
||||||
|
// element_name
|
||||||
|
// : IDENT | '*'
|
||||||
|
// ;
|
||||||
|
// class
|
||||||
|
// : '.' IDENT
|
||||||
|
// ;
|
||||||
|
// attrib
|
||||||
|
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
|
||||||
|
// [ IDENT | STRING ] S* ]? ']'
|
||||||
|
// ;
|
||||||
|
// pseudo
|
||||||
|
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
|
||||||
|
// ;
|
||||||
|
//
|
||||||
|
// For reference, here are the relevant tokens:
|
||||||
|
//
|
||||||
|
// HASH #{name}
|
||||||
|
// IDENT {ident}
|
||||||
|
// INCLUDES ~=
|
||||||
|
// DASHMATCH |=
|
||||||
|
// STRING {string}
|
||||||
|
// FUNCTION {ident}\(
|
||||||
|
//
|
||||||
|
// And the lexical scanner tokens
|
||||||
|
//
|
||||||
|
// name {nmchar}+
|
||||||
|
// nmchar [_a-z0-9-]|{nonascii}|{escape}
|
||||||
|
// nonascii [\240-\377]
|
||||||
|
// escape {unicode}|\\[^\r\n\f0-9a-f]
|
||||||
|
// unicode \\{h}{1,6}(\r\n|[ \t\r\n\f])?
|
||||||
|
// ident -?{nmstart}{nmchar}*
|
||||||
|
// nmstart [_a-z]|{nonascii}|{escape}
|
||||||
|
// string {string1}|{string2}
|
||||||
|
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
||||||
|
// string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\'
|
||||||
|
//
|
||||||
|
// We'll implement a subset (in order to reduce attack
|
||||||
|
// surface); in particular:
|
||||||
|
//
|
||||||
|
// - No Unicode support
|
||||||
|
// - No escapes support
|
||||||
|
// - No string support (by proxy no attrib support)
|
||||||
|
// - element_name is matched against allowed
|
||||||
|
// elements (some people might find this
|
||||||
|
// annoying...)
|
||||||
|
// - Pseudo-classes limited to one of :first-child, :link,
|
||||||
|
// :visited, :active, :hover, :focus
|
||||||
|
|
||||||
|
// handle ruleset
|
||||||
|
$selectors = array_map('trim', explode(',', $selector));
|
||||||
|
$new_selectors = array();
|
||||||
|
foreach ($selectors as $sel) {
|
||||||
|
// split on +, > and spaces
|
||||||
|
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||||
|
// even indices are chunks, odd indices are
|
||||||
|
// delimiters
|
||||||
|
$nsel = null;
|
||||||
|
$delim = null; // guaranteed to be non-null after
|
||||||
|
// two loop iterations
|
||||||
|
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
|
||||||
|
$x = $basic_selectors[$i];
|
||||||
|
if ($i % 2) {
|
||||||
|
// delimiter
|
||||||
|
if ($x === ' ') {
|
||||||
|
$delim = ' ';
|
||||||
|
} else {
|
||||||
|
$delim = ' ' . $x . ' ';
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// simple selector
|
||||||
|
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||||
|
$sdelim = null;
|
||||||
|
$nx = null;
|
||||||
|
for ($j = 0, $cc = count($components); $j < $cc; $j ++) {
|
||||||
|
$y = $components[$j];
|
||||||
|
if ($j === 0) {
|
||||||
|
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
|
||||||
|
$nx = $y;
|
||||||
|
} else {
|
||||||
|
// $nx stays null; this matters
|
||||||
|
// if we don't manage to find
|
||||||
|
// any valid selector content,
|
||||||
|
// in which case we ignore the
|
||||||
|
// outer $delim
|
||||||
|
}
|
||||||
|
} elseif ($j % 2) {
|
||||||
|
// set delimiter
|
||||||
|
$sdelim = $y;
|
||||||
|
} else {
|
||||||
|
$attrdef = null;
|
||||||
|
if ($sdelim === '#') {
|
||||||
|
$attrdef = $this->_id_attrdef;
|
||||||
|
} elseif ($sdelim === '.') {
|
||||||
|
$attrdef = $this->_class_attrdef;
|
||||||
|
} elseif ($sdelim === ':') {
|
||||||
|
$attrdef = $this->_enum_attrdef;
|
||||||
|
} else {
|
||||||
|
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
|
||||||
|
}
|
||||||
|
$r = $attrdef->validate($y, $config, $context);
|
||||||
|
if ($r !== false) {
|
||||||
|
if ($r !== true) {
|
||||||
|
$y = $r;
|
||||||
|
}
|
||||||
|
if ($nx === null) {
|
||||||
|
$nx = '';
|
||||||
|
}
|
||||||
|
$nx .= $sdelim . $y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($nx !== null) {
|
||||||
|
if ($nsel === null) {
|
||||||
|
$nsel = $nx;
|
||||||
|
} else {
|
||||||
|
$nsel .= $delim . $nx;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// delimiters to the left of invalid
|
||||||
|
// basic selector ignored
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($nsel !== null) {
|
||||||
|
if (!empty($scopes)) {
|
||||||
|
foreach ($scopes as $s) {
|
||||||
|
$new_selectors[] = "$s $nsel";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$new_selectors[] = $nsel;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$selector = implode(', ', $new_selector); // now it's a string
|
|
||||||
}
|
}
|
||||||
|
if (empty($new_selectors)) continue;
|
||||||
|
$selector = implode(', ', $new_selectors);
|
||||||
foreach ($style as $name => $value) {
|
foreach ($style as $name => $value) {
|
||||||
if (!isset($css_definition->info[$name])) {
|
if (!isset($css_definition->info[$name])) {
|
||||||
unset($style[$name]);
|
unset($style[$name]);
|
||||||
@ -110,10 +262,11 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
|||||||
}
|
}
|
||||||
$new_decls[$selector] = $style;
|
$new_decls[$selector] = $style;
|
||||||
}
|
}
|
||||||
$this->_tidy->css[$k] = $new_decls;
|
$new_css[$k] = $new_decls;
|
||||||
}
|
}
|
||||||
// remove stuff that shouldn't be used, could be reenabled
|
// remove stuff that shouldn't be used, could be reenabled
|
||||||
// after security risks are analyzed
|
// after security risks are analyzed
|
||||||
|
$this->_tidy->css = $new_css;
|
||||||
$this->_tidy->import = array();
|
$this->_tidy->import = array();
|
||||||
$this->_tidy->charset = null;
|
$this->_tidy->charset = null;
|
||||||
$this->_tidy->namespace = null;
|
$this->_tidy->namespace = null;
|
||||||
|
@ -147,7 +147,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
|||||||
return $this->_anonModule;
|
return $this->_anonModule;
|
||||||
}
|
}
|
||||||
|
|
||||||
private $_anonModule;
|
private $_anonModule = null;
|
||||||
|
|
||||||
|
|
||||||
// PUBLIC BUT INTERNAL VARIABLES --------------------------------------
|
// PUBLIC BUT INTERNAL VARIABLES --------------------------------------
|
||||||
|
@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
|
|||||||
'name' => 'CDATA',
|
'name' => 'CDATA',
|
||||||
'readonly' => 'Bool#readonly',
|
'readonly' => 'Bool#readonly',
|
||||||
'size' => 'Number',
|
'size' => 'Number',
|
||||||
'src' => 'URI#embeds',
|
'src' => 'URI#embedded',
|
||||||
'tabindex' => 'Number',
|
'tabindex' => 'Number',
|
||||||
'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
|
'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
|
||||||
'value' => 'CDATA',
|
'value' => 'CDATA',
|
||||||
@ -84,7 +84,8 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
|
|||||||
$button->excludes = $this->makeLookup(
|
$button->excludes = $this->makeLookup(
|
||||||
'form', 'fieldset', // Form
|
'form', 'fieldset', // Form
|
||||||
'input', 'select', 'textarea', 'label', 'button', // Formctrl
|
'input', 'select', 'textarea', 'label', 'button', // Formctrl
|
||||||
'a' // as per HTML 4.01 spec, this is omitted by modularization
|
'a', // as per HTML 4.01 spec, this is omitted by modularization
|
||||||
|
'isindex', 'iframe' // legacy items
|
||||||
);
|
);
|
||||||
|
|
||||||
// Extra exclusion: img usemap="" is not permitted within this element.
|
// Extra exclusion: img usemap="" is not permitted within this element.
|
||||||
|
@ -89,7 +89,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
|
|||||||
$hr->attr['width'] = 'Length';
|
$hr->attr['width'] = 'Length';
|
||||||
|
|
||||||
$img = $this->addBlankElement('img');
|
$img = $this->addBlankElement('img');
|
||||||
$img->attr['align'] = 'Enum#top,middle,bottom,left,right';
|
$img->attr['align'] = 'IAlign';
|
||||||
$img->attr['border'] = 'Pixels';
|
$img->attr['border'] = 'Pixels';
|
||||||
$img->attr['hspace'] = 'Pixels';
|
$img->attr['hspace'] = 'Pixels';
|
||||||
$img->attr['vspace'] = 'Pixels';
|
$img->attr['vspace'] = 'Pixels';
|
||||||
@ -136,6 +136,22 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
|
|||||||
$ul->attr['compact'] = 'Bool#compact';
|
$ul->attr['compact'] = 'Bool#compact';
|
||||||
$ul->attr['type'] = 'Enum#square,disc,circle';
|
$ul->attr['type'] = 'Enum#square,disc,circle';
|
||||||
|
|
||||||
|
// "safe" modifications to "unsafe" elements
|
||||||
|
// WARNING: If you want to add support for an unsafe, legacy
|
||||||
|
// attribute, make a new TrustedLegacy module with the trusted
|
||||||
|
// bit set appropriately
|
||||||
|
|
||||||
|
$form = $this->addBlankElement('form');
|
||||||
|
$form->content_model = 'Flow | #PCDATA';
|
||||||
|
$form->content_model_type = 'optional';
|
||||||
|
$form->attr['target'] = 'FrameTarget';
|
||||||
|
|
||||||
|
$input = $this->addBlankElement('input');
|
||||||
|
$input->attr['align'] = 'IAlign';
|
||||||
|
|
||||||
|
$legend = $this->addBlankElement('legend');
|
||||||
|
$legend->attr['align'] = 'LAlign';
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
|
|||||||
public $content_sets = array('Flow' => 'List');
|
public $content_sets = array('Flow' => 'List');
|
||||||
|
|
||||||
public function setup($config) {
|
public function setup($config) {
|
||||||
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
|
$ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
|
||||||
$ol->wrap = "li";
|
$ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
|
||||||
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
|
// XXX The wrap attribute is handled by MakeWellFormed. This is all
|
||||||
$ul->wrap = "li";
|
// quite unsatisfactory, because we generated this
|
||||||
|
// *specifically* for lists, and now a big chunk of the handling
|
||||||
|
// is done properly by the List ChildDef. So actually, we just
|
||||||
|
// want enough information to make autoclosing work properly,
|
||||||
|
// and then hand off the tricky stuff to the ChildDef.
|
||||||
|
$ol->wrap = 'li';
|
||||||
|
$ul->wrap = 'li';
|
||||||
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
|
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
|
||||||
|
|
||||||
$this->addElement('li', false, 'Flow', 'Common');
|
$this->addElement('li', false, 'Flow', 'Common');
|
||||||
|
@ -37,6 +37,9 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
|
|||||||
'abbr' => 'Text',
|
'abbr' => 'Text',
|
||||||
'colspan' => 'Number',
|
'colspan' => 'Number',
|
||||||
'rowspan' => 'Number',
|
'rowspan' => 'Number',
|
||||||
|
// Apparently, as of HTML5 this attribute only applies
|
||||||
|
// to 'th' elements.
|
||||||
|
'scope' => 'Enum#row,col,rowgroup,colgroup',
|
||||||
),
|
),
|
||||||
$cell_align
|
$cell_align
|
||||||
);
|
);
|
||||||
|
@ -65,11 +65,11 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
|
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
|
||||||
'StyleAttribute',
|
'StyleAttribute',
|
||||||
// Unsafe:
|
// Unsafe:
|
||||||
'Scripting', 'Object', 'Forms',
|
'Scripting', 'Object', 'Forms',
|
||||||
// Sorta legacy, but present in strict:
|
// Sorta legacy, but present in strict:
|
||||||
'Name',
|
'Name',
|
||||||
);
|
);
|
||||||
$transitional = array('Legacy', 'Target');
|
$transitional = array('Legacy', 'Target', 'Iframe');
|
||||||
$xml = array('XMLCommonAttributes');
|
$xml = array('XMLCommonAttributes');
|
||||||
$non_xml = array('NonXMLCommonAttributes');
|
$non_xml = array('NonXMLCommonAttributes');
|
||||||
|
|
||||||
@ -112,7 +112,9 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
|
|
||||||
$this->doctypes->register(
|
$this->doctypes->register(
|
||||||
'XHTML 1.1', true,
|
'XHTML 1.1', true,
|
||||||
array_merge($common, $xml, array('Ruby')),
|
// Iframe is a real XHTML 1.1 module, despite being
|
||||||
|
// "transitional"!
|
||||||
|
array_merge($common, $xml, array('Ruby', 'Iframe')),
|
||||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
|
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
|
||||||
array(),
|
array(),
|
||||||
'-//W3C//DTD XHTML 1.1//EN',
|
'-//W3C//DTD XHTML 1.1//EN',
|
||||||
@ -229,6 +231,9 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
if ($config->get('HTML.Nofollow')) {
|
if ($config->get('HTML.Nofollow')) {
|
||||||
$modules[] = 'Nofollow';
|
$modules[] = 'Nofollow';
|
||||||
}
|
}
|
||||||
|
if ($config->get('HTML.TargetBlank')) {
|
||||||
|
$modules[] = 'TargetBlank';
|
||||||
|
}
|
||||||
|
|
||||||
// merge in custom modules
|
// merge in custom modules
|
||||||
$modules = array_merge($modules, $this->userModules);
|
$modules = array_merge($modules, $this->userModules);
|
||||||
@ -364,6 +369,13 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
// :TODO:
|
// :TODO:
|
||||||
// non-standalone definitions that don't have a standalone
|
// non-standalone definitions that don't have a standalone
|
||||||
// to merge into could be deferred to the end
|
// to merge into could be deferred to the end
|
||||||
|
// HOWEVER, it is perfectly valid for a non-standalone
|
||||||
|
// definition to lack a standalone definition, even
|
||||||
|
// after all processing: this allows us to safely
|
||||||
|
// specify extra attributes for elements that may not be
|
||||||
|
// enabled all in one place. In particular, this might
|
||||||
|
// be the case for trusted elements. WARNING: care must
|
||||||
|
// be taken that the /extra/ definitions are all safe.
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,8 +11,6 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
|
|||||||
*/
|
*/
|
||||||
protected $strategies = array();
|
protected $strategies = array();
|
||||||
|
|
||||||
abstract public function __construct();
|
|
||||||
|
|
||||||
public function execute($tokens, $config, $context) {
|
public function execute($tokens, $config, $context) {
|
||||||
foreach ($this->strategies as $strategy) {
|
foreach ($this->strategies as $strategy) {
|
||||||
$tokens = $strategy->execute($tokens, $config, $context);
|
$tokens = $strategy->execute($tokens, $config, $context);
|
||||||
|
@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
// currently only used to determine if comments should be kept
|
// currently only used to determine if comments should be kept
|
||||||
$trusted = $config->get('HTML.Trusted');
|
$trusted = $config->get('HTML.Trusted');
|
||||||
|
$comment_lookup = $config->get('HTML.AllowedComments');
|
||||||
|
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
|
||||||
|
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
|
||||||
|
|
||||||
$remove_script_contents = $config->get('Core.RemoveScriptContents');
|
$remove_script_contents = $config->get('Core.RemoveScriptContents');
|
||||||
$hidden_elements = $config->get('Core.HiddenElements');
|
$hidden_elements = $config->get('Core.HiddenElements');
|
||||||
@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
if ($textify_comments !== false) {
|
if ($textify_comments !== false) {
|
||||||
$data = $token->data;
|
$data = $token->data;
|
||||||
$token = new HTMLPurifier_Token_Text($data);
|
$token = new HTMLPurifier_Token_Text($data);
|
||||||
} elseif ($trusted) {
|
} elseif ($trusted || $check_comments) {
|
||||||
// keep, but perform comment cleaning
|
// always cleanup comments
|
||||||
|
$trailing_hyphen = false;
|
||||||
if ($e) {
|
if ($e) {
|
||||||
// perform check whether or not there's a trailing hyphen
|
// perform check whether or not there's a trailing hyphen
|
||||||
if (substr($token->data, -1) == '-') {
|
if (substr($token->data, -1) == '-') {
|
||||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
$trailing_hyphen = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$token->data = rtrim($token->data, '-');
|
$token->data = rtrim($token->data, '-');
|
||||||
$found_double_hyphen = false;
|
$found_double_hyphen = false;
|
||||||
while (strpos($token->data, '--') !== false) {
|
while (strpos($token->data, '--') !== false) {
|
||||||
if ($e && !$found_double_hyphen) {
|
$found_double_hyphen = true;
|
||||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
|
||||||
}
|
|
||||||
$found_double_hyphen = true; // prevent double-erroring
|
|
||||||
$token->data = str_replace('--', '-', $token->data);
|
$token->data = str_replace('--', '-', $token->data);
|
||||||
}
|
}
|
||||||
|
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
|
||||||
|
// OK good
|
||||||
|
if ($e) {
|
||||||
|
if ($trailing_hyphen) {
|
||||||
|
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||||
|
}
|
||||||
|
if ($found_double_hyphen) {
|
||||||
|
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if ($e) {
|
||||||
|
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// strip comments
|
// strip comments
|
||||||
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||||
|
@ -40,7 +40,7 @@ class HTMLPurifier_URI
|
|||||||
} else {
|
} else {
|
||||||
// no scheme: retrieve the default one
|
// no scheme: retrieve the default one
|
||||||
$def = $config->getDefinition('URI');
|
$def = $config->getDefinition('URI');
|
||||||
$scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
|
$scheme_obj = $def->getDefaultScheme($config, $context);
|
||||||
if (!$scheme_obj) {
|
if (!$scheme_obj) {
|
||||||
// something funky happened to the default scheme object
|
// something funky happened to the default scheme object
|
||||||
trigger_error(
|
trigger_error(
|
||||||
@ -199,6 +199,44 @@ class HTMLPurifier_URI
|
|||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if this URL might be considered a 'local' URL given
|
||||||
|
* the current context. This is true when the host is null, or
|
||||||
|
* when it matches the host supplied to the configuration.
|
||||||
|
*
|
||||||
|
* Note that this does not do any scheme checking, so it is mostly
|
||||||
|
* only appropriate for metadata that doesn't care about protocol
|
||||||
|
* security. isBenign is probably what you actually want.
|
||||||
|
*/
|
||||||
|
public function isLocal($config, $context) {
|
||||||
|
if ($this->host === null) return true;
|
||||||
|
$uri_def = $config->getDefinition('URI');
|
||||||
|
if ($uri_def->host === $this->host) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if this URL should be considered a 'benign' URL,
|
||||||
|
* that is:
|
||||||
|
*
|
||||||
|
* - It is a local URL (isLocal), and
|
||||||
|
* - It has an equal or better level of security
|
||||||
|
*/
|
||||||
|
public function isBenign($config, $context) {
|
||||||
|
if (!$this->isLocal($config, $context)) return false;
|
||||||
|
|
||||||
|
$scheme_obj = $this->getSchemeObj($config, $context);
|
||||||
|
if (!$scheme_obj) return false; // conservative approach
|
||||||
|
|
||||||
|
$current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
|
||||||
|
if ($current_scheme_obj->secure) {
|
||||||
|
if (!$scheme_obj->secure) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
// vim: et sw=4 sts=4
|
||||||
|
@ -27,6 +27,7 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
|
|||||||
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
|
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
|
||||||
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
|
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
|
||||||
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
|
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
|
||||||
|
$this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
|
||||||
$this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
|
$this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
|
||||||
$this->registerFilter(new HTMLPurifier_URIFilter_Munge());
|
$this->registerFilter(new HTMLPurifier_URIFilter_Munge());
|
||||||
}
|
}
|
||||||
@ -52,9 +53,13 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
|
|||||||
|
|
||||||
protected function setupFilters($config) {
|
protected function setupFilters($config) {
|
||||||
foreach ($this->registeredFilters as $name => $filter) {
|
foreach ($this->registeredFilters as $name => $filter) {
|
||||||
$conf = $config->get('URI.' . $name);
|
if ($filter->always_load) {
|
||||||
if ($conf !== false && $conf !== null) {
|
|
||||||
$this->addFilter($filter, $config);
|
$this->addFilter($filter, $config);
|
||||||
|
} else {
|
||||||
|
$conf = $config->get('URI.' . $name);
|
||||||
|
if ($conf !== false && $conf !== null) {
|
||||||
|
$this->addFilter($filter, $config);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unset($this->registeredFilters);
|
unset($this->registeredFilters);
|
||||||
@ -72,6 +77,10 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
|
|||||||
if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme');
|
if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function getDefaultScheme($config, $context) {
|
||||||
|
return HTMLPurifier_URISchemeRegistry::instance()->getScheme($this->defaultScheme, $config, $context);
|
||||||
|
}
|
||||||
|
|
||||||
public function filter(&$uri, $config, $context) {
|
public function filter(&$uri, $config, $context) {
|
||||||
foreach ($this->filters as $name => $f) {
|
foreach ($this->filters as $name => $f) {
|
||||||
$result = $f->filter($uri, $config, $context);
|
$result = $f->filter($uri, $config, $context);
|
||||||
|
@ -4,7 +4,21 @@
|
|||||||
* Chainable filters for custom URI processing.
|
* Chainable filters for custom URI processing.
|
||||||
*
|
*
|
||||||
* These filters can perform custom actions on a URI filter object,
|
* These filters can perform custom actions on a URI filter object,
|
||||||
* including transformation or blacklisting.
|
* including transformation or blacklisting. A filter named Foo
|
||||||
|
* must have a corresponding configuration directive %URI.Foo,
|
||||||
|
* unless always_load is specified to be true.
|
||||||
|
*
|
||||||
|
* The following contexts may be available while URIFilters are being
|
||||||
|
* processed:
|
||||||
|
*
|
||||||
|
* - EmbeddedURI: true if URI is an embedded resource that will
|
||||||
|
* be loaded automatically on page load
|
||||||
|
* - CurrentToken: a reference to the token that is currently
|
||||||
|
* being processed
|
||||||
|
* - CurrentAttr: the name of the attribute that is currently being
|
||||||
|
* processed
|
||||||
|
* - CurrentCSSProperty: the name of the CSS property that is
|
||||||
|
* currently being processed (if applicable)
|
||||||
*
|
*
|
||||||
* @warning This filter is called before scheme object validation occurs.
|
* @warning This filter is called before scheme object validation occurs.
|
||||||
* Make sure, if you require a specific scheme object, you
|
* Make sure, if you require a specific scheme object, you
|
||||||
@ -25,7 +39,15 @@ abstract class HTMLPurifier_URIFilter
|
|||||||
public $post = false;
|
public $post = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs initialization for the filter
|
* True if this filter should always be loaded (this permits
|
||||||
|
* a filter to be named Foo without the corresponding %URI.Foo
|
||||||
|
* directive existing.)
|
||||||
|
*/
|
||||||
|
public $always_load = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs initialization for the filter. If the filter returns
|
||||||
|
* false, this means that it shouldn't be considered active.
|
||||||
*/
|
*/
|
||||||
public function prepare($config) {return true;}
|
public function prepare($config) {return true;}
|
||||||
|
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
// It's not clear to me whether or not Punycode means that hostnames
|
||||||
|
// do not have canonical forms anymore. As far as I can tell, it's
|
||||||
|
// not a problem (punycoding should be identity when no Unicode
|
||||||
|
// points are involved), but I'm not 100% sure
|
||||||
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
|
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
|
||||||
{
|
{
|
||||||
public $name = 'HostBlacklist';
|
public $name = 'HostBlacklist';
|
||||||
|
@ -20,13 +20,8 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
|
|||||||
|
|
||||||
$scheme_obj = $uri->getSchemeObj($config, $context);
|
$scheme_obj = $uri->getSchemeObj($config, $context);
|
||||||
if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it
|
if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it
|
||||||
if (is_null($uri->host) || empty($scheme_obj->browsable)) {
|
if (!$scheme_obj->browsable) return true; // ignore non-browseable schemes, since we can't munge those in a reasonable way
|
||||||
return true;
|
if ($uri->isBenign($config, $context)) return true; // don't redirect if a benign URL
|
||||||
}
|
|
||||||
// don't redirect if target host is our host
|
|
||||||
if ($uri->host === $config->getDefinition('URI')->host) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->makeReplace($uri, $config, $context);
|
$this->makeReplace($uri, $config, $context);
|
||||||
$this->replace = array_map('rawurlencode', $this->replace);
|
$this->replace = array_map('rawurlencode', $this->replace);
|
||||||
|
@ -19,6 +19,12 @@ abstract class HTMLPurifier_URIScheme
|
|||||||
*/
|
*/
|
||||||
public $browsable = false;
|
public $browsable = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not data transmitted over this scheme is encrypted.
|
||||||
|
* https is secure, http is not.
|
||||||
|
*/
|
||||||
|
public $secure = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether or not the URI always uses <hier_part>, resolves edge cases
|
* Whether or not the URI always uses <hier_part>, resolves edge cases
|
||||||
* with making relative URIs absolute
|
* with making relative URIs absolute
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
|
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
|
||||||
|
|
||||||
public $default_port = 443;
|
public $default_port = 443;
|
||||||
|
public $secure = true;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user