upgrade HTML Purifier to Version 4.4.0 (with merged trunk patches -c37721 -c34417 -c35216 -c35219)

This commit is contained in:
Klaus Leithoff 2012-02-01 15:58:31 +00:00
parent 3874ec6084
commit 5511abc5bf
33 changed files with 700 additions and 101 deletions

View File

@ -26,6 +26,10 @@ These optional extensions can enhance the capabilities of HTML Purifier:
* bcmath : Used for unit conversion and imagecrash protection * bcmath : Used for unit conversion and imagecrash protection
* tidy : Used for pretty-printing HTML * tidy : Used for pretty-printing HTML
These optional libraries can enhance the capabilities of HTML Purifier:
* CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks
* Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
2. Reconnaissance 2. Reconnaissance
@ -331,11 +335,6 @@ Or move the cache directory somewhere else (no trailing slash):
The interface is mind-numbingly simple: The interface is mind-numbingly simple:
$purifier = new HTMLPurifier();
$clean_html = $purifier->purify( $dirty_html );
...or, if you're using the configuration object:
$purifier = new HTMLPurifier($config); $purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify( $dirty_html ); $clean_html = $purifier->purify( $dirty_html );
@ -354,7 +353,8 @@ If your website is in UTF-8 and XHTML Transitional, use this code:
<?php <?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php'; require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
$purifier = new HTMLPurifier(); $config = HTMLPurifier_Config::createDefault();
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify($dirty_html); $clean_html = $purifier->purify($dirty_html);
?> ?>

View File

@ -9,6 +9,49 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change . Internal change
========================== ==========================
4.4.0, released 2012-01-18
# Removed PEARSax3 handler.
# URI.Munge now munges URIs inside the same host that go from https
to http. Reported by Neike Taika-Tessaro.
# Core.EscapeNonASCIICharacters now always transforms entities to
entities, even if target encoding is UTF-8.
# Tighten up selector validation in ExtractStyleBlocks.
Non-syntactically valid selectors are now rejected, along with
some of the more obscure ones such as attribute selectors, the
:lang pseudoselector, and anything not in CSS2.1. Furthermore,
ID and class selectors now work properly with the relevant
configuration attributes. Also, mute errors when parsing CSS
with CSS Tidy.
! Added support for 'scope' attribute on tables.
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
! Properly handle sub-lists directly nested inside of lists in
a standards compliant way, by moving them into the preceding <li>
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
limited allowed comments in untrusted situations.
! Implement iframes, and allow them to be used in untrusted mode with
%HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle
<brad.froehle@gmail.com> for submitting an initial version of the patch.
! The Forms module now works properly for transitional doctypes.
! Added support for internationalized domain names. You need the PEAR
Net_IDNA2 module to be in your path; if it is installed, ensure the
class can be loaded and then set %Core.EnableIDNA to true.
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
<yramirez-htmlpurifier@adicio.com> for reporting.
- Explicitly initialize anonModule variable to null.
- Do not duplicate nofollow if already present. Thanks 178
for reporting.
- Do not add nofollow if hostname matches our current host. Thanks 178
for reporting, and Neike Taika-Tessaro for helping diagnose.
- Do not unset parser variable; this fixes intermittent serialization
problems. Thanks Neike Taika-Tessaro for reporting, bill
<10010tiger@gmail.com> for diagnosing.
- Fix iconv truncation bug, where non-UTF-8 target encodings see
output truncated after around 8000 characters. Thanks Jörg Ludwig
<joerg.ludwig@iserv.eu> for reporting.
- Fix broken table content model for XHTML1.1 (and also earlier
versions, although the W3C validator doesn't catch those violations).
Thanks GlitchMr <glitch.mr@gmail.com> for reporting.
4.3.0, released 2011-03-27 4.3.0, released 2011-03-27
# Fixed broken caching of customized raw definitions, but requires an # Fixed broken caching of customized raw definitions, but requires an
API change. The old API still works but will emit a warning, API change. The old API still works but will emit a warning,

View File

@ -1 +1 @@
4.3.0 4.4.0

View File

@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run. * FILE, changes will be overwritten the next time the script is run.
* *
* @version 4.3.0 * @version 4.4.0
* *
* @warning * @warning
* You must *not* include any other HTML Purifier files before this file, * You must *not* include any other HTML Purifier files before this file,
@ -73,6 +73,7 @@ require 'HTMLPurifier/UnitConverter.php';
require 'HTMLPurifier/VarParser.php'; require 'HTMLPurifier/VarParser.php';
require 'HTMLPurifier/VarParserException.php'; require 'HTMLPurifier/VarParserException.php';
require 'HTMLPurifier/AttrDef/CSS.php'; require 'HTMLPurifier/AttrDef/CSS.php';
require 'HTMLPurifier/AttrDef/Clone.php';
require 'HTMLPurifier/AttrDef/Enum.php'; require 'HTMLPurifier/AttrDef/Enum.php';
require 'HTMLPurifier/AttrDef/Integer.php'; require 'HTMLPurifier/AttrDef/Integer.php';
require 'HTMLPurifier/AttrDef/Lang.php'; require 'HTMLPurifier/AttrDef/Lang.php';
@ -90,6 +91,7 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Filter.php'; require 'HTMLPurifier/AttrDef/CSS/Filter.php';
require 'HTMLPurifier/AttrDef/CSS/Font.php'; require 'HTMLPurifier/AttrDef/CSS/Font.php';
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php'; require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php'; require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Length.php'; require 'HTMLPurifier/AttrDef/CSS/Length.php';
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php'; require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
@ -130,10 +132,12 @@ require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php'; require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php'; require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php'; require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
require 'HTMLPurifier/AttrTransform/Textarea.php'; require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php'; require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php'; require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php'; require 'HTMLPurifier/ChildDef/Empty.php';
require 'HTMLPurifier/ChildDef/List.php';
require 'HTMLPurifier/ChildDef/Required.php'; require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php'; require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php'; require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
@ -148,6 +152,7 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Edit.php'; require 'HTMLPurifier/HTMLModule/Edit.php';
require 'HTMLPurifier/HTMLModule/Forms.php'; require 'HTMLPurifier/HTMLModule/Forms.php';
require 'HTMLPurifier/HTMLModule/Hypertext.php'; require 'HTMLPurifier/HTMLModule/Hypertext.php';
require 'HTMLPurifier/HTMLModule/Iframe.php';
require 'HTMLPurifier/HTMLModule/Image.php'; require 'HTMLPurifier/HTMLModule/Image.php';
require 'HTMLPurifier/HTMLModule/Legacy.php'; require 'HTMLPurifier/HTMLModule/Legacy.php';
require 'HTMLPurifier/HTMLModule/List.php'; require 'HTMLPurifier/HTMLModule/List.php';
@ -164,6 +169,7 @@ require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php'; require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php'; require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
require 'HTMLPurifier/HTMLModule/Text.php'; require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php'; require 'HTMLPurifier/HTMLModule/Tidy.php';
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@ -202,6 +208,7 @@ require 'HTMLPurifier/URIFilter/DisableResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php'; require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php'; require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php'; require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIFilter/SafeIframe.php';
require 'HTMLPurifier/URIScheme/data.php'; require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/file.php'; require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php'; require 'HTMLPurifier/URIScheme/ftp.php';

View File

@ -19,7 +19,7 @@
*/ */
/* /*
HTML Purifier 4.3.0 - Standards Compliant HTML Filtering HTML Purifier 4.4.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or This library is free software; you can redistribute it and/or
@ -55,10 +55,10 @@ class HTMLPurifier
{ {
/** Version of HTML Purifier */ /** Version of HTML Purifier */
public $version = '4.3.0'; public $version = '4.4.0';
/** Constant with version of HTML Purifier */ /** Constant with version of HTML Purifier */
const VERSION = '4.3.0'; const VERSION = '4.4.0';
/** Global configuration object */ /** Global configuration object */
public $config; public $config;

View File

@ -67,6 +67,7 @@ require_once $__dir . '/HTMLPurifier/UnitConverter.php';
require_once $__dir . '/HTMLPurifier/VarParser.php'; require_once $__dir . '/HTMLPurifier/VarParser.php';
require_once $__dir . '/HTMLPurifier/VarParserException.php'; require_once $__dir . '/HTMLPurifier/VarParserException.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php'; require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php'; require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php'; require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
@ -84,6 +85,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
@ -124,10 +126,12 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php'; require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
@ -142,6 +146,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
@ -158,6 +163,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@ -196,6 +202,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php'; require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php'; require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php'; require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php'; require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/file.php'; require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php'; require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';

View File

@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
$string = trim($string); $string = trim($string);
if (empty($string)) return false; if (empty($string)) return false;
if (isset($colors[$string])) return $colors[$string]; if (isset($colors[strtolower($string)])) return $colors[$string];
if ($string[0] === '#') $hex = substr($string, 1); if ($string[0] === '#') $hex = substr($string, 1);
else $hex = $string; else $hex = $string;

View File

@ -12,12 +12,22 @@
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{ {
// ref functionality disabled, since we also have to verify // selector is NOT a valid thing to use for IDREFs, because IDREFs
// whether or not the ID it refers to exists // *must* target IDs that exist, whereas selector #ids do not.
/**
* Determines whether or not we're validating an ID in a CSS
* selector context.
*/
protected $selector;
public function __construct($selector = false) {
$this->selector = $selector;
}
public function validate($id, $config, $context) { public function validate($id, $config, $context) {
if (!$config->get('Attr.EnableID')) return false; if (!$this->selector && !$config->get('Attr.EnableID')) return false;
$id = trim($id); // trim it first $id = trim($id); // trim it first
@ -33,10 +43,10 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
'%Attr.IDPrefix is set', E_USER_WARNING); '%Attr.IDPrefix is set', E_USER_WARNING);
} }
//if (!$this->ref) { if (!$this->selector) {
$id_accumulator =& $context->get('IDAccumulator'); $id_accumulator =& $context->get('IDAccumulator');
if (isset($id_accumulator->ids[$id])) return false; if (isset($id_accumulator->ids[$id])) return false;
//} }
// we purposely avoid using regex, hopefully this is faster // we purposely avoid using regex, hopefully this is faster
@ -56,7 +66,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
return false; return false;
} }
if (/*!$this->ref && */$result) $id_accumulator->add($id); if (!$this->selector && $result) $id_accumulator->add($id);
// if no change was made to the ID, return the result // if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it // else, return the new id if stripping whitespace made it

View File

@ -19,7 +19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
} }
public function make($string) { public function make($string) {
$embeds = (bool) $string; $embeds = ($string === 'embedded');
return new HTMLPurifier_AttrDef_URI($embeds); return new HTMLPurifier_AttrDef_URI($embeds);
} }

View File

@ -44,9 +44,8 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// A regular domain name. // A regular domain name.
// This breaks I18N domain names, but we don't have proper IRI support, // This doesn't match I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode. If there's complaining we'll // so force users to insert Punycode.
// try to fix things into an international friendly form.
// The productions describing this are: // The productions describing this are:
$a = '[a-z]'; // alpha $a = '[a-z]'; // alpha
@ -57,10 +56,44 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
$toplabel = "$a($and*$an)?"; $toplabel = "$a($and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ] // hostname = *( domainlabel "." ) toplabel [ "." ]
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string); if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
if (!$match) return false; return $string;
}
return $string; // If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
// since otherwise we have to assume browsers support
if ($config->get('Core.EnableIDNA')) {
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
// we need to encode each period separately
$parts = explode('.', $string);
try {
$new_parts = array();
foreach ($parts as $part) {
$encodable = false;
for ($i = 0, $c = strlen($part); $i < $c; $i++) {
if (ord($part[$i]) > 0x7a) {
$encodable = true;
break;
}
}
if (!$encodable) {
$new_parts[] = $part;
} else {
$new_parts[] = $idna->encode($part);
}
}
$string = implode('.', $new_parts);
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
return $string;
}
} catch (Exception $e) {
// XXX error reporting
}
}
return false;
} }
} }

View File

@ -24,9 +24,13 @@ class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
$url = $this->parser->parse($attr['href']); $url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context); $scheme = $url->getSchemeObj($config, $context);
if (!is_null($url->host) && $scheme !== false && $scheme->browsable) { if ($scheme->browsable && !$url->isLocal($config, $context)) {
if (isset($attr['rel'])) { if (isset($attr['rel'])) {
$attr['rel'] .= ' nofollow'; $rels = explode(' ', $attr);
if (!in_array('nofollow', $rels)) {
$rels[] = 'nofollow';
}
$attr['rel'] = implode(' ', $rels);
} else { } else {
$attr['rel'] = 'nofollow'; $attr['rel'] = 'nofollow';
} }

View File

@ -15,6 +15,13 @@ class HTMLPurifier_AttrTypes
* types. * types.
*/ */
public function __construct() { public function __construct() {
// XXX This is kind of poor, since we don't actually /clone/
// instances; instead, we use the supplied make() attribute. So,
// the underlying class must know how to deal with arguments.
// With the old implementation of Enum, that ignored its
// arguments when handling a make dispatch, the IAlign
// definition wouldn't work.
// pseudo-types, must be instantiated via shorthand // pseudo-types, must be instantiated via shorthand
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum(); $this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool(); $this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
@ -29,6 +36,9 @@ class HTMLPurifier_AttrTypes
$this->info['URI'] = new HTMLPurifier_AttrDef_URI(); $this->info['URI'] = new HTMLPurifier_AttrDef_URI();
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang(); $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color(); $this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
$this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right');
$this->info['LAlign'] = self::makeEnum('top,bottom,left,right');
$this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
// unimplemented aliases // unimplemented aliases
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text(); $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
@ -44,6 +54,10 @@ class HTMLPurifier_AttrTypes
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
} }
private static function makeEnum($in) {
return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
}
/** /**
* Retrieves a type * Retrieves a type
* @param $type String type name * @param $type String type name

View File

@ -1,7 +1,33 @@
<?php <?php
/** /**
* Definition for tables * Definition for tables. The general idea is to extract out all of the
* essential bits, and then reconstruct it later.
*
* This is a bit confusing, because the DTDs and the W3C
* validators seem to disagree on the appropriate definition. The
* DTD claims:
*
* (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
*
* But actually, the HTML4 spec then has this to say:
*
* The TBODY start tag is always required except when the table
* contains only one table body and no table head or foot sections.
* The TBODY end tag may always be safely omitted.
*
* So the DTD is kind of wrong. The validator is, unfortunately, kind
* of on crack.
*
* The definition changed again in XHTML1.1; and in my opinion, this
* formulation makes the most sense.
*
* caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
*
* Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
* If we encounter a thead, tfoot or tbody, we are placed in the former
* mode, and we *must* wrap any stray tr segments with a tbody. But if
* we don't run into any of them, just have tr tags is OK.
*/ */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{ {
@ -33,6 +59,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
$collection = array(); // collected nodes $collection = array(); // collected nodes
$tag_index = 0; // the first node might be whitespace, $tag_index = 0; // the first node might be whitespace,
// so this tells us where the start tag is // so this tells us where the start tag is
$tbody_mode = false; // if true, then we need to wrap any stray
// <tr>s with a <tbody>.
foreach ($tokens_of_children as $token) { foreach ($tokens_of_children as $token) {
$is_child = ($nesting == 0); $is_child = ($nesting == 0);
@ -51,8 +79,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
// okay, let's stash the tokens away // okay, let's stash the tokens away
// first token tells us the type of the collection // first token tells us the type of the collection
switch ($collection[$tag_index]->name) { switch ($collection[$tag_index]->name) {
case 'tr':
case 'tbody': case 'tbody':
$tbody_mode = true;
case 'tr':
$content[] = $collection; $content[] = $collection;
break; break;
case 'caption': case 'caption':
@ -61,13 +90,28 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
break; break;
case 'thead': case 'thead':
case 'tfoot': case 'tfoot':
$tbody_mode = true;
// XXX This breaks rendering properties with
// Firefox, which never floats a <thead> to
// the top. Ever. (Our scheme will float the
// first <thead> to the top.) So maybe
// <thead>s that are not first should be
// turned into <tbody>? Very tricky, indeed.
// access the appropriate variable, $thead or $tfoot // access the appropriate variable, $thead or $tfoot
$var = $collection[$tag_index]->name; $var = $collection[$tag_index]->name;
if ($$var === false) { if ($$var === false) {
$$var = $collection; $$var = $collection;
} else { } else {
// transmutate the first and less entries into // Oops, there's a second one! What
// tbody tags, and then put into content // should we do? Current behavior is to
// transmutate the first and last entries into
// tbody tags, and then put into content.
// Maybe a better idea is to *attach
// it* to the existing thead or tfoot?
// We don't do this, because Firefox
// doesn't float an extra tfoot to the
// bottom like it does for the first one.
$collection[$tag_index]->name = 'tbody'; $collection[$tag_index]->name = 'tbody';
$collection[count($collection)-1]->name = 'tbody'; $collection[count($collection)-1]->name = 'tbody';
$content[] = $collection; $content[] = $collection;
@ -126,7 +170,48 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
if ($thead !== false) $ret = array_merge($ret, $thead); if ($thead !== false) $ret = array_merge($ret, $thead);
if ($tfoot !== false) $ret = array_merge($ret, $tfoot); if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
if ($tbody_mode) {
// a little tricky, since the start of the collection may be
// whitespace
$inside_tbody = false;
foreach ($content as $token_array) {
// find the starting token
foreach ($token_array as $t) {
if ($t->name === 'tr' || $t->name === 'tbody') {
break;
}
} // iterator variable carries over
if ($t->name === 'tr') {
if ($inside_tbody) {
$ret = array_merge($ret, $token_array);
} else {
$ret[] = new HTMLPurifier_Token_Start('tbody');
$ret = array_merge($ret, $token_array);
$inside_tbody = true;
}
} elseif ($t->name === 'tbody') {
if ($inside_tbody) {
$ret[] = new HTMLPurifier_Token_End('tbody');
$inside_tbody = false;
$ret = array_merge($ret, $token_array);
} else {
$ret = array_merge($ret, $token_array);
}
} else {
trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
}
}
if ($inside_tbody) {
$ret[] = new HTMLPurifier_Token_End('tbody');
}
} else {
foreach ($content as $token_array) {
// invariant: everything in here is <tr>s
$ret = array_merge($ret, $token_array);
}
}
if (!empty($collection) && $is_collecting == false){ if (!empty($collection) && $is_collecting == false){
// grab the trailing space // grab the trailing space
$ret = array_merge($ret, $collection); $ret = array_merge($ret, $collection);

View File

@ -20,7 +20,7 @@ class HTMLPurifier_Config
/** /**
* HTML Purifier's version * HTML Purifier's version
*/ */
public $version = '4.3.0'; public $version = '4.4.0';
/** /**
* Bool indicator whether or not to automatically finalize * Bool indicator whether or not to automatically finalize
@ -44,7 +44,7 @@ class HTMLPurifier_Config
/** /**
* Parser for variables * Parser for variables
*/ */
protected $parser; protected $parser = null;
/** /**
* Reference HTMLPurifier_ConfigSchema for value checking * Reference HTMLPurifier_ConfigSchema for value checking
@ -668,7 +668,7 @@ class HTMLPurifier_Config
*/ */
public function finalize() { public function finalize() {
$this->finalized = true; $this->finalized = true;
unset($this->parser); $this->parser = null;
} }
/** /**

View File

@ -24,5 +24,6 @@ array (
--DESCRIPTION-- --DESCRIPTION--
Lookup array of color names to six digit hexadecimal number corresponding Lookup array of color names to six digit hexadecimal number corresponding
to color, with preceding hash mark. Used when parsing colors. to color, with preceding hash mark. Used when parsing colors. The lookup
is done in a case-insensitive manner.
--# vim: et sw=4 sts=4 --# vim: et sw=4 sts=4

View File

@ -19,6 +19,68 @@ class HTMLPurifier_Encoder
*/ */
public static function muteErrorHandler() {} public static function muteErrorHandler() {}
/**
* iconv wrapper which mutes errors, but doesn't work around bugs.
*/
public static function unsafeIconv($in, $out, $text) {
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
$r = iconv($in, $out, $text);
restore_error_handler();
return $r;
}
/**
* iconv wrapper which mutes errors and works around bugs.
*/
public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
$code = self::testIconvTruncateBug();
if ($code == self::ICONV_OK) {
return self::unsafeIconv($in, $out, $text);
} elseif ($code == self::ICONV_TRUNCATES) {
// we can only work around this if the input character set
// is utf-8
if ($in == 'utf-8') {
if ($max_chunk_size < 4) {
trigger_error('max_chunk_size is too small', E_USER_WARNING);
return false;
}
// split into 8000 byte chunks, but be careful to handle
// multibyte boundaries properly
if (($c = strlen($text)) <= $max_chunk_size) {
return self::unsafeIconv($in, $out, $text);
}
$r = '';
$i = 0;
while (true) {
if ($i + $max_chunk_size >= $c) {
$r .= self::unsafeIconv($in, $out, substr($text, $i));
break;
}
// wibble the boundary
if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
$chunk_size = $max_chunk_size;
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
$chunk_size = $max_chunk_size - 1;
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
$chunk_size = $max_chunk_size - 2;
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
$chunk_size = $max_chunk_size - 3;
} else {
return false; // rather confusing UTF-8...
}
$chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
$r .= self::unsafeIconv($in, $out, $chunk);
$i += $chunk_size;
}
return $r;
} else {
return false;
}
} else {
return false;
}
}
/** /**
* Cleans a UTF-8 string for well-formedness and SGML validity * Cleans a UTF-8 string for well-formedness and SGML validity
* *
@ -260,6 +322,14 @@ class HTMLPurifier_Encoder
return $ret; return $ret;
} }
public static function iconvAvailable() {
static $iconv = null;
if ($iconv === null) {
$iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
}
return $iconv;
}
/** /**
* Converts a string to UTF-8 based on configuration. * Converts a string to UTF-8 based on configuration.
*/ */
@ -267,25 +337,22 @@ class HTMLPurifier_Encoder
$encoding = $config->get('Core.Encoding'); $encoding = $config->get('Core.Encoding');
if ($encoding === 'utf-8') return $str; if ($encoding === 'utf-8') return $str;
static $iconv = null; static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv'); if ($iconv === null) $iconv = self::iconvAvailable();
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv && !$config->get('Test.ForceNoIconv')) { if ($iconv && !$config->get('Test.ForceNoIconv')) {
$str = iconv($encoding, 'utf-8//IGNORE', $str); // unaffected by bugs, since UTF-8 support all characters
$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
if ($str === false) { if ($str === false) {
// $encoding is not a valid encoding // $encoding is not a valid encoding
restore_error_handler();
trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
return ''; return '';
} }
// If the string is bjorked by Shift_JIS or a similar encoding // If the string is bjorked by Shift_JIS or a similar encoding
// that doesn't support all of ASCII, convert the naughty // that doesn't support all of ASCII, convert the naughty
// characters to their true byte-wise ASCII/UTF-8 equivalents. // characters to their true byte-wise ASCII/UTF-8 equivalents.
$str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding)); $str = strtr($str, self::testEncodingSupportsASCII($encoding));
restore_error_handler();
return $str; return $str;
} elseif ($encoding === 'iso-8859-1') { } elseif ($encoding === 'iso-8859-1') {
$str = utf8_encode($str); $str = utf8_encode($str);
restore_error_handler();
return $str; return $str;
} }
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR); trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
@ -298,16 +365,15 @@ class HTMLPurifier_Encoder
*/ */
public static function convertFromUTF8($str, $config, $context) { public static function convertFromUTF8($str, $config, $context) {
$encoding = $config->get('Core.Encoding'); $encoding = $config->get('Core.Encoding');
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
$str = self::convertToASCIIDumbLossless($str);
}
if ($encoding === 'utf-8') return $str; if ($encoding === 'utf-8') return $str;
static $iconv = null; static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv'); if ($iconv === null) $iconv = self::iconvAvailable();
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
}
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv && !$config->get('Test.ForceNoIconv')) { if ($iconv && !$config->get('Test.ForceNoIconv')) {
// Undo our previous fix in convertToUTF8, otherwise iconv will barf // Undo our previous fix in convertToUTF8, otherwise iconv will barf
$ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding); $ascii_fix = self::testEncodingSupportsASCII($encoding);
if (!$escape && !empty($ascii_fix)) { if (!$escape && !empty($ascii_fix)) {
$clear_fix = array(); $clear_fix = array();
foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
@ -315,15 +381,17 @@ class HTMLPurifier_Encoder
} }
$str = strtr($str, array_flip($ascii_fix)); $str = strtr($str, array_flip($ascii_fix));
// Normal stuff // Normal stuff
$str = iconv('utf-8', $encoding . '//IGNORE', $str); $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
restore_error_handler();
return $str; return $str;
} elseif ($encoding === 'iso-8859-1') { } elseif ($encoding === 'iso-8859-1') {
$str = utf8_decode($str); $str = utf8_decode($str);
restore_error_handler();
return $str; return $str;
} }
trigger_error('Encoding not supported', E_USER_ERROR); trigger_error('Encoding not supported', E_USER_ERROR);
// You might be tempted to assume that the ASCII representation
// might be OK, however, this is *not* universally true over all
// encodings. So we take the conservative route here, rather
// than forcibly turn on %Core.EscapeNonASCIICharacters
} }
/** /**
@ -373,6 +441,49 @@ class HTMLPurifier_Encoder
return $result; return $result;
} }
/** No bugs detected in iconv. */
const ICONV_OK = 0;
/** Iconv truncates output if converting from UTF-8 to another
* character set with //IGNORE, and a non-encodable character is found */
const ICONV_TRUNCATES = 1;
/** Iconv does not support //IGNORE, making it unusable for
* transcoding purposes */
const ICONV_UNUSABLE = 2;
/**
* glibc iconv has a known bug where it doesn't handle the magic
* //IGNORE stanza correctly. In particular, rather than ignore
* characters, it will return an EILSEQ after consuming some number
* of characters, and expect you to restart iconv as if it were
* an E2BIG. Old versions of PHP did not respect the errno, and
* returned the fragment, so as a result you would see iconv
* mysteriously truncating output. We can work around this by
* manually chopping our input into segments of about 8000
* characters, as long as PHP ignores the error code. If PHP starts
* paying attention to the error code, iconv becomes unusable.
*
* @returns Error code indicating severity of bug.
*/
public static function testIconvTruncateBug() {
static $code = null;
if ($code === null) {
// better not use iconv, otherwise infinite loop!
$r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
if ($r === false) {
$code = self::ICONV_UNUSABLE;
} elseif (($c = strlen($r)) < 9000) {
$code = self::ICONV_TRUNCATES;
} elseif ($c > 9000) {
trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
} else {
$code = self::ICONV_OK;
}
}
return $code;
}
/** /**
* This expensive function tests whether or not a given character * This expensive function tests whether or not a given character
* encoding supports ASCII. 7/8-bit encodings like Shift_JIS will * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
@ -385,6 +496,11 @@ class HTMLPurifier_Encoder
* which can be used to "undo" any overzealous iconv action. * which can be used to "undo" any overzealous iconv action.
*/ */
public static function testEncodingSupportsASCII($encoding, $bypass = false) { public static function testEncodingSupportsASCII($encoding, $bypass = false) {
// All calls to iconv here are unsafe, proof by case analysis:
// If ICONV_OK, no difference.
// If ICONV_TRUNCATE, all calls involve one character inputs,
// so bug is not triggered.
// If ICONV_UNUSABLE, this call is irrelevant
static $encodings = array(); static $encodings = array();
if (!$bypass) { if (!$bypass) {
if (isset($encodings[$encoding])) return $encodings[$encoding]; if (isset($encodings[$encoding])) return $encodings[$encoding];
@ -398,24 +514,22 @@ class HTMLPurifier_Encoder
if (strpos($lenc, 'iso-8859-') === 0) return array(); if (strpos($lenc, 'iso-8859-') === 0) return array();
} }
$ret = array(); $ret = array();
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
if (iconv('UTF-8', $encoding, 'a') === false) return false;
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
$c = chr($i); // UTF-8 char $c = chr($i); // UTF-8 char
$r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
if ( if (
$r === '' || $r === '' ||
// This line is needed for iconv implementations that do not // This line is needed for iconv implementations that do not
// omit characters that do not exist in the target character set // omit characters that do not exist in the target character set
($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c) ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
) { ) {
// Reverse engineer: what's the UTF-8 equiv of this byte // Reverse engineer: what's the UTF-8 equiv of this byte
// sequence? This assumes that there's no variable width // sequence? This assumes that there's no variable width
// encoding that doesn't support ASCII. // encoding that doesn't support ASCII.
$ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c; $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
} }
} }
restore_error_handler();
$encodings[$encoding] = $ret; $encodings[$encoding] = $ret;
return $ret; return $ret;
} }

View File

@ -1,5 +1,11 @@
<?php <?php
// why is this a top level function? Because PHP 5.2.0 doesn't seem to
// understand how to interpret this filter if it's a static method.
// It's all really silly, but if we go this route it might be reasonable
// to coalesce all of these methods into one.
function htmlpurifier_filter_extractstyleblocks_muteerrorhandler() {}
/** /**
* This filter extracts <style> blocks from input HTML, cleans them up * This filter extracts <style> blocks from input HTML, cleans them up
* using CSSTidy, and then places them in $purifier->context->get('StyleBlocks') * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
@ -21,8 +27,15 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
private $_styleMatches = array(); private $_styleMatches = array();
private $_tidy; private $_tidy;
private $_id_attrdef;
private $_class_attrdef;
private $_enum_attrdef;
public function __construct() { public function __construct() {
$this->_tidy = new csstidy(); $this->_tidy = new csstidy();
$this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
$this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
$this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus'));
} }
/** /**
@ -77,27 +90,166 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
$css = substr($css, 0, -3); $css = substr($css, 0, -3);
} }
$css = trim($css); $css = trim($css);
set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
$this->_tidy->parse($css); $this->_tidy->parse($css);
restore_error_handler();
$css_definition = $config->getDefinition('CSS'); $css_definition = $config->getDefinition('CSS');
$html_definition = $config->getDefinition('HTML');
$new_css = array();
foreach ($this->_tidy->css as $k => $decls) { foreach ($this->_tidy->css as $k => $decls) {
// $decls are all CSS declarations inside an @ selector // $decls are all CSS declarations inside an @ selector
$new_decls = array(); $new_decls = array();
foreach ($decls as $selector => $style) { foreach ($decls as $selector => $style) {
$selector = trim($selector); $selector = trim($selector);
if ($selector === '') continue; // should not happen if ($selector === '') continue; // should not happen
if ($selector[0] === '+') { // Parse the selector
if ($selector !== '' && $selector[0] === '+') continue; // Here is the relevant part of the CSS grammar:
} //
if (!empty($scopes)) { // ruleset
$new_selector = array(); // because multiple ones are possible // : selector [ ',' S* selector ]* '{' ...
$selectors = array_map('trim', explode(',', $selector)); // selector
foreach ($scopes as $s1) { // : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
foreach ($selectors as $s2) { // combinator
$new_selector[] = "$s1 $s2"; // : '+' S*
// : '>' S*
// simple_selector
// : element_name [ HASH | class | attrib | pseudo ]*
// | [ HASH | class | attrib | pseudo ]+
// element_name
// : IDENT | '*'
// ;
// class
// : '.' IDENT
// ;
// attrib
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
// [ IDENT | STRING ] S* ]? ']'
// ;
// pseudo
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
// ;
//
// For reference, here are the relevant tokens:
//
// HASH #{name}
// IDENT {ident}
// INCLUDES ==
// DASHMATCH |=
// STRING {string}
// FUNCTION {ident}\(
//
// And the lexical scanner tokens
//
// name {nmchar}+
// nmchar [_a-z0-9-]|{nonascii}|{escape}
// nonascii [\240-\377]
// escape {unicode}|\\[^\r\n\f0-9a-f]
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
// ident -?{nmstart}{nmchar*}
// nmstart [_a-z]|{nonascii}|{escape}
// string {string1}|{string2}
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
// string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
//
// We'll implement a subset (in order to reduce attack
// surface); in particular:
//
// - No Unicode support
// - No escapes support
// - No string support (by proxy no attrib support)
// - element_name is matched against allowed
// elements (some people might find this
// annoying...)
// - Pseudo-elements one of :first-child, :link,
// :visited, :active, :hover, :focus
// handle ruleset
$selectors = array_map('trim', explode(',', $selector));
$new_selectors = array();
foreach ($selectors as $sel) {
// split on +, > and spaces
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
// even indices are chunks, odd indices are
// delimiters
$nsel = null;
$delim = null; // guaranteed to be non-null after
// two loop iterations
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
$x = $basic_selectors[$i];
if ($i % 2) {
// delimiter
if ($x === ' ') {
$delim = ' ';
} else {
$delim = ' ' . $x . ' ';
}
} else {
// simple selector
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
$sdelim = null;
$nx = null;
for ($j = 0, $cc = count($components); $j < $cc; $j ++) {
$y = $components[$j];
if ($j === 0) {
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
$nx = $y;
} else {
// $nx stays null; this matters
// if we don't manage to find
// any valid selector content,
// in which case we ignore the
// outer $delim
}
} elseif ($j % 2) {
// set delimiter
$sdelim = $y;
} else {
$attrdef = null;
if ($sdelim === '#') {
$attrdef = $this->_id_attrdef;
} elseif ($sdelim === '.') {
$attrdef = $this->_class_attrdef;
} elseif ($sdelim === ':') {
$attrdef = $this->_enum_attrdef;
} else {
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
}
$r = $attrdef->validate($y, $config, $context);
if ($r !== false) {
if ($r !== true) {
$y = $r;
}
if ($nx === null) {
$nx = '';
}
$nx .= $sdelim . $y;
}
}
}
if ($nx !== null) {
if ($nsel === null) {
$nsel = $nx;
} else {
$nsel .= $delim . $nx;
}
} else {
// delimiters to the left of invalid
// basic selector ignored
}
}
}
if ($nsel !== null) {
if (!empty($scopes)) {
foreach ($scopes as $s) {
$new_selectors[] = "$s $nsel";
}
} else {
$new_selectors[] = $nsel;
} }
} }
$selector = implode(', ', $new_selector); // now it's a string
} }
if (empty($new_selectors)) continue;
$selector = implode(', ', $new_selectors);
foreach ($style as $name => $value) { foreach ($style as $name => $value) {
if (!isset($css_definition->info[$name])) { if (!isset($css_definition->info[$name])) {
unset($style[$name]); unset($style[$name]);
@ -110,10 +262,11 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
} }
$new_decls[$selector] = $style; $new_decls[$selector] = $style;
} }
$this->_tidy->css[$k] = $new_decls; $new_css[$k] = $new_decls;
} }
// remove stuff that shouldn't be used, could be reenabled // remove stuff that shouldn't be used, could be reenabled
// after security risks are analyzed // after security risks are analyzed
$this->_tidy->css = $new_css;
$this->_tidy->import = array(); $this->_tidy->import = array();
$this->_tidy->charset = null; $this->_tidy->charset = null;
$this->_tidy->namespace = null; $this->_tidy->namespace = null;

View File

@ -147,7 +147,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
return $this->_anonModule; return $this->_anonModule;
} }
private $_anonModule; private $_anonModule = null;
// PUBLIC BUT INTERNAL VARIABLES -------------------------------------- // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

View File

@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
'name' => 'CDATA', 'name' => 'CDATA',
'readonly' => 'Bool#readonly', 'readonly' => 'Bool#readonly',
'size' => 'Number', 'size' => 'Number',
'src' => 'URI#embeds', 'src' => 'URI#embedded',
'tabindex' => 'Number', 'tabindex' => 'Number',
'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image', 'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
'value' => 'CDATA', 'value' => 'CDATA',
@ -84,7 +84,8 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
$button->excludes = $this->makeLookup( $button->excludes = $this->makeLookup(
'form', 'fieldset', // Form 'form', 'fieldset', // Form
'input', 'select', 'textarea', 'label', 'button', // Formctrl 'input', 'select', 'textarea', 'label', 'button', // Formctrl
'a' // as per HTML 4.01 spec, this is omitted by modularization 'a', // as per HTML 4.01 spec, this is omitted by modularization
'isindex', 'iframe' // legacy items
); );
// Extra exclusion: img usemap="" is not permitted within this element. // Extra exclusion: img usemap="" is not permitted within this element.

View File

@ -89,7 +89,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
$hr->attr['width'] = 'Length'; $hr->attr['width'] = 'Length';
$img = $this->addBlankElement('img'); $img = $this->addBlankElement('img');
$img->attr['align'] = 'Enum#top,middle,bottom,left,right'; $img->attr['align'] = 'IAlign';
$img->attr['border'] = 'Pixels'; $img->attr['border'] = 'Pixels';
$img->attr['hspace'] = 'Pixels'; $img->attr['hspace'] = 'Pixels';
$img->attr['vspace'] = 'Pixels'; $img->attr['vspace'] = 'Pixels';
@ -136,6 +136,22 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
$ul->attr['compact'] = 'Bool#compact'; $ul->attr['compact'] = 'Bool#compact';
$ul->attr['type'] = 'Enum#square,disc,circle'; $ul->attr['type'] = 'Enum#square,disc,circle';
// "safe" modifications to "unsafe" elements
// WARNING: If you want to add support for an unsafe, legacy
// attribute, make a new TrustedLegacy module with the trusted
// bit set appropriately
$form = $this->addBlankElement('form');
$form->content_model = 'Flow | #PCDATA';
$form->content_model_type = 'optional';
$form->attr['target'] = 'FrameTarget';
$input = $this->addBlankElement('input');
$input->attr['align'] = 'IAlign';
$legend = $this->addBlankElement('legend');
$legend->attr['align'] = 'LAlign';
} }
} }

View File

@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List'); public $content_sets = array('Flow' => 'List');
public function setup($config) { public function setup($config) {
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common'); $ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
$ol->wrap = "li"; $ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common'); // XXX The wrap attribute is handled by MakeWellFormed. This is all
$ul->wrap = "li"; // quite unsatisfactory, because we generated this
// *specifically* for lists, and now a big chunk of the handling
// is done properly by the List ChildDef. So actually, we just
// want enough information to make autoclosing work properly,
// and then hand off the tricky stuff to the ChildDef.
$ol->wrap = 'li';
$ul->wrap = 'li';
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); $this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', false, 'Flow', 'Common'); $this->addElement('li', false, 'Flow', 'Common');

View File

@ -37,6 +37,9 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
'abbr' => 'Text', 'abbr' => 'Text',
'colspan' => 'Number', 'colspan' => 'Number',
'rowspan' => 'Number', 'rowspan' => 'Number',
// Apparently, as of HTML5 this attribute only applies
// to 'th' elements.
'scope' => 'Enum#row,col,rowgroup,colgroup',
), ),
$cell_align $cell_align
); );

View File

@ -65,11 +65,11 @@ class HTMLPurifier_HTMLModuleManager
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
'StyleAttribute', 'StyleAttribute',
// Unsafe: // Unsafe:
'Scripting', 'Object', 'Forms', 'Scripting', 'Object', 'Forms',
// Sorta legacy, but present in strict: // Sorta legacy, but present in strict:
'Name', 'Name',
); );
$transitional = array('Legacy', 'Target'); $transitional = array('Legacy', 'Target', 'Iframe');
$xml = array('XMLCommonAttributes'); $xml = array('XMLCommonAttributes');
$non_xml = array('NonXMLCommonAttributes'); $non_xml = array('NonXMLCommonAttributes');
@ -112,7 +112,9 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register( $this->doctypes->register(
'XHTML 1.1', true, 'XHTML 1.1', true,
array_merge($common, $xml, array('Ruby')), // Iframe is a real XHTML 1.1 module, despite being
// "transitional"!
array_merge($common, $xml, array('Ruby', 'Iframe')),
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
array(), array(),
'-//W3C//DTD XHTML 1.1//EN', '-//W3C//DTD XHTML 1.1//EN',
@ -229,6 +231,9 @@ class HTMLPurifier_HTMLModuleManager
if ($config->get('HTML.Nofollow')) { if ($config->get('HTML.Nofollow')) {
$modules[] = 'Nofollow'; $modules[] = 'Nofollow';
} }
if ($config->get('HTML.TargetBlank')) {
$modules[] = 'TargetBlank';
}
// merge in custom modules // merge in custom modules
$modules = array_merge($modules, $this->userModules); $modules = array_merge($modules, $this->userModules);
@ -364,6 +369,13 @@ class HTMLPurifier_HTMLModuleManager
// :TODO: // :TODO:
// non-standalone definitions that don't have a standalone // non-standalone definitions that don't have a standalone
// to merge into could be deferred to the end // to merge into could be deferred to the end
// HOWEVER, it is perfectly valid for a non-standalone
// definition to lack a standalone definition, even
// after all processing: this allows us to safely
// specify extra attributes for elements that may not be
// enabled all in one place. In particular, this might
// be the case for trusted elements. WARNING: care must
// be taken that the /extra/ definitions are all safe.
continue; continue;
} }

View File

@ -11,8 +11,6 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
*/ */
protected $strategies = array(); protected $strategies = array();
abstract public function __construct();
public function execute($tokens, $config, $context) { public function execute($tokens, $config, $context) {
foreach ($this->strategies as $strategy) { foreach ($this->strategies as $strategy) {
$tokens = $strategy->execute($tokens, $config, $context); $tokens = $strategy->execute($tokens, $config, $context);

View File

@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// currently only used to determine if comments should be kept // currently only used to determine if comments should be kept
$trusted = $config->get('HTML.Trusted'); $trusted = $config->get('HTML.Trusted');
$comment_lookup = $config->get('HTML.AllowedComments');
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
$remove_script_contents = $config->get('Core.RemoveScriptContents'); $remove_script_contents = $config->get('Core.RemoveScriptContents');
$hidden_elements = $config->get('Core.HiddenElements'); $hidden_elements = $config->get('Core.HiddenElements');
@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if ($textify_comments !== false) { if ($textify_comments !== false) {
$data = $token->data; $data = $token->data;
$token = new HTMLPurifier_Token_Text($data); $token = new HTMLPurifier_Token_Text($data);
} elseif ($trusted) { } elseif ($trusted || $check_comments) {
// keep, but perform comment cleaning // always cleanup comments
$trailing_hyphen = false;
if ($e) { if ($e) {
// perform check whether or not there's a trailing hyphen // perform check whether or not there's a trailing hyphen
if (substr($token->data, -1) == '-') { if (substr($token->data, -1) == '-') {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'); $trailing_hyphen = true;
} }
} }
$token->data = rtrim($token->data, '-'); $token->data = rtrim($token->data, '-');
$found_double_hyphen = false; $found_double_hyphen = false;
while (strpos($token->data, '--') !== false) { while (strpos($token->data, '--') !== false) {
if ($e && !$found_double_hyphen) { $found_double_hyphen = true;
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
$found_double_hyphen = true; // prevent double-erroring
$token->data = str_replace('--', '-', $token->data); $token->data = str_replace('--', '-', $token->data);
} }
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
// OK good
if ($e) {
if ($trailing_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
}
if ($found_double_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
}
} else {
if ($e) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
}
continue;
}
} else { } else {
// strip comments // strip comments
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed'); if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');

View File

@ -40,7 +40,7 @@ class HTMLPurifier_URI
} else { } else {
// no scheme: retrieve the default one // no scheme: retrieve the default one
$def = $config->getDefinition('URI'); $def = $config->getDefinition('URI');
$scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context); $scheme_obj = $def->getDefaultScheme($config, $context);
if (!$scheme_obj) { if (!$scheme_obj) {
// something funky happened to the default scheme object // something funky happened to the default scheme object
trigger_error( trigger_error(
@ -199,6 +199,44 @@ class HTMLPurifier_URI
return $result; return $result;
} }
/**
* Returns true if this URL might be considered a 'local' URL given
* the current context. This is true when the host is null, or
* when it matches the host supplied to the configuration.
*
* Note that this does not do any scheme checking, so it is mostly
* only appropriate for metadata that doesn't care about protocol
* security. isBenign is probably what you actually want.
*/
public function isLocal($config, $context) {
if ($this->host === null) return true;
$uri_def = $config->getDefinition('URI');
if ($uri_def->host === $this->host) return true;
return false;
}
/**
* Returns true if this URL should be considered a 'benign' URL,
* that is:
*
* - It is a local URL (isLocal), and
* - It has a equal or better level of security
*/
public function isBenign($config, $context) {
if (!$this->isLocal($config, $context)) return false;
$scheme_obj = $this->getSchemeObj($config, $context);
if (!$scheme_obj) return false; // conservative approach
$current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
if ($current_scheme_obj->secure) {
if (!$scheme_obj->secure) {
return false;
}
}
return true;
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@ -27,6 +27,7 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal()); $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources()); $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist()); $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
$this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
$this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute()); $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
$this->registerFilter(new HTMLPurifier_URIFilter_Munge()); $this->registerFilter(new HTMLPurifier_URIFilter_Munge());
} }
@ -52,9 +53,13 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
protected function setupFilters($config) { protected function setupFilters($config) {
foreach ($this->registeredFilters as $name => $filter) { foreach ($this->registeredFilters as $name => $filter) {
$conf = $config->get('URI.' . $name); if ($filter->always_load) {
if ($conf !== false && $conf !== null) {
$this->addFilter($filter, $config); $this->addFilter($filter, $config);
} else {
$conf = $config->get('URI.' . $name);
if ($conf !== false && $conf !== null) {
$this->addFilter($filter, $config);
}
} }
} }
unset($this->registeredFilters); unset($this->registeredFilters);
@ -72,6 +77,10 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme'); if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme');
} }
public function getDefaultScheme($config, $context) {
return HTMLPurifier_URISchemeRegistry::instance()->getScheme($this->defaultScheme, $config, $context);
}
public function filter(&$uri, $config, $context) { public function filter(&$uri, $config, $context) {
foreach ($this->filters as $name => $f) { foreach ($this->filters as $name => $f) {
$result = $f->filter($uri, $config, $context); $result = $f->filter($uri, $config, $context);

View File

@ -4,7 +4,21 @@
* Chainable filters for custom URI processing. * Chainable filters for custom URI processing.
* *
* These filters can perform custom actions on a URI filter object, * These filters can perform custom actions on a URI filter object,
* including transformation or blacklisting. * including transformation or blacklisting. A filter named Foo
* must have a corresponding configuration directive %URI.Foo,
* unless always_load is specified to be true.
*
* The following contexts may be available while URIFilters are being
* processed:
*
* - EmbeddedURI: true if URI is an embedded resource that will
* be loaded automatically on page load
* - CurrentToken: a reference to the token that is currently
* being processed
* - CurrentAttr: the name of the attribute that is currently being
* processed
* - CurrentCSSProperty: the name of the CSS property that is
* currently being processed (if applicable)
* *
* @warning This filter is called before scheme object validation occurs. * @warning This filter is called before scheme object validation occurs.
* Make sure, if you require a specific scheme object, you * Make sure, if you require a specific scheme object, you
@ -25,7 +39,15 @@ abstract class HTMLPurifier_URIFilter
public $post = false; public $post = false;
/** /**
* Performs initialization for the filter * True if this filter should always be loaded (this permits
* a filter to be named Foo without the corresponding %URI.Foo
* directive existing.)
*/
public $always_load = false;
/**
* Performs initialization for the filter. If the filter returns
* false, this means that it shouldn't be considered active.
*/ */
public function prepare($config) {return true;} public function prepare($config) {return true;}

View File

@ -1,5 +1,9 @@
<?php <?php
// It's not clear to me whether or not Punycode means that hostnames
// do not have canonical forms anymore. As far as I can tell, it's
// not a problem (punycoding should be identity when no Unicode
// points are involved), but I'm not 100% sure
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{ {
public $name = 'HostBlacklist'; public $name = 'HostBlacklist';

View File

@ -20,13 +20,8 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
$scheme_obj = $uri->getSchemeObj($config, $context); $scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it
if (is_null($uri->host) || empty($scheme_obj->browsable)) { if (!$scheme_obj->browsable) return true; // ignore non-browseable schemes, since we can't munge those in a reasonable way
return true; if ($uri->isBenign($config, $context)) return true; // don't redirect if a benign URL
}
// don't redirect if target host is our host
if ($uri->host === $config->getDefinition('URI')->host) {
return true;
}
$this->makeReplace($uri, $config, $context); $this->makeReplace($uri, $config, $context);
$this->replace = array_map('rawurlencode', $this->replace); $this->replace = array_map('rawurlencode', $this->replace);

View File

@ -19,6 +19,12 @@ abstract class HTMLPurifier_URIScheme
*/ */
public $browsable = false; public $browsable = false;
/**
* Whether or not data transmitted over this scheme is encrypted.
* https is secure, http is not.
*/
public $secure = false;
/** /**
* Whether or not the URI always uses <hier_part>, resolves edge cases * Whether or not the URI always uses <hier_part>, resolves edge cases
* with making relative URIs absolute * with making relative URIs absolute

View File

@ -6,6 +6,7 @@
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http { class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
public $default_port = 443; public $default_port = 443;
public $secure = true;
} }