upgrade htmlpurifier to version 4.1.0

This commit is contained in:
Klaus Leithoff 2010-04-27 11:15:35 +00:00
parent b9dcde1e25
commit 8ce6ac92ae
27 changed files with 342 additions and 35 deletions

View File

@ -9,6 +9,17 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change . Internal change
========================== ==========================
4.1.0, released 2010-04-26
! Support proprietary height attribute on table element
! Support YouTube slideshows that contain /cp/ in their URL.
! Support for data: URI scheme; not enabled by default, add it using
%URI.AllowedSchemes
! Support flashvars when using %HTML.SafeObject and %HTML.SafeEmbed.
! Support for Internet Explorer compatibility with %HTML.SafeObject
using %Output.FlashCompat.
! Handle <ol><ol> properly, by inserting the necessary <li> tag.
- Always quote the insides of url(...) in CSS.
4.0.0, released 2009-07-07 4.0.0, released 2009-07-07
# APIs for ConfigSchema subsystem have substantially changed. See # APIs for ConfigSchema subsystem have substantially changed. See
docs/dev-config-bcbreaks.txt for details; in essence, anything that docs/dev-config-bcbreaks.txt for details; in essence, anything that

View File

@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run. * FILE, changes will be overwritten the next time the script is run.
* *
* @version 4.0.0 * @version 4.1.0
* *
* @warning * @warning
* You must *not* include any other HTML Purifier files before this file, * You must *not* include any other HTML Purifier files before this file,
@ -176,6 +176,7 @@ require 'HTMLPurifier/Injector/DisplayLinkURI.php';
require 'HTMLPurifier/Injector/Linkify.php'; require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php'; require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/RemoveEmpty.php'; require 'HTMLPurifier/Injector/RemoveEmpty.php';
require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php'; require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php'; require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php'; require 'HTMLPurifier/Lexer/DirectLex.php';
@ -198,6 +199,7 @@ require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php'; require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php'; require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php'; require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/ftp.php'; require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php'; require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php'; require 'HTMLPurifier/URIScheme/https.php';

View File

@ -19,7 +19,7 @@
*/ */
/* /*
HTML Purifier 4.0.0 - Standards Compliant HTML Filtering HTML Purifier 4.1.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or This library is free software; you can redistribute it and/or
@ -55,10 +55,10 @@ class HTMLPurifier
{ {
/** Version of HTML Purifier */ /** Version of HTML Purifier */
public $version = '4.0.0'; public $version = '4.1.0';
/** Constant with version of HTML Purifier */ /** Constant with version of HTML Purifier */
const VERSION = '4.0.0'; const VERSION = '4.1.0';
/** Global configuration object */ /** Global configuration object */
public $config; public $config;

View File

@ -170,6 +170,7 @@ require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php'; require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php'; require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php'; require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php'; require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php'; require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php'; require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
@ -192,6 +193,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php'; require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php'; require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php'; require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php'; require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php'; require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php'; require_once $__dir . '/HTMLPurifier/URIScheme/https.php';

View File

@ -47,7 +47,7 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
// URI at all // URI at all
$result = str_replace($keys, $values, $result); $result = str_replace($keys, $values, $result);
return "url($result)"; return "url('$result')";
} }

View File

@ -24,7 +24,8 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
if ($src) { if ($src) {
$alt = $config->get('Attr.DefaultImageAlt'); $alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) { if ($alt === null) {
$attr['alt'] = basename($attr['src']); // truncate if the alt is too long
$attr['alt'] = substr(basename($attr['src']),0,40);
} else { } else {
$attr['alt'] = $alt; $attr['alt'] = $alt;
} }

View File

@ -37,8 +37,14 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
$attr['value'] = 'window'; $attr['value'] = 'window';
break; break;
case 'movie': case 'movie':
case 'src':
$attr['name'] = "movie";
$attr['value'] = $this->uri->validate($attr['value'], $config, $context); $attr['value'] = $this->uri->validate($attr['value'], $config, $context);
break; break;
case 'flashvars':
// we're going to allow arbitrary inputs to the SWF, on
// the reasoning that it could only hack the SWF, not us.
break;
// add other cases to support other param name/value pairs // add other cases to support other param name/value pairs
default: default:
$attr['name'] = $attr['value'] = null; $attr['name'] = $attr['value'] = null;

View File

@ -20,7 +20,7 @@ class HTMLPurifier_Config
/** /**
* HTML Purifier's version * HTML Purifier's version
*/ */
public $version = '4.0.0'; public $version = '4.1.0';
/** /**
* Bool indicator whether or not to automatically finalize * Bool indicator whether or not to automatically finalize

View File

@ -0,0 +1,11 @@
AutoFormat.RemoveSpansWithoutAttributes
TYPE: bool
VERSION: 4.0.1
DEFAULT: false
--DESCRIPTION--
<p>
This directive causes <code>span</code> tags without any attributes
to be removed. It will also remove spans that had all attributes
removed during processing.
</p>
--# vim: et sw=4 sts=4

View File

@ -7,8 +7,7 @@ DEFAULT: false
Whether or not to permit embed tags in documents, with a number of extra Whether or not to permit embed tags in documents, with a number of extra
security features added to prevent script execution. This is similar to security features added to prevent script execution. This is similar to
what websites like MySpace do to embed tags. Embed is a proprietary what websites like MySpace do to embed tags. Embed is a proprietary
element and will cause your website to stop validating. You probably want element and will cause your website to stop validating; you should
to enable this with %HTML.SafeObject. see if you can use %Output.FlashCompat with %HTML.SafeObject instead
<strong>Highly experimental.</strong> first.</p>
</p>
--# vim: et sw=4 sts=4 --# vim: et sw=4 sts=4

View File

@ -6,9 +6,8 @@ DEFAULT: false
<p> <p>
Whether or not to permit object tags in documents, with a number of extra Whether or not to permit object tags in documents, with a number of extra
security features added to prevent script execution. This is similar to security features added to prevent script execution. This is similar to
what websites like MySpace do to object tags. You may also want to what websites like MySpace do to object tags. You should also enable
enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer, %Output.FlashCompat in order to generate Internet Explorer
although embed tags will cause your website to stop validating. compatibility code for your object tags.
<strong>Highly experimental.</strong>
</p> </p>
--# vim: et sw=4 sts=4 --# vim: et sw=4 sts=4

View File

@ -0,0 +1,11 @@
Output.FlashCompat
TYPE: bool
VERSION: 4.1.0
DEFAULT: false
--DESCRIPTION--
<p>
If true, HTML Purifier will generate Internet Explorer compatibility
code for all object code. This is highly recommended if you enable
%HTML.SafeObject.
</p>
--# vim: et sw=4 sts=4

View File

@ -12,4 +12,6 @@ array (
--DESCRIPTION-- --DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This Whitelist that defines the schemes that a URI is allowed to have. This
prevents XSS attacks from using pseudo-schemes like javascript or mocha. prevents XSS attacks from using pseudo-schemes like javascript or mocha.
There is also support for the <code>data</code> URI scheme, but it is not
enabled by default.
--# vim: et sw=4 sts=4 --# vim: et sw=4 sts=4

View File

@ -97,6 +97,13 @@ class HTMLPurifier_ElementDef
*/ */
public $autoclose = array(); public $autoclose = array();
/**
* If a foreign element is found in this element, test if it is
* allowed by this sub-element; if it is, instead of closing the
* current element, place it inside this element.
*/
public $wrap;
/** /**
* Whether or not this is a formatting element affected by the * Whether or not this is a formatting element affected by the
* "Active Formatting Elements" algorithm. * "Active Formatting Elements" algorithm.

View File

@ -7,13 +7,13 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
public function preFilter($html, $config, $context) { public function preFilter($html, $config, $context) {
$pre_regex = '#<object[^>]+>.+?'. $pre_regex = '#<object[^>]+>.+?'.
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s'; 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s';
$pre_replace = '<span class="youtube-embed">\1</span>'; $pre_replace = '<span class="youtube-embed">\1</span>';
return preg_replace($pre_regex, $pre_replace, $html); return preg_replace($pre_regex, $pre_replace, $html);
} }
public function postFilter($html, $config, $context) { public function postFilter($html, $config, $context) {
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#'; $post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#';
return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html);
} }
@ -24,10 +24,10 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
protected function postFilterCallback($matches) { protected function postFilterCallback($matches) {
$url = $this->armorUrl($matches[1]); $url = $this->armorUrl($matches[1]);
return '<object width="425" height="350" type="application/x-shockwave-flash" '. return '<object width="425" height="350" type="application/x-shockwave-flash" '.
'data="http://www.youtube.com/v/'.$url.'">'. 'data="http://www.youtube.com/'.$url.'">'.
'<param name="movie" value="http://www.youtube.com/v/'.$url.'"></param>'. '<param name="movie" value="http://www.youtube.com/'.$url.'"></param>'.
'<!--[if IE]>'. '<!--[if IE]>'.
'<embed src="http://www.youtube.com/v/'.$url.'"'. '<embed src="http://www.youtube.com/'.$url.'"'.
'type="application/x-shockwave-flash"'. 'type="application/x-shockwave-flash"'.
'wmode="transparent" width="425" height="350" />'. 'wmode="transparent" width="425" height="350" />'.
'<![endif]-->'. '<![endif]-->'.

View File

@ -31,6 +31,17 @@ class HTMLPurifier_Generator
*/ */
private $_sortAttr; private $_sortAttr;
/**
* Cache of %Output.FlashCompat
*/
private $_flashCompat;
/**
* Stack for keeping track of object information when outputting IE
* compatibility code.
*/
private $_flashStack = array();
/** /**
* Configuration for the generator * Configuration for the generator
*/ */
@ -44,6 +55,7 @@ class HTMLPurifier_Generator
$this->config = $config; $this->config = $config;
$this->_scriptFix = $config->get('Output.CommentScriptContents'); $this->_scriptFix = $config->get('Output.CommentScriptContents');
$this->_sortAttr = $config->get('Output.SortAttr'); $this->_sortAttr = $config->get('Output.SortAttr');
$this->_flashCompat = $config->get('Output.FlashCompat');
$this->_def = $config->getHTMLDefinition(); $this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml; $this->_xhtml = $this->_def->doctype->xml;
} }
@ -104,12 +116,41 @@ class HTMLPurifier_Generator
} elseif ($token instanceof HTMLPurifier_Token_Start) { } elseif ($token instanceof HTMLPurifier_Token_Start) {
$attr = $this->generateAttributes($token->attr, $token->name); $attr = $this->generateAttributes($token->attr, $token->name);
if ($this->_flashCompat) {
if ($token->name == "object") {
$flash = new stdclass();
$flash->attr = $token->attr;
$flash->param = array();
$this->_flashStack[] = $flash;
}
}
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token instanceof HTMLPurifier_Token_End) { } elseif ($token instanceof HTMLPurifier_Token_End) {
return '</' . $token->name . '>'; $_extra = '';
if ($this->_flashCompat) {
if ($token->name == "object" && !empty($this->_flashStack)) {
$flash = array_pop($this->_flashStack);
$compat_token = new HTMLPurifier_Token_Empty("embed");
foreach ($flash->attr as $name => $val) {
if ($name == "classid") continue;
if ($name == "type") continue;
if ($name == "data") $name = "src";
$compat_token->attr[$name] = $val;
}
foreach ($flash->param as $name => $val) {
if ($name == "movie") $name = "src";
$compat_token->attr[$name] = $val;
}
$_extra = "<!--[if IE]>".$this->generateFromToken($compat_token)."<![endif]-->";
}
}
return $_extra . '</' . $token->name . '>';
} elseif ($token instanceof HTMLPurifier_Token_Empty) { } elseif ($token instanceof HTMLPurifier_Token_Empty) {
if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
$this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
}
$attr = $this->generateAttributes($token->attr, $token->name); $attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' ) // <br /> v. <br> ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>

View File

@ -20,8 +20,10 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List'); public $content_sets = array('Flow' => 'List');
public function setup($config) { public function setup($config) {
$this->addElement('ol', 'List', 'Required: li', 'Common'); $ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
$this->addElement('ul', 'List', 'Required: li', 'Common'); $ol->wrap = "li";
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
$ul->wrap = "li";
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); $this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', false, 'Flow', 'Common'); $this->addElement('li', false, 'Flow', 'Common');

View File

@ -20,6 +20,7 @@ class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
'height' => 'Pixels#' . $max, 'height' => 'Pixels#' . $max,
'allowscriptaccess' => 'Enum#never', 'allowscriptaccess' => 'Enum#never',
'allownetworking' => 'Enum#internal', 'allownetworking' => 'Enum#internal',
'flashvars' => 'Text',
'wmode' => 'Enum#window', 'wmode' => 'Enum#window',
'name' => 'ID', 'name' => 'ID',
) )

View File

@ -28,7 +28,10 @@ class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
'type' => 'Enum#application/x-shockwave-flash', 'type' => 'Enum#application/x-shockwave-flash',
'width' => 'Pixels#' . $max, 'width' => 'Pixels#' . $max,
'height' => 'Pixels#' . $max, 'height' => 'Pixels#' . $max,
'data' => 'URI#embedded' 'data' => 'URI#embedded',
'classid' => 'Enum#clsid:d27cdb6e-ae6d-11cf-96b8-444553540000',
'codebase' => new HTMLPurifier_AttrDef_Enum(array(
'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')),
) )
); );
$object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject(); $object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();

View File

@ -15,6 +15,7 @@ class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_T
$r['thead@background'] = new HTMLPurifier_AttrTransform_Background(); $r['thead@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background(); $r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tbody@background'] = new HTMLPurifier_AttrTransform_Background(); $r['tbody@background'] = new HTMLPurifier_AttrTransform_Background();
$r['table@height'] = new HTMLPurifier_AttrTransform_Length('height');
return $r; return $r;
} }

View File

@ -34,16 +34,21 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// ---- // ----
// This is a degenerate case // This is a degenerate case
} else { } else {
// State 1.2: PAR1 if (!$token->is_whitespace || $this->_isInline($current)) {
// ---- // State 1.2: PAR1
// ----
// State 1.3: PAR1\n\nPAR2 // State 1.3: PAR1\n\nPAR2
// ------------ // ------------
// State 1.4: <div>PAR1\n\nPAR2 (see State 2) // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
// ------------ // ------------
$token = array($this->_pStart()); $token = array($this->_pStart());
$this->_splitText($text, $token); $this->_splitText($text, $token);
} else {
// State 1.5: \n<hr />
// --
}
} }
} else { } else {
// State 2: <div>PAR1... (similar to 1.4) // State 2: <div>PAR1... (similar to 1.4)

View File

@ -0,0 +1,60 @@
<?php
/**
 * Injector that removes span elements which have no attributes.
 *
 * Attributes are validated first, so a span whose attributes are all
 * rejected by validation is removed as well. Only the opening and closing
 * span tokens are dropped; nothing in this class touches the tokens
 * between them.
 */
class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
{
    public $name = 'RemoveSpansWithoutAttributes';
    public $needed = array('span');

    /**
     * Attribute validator run on each opening span before deciding
     * whether the span is attribute-less.
     */
    private $attrValidator;

    /**
     * Used by AttrValidator
     */
    private $config;
    private $context;

    /**
     * Stores the config and context for later attribute validation and
     * creates the validator, then defers to the parent implementation.
     *
     * @param $config  Configuration object handed on to the validator
     * @param $context Context object handed on to the validator
     * @return Whatever the parent prepare() returns
     */
    public function prepare($config, $context) {
        $this->attrValidator = new HTMLPurifier_AttrValidator();
        $this->config = $config;
        $this->context = $context;
        return parent::prepare($config, $context);
    }

    /**
     * Handles an element token: if it is an opening span that ends up
     * with no attributes, deletes it and flags its closing tag.
     *
     * @param $token Token being processed, by reference; set to false to
     *               delete it
     */
    public function handleElement(&$token) {
        if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
            return;
        }

        // We need to validate the attributes now since this doesn't normally
        // happen until after MakeWellFormed. If all the attributes are removed
        // the span needs to be removed too.
        $this->attrValidator->validateToken($token, $this->config, $this->context);
        // NOTE(review): presumably this armor flag stops the later
        // attribute-validation pass from redoing the work - confirm.
        $token->armor['ValidateAttributes'] = true;

        if (!empty($token->attr)) {
            return;
        }

        // Scan forward to the token the helper stops at. $i and $current
        // are filled in by forwardUntilEndToken(); they start out as null,
        // which is the value the previously undeclared variables had.
        $nesting = 0;
        $i = null;
        $current = null;
        while ($this->forwardUntilEndToken($i, $current, $nesting)) {}

        if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
            // Mark closing span tag for deletion
            $current->markForDeletion = true;
            // Delete open span tag
            $token = false;
        }
    }

    /**
     * Handles an end token: deletes it if handleElement() flagged it.
     *
     * @param $token End token being processed, by reference; set to
     *               false to delete it
     */
    public function handleEnd(&$token) {
        // markForDeletion only exists on tokens flagged by handleElement();
        // !empty() keeps unflagged end tokens from triggering a read of an
        // undefined property.
        if (!empty($token->markForDeletion)) {
            $token = false;
        }
    }
}
// vim: et sw=4 sts=4

View File

@ -20,6 +20,8 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
protected $allowedParam = array( protected $allowedParam = array(
'wmode' => true, 'wmode' => true,
'movie' => true, 'movie' => true,
'flashvars' => true,
'src' => true,
); );
public function prepare($config, $context) { public function prepare($config, $context) {
@ -47,7 +49,8 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
// We need this fix because YouTube doesn't supply a data // We need this fix because YouTube doesn't supply a data
// attribute, which we need if a type is specified. This is // attribute, which we need if a type is specified. This is
// *very* Flash specific. // *very* Flash specific.
if (!isset($this->objectStack[$i]->attr['data']) && $token->attr['name'] == 'movie') { if (!isset($this->objectStack[$i]->attr['data']) &&
($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) {
$this->objectStack[$i]->attr['data'] = $token->attr['value']; $this->objectStack[$i]->attr['data'] = $token->attr['value'];
} }
// Check if the parameter is the correct value but has not // Check if the parameter is the correct value but has not

View File

@ -26,13 +26,20 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
* Internal accumulator array for SAX parsers. * Internal accumulator array for SAX parsers.
*/ */
protected $tokens = array(); protected $tokens = array();
protected $last_token_was_empty;
private $parent_handler;
private $stack = array();
public function tokenizeHTML($string, $config, $context) { public function tokenizeHTML($string, $config, $context) {
$this->tokens = array(); $this->tokens = array();
$this->last_token_was_empty = false;
$string = $this->normalize($string, $config, $context); $string = $this->normalize($string, $config, $context);
$this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));
$parser = new XML_HTMLSax3(); $parser = new XML_HTMLSax3();
$parser->set_object($this); $parser->set_object($this);
$parser->set_element_handler('openHandler','closeHandler'); $parser->set_element_handler('openHandler','closeHandler');
@ -44,6 +51,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
$parser->parse($string); $parser->parse($string);
restore_error_handler();
return $this->tokens; return $this->tokens;
} }
@ -58,9 +67,11 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
} }
if ($closed) { if ($closed) {
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs); $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
$this->last_token_was_empty = true;
} else { } else {
$this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs); $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
} }
$this->stack[] = $name;
return true; return true;
} }
@ -71,10 +82,12 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
// HTMLSax3 seems to always send empty tags an extra close tag // HTMLSax3 seems to always send empty tags an extra close tag
// check and ignore if you see it: // check and ignore if you see it:
// [TESTME] to make sure it doesn't overreach // [TESTME] to make sure it doesn't overreach
if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) { if ($this->last_token_was_empty) {
$this->last_token_was_empty = false;
return true; return true;
} }
$this->tokens[] = new HTMLPurifier_Token_End($name); $this->tokens[] = new HTMLPurifier_Token_End($name);
if (!empty($this->stack)) array_pop($this->stack);
return true; return true;
} }
@ -82,6 +95,7 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
* Data event handler, interface is defined by PEAR package. * Data event handler, interface is defined by PEAR package.
*/ */
public function dataHandler(&$parser, $data) { public function dataHandler(&$parser, $data) {
$this->last_token_was_empty = false;
$this->tokens[] = new HTMLPurifier_Token_Text($data); $this->tokens[] = new HTMLPurifier_Token_Text($data);
return true; return true;
} }
@ -91,7 +105,18 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
*/ */
public function escapeHandler(&$parser, $data) { public function escapeHandler(&$parser, $data) {
if (strpos($data, '--') === 0) { if (strpos($data, '--') === 0) {
$this->tokens[] = new HTMLPurifier_Token_Comment($data); // remove trailing and leading double-dashes
$data = substr($data, 2);
if (strlen($data) >= 2 && substr($data, -2) == "--") {
$data = substr($data, 0, -2);
}
if (isset($this->stack[sizeof($this->stack) - 1]) &&
$this->stack[sizeof($this->stack) - 1] == "style") {
$this->tokens[] = new HTMLPurifier_Token_Text($data);
} else {
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
}
$this->last_token_was_empty = false;
} }
// CDATA is handled elsewhere, but if it was handled here: // CDATA is handled elsewhere, but if it was handled here:
//if (strpos($data, '[CDATA[') === 0) { //if (strpos($data, '[CDATA[') === 0) {
@ -101,6 +126,14 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
return true; return true;
} }
/**
* An error handler that mutes strict errors
*/
public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
if ($errno == E_STRICT) return;
return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@ -83,6 +83,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->injectors[] = $injector; $this->injectors[] = $injector;
} }
foreach ($custom_injectors as $injector) { foreach ($custom_injectors as $injector) {
if (!$injector) continue;
if (is_string($injector)) { if (is_string($injector)) {
$injector = "HTMLPurifier_Injector_$injector"; $injector = "HTMLPurifier_Injector_$injector";
$injector = new $injector; $injector = new $injector;
@ -219,6 +220,19 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$autoclose = false; $autoclose = false;
} }
if ($autoclose && $definition->info[$token->name]->wrap) {
// check if this is actually a wrap (mmm wraps!)
$wrapname = $definition->info[$token->name]->wrap;
$wrapdef = $definition->info[$wrapname];
$elements = $wrapdef->child->getAllowedElements($config);
if (isset($elements[$token->name])) {
$newtoken = new HTMLPurifier_Token_Start($wrapname);
$this->insertBefore($newtoken);
$reprocess = true;
continue;
}
}
$carryover = false; $carryover = false;
if ($autoclose && $definition->info[$parent->name]->formatting) { if ($autoclose && $definition->info[$parent->name]->formatting) {
$carryover = true; $carryover = true;

View File

@ -0,0 +1,93 @@
<?php
/**
 * Implements data: URI for base64 encoded images supported by GD.
 *
 * The payload is decoded, written to a temporary file and sniffed with
 * exif_imagetype() or getimagesize(); only payloads whose detected type
 * is listed in $allowed_types are accepted. Accepted URIs are rewritten
 * to a canonical "<type>;base64,<data>" path.
 */
class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {

    public $browsable = true;

    /**
     * Lookup of MIME types this scheme accepts, keyed by type name.
     */
    public $allowed_types = array(
        // you better write validation code for other types if you
        // decide to allow them
        'image/jpeg' => true,
        'image/gif' => true,
        'image/png' => true,
    );

    /**
     * Validates a data: URI and rewrites it into canonical base64 form.
     *
     * @param $uri     URI object, by reference; its path holds
     *                 "[metadata,]data". On success the path is rewritten
     *                 and userinfo, host, port, fragment and query are
     *                 cleared.
     * @param $config  Configuration object (not used here)
     * @param $context Context object (not used here)
     * @return bool True if the payload is an allowed image type, false
     *              otherwise
     */
    public function validate(&$uri, $config, $context) {
        $result = explode(',', $uri->path, 2);
        $is_base64 = false;
        $charset = null;
        $content_type = null;
        if (count($result) == 2) {
            list($metadata, $data) = $result;
            // do some legwork on the metadata
            foreach (explode(';', $metadata) as $cur) {
                if ($cur === 'base64') {
                    // base64 ends the metadata; anything after it is ignored
                    $is_base64 = true;
                    break;
                }
                if (substr($cur, 0, 8) === 'charset=') {
                    // doesn't match if there are arbitrary spaces, but
                    // whatever dude
                    if ($charset !== null) continue; // garbage
                    $charset = substr($cur, 8); // not used
                } else {
                    if ($content_type !== null) continue; // garbage
                    $content_type = $cur;
                }
            }
        } else {
            $data = $result[0];
        }
        if ($content_type !== null && empty($this->allowed_types[$content_type])) {
            return false;
        }
        if ($charset !== null) {
            // error; we don't allow plaintext stuff
            $charset = null;
        }
        $data = rawurldecode($data);
        if ($is_base64) {
            $raw_data = base64_decode($data);
        } else {
            $raw_data = $data;
        }
        // XXX probably want to refactor this into a general mechanism
        // for filtering arbitrary content types
        // Prefer the platform's temp directory; "/tmp" is kept only as a
        // fallback for PHP builds without sys_get_temp_dir().
        if (function_exists('sys_get_temp_dir')) {
            $file = tempnam(sys_get_temp_dir(), "");
        } else {
            $file = tempnam("/tmp", "");
        }
        if ($file === false) {
            // no scratch file, so the payload cannot be inspected
            return false;
        }
        if (file_put_contents($file, $raw_data) === false) {
            unlink($file);
            return false;
        }
        // The temp file is removed on every path below, right after the
        // sniffing call that needs it.
        if (function_exists('exif_imagetype')) {
            // muted like the getimagesize() branch, so a bad payload
            // cannot surface PHP diagnostics
            set_error_handler(array($this, 'muteErrorHandler'));
            $image_code = exif_imagetype($file);
            restore_error_handler();
            unlink($file);
            if ($image_code === false) return false;
        } elseif (function_exists('getimagesize')) {
            set_error_handler(array($this, 'muteErrorHandler'));
            $info = getimagesize($file);
            restore_error_handler();
            unlink($file);
            if ($info == false) return false;
            $image_code = $info[2];
        } else {
            unlink($file);
            trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
            // reached only if an error handler swallowed the error above;
            // without a sniffer the payload cannot be trusted
            return false;
        }
        $real_content_type = image_type_to_mime_type($image_code);
        if ($real_content_type !== $content_type) {
            // we're nice guys; if the content type is something else we
            // support, change it over
            if (empty($this->allowed_types[$real_content_type])) return false;
            $content_type = $real_content_type;
        }
        // ok, it's kosher, rewrite what we need
        $uri->userinfo = null;
        $uri->host = null;
        $uri->port = null;
        $uri->fragment = null;
        $uri->query = null;
        $uri->path = "$content_type;base64," . base64_encode($raw_data);
        return true;
    }

    /**
     * Error handler that discards the error; installed temporarily around
     * the image-sniffing calls.
     *
     * @param $errno  Error level (ignored)
     * @param $errstr Error message (ignored)
     */
    public function muteErrorHandler($errno, $errstr) {}

}