Update to version 4.1.1: HTML Purifier 4.1.1 is a major security and bugfix release that
improves on 4.1's fix for an XSS vulnerability exploitable on Internet Explorer. It also contains a number of important bugfixes, including
the removal of improper logic that could result in infinite loops and
fixed parsing for single attributes with entities with DirectLex.
This commit is contained in:
Klaus Leithoff 2010-06-04 11:13:55 +00:00
parent 4b67a05074
commit 0ec0d04fb3
12 changed files with 108 additions and 59 deletions

View File

@ -9,6 +9,21 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change . Internal change
========================== ==========================
4.1.1, released 2010-05-31
- Fix undefined index warnings in maintenance scripts.
- Fix bug in DirectLex for parsing elements with a single attribute
with entities.
- Rewrite CSS output logic for font-family and url(). Thanks Mario
Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
- Emit an error for CollectErrors if a body is extracted
- Fix bug in background-position for center keyword handling.
- Fix infinite loop when a wrapper element is inserted in a context
where it's not allowed. Thanks Lars <lars@renoz.dk> for reporting.
- Remove +x bit and shebang from index.php; only supported mode is to
explicitly call it with php.
- Make test script less chatty when log_errors is on.
4.1.0, released 2010-04-26 4.1.0, released 2010-04-26
! Support proprietary height attribute on table element ! Support proprietary height attribute on table element
! Support YouTube slideshows that contain /cp/ in their URL. ! Support YouTube slideshows that contain /cp/ in their URL.

View File

@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run. * FILE, changes will be overwritten the next time the script is run.
* *
* @version 4.1.0 * @version 4.1.1
* *
* @warning * @warning
* You must *not* include any other HTML Purifier files before this file, * You must *not* include any other HTML Purifier files before this file,

View File

@ -19,7 +19,7 @@
*/ */
/* /*
HTML Purifier 4.1.0 - Standards Compliant HTML Filtering HTML Purifier 4.1.1 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or This library is free software; you can redistribute it and/or
@ -55,10 +55,10 @@ class HTMLPurifier
{ {
/** Version of HTML Purifier */ /** Version of HTML Purifier */
public $version = '4.1.0'; public $version = '4.1.1';
/** Constant with version of HTML Purifier */ /** Constant with version of HTML Purifier */
const VERSION = '4.1.0'; const VERSION = '4.1.1';
/** Global configuration object */ /** Global configuration object */
public $config; public $config;

View File

@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
} }
/**
 * Parses a possibly escaped CSS string and returns the "pure"
 * version of it.
 *
 * Escape forms handled, as implemented below:
 *  - backslash followed by one to six hex digits: replaced by the
 *    character for that code point. A single whitespace character
 *    directly after the digits terminates the escape and is dropped;
 *    any other following character is kept and processed normally.
 *  - backslash followed by "\n": both characters are dropped.
 *  - backslash followed by any other character: the backslash is
 *    dropped and the character is kept literally.
 *  - a lone backslash at the very end of the string is kept as-is.
 *
 * @param string $string Possibly escaped CSS string
 * @return string The string with its escapes expanded
 */
protected function expandCSSEscape($string) {
    // flexibly parse it
    $ret = '';
    for ($i = 0, $c = strlen($string); $i < $c; $i++) {
        if ($string[$i] === '\\') {
            $i++;
            if ($i >= $c) {
                // trailing backslash with nothing left to escape
                $ret .= '\\';
                break;
            }
            if (ctype_xdigit($string[$i])) {
                // gather the hex digits of the escape (six at most)
                $code = $string[$i];
                for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                    if (!ctype_xdigit($string[$i])) break;
                    $code .= $string[$i];
                }
                // We have to be extremely careful when adding
                // new characters, to make sure we're not breaking
                // the encoding.
                // NOTE(review): cleanUTF8() is assumed to return '' for a
                // character that must not be emitted -- confirm against
                // HTMLPurifier_Encoder.
                $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                    $ret .= $char;
                }
                // $i now points at the character after the digits. If it
                // is something trim() strips, it is the escape terminator
                // and the loop increment consumes it. Otherwise step back
                // so the loop increment re-reads it. This must also run
                // when the decoded character was rejected above, or the
                // character following the escape would be silently lost.
                if ($i < $c && trim($string[$i]) !== '') $i--;
                continue;
            }
            // escaped newline: drop both the backslash and the newline
            if ($string[$i] === "\n") continue;
        }
        $ret .= $string[$i];
    }
    return $ret;
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
$keywords = array(); $keywords = array();
$keywords['h'] = false; // left, right $keywords['h'] = false; // left, right
$keywords['v'] = false; // top, bottom $keywords['v'] = false; // top, bottom
$keywords['c'] = false; // center $keywords['ch'] = false; // center (first word)
$keywords['cv'] = false; // center (second word)
$measures = array(); $measures = array();
$i = 0; $i = 0;
@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
$lbit = ctype_lower($bit) ? $bit : strtolower($bit); $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
if (isset($lookup[$lbit])) { if (isset($lookup[$lbit])) {
$status = $lookup[$lbit]; $status = $lookup[$lbit];
if ($status == 'c') {
if ($i == 0) {
$status = 'ch';
} else {
$status = 'cv';
}
}
$keywords[$status] = $lbit; $keywords[$status] = $lbit;
$i++; $i++;
} }
@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
if (!$i) return false; // no valid values were caught if (!$i) return false; // no valid values were caught
$ret = array(); $ret = array();
// first keyword // first keyword
if ($keywords['h']) $ret[] = $keywords['h']; if ($keywords['h']) $ret[] = $keywords['h'];
elseif (count($measures)) $ret[] = array_shift($measures); elseif ($keywords['ch']) {
elseif ($keywords['c']) { $ret[] = $keywords['ch'];
$ret[] = $keywords['c']; $keywords['cv'] = false; // prevent re-use: center = center center
$keywords['c'] = false; // prevent re-use: center = center center
} }
elseif (count($measures)) $ret[] = array_shift($measures);
if ($keywords['v']) $ret[] = $keywords['v']; if ($keywords['v']) $ret[] = $keywords['v'];
elseif ($keywords['cv']) $ret[] = $keywords['cv'];
elseif (count($measures)) $ret[] = array_shift($measures); elseif (count($measures)) $ret[] = array_shift($measures);
elseif ($keywords['c']) $ret[] = $keywords['c'];
if (empty($ret)) return false; if (empty($ret)) return false;
return implode(' ', $ret); return implode(' ', $ret);

View File

@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
$quote = $font[0]; $quote = $font[0];
if ($font[$length - 1] !== $quote) continue; if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2); $font = substr($font, 1, $length - 2);
$new_font = '';
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
if ($font[$i] === '\\') {
$i++;
if ($i >= $c) {
$new_font .= '\\';
break;
}
if (ctype_xdigit($font[$i])) {
$code = $font[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($font[$i])) break;
$code .= $font[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$new_font .= $char;
if ($i < $c && trim($font[$i]) !== '') $i--;
continue;
}
if ($font[$i] === "\n") continue;
}
$new_font .= $font[$i];
} }
$font = $new_font; $font = $this->expandCSSEscape($font);
}
// $font is a pure representation of the font name // $font is a pure representation of the font name
if (ctype_alnum($font) && $font !== '') { if (ctype_alnum($font) && $font !== '') {
@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
continue; continue;
} }
// complicated font, requires quoting // bugger out on whitespace. form feed (0C) really
// shouldn't show up regardless
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
// armor single quotes and new lines // These ugly transforms don't pose a security
$font = str_replace("\\", "\\\\", $font); // risk (as \\ and \" might). We could try to be clever and
$font = str_replace("'", "\\'", $font); // use single-quote wrapping when there is a double quote
$final .= "'$font', "; // present, but I have choosen not to implement that.
// (warning: this code relies on the selection of quotation
// mark below)
$font = str_replace('\\', '\\5C ', $font);
$font = str_replace('"', '\\22 ', $font);
// complicated font, requires quoting
$final .= "\"$font\", "; // note that this will later get turned into &quot;
} }
$final = rtrim($final, ', '); $final = rtrim($final, ', ');
if ($final === '') return false; if ($final === '') return false;

View File

@ -34,20 +34,16 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
$uri = substr($uri, 1, $new_length - 1); $uri = substr($uri, 1, $new_length - 1);
} }
$keys = array( '(', ')', ',', ' ', '"', "'"); $uri = $this->expandCSSEscape($uri);
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
$uri = str_replace($values, $keys, $uri);
$result = parent::validate($uri, $config, $context); $result = parent::validate($uri, $config, $context);
if ($result === false) return false; if ($result === false) return false;
// escape necessary characters according to CSS spec // extra sanity check; should have been done by URI
// except for the comma, none of these should appear in the $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
// URI at all
$result = str_replace($keys, $values, $result);
return "url('$result')"; return "url(\"$result\")";
} }

View File

@ -20,7 +20,7 @@ class HTMLPurifier_Config
/** /**
* HTML Purifier's version * HTML Purifier's version
*/ */
public $version = '4.1.0'; public $version = '4.1.1';
/** /**
* Bool indicator whether or not to automatically finalize * Bool indicator whether or not to automatically finalize

View File

@ -23,6 +23,7 @@ $messages = array(
'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', 'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
'Lexer: Missing attribute key' => 'Attribute declaration has no key', 'Lexer: Missing attribute key' => 'Attribute declaration has no key',
'Lexer: Missing end quote' => 'Attribute declaration has no end quote', 'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
'Lexer: Extracted body' => 'Removed document metadata tags',
'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', 'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized',
'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', 'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1',

View File

@ -265,7 +265,15 @@ class HTMLPurifier_Lexer
// extract body from document if applicable // extract body from document if applicable
if ($config->get('Core.ConvertDocumentToFragment')) { if ($config->get('Core.ConvertDocumentToFragment')) {
$html = $this->extractBody($html); $e = false;
if ($config->get('Core.CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
$new_html = $this->extractBody($html);
if ($e && $new_html != $html) {
$e->send(E_WARNING, 'Lexer: Extracted body');
}
$html = $new_html;
} }
// expand entities that aren't the big five // expand entities that aren't the big five

View File

@ -384,7 +384,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
} }
} }
if ($value === false) $value = ''; if ($value === false) $value = '';
return array($key => $value); return array($key => $this->parseData($value));
} }
// setup loop environment // setup loop environment

View File

@ -165,6 +165,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$token = $tokens[$t]; $token = $tokens[$t];
//echo '<br>'; printTokens($tokens, $t); printTokens($this->stack); //echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
//flush();
// quick-check: if it's not a tag, no need to process // quick-check: if it's not a tag, no need to process
if (empty($token->is_tag)) { if (empty($token->is_tag)) {
@ -221,11 +222,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
} }
if ($autoclose && $definition->info[$token->name]->wrap) { if ($autoclose && $definition->info[$token->name]->wrap) {
// check if this is actually a wrap (mmm wraps!) // Check if an element can be wrapped by another
// element to make it valid in a context (for
// example, <ul><ul> needs a <li> in between)
$wrapname = $definition->info[$token->name]->wrap; $wrapname = $definition->info[$token->name]->wrap;
$wrapdef = $definition->info[$wrapname]; $wrapdef = $definition->info[$wrapname];
$elements = $wrapdef->child->getAllowedElements($config); $elements = $wrapdef->child->getAllowedElements($config);
if (isset($elements[$token->name])) { $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config);
if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
$newtoken = new HTMLPurifier_Token_Start($wrapname); $newtoken = new HTMLPurifier_Token_Start($wrapname);
$this->insertBefore($newtoken); $this->insertBefore($newtoken);
$reprocess = true; $reprocess = true;