From ad0ffffd3221013e9645557323b8159784c8e309 Mon Sep 17 00:00:00 2001 From: Klaus Leithoff Date: Fri, 4 Jun 2010 11:16:35 +0000 Subject: [PATCH] update to Version 4.1.1:HTML Purifier 4.1.1 is a major security and bugfix release that improves on 4.1s fix for an XSS vulnerability exploitable on Internet Explorer. It also contains a number of important bugfixes, including > the removal of improper logic that could result in infinite loops and > fixed parsing for single-attributes with entities with DirectLex. --- phpgwapi/inc/htmlpurifier/NEWS | 15 ++++++ .../library/HTMLPurifier.includes.php | 2 +- .../inc/htmlpurifier/library/HTMLPurifier.php | 6 +-- .../library/HTMLPurifier/AttrDef.php | 36 +++++++++++++ .../AttrDef/CSS/BackgroundPosition.php | 21 +++++--- .../HTMLPurifier/AttrDef/CSS/FontFamily.php | 52 ++++++------------- .../library/HTMLPurifier/AttrDef/CSS/URI.php | 12 ++--- .../library/HTMLPurifier/Config.php | 2 +- .../HTMLPurifier/Language/messages/en.php | 1 + .../library/HTMLPurifier/Lexer.php | 10 +++- .../library/HTMLPurifier/Lexer/DirectLex.php | 2 +- .../HTMLPurifier/Strategy/MakeWellFormed.php | 8 ++- 12 files changed, 108 insertions(+), 59 deletions(-) diff --git a/phpgwapi/inc/htmlpurifier/NEWS b/phpgwapi/inc/htmlpurifier/NEWS index 0e8eda6f51..72f59b402f 100644 --- a/phpgwapi/inc/htmlpurifier/NEWS +++ b/phpgwapi/inc/htmlpurifier/NEWS @@ -9,6 +9,21 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Internal change ========================== +4.1.1, released 2010-05-31 +- Fix undefined index warnings in maintenance scripts. +- Fix bug in DirectLex for parsing elements with a single attribute + with entities. +- Rewrite CSS output logic for font-family and url(). Thanks Mario + Heiderich for reporting and Takeshi + Terada for suggesting the fix. +- Emit an error for CollectErrors if a body is extracted +- Fix bug where in background-position for center keyword handling. +- Fix infinite loop when a wrapper element is inserted in a context + where it's not allowed. Thanks Lars for reporting. +- Remove +x bit and shebang from index.php; only supported mode is to + explicitly call it with php. +- Make test script less chatty when log_errors is on. + 4.1.0, released 2010-04-26 ! Support proprietary height attribute on table element ! Support YouTube slideshows that contain /cp/ in their URL. diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.includes.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.includes.php index 81fa4de117..2ed0f0c17f 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.includes.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.includes.php @@ -7,7 +7,7 @@ * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * FILE, changes will be overwritten the next time the script is run. * - * @version 4.1.0 + * @version 4.1.1 * * @warning * You must *not* include any other HTML Purifier files before this file, diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.php index fff4678862..ba2c7b3067 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.php @@ -19,7 +19,7 @@ */ /* - HTML Purifier 4.1.0 - Standards Compliant HTML Filtering + HTML Purifier 4.1.1 - Standards Compliant HTML Filtering Copyright (C) 2006-2008 Edward Z. Yang This library is free software; you can redistribute it and/or @@ -55,10 +55,10 @@ class HTMLPurifier { /** Version of HTML Purifier */ - public $version = '4.1.0'; + public $version = '4.1.1'; /** Constant with version of HTML Purifier */ - const VERSION = '4.1.0'; + const VERSION = '4.1.1'; /** Global configuration object */ public $config; diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef.php index d32fa62d6a..b2e4f36c5d 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef.php @@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); } + /** + * Parses a possibly escaped CSS string and returns the "pure" + * version of it. + */ + protected function expandCSSEscape($string) { + // flexibly parse it + $ret = ''; + for ($i = 0, $c = strlen($string); $i < $c; $i++) { + if ($string[$i] === '\\') { + $i++; + if ($i >= $c) { + $ret .= '\\'; + break; + } + if (ctype_xdigit($string[$i])) { + $code = $string[$i]; + for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { + if (!ctype_xdigit($string[$i])) break; + $code .= $string[$i]; + } + // We have to be extremely careful when adding + // new characters, to make sure we're not breaking + // the encoding. + $char = HTMLPurifier_Encoder::unichr(hexdec($code)); + if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue; + $ret .= $char; + if ($i < $c && trim($string[$i]) !== '') $i--; + continue; + } + if ($string[$i] === "\n") continue; + } + $ret .= $string[$i]; + } + return $ret; + } + } // vim: et sw=4 sts=4 diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php index 35df3985e2..fae82eaec8 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php @@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef $keywords = array(); $keywords['h'] = false; // left, right $keywords['v'] = false; // top, bottom - $keywords['c'] = false; // center + $keywords['ch'] = false; // center (first word) + $keywords['cv'] = false; // center (second word) $measures = array(); $i = 0; @@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef $lbit = ctype_lower($bit) ? $bit : strtolower($bit); if (isset($lookup[$lbit])) { $status = $lookup[$lbit]; + if ($status == 'c') { + if ($i == 0) { + $status = 'ch'; + } else { + $status = 'cv'; + } + } $keywords[$status] = $lbit; $i++; } @@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef if (!$i) return false; // no valid values were caught - $ret = array(); // first keyword if ($keywords['h']) $ret[] = $keywords['h']; - elseif (count($measures)) $ret[] = array_shift($measures); - elseif ($keywords['c']) { - $ret[] = $keywords['c']; - $keywords['c'] = false; // prevent re-use: center = center center + elseif ($keywords['ch']) { + $ret[] = $keywords['ch']; + $keywords['cv'] = false; // prevent re-use: center = center center } + elseif (count($measures)) $ret[] = array_shift($measures); if ($keywords['v']) $ret[] = $keywords['v']; + elseif ($keywords['cv']) $ret[] = $keywords['cv']; elseif (count($measures)) $ret[] = array_shift($measures); - elseif ($keywords['c']) $ret[] = $keywords['c']; if (empty($ret)) return false; return implode(' ', $ret); diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php index 705ac893d1..42c2054c2a 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php @@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef $quote = $font[0]; if ($font[$length - 1] !== $quote) continue; $font = substr($font, 1, $length - 2); - - $new_font = ''; - for ($i = 0, $c = strlen($font); $i < $c; $i++) { - if ($font[$i] === '\\') { - $i++; - if ($i >= $c) { - $new_font .= '\\'; - break; - } - if (ctype_xdigit($font[$i])) { - $code = $font[$i]; - for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { - if (!ctype_xdigit($font[$i])) break; - $code .= $font[$i]; - } - // We have to be extremely careful when adding - // new characters, to make sure we're not breaking - // the encoding. - $char = HTMLPurifier_Encoder::unichr(hexdec($code)); - if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue; - $new_font .= $char; - if ($i < $c && trim($font[$i]) !== '') $i--; - continue; - } - if ($font[$i] === "\n") continue; - } - $new_font .= $font[$i]; - } - - $font = $new_font; } + + $font = $this->expandCSSEscape($font); + // $font is a pure representation of the font name if (ctype_alnum($font) && $font !== '') { @@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef continue; } - // complicated font, requires quoting + // bugger out on whitespace. form feed (0C) really + // shouldn't show up regardless + $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font); - // armor single quotes and new lines - $font = str_replace("\\", "\\\\", $font); - $font = str_replace("'", "\\'", $font); - $final .= "'$font', "; + // These ugly transforms don't pose a security + // risk (as \\ and \" might). We could try to be clever and + // use single-quote wrapping when there is a double quote + // present, but I have choosen not to implement that. + // (warning: this code relies on the selection of quotation + // mark below) + $font = str_replace('\\', '\\5C ', $font); + $font = str_replace('"', '\\22 ', $font); + + // complicated font, requires quoting + $final .= "\"$font\", "; // note that this will later get turned into " } $final = rtrim($final, ', '); if ($final === '') return false; diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php index 54b7d63f12..1df17dc25b 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php @@ -34,20 +34,16 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI $uri = substr($uri, 1, $new_length - 1); } - $keys = array( '(', ')', ',', ' ', '"', "'"); - $values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'"); - $uri = str_replace($values, $keys, $uri); + $uri = $this->expandCSSEscape($uri); $result = parent::validate($uri, $config, $context); if ($result === false) return false; - // escape necessary characters according to CSS spec - // except for the comma, none of these should appear in the - // URI at all - $result = str_replace($keys, $values, $result); + // extra sanity check; should have been done by URI + $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result); - return "url('$result')"; + return "url(\"$result\")"; } diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Config.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Config.php index 203831f9f5..2a334b0d83 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Config.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Config.php @@ -20,7 +20,7 @@ class HTMLPurifier_Config /** * HTML Purifier's version */ - public $version = '4.1.0'; + public $version = '4.1.1'; /** * Bool indicator whether or not to automatically finalize diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Language/messages/en.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Language/messages/en.php index aab2e52ebc..8d7b5736bb 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Language/messages/en.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Language/messages/en.php @@ -23,6 +23,7 @@ $messages = array( 'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', 'Lexer: Missing attribute key' => 'Attribute declaration has no key', 'Lexer: Missing end quote' => 'Attribute declaration has no end quote', +'Lexer: Extracted body' => 'Removed document metadata tags', 'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', 'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer.php index 8cce008d3d..b05e115468 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer.php @@ -265,7 +265,15 @@ class HTMLPurifier_Lexer // extract body from document if applicable if ($config->get('Core.ConvertDocumentToFragment')) { - $html = $this->extractBody($html); + $e = false; + if ($config->get('Core.CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + $new_html = $this->extractBody($html); + if ($e && $new_html != $html) { + $e->send(E_WARNING, 'Lexer: Extracted body'); + } + $html = $new_html; } // expand entities that aren't the big five diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php index 439409d051..456e6e1908 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php @@ -384,7 +384,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } } if ($value === false) $value = ''; - return array($key => $value); + return array($key => $this->parseData($value)); } // setup loop environment diff --git a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php index 316b2386ac..c736584007 100644 --- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -165,6 +165,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $token = $tokens[$t]; //echo '
'; printTokens($tokens, $t); printTokens($this->stack); + //flush(); // quick-check: if it's not a tag, no need to process if (empty($token->is_tag)) { @@ -221,11 +222,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } if ($autoclose && $definition->info[$token->name]->wrap) { - // check if this is actually a wrap (mmm wraps!) + // Check if an element can be wrapped by another + // element to make it valid in a context (for + // example,
      needs a
    • in between) $wrapname = $definition->info[$token->name]->wrap; $wrapdef = $definition->info[$wrapname]; $elements = $wrapdef->child->getAllowedElements($config); - if (isset($elements[$token->name])) { + $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config); + if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) { $newtoken = new HTMLPurifier_Token_Start($wrapname); $this->insertBefore($newtoken); $reprocess = true;