Update to version 4.1.1: HTML Purifier 4.1.1 is a major security and bugfix release that

improves on 4.1's fix for an XSS vulnerability exploitable on Internet Explorer. It also contains a number of important bugfixes, including the removal of improper logic that could result in infinite loops and fixed parsing for single attributes with entities in DirectLex.
2024-11-24 17:04:14 +01:00 · 2010-06-04 11:16:14 +00:00 · 2010-06-04 11:16:14 +00:00 · f8a7b498f2
commit f8a7b498f2
parent e1657c74ec
12 changed files with 108 additions and 59 deletions
--- a/phpgwapi/inc/htmlpurifier/NEWS
+++ b/phpgwapi/inc/htmlpurifier/NEWS
@ -9,6 +9,21 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
    . Internal change
 ==========================
 4.1.1, released 2010-05-31
 - Fix undefined index warnings in maintenance scripts.
 - Fix bug in DirectLex for parsing elements with a single attribute
  with entities.
 - Rewrite CSS output logic for font-family and url().  Thanks Mario
  Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
  Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
 - Emit an error for CollectErrors if a body is extracted
 - Fix bug where in background-position for center keyword handling.
 - Fix infinite loop when a wrapper element is inserted in a context
  where it's not allowed.  Thanks Lars <lars@renoz.dk> for reporting.
 - Remove +x bit and shebang from index.php; only supported mode is to
  explicitly call it with php.
 - Make test script less chatty when log_errors is on.
 4.1.0, released 2010-04-26
 ! Support proprietary height attribute on table element
 ! Support YouTube slideshows that contain /cp/ in their URL.
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.includes.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.includes.php
@ -7,7 +7,7 @@
 * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
 * FILE, changes will be overwritten the next time the script is run.
 *
- * @version 4.1.0
+ * @version 4.1.1
 *
 * @warning
 *      You must *not* include any other HTML Purifier files before this file,
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.php
@ -19,7 +19,7 @@
 */
 /*
-    HTML Purifier 4.1.0 - Standards Compliant HTML Filtering
+    HTML Purifier 4.1.1 - Standards Compliant HTML Filtering
    Copyright (C) 2006-2008 Edward Z. Yang
    This library is free software; you can redistribute it and/or
@ -55,10 +55,10 @@ class HTMLPurifier
 {
    /** Version of HTML Purifier */
-    public $version = '4.1.0';
+    public $version = '4.1.1';
    /** Constant with version of HTML Purifier */
-    const VERSION = '4.1.0';
+    const VERSION = '4.1.1';
    /** Global configuration object */
    public $config;
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef.php
@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef
        return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
    }
    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handling of a backslash, byte by byte:
     *  - backslash at the very end of the string: kept as a literal
     *    backslash;
     *  - backslash followed by one to six hex digits: decoded through
     *    HTMLPurifier_Encoder::unichr(); the result is dropped when
     *    HTMLPurifier_Encoder::cleanUTF8() reduces it to an empty string.
     *    A single whitespace character (as judged by trim()) directly
     *    after the digits terminates the escape and is consumed;
     *  - backslash followed by a newline: both are removed;
     *  - backslash followed by anything else: the backslash is removed
     *    and the following character is kept.
     *
     * @param string $string CSS string contents, without surrounding quotes
     * @return string The string with CSS escapes expanded
     */
    protected function expandCSSEscape($string) {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // dangling backslash at end of input: keep it verbatim
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // collect up to six hex digits; on exit $i points at
                    // the first character after the digits
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) break;
                        $code .= $string[$i];
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    // The loop increment is about to skip $string[$i].
                    // That is only correct when it is the whitespace that
                    // terminates the escape; otherwise step back so the
                    // character is processed normally. This must happen
                    // for rejected code points as well, or the character
                    // following an invalid escape would be silently lost.
                    if ($i < $c && trim($string[$i]) !== '') $i--;
                    continue;
                }
                // escaped newline: line continuation, emit nothing
                if ($string[$i] === "\n") continue;
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
 }
 // vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
        $keywords = array();
        $keywords['h'] = false; // left, right
        $keywords['v'] = false; // top, bottom
-        $keywords['c'] = false; // center
+        $keywords['ch'] = false; // center (first word)
        $keywords['cv'] = false; // center (second word)
        $measures = array();
        $i = 0;
@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
            $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
            if (isset($lookup[$lbit])) {
                $status = $lookup[$lbit];
                if ($status == 'c') {
                    if ($i == 0) {
                        $status = 'ch';
                    } else {
                        $status = 'cv';
                    }
                }
                $keywords[$status] = $lbit;
                $i++;
            }
@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
        if (!$i) return false; // no valid values were caught
        $ret = array();
        // first keyword
        if     ($keywords['h'])     $ret[] = $keywords['h'];
-        elseif (count($measures))   $ret[] = array_shift($measures);
+        elseif ($keywords['ch']) {
-        elseif ($keywords['c']) {
+            $ret[] = $keywords['ch'];
-            $ret[] = $keywords['c'];
+            $keywords['cv'] = false; // prevent re-use: center = center center
            $keywords['c'] = false; // prevent re-use: center = center center
        }
        elseif (count($measures))   $ret[] = array_shift($measures);
        if     ($keywords['v'])     $ret[] = $keywords['v'];
        elseif ($keywords['cv'])    $ret[] = $keywords['cv'];
        elseif (count($measures))   $ret[] = array_shift($measures);
        elseif ($keywords['c'])     $ret[] = $keywords['c'];
        if (empty($ret)) return false;
        return implode(' ', $ret);
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
                $quote = $font[0];
                if ($font[$length - 1] !== $quote) continue;
                $font = substr($font, 1, $length - 2);
                $new_font = '';
                for ($i = 0, $c = strlen($font); $i < $c; $i++) {
                    if ($font[$i] === '\\') {
                        $i++;
                        if ($i >= $c) {
                            $new_font .= '\\';
                            break;
                        }
                        if (ctype_xdigit($font[$i])) {
                            $code = $font[$i];
                            for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                                if (!ctype_xdigit($font[$i])) break;
                                $code .= $font[$i];
                            }
                            // We have to be extremely careful when adding
                            // new characters, to make sure we're not breaking
                            // the encoding.
                            $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                            if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
                            $new_font .= $char;
                            if ($i < $c && trim($font[$i]) !== '') $i--;
                            continue;
                        }
                        if ($font[$i] === "\n") continue;
                    }
                    $new_font .= $font[$i];
            }
-                $font = $new_font;
+            $font = $this->expandCSSEscape($font);
-            }
+
            // $font is a pure representation of the font name
            if (ctype_alnum($font) && $font !== '') {
@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
                continue;
            }
-            // complicated font, requires quoting
+            // bugger out on whitespace.  form feed (0C) really
            // shouldn't show up regardless
            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
-            // armor single quotes and new lines
+            // These ugly transforms don't pose a security
-            $font = str_replace("\\", "\\\\", $font);
+            // risk (as \\ and \" might).  We could try to be clever and
-            $font = str_replace("'", "\\'", $font);
+            // use single-quote wrapping when there is a double quote
-            $final .= "'$font', ";
+            // present, but I have choosen not to implement that.
            // (warning: this code relies on the selection of quotation
            // mark below)
            $font = str_replace('\\', '\\5C ', $font);
            $font = str_replace('"',  '\\22 ', $font);
            // complicated font, requires quoting
            $final .= "\"$font\", "; // note that this will later get turned into &quot;
        }
        $final = rtrim($final, ', ');
        if ($final === '') return false;
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php
@ -34,20 +34,16 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
            $uri = substr($uri, 1, $new_length - 1);
        }
-        $keys   = array(  '(',   ')',   ',',   ' ',   '"',   "'");
+        $uri = $this->expandCSSEscape($uri);
        $values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
        $uri = str_replace($values, $keys, $uri);
        $result = parent::validate($uri, $config, $context);
        if ($result === false) return false;
-        // escape necessary characters according to CSS spec
+        // extra sanity check; should have been done by URI
-        // except for the comma, none of these should appear in the
+        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
        // URI at all
        $result = str_replace($keys, $values, $result);
-        return "url('$result')";
+        return "url(\"$result\")";
    }
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Config.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Config.php
@ -20,7 +20,7 @@ class HTMLPurifier_Config
    /**
     * HTML Purifier's version
     */
-    public $version = '4.1.0';
+    public $version = '4.1.1';
    /**
     * Bool indicator whether or not to automatically finalize
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Language/messages/en.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Language/messages/en.php
@ -23,6 +23,7 @@ $messages = array(
 'Lexer: Missing gt'            => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
 'Lexer: Missing attribute key' => 'Attribute declaration has no key',
 'Lexer: Missing end quote'     => 'Attribute declaration has no end quote',
 'Lexer: Extracted body'        => 'Removed document metadata tags',
 'Strategy_RemoveForeignElements: Tag transform'              => '<$1> element transformed into $CurrentToken.Serialized',
 'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1',
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer.php
@ -265,7 +265,15 @@ class HTMLPurifier_Lexer
        // extract body from document if applicable
        if ($config->get('Core.ConvertDocumentToFragment')) {
-            $html = $this->extractBody($html);
+            $e = false;
            if ($config->get('Core.CollectErrors')) {
                $e =& $context->get('ErrorCollector');
            }
            $new_html = $this->extractBody($html);
            if ($e && $new_html != $html) {
                $e->send(E_WARNING, 'Lexer: Extracted body');
            }
            $html = $new_html;
        }
        // expand entities that aren't the big five
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php
@ -384,7 +384,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                }
            }
            if ($value === false) $value = '';
-            return array($key => $value);
+            return array($key => $this->parseData($value));
        }
        // setup loop environment
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php
@ -165,6 +165,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
            $token = $tokens[$t];
            //echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
            //flush();
            // quick-check: if it's not a tag, no need to process
            if (empty($token->is_tag)) {
@ -221,11 +222,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                    }
                    if ($autoclose && $definition->info[$token->name]->wrap) {
-                        // check if this is actually a wrap (mmm wraps!)
+                        // Check if an element can be wrapped by another 
                        // element to make it valid in a context (for 
                        // example, <ul><ul> needs a <li> in between)
                        $wrapname = $definition->info[$token->name]->wrap;
                        $wrapdef = $definition->info[$wrapname];
                        $elements = $wrapdef->child->getAllowedElements($config);
-                        if (isset($elements[$token->name])) {
+                        $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config);
                        if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
                            $newtoken = new HTMLPurifier_Token_Start($wrapname);
                            $this->insertBefore($newtoken);
                            $reprocess = true;