Update to version 4.1.1

HTML Purifier 4.1.1 is a major security and bugfix release that improves on 4.1's fix for an XSS vulnerability exploitable on Internet Explorer. It also contains a number of important bugfixes, including the removal of improper logic that could result in infinite loops and fixed parsing for single-attributes with entities with DirectLex.
2025-02-22 13:20:50 +01:00 · 2010-06-04 11:16:14 +00:00 · 2010-06-04 11:16:14 +00:00 · f8a7b498f2
commit f8a7b498f2
parent e1657c74ec
12 changed files with 108 additions and 59 deletions
--- a/phpgwapi/inc/htmlpurifier/NEWS
+++ b/phpgwapi/inc/htmlpurifier/NEWS
@@ -9,6 +9,21 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
    . Internal change
 ==========================

+4.1.1, released 2010-05-31
+- Fix undefined index warnings in maintenance scripts.
+- Fix bug in DirectLex for parsing elements with a single attribute
+  with entities.
+- Rewrite CSS output logic for font-family and url().  Thanks Mario
+  Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
+  Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
+- Emit an error for CollectErrors if a body is extracted
+- Fix bug in background-position center keyword handling.
+- Fix infinite loop when a wrapper element is inserted in a context
+  where it's not allowed.  Thanks Lars <lars@renoz.dk> for reporting.
+- Remove +x bit and shebang from index.php; only supported mode is to
+  explicitly call it with php.
+- Make test script less chatty when log_errors is on.
+
 4.1.0, released 2010-04-26
 ! Support proprietary height attribute on table element
 ! Support YouTube slideshows that contain /cp/ in their URL.
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.includes.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.includes.php
@@ -7,7 +7,7 @@
 * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
 * FILE, changes will be overwritten the next time the script is run.
 *
- * @version 4.1.0
+ * @version 4.1.1
 *
 * @warning
 *      You must *not* include any other HTML Purifier files before this file,
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier.php
@@ -19,7 +19,7 @@
 */

 /*
-    HTML Purifier 4.1.0 - Standards Compliant HTML Filtering
+    HTML Purifier 4.1.1 - Standards Compliant HTML Filtering
    Copyright (C) 2006-2008 Edward Z. Yang

    This library is free software; you can redistribute it and/or
@@ -55,10 +55,10 @@ class HTMLPurifier
 {

    /** Version of HTML Purifier */
-    public $version = '4.1.0';
+    public $version = '4.1.1';

    /** Constant with version of HTML Purifier */
-    const VERSION = '4.1.0';
+    const VERSION = '4.1.1';

    /** Global configuration object */
    public $config;
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef.php
@@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef
        return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
    }

+    /**
+     * Parses a possibly escaped CSS string and returns the "pure" 
+     * version of it.
+     */
+    protected function expandCSSEscape($string) {
+        // flexibly parse it
+        $ret = '';
+        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
+            if ($string[$i] === '\\') {
+                $i++;
+                if ($i >= $c) {
+                    $ret .= '\\';
+                    break;
+                }
+                if (ctype_xdigit($string[$i])) {
+                    $code = $string[$i];
+                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
+                        if (!ctype_xdigit($string[$i])) break;
+                        $code .= $string[$i];
+                    }
+                    // We have to be extremely careful when adding
+                    // new characters, to make sure we're not breaking
+                    // the encoding.
+                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
+                    if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
+                    $ret .= $char;
+                    if ($i < $c && trim($string[$i]) !== '') $i--;
+                    continue;
+                }
+                if ($string[$i] === "\n") continue;
+            }
+            $ret .= $string[$i];
+        }
+        return $ret;
+    }
+
 }

 // vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
@@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
        $keywords = array();
        $keywords['h'] = false; // left, right
        $keywords['v'] = false; // top, bottom
-        $keywords['c'] = false; // center
+        $keywords['ch'] = false; // center (first word)
+        $keywords['cv'] = false; // center (second word)
        $measures = array();

        $i = 0;
@@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
            $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
            if (isset($lookup[$lbit])) {
                $status = $lookup[$lbit];
+                if ($status == 'c') {
+                    if ($i == 0) {
+                        $status = 'ch';
+                    } else {
+                        $status = 'cv';
+                    }
+                }
                $keywords[$status] = $lbit;
                $i++;
            }
@@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef

        if (!$i) return false; // no valid values were caught

-
        $ret = array();

        // first keyword
        if     ($keywords['h'])     $ret[] = $keywords['h'];
-        elseif (count($measures))   $ret[] = array_shift($measures);
-        elseif ($keywords['c']) {
-            $ret[] = $keywords['c'];
-            $keywords['c'] = false; // prevent re-use: center = center center
+        elseif ($keywords['ch']) {
+            $ret[] = $keywords['ch'];
+            $keywords['cv'] = false; // prevent re-use: center = center center
        }
+        elseif (count($measures))   $ret[] = array_shift($measures);

        if     ($keywords['v'])     $ret[] = $keywords['v'];
+        elseif ($keywords['cv'])    $ret[] = $keywords['cv'];
        elseif (count($measures))   $ret[] = array_shift($measures);
-        elseif ($keywords['c'])     $ret[] = $keywords['c'];

        if (empty($ret)) return false;
        return implode(' ', $ret);
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
@@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
                $quote = $font[0];
                if ($font[$length - 1] !== $quote) continue;
                $font = substr($font, 1, $length - 2);
-
-                $new_font = '';
-                for ($i = 0, $c = strlen($font); $i < $c; $i++) {
-                    if ($font[$i] === '\\') {
-                        $i++;
-                        if ($i >= $c) {
-                            $new_font .= '\\';
-                            break;
-                        }
-                        if (ctype_xdigit($font[$i])) {
-                            $code = $font[$i];
-                            for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
-                                if (!ctype_xdigit($font[$i])) break;
-                                $code .= $font[$i];
-                            }
-                            // We have to be extremely careful when adding
-                            // new characters, to make sure we're not breaking
-                            // the encoding.
-                            $char = HTMLPurifier_Encoder::unichr(hexdec($code));
-                            if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
-                            $new_font .= $char;
-                            if ($i < $c && trim($font[$i]) !== '') $i--;
-                            continue;
-                        }
-                        if ($font[$i] === "\n") continue;
-                    }
-                    $new_font .= $font[$i];
-                }
-
-                $font = $new_font;
            }
+
+            $font = $this->expandCSSEscape($font);
+
            // $font is a pure representation of the font name

            if (ctype_alnum($font) && $font !== '') {
@@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
                continue;
            }

-            // complicated font, requires quoting
+            // bugger out on whitespace.  form feed (0C) really
+            // shouldn't show up regardless
+            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);

-            // armor single quotes and new lines
-            $font = str_replace("\\", "\\\\", $font);
-            $font = str_replace("'", "\\'", $font);
-            $final .= "'$font', ";
+            // These ugly transforms don't pose a security
+            // risk (as \\ and \" might).  We could try to be clever and
+            // use single-quote wrapping when there is a double quote
+            // present, but I have choosen not to implement that.
+            // (warning: this code relies on the selection of quotation
+            // mark below)
+            $font = str_replace('\\', '\\5C ', $font);
+            $font = str_replace('"',  '\\22 ', $font);
+
+            // complicated font, requires quoting
+            $final .= "\"$font\", "; // note that this will later get turned into &quot;
        }
        $final = rtrim($final, ', ');
        if ($final === '') return false;
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php
@@ -34,20 +34,16 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
            $uri = substr($uri, 1, $new_length - 1);
        }

-        $keys   = array(  '(',   ')',   ',',   ' ',   '"',   "'");
-        $values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
-        $uri = str_replace($values, $keys, $uri);
+        $uri = $this->expandCSSEscape($uri);

        $result = parent::validate($uri, $config, $context);

        if ($result === false) return false;

-        // escape necessary characters according to CSS spec
-        // except for the comma, none of these should appear in the
-        // URI at all
-        $result = str_replace($keys, $values, $result);
+        // extra sanity check; should have been done by URI
+        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);

-        return "url('$result')";
+        return "url(\"$result\")";

    }

--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Config.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Config.php
@@ -20,7 +20,7 @@ class HTMLPurifier_Config
    /**
     * HTML Purifier's version
     */
-    public $version = '4.1.0';
+    public $version = '4.1.1';

    /**
     * Bool indicator whether or not to automatically finalize
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Language/messages/en.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Language/messages/en.php
@@ -23,6 +23,7 @@ $messages = array(
 'Lexer: Missing gt'            => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
 'Lexer: Missing attribute key' => 'Attribute declaration has no key',
 'Lexer: Missing end quote'     => 'Attribute declaration has no end quote',
+'Lexer: Extracted body'        => 'Removed document metadata tags',

 'Strategy_RemoveForeignElements: Tag transform'              => '<$1> element transformed into $CurrentToken.Serialized',
 'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1',
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer.php
@@ -265,7 +265,15 @@ class HTMLPurifier_Lexer

        // extract body from document if applicable
        if ($config->get('Core.ConvertDocumentToFragment')) {
-            $html = $this->extractBody($html);
+            $e = false;
+            if ($config->get('Core.CollectErrors')) {
+                $e =& $context->get('ErrorCollector');
+            }
+            $new_html = $this->extractBody($html);
+            if ($e && $new_html != $html) {
+                $e->send(E_WARNING, 'Lexer: Extracted body');
+            }
+            $html = $new_html;
        }

        // expand entities that aren't the big five
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php
@@ -384,7 +384,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                }
            }
            if ($value === false) $value = '';
-            return array($key => $value);
+            return array($key => $this->parseData($value));
        }

        // setup loop environment
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php
@@ -165,6 +165,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
            $token = $tokens[$t];

            //echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
+            //flush();

            // quick-check: if it's not a tag, no need to process
            if (empty($token->is_tag)) {
@@ -221,11 +222,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                    }

                    if ($autoclose && $definition->info[$token->name]->wrap) {
-                        // check if this is actually a wrap (mmm wraps!)
+                        // Check if an element can be wrapped by another 
+                        // element to make it valid in a context (for 
+                        // example, <ul><ul> needs a <li> in between)
                        $wrapname = $definition->info[$token->name]->wrap;
                        $wrapdef = $definition->info[$wrapname];
                        $elements = $wrapdef->child->getAllowedElements($config);
-                        if (isset($elements[$token->name])) {
+                        $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config);
+                        if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
                            $newtoken = new HTMLPurifier_Token_Start($wrapname);
                            $this->insertBefore($newtoken);
                            $reprocess = true;