upgrade HTML Purifier to Version 4.4.0 (add missing files)

2024-11-22 16:03:47 +01:00 · 2012-02-01 16:01:01 +00:00 · 2012-02-01 16:01:01 +00:00 · 69f65e4917
commit 69f65e4917
parent 5511abc5bf
13 changed files with 379 additions and 0 deletions
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php
@ -0,0 +1,24 @@
+<?php
+
+/**
+ * Validates based on {ident} CSS grammar production
+ */
+class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
+{
+
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+
+        // early abort: '' and '0' (strings that convert to false) are invalid
+        if (!$string) return false;
+
+        $pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/';
+        if (!preg_match($pattern, $string)) return false;
+        return $string;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php
@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Dummy AttrDef that mimics another AttrDef, BUT it generates clones
+ * with make.
+ */
+class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
+{
+    /**
+     * What we're cloning
+     */
+    protected $clone;
+
+    public function __construct($clone) {
+        $this->clone = $clone;
+    }
+
+    public function validate($v, $config, $context) {
+        return $this->clone->validate($v, $config, $context);
+    }
+
+    public function make($string) {
+        return clone $this->clone;
+    }
+
+}
+
+// vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php
@ -0,0 +1,38 @@
+<?php
+
+// must be called POST validation
+
+/**
+ * Adds target="blank" to all outbound links.  This transform is
+ * only attached if Attr.TargetBlank is TRUE.  This works regardless
+ * of whether or not Attr.AllowedFrameTargets
+ */
+class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
+{
+    private $parser;
+
+    public function __construct() {
+        $this->parser = new HTMLPurifier_URIParser();
+    }
+
+    public function transform($attr, $config, $context) {
+
+        if (!isset($attr['href'])) {
+            return $attr;
+        }
+
+        // XXX Kind of inefficient
+        $url = $this->parser->parse($attr['href']);
+        $scheme = $url->getSchemeObj($config, $context);
+
+        if ($scheme->browsable && !$url->isBenign($config, $context)) {
+            $attr['target'] = 'blank';
+        }
+
+        return $attr;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ChildDef/List.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ChildDef/List.php
@ -0,0 +1,120 @@
+<?php
+
+/**
+ * Definition for list containers ul and ol.
+ */
+class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
+{
+    public $type = 'list';
+    // lying a little bit, so that we can handle ul and ol ourselves
+    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
+    public $elements = array('li' => true, 'ul' => true, 'ol' => true);
+    public function validateChildren($tokens_of_children, $config, $context) {
+        // Flag for subclasses
+        $this->whitespace = false;
+
+        // if there are no tokens, delete parent node
+        if (empty($tokens_of_children)) return false;
+
+        // the new set of children
+        $result = array();
+
+        // current depth into the nest
+        $nesting = 0;
+
+        // a little sanity check to make sure it's not ALL whitespace
+        $all_whitespace = true;
+
+        $seen_li = false;
+        $need_close_li = false;
+
+        foreach ($tokens_of_children as $token) {
+            if (!empty($token->is_whitespace)) {
+                $result[] = $token;
+                continue;
+            }
+            $all_whitespace = false; // phew, we're not talking about whitespace
+
+            if ($nesting == 1 && $need_close_li) {
+                $result[] = new HTMLPurifier_Token_End('li');
+                $nesting--;
+                $need_close_li = false;
+            }
+
+            $is_child = ($nesting == 0);
+
+            if ($token instanceof HTMLPurifier_Token_Start) {
+                $nesting++;
+            } elseif ($token instanceof HTMLPurifier_Token_End) {
+                $nesting--;
+            }
+
+            if ($is_child) {
+                if ($token->name === 'li') {
+                    // good
+                    $seen_li = true;
+                } elseif ($token->name === 'ul' || $token->name === 'ol') {
+                    // we want to tuck this into the previous li
+                    $need_close_li = true;
+                    $nesting++;
+                    if (!$seen_li) {
+                        // create a new li element
+                        $result[] = new HTMLPurifier_Token_Start('li');
+                    } else {
+                        // backtrack until </li> found
+                        while(true) {
+                            $t = array_pop($result);
+                            if ($t instanceof HTMLPurifier_Token_End) {
+                                // XXX actually, these invariants could very plausibly be violated
+                                // if we are doing silly things with modifying the set of allowed elements.
+                                // FORTUNATELY, it doesn't make a difference, since the allowed
+                                // elements are hard-coded here!
+                                if ($t->name !== 'li') {
+                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
+                                    return false;
+                                }
+                                break;
+                            } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
+                                if ($t->name !== 'li') {
+                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
+                                    return false;
+                                }
+                                // XXX this should have a helper for it...
+                                $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
+                                break;
+                            } else {
+                                if (!$t->is_whitespace) {
+                                    trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
+                                    return false;
+                                }
+                            }
+                        }
+                    }
+                } else {
+                    // start wrapping (this doesn't precisely mimic
+                    // browser behavior, but what browsers do is kind of
+                    // hard to mimic in a standards compliant way
+                    // XXX Actually, this has no impact in practice,
+                    // because this gets handled earlier. Arguably,
+                    // we should rip out all of that processing
+                    $result[] = new HTMLPurifier_Token_Start('li');
+                    $nesting++;
+                    $seen_li = true;
+                    $need_close_li = true;
+                }
+            }
+            $result[] = $token;
+        }
+        if ($need_close_li) {
+            $result[] = new HTMLPurifier_Token_End('li');
+        }
+        if (empty($result)) return false;
+        if ($all_whitespace) {
+            return false;
+        }
+        if ($tokens_of_children == $result) return true;
+        return $result;
+    }
+}
+
+// vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt
@ -0,0 +1,9 @@
+Core.EnableIDNA
+TYPE: bool
+DEFAULT: false
+VERSION: 4.4.0
+--DESCRIPTION--
+Allows international domain names in URLs.  This configuration option
+requires the PEAR Net_IDNA2 module to be installed.  It operates by
+punycoding any internationalized host names for maximum portability.
+--# vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt
@ -0,0 +1,10 @@
+HTML.AllowedComments
+TYPE: lookup
+VERSION: 4.4.0
+DEFAULT: array()
+--DESCRIPTION--
+A whitelist which indicates what explicit comment bodies should be
+allowed, modulo leading and trailing whitespace.  See also %HTML.AllowedCommentsRegexp
+(these directives are union'ed together, so a comment is considered
+valid if any directive deems it valid.)
+--# vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt
@ -0,0 +1,15 @@
+HTML.AllowedCommentsRegexp
+TYPE: string/null
+VERSION: 4.4.0
+DEFAULT: NULL
+--DESCRIPTION--
+A regexp, which if it matches the body of a comment, indicates that
+it should be allowed. Trailing and leading spaces are removed prior
+to running this regular expression.
+<strong>Warning:</strong> Make sure you specify
+correct anchor metacharacters <code>^regex$</code>, otherwise you may accept
+comments that you did not mean to! In particular, the regex <code>/foo|bar/</code>
+is probably not sufficiently strict, since it also allows <code>foobar</code>.
+See also %HTML.AllowedComments (these directives are union'ed together,
+so a comment is considered valid if any directive deems it valid.)
+--# vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt
@ -0,0 +1,13 @@
+HTML.SafeIframe
+TYPE: bool
+VERSION: 4.4.0
+DEFAULT: false
+--DESCRIPTION--
+<p>
+    Whether or not to permit iframe tags in untrusted documents.  This
+    directive must be accompanied by a whitelist of permitted iframes,
+    such as %URI.SafeIframeRegexp, otherwise it will fatally error.
+    This directive has no effect on strict doctypes, as iframes are not
+    valid.
+</p>
+--# vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt
@ -0,0 +1,8 @@
+HTML.TargetBlank
+TYPE: bool
+VERSION: 4.4.0
+DEFAULT: FALSE
+--DESCRIPTION--
+If enabled, <code>target=blank</code> attributes are added to all outgoing links.
+(This includes links from an HTTPS version of a page to an HTTP version.)
+--# vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt
@ -0,0 +1,22 @@
+URI.SafeIframeRegexp
+TYPE: string/null
+VERSION: 4.4.0
+DEFAULT: NULL
+--DESCRIPTION--
+<p>
+    A PCRE regular expression that will be matched against an iframe URI.  This is
+    a relatively inflexible scheme, but works well enough for the most common
+    use-case of iframes: embedded video.  This directive only has an effect if
+    %HTML.SafeIframe is enabled.  Here are some example values:
+</p>
+<ul>
+    <li><code>%^http://www.youtube.com/embed/%</code> - Allow YouTube videos</li>
+    <li><code>%^http://player.vimeo.com/video/%</code> - Allow Vimeo videos</li>
+    <li><code>%^http://(www.youtube.com/embed/|player.vimeo.com/video/)%</code> - Allow both</li>
+</ul>
+<p>
+    Note that this directive does not give you enough granularity to, say, disable
+    all <code>autoplay</code> videos.  Pipe up on the HTML Purifier forums if this
+    is a capability you want.
+</p>
+--# vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php
@ -0,0 +1,38 @@
+<?php
+
+/**
+ * XHTML 1.1 Iframe Module provides inline frames.
+ *
+ * @note This module is not considered safe unless an Iframe
+ * whitelisting mechanism is specified.  Currently, the only
+ * such mechanism is %URL.SafeIframeRegexp
+ */
+class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
+{
+
+    public $name = 'Iframe';
+    public $safe = false;
+
+    public function setup($config) {
+        if ($config->get('HTML.SafeIframe')) {
+            $this->safe = true;
+        }
+        $this->addElement(
+            'iframe', 'Inline', 'Flow', 'Common',
+            array(
+                'src' => 'URI#embedded',
+                'width' => 'Length',
+                'height' => 'Length',
+                'name' => 'ID',
+                'scrolling' => 'Enum#yes,no,auto',
+                'frameborder' => 'Enum#0,1',
+                'longdesc' => 'URI',
+                'marginheight' => 'Pixels',
+                'marginwidth' => 'Pixels',
+            )
+        );
+    }
+
+}
+
+// vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php
@ -0,0 +1,19 @@
+<?php
+
+/**
+ * Module adds the target=blank attribute transformation to a tags.  It
+ * is enabled by HTML.TargetBlank
+ */
+class HTMLPurifier_HTMLModule_TargetBlank extends HTMLPurifier_HTMLModule
+{
+
+    public $name = 'TargetBlank';
+
+    public function setup($config) {
+        $a = $this->addBlankElement('a');
+        $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetBlank();
+    }
+
+}
+
+// vim: et sw=4 sts=4
--- a/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php
+++ b/phpgwapi/inc/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php
@ -0,0 +1,35 @@
+<?php
+
+/**
+ * Implements safety checks for safe iframes.
+ *
+ * @warning This filter is *critical* for ensuring that %HTML.SafeIframe
+ * works safely.
+ */
+class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
+{
+    public $name = 'SafeIframe';
+    public $always_load = true;
+    protected $regexp = NULL;
+    // XXX: The not so good bit about how this is all setup now is we
+    // can't check HTML.SafeIframe in the 'prepare' step: we have to
+    // defer till the actual filtering.
+    public function prepare($config) {
+        $this->regexp = $config->get('URI.SafeIframeRegexp');
+        return true;
+    }
+    public function filter(&$uri, $config, $context) {
+        // check if filter not applicable
+        if (!$config->get('HTML.SafeIframe')) return true;
+        // check if the filter should actually trigger
+        if (!$context->get('EmbeddedURI', true)) return true;
+        $token = $context->get('CurrentToken', true);
+        if (!($token && $token->name == 'iframe')) return true;
+        // check if we actually have some whitelists enabled
+        if ($this->regexp === null) return false;
+        // actually check the whitelists
+        return preg_match($this->regexp, $uri->toString());
+    }
+}
+
+// vim: et sw=4 sts=4