<?php

/**
 * Class that handles operations involving percent-encoding in URIs.
 *
 * @warning
 *      Be careful when reusing instances of PercentEncoder. The object
 *      you use for normalize() SHOULD NOT be used for encode(), or
 *      vice-versa, because both methods consult the same $preserve table.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of bytes that are never percent-escaped, keyed by the
     * byte's ordinal value (the stored value is always true). Holds the
     * unreserved set (digits, ASCII letters, "-", ".", "_", "~") plus any
     * extra characters handed to the constructor.
     */
    protected $preserve = array();

    /**
     * Builds the table of characters to leave unescaped.
     *
     * @param $preserve String of extra characters that should be preserved
     *                  in addition to the unreserved set, or false for none.
     */
    public function __construct($preserve = false) {
        // unreserved letters, ought to const-ify
        for ($i = 48; $i <= 57; $i++) {
            $this->preserve[$i] = true; // digits
        }
        for ($i = 65; $i <= 90; $i++) {
            $this->preserve[$i] = true; // upper-case
        }
        for ($i = 97; $i <= 122; $i++) {
            $this->preserve[$i] = true; // lower-case
        }
        $this->preserve[45]  = true; // Dash         -
        $this->preserve[46]  = true; // Period       .
        $this->preserve[95]  = true; // Underscore   _
        $this->preserve[126] = true; // Tilde        ~

        // extra letters not to escape
        if ($preserve !== false) {
            for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
                $this->preserve[ord($preserve[$i])] = true;
            }
        }
    }

    /**
     * Our replacement for urlencode: it percent-encodes every byte except
     * the unreserved characters and any extra characters that were
     * instructed to be preserved.
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. Percents will not be
     *      re-escaped, regardless of their status in $preserve
     * @param $string String to be encoded
     * @return Encoded string.
     */
    public function encode($string) {
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            // Extract the byte once and reuse it in both branches.
            $chr = substr($string, $i, 1);
            if ($chr !== '%' && !isset($this->preserve[ord($chr)])) {
                // Not a percent sign and not in the preserve table: escape it.
                $ret .= rawurlencode($chr);
            } else {
                // "%" is passed through untouched so that existing escape
                // sequences are not double-encoded.
                $ret .= $chr;
            }
        }
        return $ret;
    }

    /**
     * Fix up percent-encoding by decoding unreserved characters and normalizing.
     *
     * Escapes whose byte is in $preserve are decoded to the literal
     * character, every other valid escape has its hex digits upper-cased,
     * and a "%" that is not followed by two hex digits is rewritten as "%25".
     *
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of PercentEncoder!
     * @param $string String to normalize
     * @return Normalized string.
     */
    public function normalize($string) {
        // Loose comparison is deliberate: it also short-circuits null/false.
        if ($string == '') return '';

        // Everything before the first "%" is literal text; each later piece
        // begins with whatever followed a "%" in the input.
        $parts = explode('%', $string);
        $ret = array_shift($parts);

        foreach ($parts as $part) {
            $length = strlen($part);
            if ($length < 2) {
                // Too short to hold two hex digits: the "%" was a stray one.
                $ret .= '%25' . $part;
                continue;
            }
            $encoding = substr($part, 0, 2);
            $text     = substr($part, 2);
            if (!ctype_xdigit($encoding)) {
                // Not a valid escape sequence: escape the "%" itself.
                $ret .= '%25' . $part;
                continue;
            }
            $int = hexdec($encoding);
            if (isset($this->preserve[$int])) {
                // Needlessly escaped character: decode it.
                $ret .= chr($int) . $text;
                continue;
            }
            // Valid escape of a character that stays escaped: canonicalize
            // the hex digits to upper-case.
            $encoding = strtoupper($encoding);
            $ret .= '%' . $encoding . $text;
        }
        return $ret;
    }

}

// vim: et sw=4 sts=4