From fdfcdfc387b5e511b0e1ac97d2d5a9871105ad2d Mon Sep 17 00:00:00 2001 From: Klaus Leithoff Date: Wed, 6 Jun 2012 08:54:35 +0000 Subject: [PATCH] * API: upgrade htmLawed from 1.1.10 to 1.1.11 --- phpgwapi/inc/class.egw_htmLawed.inc.php | 14 +++++++-- phpgwapi/inc/htmLawed/htmLawed.php | 24 +++------------ phpgwapi/inc/htmLawed/htmLawedTest.php | 2 +- phpgwapi/inc/htmLawed/htmLawed_README.htm | 34 +++++++++++++++++---- phpgwapi/inc/htmLawed/htmLawed_README.txt | 24 ++++++++++++--- phpgwapi/inc/htmLawed/htmLawed_TESTCASE.txt | 2 +- 6 files changed, 66 insertions(+), 34 deletions(-) diff --git a/phpgwapi/inc/class.egw_htmLawed.inc.php b/phpgwapi/inc/class.egw_htmLawed.inc.php index 67e71ebaa3..d9c09f681f 100644 --- a/phpgwapi/inc/class.egw_htmLawed.inc.php +++ b/phpgwapi/inc/class.egw_htmLawed.inc.php @@ -115,8 +115,13 @@ class egw_htmLawed * implemented so far: img checking for alt attribute == image; set this to empty * a checking for title, replacing @ */ -function hl_my_tag_transform($element, $attribute_array) +function hl_my_tag_transform($element, $attribute_array=0) { + // If second argument is not received, it means a closing tag is being handled + if(ctype_digit($attribute_array)){ + return ""; + } + //if ($element=='img') error_log(__METHOD__.__LINE__." ".$element.'->'.array2string($attribute_array)); // Elements other than 'img' or 'img' without a 'img' attribute are returned unchanged if($element == 'img') @@ -179,8 +184,13 @@ function hl_my_tag_transform($element, $attribute_array) * a -checking for title and href, replacing @ accordingly * -navigate to local anchors without reloading the page */ -function hl_email_tag_transform($element, $attribute_array) +function hl_email_tag_transform($element, $attribute_array=0) { + // If second argument is not received, it means a closing tag is being handled + if(ctype_digit($attribute_array)){ + return ""; + } + //if ($element=='a') error_log(__METHOD__.__LINE__." 
".$element.'->'.array2string($attribute_array)); // Elements other than 'img' or 'img' without a 'img' attribute are returned unchanged if($element == 'img') diff --git a/phpgwapi/inc/htmLawed/htmLawed.php b/phpgwapi/inc/htmLawed/htmLawed.php index 96fa2959a7..c0253186aa 100644 --- a/phpgwapi/inc/htmLawed/htmLawed.php +++ b/phpgwapi/inc/htmLawed/htmLawed.php @@ -1,7 +1,7 @@ 1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele if(!empty($m[1])){ - return (!isset($eE[$e]) ? "" : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('<', '>'), $t) : '')); + return (!isset($eE[$e]) ? (empty($C['hook_tag']) ? "" : $C['hook_tag']($e)) : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('<', '>'), $t) : '')); } // open tag & attr @@ -470,8 +470,8 @@ while(strlen($a)){ $aA[$nm] = ''; } break; case 2: // Val - if(preg_match('`^"[^"]*"`', $a, $m) or preg_match("`^'[^']*'`", $a, $m) or preg_match("`^\s*[^\s\"']+`", $a, $m)){ - $m = $m[0]; $w = 1; $mode = 0; $a = ltrim(substr_replace($a, '', 0, hl_bytes($m))); + if(preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)){ + $a = ltrim($m[2]); $m = $m[1]; $w = 1; $mode = 0; $aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m); } break; @@ -684,23 +684,9 @@ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array( // eof } -/** - * Return the number of bytes of a string, independent of mbstring.func_overload - * AND the availability of mbstring - * - * @param string $str - * @return int - */ -function hl_bytes($str) -{ -static $func_overload; -if (is_null($func_overload)) $func_overload = extension_loaded('mbstring') ? ini_get('mbstring.func_overload') : 0; -return $func_overload & 2 ? 
mb_strlen($str,'8bit') : strlen($str); -} - function hl_version(){ // rel -return '1.1.10'; +return '1.1.11'; // eof } diff --git a/phpgwapi/inc/htmLawed/htmLawedTest.php b/phpgwapi/inc/htmLawed/htmLawedTest.php index a1a6497a99..581beaa256 100644 --- a/phpgwapi/inc/htmLawed/htmLawedTest.php +++ b/phpgwapi/inc/htmLawed/htmLawedTest.php @@ -2,7 +2,7 @@ /* htmLawedTest.php, 22 October 2011 -htmLawed 1.1.10, 5 April 2012 +htmLawed 1.1.11, 5 June 2012 Copyright Santosh Patnaik Dual licensed with LGPL 3 and GPL 2 or later A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed diff --git a/phpgwapi/inc/htmLawed/htmLawed_README.htm b/phpgwapi/inc/htmLawed/htmLawed_README.htm index 77dfdd6496..aac76e2444 100644 --- a/phpgwapi/inc/htmLawed/htmLawed_README.htm +++ b/phpgwapi/inc/htmLawed/htmLawed_README.htm @@ -110,8 +110,8 @@ span.totop a, span.totop a:visited {color: #6699cc;}

-
htmLawed_README.txt, 5 April 2012
-htmLawed 1.1.10, 5 April 2012
+
htmLawed_README.txt, 5 June 2012
+htmLawed 1.1.11, 5 June 2012
Copyright Santosh Patnaik
Dual licensed with LGPL 3 and GPL 2 or later
A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed 
@@ -1523,14 +1523,30 @@ A PHP Labware internal utility - $config parameter hook_tag is set to the name of a function, htmLawed (function hl_tag()) will pass on the element name, and the finalized attribute name-value pairs as array elements to the function. The function is expected to return the full opening tag string like <element_name attribute_1_name="attribute_1_value"...> (for empty elements like img and input, the element-closing slash / should also be included).
+  When $config parameter hook_tag is set to the name of a function, htmLawed (function hl_tag()) will pass on the element name, and, in the case of an opening tag, the finalized attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like <element_name attribute_1_name="attribute_1_value"...> (for empty elements like img and input, the element-closing slash / should also be included), etc.
+
+  Any hook_tag function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as a in the closing </a> tag of the element <a href="http://cnn.com">CNN</a>. Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like </a>).

  This is a powerful functionality that can be exploited for various objectives: consolidate-and-convert inline style attributes to class, convert embed elements to object, permit only one caption element in a table element, disallow embedding of certain types of media, inject HTML, use
CSSTidy to sanitize style attribute values, etc.

  As an example, the custom hook code below can be used to force a series of specifically ordered id attributes on all elements, and a specific param element inside all object elements:

-    function my_tag_function($element, $attribute_array){ +    function my_tag_function($element, $attribute_array=0){ +
+
+ +      // If second argument is not received, it means a closing tag is being handled +
+ +      if(ctype_digit($attribute_array)){ +
+ +        return "</$element>"; +
+ +      } +

      static $id = 0; @@ -1715,6 +1731,8 @@ A PHP Labware internal utility - $config["hook_tag"], if specified, now receives names of elements in closing tags.
+
  1.1.10 - 22 October 2011. Fix for a bug in the tidy functionality that caused the entire input to be replaced with a single space; new parameter, $config["direct_list_nest"] to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.)

  1.1.9.5 - 6 July 2011. Minor correction of a rule for nesting of li within dir
@@ -1777,6 +1795,10 @@ A PHP Labware internal utility -
htmLawed.php (assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content.

Important  The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes:
+
+  (1) From version 1.1-1.1.10 to 1.1.11, if a hook_tag function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a hook_tag function receives only the element name. The hook_tag function therefore may have to be edited. See
section 3.4.9.
+
  Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip.
@@ -1828,7 +1850,7 @@ A PHP Labware internal utility - 4.10  Acknowledgements (to top)

-  Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users.
+  Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users.

  Thank you!
@@ -2097,7 +2119,7 @@ A PHP Labware internal utility - HTM version of htmLawed_README.txt generated on 05 Apr, 2012 using rTxt2htm from PHP Labware +


HTM version of htmLawed_README.txt generated on 06 Jun, 2012 using rTxt2htm from PHP Labware
diff --git a/phpgwapi/inc/htmLawed/htmLawed_README.txt b/phpgwapi/inc/htmLawed/htmLawed_README.txt index 1c5de2c243..a4cff6e308 100644 --- a/phpgwapi/inc/htmLawed/htmLawed_README.txt +++ b/phpgwapi/inc/htmLawed/htmLawed_README.txt @@ -1,6 +1,6 @@ /* -htmLawed_README.txt, 5 April 2012 -htmLawed 1.1.10, 5 April 2012 +htmLawed_README.txt, 5 June 2012 +htmLawed 1.1.11, 5 June 2012 Copyright Santosh Patnaik Dual licensed with LGPL 3 and GPL 2 or later A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed @@ -1183,13 +1183,21 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.). - When '$config' parameter 'hook_tag' is set to the name of a function, htmLawed (function 'hl_tag()') will pass on the element name, and the `finalized` attribute name-value pairs as array elements to the function. The function is expected to return the full opening tag string like '<element_name attribute_1_name="attribute_1_value"...>' (for empty elements like 'img' and 'input', the element-closing slash '/' should also be included). + When '$config' parameter 'hook_tag' is set to the name of a function, htmLawed (function 'hl_tag()') will pass on the element name, and, in the case of an opening tag, the `finalized` attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like '<element_name attribute_1_name="attribute_1_value"...>' (for empty elements like 'img' and 'input', the element-closing slash '/' should also be included), etc. + + Any 'hook_tag' function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as 'a' in the closing '</a>' tag of the element '<a href="http://cnn.com">CNN</a>'. 
Unlike for opening tags, no other value (i.e., the attribute name-value array) is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like '</a>'). This is a *powerful functionality* that can be exploited for various objectives: consolidate-and-convert inline 'style' attributes to 'class', convert 'embed' elements to 'object', permit only one 'caption' element in a 'table' element, disallow embedding of certain types of media, *inject HTML*, use CSSTidy:- http://csstidy.sourceforge.net to sanitize 'style' attribute values, etc. As an example, the custom hook code below can be used to force a series of specifically ordered 'id' attributes on all elements, and a specific 'param' element inside all 'object' elements: - function my_tag_function($element, $attribute_array){ + function my_tag_function($element, $attribute_array=0){ + + // If second argument is not received, it means a closing tag is being handled + if(ctype_digit($attribute_array)){ + return "</$element>"; + } + static $id = 0; // Remove any duplicate element if($element == 'param' && isset($attribute_array['allowscriptaccess'])){ @@ -1313,6 +1321,8 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern `Version number - Release date. Notes` + 1.1.11 - 5 June 2012. Fix for a possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload environment. '$config["hook_tag"]', if specified, now receives names of elements in closing tags. + 1.1.10 - 22 October 2011. Fix for a bug in the 'tidy' functionality that caused the entire input to be replaced with a single space; new parameter, '$config["direct_list_nest"]' to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.) 1.1.9.5 - 6 July 2011. 
Minor correction of a rule for nesting of 'li' within 'dir' @@ -1373,6 +1383,10 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern Upgrading is as simple as replacing the previous version of 'htmLawed.php' (assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content. + *Important* The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes: + + (1) From version 1.1-1.1.10 to 1.1.11, if a 'hook_tag' function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a 'hook_tag' function receives only the element name. The 'hook_tag' function therefore may have to be edited. See section:- #3.4.9. + Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip. @@ -1419,7 +1433,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern -- 4.10 Acknowledgements ------------------------------------------o - Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users. + Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Ulf Harnhammer, Gareth Heyes, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Edward Yang, and many anonymous users. Thank you! 
diff --git a/phpgwapi/inc/htmLawed/htmLawed_TESTCASE.txt b/phpgwapi/inc/htmLawed/htmLawed_TESTCASE.txt index cd64c0f845..793a5a6a7e 100644 --- a/phpgwapi/inc/htmLawed/htmLawed_TESTCASE.txt +++ b/phpgwapi/inc/htmLawed/htmLawed_TESTCASE.txt @@ -1,6 +1,6 @@ /* htmLawed_TESTCASE.txt, 22 October 2011 -htmLawed 1.1.10, 5 April 2012 +htmLawed 1.1.11, 5 June 2012 Copyright Santosh Patnaik Dual licensed with LGPL 3 and GPL 2 or later A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed