upgrade to htmLawed 1.1.13, 22 July 2012 of Santosh Patnaik

This commit is contained in:
Klaus Leithoff 2012-07-31 15:14:44 +00:00
parent 26ee1fb25e
commit 9bb5ddfdf4
5 changed files with 78 additions and 36 deletions

View File

@ -1,9 +1,9 @@
<?php
/*
htmLawed 1.1.11, 5 June 2012
htmLawed 1.1.13, 22 July 2012
Copyright Santosh Patnaik
Dual licensed with LGPL 3 and GPL 2 or later
Dual licensed with LGPL 3 and GPL 2+
A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed
See htmLawed_README.txt/htm
@ -490,7 +490,7 @@ global $S;
$rl = isset($S[$e]) ? $S[$e] : array();
$a = array(); $nfr = 0;
foreach($aA as $k=>$v){
if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) or isset($rl[$k])) && ((!isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e])))){
if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e]))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])){
if(isset($aNE[$k])){$v = $k;}
elseif(!empty($lcase) && (($e != 'button' or $e != 'input') or $k == 'type')){ // Rather loose but ?not cause issues
$v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v;
@ -624,7 +624,7 @@ if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';}
static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%');
if($e == 'font'){
$a2 = '';
if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=\s*([^"])(\S+)`i', $a, $m)){
if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=(\s*)(\S+)`i', $a, $m)){
$a2 .= ' font-family: '. str_replace('"', '\'', trim($m[2])). ';';
}
if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){
@ -688,7 +688,7 @@ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array(
function hl_version(){
// rel
return '1.1.11';
return '1.1.13';
// eof
}

View File

@ -1,10 +1,10 @@
<?php
/*
htmLawedTest.php, 22 October 2011
htmLawed 1.1.11, 5 June 2012
htmLawedTest.php, 22 July 2012
htmLawed 1.1.13, 22 July 2012
Copyright Santosh Patnaik
Dual licensed with LGPL 3 and GPL 2 or later
Dual licensed with LGPL 3 and GPL 2+
A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed
Test htmLawed; user provides text input; input and processed input are shown as highlighted code and rendered HTML; also shown are execution time and peak memory usage
@ -545,7 +545,7 @@ if($do){
$st = microtime();
$out = htmLawed($_POST['text'], $cfg, $_POST['spec']);
$et = microtime();
echo '<br /><a href="htmLawedTest.php" title="[toggle visibility] syntax-highlighted" onclick="javascript:toggle(\'inputR\'); return false;"><span class="notice">Input code &raquo;</span></a> <span class="help" title="tags estimated as half of total &gt; and &lt; chars; values may be inaccurate for non-ASCII text"><small><big>', strlen($_POST['text']), '</big> chars, ~<big>', ($tag = round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2)), '</big> tag', ($tag > 1 ? 's' : ''), '</small>&nbsp;</span><div id="inputR" style="display: none;">', format($_POST['text']), '</div><script type="text/javascript">hl(\'inputR\');</script>', (!isset($_POST['text'][$_hlimit]) ? ' <a href="htmLawedTest.php" title="[toggle visibility] hexdump; non-viewable characters like line-returns are shown as dots" onclick="javascript:toggle(\'inputD\'); return false;"><span class="notice">Input binary &raquo;&nbsp;</span></a><div id="inputD" style="display: none;">'. hexdump($_POST['text']). '</div>' : ''), ' <a href="htmLawedTest.php" title="[toggle visibility] finalized internal settings as interpreted by htmLawed; for developers" onclick="javascript:toggle(\'settingF\'); return false;"><span class="notice">Finalized internal settings &raquo;&nbsp;</span></a> <div id="settingF" style="display: none;">', str_replace(array(' ', "\t", ' '), array(' ', '&nbsp; ', '&nbsp; '), nl2br(htmlspecialchars(print_r($GLOBALS['hlcfg']['config'], true)))), '</div><script type="text/javascript">hl(\'settingF\');</script>', '<br /><a href="htmLawedTest.php" title="[toggle visibility] suitable for copy-paste" onclick="javascript:toggle(\'outputF\'); return false;"><span class="notice">Output &raquo;</span></a> <span class="help" title="approx., server-specific value excluding the \'include()\' call"><small>htmLawed processing time <big>', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), '</big> s</small></span>', (($mem = memory_get_peak_usage()) !== false ? '<span class="help"><small>, peak memory usage <big>'. round(($mem-$pre_mem)/1048576, 2). '</big> <small>MB</small>' : ''), '</small></span><div id="outputF" style="display: block;"><div><textarea id="text2" class="textarea" name="text2" rows="5" cols="100" style="width: 100%;">', htmlspecialchars($out), '</textarea></div><button type="button" onclick="javascript:document.getElementById(\'text2\').focus();document.getElementById(\'text2\').select()" title="select all to copy" style="float:right;">Select all</button>';
echo '<br /><a href="htmLawedTest.php" title="[toggle visibility] syntax-highlighted" onclick="javascript:toggle(\'inputR\'); return false;"><span class="notice">Input code &raquo;</span></a> <span class="help" title="tags estimated as half of total &gt; and &lt; chars; values may be inaccurate for non-ASCII text"><small><big>', strlen($_POST['text']), '</big> chars, ~<big>', ($tag = round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2)), '</big> tag', ($tag > 1 ? 's' : ''), '</small>&nbsp;</span><div id="inputR" style="display: none;">', format($_POST['text']), '</div><script type="text/javascript">hl(\'inputR\');</script>', (!isset($_POST['text'][$_hlimit]) ? ' <a href="htmLawedTest.php" title="[toggle visibility] hexdump; non-viewable characters like line-returns are shown as dots" onclick="javascript:toggle(\'inputD\'); return false;"><span class="notice">Input binary &raquo;&nbsp;</span></a><div id="inputD" style="display: none;">'. hexdump($_POST['text']). '</div>' : ''), ' <a href="htmLawedTest.php" title="[toggle visibility] finalized internal settings as interpreted by htmLawed; for developers" onclick="javascript:toggle(\'settingF\'); return false;"><span class="notice">Finalized internal settings &raquo;&nbsp;</span></a> <div id="settingF" style="display: none;">$config: ', str_replace(array(' ', "\t", ' '), array(' ', '&nbsp; ', '&nbsp; '), nl2br(htmlspecialchars(print_r($GLOBALS['hlcfg']['config'], true)))), '<br />$spec: ', str_replace(array(' ', "\t", ' '), array(' ', '&nbsp; ', '&nbsp; '), nl2br(htmlspecialchars(print_r($GLOBALS['hlcfg']['spec'], true)))), '</div><script type="text/javascript">hl(\'settingF\');</script>', '<br /><a href="htmLawedTest.php" title="[toggle visibility] suitable for copy-paste" onclick="javascript:toggle(\'outputF\'); return false;"><span class="notice">Output &raquo;</span></a> <span class="help" title="approx., server-specific value excluding the \'include()\' call"><small>htmLawed processing time <big>', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), '</big> s</small></span>', (($mem = memory_get_peak_usage()) !== false ? '<span class="help"><small>, peak memory usage <big>'. round(($mem-$pre_mem)/1048576, 2). '</big> <small>MB</small>' : ''), '</small></span><div id="outputF" style="display: block;"><div><textarea id="text2" class="textarea" name="text2" rows="5" cols="100" style="width: 100%;">', htmlspecialchars($out), '</textarea></div><button type="button" onclick="javascript:document.getElementById(\'text2\').focus();document.getElementById(\'text2\').select()" title="select all to copy" style="float:right;">Select all</button>';
if($_w3c_validate && $validation)
{
?>

View File

@ -110,10 +110,10 @@ span.totop a, span.totop a:visited {color: #6699cc;}
<div id="body">
<br />
<div class="comment">htmLawed_README.txt, 5 June 2012<br />
htmLawed 1.1.11, 5 June 2012<br />
<div class="comment">htmLawed_README.txt, 22 July 2012<br />
htmLawed 1.1.13, 22 July 2012<br />
Copyright Santosh Patnaik<br />
Dual licensed with LGPL 3 and GPL 2 or later<br />
Dual licensed with LGPL 3 and GPL 2+<br />
A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed">http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed</a>&#160;</div>
<br />
@ -170,6 +170,7 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
&#160; * &#160;ensure <strong>unique</strong>&#160;<span class="term">id</span>&#160;attribute values &#160;^~`<br />
&#160; * &#160;<strong>double-quote</strong>&#160;attribute values &#160;^<br />
&#160; * &#160;lower-case <strong>standard attribute values</strong>&#160;like <span class="term">password</span>&#160; ^`<br />
&#160; * &#160;permit custom, non-standard attributes as well as custom rules for standard attributes &#160;~`<br />
<br />
&#160; * &#160;<strong>attribute-specific URL protocol/scheme restriction</strong>&#160; *~`<br />
&#160; * &#160;disable <strong>dynamic expressions</strong>&#160;in <span class="term">style</span>&#160;values &#160;*~`<br />
@ -222,7 +223,7 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
<a name="s1.4" id="s1.4"></a><span class="item-no">1.4</span>&#160; License &amp; copyright
</h3><span class="totop"><a href="#peak">(to top)</a></span><br style="clear: both;" />
<br />
&#160; htmLawed is free and open-source software dual licensed under LGPL license version <a href="http://www.gnu.org/licenses/lgpl-3.0.txt">3</a>&#160;and GPL license version <a href="http://www.gnu.org/licenses/gpl-2.0.txt">2</a>&#160;or later, and copyrighted by Santosh Patnaik, MD, PhD.<br />
&#160; htmLawed is free and open-source software dual licensed under LGPL license version <a href="http://www.gnu.org/licenses/lgpl-3.0.txt">3</a>, and GPL license version <a href="http://www.gnu.org/licenses/gpl-2.0.txt">2</a>&#160;(or later), and copyrighted by Santosh Patnaik, MD, PhD.<br />
</div>
<div class="sub-section"><h3>
@ -258,7 +259,7 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
<br />
&#160; To easily <strong>test</strong>&#160;htmLawed using a form-based interface, use the provided <a href="htmLawedTest.php">demo</a>&#160;(<span class="term">htmLawed.php</span>&#160;and <span class="term">htmLawedTest.php</span>&#160;should be in the same directory on the web-server).<br />
<br />
&#160; <strong>Note</strong>: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to the <a href="http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed">htmLawed</a>&#160;website; the filtering itself can be configured, etc., as described here.<br />
&#160; <strong>Note</strong>: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to this <a href="http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/oop.htm">page</a>&#160;on the htmLawed website; the filtering itself can be configured, etc., as described here.<br />
<div class="sub-section"><h3>
<a name="s2.1" id="s2.1"></a><span class="item-no">2.1</span>&#160; Simple
@ -558,6 +559,14 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
&#160; <strong>Special characters</strong>: The characters <span class="term">;</span>, <span class="term">,</span>, <span class="term">/</span>, <span class="term">(</span>, <span class="term">)</span>, <span class="term">|</span>, <span class="term">~</span>&#160;and space have special meanings in the rules. Words in the rules that use such characters, or the characters themselves, should be <em>escaped</em>&#160;by enclosing in pairs of double-quotes (<span class="term">"</span>). A back-tick (<span class="term">&#96;</span>) can be used to escape a literal <span class="term">"</span>. An example rule illustrating this is <span class="term">input=value(maxlen=30/match="/^\w/"/default="your &#96;"ID&#96;"")</span>.<br />
<br />
&#160; <strong>Note</strong>: To deny an attribute for all elements for which it is legal, <span class="term">$config["deny_attribute"]</span>&#160;(see <a href="#s3.4">section 3.4</a>) can be used instead of <span class="term">$spec</span>. Also, attributes can be allowed element-specifically through <span class="term">$spec</span>&#160;while being denied globally through <span class="term">$config["deny_attribute"]</span>. The <span class="term">hook_tag</span>&#160;parameter (<a href="#s3.4.9">section 3.4.9</a>) can also be used to implement the <span class="term">$spec</span>&#160;functionality.<br />
<br />
&#160; <span class="term">$spec</span>&#160;can also be used to permit custom, non-standard attributes as well as custom rules for standard attributes. Thus, the following value of <span class="term">$spec</span>&#160;will permit the custom uses of the standard <span class="term">rel</span>&#160;attribute in <span class="term">input</span>&#160;(not permitted as per standards) and of a non-standard attribute, <span class="term">vFlag</span>, in <span class="term">img</span>.<br />
<br />
<code class="code">&#160; &#160; $spec = &#39;img=vFlag; input=rel&#39;</code>
<br />
<br />
&#160; The attribute names can contain alphabets, colons (:) and hyphens (-) but must start with an alphabet.<br />
</div>
<div class="sub-section"><h3>
@ -1158,7 +1167,7 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
<br />
&#160; An option like <span class="term">1</span>&#160;is useful, e.g., when a writer previews his submission, whereas one like <span class="term">3</span>&#160;is useful before content is finalized and made available to all.<br />
<br />
&#160; <strong>Note:</strong>&#160;In the example above, unlike <span class="term">&lt;&#42;&gt;</span>, <span class="term">&lt;xml&gt;</span>&#160;gets considered as a tag (even though there is no HTML element named <span class="term">xml</span>). In general, text matching the regular expression pattern <span class="term">&lt;(/?)([a-zA-Z][a-zA-Z1-6]&#42;)([^&gt;]&#42;?)\s?&gt;</span>&#160;is considered a tag (phrase enclosed by the angled brackets <span class="term">&lt;</span>&#160;and <span class="term">&gt;</span>, and starting [with an optional slash preceding] with an alphanumeric word that starts with an alphabet...).<br />
&#160; <strong>Note:</strong>&#160;In the example above, unlike <span class="term">&lt;&#42;&gt;</span>, <span class="term">&lt;xml&gt;</span>&#160;gets considered as a tag (even though there is no HTML element named <span class="term">xml</span>). Thus, the <span class="term">keep_bad</span>&#160;parameter's value affects <span class="term">&lt;xml&gt;</span>&#160;but not <span class="term">&lt;&#42;&gt;</span>. In general, text matching the regular expression pattern <span class="term">&lt;(/?)([a-zA-Z][a-zA-Z1-6]&#42;)([^&gt;]&#42;?)\s?&gt;</span>&#160;is considered a tag (phrase enclosed by the angled brackets <span class="term">&lt;</span>&#160;and <span class="term">&gt;</span>, and starting [with an optional slash preceding] with an alphanumeric word that starts with an alphabet...), and is subjected to the <span class="term">keep_bad</span>&#160;value.<br />
<br />
&#160; Nesting/content rules for each of the 86 elements in htmLawed's default set (see <a href="#s3.3">section 3.3</a>) are defined in function <span class="term">hl_bal()</span>. This means that if a non-standard element besides <span class="term">embed</span>&#160;is being permitted through <span class="term">$config["elements"]</span>, the element's tag content will end up getting removed if <span class="term">$config["balance"]</span>&#160;is set to <span class="term">1</span>.<br />
<br />
@ -1206,7 +1215,7 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
<a name="s3.4" id="s3.4"></a><span class="item-no">3.4</span>&#160; Attributes
</h3><span class="totop"><a href="#peak">(to top)</a></span><br style="clear: both;" />
<br />
&#160; htmLawed will only permit attributes described in the HTML specs (including deprecated ones). It also permits some attributes for use with the <span class="term">embed</span>&#160;element (the non-standard <span class="term">embed</span>&#160;element is supported in htmLawed because of its widespread use), and the the <span class="term">xml&#58;space</span>&#160;attribute (valid only in XHTML 1.1). A list of such 111 attributes and the elements they are allowed in is in <a href="#s5.2">section 5.2</a>.<br />
&#160; htmLawed will only permit attributes described in the HTML specs (including deprecated ones). It also permits some attributes for use with the <span class="term">embed</span>&#160;element (the non-standard <span class="term">embed</span>&#160;element is supported in htmLawed because of its widespread use), and the the <span class="term">xml&#58;space</span>&#160;attribute (valid only in XHTML 1.1). A list of such 111 attributes and the elements they are allowed in is in <a href="#s5.2">section 5.2</a>. Using the <span class="term">$spec</span>&#160;argument, htmLawed can be forced to permit custom, non-standard attributes as well as custom rules for standard attributes (<a href="#s2.3">section 2.3</a>).<br />
<br />
&#160; When <span class="term">$config["deny_attribute"]</span>&#160;is not set, or set to <span class="term">0</span>, or empty (<span class="term">""</span>), all the 111 attributes are permitted. Otherwise, <span class="term">$config["deny_attribute"]</span>&#160;can be set as a list of comma-separated names of the denied attributes. <span class="term">on&#42;</span>&#160;can be used to refer to the group of potentially dangerous, script-accepting attributes: <span class="term">onblur</span>, <span class="term">onchange</span>, <span class="term">onclick</span>, <span class="term">ondblclick</span>, <span class="term">onfocus</span>, <span class="term">onkeydown</span>, <span class="term">onkeypress</span>, <span class="term">onkeyup</span>, <span class="term">onmousedown</span>, <span class="term">onmousemove</span>, <span class="term">onmouseout</span>, <span class="term">onmouseover</span>, <span class="term">onmouseup</span>, <span class="term">onreset</span>, <span class="term">onselect</span>&#160;and <span class="term">onsubmit</span>.<br />
<br />
@ -1539,7 +1548,7 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
<code class="code">&#160; &#160; &#160; // If second argument is not received, it means a closing tag is being handled</code>
<br />
<code class="code">&#160; &#160; &#160; if(ctype_digit($attribute_array)){</code>
<code class="code">&#160; &#160; &#160; if(is_numeric($attribute_array)){</code>
<br />
<code class="code">&#160; &#160; &#160; &#160; return "&lt;/$element&gt;";</code>
@ -1731,6 +1740,10 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
<br />
&#160; <em>Version number - Release date. Notes</em><br />
<br />
&#160; 1.1.13 - 22 July 2012. Added feature allowing use of custom, non-standard attributes or custom rules for standard attributes<br />
<br />
&#160; 1.1.12 - 5 July 2012. Fix for a bug in identifying an unquoted value of the <span class="term">face</span>&#160;attribute<br />
<br />
&#160; 1.1.11 - 5 June 2012. Fix for possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload enviroment. <span class="term">$config["hook_tag"]</span>, if specified, now receives names of elements in closing tags.<br />
<br />
&#160; 1.1.10 - 22 October 2011. Fix for a bug in the <span class="term">tidy</span>&#160;functionality that caused the entire input to be replaced with a single space; new parameter, <span class="term">$config["direct_list_nest"]</span>&#160;to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.)<br />
@ -1795,9 +1808,9 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
<br />
&#160; Upgrading is as simple as replacing the previous version of <span class="term">htmLawed.php</span>&#160;(assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content.<br />
<br />
&#160; <strong>Important</strong>&#160; The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes:<br />
&#160; <strong>Important</strong>&#160; The following upgrades may affect the functionality of a specific htmLawed installation:<br />
<br />
&#160; (1) From version 1.1-1.1.10 to 1.1.11, if a <span class="term">hook_tag</span>&#160;function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a <span class="term">hook_tag</span>&#160;function receives only the element name. The <span class="term">hook_tag</span>&#160;function therefore may have to be edited. See <a href="#s3.4.9">section 3.4.9</a>.<br />
&#160; (1) From version 1.1-1.1.10 to 1.1.11 (or later), if a <span class="term">hook_tag</span>&#160;function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a <span class="term">hook_tag</span>&#160;function receives only the element name. The <span class="term">hook_tag</span>&#160;function therefore may have to be edited. See <a href="#s3.4.9">section 3.4.9</a>.<br />
<br />
&#160; Old versions of htmLawed may be available online. E.g., for version 1.0, check <a href="http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip">http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip</a>, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip.<br />
@ -2119,7 +2132,7 @@ A PHP Labware internal utility &#45; <a href="http://www.bioinformatics.org/phpl
</div>
</div>
<br />
<hr /><br /><br /><span class="subtle"><small>HTM version of <em><a href="htmLawed_README.txt">htmLawed_README.txt</a></em> generated on 06 Jun, 2012 using <a href="http://www.bioinformatics.org/phplabware/internal_utilities">rTxt2htm</a> from PHP Labware</small></span>
<hr /><br /><br /><span class="subtle"><small>HTM version of <em><a href="htmLawed_README.txt">htmLawed_README.txt</a></em> generated on 22 Jul, 2012 using <a href="http://www.bioinformatics.org/phplabware/internal_utilities">rTxt2htm</a> from PHP Labware</small></span>
</div><!-- ended div body -->
</div><!-- ended div top -->
</body>

View File

@ -1,8 +1,8 @@
/*
htmLawed_README.txt, 5 June 2012
htmLawed 1.1.11, 5 June 2012
htmLawed_README.txt, 22 July 2012
htmLawed 1.1.13, 22 July 2012
Copyright Santosh Patnaik
Dual licensed with LGPL 3 and GPL 2 or later
Dual licensed with LGPL 3 and GPL 2+
A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed
*/
@ -121,6 +121,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern
* ensure *unique* 'id' attribute values ^~`
* *double-quote* attribute values ^
* lower-case *standard attribute values* like 'password' ^`
* permit custom, non-standard attributes as well as custom rules for standard attributes ~`
* *attribute-specific URL protocol/scheme restriction* *~`
* disable *dynamic expressions* in 'style' values *~`
@ -171,7 +172,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern
-- 1.4 License & copyright ----------------------------------------o
htmLawed is free and open-source software dual licensed under LGPL license version 3:- http://www.gnu.org/licenses/lgpl-3.0.txt and GPL license version 2:- http://www.gnu.org/licenses/gpl-2.0.txt or later, and copyrighted by Santosh Patnaik, MD, PhD.
htmLawed is free and open-source software dual licensed under LGPL license version 3:- http://www.gnu.org/licenses/lgpl-3.0.txt, and GPL license version 2:- http://www.gnu.org/licenses/gpl-2.0.txt (or later), and copyrighted by Santosh Patnaik, MD, PhD.
-- 1.5 Terms used here --------------------------------------------o
@ -204,7 +205,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern
To easily *test* htmLawed using a form-based interface, use the provided demo:- htmLawedTest.php ('htmLawed.php' and 'htmLawedTest.php' should be in the same directory on the web-server).
*Note*: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to the htmLawed:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed website; the filtering itself can be configured, etc., as described here.
*Note*: For code for usage of the htmLawed class (for htmLawed in OOP), please refer to this page:- http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/oop.htm on the htmLawed website; the filtering itself can be configured, etc., as described here.
-- 2.1 Simple ------------------------------------------------------
@ -487,6 +488,12 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern
*Note*: To deny an attribute for all elements for which it is legal, '$config["deny_attribute"]' (see section:- #3.4) can be used instead of '$spec'. Also, attributes can be allowed element-specifically through '$spec' while being denied globally through '$config["deny_attribute"]'. The 'hook_tag' parameter (section:- #3.4.9) can also be used to implement the '$spec' functionality.
'$spec' can also be used to permit custom, non-standard attributes as well as custom rules for standard attributes. Thus, the following value of '$spec' will permit the custom uses of the standard 'rel' attribute in 'input' (not permitted as per standards) and of a non-standard attribute, 'vFlag', in 'img'.
$spec = 'img=vFlag; input=rel'
The attribute names can contain alphabets, colons (:) and hyphens (-) but must start with an alphabet.
-- 2.4 Performance time & memory usage ----------------------------o
@ -924,7 +931,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern
An option like '1' is useful, e.g., when a writer previews his submission, whereas one like '3' is useful before content is finalized and made available to all.
*Note:* In the example above, unlike '<*>', '<xml>' gets considered as a tag (even though there is no HTML element named 'xml'). In general, text matching the regular expression pattern '<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>' is considered a tag (phrase enclosed by the angled brackets '<' and '>', and starting [with an optional slash preceding] with an alphanumeric word that starts with an alphabet...).
*Note:* In the example above, unlike '<*>', '<xml>' gets considered as a tag (even though there is no HTML element named 'xml'). Thus, the 'keep_bad' parameter's value affects '<xml>' but not '<*>'. In general, text matching the regular expression pattern '<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>' is considered a tag (phrase enclosed by the angled brackets '<' and '>', and starting [with an optional slash preceding] with an alphanumeric word that starts with an alphabet...), and is subjected to the 'keep_bad' value.
Nesting/content rules for each of the 86 elements in htmLawed's default set (see section:- #3.3) are defined in function 'hl_bal()'. This means that if a non-standard element besides 'embed' is being permitted through '$config["elements"]', the element's tag content will end up getting removed if '$config["balance"]' is set to '1'.
@ -966,7 +973,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern
-- 3.4 Attributes ------------------------------------------------oo
htmLawed will only permit attributes described in the HTML specs (including deprecated ones). It also permits some attributes for use with the 'embed' element (the non-standard 'embed' element is supported in htmLawed because of its widespread use), and the the 'xml:space' attribute (valid only in XHTML 1.1). A list of such 111 attributes and the elements they are allowed in is in section:- #5.2.
htmLawed will only permit attributes described in the HTML specs (including deprecated ones). It also permits some attributes for use with the 'embed' element (the non-standard 'embed' element is supported in htmLawed because of its widespread use), and the the 'xml:space' attribute (valid only in XHTML 1.1). A list of such 111 attributes and the elements they are allowed in is in section:- #5.2. Using the '$spec' argument, htmLawed can be forced to permit custom, non-standard attributes as well as custom rules for standard attributes (section:- #2.3).
When '$config["deny_attribute"]' is not set, or set to '0', or empty ('""'), all the 111 attributes are permitted. Otherwise, '$config["deny_attribute"]' can be set as a list of comma-separated names of the denied attributes. 'on*' can be used to refer to the group of potentially dangerous, script-accepting attributes: 'onblur', 'onchange', 'onclick', 'ondblclick', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onreset', 'onselect' and 'onsubmit'.
@ -1194,7 +1201,7 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern
function my_tag_function($element, $attribute_array=0){
// If second argument is not received, it means a closing tag is being handled
if(ctype_digit($attribute_array)){
if(is_numeric($attribute_array)){
return "</$element>";
}
@ -1321,6 +1328,10 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern
`Version number - Release date. Notes`
1.1.13 - 22 July 2012. Added feature allowing use of custom, non-standard attributes or custom rules for standard attributes
1.1.12 - 5 July 2012. Fix for a bug in identifying an unquoted value of the 'face' attribute
1.1.11 - 5 June 2012. Fix for possible problem with handling of multi-byte characters in attribute values in an mbstring.func_overload enviroment. '$config["hook_tag"]', if specified, now receives names of elements in closing tags.
1.1.10 - 22 October 2011. Fix for a bug in the 'tidy' functionality that caused the entire input to be replaced with a single space; new parameter, '$config["direct_list_nest"]' to allow direct descendance of a list in a list. (5 April 2012. Dual licensing from LGPLv3 to LGPLv3 and GPLv2+.)
@ -1383,9 +1394,9 @@ A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/intern
Upgrading is as simple as replacing the previous version of 'htmLawed.php' (assuming it was not modified for customized features). As htmLawed output is almost always used in static documents, upgrading should not affect old, finalized content.
*Important* The following upgrades may affect the functionality of a specific htmLawed as indicated by their corresponding notes:
*Important* The following upgrades may affect the functionality of a specific htmLawed installation:
(1) From version 1.1-1.1.10 to 1.1.11, if a 'hook_tag' function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a 'hook_tag' function receives only the element name. The 'hook_tag' function therefore may have to be edited. See section:- #3.4.9.
(1) From version 1.1-1.1.10 to 1.1.11 (or later), if a 'hook_tag' function is in use: In version 1.1.11, elements in closing tags (and not just the opening tags) are also passed to the function. There are no attribute names/values to pass, so a 'hook_tag' function receives only the element name. The 'hook_tag' function therefore may have to be edited. See section:- #3.4.9.
Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip, for 1.1.1, htmLawed111.zip, and for 1.1.10, htmLawed1110.zip.

View File

@ -1,8 +1,8 @@
/*
htmLawed_TESTCASE.txt, 22 October 2011
htmLawed 1.1.11, 5 June 2012
htmLawed_TESTCASE.txt, 22 July 2012
htmLawed 1.1.13, 22 July 2012
Copyright Santosh Patnaik
Dual licensed with LGPL 3 and GPL 2 or later
Dual licensed with LGPL 3 and GPL 2+
A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed
*/
@ -27,6 +27,7 @@ character encoding to Unicode/UTF-8
<strong>Duplicated:</strong> <a id="id5" id="id6">a</a><br />
<strong>Deprecated:</strong> <a id="id7" target="self" name="n">a</a>, <hr noshade="noshade" /><br />
<strong>Casing:</strong> <a HREF=""></a><br />
<strong>Custom:</strong> <img alt="image" my:data="portrait" /><br />
<strong>Admin-restricted?:</strong> <a href="x" onclick="alert();"></a>
<h6>Attribute values</h6>
@ -266,6 +267,12 @@ I am <span itemprop="name">X</span> but people call me <span itemprop="nickname"
Find me at <a href="http://www.xy.com" itemprop="url">www.xy.com</a>
</div>
<h6>Microsoft Word</h6>
<strong>Proprietary tag</strong>: <p class=3DMsoNormal><o:p>&nbsp;</o:p></p><br />
<strong>XML declaration</strong>: <?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" /><br />
<strong>XML-invalid character code-point (may not replicate)</strong>: <p class=3DMsoNormal>“Where is he?” asked both Mary the one so lovely and Jane.</p>
<h6>Non-English text-1</h6>
Inscrieţi-vă acum la a Zecea Conferinţă Internaţională<br />
@ -394,3 +401,14 @@ script:eval(document.all.mycode.expr)')">hi</a><br />
3 < 4 <br />
3 > 4 <br />
> 3 <br />
<._.> hi! <br />
<<< ALERT >>> <br />
if(13<age AND 21>age){say 'teen'} <br />
age >51 and a smoking history of >51 pack-years <b>was</b> <br />
age > 51 and a smoking history of >51 pack-years <b>was</b> <br />
age <51 and a smoking history of <51 pack-years <b>was</b> <br />
age < 51 and a smoking history of < 51 pack-years <b>was</b> <br />
<b>age >51 and a smoking history of >51 pack-years</b> <br />
<b>age > 51 and a smoking history of >51 pack-years</b> <br />
<b>age <51 and a smoking history of <51 pack-years</b> <br />
<b>age < 51 and a smoking history of < 51 pack-years</b> <br />