mirror of
https://github.com/EGroupware/egroupware.git
synced 2025-02-04 12:30:04 +01:00
attempt to improve the parsing/cleaning of html messages.
This commit is contained in:
parent
21a2cab5a8
commit
0075e7f4cd
@ -321,6 +321,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Split it
|
# Split it
|
||||||
|
//_debug_array($attr);
|
||||||
$attrarr = $this->_hair($attr);
|
$attrarr = $this->_hair($attr);
|
||||||
|
|
||||||
# Go through $attrarr, and save the allowed attributes for this element
|
# Go through $attrarr, and save the allowed attributes for this element
|
||||||
@ -377,6 +378,7 @@
|
|||||||
###############################################################################
|
###############################################################################
|
||||||
function _hair($attr)
|
function _hair($attr)
|
||||||
{
|
{
|
||||||
|
//echo __METHOD__.'called<br>';
|
||||||
$attrarr = array();
|
$attrarr = array();
|
||||||
$mode = 0;
|
$mode = 0;
|
||||||
$attrname = '';
|
$attrname = '';
|
||||||
@ -393,7 +395,9 @@
|
|||||||
case 0: # attribute name, href for instance
|
case 0: # attribute name, href for instance
|
||||||
if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
|
if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
|
||||||
{
|
{
|
||||||
|
//echo 'mode 0:'.$match[0].'<br>';
|
||||||
$attrname = $match[1];
|
$attrname = $match[1];
|
||||||
|
//echo 'mode 0 -> attrname:'.$attrname.'<br>';
|
||||||
$working = $mode = 1;
|
$working = $mode = 1;
|
||||||
$attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
|
$attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
|
||||||
}
|
}
|
||||||
@ -404,6 +408,7 @@
|
|||||||
$working = 1;
|
$working = 1;
|
||||||
$mode = 2;
|
$mode = 2;
|
||||||
$attr = preg_replace('/^\s*=\s*/', '', $attr);
|
$attr = preg_replace('/^\s*=\s*/', '', $attr);
|
||||||
|
//echo 'mode 1:'.$attr.'<br>';
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (preg_match('/^\s+/', $attr)) # valueless
|
if (preg_match('/^\s+/', $attr)) # valueless
|
||||||
@ -420,9 +425,10 @@
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 2: # attribute value, a URL after href= for instance
|
case 2: # attribute value, a URL after href= for instance
|
||||||
|
//echo 'mode 2 Attrname:'.$attrname.'<br>';
|
||||||
if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) # "value"
|
if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) # "value"
|
||||||
{
|
{
|
||||||
$thisval = $this->_bad_protocol($match[1]);
|
$thisval = ($attrname == 'name' ? $match[1] : $this->_bad_protocol($match[1]));
|
||||||
$attrarr[] = array(
|
$attrarr[] = array(
|
||||||
'name' => $attrname,
|
'name' => $attrname,
|
||||||
'value' => $thisval,
|
'value' => $thisval,
|
||||||
@ -432,11 +438,12 @@
|
|||||||
$working = 1;
|
$working = 1;
|
||||||
$mode = 0;
|
$mode = 0;
|
||||||
$attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
|
$attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
|
||||||
|
//echo 'mode 2:'.$attr.'<br>';
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) # 'value'
|
if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) # 'value'
|
||||||
{
|
{
|
||||||
$thisval = $this->_bad_protocol($match[1]);
|
$thisval = ($attrname == 'name' ? $match[1] : $this->_bad_protocol($match[1]));
|
||||||
$attrarr[] = array(
|
$attrarr[] = array(
|
||||||
'name' => $attrname,
|
'name' => $attrname,
|
||||||
'value' => $thisval,
|
'value' => $thisval,
|
||||||
@ -446,11 +453,12 @@
|
|||||||
$working = 1;
|
$working = 1;
|
||||||
$mode = 0;
|
$mode = 0;
|
||||||
$attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
|
$attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
|
||||||
|
//echo 'mode 2:'.$attr.'<br>';
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) # value
|
if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) # value
|
||||||
{
|
{
|
||||||
$thisval = $this->_bad_protocol($match[1]);
|
$thisval = ($attrname == 'name' ? $match[1] : $this->_bad_protocol($match[1]));
|
||||||
$attrarr[] = array(
|
$attrarr[] = array(
|
||||||
'name' => $attrname,
|
'name' => $attrname,
|
||||||
'value' => $thisval,
|
'value' => $thisval,
|
||||||
@ -513,6 +521,7 @@
|
|||||||
###############################################################################
|
###############################################################################
|
||||||
function _bad_protocol_once($string)
|
function _bad_protocol_once($string)
|
||||||
{
|
{
|
||||||
|
if ($string[0]=='#') return $string; // its an anchor, dont check for protocol any further
|
||||||
$string2 = preg_split('/:|&#58;|&#x3a;/i', $string, 2);
|
$string2 = preg_split('/:|&#58;|&#x3a;/i', $string, 2);
|
||||||
if(isset($string2[1]) && !preg_match('%/\?%',$string2[0]))
|
if(isset($string2[1]) && !preg_match('%/\?%',$string2[0]))
|
||||||
{
|
{
|
||||||
@ -535,21 +544,24 @@
|
|||||||
###############################################################################
|
###############################################################################
|
||||||
function _bad_protocol_once2($string)
|
function _bad_protocol_once2($string)
|
||||||
{
|
{
|
||||||
$string2 = $this->_decode_entities($string2);
|
$string2 = $this->_decode_entities($string);
|
||||||
$string2 = preg_replace('/\s/', '', $string);
|
$string2 = preg_replace('/\s/', '', $string2);
|
||||||
$string2 = $this->_no_null($string2);
|
$string2 = $this->_no_null($string2);
|
||||||
|
$string2 = preg_replace('/\xad+/', '', $string2); # deals with Opera "feature"
|
||||||
$string2 = strtolower($string2);
|
$string2 = strtolower($string2);
|
||||||
|
|
||||||
$allowed = false;
|
$allowed = false;
|
||||||
foreach ($this->allowed_protocols as $one_protocol)
|
if(is_array($this->allowed_protocols) && count($this->allowed_protocols) > 0)
|
||||||
{
|
{
|
||||||
if (strtolower($one_protocol) == $string2)
|
foreach ($this->allowed_protocols as $one_protocol)
|
||||||
{
|
{
|
||||||
$allowed = true;
|
if (strtolower($one_protocol) == $string2)
|
||||||
break;
|
{
|
||||||
|
$allowed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($allowed)
|
if ($allowed)
|
||||||
{
|
{
|
||||||
return "$string2:";
|
return "$string2:";
|
||||||
|
@ -1072,13 +1072,13 @@ class translation
|
|||||||
if ($_body) {
|
if ($_body) {
|
||||||
if ($addbracesforendtag === true )
|
if ($addbracesforendtag === true )
|
||||||
{
|
{
|
||||||
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)</'.$endtag.'>~sim','',$_body);
|
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)</'.$endtag.'[\s]*>~simU','',$_body);
|
||||||
// remove left over tags, unfinished ones, and so on
|
// remove left over tags, unfinished ones, and so on
|
||||||
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
|
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
|
||||||
}
|
}
|
||||||
if ($addbracesforendtag === false )
|
if ($addbracesforendtag === false )
|
||||||
{
|
{
|
||||||
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)'.$endtag.'~sim','',$_body);
|
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)'.$endtag.'~simU','',$_body);
|
||||||
// remove left over tags, unfinished ones, and so on
|
// remove left over tags, unfinished ones, and so on
|
||||||
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
|
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
|
||||||
$_body = preg_replace('~'.$endtag.'~','',$_body);
|
$_body = preg_replace('~'.$endtag.'~','',$_body);
|
||||||
@ -1103,7 +1103,7 @@ class translation
|
|||||||
#print "</pre>";
|
#print "</pre>";
|
||||||
#print "<hr>";
|
#print "<hr>";
|
||||||
self::replaceTagsCompletley($_html,'style');
|
self::replaceTagsCompletley($_html,'style');
|
||||||
$Rules = array ('@<script[^>]*?>.*?</script>@si', // Strip out javascript
|
$Rules = array ('@<script[^>]*?>.*?</script>@siU', // Strip out javascript
|
||||||
'@&(quot|#34);@i', // Replace HTML entities
|
'@&(quot|#34);@i', // Replace HTML entities
|
||||||
'@&(amp|#38);@i', // Ampersand &
|
'@&(amp|#38);@i', // Ampersand &
|
||||||
'@&(lt|#60);@i', // Less Than <
|
'@&(lt|#60);@i', // Less Than <
|
||||||
|
Loading…
Reference in New Issue
Block a user