mirror of
https://github.com/EGroupware/egroupware.git
synced 2025-01-11 16:38:39 +01:00
Attempt to switch from HTMLPurifier to htmLawed and to replace kses with htmLawed calls; this is done for performance and resource reasons. The common entry point in EGroupware is still html::purify, but htmLawed is doing the work now. Let me know if you run into any issues; if we cannot find issues within EGroupware, the next step will be to clean out the related source code of kses and HTMLPurifier.
This commit is contained in:
parent
581b7cbd9c
commit
33633cd7a5
@ -38,6 +38,20 @@ class felamimail_bo
|
||||
*/
|
||||
static $tidy_config = array('clean'=>true,'output-html'=>true,'join-classes'=>true,'join-styles'=>true,'show-body-only'=>"auto",'word-2000'=>true,'wrap'=>0);
|
||||
|
||||
/**
|
||||
* static used to configure htmLawed, for use with emails
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
static $htmLawed_config = array('comment'=>1,
|
||||
//'keep_bad'=>2,
|
||||
'balance'=>0,//turn off tag-balancing (config['balance']=>0). That will not introduce any security risk; only standards-compliant tag nesting check/filtering will be turned off (basic tag-balance will remain; i.e., there won't be any unclosed tag, etc., after filtering)
|
||||
'tidy'=>1,
|
||||
'elements' => "* -script",
|
||||
'schemes'=>'href: file, ftp, http, https, mailto; src: cid, data, file, ftp, http, https; *:file, http, https',
|
||||
'hook_tag' =>"hl_email_tag_transform",
|
||||
);
|
||||
|
||||
/**
|
||||
* errorMessage
|
||||
*
|
||||
@ -1207,240 +1221,31 @@ class felamimail_bo
|
||||
//error_log($_html);
|
||||
//repair doubleencoded ampersands
|
||||
$_html = str_replace('&','&',$_html);
|
||||
self::replaceTagsCompletley($_html,'style'); // clean out empty or pagewide style definitions / left over tags
|
||||
self::replaceTagsCompletley($_html,'head'); // Strip out stuff in head
|
||||
self::replaceTagsCompletley($_html,'!\[if','<!\[endif\]>',false); // Strip out stuff in ifs
|
||||
self::replaceTagsCompletley($_html,'!--\[if','<!\[endif\]-->',false); // Strip out stuff in ifs
|
||||
if (stripos($_html,'style')!==false) self::replaceTagsCompletley($_html,'style'); // clean out empty or pagewide style definitions / left over tags
|
||||
if (stripos($_html,'head')!==false) self::replaceTagsCompletley($_html,'head'); // Strip out stuff in head
|
||||
if (stripos($_html,'![if')!==false && stripos($_html,'<![endif]>')!==false) self::replaceTagsCompletley($_html,'!\[if','<!\[endif\]>',false); // Strip out stuff in ifs
|
||||
if (stripos($_html,'!--[if')!==false && stripos($_html,'<![endif]-->')!==false) self::replaceTagsCompletley($_html,'!--\[if','<!\[endif\]-->',false); // Strip out stuff in ifs
|
||||
//error_log($_html);
|
||||
// force the use of kses, as it is still have the edge over purifier with some stuff
|
||||
$usepurify = false;
|
||||
$usepurify = true;
|
||||
if ($usepurify)
|
||||
{
|
||||
// we need a customized config, as we may allow external images, $GLOBALS['egw_info']['user']['preferences']['felamimail']['allowExternalIMGs']
|
||||
if (get_magic_quotes_gpc() === 1) $_html = stripslashes($_html);
|
||||
$_html = html::purify($_html);
|
||||
// Strip out doctype in head, as htmlLawed cannot handle it TODO: Consider extracting it and adding it afterwards
|
||||
if (stripos($_html,'!doctype')!==false) self::replaceTagsCompletley($_html,'!doctype');
|
||||
if (stripos($_html,'?xml:namespace')!==false) self::replaceTagsCompletley($_html,'\?xml:namespace','/>',false);
|
||||
if (strpos($_html,'!CURSOR')!==false) self::replaceTagsCompletley($_html,'!CURSOR');
|
||||
// purify got switched to htmLawed
|
||||
$_html = html::purify($_html,self::$htmLawed_config,array(),true);
|
||||
// clean out comments , should not be needed as purify should do the job.
|
||||
$search = array(
|
||||
'@url\(http:\/\/[^\)].*?\)@si', // url calls e.g. in style definitions
|
||||
'@<!--[\s\S]*?[ \t\n\r]*-->@', // Strip multi-line comments including CDATA
|
||||
);
|
||||
//$_html = preg_replace($search,"",$_html);
|
||||
// remove non printable chars
|
||||
$_html = preg_replace('/([\000-\012])/','',$_html);
|
||||
//error_log($_html);
|
||||
}
|
||||
else
|
||||
{
|
||||
//echo $_html;exit;
|
||||
$kses = new kses();
|
||||
$kses->AddProtocol('cid');
|
||||
// since check protocoll is called for every value associated to an attribute we have to add color and background-color to the valid protocolls
|
||||
$kses->AddProtocol('color');
|
||||
$kses->AddProtocol('font-size');
|
||||
$kses->AddProtocol('background-color');
|
||||
#$kses->AddHTML('html', array(
|
||||
# 'xmlns' => array(),
|
||||
# 'lang' => array(),
|
||||
# )
|
||||
#);
|
||||
#$kses->AddHTML('head');
|
||||
#$kses->AddHTML('body', array(
|
||||
# 'class' => array(),
|
||||
# 'id' => array(),
|
||||
# )
|
||||
#);
|
||||
#$kses->AddHTML('meta', array(
|
||||
# 'http-equiv' => array(),
|
||||
# 'content' => array(),
|
||||
# )
|
||||
#);
|
||||
#$kses->AddHTML('link',array(
|
||||
# 'rel' => array(), // ="stylesheet"
|
||||
# 'type' => array(), //="text/css"
|
||||
# 'href' => array(),
|
||||
# 'media' => array(),
|
||||
# )
|
||||
#);
|
||||
$kses->AddHTML(
|
||||
'p', array(
|
||||
"class" => array('maxlen' => 20),
|
||||
'align' => array('minlen' => 1, 'maxlen' => 10)
|
||||
)
|
||||
);
|
||||
$kses->AddHTML("tbody");
|
||||
$kses->AddHTML("thead");
|
||||
$kses->AddHTML("tt");
|
||||
$kses->AddHTML("br");
|
||||
$kses->AddHTML("b");
|
||||
$kses->AddHTML("u");
|
||||
$kses->AddHTML("s");
|
||||
$kses->AddHTML("i");
|
||||
$kses->AddHTML('em');
|
||||
$kses->AddHTML("strong");
|
||||
$kses->AddHTML("strike");
|
||||
$kses->AddHTML("center");
|
||||
$kses->AddHTML(
|
||||
"font",array(
|
||||
"class" => array('maxlen' => 20),
|
||||
"color" => array('maxlen' => 20),
|
||||
"size"=>array('maxlen'=>2)
|
||||
)
|
||||
);
|
||||
$kses->AddHTML(
|
||||
"hr",array(
|
||||
"class" => array('maxlen' => 20),
|
||||
"style" => array('minlen' => 1),
|
||||
)
|
||||
);
|
||||
$kses->AddHTML(
|
||||
"div",array(
|
||||
"class" => array('maxlen' => 20),
|
||||
'align' => array('maxlen' => 10)
|
||||
)
|
||||
);
|
||||
$kses->AddHTML("ul");
|
||||
$kses->AddHTML(
|
||||
"ol",array(
|
||||
"class" => array('maxlen' => 20),
|
||||
"type" => array('maxlen' => 20)
|
||||
)
|
||||
);
|
||||
$kses->AddHTML("li");
|
||||
$kses->AddHTML("h1");
|
||||
$kses->AddHTML("h2");
|
||||
$kses->AddHTML("h3");
|
||||
$kses->AddHTML(
|
||||
"style",array(
|
||||
"type" => array('maxlen' => 20),
|
||||
"color" => array('maxlen' => 20),
|
||||
"background-color" => array('maxlen' => 20),
|
||||
"background" => array('maxlen' => 5),
|
||||
)
|
||||
);
|
||||
|
||||
$kses->AddHTML("select");
|
||||
$kses->AddHTML(
|
||||
"option",array(
|
||||
"class" => array('maxlen' => 20),
|
||||
"value" => array('maxlen' => 45),
|
||||
"selected" => array()
|
||||
)
|
||||
);
|
||||
|
||||
$kses->AddHTML(
|
||||
"a", array(
|
||||
"class" => array('maxlen' => 20),
|
||||
"href" => array('maxlen' => 348, 'minlen' => 10),
|
||||
"name" => array('minlen' => 2),
|
||||
'target' => array('maxlen' => 10)
|
||||
)
|
||||
);
|
||||
|
||||
$kses->AddHTML(
|
||||
"pre", array(
|
||||
"class" => array('maxlen' => 20),
|
||||
"wrap" => array('maxlen' => 10)
|
||||
)
|
||||
);
|
||||
|
||||
// Allows 'td' tag with colspan|rowspan|class|style|width|nowrap attributes,
|
||||
// colspan has minval of 2 and maxval of 5
|
||||
// rowspan has minval of 3 and maxval of 6
|
||||
// class has minlen of 1 char and maxlen of 10 chars
|
||||
// style has minlen of 5 chars and maxlen of 100 chars
|
||||
// width has maxval of 100
|
||||
// nowrap is valueless
|
||||
$kses->AddHTML(
|
||||
"table",array(
|
||||
"class" => array("minlen" => 1, 'maxlen' => 20),
|
||||
"border" => array("minlen" => 1, 'maxlen' => 10),
|
||||
"cellpadding" => array("minlen" => 0, 'maxlen' => 10),
|
||||
"cellspacing" => array("minlen" => 0, 'maxlen' => 10),
|
||||
"width" => array("maxlen" => 5),
|
||||
"style" => array('minlen' => 5, 'maxlen' => 100),
|
||||
"bgcolor" => array('maxlen' => 10),
|
||||
"align" => array('maxlen' => 10),
|
||||
"valign" => array('maxlen' => 10),
|
||||
"bordercolor" => array('maxlen' => 10)
|
||||
)
|
||||
);
|
||||
$kses->AddHTML(
|
||||
"tr",array(
|
||||
"colspan" => array('minval' => 2, 'maxval' => 5),
|
||||
"rowspan" => array('minval' => 3, 'maxval' => 6),
|
||||
"class" => array("minlen" => 1, 'maxlen' => 20),
|
||||
"width" => array("maxlen" => 5),
|
||||
"style" => array('minlen' => 5, 'maxlen' => 100),
|
||||
"align" => array('maxlen' => 10),
|
||||
'bgcolor' => array('maxlen' => 10),
|
||||
"valign" => array('maxlen' => 10),
|
||||
"nowrap" => array('valueless' => 'y')
|
||||
)
|
||||
);
|
||||
$kses->AddHTML(
|
||||
"td",array(
|
||||
"colspan" => array('minval' => 2, 'maxval' => 5),
|
||||
"rowspan" => array('minval' => 3, 'maxval' => 6),
|
||||
"class" => array("minlen" => 1, 'maxlen' => 20),
|
||||
"width" => array("maxlen" => 5),
|
||||
"style" => array('minlen' => 5, 'maxlen' => 100),
|
||||
"align" => array('maxlen' => 10),
|
||||
'bgcolor' => array('maxlen' => 10),
|
||||
"valign" => array('maxlen' => 10),
|
||||
"nowrap" => array('valueless' => 'y')
|
||||
)
|
||||
);
|
||||
$kses->AddHTML(
|
||||
"th",array(
|
||||
"colspan" => array('minval' => 2, 'maxval' => 5),
|
||||
"rowspan" => array('minval' => 3, 'maxval' => 6),
|
||||
"class" => array("minlen" => 1, 'maxlen' => 20),
|
||||
"width" => array("maxlen" => 5),
|
||||
"style" => array('minlen' => 5, 'maxlen' => 100),
|
||||
"align" => array('maxlen' => 10),
|
||||
'bgcolor' => array('maxlen' => 10),
|
||||
"valign" => array('maxlen' => 10),
|
||||
"nowrap" => array('valueless' => 'y')
|
||||
)
|
||||
);
|
||||
$kses->AddHTML(
|
||||
"span",array(
|
||||
"class" => array("minlen" => 1, 'maxlen' => 20),
|
||||
"style" => array('minlen' => 5, 'maxlen' => 100)
|
||||
)
|
||||
);
|
||||
$kses->AddHTML(
|
||||
"blockquote",array(
|
||||
"class" => array("minlen" => 1, 'maxlen' => 20),
|
||||
"style" => array("minlen" => 1),
|
||||
"cite" => array('maxlen' => 30),
|
||||
"type" => array('maxlen' => 10),
|
||||
"dir" => array("minlen" => 1, 'maxlen' => 10)
|
||||
)
|
||||
);
|
||||
$kses->AddHTML(
|
||||
'img',array(
|
||||
"class" => array('maxlen' => 20),
|
||||
"src" => array("minlen" => 4, 'maxlen' => 384, $GLOBALS['egw_info']['user']['preferences']['felamimail']['allowExternalIMGs'] ? '' : 'match' => '/^cid:.*/'),
|
||||
"align" => array("minlen" => 1),
|
||||
"border" => array('maxlen' => 30),
|
||||
"width" => array("minlen" => 1, 'maxlen' => 3),
|
||||
"height" => array("minlen" => 1, 'maxlen' => 3),
|
||||
)
|
||||
);
|
||||
|
||||
// no scripts allowed
|
||||
// clean out comments
|
||||
$search = array(
|
||||
'@<!--[\s\S]*?[ \t\n\r]*-->@', // Strip multi-line comments including CDATA
|
||||
'@url\(http:\/\/[^\)].*?\)@si', // url calls e.g. in style definitions
|
||||
);
|
||||
//error_log(__METHOD__.$_html);
|
||||
$_html = preg_replace($search,"",$_html);
|
||||
// do the kses clean out first, to avoid general problems with content later on
|
||||
$_html = $kses->Parse($_html);
|
||||
// remove non printable chars
|
||||
$_html = preg_replace('/([\000-\012])/','',$_html);
|
||||
$_html = preg_replace('/([\000-\012])/','',$_html);
|
||||
//error_log($_html);
|
||||
}
|
||||
// using purify above should have tidied the tags already sufficiently
|
||||
@ -1464,14 +1269,8 @@ class felamimail_bo
|
||||
{
|
||||
//$to = ini_get('max_execution_time');
|
||||
//@set_time_limit(10);
|
||||
//$p = microtime(true);
|
||||
$htmLawed = new egw_htmLawed();
|
||||
//$pela = microtime(true);
|
||||
$_html = $htmLawed->egw_htmLawed($_html);
|
||||
//$le = microtime(true);
|
||||
//$a=$pela-$p;
|
||||
//$b=$le-$pela;
|
||||
//error_log(__METHOD__.__LINE__.' new egw_htmLawed:'.$a.' htmlLawed took:'.$b);
|
||||
//error_log(__METHOD__.__LINE__.$_html);
|
||||
//@set_time_limit($to);
|
||||
}
|
||||
|
@ -1054,6 +1054,12 @@ blockquote[type=cite] {
|
||||
font-size: 11px;
|
||||
}
|
||||
</style>'.$additionalStyle.'
|
||||
<script type="text/javascript">
|
||||
function GoToAnchor(aname)
|
||||
{
|
||||
window.location.hash=aname;
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
';
|
||||
@ -1560,8 +1566,7 @@ blockquote[type=cite] {
|
||||
{
|
||||
$link = $GLOBALS['egw']->link('/index.php',array('menuaction' => 'felamimail.uicompose.compose'));
|
||||
$newBody = preg_replace("/href=(\"|\')mailto:([\w,\-,\/,\?,\=,\.,&,!\n,\%,@,\*,#,:,~,\+]+)(\"|\')/ie",
|
||||
"'href=\"#\"'.' onclick=\"egw_openWindowCentered(\'$link&send_to='.base64_encode('$2').'\', \'compose\', 700, egw_getWindowOuterHeight());\"'", $newBody);
|
||||
// "'href=\"$link&send_to='.base64_encode('$2').'\"'", $newBody);
|
||||
"'href=\"$link&send_to='.base64_encode('$2').'\"'.' target=\"compose\" onclick=\"window.open(this,this.target,\'dependent=yes,width=700,height=egw_getWindowOuterHeight(),location=no,menubar=no,toolbar=no,scrollbars=yes,status=yes\'); return false;\"'", $newBody);
|
||||
//print "<pre>".htmlentities($newBody)."</pre><hr>";
|
||||
}
|
||||
// replace emails within the text with clickable links.
|
||||
|
@ -80,10 +80,8 @@ class egw_htmLawed
|
||||
*/
|
||||
|
||||
$this->Configuration = array('comment'=>0,
|
||||
'balance'=>0,
|
||||
//'keep_bad'=>3,
|
||||
'balance'=>0,//turn off tag-balancing (config['balance']=>0). That will not introduce any security risk; only standards-compliant tag nesting check/filtering will be turned off (basic tag-balance will remain; i.e., there won't be any unclosed tag, etc., after filtering)
|
||||
'tidy'=>1,
|
||||
//'direct_list_nest'=>1,
|
||||
'elements' => "* -script",
|
||||
'schemes'=>'href: file, ftp, http, https, mailto; src: cid, data, file, ftp, http, https; *:file, http, https',
|
||||
'hook_tag' =>"hl_my_tag_transform",
|
||||
@ -101,6 +99,7 @@ class egw_htmLawed
|
||||
*/
|
||||
function egw_htmLawed($html2check, $Config=null, $Spec=array())
|
||||
{
|
||||
//error_log(__METHOD__.__LINE__.' Input:'.$html2check);
|
||||
if (is_array($Config) && is_array($this->Configuration)) $Config = array_merge($this->Configuration, $Config);
|
||||
if (empty($Config)) $Config = $this->Configuration;
|
||||
if (empty($Spec)) $Spec = $this->Spec;
|
||||
@ -113,16 +112,25 @@ class egw_htmLawed
|
||||
* hl_my_tag_transform
|
||||
*
|
||||
* function to provide individual checks for element attribute pairs
|
||||
* implemented so far: img checking for alt attribute == image; set this to empty
|
||||
* implemented so far: img checking for alt attribute == image; set this to empty
|
||||
* a checking for title, replacing @
|
||||
*/
|
||||
function hl_my_tag_transform($element, $attribute_array)
|
||||
{
|
||||
//error_log(__METHOD__.__LINE__." ".$element.'->'.array2string($attribute_array));
|
||||
//if ($element=='img') error_log(__METHOD__.__LINE__." ".$element.'->'.array2string($attribute_array));
|
||||
// Elements other than 'img' or 'img' without a 'img' attribute are returned unchanged
|
||||
if($element == 'img' && isset($attribute_array['alt']))
|
||||
if($element == 'img')
|
||||
{
|
||||
// Re-build 'alt'
|
||||
$attribute_array['alt'] = ($attribute_array['alt']=='image'?'':$attribute_array['alt']);
|
||||
if (isset($attribute_array['alt'])) $attribute_array['alt'] = ($attribute_array['alt']=='image'?'':$attribute_array['alt']);
|
||||
if (isset($attribute_array['alt'])&&strpos($attribute_array['alt'],'@')!==false) $attribute_array['alt']=str_replace('@','(at)',$attribute_array['alt']);
|
||||
}
|
||||
if($element == 'a')
|
||||
{
|
||||
if (isset($attribute_array['title']))
|
||||
{
|
||||
if (strpos($attribute_array['title'],'@')!==false) $attribute_array['title']=str_replace('@','(at)',$attribute_array['title']);
|
||||
}
|
||||
}
|
||||
/*
|
||||
// Elements other than 'span' or 'span' without a 'style' attribute are returned unchanged
|
||||
@ -165,3 +173,67 @@ function hl_my_tag_transform($element, $attribute_array)
|
||||
return "<{$element}{$attributes}". (isset($empty_elements[$element]) ? ' /' : ''). '>';
|
||||
}
|
||||
|
||||
/**
 * hl_email_tag_transform
 *
 * htmLawed hook_tag callback for email content: receives the name of an opening
 * tag and its attributes, adjusts some element/attribute pairs and returns the
 * rebuilt opening tag.
 *
 * implemented so far:
 *	img	- an alt attribute of exactly "image" is emptied, "@" in alt becomes "(at)"
 *		- a src that is not longer than 4 and shorter than 400 bytes is blocked
 *		- unless the felamimail preference allowExternalIMGs is set, every src
 *		  not starting with "cid:" is blocked
 *		  (blocked: the reason and the original src are appended to alt, title is
 *		  set to that text if it was not set, and src is replaced by
 *		  common::image('phpgwapi','dialog_error'); only one reason is reported)
 *	a	- "@" in title becomes "(at)", "@" in href becomes "%40"
 *		- if both name and id are set, id is overwritten with name
 *		- a href to a local anchor ("#name") is turned into a GoToAnchor() call,
 *		  to navigate without reloading the page
 *
 * @param string $element name of the element
 * @param array $attribute_array attribute name => attribute value pairs
 * @return string opening tag including its attributes
 */
function hl_email_tag_transform($element, $attribute_array)
{
	if ($element === 'img')
	{
		// Re-build 'alt'
		if (isset($attribute_array['alt']))
		{
			if ($attribute_array['alt'] === 'image') $attribute_array['alt'] = '';
			$attribute_array['alt'] = str_replace('@','(at)',$attribute_array['alt']);
		}
		if (isset($attribute_array['src']))
		{
			$src = $attribute_array['src'];
			$srcLength = strlen($src);
			$blockReason = null;
			if (!($srcLength > 4 && $srcLength < 400))
			{
				$blockReason = 'blocked (reason: url length):';
			}
			// only images embedded in the mail (cid:) pass when external images are not allowed
			elseif (empty($GLOBALS['egw_info']['user']['preferences']['felamimail']['allowExternalIMGs']) &&
				strpos($src,'cid:') !== 0)
			{
				$blockReason = 'blocked external image:';
			}
			if ($blockReason !== null)
			{
				// alt may be missing on the original tag
				$alt = isset($attribute_array['alt']) ? $attribute_array['alt'] : '';
				$attribute_array['alt'] = $alt.' ['.$blockReason.$src.']';
				if (!isset($attribute_array['title'])) $attribute_array['title'] = $attribute_array['alt'];
				$attribute_array['src'] = common::image('phpgwapi','dialog_error');
			}
		}
	}
	if ($element === 'a')
	{
		if (isset($attribute_array['title']))
		{
			$attribute_array['title'] = str_replace('@','(at)',$attribute_array['title']);
		}
		// NOTE(review): id is only touched when it is already set - confirm that this
		// was not meant to add a missing id for named anchors
		if (isset($attribute_array['name']) && isset($attribute_array['id'])) $attribute_array['id'] = $attribute_array['name'];
		// anchors without href (e.g. <a name="...">) are left alone
		if (isset($attribute_array['href']))
		{
			$attribute_array['href'] = str_replace('@','%40',$attribute_array['href']);
			if (strpos($attribute_array['href'],'#') === 0)
			{
				$anchor = trim(substr($attribute_array['href'],1));
				// the anchor name comes from the mail and ends up inside a javascript: URL,
				// so only plain name characters are accepted; no quote, backslash or percent
				// sign can then terminate the string literal. Everything else keeps its
				// ordinary "#..." href.
				if (preg_match('/\A[A-Za-z0-9_.:\-]*\z/',$anchor))
				{
					$attribute_array['href'] = "javascript:GoToAnchor('".$anchor."');";
				}
			}
		}
	}

	// Build the attributes string
	$attributes = '';
	foreach($attribute_array as $k=>$v)
	{
		$attributes .= " {$k}=\"{$v}\"";
	}

	// Return the opening tag with attributes, empty elements are closed right away
	static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
	return "<{$element}{$attributes}". (isset($empty_elements[$element]) ? ' /' : ''). '>';
}
|
||||
|
||||
|
@ -183,101 +183,6 @@ class html
|
||||
return preg_replace( $Expr, "<a href=\"http://$0\" target=\"_blank\">$0</a>", $result );
|
||||
}
|
||||
|
||||
/**
 * activates URLs in a text, URLs get replaced by html-links using htmlpurify
 *
 * mailto: addresses are first rewritten into spam-saving javascript links, the
 * result is then passed to self::purify() with a config that has
 * AutoFormat.Linkify switched on.
 *
 * @param string $content text containing URLs
 * @return string html with activated links
 */
static function activateLinks($content)
{
	if (!$content || strlen($content) < 20) return $content;	// performance

	// Exclude everything which is already a link: a mailto: directly preceded by
	// a double quote or by href= (with optional single whitespace) is not touched
	$NotAnchor = '(?<!"|href=|href\s=\s|href=\s|href\s=)';

	// spamsaver emailaddress
	$result = preg_replace('/'.$NotAnchor.'mailto:([a-z0-9._-]+)@([a-z0-9_-]+)\.([a-z0-9._-]+)/i',
		'<a href="#" onclick="document.location=\'mai\'+\'lto:\\1\'+unescape(\'%40\')+\'\\2.\\3\'; return false;">\\1 AT \\2 DOT \\3</a>',
		$content);

	$config = self::purifyCreateDefaultConfig();
	$config->set('Core.Encoding', (self::$charset?self::$charset:'UTF-8'));
	// maybe the two following lines are useful for caching???
	$config->set('HTML.DefinitionID', 'activatelinks');
	$config->set('HTML.DefinitionRev', 1);
	// doctype and tidylevel
	$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
	$config->set('HTML.TidyLevel', 'light');
	// EnableID is needed for anchor tags
	$config->set('Attr.EnableID',true);
	// enable target attributes
	$config->set('Attr.AllowedFrameTargets','_blank,_top,_self,_parent');
	// actual allowed tags and attributes
	$config->set('URI.AllowedSchemes', array('http'=>true, 'https'=>true, 'ftp'=>true, 'file'=>true, 'cid'=>true, 'data'=>true));
	$config->set('AutoFormat.RemoveEmpty', true);
	$config->set('HTML.Allowed', 'br,p[align],b,i,u,s,em,pre,tt,strong,strike,sub,sup,center,div[align|style],hr[class|style],'.
		'font[size|color],'.
		'ul[type],ol[type|start],li,'.
		'h1,h2,h3,h4,h5,h6,'.
		'span[class|style],'.
		'table[class|border|cellpadding|cellspacing|width|style|align|bgcolor|align],'.
		'tbody,thead,tfoot,colgroup,'.
		'col[width|span],'.
		'blockquote[class|cite|dir],'.
		'tr[class|style|align|bgcolor|align|valign],'.
		'td[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
		'th[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
		'a[href|target|name|title],'.
		'img[src|alt|title|align|style|width|height]');
	$config->set('Attr.DefaultInvalidImage', 'Image removed by htmlpurify');
	$config->set('Cache.SerializerPath', ($GLOBALS['egw_info']['server']['temp_dir']?$GLOBALS['egw_info']['server']['temp_dir']:sys_get_temp_dir()));
	// let the purifier turn plain URLs into links
	$config->set('AutoFormat.Linkify',true);
	return self::purify($result,$config);
}
|
||||
|
||||
/**
 * deactivates URLs in html: the html is passed to self::purify() with a config
 * that has AutoFormat.DisplayLinkURI switched on
 *
 * @param string $_html html containing links
 * @return string whatever self::purify() returns for that config
 */
static function deactivateLinks($_html)
{
	$purifierConfig = self::purifyCreateDefaultConfig();

	// directory handed to the purifier as Cache.SerializerPath
	$serializerPath = $GLOBALS['egw_info']['server']['temp_dir'];
	if (!$serializerPath) $serializerPath = sys_get_temp_dir();

	// directive / value pairs, applied in exactly this order
	$directives = array(
		array('Core.Encoding', (self::$charset?self::$charset:'UTF-8')),
		// maybe the two following lines are useful for caching???
		array('HTML.DefinitionID', 'deactivatelinks'),
		array('HTML.DefinitionRev', 1),
		// doctype and tidylevel
		array('HTML.Doctype', 'XHTML 1.0 Transitional'),
		array('HTML.TidyLevel', 'light'),
		// EnableID is needed for anchor tags
		array('Attr.EnableID', true),
		// enable target attributes
		array('Attr.AllowedFrameTargets', '_blank,_top,_self,_parent'),
		// actual allowed tags and attributes
		array('URI.AllowedSchemes', array('http'=>true, 'https'=>true, 'ftp'=>true, 'file'=>true, 'cid'=>true, 'data'=>true)),
		array('AutoFormat.RemoveEmpty', true),
		array('HTML.Allowed', 'br,p[align],b,i,u,s,em,pre,tt,strong,strike,sub,sup,center,div[align|style],hr[class|style],'.
			'font[size|color],'.
			'ul[type],ol[type|start],li,'.
			'h1,h2,h3,h4,h5,h6,'.
			'span[class|style],'.
			'table[class|border|cellpadding|cellspacing|width|style|align|bgcolor|align],'.
			'tbody,thead,tfoot,colgroup,'.
			'col[width|span],'.
			'blockquote[class|cite|dir],'.
			'tr[class|style|align|bgcolor|align|valign],'.
			'td[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
			'th[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
			'a[href|target|name|title],'.
			'img[src|alt|title|align|style|width|height]'),
		array('Attr.DefaultInvalidImage', 'Image removed by htmlpurify'),
		array('Cache.SerializerPath', $serializerPath),
		array('AutoFormat.DisplayLinkURI', true),
	);
	foreach($directives as $directive)
	{
		$purifierConfig->set($directive[0], $directive[1]);
	}

	return self::purify($_html,$purifierConfig);
}
|
||||
|
||||
/**
|
||||
* escapes chars with special meaning in html as entities
|
||||
*
|
||||
@ -1393,129 +1298,21 @@ class html
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
 * creates the HTMLPurifier default config
 *
 * Loads the HTMLPurifier bootstrap files first and then asks
 * HTMLPurifier_Config for its default configuration.
 *
 * @return HTMLPurifier_Config object
 */
static function purifyCreateDefaultConfig()
{
	// in this order:
	// - adds htmlpurifiers library to include_path
	// - includes most of the required files, for best performance with bytecode caches
	// - installs an autoloader for other files
	$bootstrapFiles = array(
		'/htmlpurifier/library/HTMLPurifier.path.php',
		'/htmlpurifier/library/HTMLPurifier.includes.php',
		'/htmlpurifier/library/HTMLPurifier.autoload.php',
	);
	foreach($bootstrapFiles as $bootstrapFile)
	{
		require_once(EGW_API_INC.$bootstrapFile);
	}
	return HTMLPurifier_Config::createDefault();
}
|
||||
|
||||
/**
 * creates a HTMLPurifier default config for the needs of HTMLTidy
 *
 * Starts from html::purifyCreateDefaultConfig() and applies the directives
 * listed below, one after the other.
 *
 * @return HTMLPurifier_Config object
 */
static function purifyCreateHTMLTidyConfig()
{
	$tidyConfig = html::purifyCreateDefaultConfig();

	// directory handed to the purifier as Cache.SerializerPath
	$serializerPath = $GLOBALS['egw_info']['server']['temp_dir'];
	if (!$serializerPath) $serializerPath = sys_get_temp_dir();

	// directive / value pairs, applied in exactly this order
	// (URI.AllowedSchemes is set twice, the later value includes data)
	$directives = array(
		// maybe the two following lines are useful for caching???
		array('HTML.DefinitionID', 'egroupwareHTMLTidyConfig'),
		array('HTML.DefinitionRev', 1),
		array('Core.Encoding', (self::$charset?self::$charset:'UTF-8')),
		array('Core.RemoveInvalidImg', false),
		// doctype and tidylevel
		array('HTML.Doctype', 'XHTML 1.0 Transitional'),
		array('HTML.TidyLevel', 'light'),
		array('Attr.EnableID', true),
		// enable target attributes
		array('Attr.AllowedFrameTargets', '_blank,_top,_self,_parent'),
		// actual allowed tags and attributes
		array('URI.AllowedSchemes', array('http'=>true, 'https'=>true, 'ftp'=>true, 'file'=>true, 'cid'=>true)),
		array('AutoFormat.RemoveEmpty', true),
		array('HTML.Allowed', 'br,p[align],b,i,u,s,em,pre,tt,strong,strike,sub,sup,center,div[align|style],hr[class|style],'.
			'font[size|color],'.
			'ul[type],ol[type|start],li,'.
			'h1,h2,h3,h4,h5,h6,'.
			'span[class|style],'.
			'table[class|border|cellpadding|cellspacing|width|style|align|bgcolor|align],'.
			'tbody,thead,tfoot,colgroup,'.
			'col[width|span],'.
			'blockquote[class|cite|dir],'.
			'tr[class|style|align|bgcolor|align|valign],'.
			'td[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
			'th[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
			'a[href|target|name|title],'.
			'img[src|alt|title|align|style|width|height]'),
		array('URI.AllowedSchemes', array('http'=>true, 'https'=>true, 'ftp'=>true, 'file'=>true, 'cid'=>true, 'data'=>true)),
		array('Cache.SerializerPath', $serializerPath),
	);
	foreach($directives as $directive)
	{
		$tidyConfig->set($directive[0], $directive[1]);
	}

	return $tidyConfig;
}
|
||||
|
||||
/**
|
||||
* Runs HTMLPurifier over supplied html to remove malicious code
|
||||
*
|
||||
* @param string $html
|
||||
* @param HTMLPurifier_Config $config=null
|
||||
*/
|
||||
static function purify($html,$config=null)
|
||||
static function purify($html,$config=null,$spec=array(),$_force=false)
|
||||
{
|
||||
static $purifier;
|
||||
|
||||
$defaultConfig = array('valid_xhtml'=>1,'safe'=>1);
|
||||
if (empty($html)) return $html; // no need to process further
|
||||
|
||||
if (is_null($purifier) || !is_null($config))
|
||||
{
|
||||
// add htmlpurifiers library to include_path
|
||||
require_once(EGW_API_INC.'/htmlpurifier/library/HTMLPurifier.path.php');
|
||||
// include most of the required files, for best performance with bytecode caches
|
||||
require_once(EGW_API_INC.'/htmlpurifier/library/HTMLPurifier.includes.php');
|
||||
// installs an autoloader for other files
|
||||
require_once(EGW_API_INC.'/htmlpurifier/library/HTMLPurifier.autoload.php');
|
||||
// testcase to test the processing of purify
|
||||
//$html = "<h1 onclick=\"alert('hallo');\"> h1 </h1>".$html;
|
||||
if (is_null($config))
|
||||
{
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core.Encoding', (self::$charset?self::$charset:'UTF-8'));
|
||||
// maybe the two following lines are useful for caching???
|
||||
$config->set('HTML.DefinitionID', 'egroupware');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
// doctype and tidylevel
|
||||
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML.TidyLevel', 'light');
|
||||
// EnableID is needed for anchor tags
|
||||
$config->set('Attr.EnableID',true);
|
||||
// enable target attributes
|
||||
$config->set('Attr.AllowedFrameTargets','_blank,_top,_self,_parent');
|
||||
// actual allowed tags and attributes
|
||||
$config->set('HTML.Allowed', 'br,p[class|align|style],b,i,u,s,em,pre,tt,strong,strike,sub,sup,center,div[class|align|style],hr[class|style],'.
|
||||
'ul[class|type],ol[class|type|start],li,'.
|
||||
'h1,h2,h3,h4,h5,h6,'.
|
||||
'span[class|style],'.
|
||||
'table[class|border|cellpadding|cellspacing|width|style|align|bgcolor|align],'.
|
||||
'tbody,thead,tfoot,colgroup,'.
|
||||
'col[class|width|span],'.
|
||||
'blockquote[class|cite|dir],'.
|
||||
'tr[class|style|align|bgcolor|align|valign],'.
|
||||
'td[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
|
||||
'th[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
|
||||
'a[class|href|target|name|title],'.
|
||||
'img[class|src|alt|title|align|style|width|height]');
|
||||
$config->set('Cache.SerializerPath', ($GLOBALS['egw_info']['server']['temp_dir']?$GLOBALS['egw_info']['server']['temp_dir']:sys_get_temp_dir()));
|
||||
}
|
||||
$purifier = new HTMLPurifier($config);
|
||||
// the latter may enable you to modify the config later on, but by now
|
||||
// the effort for e.g. enabling anchor tags is already included above
|
||||
//$def =& $purifier->config->getHTMLDefinition(true);
|
||||
//$def->addAttribute('a', 'name', 'Text');
|
||||
|
||||
|
||||
}
|
||||
|
||||
$result = $purifier->purify( $html );
|
||||
|
||||
//error_log(__METHOD__.$purifier->version);
|
||||
return $result;
|
||||
$htmLawed = new egw_htmLawed();
|
||||
if (is_array($config) && $_force===false) $config = array_merge($defaultConfig, $config);
|
||||
if (empty($config)) $config = $defaultConfig;
|
||||
//error_log(__METHOD__.__LINE__.array2string($config));
|
||||
return $htmLawed->egw_htmLawed($html,$config,$spec);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1035,7 +1035,7 @@ class translation
|
||||
// some characterreplacements, as they fail to translate
|
||||
$sar = array(
|
||||
'@(\x84|\x93|\x94)@',
|
||||
'@(\x96|\x97)@',
|
||||
'@(\x96|\x97|\x1a)@',
|
||||
'@(\x91|\x92)@',
|
||||
'@(\x85)@',
|
||||
'@(\x86)@',
|
||||
@ -1137,29 +1137,38 @@ class translation
|
||||
static function replaceTagsCompletley(&$_body,$tag,$endtag='',$addbracesforendtag=true)
|
||||
{
|
||||
if ($tag) $tag = strtolower($tag);
|
||||
$singleton = false;
|
||||
if ($endtag=='/>') $singleton =true;
|
||||
if ($endtag == '' || empty($endtag) || !isset($endtag))
|
||||
{
|
||||
$endtag = $tag;
|
||||
} else {
|
||||
$endtag = strtolower($endtag);
|
||||
//error_log(__METHOD__.' Using EndTag:'.$endtag);
|
||||
//error_log(__METHOD__.' Using EndTag:'.$endtag);
|
||||
}
|
||||
// strip tags out of the message completely with their content
|
||||
$taglen=strlen($tag);
|
||||
$endtaglen=strlen($endtag);
|
||||
if ($_body) {
|
||||
if ($addbracesforendtag === true )
|
||||
if ($singleton)
|
||||
{
|
||||
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)</'.$endtag.'[\s]*>~simU','',$_body);
|
||||
// remove left over tags, unfinished ones, and so on
|
||||
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
|
||||
$_body = preg_replace('~<'.$tag.'[^>].*? '.$endtag.'~simU','',$_body);
|
||||
}
|
||||
if ($addbracesforendtag === false )
|
||||
else
|
||||
{
|
||||
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)'.$endtag.'~simU','',$_body);
|
||||
// remove left over tags, unfinished ones, and so on
|
||||
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
|
||||
$_body = preg_replace('~'.$endtag.'~','',$_body);
|
||||
if ($addbracesforendtag === true )
|
||||
{
|
||||
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)</'.$endtag.'[\s]*>~simU','',$_body);
|
||||
// remove left over tags, unfinished ones, and so on
|
||||
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
|
||||
}
|
||||
if ($addbracesforendtag === false )
|
||||
{
|
||||
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)'.$endtag.'~simU','',$_body);
|
||||
// remove left over tags, unfinished ones, and so on
|
||||
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
|
||||
$_body = preg_replace('~'.$endtag.'~','',$_body);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user