Attempt to switch from HTMLPurifier to htmLawed and to replace kses with htmLawed calls; this is done for performance and resource reasons. The common entry point in EGroupware is still html::purify, but htmLawed now does the actual work. Let me know if you run into issues with this change; if we cannot find any issues within EGroupware, we will remove the related kses and HTMLPurifier source code in the next step.

This commit is contained in:
Klaus Leithoff 2012-05-30 14:47:03 +00:00
parent 581b7cbd9c
commit 33633cd7a5
5 changed files with 139 additions and 457 deletions

View File

@ -38,6 +38,20 @@ class felamimail_bo
*/
static $tidy_config = array('clean'=>true,'output-html'=>true,'join-classes'=>true,'join-styles'=>true,'show-body-only'=>"auto",'word-2000'=>true,'wrap'=>0);
/**
* static used to configure htmLawed, for use with emails
*
* @array
*/
static $htmLawed_config = array('comment'=>1,
//'keep_bad'=>2,
'balance'=>0,//turn off tag-balancing (config['balance']=>0). That will not introduce any security risk; only standards-compliant tag nesting check/filtering will be turned off (basic tag-balance will remain; i.e., there won't be any unclosed tag, etc., after filtering)
'tidy'=>1,
'elements' => "* -script",
'schemes'=>'href: file, ftp, http, https, mailto; src: cid, data, file, ftp, http, https; *:file, http, https',
'hook_tag' =>"hl_email_tag_transform",
);
/**
* errorMessage
*
@ -1207,240 +1221,31 @@ class felamimail_bo
//error_log($_html);
//repair doubleencoded ampersands
$_html = str_replace('&','&',$_html);
self::replaceTagsCompletley($_html,'style'); // clean out empty or pagewide style definitions / left over tags
self::replaceTagsCompletley($_html,'head'); // Strip out stuff in head
self::replaceTagsCompletley($_html,'!\[if','<!\[endif\]>',false); // Strip out stuff in ifs
self::replaceTagsCompletley($_html,'!--\[if','<!\[endif\]-->',false); // Strip out stuff in ifs
if (stripos($_html,'style')!==false) self::replaceTagsCompletley($_html,'style'); // clean out empty or pagewide style definitions / left over tags
if (stripos($_html,'head')!==false) self::replaceTagsCompletley($_html,'head'); // Strip out stuff in head
if (stripos($_html,'![if')!==false && stripos($_html,'<![endif]>')!==false) self::replaceTagsCompletley($_html,'!\[if','<!\[endif\]>',false); // Strip out stuff in ifs
if (stripos($_html,'!--[if')!==false && stripos($_html,'<![endif]-->')!==false) self::replaceTagsCompletley($_html,'!--\[if','<!\[endif\]-->',false); // Strip out stuff in ifs
//error_log($_html);
// force the use of kses, as it is still have the edge over purifier with some stuff
$usepurify = false;
$usepurify = true;
if ($usepurify)
{
// we need a customized config, as we may allow external images, $GLOBALS['egw_info']['user']['preferences']['felamimail']['allowExternalIMGs']
if (get_magic_quotes_gpc() === 1) $_html = stripslashes($_html);
$_html = html::purify($_html);
// Strip out doctype in head, as htmlLawed cannot handle it TODO: Consider extracting it and adding it afterwards
if (stripos($_html,'!doctype')!==false) self::replaceTagsCompletley($_html,'!doctype');
if (stripos($_html,'?xml:namespace')!==false) self::replaceTagsCompletley($_html,'\?xml:namespace','/>',false);
if (strpos($_html,'!CURSOR')!==false) self::replaceTagsCompletley($_html,'!CURSOR');
// purify got switched to htmLawed
$_html = html::purify($_html,self::$htmLawed_config,array(),true);
// clean out comments , should not be needed as purify should do the job.
$search = array(
'@url\(http:\/\/[^\)].*?\)@si', // url calls e.g. in style definitions
'@<!--[\s\S]*?[ \t\n\r]*-->@', // Strip multi-line comments including CDATA
);
//$_html = preg_replace($search,"",$_html);
// remove non printable chars
$_html = preg_replace('/([\000-\012])/','',$_html);
//error_log($_html);
}
else
{
//echo $_html;exit;
$kses = new kses();
$kses->AddProtocol('cid');
// since check protocoll is called for every value associated to an attribute we have to add color and background-color to the valid protocolls
$kses->AddProtocol('color');
$kses->AddProtocol('font-size');
$kses->AddProtocol('background-color');
#$kses->AddHTML('html', array(
# 'xmlns' => array(),
# 'lang' => array(),
# )
#);
#$kses->AddHTML('head');
#$kses->AddHTML('body', array(
# 'class' => array(),
# 'id' => array(),
# )
#);
#$kses->AddHTML('meta', array(
# 'http-equiv' => array(),
# 'content' => array(),
# )
#);
#$kses->AddHTML('link',array(
# 'rel' => array(), // ="stylesheet"
# 'type' => array(), //="text/css"
# 'href' => array(),
# 'media' => array(),
# )
#);
$kses->AddHTML(
'p', array(
"class" => array('maxlen' => 20),
'align' => array('minlen' => 1, 'maxlen' => 10)
)
);
$kses->AddHTML("tbody");
$kses->AddHTML("thead");
$kses->AddHTML("tt");
$kses->AddHTML("br");
$kses->AddHTML("b");
$kses->AddHTML("u");
$kses->AddHTML("s");
$kses->AddHTML("i");
$kses->AddHTML('em');
$kses->AddHTML("strong");
$kses->AddHTML("strike");
$kses->AddHTML("center");
$kses->AddHTML(
"font",array(
"class" => array('maxlen' => 20),
"color" => array('maxlen' => 20),
"size"=>array('maxlen'=>2)
)
);
$kses->AddHTML(
"hr",array(
"class" => array('maxlen' => 20),
"style" => array('minlen' => 1),
)
);
$kses->AddHTML(
"div",array(
"class" => array('maxlen' => 20),
'align' => array('maxlen' => 10)
)
);
$kses->AddHTML("ul");
$kses->AddHTML(
"ol",array(
"class" => array('maxlen' => 20),
"type" => array('maxlen' => 20)
)
);
$kses->AddHTML("li");
$kses->AddHTML("h1");
$kses->AddHTML("h2");
$kses->AddHTML("h3");
$kses->AddHTML(
"style",array(
"type" => array('maxlen' => 20),
"color" => array('maxlen' => 20),
"background-color" => array('maxlen' => 20),
"background" => array('maxlen' => 5),
)
);
$kses->AddHTML("select");
$kses->AddHTML(
"option",array(
"class" => array('maxlen' => 20),
"value" => array('maxlen' => 45),
"selected" => array()
)
);
$kses->AddHTML(
"a", array(
"class" => array('maxlen' => 20),
"href" => array('maxlen' => 348, 'minlen' => 10),
"name" => array('minlen' => 2),
'target' => array('maxlen' => 10)
)
);
$kses->AddHTML(
"pre", array(
"class" => array('maxlen' => 20),
"wrap" => array('maxlen' => 10)
)
);
// Allows 'td' tag with colspan|rowspan|class|style|width|nowrap attributes,
// colspan has minval of 2 and maxval of 5
// rowspan has minval of 3 and maxval of 6
// class has minlen of 1 char and maxlen of 10 chars
// style has minlen of 5 chars and maxlen of 100 chars
// width has maxval of 100
// nowrap is valueless
$kses->AddHTML(
"table",array(
"class" => array("minlen" => 1, 'maxlen' => 20),
"border" => array("minlen" => 1, 'maxlen' => 10),
"cellpadding" => array("minlen" => 0, 'maxlen' => 10),
"cellspacing" => array("minlen" => 0, 'maxlen' => 10),
"width" => array("maxlen" => 5),
"style" => array('minlen' => 5, 'maxlen' => 100),
"bgcolor" => array('maxlen' => 10),
"align" => array('maxlen' => 10),
"valign" => array('maxlen' => 10),
"bordercolor" => array('maxlen' => 10)
)
);
$kses->AddHTML(
"tr",array(
"colspan" => array('minval' => 2, 'maxval' => 5),
"rowspan" => array('minval' => 3, 'maxval' => 6),
"class" => array("minlen" => 1, 'maxlen' => 20),
"width" => array("maxlen" => 5),
"style" => array('minlen' => 5, 'maxlen' => 100),
"align" => array('maxlen' => 10),
'bgcolor' => array('maxlen' => 10),
"valign" => array('maxlen' => 10),
"nowrap" => array('valueless' => 'y')
)
);
$kses->AddHTML(
"td",array(
"colspan" => array('minval' => 2, 'maxval' => 5),
"rowspan" => array('minval' => 3, 'maxval' => 6),
"class" => array("minlen" => 1, 'maxlen' => 20),
"width" => array("maxlen" => 5),
"style" => array('minlen' => 5, 'maxlen' => 100),
"align" => array('maxlen' => 10),
'bgcolor' => array('maxlen' => 10),
"valign" => array('maxlen' => 10),
"nowrap" => array('valueless' => 'y')
)
);
$kses->AddHTML(
"th",array(
"colspan" => array('minval' => 2, 'maxval' => 5),
"rowspan" => array('minval' => 3, 'maxval' => 6),
"class" => array("minlen" => 1, 'maxlen' => 20),
"width" => array("maxlen" => 5),
"style" => array('minlen' => 5, 'maxlen' => 100),
"align" => array('maxlen' => 10),
'bgcolor' => array('maxlen' => 10),
"valign" => array('maxlen' => 10),
"nowrap" => array('valueless' => 'y')
)
);
$kses->AddHTML(
"span",array(
"class" => array("minlen" => 1, 'maxlen' => 20),
"style" => array('minlen' => 5, 'maxlen' => 100)
)
);
$kses->AddHTML(
"blockquote",array(
"class" => array("minlen" => 1, 'maxlen' => 20),
"style" => array("minlen" => 1),
"cite" => array('maxlen' => 30),
"type" => array('maxlen' => 10),
"dir" => array("minlen" => 1, 'maxlen' => 10)
)
);
$kses->AddHTML(
'img',array(
"class" => array('maxlen' => 20),
"src" => array("minlen" => 4, 'maxlen' => 384, $GLOBALS['egw_info']['user']['preferences']['felamimail']['allowExternalIMGs'] ? '' : 'match' => '/^cid:.*/'),
"align" => array("minlen" => 1),
"border" => array('maxlen' => 30),
"width" => array("minlen" => 1, 'maxlen' => 3),
"height" => array("minlen" => 1, 'maxlen' => 3),
)
);
// no scripts allowed
// clean out comments
$search = array(
'@<!--[\s\S]*?[ \t\n\r]*-->@', // Strip multi-line comments including CDATA
'@url\(http:\/\/[^\)].*?\)@si', // url calls e.g. in style definitions
);
//error_log(__METHOD__.$_html);
$_html = preg_replace($search,"",$_html);
// do the kses clean out first, to avoid general problems with content later on
$_html = $kses->Parse($_html);
// remove non printable chars
$_html = preg_replace('/([\000-\012])/','',$_html);
$_html = preg_replace('/([\000-\012])/','',$_html);
//error_log($_html);
}
// using purify above should have tidied the tags already sufficiently
@ -1464,14 +1269,8 @@ class felamimail_bo
{
//$to = ini_get('max_execution_time');
//@set_time_limit(10);
//$p = microtime(true);
$htmLawed = new egw_htmLawed();
//$pela = microtime(true);
$_html = $htmLawed->egw_htmLawed($_html);
//$le = microtime(true);
//$a=$pela-$p;
//$b=$le-$pela;
//error_log(__METHOD__.__LINE__.' new egw_htmLawed:'.$a.' htmlLawed took:'.$b);
//error_log(__METHOD__.__LINE__.$_html);
//@set_time_limit($to);
}

View File

@ -1054,6 +1054,12 @@ blockquote[type=cite] {
font-size: 11px;
}
</style>'.$additionalStyle.'
<script type="text/javascript">
function GoToAnchor(aname)
{
window.location.hash=aname;
}
</script>
</head>
<body>
';
@ -1560,8 +1566,7 @@ blockquote[type=cite] {
{
$link = $GLOBALS['egw']->link('/index.php',array('menuaction' => 'felamimail.uicompose.compose'));
$newBody = preg_replace("/href=(\"|\')mailto:([\w,\-,\/,\?,\=,\.,&amp;,!\n,\%,@,\*,#,:,~,\+]+)(\"|\')/ie",
"'href=\"#\"'.' onclick=\"egw_openWindowCentered(\'$link&send_to='.base64_encode('$2').'\', \'compose\', 700, egw_getWindowOuterHeight());\"'", $newBody);
// "'href=\"$link&send_to='.base64_encode('$2').'\"'", $newBody);
"'href=\"$link&send_to='.base64_encode('$2').'\"'.' target=\"compose\" onclick=\"window.open(this,this.target,\'dependent=yes,width=700,height=egw_getWindowOuterHeight(),location=no,menubar=no,toolbar=no,scrollbars=yes,status=yes\'); return false;\"'", $newBody);
//print "<pre>".htmlentities($newBody)."</pre><hr>";
}
// replace emails within the text with clickable links.

View File

@ -80,10 +80,8 @@ class egw_htmLawed
*/
$this->Configuration = array('comment'=>0,
'balance'=>0,
//'keep_bad'=>3,
'balance'=>0,//turn off tag-balancing (config['balance']=>0). That will not introduce any security risk; only standards-compliant tag nesting check/filtering will be turned off (basic tag-balance will remain; i.e., there won't be any unclosed tag, etc., after filtering)
'tidy'=>1,
//'direct_list_nest'=>1,
'elements' => "* -script",
'schemes'=>'href: file, ftp, http, https, mailto; src: cid, data, file, ftp, http, https; *:file, http, https',
'hook_tag' =>"hl_my_tag_transform",
@ -101,6 +99,7 @@ class egw_htmLawed
*/
function egw_htmLawed($html2check, $Config=null, $Spec=array())
{
//error_log(__METHOD__.__LINE__.' Input:'.$html2check);
if (is_array($Config) && is_array($this->Configuration)) $Config = array_merge($this->Configuration, $Config);
if (empty($Config)) $Config = $this->Configuration;
if (empty($Spec)) $Spec = $this->Spec;
@ -113,16 +112,25 @@ class egw_htmLawed
* hl_my_tag_transform
*
* function to provide individual checks for element attribute pairs
* implemented so far: img checking for alt attribute == image; set this to empty
* implemented so far: img checking for alt attribute == image; set this to empty
* a checking for title, replacing @
*/
function hl_my_tag_transform($element, $attribute_array)
{
//error_log(__METHOD__.__LINE__." ".$element.'->'.array2string($attribute_array));
//if ($element=='img') error_log(__METHOD__.__LINE__." ".$element.'->'.array2string($attribute_array));
// Elements other than 'img' or 'img' without a 'img' attribute are returned unchanged
if($element == 'img' && isset($attribute_array['alt']))
if($element == 'img')
{
// Re-build 'alt'
$attribute_array['alt'] = ($attribute_array['alt']=='image'?'':$attribute_array['alt']);
if (isset($attribute_array['alt'])) $attribute_array['alt'] = ($attribute_array['alt']=='image'?'':$attribute_array['alt']);
if (isset($attribute_array['alt'])&&strpos($attribute_array['alt'],'@')!==false) $attribute_array['alt']=str_replace('@','(at)',$attribute_array['alt']);
}
if($element == 'a')
{
if (isset($attribute_array['title']))
{
if (strpos($attribute_array['title'],'@')!==false) $attribute_array['title']=str_replace('@','(at)',$attribute_array['title']);
}
}
/*
// Elements other than 'span' or 'span' without a 'style' attribute are returned unchanged
@ -165,3 +173,67 @@ function hl_my_tag_transform($element, $attribute_array)
return "<{$element}{$attributes}". (isset($empty_elements[$element]) ? ' /' : ''). '>';
}
/**
 * hl_email_tag_transform
 *
 * Tag hook for htmLawed (configured as 'hook_tag' in felamimail_bo::$htmLawed_config),
 * applied to the elements of an HTML mail body. It receives an element name together
 * with its attributes and returns the rebuilt opening tag.
 *
 * Implemented so far:
 *  img - an alt text of 'image' is emptied, '@' in alt is replaced by '(at)'
 *      - src is replaced by an error icon if its length is not between 5 and 399 bytes
 *      - src is replaced by an error icon if external images are not allowed by the
 *        felamimail preference allowExternalIMGs and src is not a cid: url
 *  a   - '@' in title is replaced by '(at)', '@' in href by '%40'
 *      - local anchors (href starting with '#') are rewritten to a GoToAnchor() call,
 *        to navigate within the mail without reloading the page
 *
 * @param string $element name of the element
 * @param array $attribute_array attribute name => value pairs of the element
 * @return string opening tag including its attributes
 */
function hl_email_tag_transform($element, $attribute_array)
{
	// elements written as self closing tags
	static $empty_elements = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1);
	// Characters that must not reach the javascript: url literally. The browser decodes
	// entities (&) and percent escapes (%) before the script is evaluated, so these have
	// to be neutralised as well as the quotes and the backslash itself.
	static $js_escape = array(
		'\\' => '\\x5c',
		"'"  => '\\x27',
		'"'  => '\\x22',
		'&'  => '\\x26',
		'%'  => '\\x25',
		'@'  => '\\x40',
		'<'  => '\\x3c',
		'>'  => '\\x3e',
		"\r" => '\\x0d',
		"\n" => '\\x0a',
	);

	//if ($element=='a') error_log(__METHOD__.__LINE__." ".$element.'->'.array2string($attribute_array));
	if ($element == 'img')
	{
		// Re-build 'alt'
		if (isset($attribute_array['alt']))
		{
			if ($attribute_array['alt'] == 'image') $attribute_array['alt'] = '';
			$attribute_array['alt'] = str_replace('@', '(at)', $attribute_array['alt']);
		}
		if (isset($attribute_array['src']))
		{
			$src_length = strlen($attribute_array['src']);
			if (!($src_length > 4 && $src_length < 400))
			{
				// alt may be missing, do not rely on it being set
				$alt = isset($attribute_array['alt']) ? $attribute_array['alt'] : '';
				$attribute_array['alt'] = $alt.' [blocked (reason: url length):'.$attribute_array['src'].']';
				if (!isset($attribute_array['title'])) $attribute_array['title'] = $attribute_array['alt'];
				$attribute_array['src'] = common::image('phpgwapi','dialog_error');
			}
			// empty() also covers the preference not being set at all
			if (empty($GLOBALS['egw_info']['user']['preferences']['felamimail']['allowExternalIMGs']))
			{
				if (!preg_match('/^cid:.*/', $attribute_array['src']))
				{
					$alt = isset($attribute_array['alt']) ? $attribute_array['alt'] : '';
					$attribute_array['alt'] = $alt.' [blocked external image:'.$attribute_array['src'].']';
					if (!isset($attribute_array['title'])) $attribute_array['title'] = $attribute_array['alt'];
					$attribute_array['src'] = common::image('phpgwapi','dialog_error');
				}
			}
		}
	}
	if ($element == 'a')
	{
		if (isset($attribute_array['title']))
		{
			$attribute_array['title'] = str_replace('@', '(at)', $attribute_array['title']);
		}
		// NOTE(review): id is only overwritten when it is already present; confirm that
		// this is intended and not meant to add a missing id
		if (isset($attribute_array['name']) && isset($attribute_array['id'])) $attribute_array['id'] = $attribute_array['name'];
		// named anchors (<a name="...">) carry no href at all
		if (isset($attribute_array['href']))
		{
			if (strpos($attribute_array['href'], '#') === 0)
			{
				// The anchor name comes from the mail, so it is escaped before it is placed
				// into the single quoted javascript string.
				$anchor = trim(substr($attribute_array['href'], 1));
				$attribute_array['href'] = "javascript:GoToAnchor('".strtr($anchor, $js_escape)."');";
			}
			else
			{
				$attribute_array['href'] = str_replace('@', '%40', $attribute_array['href']);
			}
		}
	}

	// Build the attributes string
	$attributes = '';
	foreach ($attribute_array as $k => $v)
	{
		$attributes .= " {$k}=\"{$v}\"";
	}

	// Return the opening tag with attributes
	return "<{$element}{$attributes}". (isset($empty_elements[$element]) ? ' /' : ''). '>';
}

View File

@ -183,101 +183,6 @@ class html
return preg_replace( $Expr, "<a href=\"http://$0\" target=\"_blank\">$0</a>", $result );
}
/**
* activates URLs in a text, URLs get replaced by html-links using htmlpurify
*
* @param string $content text containing URLs
* @return string html with activated links
*/
static function activateLinks($content)
{
if (!$content || strlen($content) < 20) return $content; // performance
// spamsaver emailaddress
$result = preg_replace('/'.$NotAnchor.'mailto:([a-z0-9._-]+)@([a-z0-9_-]+)\.([a-z0-9._-]+)/i',
'<a href="#" onclick="document.location=\'mai\'+\'lto:\\1\'+unescape(\'%40\')+\'\\2.\\3\'; return false;">\\1 AT \\2 DOT \\3</a>',
$content);
$config = self::purifyCreateDefaultConfig();
$config->set('Core.Encoding', (self::$charset?self::$charset:'UTF-8'));
// maybe the two following lines are useful for caching???
$config->set('HTML.DefinitionID', 'activatelinks');
$config->set('HTML.DefinitionRev', 1);
// doctype and tidylevel
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
$config->set('HTML.TidyLevel', 'light');
// EnableID is needed for anchor tags
$config->set('Attr.EnableID',true);
// enable target attributes
$config->set('Attr.AllowedFrameTargets','_blank,_top,_self,_parent');
// actual allowed tags and attributes
$config->set('URI.AllowedSchemes', array('http'=>true, 'https'=>true, 'ftp'=>true, 'file'=>true, 'cid'=>true, 'data'=>true));
$config->set('AutoFormat.RemoveEmpty', true);
$config->set('HTML.Allowed', 'br,p[align],b,i,u,s,em,pre,tt,strong,strike,sub,sup,center,div[align|style],hr[class|style],'.
'font[size|color],'.
'ul[type],ol[type|start],li,'.
'h1,h2,h3,h4,h5,h6,'.
'span[class|style],'.
'table[class|border|cellpadding|cellspacing|width|style|align|bgcolor|align],'.
'tbody,thead,tfoot,colgroup,'.
'col[width|span],'.
'blockquote[class|cite|dir],'.
'tr[class|style|align|bgcolor|align|valign],'.
'td[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
'th[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
'a[href|target|name|title],'.
'img[src|alt|title|align|style|width|height]');
$config->set('Attr.DefaultInvalidImage', 'Image removed by htmlpurify');
$config->set('Cache.SerializerPath', ($GLOBALS['egw_info']['server']['temp_dir']?$GLOBALS['egw_info']['server']['temp_dir']:sys_get_temp_dir()));
$config->set('AutoFormat.Linkify',true);
return self::purify($result,$config);
}
/**
* deactivates URLs in a text, URLs get replaced by html-links using htmlpurify
*
* @param string $content text containing URLs
* @return string html with activated links
*/
static function deactivateLinks($_html)
{
$config = self::purifyCreateDefaultConfig();
$config->set('Core.Encoding', (self::$charset?self::$charset:'UTF-8'));
// maybe the two following lines are useful for caching???
$config->set('HTML.DefinitionID', 'deactivatelinks');
$config->set('HTML.DefinitionRev', 1);
// doctype and tidylevel
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
$config->set('HTML.TidyLevel', 'light');
// EnableID is needed for anchor tags
$config->set('Attr.EnableID',true);
// enable target attributes
$config->set('Attr.AllowedFrameTargets','_blank,_top,_self,_parent');
// actual allowed tags and attributes
$config->set('URI.AllowedSchemes', array('http'=>true, 'https'=>true, 'ftp'=>true, 'file'=>true, 'cid'=>true, 'data'=>true));
$config->set('AutoFormat.RemoveEmpty', true);
$config->set('HTML.Allowed', 'br,p[align],b,i,u,s,em,pre,tt,strong,strike,sub,sup,center,div[align|style],hr[class|style],'.
'font[size|color],'.
'ul[type],ol[type|start],li,'.
'h1,h2,h3,h4,h5,h6,'.
'span[class|style],'.
'table[class|border|cellpadding|cellspacing|width|style|align|bgcolor|align],'.
'tbody,thead,tfoot,colgroup,'.
'col[width|span],'.
'blockquote[class|cite|dir],'.
'tr[class|style|align|bgcolor|align|valign],'.
'td[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
'th[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
'a[href|target|name|title],'.
'img[src|alt|title|align|style|width|height]');
$config->set('Attr.DefaultInvalidImage', 'Image removed by htmlpurify');
$config->set('Cache.SerializerPath', ($GLOBALS['egw_info']['server']['temp_dir']?$GLOBALS['egw_info']['server']['temp_dir']:sys_get_temp_dir()));
$config->set('AutoFormat.DisplayLinkURI',true);
$_html = self::purify($_html,$config);
return $_html;
}
/**
* escapes chars with special meaning in html as entities
*
@ -1393,129 +1298,21 @@ class html
return $html;
}
/**
* creates the HTMLPurifier default config
*
* @return HTMLPurifier_Config object
*/
static function purifyCreateDefaultConfig()
{
// add htmlpurifiers library to include_path
require_once(EGW_API_INC.'/htmlpurifier/library/HTMLPurifier.path.php');
// include most of the required files, for best performance with bytecode caches
require_once(EGW_API_INC.'/htmlpurifier/library/HTMLPurifier.includes.php');
// installs an autoloader for other files
require_once(EGW_API_INC.'/htmlpurifier/library/HTMLPurifier.autoload.php');
// testcase to test the processing of purify
//$html = "<h1 onclick=\"alert('hallo');\"> h1 </h1>".$html;
return HTMLPurifier_Config::createDefault();
}
/**
* creates a HTMLPurifier default config for the needs of HTMLTidy
*
* @return HTMLPurifier_Config object
*/
static function purifyCreateHTMLTidyConfig()
{
$config = html::purifyCreateDefaultConfig();
// maybe the two following lines are useful for caching???
$config->set('HTML.DefinitionID', 'egroupwareHTMLTidyConfig');
$config->set('HTML.DefinitionRev', 1);
$config->set('Core.Encoding', (self::$charset?self::$charset:'UTF-8')); // doctype and tidylevel
$config->set('Core.RemoveInvalidImg', false);
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
$config->set('HTML.TidyLevel', 'light');
$config->set('Attr.EnableID',true);
// enable target attributes
$config->set('Attr.AllowedFrameTargets','_blank,_top,_self,_parent');
// actual allowed tags and attributes
$config->set('URI.AllowedSchemes', array('http'=>true, 'https'=>true, 'ftp'=>true, 'file'=>true, 'cid'=>true));
$config->set('AutoFormat.RemoveEmpty', true);
$config->set('HTML.Allowed', 'br,p[align],b,i,u,s,em,pre,tt,strong,strike,sub,sup,center,div[align|style],hr[class|style],'.
'font[size|color],'.
'ul[type],ol[type|start],li,'.
'h1,h2,h3,h4,h5,h6,'.
'span[class|style],'.
'table[class|border|cellpadding|cellspacing|width|style|align|bgcolor|align],'.
'tbody,thead,tfoot,colgroup,'.
'col[width|span],'.
'blockquote[class|cite|dir],'.
'tr[class|style|align|bgcolor|align|valign],'.
'td[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
'th[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
'a[href|target|name|title],'.
'img[src|alt|title|align|style|width|height]');
$config->set('URI.AllowedSchemes', array('http'=>true, 'https'=>true, 'ftp'=>true, 'file'=>true, 'cid'=>true, 'data'=>true));
$config->set('Cache.SerializerPath', ($GLOBALS['egw_info']['server']['temp_dir']?$GLOBALS['egw_info']['server']['temp_dir']:sys_get_temp_dir()));
return $config;
}
/**
* Runs HTMLPurifier over supplied html to remove malicious code
*
* @param string $html
* @param HTMLPurifier_Config $config=null
*/
static function purify($html,$config=null)
static function purify($html,$config=null,$spec=array(),$_force=false)
{
static $purifier;
$defaultConfig = array('valid_xhtml'=>1,'safe'=>1);
if (empty($html)) return $html; // no need to process further
if (is_null($purifier) || !is_null($config))
{
// add htmlpurifiers library to include_path
require_once(EGW_API_INC.'/htmlpurifier/library/HTMLPurifier.path.php');
// include most of the required files, for best performance with bytecode caches
require_once(EGW_API_INC.'/htmlpurifier/library/HTMLPurifier.includes.php');
// installs an autoloader for other files
require_once(EGW_API_INC.'/htmlpurifier/library/HTMLPurifier.autoload.php');
// testcase to test the processing of purify
//$html = "<h1 onclick=\"alert('hallo');\"> h1 </h1>".$html;
if (is_null($config))
{
$config = HTMLPurifier_Config::createDefault();
$config->set('Core.Encoding', (self::$charset?self::$charset:'UTF-8'));
// maybe the two following lines are useful for caching???
$config->set('HTML.DefinitionID', 'egroupware');
$config->set('HTML.DefinitionRev', 1);
// doctype and tidylevel
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
$config->set('HTML.TidyLevel', 'light');
// EnableID is needed for anchor tags
$config->set('Attr.EnableID',true);
// enable target attributes
$config->set('Attr.AllowedFrameTargets','_blank,_top,_self,_parent');
// actual allowed tags and attributes
$config->set('HTML.Allowed', 'br,p[class|align|style],b,i,u,s,em,pre,tt,strong,strike,sub,sup,center,div[class|align|style],hr[class|style],'.
'ul[class|type],ol[class|type|start],li,'.
'h1,h2,h3,h4,h5,h6,'.
'span[class|style],'.
'table[class|border|cellpadding|cellspacing|width|style|align|bgcolor|align],'.
'tbody,thead,tfoot,colgroup,'.
'col[class|width|span],'.
'blockquote[class|cite|dir],'.
'tr[class|style|align|bgcolor|align|valign],'.
'td[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
'th[class|colspan|rowspan|width|style|align|bgcolor|align|valign|nowrap],'.
'a[class|href|target|name|title],'.
'img[class|src|alt|title|align|style|width|height]');
$config->set('Cache.SerializerPath', ($GLOBALS['egw_info']['server']['temp_dir']?$GLOBALS['egw_info']['server']['temp_dir']:sys_get_temp_dir()));
}
$purifier = new HTMLPurifier($config);
// the latter may enable you to modify the config later on, but by now
// the effort for e.g. enabling anchor tags is already included above
//$def =& $purifier->config->getHTMLDefinition(true);
//$def->addAttribute('a', 'name', 'Text');
}
$result = $purifier->purify( $html );
//error_log(__METHOD__.$purifier->version);
return $result;
$htmLawed = new egw_htmLawed();
if (is_array($config) && $_force===false) $config = array_merge($defaultConfig, $config);
if (empty($config)) $config = $defaultConfig;
//error_log(__METHOD__.__LINE__.array2string($config));
return $htmLawed->egw_htmLawed($html,$config,$spec);
}
/**

View File

@ -1035,7 +1035,7 @@ class translation
// some characterreplacements, as they fail to translate
$sar = array(
'@(\x84|\x93|\x94)@',
'@(\x96|\x97)@',
'@(\x96|\x97|\x1a)@',
'@(\x91|\x92)@',
'@(\x85)@',
'@(\x86)@',
@ -1137,29 +1137,38 @@ class translation
static function replaceTagsCompletley(&$_body,$tag,$endtag='',$addbracesforendtag=true)
{
if ($tag) $tag = strtolower($tag);
$singleton = false;
if ($endtag=='/>') $singleton =true;
if ($endtag == '' || empty($endtag) || !isset($endtag))
{
$endtag = $tag;
} else {
$endtag = strtolower($endtag);
//error_log(__METHOD__.' Using EndTag:'.$endtag);
//error_log(__METHOD__.' Using EndTag:'.$endtag);
}
// strip tags out of the message completely with their content
$taglen=strlen($tag);
$endtaglen=strlen($endtag);
if ($_body) {
if ($addbracesforendtag === true )
if ($singleton)
{
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)</'.$endtag.'[\s]*>~simU','',$_body);
// remove left over tags, unfinished ones, and so on
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
$_body = preg_replace('~<'.$tag.'[^>].*? '.$endtag.'~simU','',$_body);
}
if ($addbracesforendtag === false )
else
{
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)'.$endtag.'~simU','',$_body);
// remove left over tags, unfinished ones, and so on
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
$_body = preg_replace('~'.$endtag.'~','',$_body);
if ($addbracesforendtag === true )
{
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)</'.$endtag.'[\s]*>~simU','',$_body);
// remove left over tags, unfinished ones, and so on
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
}
if ($addbracesforendtag === false )
{
$_body = preg_replace('~<'.$tag.'[^>]*?>(.*)'.$endtag.'~simU','',$_body);
// remove left over tags, unfinished ones, and so on
$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);
$_body = preg_replace('~'.$endtag.'~','',$_body);
}
}
}
}