disable the use of tidy when using activesync

This commit is contained in:
Klaus Leithoff 2015-10-23 10:47:13 +00:00
parent a8e13d4526
commit d39330abe3
4 changed files with 55 additions and 83 deletions

View File

@ -4006,11 +4006,9 @@ class emailadmin_imapbase
/**
* clean a message from elements regarded as potentially harmful
* param string/reference $_html is the text to be processed
* param boolean $usepurify - obsolet, as we always use htmlLawed
* param boolean $cleanTags - use tidy (if available) to clean/balance tags
* return nothing
*/
static function getCleanHTML(&$_html, $usepurify = false, $cleanTags=true)
static function getCleanHTML(&$_html)
{
// remove CRLF and TAB as it is of no use in HTML.
// but they matter in <pre>, so we rather don't
@ -4026,66 +4024,34 @@ class emailadmin_imapbase
//if (stripos($_html,'![if')!==false && stripos($_html,'<![endif]>')!==false) translation::replaceTagsCompletley($_html,'!\[if','<!\[endif\]>',false); // Strip out stuff in ifs
//if (stripos($_html,'!--[if')!==false && stripos($_html,'<![endif]-->')!==false) translation::replaceTagsCompletley($_html,'!--\[if','<!\[endif\]-->',false); // Strip out stuff in ifs
//error_log(__METHOD__.' ('.__LINE__.') '.$_html);
// force the use of kses, as it is still have the edge over purifier with some stuff
$usepurify = true;
if ($usepurify)
{
// we need a customized config, as we may allow external images, $GLOBALS['egw_info']['user']['preferences']['mail']['allowExternalIMGs']
if (get_magic_quotes_gpc() === 1) $_html = stripslashes($_html);
// Strip out doctype in head, as htmlLawed cannot handle it TODO: Consider extracting it and adding it afterwards
if (stripos($_html,'!doctype')!==false) translation::replaceTagsCompletley($_html,'!doctype');
if (stripos($_html,'?xml:namespace')!==false) translation::replaceTagsCompletley($_html,'\?xml:namespace','/>',false);
if (stripos($_html,'?xml version')!==false) translation::replaceTagsCompletley($_html,'\?xml version','\?>',false);
if (strpos($_html,'!CURSOR')!==false) translation::replaceTagsCompletley($_html,'!CURSOR');
// htmLawed filter only the 'body'
//preg_match('`(<htm.+?<body[^>]*>)(.+?)(</body>.*?</html>)`ims', $_html, $matches);
//if ($matches[2])
//{
// $hasOther = true;
// $_html = $matches[2];
//}
// purify got switched to htmLawed
// some testcode to test purifying / htmlawed
//$_html = "<BLOCKQUOTE>hi <div> there </div> kram <br> </blockquote>".$_html;
$_html = html::purify($_html,self::$htmLawed_config,array(),true);
//if ($hasOther) $_html = $matches[1]. $_html. $matches[3];
// clean out comments , should not be needed as purify should do the job.
$search = array(
'@url\(http:\/\/[^\)].*?\)@si', // url calls e.g. in style definitions
'@<!--[\s\S]*?[ \t\n\r]*-->@', // Strip multi-line comments including CDATA
);
$_html = preg_replace($search,"",$_html);
// remove non printable chars
$_html = preg_replace('/([\000-\012])/','',$_html);
//error_log(__METHOD__.':'.__LINE__.':'.$_html);
}
// using purify above should have tidied the tags already sufficiently
if ($usepurify == false && $cleanTags==true)
{
if (extension_loaded('tidy'))
{
$tidy = new tidy();
$cleaned = $tidy->repairString($_html, self::$tidy_config,'utf8');
// Found errors. Strip it all so there's some output
if($tidy->getStatus() == 2)
{
error_log(__METHOD__.' ('.__LINE__.') '.' ->'.$tidy->errorBuffer);
}
else
{
$_html = $cleaned;
}
}
else
{
//$to = ini_get('max_execution_time');
//@set_time_limit(10);
$htmLawed = new egw_htmLawed();
$_html = $htmLawed->egw_htmLawed($_html);
//error_log(__METHOD__.' ('.__LINE__.') '.$_html);
//@set_time_limit($to);
}
}
if (get_magic_quotes_gpc() === 1) $_html = stripslashes($_html);
// Strip out doctype in head, as htmlLawed cannot handle it TODO: Consider extracting it and adding it afterwards
if (stripos($_html,'!doctype')!==false) translation::replaceTagsCompletley($_html,'!doctype');
if (stripos($_html,'?xml:namespace')!==false) translation::replaceTagsCompletley($_html,'\?xml:namespace','/>',false);
if (stripos($_html,'?xml version')!==false) translation::replaceTagsCompletley($_html,'\?xml version','\?>',false);
if (strpos($_html,'!CURSOR')!==false) translation::replaceTagsCompletley($_html,'!CURSOR');
// htmLawed filter only the 'body'
//preg_match('`(<htm.+?<body[^>]*>)(.+?)(</body>.*?</html>)`ims', $_html, $matches);
//if ($matches[2])
//{
// $hasOther = true;
// $_html = $matches[2];
//}
// purify got switched to htmLawed
// some testcode to test purifying / htmlawed
//$_html = "<BLOCKQUOTE>hi <div> there </div> kram <br> </blockquote>".$_html;
$_html = html::purify($_html,self::$htmLawed_config,array(),true);
//if ($hasOther) $_html = $matches[1]. $_html. $matches[3];
// clean out comments , should not be needed as purify should do the job.
$search = array(
'@url\(http:\/\/[^\)].*?\)@si', // url calls e.g. in style definitions
'@<!--[\s\S]*?[ \t\n\r]*-->@', // Strip multi-line comments including CDATA
);
$_html = preg_replace($search,"",$_html);
// remove non printable chars
$_html = preg_replace('/([\000-\012])/','',$_html);
//error_log(__METHOD__.':'.__LINE__.':'.$_html);
}
/**
@ -4448,7 +4414,9 @@ class emailadmin_imapbase
if ($part)
{
$_encoding = $part->getBodyPartDecode($_partID);
//error_log(__METHOD__.__LINE__.':'.$_encoding.'#');
$partToReturn = $part->getBodyPart($_partID, $_stream);
//error_log(__METHOD__.__LINE__.':'.$partToReturn.'#');
}
// if we get an empty result, server may have trouble fetching data with UID FETCH $_uid (BINARY.PEEK[$_partID])
// thus we trigger a second go with UID FETCH $_uid (BODY.PEEK[$_partID])
@ -4674,15 +4642,17 @@ class emailadmin_imapbase
* getdisplayableBody - creates the bodypart of the email as textual representation
* @param object $mailClass the mailClass object to be used
* @param array $bodyParts with the bodyparts
* @param boolean $preserveHTML switch to preserve HTML
* @param boolean $useTidy switch to use tidy
* @return string a preformatted string with the mails converted to text
*/
static function &getdisplayableBody(&$mailClass, $bodyParts, $preserveHTML = false)
static function &getdisplayableBody(&$mailClass, $bodyParts, $preserveHTML = false, $useTidy = true)
{
$message='';
for($i=0; $i<count($bodyParts); $i++)
{
if (!isset($bodyParts[$i]['body'])) {
$bodyParts[$i]['body'] = self::getdisplayableBody($mailClass, $bodyParts[$i], $preserveHTML);
$bodyParts[$i]['body'] = self::getdisplayableBody($mailClass, $bodyParts[$i], $preserveHTML, $useTidy);
$message .= empty($bodyParts[$i]['body'])?'':$bodyParts[$i]['body'];
continue;
}
@ -4740,7 +4710,7 @@ class emailadmin_imapbase
// as translation::convert reduces \r\n to \n and purifier eats \n -> replace it with a single space
$newBody = str_replace("\n"," ",$newBody);
// convert HTML to text, as we don't want HTML in infologs
if (extension_loaded('tidy'))
if ($useTidy && extension_loaded('tidy'))
{
$tidy = new tidy();
$cleaned = $tidy->repairString($newBody, self::$tidy_config,'utf8');
@ -4784,7 +4754,7 @@ class emailadmin_imapbase
if ($preserveHTML==false) $newBody = translation::convertHTMLToText($newBody,self::$displayCharset,true,true);
//error_log(__METHOD__.' ('.__LINE__.') '.' after convertHTMLToText:'.$newBody);
if ($preserveHTML==false) $newBody = nl2br($newBody); // we need this, as htmLawed removes \r\n
$mailClass->getCleanHTML($newBody,false,$preserveHTML); // remove stuff we regard as unwanted
$mailClass->getCleanHTML($newBody); // remove stuff we regard as unwanted
if ($preserveHTML==false) $newBody = str_replace("<br />","\r\n",$newBody);
//error_log(__METHOD__.' ('.__LINE__.') '.' after getClean:'.$newBody);
$message .= $newBody;

View File

@ -537,7 +537,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send
$this->mail->reopen($folder);
$bodyStruct = $this->mail->getMessageBody($uid, 'html_only');
$bodyBUFFHtml = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true);
$bodyBUFFHtml = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true,false);
if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' html_only:'.$bodyBUFFHtml);
if ($bodyBUFFHtml != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/html')) {
// may be html
@ -549,7 +549,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send
if ($this->debugLevel>0) debugLog("MIME Body".' Type:plain, fetch text:');
// if the new part of the message is html, we must preserve it, and handle that the original mail is text/plain
$bodyStruct = $this->mail->getMessageBody($uid,'never_display');//'never_display');
$bodyBUFF = $this->mail->getdisplayableBody($this->mail,$bodyStruct);//$this->ui->getdisplayableBody($bodyStruct,false);
$bodyBUFF = $this->mail->getdisplayableBody($this->mail,$bodyStruct,false,false);
if ($bodyBUFF != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/plain')) {
if ($this->debugLevel>0) debugLog("MIME Body".' Type:plain (fetched with never_display):'.$bodyBUFF);
$Body = $Body."\r\n".$bodyBUFF.$sigTextPlain;
@ -609,7 +609,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send
$this->mail->reopen($folder);
$bodyStruct = $this->mail->getMessageBody($uid, 'html_only');
$bodyBUFFHtml = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true);
$bodyBUFFHtml = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true,false);
if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' html_only:'.$bodyBUFFHtml);
if ($bodyBUFFHtml != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/html')) {
// may be html
@ -621,7 +621,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send
if ($this->debugLevel>0) debugLog("MIME Body".' Type:plain, fetch text:');
// if the new part of the message is html, we must preserve it, and handle that the original mail is text/plain
$bodyStruct = $this->mail->getMessageBody($uid,'never_display');//'never_display');
$bodyBUFF = $this->mail->getdisplayableBody($this->mail,$bodyStruct);//$this->ui->getdisplayableBody($bodyStruct,false);
$bodyBUFF = $this->mail->getdisplayableBody($this->mail,$bodyStruct,false,false);
if ($bodyBUFF != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/plain')) {
if ($this->debugLevel>0) debugLog("MIME Body".' Type:plain (fetched with never_display):'.$bodyBUFF);
$Body = $Body."\r\n".$bodyBUFF.$sigTextPlain;
@ -819,7 +819,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send
if ($this->debugLevel>0) debugLog(__METHOD__.__LINE__. ' for message with ID:'.$id.' with headers:'.array2string($headers));
if ($bodypreference === false) {
$bodyStruct = $this->mail->getMessageBody($id, 'only_if_no_text', '', null, true,$_folderName);
$body = $this->mail->getdisplayableBody($this->mail,$bodyStruct);
$body = $this->mail->getdisplayableBody($this->mail,$bodyStruct,false,false);
//$body = html_entity_decode($body,ENT_QUOTES,$this->mail->detect_encoding($body));
if (stripos($body,'<style')!==false) $body = preg_replace("/<style.*?<\/style>/is", "", $body); // in case there is only a html part
// remove all other html
@ -852,7 +852,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send
$css ='';
$bodyStruct = $this->mail->getMessageBody($id, 'html_only', '', null, true,$_folderName);
if ($this->debugLevel>2) debugLog(__METHOD__.__LINE__.' html_only Struct:'.array2string($bodyStruct));
$body = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true);//$this->ui->getdisplayableBody($bodyStruct,false);
$body = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true,false);
if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' html_only:'.$body);
if ($body != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/html')) {
// may be html
@ -865,7 +865,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send
$output->airsyncbasenativebodytype=1;
$bodyStruct = $this->mail->getMessageBody($id,'never_display', '', null, true,$_folderName); //'only_if_no_text');
if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' plain text Struct:'.array2string($bodyStruct));
$body = $this->mail->getdisplayableBody($this->mail,$bodyStruct);//$this->ui->getdisplayableBody($bodyStruct,false);
$body = $this->mail->getdisplayableBody($this->mail,$bodyStruct,false,false);
if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' never display html(plain text only):'.$body);
}
// whatever format decode (using the correct encoding)
@ -881,7 +881,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send
}
else
{
$plainBody = $this->mail->getdisplayableBody($this->mail,$bodyStructplain);//$this->ui->getdisplayableBody($bodyStruct,false);
$plainBody = $this->mail->getdisplayableBody($this->mail,$bodyStructplain,false,false);
}
}
//if ($this->debugLevel>0) debugLog("MIME Body".$body);
@ -1033,7 +1033,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send
if ($this->debugLevel>0) debugLog("Plaintext Body:".$plainBody);
/* we use plainBody (set above) instead
$bodyStruct = $this->mail->getMessageBody($id,'only_if_no_text'); //'never_display');
$plain = $this->mail->getdisplayableBody($this->mail,$bodyStruct);//$this->ui->getdisplayableBody($bodyStruct,false);
$plain = $this->mail->getdisplayableBody($this->mail,$bodyStruct);
$plain = html_entity_decode($plain,ENT_QUOTES,$this->mail->detect_encoding($plain));
$plain = strip_tags($plain);
//$plain = str_replace("\n","\r\n",str_replace("\r","",$plain));

View File

@ -543,7 +543,7 @@ class mail_compose
$content['mail_htmltext'] = implode('',$contentArr);
}
}
$content['mail_htmltext'] = $this->_getCleanHTML($content['mail_htmltext'], false, false);
$content['mail_htmltext'] = $this->_getCleanHTML($content['mail_htmltext']);
$content['mail_htmltext'] = translation::convertHTMLToText($content['mail_htmltext'],$charset=false,false,true);
$content['body'] = $content['mail_htmltext'];
@ -2118,12 +2118,12 @@ class mail_compose
}
static function _getCleanHTML($_body, $usepurify = false, $cleanTags=true)
static function _getCleanHTML($_body)
{
static $nonDisplayAbleCharacters = array('[\016]','[\017]',
'[\020]','[\021]','[\022]','[\023]','[\024]','[\025]','[\026]','[\027]',
'[\030]','[\031]','[\032]','[\033]','[\034]','[\035]','[\036]','[\037]');
mail_bo::getCleanHTML($_body, $usepurify, $cleanTags);
mail_bo::getCleanHTML($_body);
return preg_replace($nonDisplayAbleCharacters, '', $_body);
}

View File

@ -2733,11 +2733,13 @@ class mail_ui
{
$singleBodyPart['body'] = preg_replace($sar,$rar,$singleBodyPart['body']);
}
//error_log(__METHOD__.__LINE__.'reports:'.$singleBodyPart['charSet']);
$singleBodyPart['body'] = translation::convert_jsonsafe($singleBodyPart['body'],$singleBodyPart['charSet']);
//error_log(__METHOD__.__LINE__.array2string($singleBodyPart));
if($singleBodyPart['mimeType'] == 'text/plain')
{
$newBody = @htmlentities($singleBodyPart['body'],ENT_QUOTES, strtoupper(mail_bo::$displayCharset));
//error_log(__METHOD__.__LINE__.'..'.$newBody);
// if empty and charset is utf8 try sanitizing the string in question
if (empty($newBody) && strtolower($singleBodyPart['charSet'])=='utf-8') $newBody = @htmlentities(iconv('utf-8', 'utf-8', $singleBodyPart['body']),ENT_QUOTES, strtoupper(mail_bo::$displayCharset));
// if the conversion to htmlentities fails somehow, try without specifying the charset, which defaults to iso-
@ -2749,6 +2751,7 @@ class mail_ui
// create links for websites
if ($modifyURI) $newBody = html::activate_links($newBody);
//error_log(__METHOD__.__LINE__.'..'.$newBody);
// redirect links for websites if you use no cookies
#if (!($GLOBALS['egw_info']['server']['usecookies']))
# $newBody = preg_replace("/href=(\"|\')((http(s?):\/\/)|(www\.))([\w,\-,\/,\?,\=,\.,&amp;,!\n,\%,@,\(,\),\*,#,:,~,\+]+)(\"|\')/ie",
@ -2766,7 +2769,7 @@ class mail_ui
// to display a mailpart of mimetype text/plain, it may be better tagged as preformatted
#$newBody = nl2br($newBody);
// since we do not display the message as HTML anymore we may want to insert good linebreaking (for visibility).
//error_log($newBody);
//error_log(__METHOD__.__LINE__.'..'.$newBody);
// don't break lines that start with > (&gt; as the text was processed with htmlentities before)
$newBody = "<pre>".mail_bo::wordwrap($newBody,90,"\n",'&gt;')."</pre>";
}
@ -2777,9 +2780,8 @@ class mail_ui
#error_log(print_r($newBody,true));
// do the cleanup, set for the use of purifier
$usepurifier = true;
$newBodyBuff = $newBody;
mail_bo::getCleanHTML($newBody,$usepurifier);
mail_bo::getCleanHTML($newBody);
// in a way, this tests if we are having real utf-8 (the displayCharset) by now; we should if charsets reported (or detected) are correct
if (strtoupper(mail_bo::$displayCharset) == 'UTF-8')
{
@ -2790,7 +2792,7 @@ class mail_ui
$newBody = $newBodyBuff;
$tv = mail_bo::$htmLawed_config['tidy'];
mail_bo::$htmLawed_config['tidy'] = 0;
mail_bo::getCleanHTML($newBody,$usepurifier);
mail_bo::getCleanHTML($newBody);
mail_bo::$htmLawed_config['tidy'] = $tv;
}
}