diff --git a/emailadmin/inc/class.emailadmin_imapbase.inc.php b/emailadmin/inc/class.emailadmin_imapbase.inc.php index 308152cc37..075a3661c6 100644 --- a/emailadmin/inc/class.emailadmin_imapbase.inc.php +++ b/emailadmin/inc/class.emailadmin_imapbase.inc.php @@ -4006,11 +4006,9 @@ class emailadmin_imapbase /** * clean a message from elements regarded as potentially harmful * param string/reference $_html is the text to be processed - * param boolean $usepurify - obsolet, as we always use htmlLawed - * param boolean $cleanTags - use tidy (if available) to clean/balance tags * return nothing */ - static function getCleanHTML(&$_html, $usepurify = false, $cleanTags=true) + static function getCleanHTML(&$_html) { // remove CRLF and TAB as it is of no use in HTML. // but they matter in <pre>, so we rather don't
@@ -4026,66 +4024,34 @@ class emailadmin_imapbase
 		//if (stripos($_html,'![if')!==false && stripos($_html,'')!==false) translation::replaceTagsCompletley($_html,'!\[if','',false); // Strip out stuff in ifs
 		//if (stripos($_html,'!--[if')!==false && stripos($_html,'')!==false) translation::replaceTagsCompletley($_html,'!--\[if','',false); // Strip out stuff in ifs
 		//error_log(__METHOD__.' ('.__LINE__.') '.$_html);
-		// force the use of kses, as it is still have the edge over purifier with some stuff
-		$usepurify = true;
-		if ($usepurify)
-		{
-			// we need a customized config, as we may allow external images, $GLOBALS['egw_info']['user']['preferences']['mail']['allowExternalIMGs']
-			if (get_magic_quotes_gpc() === 1) $_html = stripslashes($_html);
-			// Strip out doctype in head, as htmlLawed cannot handle it TODO: Consider extracting it and adding it afterwards
-			if (stripos($_html,'!doctype')!==false) translation::replaceTagsCompletley($_html,'!doctype');
-			if (stripos($_html,'?xml:namespace')!==false) translation::replaceTagsCompletley($_html,'\?xml:namespace','/>',false);
-			if (stripos($_html,'?xml version')!==false) translation::replaceTagsCompletley($_html,'\?xml version','\?>',false);
-			if (strpos($_html,'!CURSOR')!==false) translation::replaceTagsCompletley($_html,'!CURSOR');
-			// htmLawed filter only the 'body'
-			//preg_match('`(]*>)(.+?)(.*?)`ims', $_html, $matches);
-			//if ($matches[2])
-			//{
-			//	$hasOther = true;
-			//	$_html = $matches[2];
-			//}
-			// purify got switched to htmLawed
-			// some testcode to test purifying / htmlawed
-			//$_html = "
hi
there
kram
".$_html; - $_html = html::purify($_html,self::$htmLawed_config,array(),true); - //if ($hasOther) $_html = $matches[1]. $_html. $matches[3]; - // clean out comments , should not be needed as purify should do the job. - $search = array( - '@url\(http:\/\/[^\)].*?\)@si', // url calls e.g. in style definitions - '@@', // Strip multi-line comments including CDATA - ); - $_html = preg_replace($search,"",$_html); - // remove non printable chars - $_html = preg_replace('/([\000-\012])/','',$_html); - //error_log(__METHOD__.':'.__LINE__.':'.$_html); - } - // using purify above should have tidied the tags already sufficiently - if ($usepurify == false && $cleanTags==true) - { - if (extension_loaded('tidy')) - { - $tidy = new tidy(); - $cleaned = $tidy->repairString($_html, self::$tidy_config,'utf8'); - // Found errors. Strip it all so there's some output - if($tidy->getStatus() == 2) - { - error_log(__METHOD__.' ('.__LINE__.') '.' ->'.$tidy->errorBuffer); - } - else - { - $_html = $cleaned; - } - } - else - { - //$to = ini_get('max_execution_time'); - //@set_time_limit(10); - $htmLawed = new egw_htmLawed(); - $_html = $htmLawed->egw_htmLawed($_html); - //error_log(__METHOD__.' 
('.__LINE__.') '.$_html); - //@set_time_limit($to); - } - } + + if (get_magic_quotes_gpc() === 1) $_html = stripslashes($_html); + // Strip out doctype in head, as htmlLawed cannot handle it TODO: Consider extracting it and adding it afterwards + if (stripos($_html,'!doctype')!==false) translation::replaceTagsCompletley($_html,'!doctype'); + if (stripos($_html,'?xml:namespace')!==false) translation::replaceTagsCompletley($_html,'\?xml:namespace','/>',false); + if (stripos($_html,'?xml version')!==false) translation::replaceTagsCompletley($_html,'\?xml version','\?>',false); + if (strpos($_html,'!CURSOR')!==false) translation::replaceTagsCompletley($_html,'!CURSOR'); + // htmLawed filter only the 'body' + //preg_match('`(]*>)(.+?)(.*?)`ims', $_html, $matches); + //if ($matches[2]) + //{ + // $hasOther = true; + // $_html = $matches[2]; + //} + // purify got switched to htmLawed + // some testcode to test purifying / htmlawed + //$_html = "
hi
there
kram
".$_html; + $_html = html::purify($_html,self::$htmLawed_config,array(),true); + //if ($hasOther) $_html = $matches[1]. $_html. $matches[3]; + // clean out comments , should not be needed as purify should do the job. + $search = array( + '@url\(http:\/\/[^\)].*?\)@si', // url calls e.g. in style definitions + '@@', // Strip multi-line comments including CDATA + ); + $_html = preg_replace($search,"",$_html); + // remove non printable chars + $_html = preg_replace('/([\000-\012])/','',$_html); + //error_log(__METHOD__.':'.__LINE__.':'.$_html); } /** @@ -4448,7 +4414,9 @@ class emailadmin_imapbase if ($part) { $_encoding = $part->getBodyPartDecode($_partID); + //error_log(__METHOD__.__LINE__.':'.$_encoding.'#'); $partToReturn = $part->getBodyPart($_partID, $_stream); + //error_log(__METHOD__.__LINE__.':'.$partToReturn.'#'); } // if we get an empty result, server may have trouble fetching data with UID FETCH $_uid (BINARY.PEEK[$_partID]) // thus we trigger a second go with UID FETCH $_uid (BODY.PEEK[$_partID]) @@ -4674,15 +4642,17 @@ class emailadmin_imapbase * getdisplayableBody - creates the bodypart of the email as textual representation * @param object $mailClass the mailClass object to be used * @param array $bodyParts with the bodyparts + * @param boolean $preserveHTML switch to preserve HTML + * @param boolean $useTidy switch to use tidy * @return string a preformatted string with the mails converted to text */ - static function &getdisplayableBody(&$mailClass, $bodyParts, $preserveHTML = false) + static function &getdisplayableBody(&$mailClass, $bodyParts, $preserveHTML = false, $useTidy = true) { $message=''; for($i=0; $i peplace it with a single space $newBody = str_replace("\n"," ",$newBody); // convert HTML to text, as we dont want HTML in infologs - if (extension_loaded('tidy')) + if ($useTidy && extension_loaded('tidy')) { $tidy = new tidy(); $cleaned = $tidy->repairString($newBody, self::$tidy_config,'utf8'); @@ -4784,7 +4754,7 @@ class 
emailadmin_imapbase if ($preserveHTML==false) $newBody = translation::convertHTMLToText($newBody,self::$displayCharset,true,true); //error_log(__METHOD__.' ('.__LINE__.') '.' after convertHTMLToText:'.$newBody); if ($preserveHTML==false) $newBody = nl2br($newBody); // we need this, as htmLawed removes \r\n - $mailClass->getCleanHTML($newBody,false,$preserveHTML); // remove stuff we regard as unwanted + $mailClass->getCleanHTML($newBody); // remove stuff we regard as unwanted if ($preserveHTML==false) $newBody = str_replace("
","\r\n",$newBody); //error_log(__METHOD__.' ('.__LINE__.') '.' after getClean:'.$newBody); $message .= $newBody; diff --git a/mail/inc/class.mail_activesync.inc.php b/mail/inc/class.mail_activesync.inc.php index 5c9fefbe16..39daa0ef6e 100644 --- a/mail/inc/class.mail_activesync.inc.php +++ b/mail/inc/class.mail_activesync.inc.php @@ -537,7 +537,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send $this->mail->reopen($folder); $bodyStruct = $this->mail->getMessageBody($uid, 'html_only'); - $bodyBUFFHtml = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true); + $bodyBUFFHtml = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true,false); if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' html_only:'.$bodyBUFFHtml); if ($bodyBUFFHtml != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/html')) { // may be html @@ -549,7 +549,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send if ($this->debugLevel>0) debugLog("MIME Body".' Type:plain, fetch text:'); // if the new part of the message is html, we must preserve it, and handle that the original mail is text/plain $bodyStruct = $this->mail->getMessageBody($uid,'never_display');//'never_display'); - $bodyBUFF = $this->mail->getdisplayableBody($this->mail,$bodyStruct);//$this->ui->getdisplayableBody($bodyStruct,false); + $bodyBUFF = $this->mail->getdisplayableBody($this->mail,$bodyStruct,false,false); if ($bodyBUFF != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/plain')) { if ($this->debugLevel>0) debugLog("MIME Body".' 
Type:plain (fetched with never_display):'.$bodyBUFF); $Body = $Body."\r\n".$bodyBUFF.$sigTextPlain; @@ -609,7 +609,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send $this->mail->reopen($folder); $bodyStruct = $this->mail->getMessageBody($uid, 'html_only'); - $bodyBUFFHtml = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true); + $bodyBUFFHtml = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true,false); if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' html_only:'.$bodyBUFFHtml); if ($bodyBUFFHtml != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/html')) { // may be html @@ -621,7 +621,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send if ($this->debugLevel>0) debugLog("MIME Body".' Type:plain, fetch text:'); // if the new part of the message is html, we must preserve it, and handle that the original mail is text/plain $bodyStruct = $this->mail->getMessageBody($uid,'never_display');//'never_display'); - $bodyBUFF = $this->mail->getdisplayableBody($this->mail,$bodyStruct);//$this->ui->getdisplayableBody($bodyStruct,false); + $bodyBUFF = $this->mail->getdisplayableBody($this->mail,$bodyStruct,false,false); if ($bodyBUFF != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/plain')) { if ($this->debugLevel>0) debugLog("MIME Body".' Type:plain (fetched with never_display):'.$bodyBUFF); $Body = $Body."\r\n".$bodyBUFF.$sigTextPlain; @@ -819,7 +819,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send if ($this->debugLevel>0) debugLog(__METHOD__.__LINE__. ' for message with ID:'.$id.' 
with headers:'.array2string($headers)); if ($bodypreference === false) { $bodyStruct = $this->mail->getMessageBody($id, 'only_if_no_text', '', null, true,$_folderName); - $body = $this->mail->getdisplayableBody($this->mail,$bodyStruct); + $body = $this->mail->getdisplayableBody($this->mail,$bodyStruct,false,false); //$body = html_entity_decode($body,ENT_QUOTES,$this->mail->detect_encoding($body)); if (stripos($body,'/is", "", $body); // in case there is only a html part // remove all other html @@ -852,7 +852,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send $css =''; $bodyStruct = $this->mail->getMessageBody($id, 'html_only', '', null, true,$_folderName); if ($this->debugLevel>2) debugLog(__METHOD__.__LINE__.' html_only Struct:'.array2string($bodyStruct)); - $body = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true);//$this->ui->getdisplayableBody($bodyStruct,false); + $body = $this->mail->getdisplayableBody($this->mail,$bodyStruct,true,false); if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' html_only:'.$body); if ($body != "" && (is_array($bodyStruct) && $bodyStruct[0]['mimeType']=='text/html')) { // may be html @@ -865,7 +865,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send $output->airsyncbasenativebodytype=1; $bodyStruct = $this->mail->getMessageBody($id,'never_display', '', null, true,$_folderName); //'only_if_no_text'); if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' plain text Struct:'.array2string($bodyStruct)); - $body = $this->mail->getdisplayableBody($this->mail,$bodyStruct);//$this->ui->getdisplayableBody($bodyStruct,false); + $body = $this->mail->getdisplayableBody($this->mail,$bodyStruct,false,false); if ($this->debugLevel>3) debugLog(__METHOD__.__LINE__.' 
never display html(plain text only):'.$body); } // whatever format decode (using the correct encoding) @@ -881,7 +881,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send } else { - $plainBody = $this->mail->getdisplayableBody($this->mail,$bodyStructplain);//$this->ui->getdisplayableBody($bodyStruct,false); + $plainBody = $this->mail->getdisplayableBody($this->mail,$bodyStructplain,false,false); } } //if ($this->debugLevel>0) debugLog("MIME Body".$body); @@ -1033,7 +1033,7 @@ class mail_activesync implements activesync_plugin_write, activesync_plugin_send if ($this->debugLevel>0) debugLog("Plaintext Body:".$plainBody); /* we use plainBody (set above) instead $bodyStruct = $this->mail->getMessageBody($id,'only_if_no_text'); //'never_display'); - $plain = $this->mail->getdisplayableBody($this->mail,$bodyStruct);//$this->ui->getdisplayableBody($bodyStruct,false); + $plain = $this->mail->getdisplayableBody($this->mail,$bodyStruct); $plain = html_entity_decode($plain,ENT_QUOTES,$this->mail->detect_encoding($plain)); $plain = strip_tags($plain); //$plain = str_replace("\n","\r\n",str_replace("\r","",$plain)); diff --git a/mail/inc/class.mail_compose.inc.php b/mail/inc/class.mail_compose.inc.php index 4645b994ea..ecc83c998c 100644 --- a/mail/inc/class.mail_compose.inc.php +++ b/mail/inc/class.mail_compose.inc.php @@ -543,7 +543,7 @@ class mail_compose $content['mail_htmltext'] = implode('',$contentArr); } } - $content['mail_htmltext'] = $this->_getCleanHTML($content['mail_htmltext'], false, false); + $content['mail_htmltext'] = $this->_getCleanHTML($content['mail_htmltext']); $content['mail_htmltext'] = translation::convertHTMLToText($content['mail_htmltext'],$charset=false,false,true); $content['body'] = $content['mail_htmltext']; @@ -2118,12 +2118,12 @@ class mail_compose } - static function _getCleanHTML($_body, $usepurify = false, $cleanTags=true) + static function _getCleanHTML($_body) { static $nonDisplayAbleCharacters = 
array('[\016]','[\017]', '[\020]','[\021]','[\022]','[\023]','[\024]','[\025]','[\026]','[\027]', '[\030]','[\031]','[\032]','[\033]','[\034]','[\035]','[\036]','[\037]'); - mail_bo::getCleanHTML($_body, $usepurify, $cleanTags); + mail_bo::getCleanHTML($_body); return preg_replace($nonDisplayAbleCharacters, '', $_body); } diff --git a/mail/inc/class.mail_ui.inc.php b/mail/inc/class.mail_ui.inc.php index 4a1fe63b61..a057922db7 100644 --- a/mail/inc/class.mail_ui.inc.php +++ b/mail/inc/class.mail_ui.inc.php @@ -2733,11 +2733,13 @@ class mail_ui { $singleBodyPart['body'] = preg_replace($sar,$rar,$singleBodyPart['body']); } + //error_log(__METHOD__.__LINE__.'reports:'.$singleBodyPart['charSet']); $singleBodyPart['body'] = translation::convert_jsonsafe($singleBodyPart['body'],$singleBodyPart['charSet']); //error_log(__METHOD__.__LINE__.array2string($singleBodyPart)); if($singleBodyPart['mimeType'] == 'text/plain') { $newBody = @htmlentities($singleBodyPart['body'],ENT_QUOTES, strtoupper(mail_bo::$displayCharset)); + //error_log(__METHOD__.__LINE__.'..'.$newBody); // if empty and charset is utf8 try sanitizing the string in question if (empty($newBody) && strtolower($singleBodyPart['charSet'])=='utf-8') $newBody = @htmlentities(iconv('utf-8', 'utf-8', $singleBodyPart['body']),ENT_QUOTES, strtoupper(mail_bo::$displayCharset)); // if the conversion to htmlentities fails somehow, try without specifying the charset, which defaults to iso- @@ -2749,6 +2751,7 @@ class mail_ui // create links for websites if ($modifyURI) $newBody = html::activate_links($newBody); + //error_log(__METHOD__.__LINE__.'..'.$newBody); // redirect links for websites if you use no cookies #if (!($GLOBALS['egw_info']['server']['usecookies'])) # $newBody = preg_replace("/href=(\"|\')((http(s?):\/\/)|(www\.))([\w,\-,\/,\?,\=,\.,&,!\n,\%,@,\(,\),\*,#,:,~,\+]+)(\"|\')/ie", @@ -2766,7 +2769,7 @@ class mail_ui // to display a mailpart of mimetype plain/text, may be better taged as preformatted #$newBody = 
nl2br($newBody); // since we do not display the message as HTML anymore we may want to insert good linebreaking (for visibility). - //error_log($newBody); + //error_log(__METHOD__.__LINE__.'..'.$newBody); // dont break lines that start with > (&gt; as the text was processed with htmlentities before) $newBody = "<pre>".mail_bo::wordwrap($newBody,90,"\n",'&gt;')."</pre>"; } @@ -2777,9 +2780,8 @@ class mail_ui #error_log(print_r($newBody,true)); // do the cleanup, set for the use of purifier - $usepurifier = true; $newBodyBuff = $newBody; - mail_bo::getCleanHTML($newBody,$usepurifier); + mail_bo::getCleanHTML($newBody); // in a way, this tests if we are having real utf-8 (the displayCharset) by now; we should if charsets reported (or detected) are correct if (strtoupper(mail_bo::$displayCharset) == 'UTF-8') { @@ -2790,7 +2792,7 @@ class mail_ui $newBody = $newBodyBuff; $tv = mail_bo::$htmLawed_config['tidy']; mail_bo::$htmLawed_config['tidy'] = 0; - mail_bo::getCleanHTML($newBody,$usepurifier); + mail_bo::getCleanHTML($newBody); mail_bo::$htmLawed_config['tidy'] = $tv; } }