forked from extern/egroupware
Reintroduce tidy in the calls from the egw mail app where only getCleanHTML is called, since tidy corrects HTML structure issues for us
This commit is contained in:
parent
9f4a6db299
commit
f06cf7bb01
@ -4687,23 +4687,8 @@ class emailadmin_imapbase
|
|||||||
if ($bodyParts[$i]['charSet']===false) $bodyParts[$i]['charSet'] = translation::detect_encoding($bodyParts[$i]['body']);
|
if ($bodyParts[$i]['charSet']===false) $bodyParts[$i]['charSet'] = translation::detect_encoding($bodyParts[$i]['body']);
|
||||||
// add line breaks to $bodyParts
|
// add line breaks to $bodyParts
|
||||||
//error_log(__METHOD__.' ('.__LINE__.') '.' Charset:'.$bodyParts[$i]['charSet'].'->'.$bodyParts[$i]['body']);
|
//error_log(__METHOD__.' ('.__LINE__.') '.' Charset:'.$bodyParts[$i]['charSet'].'->'.$bodyParts[$i]['body']);
|
||||||
$newBody = translation::convert($bodyParts[$i]['body'], $bodyParts[$i]['charSet']);
|
$newBody = translation::convert_jsonsafe($bodyParts[$i]['body'], $bodyParts[$i]['charSet']);
|
||||||
//error_log(__METHOD__.' ('.__LINE__.') '.' MimeType:'.$bodyParts[$i]['mimeType'].'->'.$newBody);
|
//error_log(__METHOD__.' ('.__LINE__.') '.' MimeType:'.$bodyParts[$i]['mimeType'].'->'.$newBody);
|
||||||
/*
|
|
||||||
// in a way, this tests if we are having real utf-8 (the displayCharset) by now; we should if charsets reported (or detected) are correct
|
|
||||||
if (strtoupper(self::$displayCharset) == 'UTF-8')
|
|
||||||
{
|
|
||||||
$test = json_encode($newBody);
|
|
||||||
//error_log(__METHOD__.' ('.__LINE__.') '.'#'.$test.'# ->'.strlen($newBody).' Error:'.json_last_error());
|
|
||||||
if (json_last_error() != JSON_ERROR_NONE && strlen($newBody)>0)
|
|
||||||
{
|
|
||||||
// this should not be needed, unless something fails with charset detection/ wrong charset passed
|
|
||||||
error_log(__METHOD__.' ('.__LINE__.') '.' Charset Reported:'.$bodyParts[$i]['charSet'].' Carset Detected:'.translation::detect_encoding($bodyParts[$i]['body']));
|
|
||||||
$newBody = utf8_encode($newBody);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
//error_log(__METHOD__.' ('.__LINE__.') '.' before purify:'.$newBody);
|
|
||||||
$mailClass->activeMimeType = 'text/plain';
|
$mailClass->activeMimeType = 'text/plain';
|
||||||
if ($bodyParts[$i]['mimeType'] == 'text/html') {
|
if ($bodyParts[$i]['mimeType'] == 'text/html') {
|
||||||
$mailClass->activeMimeType = $bodyParts[$i]['mimeType'];
|
$mailClass->activeMimeType = $bodyParts[$i]['mimeType'];
|
||||||
|
@ -2123,6 +2123,22 @@ class mail_compose
|
|||||||
static $nonDisplayAbleCharacters = array('[\016]','[\017]',
|
static $nonDisplayAbleCharacters = array('[\016]','[\017]',
|
||||||
'[\020]','[\021]','[\022]','[\023]','[\024]','[\025]','[\026]','[\027]',
|
'[\020]','[\021]','[\022]','[\023]','[\024]','[\025]','[\026]','[\027]',
|
||||||
'[\030]','[\031]','[\032]','[\033]','[\034]','[\035]','[\036]','[\037]');
|
'[\030]','[\031]','[\032]','[\033]','[\034]','[\035]','[\036]','[\037]');
|
||||||
|
|
||||||
|
if (extension_loaded('tidy'))
|
||||||
|
{
|
||||||
|
$tidy = new tidy();
|
||||||
|
$cleaned = $tidy->repairString($_body, mail_bo::$tidy_config,'utf8');
|
||||||
|
// tidy reported errors (status 2): log them and keep the original body unchanged; otherwise use the repaired HTML
|
||||||
|
if($tidy->getStatus() == 2)
|
||||||
|
{
|
||||||
|
error_log(__METHOD__.' ('.__LINE__.') '.' ->'.$tidy->errorBuffer);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$_body = $cleaned;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
mail_bo::getCleanHTML($_body);
|
mail_bo::getCleanHTML($_body);
|
||||||
return preg_replace($nonDisplayAbleCharacters, '', $_body);
|
return preg_replace($nonDisplayAbleCharacters, '', $_body);
|
||||||
}
|
}
|
||||||
|
@ -2778,10 +2778,50 @@ class mail_ui
|
|||||||
$newBody = $singleBodyPart['body'];
|
$newBody = $singleBodyPart['body'];
|
||||||
//TODO:$newBody = $this->highlightQuotes($newBody);
|
//TODO:$newBody = $this->highlightQuotes($newBody);
|
||||||
#error_log(print_r($newBody,true));
|
#error_log(print_r($newBody,true));
|
||||||
|
if (extension_loaded('tidy'))
|
||||||
|
{
|
||||||
|
$tidy = new tidy();
|
||||||
|
$cleaned = $tidy->repairString($newBody, mail_bo::$tidy_config,'utf8');
|
||||||
|
// tidy reported errors (status 2): log them and keep the original body unchanged; otherwise use the repaired HTML
|
||||||
|
if($tidy->getStatus() == 2)
|
||||||
|
{
|
||||||
|
error_log(__METHOD__.' ('.__LINE__.') '.' ->'.$tidy->errorBuffer);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$newBody = $cleaned;
|
||||||
|
}
|
||||||
|
if (!$preserveHTML)
|
||||||
|
{
|
||||||
|
// filter only the 'body', as we only want that part, if we throw away the html
|
||||||
|
preg_match('`(<htm.+?<body[^>]*>)(.+?)(</body>.*?</html>)`ims', $newBody, $matches=array());
|
||||||
|
if ($matches[2])
|
||||||
|
{
|
||||||
|
$hasOther = true;
|
||||||
|
$newBody = $matches[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// htmLawed filter only the 'body'
|
||||||
|
preg_match('`(<htm.+?<body[^>]*>)(.+?)(</body>.*?</html>)`ims', $newBody, $matches=array());
|
||||||
|
if ($matches[2])
|
||||||
|
{
|
||||||
|
$hasOther = true;
|
||||||
|
$newBody = $matches[2];
|
||||||
|
}
|
||||||
|
$htmLawed = new egw_htmLawed();
|
||||||
|
// the next line should not be needed, but produces better results on HTML 2 Text conversion,
|
||||||
|
// as we switched off htmLawed's tidy functionality
|
||||||
|
$newBody = str_replace(array('&amp;','<DIV><BR></DIV>',"<DIV> </DIV>",'<div> </div>'),array('&','<BR>','<BR>','<BR>'),$newBody);
|
||||||
|
$newBody = $htmLawed->egw_htmLawed($newBody);
|
||||||
|
if ($hasOther && $preserveHTML) $newBody = $matches[1]. $newBody. $matches[3];
|
||||||
|
}
|
||||||
// do the cleanup, set for the use of purifier
|
// do the cleanup, set for the use of purifier
|
||||||
$newBodyBuff = $newBody;
|
//$newBodyBuff = $newBody;
|
||||||
mail_bo::getCleanHTML($newBody);
|
mail_bo::getCleanHTML($newBody);
|
||||||
|
/*
|
||||||
// in a way, this tests if we are having real utf-8 (the displayCharset) by now; we should if charsets reported (or detected) are correct
|
// in a way, this tests if we are having real utf-8 (the displayCharset) by now; we should if charsets reported (or detected) are correct
|
||||||
if (strtoupper(mail_bo::$displayCharset) == 'UTF-8')
|
if (strtoupper(mail_bo::$displayCharset) == 'UTF-8')
|
||||||
{
|
{
|
||||||
@ -2796,7 +2836,7 @@ class mail_ui
|
|||||||
mail_bo::$htmLawed_config['tidy'] = $tv;
|
mail_bo::$htmLawed_config['tidy'] = $tv;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
// removes stuff between http and ?http
|
// removes stuff between http and ?http
|
||||||
$Protocol = '(http:\/\/|(ftp:\/\/|https:\/\/))'; // only http:// gets removed, other protocolls are shown
|
$Protocol = '(http:\/\/|(ftp:\/\/|https:\/\/))'; // only http:// gets removed, other protocolls are shown
|
||||||
$newBody = preg_replace('~'.$Protocol.'[^>]*\?'.$Protocol.'~sim','$1',$newBody); // removes stuff between http:// and ?http://
|
$newBody = preg_replace('~'.$Protocol.'[^>]*\?'.$Protocol.'~sim','$1',$newBody); // removes stuff between http:// and ?http://
|
||||||
|
Loading…
Reference in New Issue
Block a user