* Mail: remove content of script and style tags from displayed html mail

Previously, only the script tags themselves were removed, not their content, and style tags were not completely removed if the regular expression ran into an error.
This commit is contained in:
ralf 2024-09-24 13:46:44 +02:00
parent b4a7362241
commit 24ccfbf3ab
2 changed files with 24 additions and 21 deletions

View File

@ -137,28 +137,26 @@ class HtmLawed
*/
static function getStyles(&$html)
{
$ct=0;
$newStyle = null;
if (stripos($html,'<style')!==false) $ct = preg_match_all('#<style(?:\s.*)?>(.+)</style>#isU', $html, $newStyle);
if ($ct>0)
if (stripos($html,'<style')!==false)
{
//error_log(__METHOD__.__LINE__.array2string($newStyle[0]));
$style2buffer = implode('',$newStyle[0]);
// only replace what we have found, we use it here, as we use the same routine in Api\Mail\Html::replaceTagsCompletley
// no need to do the extra routine
$html = str_ireplace($newStyle[0],'',$html);
}
if (!empty($style2buffer))
{
//error_log(__METHOD__.__LINE__.array2string($style2buffer));
$test = json_encode($style2buffer);
//error_log(__METHOD__.__LINE__.'#'.$test.'# ->'.strlen($style2buffer).' Error:'.json_last_error());
//if (json_last_error() != JSON_ERROR_NONE && strlen($style2buffer)>0)
if ($test=="null" && strlen($style2buffer)>0)
$newStyle = null;
preg_match_all('#<style(?:\s.*)?>(.+)</style>#isU', $html, $newStyle);
if (isset($newStyle))
{
// this should not be needed, unless something fails with charset detection/ wrong charset passed
error_log(__METHOD__.__LINE__.' Found Invalid sequence for utf-8 in CSS:'.$style2buffer.' Carset Detected:'.Api\Translation::detect_encoding($style2buffer));
$style2buffer = utf8_encode($style2buffer);
$style2buffer = implode("\n", $newStyle[0]);
// only replace what we have found, we use it here, as we use the same routine in Api\Mail\Html::replaceTagsCompletley
// no need to do the extra routine
$html = str_ireplace($newStyle[0],'',$html);
}
if (!empty($style2buffer))
{
$test = json_encode($style2buffer);
if ($test=="null" && strlen($style2buffer))
{
// this should not be needed, unless something fails with charset detection/ wrong charset passed
error_log(__METHOD__.__LINE__.' Found Invalid sequence for utf-8 in CSS:'.$style2buffer.' Carset Detected:'.Api\Translation::detect_encoding($style2buffer));
$style2buffer = utf8_encode($style2buffer);
}
}
}
$style = $style2buffer ?? '';

View File

@ -3584,6 +3584,11 @@ $filter['before']= date("d-M-Y", $cutoffdate2);
{
$alreadyHtmlLawed=false;
$newBody = $singleBodyPart['body'];
// remove script tags incl. their content, includes e.g. <script type="application/ld+json">
// before HtmLawed below only removes the script-tags but leaves the content
Mail\Html::replaceTagsCompletley($newBody, 'script');
//TODO:$newBody = $this->highlightQuotes($newBody);
#error_log(print_r($newBody,true));
if ($useTidy && extension_loaded('tidy'))
@ -3599,7 +3604,7 @@ $filter['before']= date("d-M-Y", $cutoffdate2);
{
$newBody = $cleaned;
}
// filter only the 'body', as we only want that part, if we throw away the Api\Html
// filter only the 'body', as we only want that part, if we throw away the html
if (preg_match('`(<htm.+?<body[^>]*>)(.+?)(</body>.*?</html>)`ims', $newBody, $matches) && !empty($matches[2]))
{
$hasOther = true;