* email: fix for Helpdesk Issue:3430 (Unstructured display of text when converting email to infolog)

This commit is contained in:
Klaus Leithoff 2012-06-04 11:31:39 +00:00
parent f926ab9c1c
commit fda576c01f
2 changed files with 47 additions and 15 deletions

View File

@ -43,7 +43,7 @@ class felamimail_bo
*
* @array
*/
static $htmLawed_config = array('comment'=>1,
static $htmLawed_config = array('comment'=>1, //remove comments
//'keep_bad'=>2,
'balance'=>0,//turn off tag-balancing (config['balance']=>0). That will not introduce any security risk; only standards-compliant tag nesting check/filtering will be turned off (basic tag-balance will remain; i.e., there won't be any unclosed tag, etc., after filtering)
'tidy'=>1,
@ -1235,6 +1235,7 @@ class felamimail_bo
// Strip out doctype in head, as htmLawed cannot handle it. TODO: consider extracting it and adding it afterwards
if (stripos($_html,'!doctype')!==false) self::replaceTagsCompletley($_html,'!doctype');
if (stripos($_html,'?xml:namespace')!==false) self::replaceTagsCompletley($_html,'\?xml:namespace','/>',false);
if (stripos($_html,'?xml version')!==false) self::replaceTagsCompletley($_html,'\?xml version','\?>',false);
if (strpos($_html,'!CURSOR')!==false) self::replaceTagsCompletley($_html,'!CURSOR');
// purify got switched to htmLawed
$_html = html::purify($_html,self::$htmLawed_config,array(),true);
@ -2506,7 +2507,13 @@ class felamimail_bo
if (self::$debug) error_log(__METHOD__.__LINE__."$_folderName,$_startMessage, $_numberOfMessages, $_sort, $reverse, ".array2string($_filter).", $_thisUIDOnly");
$reverse = (bool)$_reverse;
// get the list of messages to fetch
if (self::$debug) $starttime = microtime (true);
$this->reopen($_folderName);
if (self::$debug)
{
$endtime = microtime(true) - $starttime;
error_log(__METHOD__. " time used for reopen: ".$endtime.' for Folder:'.$_folderName);
}
//$this->icServer->selectMailbox($_folderName);
$rByUid = true; // try searching by uid. this var will be passed by reference to getSortedList, and may be set to false, if UID retrieval fails
#print "<pre>";
@ -2523,8 +2530,13 @@ class felamimail_bo
//$_filter['range'] ="$_startMessage:*";
}
if (self::$debug) error_log(__METHOD__.__LINE__."$_folderName, $_sort, $reverse, ".array2string($_filter).", $rByUid");
if (self::$debug) $starttime = microtime (true);
$sortResult = $this->getSortedList($_folderName, $_sort, $reverse, $_filter, $rByUid, $_cacheResult);
if (self::$debug)
{
$endtime = microtime(true) - $starttime;
error_log(__METHOD__. " time used for getSortedList: ".$endtime.' for Folder:'.$_folderName.' Filter:'.array2string($_filter).' Ids:'.array2string($_thisUIDOnly));
}
if (self::$debug) error_log(__METHOD__.__LINE__.array2string($sortResult));
#$this->icServer->setDebug(false);
#print "</pre>";
@ -2560,6 +2572,7 @@ class felamimail_bo
$queryString = implode(',', $sortResult);
// fetch the data for the selected messages
if (self::$debug) $starttime = microtime(true);
$headersNew = $this->icServer->getSummary($queryString, $rByUid);
if ($headersNew == null && empty($_thisUIDOnly)) // -> if we request uids, do not try to look for messages with ids
{
@ -2573,7 +2586,12 @@ class felamimail_bo
$headersNew[] = $rv[0];
}
}
if (self::$debug) error_log(__METHOD__.__LINE__.' Query:'.$queryString.' Result:'.array2string($headersNew));
if (self::$debug)
{
$endtime = microtime(true) - $starttime;
error_log(__METHOD__. " time used for getSummary: ".$endtime.' for Folder:'.$_folderName.' Filter:'.array2string($_filter));
error_log(__METHOD__.__LINE__.' Query:'.$queryString.' Result:'.array2string($headersNew));
}
$count = 0;
@ -2583,6 +2601,7 @@ class felamimail_bo
$count = 0;
if (is_array($headersNew)) {
if (self::$debug) $starttime = microtime(true);
foreach((array)$headersNew as $headerObject) {
//if($count == 0) error_log(__METHOD__.array2string($headerObject));
if (empty($headerObject['UID'])) continue;
@ -2672,6 +2691,11 @@ class felamimail_bo
$count++;
}
if (self::$debug)
{
$endtime = microtime(true) - $starttime;
error_log(__METHOD__. " time used for the rest: ".$endtime.' for Folder:'.$_folderName);
}
//self::$debug=false;
// sort the messages to the requested displayorder
if(is_array($retValue['header'])) {
@ -4339,6 +4363,7 @@ class felamimail_bo
}
}
*/
//error_log(__METHOD__.__LINE__.' before purify:'.$newBody);
if ($bodyParts[$i]['mimeType'] == 'text/html') {
// as translation::convert reduces \r\n to \n and purifier eats \n -> replace it with a single space
$newBody = str_replace("\n"," ",$newBody);
@ -4359,13 +4384,23 @@ class felamimail_bo
}
else
{
// htmLawed filter only the 'body'
preg_match('`(<htm.+?<body[^>]*>)(.+?)(</body>.*?</html>)`ims', $newBody, $matches);
if ($matches[2])
{
$hasOther = true;
$newBody = $matches[2];
}
$htmLawed = new egw_htmLawed();
$newBody = $htmLawed->egw_htmLawed($newBody);
//$newBody = html::purify($newBody,html::purifyCreateHTMLTidyConfig());
if ($hasOther && $preserveHTML) $newBody = $matches[1]. $newBody. $matches[3];
}
//error_log(__METHOD__.__LINE__.' after purify:'.$newBody);
if ($preserveHTML==false) $newBody = $bofelamimail->convertHTMLToText($newBody,true);
$bofelamimail->getCleanHTML($newBody,false,$preserveHTML); // new Body passed by reference
//error_log(__METHOD__.__LINE__.' after convertHTMLToText:'.$newBody);
if ($preserveHTML==false) $newBody = nl2br($newBody); // we need this, as htmLawed removes \r\n
$bofelamimail->getCleanHTML($newBody,false,$preserveHTML); // remove stuff we regard as unwanted
if ($preserveHTML==false) $newBody = str_replace("<br />","\r\n",$newBody);
//error_log(__METHOD__.__LINE__.' after getClean:'.$newBody);
$message .= $newBody;
continue;

View File

@ -79,7 +79,7 @@ class egw_htmLawed
);
*/
$this->Configuration = array('comment'=>0,
$this->Configuration = array('comment'=>1, //remove comments
'balance'=>0,//turn off tag-balancing (config['balance']=>0). That will not introduce any security risk; only standards-compliant tag nesting check/filtering will be turned off (basic tag-balance will remain; i.e., there won't be any unclosed tag, etc., after filtering)
'tidy'=>1,
'elements' => "* -script",
@ -125,13 +125,10 @@ function hl_my_tag_transform($element, $attribute_array)
if (isset($attribute_array['alt'])) $attribute_array['alt'] = ($attribute_array['alt']=='image'?'':$attribute_array['alt']);
if (isset($attribute_array['alt'])&&strpos($attribute_array['alt'],'@')!==false) $attribute_array['alt']=str_replace('@','(at)',$attribute_array['alt']);
}
if($element == 'a')
{
if (isset($attribute_array['title']))
{
if (strpos($attribute_array['title'],'@')!==false) $attribute_array['title']=str_replace('@','(at)',$attribute_array['title']);
}
}
/*
// Elements other than 'span' or 'span' without a 'style' attribute are returned unchanged
if($element == 'span' && isset($attribute_array['style']))
@ -211,12 +208,12 @@ function hl_email_tag_transform($element, $attribute_array)
}
}
}
if($element == 'a')
{
if (isset($attribute_array['title']))
{
if (strpos($attribute_array['title'],'@')!==false) $attribute_array['title']=str_replace('@','(at)',$attribute_array['title']);
}
if($element == 'a')
{
if (isset($attribute_array['name']) && isset($attribute_array['id'])) $attribute_array['id'] = $attribute_array['name'];
if (strpos($attribute_array['href'],'@')!==false) $attribute_array['href'] = str_replace('@','%40',$attribute_array['href']);
if (strpos($attribute_array['href'],'#')===0)