From d4b0e572a17f333d9b645b4db68e63edd3130c86 Mon Sep 17 00:00:00 2001 From: Klaus Leithoff Date: Wed, 18 Nov 2009 10:38:15 +0000 Subject: [PATCH] some more documentation, some improvement in convertHTMLToText and replaceEmailAdresses --- phpgwapi/inc/class.translation.inc.php | 86 ++++++++++++++++---------- 1 file changed, 55 insertions(+), 31 deletions(-) diff --git a/phpgwapi/inc/class.translation.inc.php b/phpgwapi/inc/class.translation.inc.php index 681cb881a0..05f8f001d9 100644 --- a/phpgwapi/inc/class.translation.inc.php +++ b/phpgwapi/inc/class.translation.inc.php @@ -1000,22 +1000,28 @@ class translation } /** - * replace emailaddresses enclosed in <> (eg.: ) with the emailaddress only (e.g: me@you.de) - * always returns 1 - */ + * replace emailaddresses enclosed in <> (eg.: ) with the emailaddress only (e.g: me@you.de) + * as well as those emailadresses in links, and within broken links + * @param string the text to process + * @return 1 + */ static function replaceEmailAdresses(&$text) { // replace emailaddresses eclosed in <> (eg.: ) with the emailaddress only (e.g: me@you.de) - $text = preg_replace("/(<|<)(([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|>)/ie","'$2'", $text); + $text = preg_replace("/(<|<)*(([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|>)*/ie","'$2 '", $text); + $text = preg_replace("/(<|<a href=\")*(mailto:([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|>)*/ie","'$2 '", $text); + $text = preg_replace('~]+href=\"(mailto:)+([^"]+)\"[^>]*>~si','$2 ',$text); + $text = preg_replace("/(([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))( |\s)*(<\/a>)*( |\s)*(>|>)*/ie","'$1 '", $text); return 1; } - /* + /** * strip tags out of the message completely with their content - * param $_body is the text to be processed - * param $tag is the tagname which is to be removed. Note, that only the name of the tag is to be passed to the function + * @param string $_body is the text to be processed + * @param string $tag is the tagname which is to be removed. Note, that only the name of the tag is to be passed to the function * without the enclosing brackets - * param $endtag can be different from tag but should be used only, if begin and endtag are known to be different e.g.: + * @param string $endtag can be different from tag but should be used only, if begin and endtag are known to be different e.g.: + * @return void the modified text is passed via reference */ static function replaceTagsCompletley(&$_body,$tag,$endtag='') { @@ -1036,10 +1042,18 @@ class translation } } - static function convertHTMLToText($_html,$displayCharset=false,$stripcrl=false) + /** + * convertHTMLToText + * @param string $_html : Text to be stripped down + * @param string $displayCharset : charset to use; should be a valid charset + * @param bool $stripcrl : flag to indicate for the removal of all crlf \r\n + * @param bool $stripalltags : flag to indicate wether or not to strip $_html from all remaining tags + * @return text $_html : the modified text. + */ + static function convertHTMLToText($_html,$displayCharset=false,$stripcrl=false,$stripalltags=true) { if ($displayCharset === false) $displayCharset = self::$system_charset; - #error_log($_html); + //error_log(__METHOD__.$_html); #print '
'; #print "
"; print htmlspecialchars($_html);
 		#print "
"; @@ -1070,6 +1084,7 @@ class translation chr(174), ); $_html = preg_replace($Rules, $Replace, $_html); + // removing carriage return linefeeds if ($stripcrl === true ) $_html = preg_replace('@(\r\n)@i',' ',$_html); $tags = array ( @@ -1101,10 +1116,15 @@ class translation $_html = preg_replace($tags,$Replace,$_html); $_html = preg_replace('~\s*]*>~si',' - ',$_html); $_html = preg_replace('~]+>~s','',$_html); + // replace emailaddresses eclosed in <> (eg.: ) with the emailaddress only (e.g: me@you.de) + self::replaceEmailAdresses($_html); //convert hrefs to description -> URL $_html = preg_replace('~]+href=\"([^"]+)\"[^>]*>(.*)~si','[$2 -> $1]',$_html); - $_html = preg_replace('~<[^>^@]+>~s','',$_html); - #$_html = strip_tags($_html); + //this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content + if ( $stripalltags ) { + $_html = preg_replace('~<[^>^@]+>~s','',$_html); + //$_html = strip_tags($_html, ''); + } // reducing spaces $_html = preg_replace('~ +~s',' ',$_html); // we dont reduce whitespace at the start or the end of the line, since its used for structuring the document @@ -1115,8 +1135,7 @@ class translation $_html = html_entity_decode($_html, ENT_COMPAT, $displayCharset); - // replace emailaddresses eclosed in <> (eg.: ) with the emailaddress only (e.g: me@you.de) - self::replaceEmailAdresses($_html); + //self::replaceEmailAdresses($_html); #error_log($text); $pos = strpos($_html, 'blockquote'); #error_log("convert HTML2Text"); @@ -1145,24 +1164,29 @@ class translation foreach($quoteParts3 as $quotePart3) { $allowedLength = 76-strlen("\r\n$indentString"); - if (strlen($quotePart3) > $allowedLength) { - $s=explode(" ", $quotePart3); - $quotePart3 = ""; - $linecnt = 0; - foreach ($s as $k=>$v) { - $cnt = strlen($v); - // only break long words within the wordboundaries, - if($cnt > $allowedLength) { - $v=wordwrap($v, $allowedLength, "\r\n$indentString", true); + // only break lines, if not already indented + if ($quotePart3[0] != $indentString) + { + if (strlen($quotePart3) > $allowedLength) { + $s=explode(" ", $quotePart3); + $quotePart3 = ""; + $linecnt = 0; + foreach ($s as $k=>$v) { + $cnt = strlen($v); + // only break long words within the wordboundaries, + // but it may destroy links, so we check for href and dont do it if we find it + if($cnt > $allowedLength && stripos($v,'href=')===false) { + $v=wordwrap($v, $allowedLength, "\r\n$indentString", true); + } + // the rest should be broken at the start of the new word that exceeds the limit + if ($linecnt+$cnt > $allowedLength) { + $v="\r\n$indentString$v"; + $linecnt = 0; + } else { + $linecnt += $cnt; + } + if (strlen($v)) $quotePart3 .= (strlen($quotePart3) ? " " : "").$v; } - // the rest should be broken at the start of the new word that exceeds the limit - if ($linecnt+$cnt > $allowedLength) { - $v="\r\n$indentString$v"; - $linecnt = 0; - } else { - $linecnt += $cnt; - } - if (strlen($v)) $quotePart3 .= (strlen($quotePart3) ? " " : "").$v; } } $asciiTextBuff[] = $indentString . $quotePart3 ;