From 76980e665ca53e9c39090f10d1c00da5ae8b9f64 Mon Sep 17 00:00:00 2001 From: Klaus Leithoff Date: Tue, 10 May 2011 15:32:44 +0000 Subject: [PATCH] HTML2Text: reduce CR/LF groups of more than 2 to a sequence of 2 CR/LF --- phpgwapi/inc/class.translation.inc.php | 39 ++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/phpgwapi/inc/class.translation.inc.php b/phpgwapi/inc/class.translation.inc.php index c8a827f022..f1f0740f37 100644 --- a/phpgwapi/inc/class.translation.inc.php +++ b/phpgwapi/inc/class.translation.inc.php @@ -1233,13 +1233,46 @@ class translation self::replaceEmailAdresses($_html); //convert hrefs to description -> URL $_html = preg_replace('~]+href=\"([^"]+)\"[^>]*>(.*)~si','[$2 -> $1]',$_html); + + // reducing double \r\n to single ones, dont mess with pre sections + if ($stripcrl === true ) + { + if (stripos($_html,'
')!==false)
+			{
+				$contentArr = html::splithtmlByPRE($_html);
+				foreach ($contentArr as $k =>&$elem)
+				{
+					if (stripos($elem,'
')===false)
+					{
+						// strip remaining tags from this chunk only; $_html is rebuilt from $contentArr after the loop, so changing $_html here would be lost
+						if ( $stripalltags ) {
+							$elem = preg_replace('~<[^>^@]+>~s','',$elem);
+						}
+						// collapse any whitespace run enclosed by two CR/LF (\s also matches CR and LF) to exactly two CR/LF
+						$elem = preg_replace('~\r\n\s+\r\n~si', "\r\n\r\n", $elem);
+						// reduce runs of three or more CR/LF to two; the group makes the quantifier apply to the whole \r\n pair, not just to \n
+						$elem = preg_replace('~(\r\n){3,}~si',"\r\n\r\n",$elem);
+					}
+				}
+				$_html = implode('',$contentArr);
+			}
+			else
+			{
+				// this is supposed to strip out all remaining tags; it sometimes takes out whole sections of content
+				if ( $stripalltags ) {
+					$_html = preg_replace('~<[^>^@]+>~s','',$_html);
+				}
+				// collapse any whitespace run enclosed by two CR/LF (\s also matches CR and LF) to exactly two CR/LF
+				$_html = preg_replace('~\r\n\s+\r\n~si', "\r\n\r\n", $_html);
+				// reduce runs of three or more CR/LF to exactly two
+				$_html = preg_replace('~(\r\n){3,}~si',"\r\n\r\n",$_html);
+			}
+		}
 		//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
 		if ( $stripalltags ) {
-			$_html = preg_replace('~<[^>^@]+>~s','',$_html);
+			//$_html = preg_replace('~<[^>^@]+>~s','',$_html);
 			//$_html = strip_tags($_html, '');
 		}
-		// reducing double \r\n to single ones
-		//$_html = str_replace("\r\n\r\n", "\r\n", $_html); // ToDo: this needsv to be more sophosticated 
 		// reducing spaces
 		$_html = preg_replace('~ +~s',' ',$_html);
 		// we dont reduce whitespace at the start or the end of the line, since its used for structuring the document