Fix a problem when importing mails as InfoLog entries if they contain no text/plain part; this required moving some functionality from bocompose to bofelamimail

2009-03-09 11:05:16 +00:00 · 2009-03-09 11:05:16 +00:00 · 5d6bc0d0b3
commit 5d6bc0d0b3
parent 9e30fe47c8
3 changed files with 186 additions and 137 deletions
--- a/felamimail/inc/class.bocompose.inc.php
+++ b/felamimail/inc/class.bocompose.inc.php
@ -145,142 +145,13 @@
 		static function replaceEmailAdresses(&$text)
 		{
 			// replace emailaddresses eclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)
-			$text = preg_replace("/(<|&lt;)(([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|&gt;)/ie","'$2'", $text);
+			bofelamimail::replaceEmailAdresses($text);
 			return 1;
 		}

-		function convertHTMLToText($_html) 
+		function convertHTMLToText(&$_html) 
 		{
-			#error_log($_html);
-			#print '<hr>';
-			#print "<pre>"; print htmlspecialchars($_html); 
-			#print "</pre>";
-			#print "<hr>";
-			bofelamimail::replaceTagsCompletley($_html,'style');
-			$Rules = array ('@<script[^>]*?>.*?</script>@si', // Strip out javascript
-				'@&(quot|#34);@i',                // Replace HTML entities
-				'@&(amp|#38);@i',                 //   Ampersand &
-				'@&(lt|#60);@i',                  //   Less Than <
-				'@&(gt|#62);@i',                  //   Greater Than >
-				'@&(nbsp|#160);@i',               //   Non Breaking Space
-				'@&(iexcl|#161);@i',              //   Inverted Exclamation point
-				'@&(cent|#162);@i',               //   Cent
-				'@&(pound|#163);@i',              //   Pound
-				'@&(copy|#169);@i',               //   Copyright
-				'@&(reg|#174);@i',                //   Registered
-			);
-			$Replace = array ('',
-				'"',
-				'&',
-				'<',
-				'>',
-				' ',
-				chr(161),
-				chr(162),
-				chr(163),
-				chr(169),
-				chr(174),
-			);
-			$_html = preg_replace($Rules, $Replace, $_html);
-			$tags = array (
-				0 => '~<h[123][^>]*>~si',
-				1 => '~<h[456][^>]*>~si',
-				2 => '~<table[^>]*>~si',
-				3 => '~<tr[^>]*>~si',
-				4 => '~<li[^>]*>~si',
-				5 => '~<br[^>]*>\r*\n*~si',
-				6 => '~<br[^>]*>~si',
-				7 => '~<p[^>]*>~si',
-				8 => '~<div[^>]*>~si',
-				9 => '~<hr[^>]*>~si',
-				10 => '/<blockquote type="cite">/',
-			);
-			$Replace = array (
-				0 => "\r\n",
-				1 => "\r\n",
-				2 => "\r\n",
-				3 => "\r\n",
-				4 => "\r\n",
-				5 => "\r\n",
-				6 => "\r\n",
-				7 => "\r\n",
-				8 => "\r\n",
-				9 => "\r\n__________________________________________________\r\n",
-				10 => '#blockquote#type#cite#',
-			);
-    		$_html = preg_replace($tags,$Replace,$_html);
-    		$_html = preg_replace('~</t(d|h)>\s*<t(d|h)[^>]*>~si',' - ',$_html);
-			$_html = preg_replace('~<img[^>]+>~s','',$_html);
-			//convert hrefs to description -> URL
-			$_html = preg_replace('~<a[^>]+href=\"([^"]+)\"[^>]*>(.*)</a>~si','[$2 -> $1]',$_html);
-    		$_html = preg_replace('~<[^>^@]+>~s','',$_html);
-			#$_html = strip_tags($_html);
-    		// reducing spaces
-    		$_html = preg_replace('~ +~s',' ',$_html);
-			// we dont reduce whitespace at the start or the end of the line, since its used for structuring the document
-    		#$_html = preg_replace('~^\s+~m','',$_html);
-    		#$_html = preg_replace('~\s+$~m','',$_html);
-			// restoring the preserved blockquote
-			$_html = preg_replace('~#blockquote#type#cite#~s','<blockquote type="cite">',$_html);
-
-			
-			$_html = html_entity_decode($_html, ENT_COMPAT, $this->displayCharset);
-			// replace emailaddresses eclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)
-			self::replaceEmailAdresses($_html);
-			#error_log($text);
-			$pos = strpos($_html, 'blockquote');
-			#error_log("convert HTML2Text");
-			if($pos === false) {
-				return $_html;
-			} else {
-				$indent = 0;
-				$indentString = '';
-				
-				$quoteParts = preg_split('/<blockquote type="cite">/', $_html, -1, PREG_SPLIT_OFFSET_CAPTURE);
-
-				foreach($quoteParts as $quotePart) {
-					if($quotePart[1] > 0) {
-						$indent++;
-						$indentString .= '>';
-					}
-					$quoteParts2 = preg_split('/<\/blockquote>/', $quotePart[0], -1, PREG_SPLIT_OFFSET_CAPTURE);
-				
-					foreach($quoteParts2 as $quotePart2) {
-						if($quotePart2[1] > 0) {
-							$indent--;
-							$indentString = substr($indentString, 0, $indent);
-						}
-
-						$quoteParts3 = explode("\r\n", $quotePart2[0]);
-
-						foreach($quoteParts3 as $quotePart3) {
-							$allowedLength = 76-strlen("\r\n$indentString");
-							if (strlen($quotePart3) > $allowedLength) {
-								$s=explode(" ", $quotePart3);
-								$quotePart3 = "";
-								$linecnt = 0;
-								foreach ($s as $k=>$v) {
-									$cnt = strlen($v);
-									// only break long words within the wordboundaries, 
-									if($cnt > $allowedLength) {
-										$v=wordwrap($v, $allowedLength, "\r\n$indentString", true);
-									}
-									// the rest should be broken at the start of the new word that exceeds the limit  
-									if ($linecnt+$cnt > $allowedLength) {
-										$v="\r\n$indentString$v";
-										$linecnt = 0;
-									} else {
-										$linecnt += $cnt;
-									}
-									if (strlen($v))  $quotePart3 .= (strlen($quotePart3) ? " " : "").$v;
-								}
-							}
-							$asciiTextBuff[] = $indentString . $quotePart3 ;
-						}
-					}
-				}
-				return implode("\r\n",$asciiTextBuff);
-			}
+			return bofelamimail::convertHTMLToText($_html);
 		}
 		
 		function convertHTMLToTextTiny($_html) 
--- a/felamimail/inc/class.bofelamimail.inc.php
+++ b/felamimail/inc/class.bofelamimail.inc.php
@ -777,12 +777,35 @@

 		static function getCleanHTML(&$_html)
 		{
-
+			#echo $_html;exit;
 			$kses = new kses();
 			$kses->AddProtocol('cid');
 			// since check protocoll is called for every value associated to an attribute we have to add color and background-color to the valid protocolls
 			$kses->AddProtocol('color');
 			$kses->AddProtocol('background-color');
+			#$kses->AddHTML('html', array(
+			#		'xmlns' => array(),
+			#		'lang' => array(),
+			#	)
+			#);
+			#$kses->AddHTML('head');
+			#$kses->AddHTML('body', array(
+			#		'class' => array(),
+			#		'id' => array(),
+			#	)
+			#);
+			#$kses->AddHTML('meta', array(
+			#		'http-equiv' => array(),
+			#		'content' => array(),
+			#	)
+			#);
+			#$kses->AddHTML('link',array(
+			#		'rel' => array(), // ="stylesheet" 
+			#		'type' => array(), //="text/css" 
+			#		'href' => array(),
+			#		'media' => array(),
+			#	)
+			#);
 			$kses->AddHTML(
 				'p', array(
 					'align'	=> array('minlen' =>   1, 'maxlen' =>  10)
@ -814,6 +837,7 @@
 			);
 			$kses->AddHTML(
 				"div",array(
+			#		'class' => array(),
 					'align' => array('maxlen' => 10)
 				)
 			);
@ -955,6 +979,153 @@
 			$_html = preg_replace('/([\000-\012])/','',$_html);
 		}

+		/**
+		* Replace email addresses enclosed in angle brackets (e.g. <me@you.de> or &lt;me@you.de&gt;)
+		* with the bare address (e.g. me@you.de).
+		*
+		* @param string &$text text to clean up, modified in place
+		* @return int always 1
+		*/
+		static function replaceEmailAdresses(&$text)
+		{
+			// No /e (eval) modifier here: it is deprecated since PHP 5.5 and removed in PHP 7.0,
+			// and the plain '$2' backreference yields exactly the same bare address.
+			$text = preg_replace("/(<|&lt;)(([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|&gt;)/i", '$2', $text);
+			return 1;
+		}
+
+		/**
+		* Convert an HTML fragment into plain text.
+		*
+		* Style and script elements are dropped, a set of common entities is decoded, block level
+		* tags become CRLF line breaks, links are rewritten as "[description -> URL]" and the content
+		* of <blockquote type="cite"> elements is re-wrapped and prefixed with one '>' per quote level.
+		*
+		* @param string $_html the HTML to convert
+		* @param boolean $stripcrl if true, CRLF sequences in the markup are replaced by a space
+		*	before the tags are converted to line breaks
+		* @return string the plain text version
+		*/
+		static function convertHTMLToText($_html,$stripcrl=false)
+		{
+			// <style> elements are removed together with their content
+			self::replaceTagsCompletley($_html,'style');
+			$Rules = array ('@<script[^>]*?>.*?</script>@si', // Strip out javascript
+				'@&(quot|#34);@i',                // Replace HTML entities
+				'@&(amp|#38);@i',                 //   Ampersand &
+				'@&(lt|#60);@i',                  //   Less Than <
+				'@&(gt|#62);@i',                  //   Greater Than >
+				'@&(nbsp|#160);@i',               //   Non Breaking Space
+				'@&(iexcl|#161);@i',              //   Inverted Exclamation point
+				'@&(cent|#162);@i',               //   Cent
+				'@&(pound|#163);@i',              //   Pound
+				'@&(copy|#169);@i',               //   Copyright
+				'@&(reg|#174);@i',                //   Registered
+			);
+			$Replace = array ('',
+				'"',
+				// placeholder instead of a literal '&': emitting '&' here would let e.g. "&amp;lt;" be
+				// decoded a second time by the following rules and by html_entity_decode() below;
+				// the placeholder is turned into '&' once all decoding is done
+				'#amper#sand#',
+				'<',
+				'>',
+				' ',
+				chr(161),
+				chr(162),
+				chr(163),
+				chr(169),
+				chr(174),
+			);
+			$_html = preg_replace($Rules, $Replace, $_html);
+			// removing carriage return linefeeds, line breaks are then created from the tags only
+			if ($stripcrl === true) $_html = preg_replace('@(\r\n)@i',' ',$_html);
+			$tags = array (
+				0 => '~<h[123][^>]*>\r*\n*~si',
+				1 => '~<h[456][^>]*>\r*\n*~si',
+				2 => '~<table[^>]*>\r*\n*~si',
+				3 => '~<tr[^>]*>\r*\n*~si',
+				4 => '~<li[^>]*>\r*\n*~si',
+				5 => '~<br[^>]*>\r*\n*~si',
+				6 => '~<br[^>]*>~si',
+				7 => '~<p[^>]*>\r*\n*~si',
+				8 => '~<div[^>]*>\r*\n*~si',
+				9 => '~<hr[^>]*>\r*\n*~si',
+				10 => '/<blockquote type="cite">/',
+			);
+			$Replace = array (
+				0 => "\r\n",
+				1 => "\r\n",
+				2 => "\r\n",
+				3 => "\r\n",
+				4 => "\r\n",
+				5 => "\r\n",
+				6 => "\r\n",
+				7 => "\r\n",
+				8 => "\r\n",
+				9 => "\r\n__________________________________________________\r\n",
+				// opening cite blockquotes are protected from the generic tag removal below
+				10 => '#blockquote#type#cite#',
+			);
+			$_html = preg_replace($tags,$Replace,$_html);
+			// adjacent table cells are separated by a dash
+			$_html = preg_replace('~</t(d|h)>\s*<t(d|h)[^>]*>~si',' - ',$_html);
+			$_html = preg_replace('~<img[^>]+>~s','',$_html);
+			// convert hrefs to description -> URL; the description is matched non-greedy, so that
+			// several links within one text are converted one by one and each keeps its own URL
+			$_html = preg_replace('~<a[^>]+href=\"([^"]+)\"[^>]*>(.*?)</a>~si','[$2 -> $1]',$_html);
+			// remove all remaining tags (closing blockquote tags contain a '/' only and stay out of
+			// this match's exclusions, so they are removed unless handled below via the split)
+			$_html = preg_replace('~<[^>^@]+>~s','',$_html);
+			// reducing spaces
+			$_html = preg_replace('~ +~s',' ',$_html);
+			// we dont reduce whitespace at the start or the end of the line, since its used for structuring the document
+			// restoring the preserved blockquote
+			$_html = preg_replace('~#blockquote#type#cite#~s','<blockquote type="cite">',$_html);
+
+			$_html = html_entity_decode($_html, ENT_COMPAT, self::$displayCharset);
+			// replace emailaddresses eclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)
+			self::replaceEmailAdresses($_html);
+			// all decoding is done, restore the ampersands preserved above
+			$_html = str_replace('#amper#sand#','&',$_html);
+
+			if (strpos($_html, 'blockquote') === false) {
+				return $_html;
+			}
+
+			// quoted parts present: prefix each line with one '>' per quote level and re-wrap long lines
+			$indent = 0;
+			$indentString = '';
+			$asciiTextBuff = array();
+
+			$quoteParts = preg_split('/<blockquote type="cite">/', $_html, -1, PREG_SPLIT_OFFSET_CAPTURE);
+
+			foreach($quoteParts as $quotePart) {
+				// every part with an offset > 0 follows an opening blockquote tag
+				if($quotePart[1] > 0) {
+					$indent++;
+					$indentString .= '>';
+				}
+				$quoteParts2 = preg_split('/<\/blockquote>/', $quotePart[0], -1, PREG_SPLIT_OFFSET_CAPTURE);
+
+				foreach($quoteParts2 as $quotePart2) {
+					// every part with an offset > 0 follows a closing blockquote tag
+					if($quotePart2[1] > 0) {
+						// never drop below level 0, in case the closing tags are unbalanced
+						if ($indent > 0) $indent--;
+						$indentString = substr($indentString, 0, $indent);
+					}
+
+					$quoteParts3 = explode("\r\n", $quotePart2[0]);
+
+					foreach($quoteParts3 as $quotePart3) {
+						// 76 characters per line, including the line break and the quote prefix
+						$allowedLength = 76-strlen("\r\n$indentString");
+						if (strlen($quotePart3) > $allowedLength) {
+							$s=explode(" ", $quotePart3);
+							$quotePart3 = "";
+							$linecnt = 0;
+							foreach ($s as $k=>$v) {
+								$cnt = strlen($v);
+								// only break long words within the wordboundaries,
+								if($cnt > $allowedLength) {
+									$v=wordwrap($v, $allowedLength, "\r\n$indentString", true);
+								}
+								// the rest should be broken at the start of the new word that exceeds the limit
+								if ($linecnt+$cnt > $allowedLength) {
+									$v="\r\n$indentString$v";
+									$linecnt = 0;
+								} else {
+									$linecnt += $cnt;
+								}
+								if (strlen($v))  $quotePart3 .= (strlen($quotePart3) ? " " : "").$v;
+							}
+						}
+						$asciiTextBuff[] = $indentString . $quotePart3 ;
+					}
+				}
+			}
+			return implode("\r\n",$asciiTextBuff);
+		}
+
 		/**
 		* retrieve a attachment
 		*
--- a/infolog/inc/class.infolog_ui.inc.php
+++ b/infolog/inc/class.infolog_ui.inc.php
@ -1376,7 +1376,8 @@ class infolog_ui
 			$bofelamimail->reopen($mailbox);

 			$headers = $bofelamimail->getMessageHeader($uid,$partid);
-			$bodyParts = $bofelamimail->getMessageBody($uid,'text/plain',$partid);
+			// dont force retrieval of the textpart, let felamimail preferences decide
+			$bodyParts = $bofelamimail->getMessageBody($uid,'',$partid);
 			$attachments = $bofelamimail->getMessageAttachments($uid,$partid);

 			if ($bofelamimail->isSentFolder($mailbox)) $mailaddress = $bofelamimail->decode_header($headers['TO']);
@ -1389,6 +1390,13 @@ class infolog_ui
 			{
 				// add line breaks to $bodyParts
 				$newBody  = $GLOBALS['egw']->translation->convert($bodyParts[$i]['body'], $bodyParts[$i]['charSet']);
+				if ($bodyParts[$i]['mimeType'] == 'text/html') {
+					// convert HTML to text, as we dont want HTML in infologs
+					$newBody = $bofelamimail->convertHTMLToText($newBody,true);
+					$bofelamimail->getCleanHTML($newBody); // new Body passed by reference
+					$message .= $newBody;
+					continue;
+				}
 				$newBody = strip_tags($newBody);
 				$newBody  = explode("\n",$newBody);
 				// create it new, with good line breaks
@ -1401,8 +1409,8 @@ class infolog_ui
 						// if you want to strip all empty lines uncomment the following
 						#continue;
 					}
-					$bodyAppend = $bofelamimail->wordwrap($value,75,"\n");
-					$message .= $bodyAppend;
+					$message .= $bofelamimail->wordwrap($value,75,"\n");
+					#$message .= $bodyAppend;
 				}
 			}

@ -1422,7 +1430,6 @@ class infolog_ui
 					unset($attachments[$num]['attachment']);
 				}
 			}
-
 			return $this->edit($this->bo->import_mail(
 				$mailaddress,
 				$subject,