2005-11-05 09:51:06 +01:00
< ? php
2008-03-15 16:30:15 +01:00
/**
2012-11-05 13:18:22 +01:00
* EGroupware API - Translations
2009-04-20 13:59:39 +02:00
*
2008-03-15 16:30:15 +01:00
* @ link http :// www . egroupware . org
* @ author Joseph Engo < jengo @ phpgroupware . org >
* @ author Dan Kuykendall < seek3r @ phpgroupware . org >
* Copyright ( C ) 2000 , 2001 Joseph Engo
* @ license http :// opensource . org / licenses / lgpl - license . php LGPL - GNU Lesser General Public License
* @ package api
* @ version $Id $
*/
2016-03-05 11:47:20 +01:00
use EGroupware\Api ;
2008-03-15 16:30:15 +01:00
/**
2012-11-05 13:18:22 +01:00
* EGroupware API - Translations
2009-04-20 13:59:39 +02:00
*
2016-03-05 11:47:20 +01:00
* @ deprecated use Api\Translation for non - mail specific methods
2008-03-15 16:30:15 +01:00
*/
2016-03-05 11:47:20 +01:00
class translation extends Api\Translation
2008-03-15 16:30:15 +01:00
{
2009-04-01 17:20:32 +02:00
/**
* Return the decoded string meeting some additional requirements for mailheaders
*
* @ param string $_string -> part of an mailheader
* @ param string $displayCharset the charset parameter specifies the character set to represent the result by ( if iconv_mime_decode is to be used )
* @ return string
*/
2009-04-20 13:59:39 +02:00
static function decodeMailHeader ( $_string , $displayCharset = 'utf-8' )
2009-04-01 17:20:32 +02:00
{
2009-04-03 22:12:35 +02:00
//error_log(__FILE__.','.__METHOD__.':'."called with $_string and CHARSET $displayCharset");
2015-09-15 13:30:05 +02:00
if ( function_exists ( 'imap_mime_header_decode' ))
2009-04-20 13:59:39 +02:00
{
2010-06-07 16:31:59 +02:00
// some characterreplacements, as they fail to translate
$sar = array (
'@(\x84|\x93|\x94)@' ,
2012-05-25 14:23:11 +02:00
'@(\x96|\x97|\x1a)@' ,
2010-06-07 16:31:59 +02:00
'@(\x91|\x92)@' ,
'@(\x85)@' ,
'@(\x86)@' ,
);
$rar = array (
'"' ,
'-' ,
'\'' ,
'...' ,
'+' ,
);
2009-04-01 17:20:32 +02:00
$newString = '' ;
$string = preg_replace ( '/\?=\s+=\?/' , '?= =?' , $_string );
$elements = imap_mime_header_decode ( $string );
2010-06-07 16:31:59 +02:00
2009-09-08 15:25:57 +02:00
$convertAtEnd = false ;
2009-04-20 13:59:39 +02:00
foreach (( array ) $elements as $element )
{
2012-10-24 12:54:44 +02:00
if ( $element -> charset == 'default' ) $element -> charset = self :: detect_encoding ( $element -> text );
2009-09-08 15:25:57 +02:00
if ( $element -> charset != 'x-unknown' )
{
2010-06-07 16:31:59 +02:00
if ( strtoupper ( $element -> charset ) != 'UTF-8' ) $element -> text = preg_replace ( $sar , $rar , $element -> text );
2012-11-07 12:46:14 +01:00
// check if there is a possible nested encoding; make sure that the inputstring and the decoded result are different to avoid loops
if ( preg_match ( '/\?=.+=\?/' , $element -> text ) && $element -> text != $_string )
2011-08-18 16:49:16 +02:00
{
$element -> text = self :: decodeMailHeader ( $element -> text , $element -> charset );
$element -> charset = $displayCharset ;
}
2009-09-08 15:25:57 +02:00
$newString .= self :: convert ( $element -> text , $element -> charset );
}
else
{
$newString .= $element -> text ;
$convertAtEnd = true ;
}
2009-04-01 17:20:32 +02:00
}
2009-10-19 09:41:03 +02:00
if ( $convertAtEnd ) $newString = self :: decodeMailHeader ( $newString , $displayCharset );
2009-04-01 17:20:32 +02:00
return preg_replace ( '/([\000-\012\015\016\020-\037\075])/' , '' , $newString );
2009-04-20 13:59:39 +02:00
}
elseif ( function_exists ( mb_decode_mimeheader ))
{
2015-04-26 13:00:01 +02:00
$matches = null ;
if ( preg_match_all ( '/=\?.*\?Q\?.*\?=/iU' , $string = $_string , $matches ))
2009-04-20 13:59:39 +02:00
{
foreach ( $matches [ 0 ] as $match )
{
2009-04-01 17:20:32 +02:00
$fixedMatch = str_replace ( '_' , ' ' , $match );
$string = str_replace ( $match , $fixedMatch , $string );
}
2015-04-26 13:00:01 +02:00
$string = str_replace ( '=?ISO8859-' , '=?ISO-8859-' ,
str_replace ( '=?windows-1258' , '=?ISO-8859-1' , $string ));
2009-04-01 17:20:32 +02:00
}
$string = mb_decode_mimeheader ( $string );
return preg_replace ( '/([\000-\012\015\016\020-\037\075])/' , '' , $string );
2009-04-20 13:59:39 +02:00
}
elseif ( function_exists ( iconv_mime_decode ))
{
2009-04-01 17:20:32 +02:00
// continue decoding also if an error occurs
$string = @ iconv_mime_decode ( $_string , 2 , $displayCharset );
return preg_replace ( '/([\000-\012\015\016\020-\037\075])/' , '' , $string );
}
// no decoding function available
return preg_replace ( '/([\000-\012\015\016\020-\037\075])/' , '' , $_string );
}
2009-09-04 15:40:10 +02:00
/**
2009-11-18 11:38:15 +01:00
* replace emailaddresses enclosed in <> ( eg .: < me @ you . de > ) with the emailaddress only ( e . g : me @ you . de )
* as well as those emailadresses in links , and within broken links
* @ param string the text to process
* @ return 1
*/
2009-09-04 15:40:10 +02:00
static function replaceEmailAdresses ( & $text )
{
2009-12-16 11:57:37 +01:00
//error_log($text);
2010-05-06 13:16:57 +02:00
//replace CRLF with something other to be preserved via preg_replace as CRLF seems to vanish
2016-03-05 11:47:20 +01:00
$text2 = str_replace ( " \r \n " , '<#cr-lf#>' , $text );
// replace emailaddresses eclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)
$text3 = preg_replace ( " /(<|<a href= \" )*(mailto:([ \ w \ .,-.,_.,0-9.]+)(@)([ \ w \ .,-.,_.,0-9.]+))(>|>)*/i " , " $ 2 " , $text2 );
2013-08-29 12:39:08 +02:00
//$text = preg_replace_callback("/(<|<a href=\")*(mailto:([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|>)*/i",'self::transform_mailto2text',$text);
//$text = preg_replace('~<a[^>]+href=\"(mailto:)+([^"]+)\"[^>]*>~si','$2 ',$text);
2016-03-05 11:47:20 +01:00
$text4 = preg_replace_callback ( '~<a[^>]+href=\"(mailto:)+([^"]+)\"[^>]*>([ @\w\.,-.,_.,0-9.]+)<\/a>~si' , 'self::transform_mailto2text' , $text3 );
$text5 = preg_replace ( " /(([ \ w \ .,-.,_.,0-9.]+)(@)([ \ w \ .,-.,_.,0-9.]+))( | \ s)*(< \ /a>)*( | \ s)*(>|>)*/i " , " $ 1 " , $text4 );
$text6 = preg_replace ( " /(<|<)*(([ \ w \ .,-.,_.,0-9.]+)@([ \ w \ .,-.,_.,0-9.]+))(>|>)*/i " , " $ 2 " , $text5 );
$text = str_replace ( '<#cr-lf#>' , " \r \n " , $text6 );
2009-09-04 15:40:10 +02:00
return 1 ;
}
2009-11-18 11:38:15 +01:00
/**
2009-09-04 15:40:10 +02:00
* strip tags out of the message completely with their content
2009-11-18 11:38:15 +01:00
* @ param string $_body is the text to be processed
* @ param string $tag is the tagname which is to be removed . Note , that only the name of the tag is to be passed to the function
2009-12-02 15:56:41 +01:00
* without the enclosing brackets
2009-11-18 11:38:15 +01:00
* @ param string $endtag can be different from tag but should be used only , if begin and endtag are known to be different e . g .: <!-- -->
2015-04-26 13:00:01 +02:00
* @ param bool $addbracesforendtag if endtag is given , you may decide if the </ and > braces are to be added ,
2009-12-02 15:56:41 +01:00
* or if you want the string to be matched as is
2009-11-18 11:38:15 +01:00
* @ return void the modified text is passed via reference
2009-09-04 15:40:10 +02:00
*/
2009-12-02 15:56:41 +01:00
static function replaceTagsCompletley ( & $_body , $tag , $endtag = '' , $addbracesforendtag = true )
2009-09-04 15:40:10 +02:00
{
if ( $tag ) $tag = strtolower ( $tag );
2012-05-25 14:23:11 +02:00
$singleton = false ;
if ( $endtag == '/>' ) $singleton = true ;
2009-09-04 15:40:10 +02:00
if ( $endtag == '' || empty ( $endtag ) || ! isset ( $endtag ))
{
2011-11-30 10:20:05 +01:00
$endtag = $tag ;
2009-09-04 15:40:10 +02:00
} else {
2011-11-30 10:20:05 +01:00
$endtag = strtolower ( $endtag );
2012-05-25 14:23:11 +02:00
//error_log(__METHOD__.' Using EndTag:'.$endtag);
2009-09-04 15:40:10 +02:00
}
// strip tags out of the message completely with their content
if ( $_body ) {
2012-05-25 14:23:11 +02:00
if ( $singleton )
2009-12-02 15:56:41 +01:00
{
2012-12-04 15:13:36 +01:00
//$_body = preg_replace('~<'.$tag.'[^>].*? '.$endtag.'~simU','',$_body);
$_body = preg_replace ( '~<?' . $tag . '[^>].* ' . $endtag . '~simU' , '' , $_body ); // we are in Ungreedy mode, so we expect * to be ungreedy without specifying ?
2009-12-02 15:56:41 +01:00
}
2012-05-25 14:23:11 +02:00
else
2009-12-02 15:56:41 +01:00
{
2014-04-14 17:09:07 +02:00
$found = null ;
2012-05-25 14:23:11 +02:00
if ( $addbracesforendtag === true )
{
2014-04-14 17:09:07 +02:00
if ( stripos ( $_body , '<' . $tag ) !== false ) $ct = preg_match_all ( '#<' . $tag . '(?:\s.*)?>(.+)</' . $endtag . '>#isU' , $_body , $found );
if ( $ct > 0 )
{
//error_log(__METHOD__.__LINE__.array2string($found[0]));
// only replace what we have found
$_body = str_ireplace ( $found [ 0 ], '' , $_body );
}
2012-05-25 14:23:11 +02:00
// remove left over tags, unfinished ones, and so on
$_body = preg_replace ( '~<' . $tag . '[^>]*?>~si' , '' , $_body );
}
if ( $addbracesforendtag === false )
{
2014-04-14 17:09:07 +02:00
if ( stripos ( $_body , '<' . $tag ) !== false ) $ct = preg_match_all ( '#<' . $tag . '(?:\s.*)?>(.+)' . $endtag . '#isU' , $_body , $found );
if ( $ct > 0 )
{
//error_log(__METHOD__.__LINE__.array2string($found[0]));
// only replace what we have found
$_body = str_ireplace ( $found [ 0 ], '' , $_body );
}
/*
2014-02-25 16:48:23 +01:00
$_body = preg_replace ( '~<' . $tag . '[^>]*?>(.*?)' . $endtag . '~simU' , '' , $_body );
2014-04-14 17:09:07 +02:00
*/
2012-05-25 14:23:11 +02:00
// remove left over tags, unfinished ones, and so on
2015-04-26 13:00:01 +02:00
$_body = preg_replace ( array ( '~<' . $tag . '[^>]*?>~si' , '~' . $endtag . '~' ), '' , $_body );
2012-05-25 14:23:11 +02:00
}
2009-12-02 15:56:41 +01:00
}
2009-09-04 15:40:10 +02:00
}
}
2013-08-29 12:39:08 +02:00
static function transform_mailto2text ( $matches )
{
//error_log(__METHOD__.__LINE__.array2string($matches));
// this is the actual url
$matches [ 2 ] = trim ( strip_tags ( $matches [ 2 ]));
$matches [ 3 ] = trim ( strip_tags ( $matches [ 3 ]));
$matches [ 2 ] = str_replace ( array ( '%40' , '%20' ), array ( '@' , ' ' ), $matches [ 2 ]);
$matches [ 3 ] = str_replace ( array ( '%40' , '%20' ), array ( '@' , ' ' ), $matches [ 3 ]);
return $matches [ 1 ] . $matches [ 2 ] . ( $matches [ 2 ] == $matches [ 3 ] ? ' ' : ' -> ' . $matches [ 3 ] . ' ' );
}
2013-04-25 16:18:49 +02:00
static function transform_url2text ( $matches )
{
//error_log(__METHOD__.__LINE__.array2string($matches));
$linkTextislink = false ;
// this is the actual url
$matches [ 2 ] = trim ( strip_tags ( $matches [ 2 ]));
if ( $matches [ 2 ] == $matches [ 1 ]) $linkTextislink = true ;
$matches [ 1 ] = str_replace ( ' ' , '%20' , $matches [ 1 ]);
return ( $linkTextislink ? ' ' : '[ ' ) . $matches [ 1 ] . ( $linkTextislink ? '' : ' -> ' . $matches [ 2 ]) . ( $linkTextislink ? ' ' : ' ]' );
}
2009-11-18 11:38:15 +01:00
/**
* convertHTMLToText
* @ param string $_html : Text to be stripped down
* @ param string $displayCharset : charset to use ; should be a valid charset
* @ param bool $stripcrl : flag to indicate for the removal of all crlf \r\n
* @ param bool $stripalltags : flag to indicate wether or not to strip $_html from all remaining tags
* @ return text $_html : the modified text .
*/
static function convertHTMLToText ( $_html , $displayCharset = false , $stripcrl = false , $stripalltags = true )
2009-09-04 15:40:10 +02:00
{
2013-02-22 11:58:39 +01:00
// assume input isHTML, but test the input anyway, because,
// if it is not, we may not want to strip whitespace
$isHTML = true ;
if ( strlen ( strip_tags ( $_html )) == strlen ( $_html ))
{
$isHTML = false ;
// return $_html; // maybe we should not proceed at all
}
2009-09-04 15:40:10 +02:00
if ( $displayCharset === false ) $displayCharset = self :: $system_charset ;
2009-11-18 11:38:15 +01:00
//error_log(__METHOD__.$_html);
2009-09-04 15:40:10 +02:00
#print '<hr>';
#print "<pre>"; print htmlspecialchars($_html);
#print "</pre>";
#print "<hr>";
2012-06-04 16:56:45 +02:00
if ( stripos ( $_html , 'style' ) !== false ) self :: replaceTagsCompletley ( $_html , 'style' ); // clean out empty or pagewide style definitions / left over tags
if ( stripos ( $_html , 'head' ) !== false ) self :: replaceTagsCompletley ( $_html , 'head' ); // Strip out stuff in head
if ( stripos ( $_html , '![if' ) !== false && stripos ( $_html , '<![endif]>' ) !== false ) self :: replaceTagsCompletley ( $_html , '!\[if' , '<!\[endif\]>' , false ); // Strip out stuff in ifs
if ( stripos ( $_html , '!--[if' ) !== false && stripos ( $_html , '<![endif]-->' ) !== false ) self :: replaceTagsCompletley ( $_html , '!--\[if' , '<!\[endif\]-->' , false ); // Strip out stuff in ifs
2010-01-19 14:29:16 +01:00
$Rules = array ( '@<script[^>]*?>.*?</script>@siU' , // Strip out javascript
2009-09-04 15:40:10 +02:00
'@&(quot|#34);@i' , // Replace HTML entities
'@&(amp|#38);@i' , // Ampersand &
'@&(lt|#60);@i' , // Less Than <
'@&(gt|#62);@i' , // Greater Than >
'@&(nbsp|#160);@i' , // Non Breaking Space
'@&(iexcl|#161);@i' , // Inverted Exclamation point
'@&(cent|#162);@i' , // Cent
'@&(pound|#163);@i' , // Pound
'@&(copy|#169);@i' , // Copyright
'@&(reg|#174);@i' , // Registered
2010-09-15 10:39:07 +02:00
'@&(trade|#8482);@i' , // trade
2010-08-31 16:23:58 +02:00
'@'@i' , // singleQuote
2010-09-15 10:39:07 +02:00
'@(\xc2\xa0)@' , // nbsp or tab (encoded windows-style)
2013-05-07 13:38:44 +02:00
'@(\xe2\x80\x8b)@' , // ZERO WIDTH SPACE
2009-09-04 15:40:10 +02:00
);
$Replace = array ( '' ,
'"' ,
2010-09-09 12:19:23 +02:00
'#amper#sand#' ,
2009-09-04 15:40:10 +02:00
'<' ,
'>' ,
' ' ,
chr ( 161 ),
chr ( 162 ),
chr ( 163 ),
2010-09-15 10:39:07 +02:00
'(C)' , //chr(169),// copyrighgt
'(R)' , //chr(174),// registered
'(TM)' , // trade
2010-08-31 16:23:58 +02:00
" ' " ,
2010-09-15 10:39:07 +02:00
' ' ,
2013-05-07 13:38:44 +02:00
'' ,
2009-09-04 15:40:10 +02:00
);
$_html = preg_replace ( $Rules , $Replace , $_html );
2009-11-18 11:38:15 +01:00
2010-08-31 16:23:58 +02:00
// removing carriage return linefeeds, preserve those enclosed in <pre> </pre> tags
2011-04-09 11:45:41 +02:00
if ( $stripcrl === true )
2010-08-31 16:23:58 +02:00
{
2014-10-10 12:10:36 +02:00
if ( stripos ( $_html , '<pre ' ) !== false || stripos ( $_html , '<pre>' ) !== false )
2010-08-31 16:23:58 +02:00
{
2016-03-13 12:22:44 +01:00
$contentArr = self :: splithtmlByPRE ( $_html );
2010-08-31 16:23:58 +02:00
foreach ( $contentArr as $k =>& $elem )
{
2014-10-10 12:10:36 +02:00
if ( stripos ( $elem , '<pre ' ) === false && stripos ( $elem , '<pre>' ) === false )
2010-08-31 16:23:58 +02:00
{
2011-05-05 10:59:54 +02:00
//$elem = str_replace('@(\r\n)@i',' ',$elem);
2013-09-18 14:14:59 +02:00
$elem = str_replace ( array ( " \r \n " , " \n " ),( $isHTML ? '' : ' ' ), $elem );
2010-08-31 16:23:58 +02:00
}
}
$_html = implode ( '' , $contentArr );
}
2011-05-05 10:59:54 +02:00
else
{
2013-09-18 14:14:59 +02:00
$_html = str_replace ( array ( " \r \n " , " \n " ),( $isHTML ? '' : ' ' ), $_html );
2011-05-05 10:59:54 +02:00
}
2010-08-31 16:23:58 +02:00
}
2009-09-04 15:40:10 +02:00
$tags = array (
0 => '~<h[123][^>]*>\r*\n*~si' ,
1 => '~<h[456][^>]*>\r*\n*~si' ,
2 => '~<table[^>]*>\r*\n*~si' ,
3 => '~<tr[^>]*>\r*\n*~si' ,
4 => '~<li[^>]*>\r*\n*~si' ,
5 => '~<br[^>]*>\r*\n*~si' ,
6 => '~<br[^>]*>~si' ,
7 => '~<p[^>]*>\r*\n*~si' ,
8 => '~<div[^>]*>\r*\n*~si' ,
9 => '~<hr[^>]*>\r*\n*~si' ,
10 => '/<blockquote type="cite">/' ,
2010-08-31 16:23:58 +02:00
11 => '/<blockquote>/' ,
12 => '~</blockquote>~si' ,
13 => '~<blockquote[^>]*>~si' ,
2016-02-02 13:57:21 +01:00
14 => '/<=([1234567890])/' ,
15 => '/>=([1234567890])/' ,
16 => '/<([1234567890])/' ,
17 => '/>([1234567890])/' ,
2009-09-04 15:40:10 +02:00
);
$Replace = array (
0 => " \r \n " ,
1 => " \r \n " ,
2 => " \r \n " ,
3 => " \r \n " ,
4 => " \r \n " ,
5 => " \r \n " ,
6 => " \r \n " ,
7 => " \r \n " ,
8 => " \r \n " ,
9 => " \r \n __________________________________________________ \r \n " ,
10 => '#blockquote#type#cite#' ,
2010-08-31 16:23:58 +02:00
11 => '#blockquote#type#cite#' ,
12 => '#blockquote#end#cite#' ,
13 => '#blockquote#type#cite#' ,
2016-02-02 13:57:21 +01:00
14 => '#lowerorequal#than#$1' ,
15 => '#greaterorequal#than#$1' ,
16 => '#lower#than#$1' ,
17 => '#greater#than#$1' ,
2009-09-04 15:40:10 +02:00
);
$_html = preg_replace ( $tags , $Replace , $_html );
$_html = preg_replace ( '~</t(d|h)>\s*<t(d|h)[^>]*>~si' , ' - ' , $_html );
$_html = preg_replace ( '~<img[^>]+>~s' , '' , $_html );
2009-11-18 11:38:15 +01:00
// replace emailaddresses eclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)
self :: replaceEmailAdresses ( $_html );
2009-09-04 15:40:10 +02:00
//convert hrefs to description -> URL
2013-04-25 16:18:49 +02:00
//$_html = preg_replace('~<a[^>]+href=\"([^"]+)\"[^>]*>(.*)</a>~si','[$2 -> $1]',$_html);
2014-02-25 16:48:23 +01:00
$_html = preg_replace_callback ( '~<a[^>]+href=\"([^"]+)\"[^>]*>(.*?)</a>~si' , 'self::transform_url2text' , $_html );
2011-05-10 17:32:44 +02:00
// reducing double \r\n to single ones, dont mess with pre sections
2013-02-22 11:58:39 +01:00
if ( $stripcrl === true && $isHTML )
2011-05-10 17:32:44 +02:00
{
2014-10-10 12:10:36 +02:00
if ( stripos ( $_html , '<pre ' ) !== false || stripos ( $_html , '<pre>' ) !== false )
2011-05-10 17:32:44 +02:00
{
2016-03-13 12:22:44 +01:00
$contentArr = self :: splithtmlByPRE ( $_html );
2011-05-10 17:32:44 +02:00
foreach ( $contentArr as $k =>& $elem )
{
2014-10-10 12:10:36 +02:00
if ( stripos ( $elem , '<pre ' ) === false && stripos ( $elem , '<pre>' ) === false )
2011-05-10 17:32:44 +02:00
{
//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
if ( $stripalltags ) {
$_html = preg_replace ( '~<[^>^@]+>~s' , '' , $_html );
}
// strip out whitespace inbetween CR/LF
$elem = preg_replace ( '~\r\n\s+\r\n~si' , " \r \n \r \n " , $elem );
2011-08-04 09:27:23 +02:00
// strip out / reduce exess CR/LF
2011-05-10 17:32:44 +02:00
$elem = preg_replace ( '~\r\n{3,}~si' , " \r \n \r \n " , $elem );
}
}
$_html = implode ( '' , $contentArr );
}
else
{
//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
if ( $stripalltags ) {
$_html = preg_replace ( '~<[^>^@]+>~s' , '' , $_html );
}
// strip out whitespace inbetween CR/LF
$_html = preg_replace ( '~\r\n\s+\r\n~si' , " \r \n \r \n " , $_html );
2011-08-04 09:27:23 +02:00
// strip out / reduce exess CR/LF
2011-05-10 17:32:44 +02:00
$_html = preg_replace ( '~(\r\n){3,}~si' , " \r \n \r \n " , $_html );
}
}
2009-11-18 11:38:15 +01:00
//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
if ( $stripalltags ) {
2011-05-11 12:39:55 +02:00
$_html = preg_replace ( '~<[^>^@]+>~s' , '' , $_html );
2009-11-18 11:38:15 +01:00
//$_html = strip_tags($_html, '<a>');
}
2013-02-22 11:58:39 +01:00
// reducing spaces (not for input that was plain text from the beginning)
if ( $isHTML ) $_html = preg_replace ( '~ +~s' , ' ' , $_html );
2010-09-09 12:19:23 +02:00
// restoring ampersands
$_html = str_replace ( '#amper#sand#' , '&' , $_html );
2016-02-02 13:57:21 +01:00
// restoring lower|greater[or equal] than
$_html = str_replace ( '#lowerorequal#than#' , '<=' , $_html );
$_html = str_replace ( '#greaterorequal#than#' , '>=' , $_html );
$_html = str_replace ( '#lower#than#' , '<' , $_html );
$_html = str_replace ( '#greater#than#' , '>' , $_html );
2011-05-05 10:59:54 +02:00
//error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' -> '.$_html);
2009-09-04 15:40:10 +02:00
$_html = html_entity_decode ( $_html , ENT_COMPAT , $displayCharset );
2011-05-05 10:59:54 +02:00
//error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' After html_entity_decode: -> '.$_html);
2009-11-18 11:38:15 +01:00
//self::replaceEmailAdresses($_html);
2009-09-04 15:40:10 +02:00
$pos = strpos ( $_html , 'blockquote' );
2010-09-09 12:19:23 +02:00
//error_log("convert HTML2Text: $_html");
2009-09-04 15:40:10 +02:00
if ( $pos === false ) {
return $_html ;
} else {
$indent = 0 ;
$indentString = '' ;
2010-08-31 16:23:58 +02:00
$quoteParts = preg_split ( '/#blockquote#type#cite#/' , $_html , - 1 , PREG_SPLIT_OFFSET_CAPTURE );
2009-09-04 15:40:10 +02:00
foreach ( $quoteParts as $quotePart ) {
if ( $quotePart [ 1 ] > 0 ) {
$indent ++ ;
$indentString .= '>' ;
}
2010-08-31 16:23:58 +02:00
$quoteParts2 = preg_split ( '/#blockquote#end#cite#/' , $quotePart [ 0 ], - 1 , PREG_SPLIT_OFFSET_CAPTURE );
2009-09-04 15:40:10 +02:00
foreach ( $quoteParts2 as $quotePart2 ) {
if ( $quotePart2 [ 1 ] > 0 ) {
$indent -- ;
$indentString = substr ( $indentString , 0 , $indent );
}
$quoteParts3 = explode ( " \r \n " , $quotePart2 [ 0 ]);
foreach ( $quoteParts3 as $quotePart3 ) {
2010-09-09 12:19:23 +02:00
//error_log(__METHOD__.__LINE__.'Line:'.$quotePart3);
2009-09-04 15:40:10 +02:00
$allowedLength = 76 - strlen ( " \r \n $indentString " );
2009-11-18 11:38:15 +01:00
// only break lines, if not already indented
2010-09-09 12:19:23 +02:00
if ( substr ( $quotePart3 , 0 , strlen ( $indentString )) != $indentString )
2009-11-18 11:38:15 +01:00
{
if ( strlen ( $quotePart3 ) > $allowedLength ) {
$s = explode ( " " , $quotePart3 );
$quotePart3 = " " ;
$linecnt = 0 ;
foreach ( $s as $k => $v ) {
$cnt = strlen ( $v );
// only break long words within the wordboundaries,
// but it may destroy links, so we check for href and dont do it if we find it
if ( $cnt > $allowedLength && stripos ( $v , 'href=' ) === false ) {
2010-09-09 12:19:23 +02:00
//error_log(__METHOD__.__LINE__.'LongWordFound:'.$v);
2009-11-18 11:38:15 +01:00
$v = wordwrap ( $v , $allowedLength , " \r \n $indentString " , true );
}
// the rest should be broken at the start of the new word that exceeds the limit
if ( $linecnt + $cnt > $allowedLength ) {
$v = " \r \n $indentString $v " ;
2010-09-09 12:19:23 +02:00
//error_log(__METHOD__.__LINE__.'breaking here:'.$v);
2009-11-18 11:38:15 +01:00
$linecnt = 0 ;
} else {
$linecnt += $cnt ;
}
if ( strlen ( $v )) $quotePart3 .= ( strlen ( $quotePart3 ) ? " " : " " ) . $v ;
2009-09-04 15:40:10 +02:00
}
}
}
2010-09-09 12:19:23 +02:00
//error_log(__METHOD__.__LINE__.'partString to return:'.$indentString . $quotePart3);
2009-09-04 15:40:10 +02:00
$asciiTextBuff [] = $indentString . $quotePart3 ;
}
}
}
return implode ( " \r \n " , $asciiTextBuff );
}
}
2016-03-13 12:22:44 +01:00
/**
* split html by PRE tag , return array with all content pre - sections isolated in array elements
* @ author Leithoff , Klaus
* @ param string html
* @ return mixed array of parts or unaffected html
*/
static function splithtmlByPRE ( $html )
{
$searchFor = '<pre ' ;
$pos = stripos ( $html , $searchFor );
if ( $pos === false )
{
$searchFor = '<pre>' ;
$pos = stripos ( $html , $searchFor );
}
if ( $pos === false )
{
return $html ;
}
$html2ret [] = substr ( $html , 0 , $pos );
while ( $pos !== false )
{
$endofpre = stripos ( $html , '</pre>' , $pos );
$length = $endofpre - $pos + 6 ;
$html2ret [] = substr ( $html , $pos , $length );
$searchFor = '<pre ' ;
$pos = stripos ( $html , $searchFor , $endofpre + 6 );
if ( $pos === false )
{
$searchFor = '<pre>' ;
$pos = stripos ( $html , $searchFor , $endofpre + 6 );
}
$html2ret [] = ( $pos ? substr ( $html , $endofpre + 6 , $pos - ( $endofpre + 6 )) : substr ( $html , $endofpre + 6 ));
//$pos=false;
}
//error_log(__METHOD__.__LINE__.array2string($html2ret));
return $html2ret ;
}
2008-03-15 16:30:15 +01:00
}