) with the emailaddress only (e.g: me@you.de)
if (!$noRepEmailAddr) self::replaceEmailAdresses($_html);
//convert hrefs to description -> URL
//$_html = preg_replace('~]+href=\"([^"]+)\"[^>]*>(.*)~si','[$2 -> $1]',$_html);
$_html = preg_replace_callback('~]+href=\"([^"]+)\"[^>]*>(.*?)~si','self::transform_url2text',$_html);
// reducing double \r\n to single ones, dont mess with pre sections
if ($stripcrl === true && $isHTML)
{
if (stripos($_html,'')!==false)
{
$contentArr = self::splithtmlByPRE($_html);
foreach ($contentArr as $k =>&$elem)
{
if (stripos($elem,'')===false)
{
//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
if ( $stripalltags ) {
$_html = preg_replace('~<[^>^@]+>~s','',$_html);
}
// strip out whitespace inbetween CR/LF
$elem = preg_replace('~\r\n\s+\r\n~si', "\r\n\r\n", $elem);
// strip out / reduce exess CR/LF
$elem = preg_replace('~\r\n{3,}~si',"\r\n\r\n",$elem);
}
}
$_html = implode('',$contentArr);
}
else
{
//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
if ( $stripalltags ) {
$_html = preg_replace('~<[^>^@]+>~s','',$_html);
}
// strip out whitespace inbetween CR/LF
$_html = preg_replace('~\r\n\s+\r\n~si', "\r\n\r\n", $_html);
// strip out / reduce exess CR/LF
$_html = preg_replace('~(\r\n){3,}~si',"\r\n\r\n",$_html);
}
}
//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
if ( $stripalltags ) {
$_html = preg_replace('~<[^>^@]+>~s','',$_html);
//$_html = strip_tags($_html, '');
}
// reducing spaces (not for input that was plain text from the beginning)
if ($isHTML) $_html = preg_replace('~ +~s',' ',$_html);
// restoring ampersands
$_html = str_replace('#amper#sand#','&',$_html);
// restoring lower|greater[or equal] than
$_html = str_replace('#lowerorequal#than#','<=',$_html);
$_html = str_replace('#greaterorequal#than#','>=',$_html);
$_html = str_replace('#lower#than#','<',$_html);
$_html = str_replace('#greater#than#','>',$_html);
//error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' -> '.$_html);
$_html = Api\Translation::convert($_html, $displayCharset, 'utf-8');
$_html = html_entity_decode($_html, ENT_COMPAT, 'utf-8');
//error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' After html_entity_decode: -> '.$_html);
//self::replaceEmailAdresses($_html);
$pos = strpos($_html, 'blockquote');
//error_log("convert HTML2Text: $_html");
if($pos === false) {
return $_html;
} else {
$indent = 0;
$indentString = '';
$quoteParts = preg_split('/#blockquote#type#cite#/', $_html, -1, PREG_SPLIT_OFFSET_CAPTURE);
foreach($quoteParts as $quotePart) {
if($quotePart[1] > 0) {
$indent++;
$indentString .= '>';
}
$quoteParts2 = preg_split('/#blockquote#end#cite#/', $quotePart[0], -1, PREG_SPLIT_OFFSET_CAPTURE);
foreach($quoteParts2 as $quotePart2) {
if($quotePart2[1] > 0) {
$indent--;
$indentString = substr($indentString, 0, $indent);
}
$quoteParts3 = explode("\r\n", $quotePart2[0]);
foreach($quoteParts3 as $quotePart3) {
//error_log(__METHOD__.__LINE__.'Line:'.$quotePart3);
$allowedLength = 76-strlen("\r\n$indentString");
// only break lines, if not already indented
if (substr($quotePart3,0,strlen($indentString)) != $indentString)
{
if (strlen($quotePart3) > $allowedLength) {
$s=explode(" ", $quotePart3);
$quotePart3 = "";
$linecnt = 0;
foreach ($s as $k=>$v) {
$cnt = strlen($v);
// only break long words within the wordboundaries,
// but it may destroy links, so we check for href and dont do it if we find it
if($cnt > $allowedLength && stripos($v,'href=')===false) {
//error_log(__METHOD__.__LINE__.'LongWordFound:'.$v);
$v=wordwrap($v, $allowedLength, "\r\n$indentString", true);
}
// the rest should be broken at the start of the new word that exceeds the limit
if ($linecnt+$cnt > $allowedLength) {
$v="\r\n$indentString$v";
//error_log(__METHOD__.__LINE__.'breaking here:'.$v);
$linecnt = 0;
} else {
$linecnt += $cnt;
}
if (strlen($v)) $quotePart3 .= (strlen($quotePart3) ? " " : "").$v;
}
}
}
//error_log(__METHOD__.__LINE__.'partString to return:'.$indentString . $quotePart3);
$asciiTextBuff[] = $indentString . $quotePart3 ;
}
}
}
return implode("\r\n",$asciiTextBuff);
}
}
/**
* Replace HTML lists with a plain text equivalent
*
* @param string $html
*
* @return string
*/
static function replaceLists($html)
{
// Purpose: turn every ol and ul element of the parsed document into a plain
// text node and return the re-serialised markup.
// - Each list is replaced by a text node that starts with CRLF and holds one
//   CRLF-terminated line per li element. Items of a ul are prefixed with an
//   asterisk, items of an ol with a running number (starting at 1 per list)
//   followed by a dot.
// - If loadHTML() reports failure, pending libxml errors are cleared and the
//   input $html is returned unchanged.
// - Before serialising, the first child of the document is removed
//   (presumably a helper node that was prepended for parsing - TODO confirm).
//
// NOTE(review): the next statement is damaged in this copy of the file. The
// needle passed to stripos(), the rest of the early-return guard and the
// creation of $dom are missing, so the method does not parse as it stands.
// Restore it from the upstream source before relying on this method.
// NOTE(review): getElementsByTagName() returns all descendant li elements,
// including those of nested lists, and never anything but li elements, so the
// else branch of the item loop looks unreachable - confirm.
// NOTE(review): $lists is a live DOMNodeList and the document is modified via
// replaceChild() while it is being iterated - verify that no list is skipped.
if(!$html || stripos($html, 'loadHTML(
''. Api\Translation::convert($html,preg_match('/]+content="[^>"]+charset=([^;"]+)/i', $html, $matches) ? $matches[1] : false, 'utf8'),
LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS
))
{
// do not sent possible xml errors in buffer further otherwise it may get caught as et2 template error in Widget
libxml_clear_errors();
// Failed to parse
return $html;
}
$dom->normalizeDocument();
foreach(array('ol','ul') as $list_type)
{
$lists = $dom->getElementsByTagName($list_type);
foreach($lists as $list)
{
$list_text = "\r\n";
$item_count = 0;
$prefix = $list_type == 'ul' ? ' * ' : '. ';
$elements = $list->getElementsByTagName('li');
foreach($elements as $element)
{
if($element->tagName == 'li')
{
$list_text .= ($list_type == 'ol' ? ' '. ++$item_count : '') . $prefix . $element->textContent . "\r\n";
}
else
{
$list_text .= $element->nodeValue;
}
}
$list->parentNode->replaceChild($dom->createTextNode($list_text), $list);
}
}
$dom->removeChild($dom->firstChild);
// do not sent possible xml errors in buffer further otherwise it may get caught as et2 template error in Widget
libxml_clear_errors();
return $dom->saveHTML();
}
/**
* Split HTML at its PRE sections, so that every pre-section is isolated in an array element of its own
*
* @author Leithoff, Klaus
* @param string $html
* @return string|array array of parts, or the unchanged $html if it is empty or contains no PRE section
*/
static function splithtmlByPRE($html)
{
	// Nothing to split: hand empty input back untouched (and never pass a
	// non-string into the string functions below).
	if (!$html)
	{
		return $html;
	}

	// Find the next opening pre tag at or after $offset. An opening tag is
	// either "<pre " (with attributes) or "<pre>" (bare); whichever occurs
	// first wins, so a bare tag in front of an attributed one is not missed.
	// Returns the byte position, or false if there is no further pre tag.
	$findPreStart = static function ($haystack, $offset)
	{
		$first = false;
		foreach (array('<pre ', '<pre>') as $needle)
		{
			$hit = stripos($haystack, $needle, $offset);
			if ($hit !== false && ($first === false || $hit < $first))
			{
				$first = $hit;
			}
		}
		return $first;
	};

	$closeTag = '</pre>';
	$closeLen = strlen($closeTag);

	$pos = $findPreStart($html, 0);
	if ($pos === false)
	{
		// no pre-section at all: return the html unaffected (as a string)
		return $html;
	}

	// everything in front of the first pre-section
	$html2ret = array(substr($html, 0, $pos));
	while ($pos !== false)
	{
		$endofpre = stripos($html, $closeTag, $pos);
		if ($endofpre === false)
		{
			// avoid an infinite loop if the end of the pre-section cannot be
			// found: return the rest of the content as the last part
			$html2ret[] = substr($html, $pos);
			break;
		}
		$afterPre = $endofpre + $closeLen;

		// the pre-section itself, including its opening and closing tag
		$html2ret[] = substr($html, $pos, $afterPre - $pos);

		// the content between this pre-section and the next one (or the end)
		$pos = $findPreStart($html, $afterPre);
		$html2ret[] = $pos !== false
			? substr($html, $afterPre, $pos - $afterPre)
			: substr($html, $afterPre);
	}
	return $html2ret;
}
}