Fix whitespace being swallowed when converting HTML to text

This commit is contained in:
Klaus Leithoff 2011-05-05 08:59:54 +00:00
parent 504b762ff7
commit 98122085f7

View File

@@ -1183,11 +1183,16 @@ class translation
{ {
if (stripos($elem,'<pre>')===false) if (stripos($elem,'<pre>')===false)
{ {
$elem = preg_replace('@(\r\n)@i',' ',$elem); //$elem = str_replace('@(\r\n)@i',' ',$elem);
$elem = str_replace(array("\r\n","\n"),' ',$elem);
} }
} }
$_html = implode('',$contentArr); $_html = implode('',$contentArr);
} }
else
{
$_html = str_replace(array("\r\n","\n"),' ',$_html);
}
} }
$tags = array ( $tags = array (
0 => '~<h[123][^>]*>\r*\n*~si', 0 => '~<h[123][^>]*>\r*\n*~si',
@@ -1233,6 +1238,8 @@ class translation
$_html = preg_replace('~<[^>^@]+>~s','',$_html); $_html = preg_replace('~<[^>^@]+>~s','',$_html);
//$_html = strip_tags($_html, '<a>'); //$_html = strip_tags($_html, '<a>');
} }
// reducing double \r\n to single ones
//$_html = str_replace("\r\n\r\n", "\r\n", $_html); // ToDo: this needs to be more sophisticated
// reducing spaces // reducing spaces
$_html = preg_replace('~ +~s',' ',$_html); $_html = preg_replace('~ +~s',' ',$_html);
// we don't reduce whitespace at the start or the end of the line, since it's used for structuring the document // we don't reduce whitespace at the start or the end of the line, since it's used for structuring the document
@@ -1240,7 +1247,9 @@ class translation
#$_html = preg_replace('~\s+$~m','',$_html); #$_html = preg_replace('~\s+$~m','',$_html);
// restoring ampersands // restoring ampersands
$_html = str_replace('#amper#sand#','&',$_html); $_html = str_replace('#amper#sand#','&',$_html);
//error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' -> '.$_html);
$_html = html_entity_decode($_html, ENT_COMPAT, $displayCharset); $_html = html_entity_decode($_html, ENT_COMPAT, $displayCharset);
//error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' After html_entity_decode: -> '.$_html);
//self::replaceEmailAdresses($_html); //self::replaceEmailAdresses($_html);
#error_log($text); #error_log($text);
$pos = strpos($_html, 'blockquote'); $pos = strpos($_html, 'blockquote');