fix transliteration of utf-8 to ascii not always working

- now using mb_convert_encoding($str, 'html-entities', 'utf-8') if available
- remove all remaining non-ascii characters as a precaution after all conversion attempts
This commit is contained in:
Ralf Becker 2018-12-11 17:42:35 +01:00
parent a25f8ece13
commit b34fc0cfc2

View File

@ -826,14 +826,23 @@ class Translation
{
static $extra = array(
'ß' => 'ss',
'̈' => 'e', // mb_convert_encoding return ̈ for all German umlauts
);
$entities = htmlentities($_str,ENT_QUOTES,self::charset());
if (function_exists('mb_convert_encoding'))
{
$entities = mb_convert_encoding($_str, 'html-entities', self::charset());
}
else
{
$entities = htmlentities($_str, ENT_QUOTES, self::charset());
}
$estr = str_replace(array_keys($extra),array_values($extra), $entities);
$ustr = preg_replace('/&([aAuUoO])uml;/','\\1e', $estr); // replace german umlauts with the letter plus one 'e'
$astr = preg_replace('/&([a-zA-Z])(grave|acute|circ|ring|cedil|tilde|slash|uml);/','\\1', $ustr); // remove all types of accents
return preg_replace('/&([a-zA-Z]+|#[0-9]+|);/','', $astr); // remove all other entities
return preg_replace('/[^\x20-\x7f]/', '', // remove all non-ascii
preg_replace('/&([a-zA-Z]+|#[0-9]+|);/','', $astr)); // remove all other entities
}
/**