Api: Add charset parameter to tidy->repairString() to avoid mangling some non-ASCII characters when merging into a document

This commit is contained in:
nathangray 2020-10-14 14:53:20 -06:00
parent 7cf2db5e24
commit 8fa11c8f0b

View File

@ -1174,7 +1174,7 @@ abstract class Merge
// Clean HTML, if it's being kept // Clean HTML, if it's being kept
if($replace_tags && extension_loaded('tidy')) { if($replace_tags && extension_loaded('tidy')) {
$tidy = new tidy(); $tidy = new tidy();
$cleaned = $tidy->repairString($value, self::$tidy_config); $cleaned = $tidy->repairString($value, self::$tidy_config, 'utf8');
// Found errors. Strip it all so there's some output // Found errors. Strip it all so there's some output
if($tidy->getStatus() == 2) if($tidy->getStatus() == 2)
{ {
@ -1205,7 +1205,7 @@ abstract class Merge
} }
// replace all control chars (C0+C1) but CR (\015), LF (\012) and TAB (\011) (eg. vertical tabulators) with space // replace all control chars (C0+C1) but CR (\015), LF (\012) and TAB (\011) (eg. vertical tabulators) with space
// as they are not allowed in xml // as they are not allowed in xml
$value = preg_replace('/[\000-\010\013\014\016-\037\177-\237]/u',' ',$value); $value = preg_replace('/[\000-\010\013\014\016-\037\177-\237\x{FFF0}-\x{FFFD}]/u',' ',$value);
if(is_numeric($value) && $name != '$$user/account_id$$') // account_id causes problems with the preg_replace below if(is_numeric($value) && $name != '$$user/account_id$$') // account_id causes problems with the preg_replace below
{ {
$names[] = preg_quote($name,'/'); $names[] = preg_quote($name,'/');