diff --git a/api/src/Html.php b/api/src/Html.php index dd475d980d..8724fede5f 100644 --- a/api/src/Html.php +++ b/api/src/Html.php @@ -63,11 +63,30 @@ class Html $Protocol = '(http:\/\/|(ftp:\/\/|https:\/\/))'; // only http:// gets removed, other protocolls are shown $Domain = '([\w-]+\.[\w-.]+)'; $Subdir = '([\w\-\.,@?^=%&;:\/~\+#]*[\w\-\@?^=%&\/~\+#])?'; - $Expr = '/' . $NotAnchor . $Protocol . $Domain . $Subdir . '/i'; + $optStuff = '("|"|;)?'; + $Expr = '/' . $NotAnchor . $Protocol . $Domain . $Subdir . $optStuff . '/i'; // use preg_replace_callback as we experienced problems with https links $result3 = preg_replace_callback($Expr, function ($match) { - return "".$match[3].$match[4].""; + $additionalQuote="";//at the end, ... + // only one " at the end is found. chance is, it is not belonging to the URL + if ($match[5]==';' && (strlen($match[4])-6) >=0 && strpos($match[4],'"',strlen($match[4])-6)!==false && strpos(substr($match[4],0,strlen($match[4])-6),'"')===false) + { + $match[4] = substr($match[4],0,strpos($match[4],'"',strlen($match[4])-6)); + $additionalQuote = """; + } + // if there is quoted stuff within the URL then we have at least one more " in match[4], so chance is the last " is matched by the one within + if ($match[5]==';' && (strlen($match[4])-6) >=0 && strpos($match[4],'"',strlen($match[4])-6)!==false && strpos(substr($match[4],0,strlen($match[4])-6),'"')!==false) + { + $match[4] .= $match[5]; + } + if ($match[5]==';'&&$match[4]==""") + { + $match[4] =''; + $additionalQuote = """; + } + //error_log(__METHOD__.__LINE__.array2string($match)); + return "".$match[3].$match[4]."$additionalQuote"; }, $result2); // Now match things beginning with www.