From f2effdff1c9607b48bfc1745d84da65d2f673c6d Mon Sep 17 00:00:00 2001 From: Klaus Leithoff <kl@stylite.de> Date: Mon, 2 May 2016 12:09:42 +0000 Subject: [PATCH] handle doublequotes that may enclose URLs --- api/src/Html.php | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/api/src/Html.php b/api/src/Html.php index dd475d980d..8724fede5f 100644 --- a/api/src/Html.php +++ b/api/src/Html.php @@ -63,11 +63,30 @@ class Html $Protocol = '(http:\/\/|(ftp:\/\/|https:\/\/))'; // only http:// gets removed, other protocolls are shown $Domain = '([\w-]+\.[\w-.]+)'; $Subdir = '([\w\-\.,@?^=%&;:\/~\+#]*[\w\-\@?^=%&\/~\+#])?'; - $Expr = '/' . $NotAnchor . $Protocol . $Domain . $Subdir . '/i'; + $optStuff = '("|"|;)?'; + $Expr = '/' . $NotAnchor . $Protocol . $Domain . $Subdir . $optStuff . '/i'; // use preg_replace_callback as we experienced problems with https links $result3 = preg_replace_callback($Expr, function ($match) { - return "<a href=\"".($match[1]&&!$match[2]?$match[1]:'').($match[2]?$match[2]:'').$match[3].$match[4]."\" target=\"_blank\">".$match[3].$match[4]."</a>"; + $additionalQuote="";//at the end, ... + // only one " at the end is found. chance is, it is not belonging to the URL + if ($match[5]==';' && (strlen($match[4])-6) >=0 && strpos($match[4],'"',strlen($match[4])-6)!==false && strpos(substr($match[4],0,strlen($match[4])-6),'"')===false) + { + $match[4] = substr($match[4],0,strpos($match[4],'"',strlen($match[4])-6)); + $additionalQuote = """; + } + // if there is quoted stuff within the URL then we have at least one more " in match[4], so chance is the last " is matched by the one within + if ($match[5]==';' && (strlen($match[4])-6) >=0 && strpos($match[4],'"',strlen($match[4])-6)!==false && strpos(substr($match[4],0,strlen($match[4])-6),'"')!==false) + { + $match[4] .= $match[5]; + } + if ($match[5]==';'&&$match[4]==""") + { + $match[4] =''; + $additionalQuote = """; + } + //error_log(__METHOD__.__LINE__.array2string($match)); + return "<a href=\"".($match[1]&&!$match[2]?$match[1]:'').($match[2]?$match[2]:'').$match[3].$match[4]."\" target=\"_blank\">".$match[3].$match[4]."</a>$additionalQuote"; }, $result2); // Now match things beginning with www.