Improved CLI parsing; now support keywords containing URLs (#241)

2025-06-21 18:21:29 +02:00 · 2020-06-09 15:36:03 -04:00 · 2020-06-09 15:36:03 -04:00 · aac8455a5f
commit aac8455a5f
parent 73046207d5
2 changed files with 7 additions and 8 deletions
--- a/apprise/utils.py
+++ b/apprise/utils.py
@@ -120,7 +120,7 @@ GET_PHONE_NO_RE = re.compile(r'^\+?(?P<phone>[0-9\s)(+-]+)\s*$')

 # Regular expression used to distinguish between multiple URLs
 URL_DETECTION_RE = re.compile(
-    r'([a-z0-9]+?:\/\/.*?)[\s,]*(?=$|[a-z0-9]+?:\/\/)', re.I)
+    r'([a-z0-9]+?:\/\/.*?)(?=$|[\s,]+[a-z0-9]{2,9}?:\/\/)', re.I)

 # validate_regex() utilizes this mapping to track and re-use pre-compiled
 # regular expressions
@@ -482,13 +482,6 @@ def split_urls(urls):
    except TypeError:
        results = []

-    if len(results) > 0 and results[len(results) - 1][-1] != urls[-1]:
-        # we always want to save the end of url URL if we can; This handles
-        # cases where there is actually a comma (,) at the end of a single URL
-        # that would have otherwise got lost when our regex passed over it.
-        results[len(results) - 1] += \
-            re.match(r'.*?([\s,]+)?$', urls).group(1).rstrip()
-
    return results


--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -542,6 +542,12 @@ def test_split_urls():
    assert 'windows://' in results
    assert 'gnome://' in results

+    # We don't want to parse out URLs that are part of another URL's arguments
+    results = utils.split_urls('discord://host?url=https://localhost')
+    assert isinstance(results, list)
+    assert len(results) == 1
+    assert 'discord://host?url=https://localhost' in results
+
    # Commas and spaces found inside URLs are ignored
    urls = [
        'mailgun://noreply@sandbox.mailgun.org/apikey/?to=test@example.com,'