Do not sanitize http:// attachment URLs (#1122)

2025-08-11 01:04:52 +02:00 · 2024-05-11 16:03:01 -04:00
parent 5fd912f5fe
commit fa2d338568
6 changed files with 55 additions and 14 deletions
--- a/apprise/attachment/base.py
+++ b/apprise/attachment/base.py
@ -315,7 +315,7 @@ class AttachBase(URLBase):
            "download() is implimented by the child class.")
    @staticmethod
-    def parse_url(url, verify_host=True, mimetype_db=None):
+    def parse_url(url, verify_host=True, mimetype_db=None, sanitize=True):
        """Parses the URL and returns it broken apart into a dictionary.
        This is very specific and customized for Apprise.
@ -333,7 +333,8 @@ class AttachBase(URLBase):
            successful, otherwise None is returned.
        """
-        results = URLBase.parse_url(url, verify_host=verify_host)
+        results = URLBase.parse_url(
            url, verify_host=verify_host, sanitize=sanitize)
        if not results:
            # We're done; we failed to parse our url
--- a/apprise/attachment/http.py
+++ b/apprise/attachment/http.py
@ -296,8 +296,7 @@ class AttachHTTP(AttachBase):
        """
        Tidy memory if open
        """
-        with self._lock:
+        self.invalidate()
            self.invalidate()
    def url(self, privacy=False, *args, **kwargs):
        """
@ -363,8 +362,7 @@ class AttachHTTP(AttachBase):
        us to re-instantiate this object.
        """
-        results = AttachBase.parse_url(url)
+        results = AttachBase.parse_url(url, sanitize=False)
        if not results:
            # We're done early as we couldn't load the results
            return results
--- a/apprise/url.py
+++ b/apprise/url.py
@ -744,7 +744,7 @@ class URLBase:
    @staticmethod
    def parse_url(url, verify_host=True, plus_to_space=False,
-                  strict_port=False):
+                  strict_port=False, sanitize=True):
        """Parses the URL and returns it broken apart into a dictionary.
        This is very specific and customized for Apprise.
@ -765,7 +765,8 @@ class URLBase:
        results = parse_url(
            url, default_schema='unknown', verify_host=verify_host,
-            plus_to_space=plus_to_space, strict_port=strict_port)
+            plus_to_space=plus_to_space, strict_port=strict_port,
            sanitize=sanitize)
        if not results:
            # We're done; we failed to parse our url
--- a/apprise/utils.py
+++ b/apprise/utils.py
@ -541,7 +541,7 @@ def tidy_path(path):
    return path
-def parse_qsd(qs, simple=False, plus_to_space=False):
+def parse_qsd(qs, simple=False, plus_to_space=False, sanitize=True):
    """
    Query String Dictionary Builder
@ -568,6 +568,8 @@ def parse_qsd(qs, simple=False, plus_to_space=False):
    per normal URL Encoded defininition. Normal URL parsing applies
    this, but `+` is very actively used character with passwords,
    api keys, tokens, etc.  So Apprise does not do this by default.
    if sanitize is set to False, then kwargs are not placed into lowercase
    """
    # Our return result set:
@ -608,7 +610,7 @@ def parse_qsd(qs, simple=False, plus_to_space=False):
        # Always Query String Dictionary (qsd) for every entry we have
        # content is always made lowercase for easy indexing
-        result['qsd'][key.lower().strip()] = val
+        result['qsd'][key.lower().strip() if sanitize else key] = val
        if simple:
            # move along
@ -636,7 +638,7 @@ def parse_qsd(qs, simple=False, plus_to_space=False):
 def parse_url(url, default_schema='http', verify_host=True, strict_port=False,
-              simple=False, plus_to_space=False):
+              simple=False, plus_to_space=False, sanitize=True):
    """A function that greatly simplifies the parsing of a url
    specified by the end user.
@ -691,6 +693,8 @@ def parse_url(url, default_schema='http', verify_host=True, strict_port=False,
     If the URL can't be parsed then None is returned
     If sanitize is set to False, then kwargs are not placed in lowercase
     and wrapping whitespace is not removed
    """
    if not isinstance(url, str):
@ -750,7 +754,8 @@ def parse_url(url, default_schema='http', verify_host=True, strict_port=False,
    # while ensuring that all keys are lowercase
    if qsdata:
        result.update(parse_qsd(
-            qsdata, simple=simple, plus_to_space=plus_to_space))
+            qsdata, simple=simple, plus_to_space=plus_to_space,
            sanitize=sanitize))
    # Now do a proper extraction of data; http:// is just substitued in place
    # to allow urlparse() to function as expected, we'll swap this back to the
--- a/test/test_apprise_utils.py
+++ b/test/test_apprise_utils.py
@ -719,6 +719,38 @@ def test_parse_url_general():
    assert result['qsd+'] == {}
    assert result['qsd:'] == {}
    # Sanitizing
    result = utils.parse_url(
        'hTTp://hostname/?+KeY=ValueA&-kEy=ValueB&KEY=Value%20+C&:cOlON=YeS',
        sanitize=False)
    assert len(result['qsd-']) == 1
    assert len(result['qsd+']) == 1
    assert len(result['qsd']) == 4
    assert len(result['qsd:']) == 1
    assert result['schema'] == 'http'
    assert result['host'] == 'hostname'
    assert result['port'] is None
    assert result['user'] is None
    assert result['password'] is None
    assert result['fullpath'] == '/'
    assert result['path'] == '/'
    assert result['query'] is None
    assert result['url'] == 'http://hostname/'
    assert '+KeY' in result['qsd']
    assert '-kEy' in result['qsd']
    assert ':cOlON' in result['qsd']
    assert result['qsd:']['cOlON'] == 'YeS'
    assert 'key' not in result['qsd']
    assert 'KeY' in result['qsd+']
    assert result['qsd+']['KeY'] == 'ValueA'
    assert 'kEy' in result['qsd-']
    assert result['qsd-']['kEy'] == 'ValueB'
    assert result['qsd']['KEY'] == 'Value +C'
    assert result['qsd']['+KeY'] == result['qsd+']['KeY']
    assert result['qsd']['-kEy'] == result['qsd-']['kEy']
 def test_parse_url_simple():
    "utils: parse_url() testing """
--- a/test/test_attach_http.py
+++ b/test/test_attach_http.py
@ -190,7 +190,7 @@ def test_attach_http(mock_get, mock_post):
    # Test custom url get parameters
    results = AttachHTTP.parse_url(
-        'http://user:pass@localhost/apprise.gif?dl=1&cache=300')
+        'http://user:pass@localhost/apprise.gif?DL=1&cache=300')
    assert isinstance(results, dict)
    attachment = AttachHTTP(**results)
    assert isinstance(attachment.url(), str) is True
@ -200,12 +200,16 @@ def test_attach_http(mock_get, mock_post):
    assert attachment
    assert mock_get.call_count == 1
    assert 'params' in mock_get.call_args_list[0][1]
-    assert 'dl' in mock_get.call_args_list[0][1]['params']
+    assert 'DL' in mock_get.call_args_list[0][1]['params']
    # Verify that arguments that are reserved for apprise are not
    # passed along
    assert 'cache' not in mock_get.call_args_list[0][1]['params']
    with mock.patch('os.unlink', side_effect=OSError()):
        # Test invalidation with exception thrown
        attachment.invalidate()
    results = AttachHTTP.parse_url(
        'http://user:pass@localhost/apprise.gif?+key=value&cache=True')
    assert isinstance(results, dict)