Better handling of multiple URLs delimited by space/comma

This commit is contained in:
Chris Caron 2019-04-27 23:21:35 -04:00 committed by GitHub
parent 25f5066e27
commit 28b67d42f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 131 additions and 2 deletions

View File

@ -33,6 +33,7 @@ from .common import NotifyType
from .common import NotifyFormat
from .utils import is_exclusive_match
from .utils import parse_list
from .utils import split_urls
from .utils import GET_SCHEMA_RE
from .logger import logger
@ -161,7 +162,9 @@ class Apprise(object):
if isinstance(servers, six.string_types):
# build our server list
servers = parse_list(servers)
servers = split_urls(servers)
if len(servers) == 0:
return False
elif isinstance(servers, (ConfigBase, NotifyBase, AppriseConfig)):
# Go ahead and just add our plugin into our list

View File

@ -107,6 +107,10 @@ GET_EMAIL_RE = re.compile(
re.IGNORECASE,
)
# Regular expression used to distinguish between multiple URLs.  Each match
# captures a single `schema://...` entry in group 1; any whitespace and/or
# commas that sit directly before the next `schema://` (or before the end of
# the string) are consumed as delimiters and left out of the captured group.
URL_DETECTION_RE = re.compile(
r'([a-z0-9]+?:\/\/.*?)[\s,]*(?=$|[a-z0-9]+?:\/\/)', re.I)
def is_hostname(hostname):
"""
@ -463,6 +467,28 @@ def parse_bool(arg, default=False):
return bool(arg)
def split_urls(urls):
    """
    Takes a string containing URLs separated by commas and/or spaces and
    returns a list of the URLs found.

    Delimiters located between two URLs are always dropped.  Commas that
    trail the very last URL are preserved (trailing whitespace is not)
    because they may legitimately belong to that URL; for example a
    password that ends with a comma.

    Anything that can not be scanned as a string (such as None or an
    integer) results in an empty list being returned.
    """
    try:
        matches = list(URL_DETECTION_RE.finditer(urls))

    except TypeError:
        # Not a string; there is nothing to split
        return []

    results = [match.group(1) for match in matches]
    if not matches:
        return results

    last = matches[-1]
    if last.end() == len(urls):
        # The final match ran right up to the end of the input, so whatever
        # follows its captured URL is made up of delimiters only.  We always
        # want to save the end of the last URL if we can; this handles cases
        # where there is actually a comma (,) at the end of a single URL
        # that would have otherwise been lost when our regex passed over it.
        #
        # The tail is sliced out using the match offsets instead of
        # re-scanning the whole string with a second expression; a second
        # scan can not cross line breaks and would fail on multi-line input.
        results[-1] += urls[last.end(1):].rstrip()

    return results
def parse_list(*args):
"""
Take a string list and break it into a delimited

View File

@ -318,6 +318,9 @@ def test_apprise_tagging(mock_post, mock_get):
# Create our object
a = Apprise()
# An invalid addition can't add the tag
assert(a.add('averyinvalidschema://localhost', tag='uhoh') is False)
# Add entry and assign it to a tag called 'awesome'
assert(a.add('json://localhost/path1/', tag='awesome') is True)

View File

@ -24,6 +24,7 @@
# THE SOFTWARE.
from __future__ import print_function
import re
try:
# Python 2.7
from urllib import unquote
@ -406,8 +407,104 @@ def test_is_email():
assert utils.is_email(None) is False
def test_split_urls():
    """utils: split_urls() testing """

    # Input that carries no URL at all always yields an empty list; this
    # covers an empty string, nothing but delimiters, non-string values and
    # plain text that has no schema in it
    for unparseable in (
            '',
            ', ,, , ,,, ',
            ',',
            None,
            42,
            'this is not a parseable url at all'):
        parsed = utils.split_urls(unparseable)
        assert isinstance(parsed, list)
        assert len(parsed) == 0

    # A single valid URL
    parsed = utils.split_urls('windows://')
    assert isinstance(parsed, list)
    assert len(parsed) == 1
    assert 'windows://' in parsed

    # Two valid URLs separated by a space
    parsed = utils.split_urls('windows:// gnome://')
    assert isinstance(parsed, list)
    assert len(parsed) == 2
    assert 'windows://' in parsed
    assert 'gnome://' in parsed

    # Commas and spaces found inside URLs are ignored
    entries = [
        'mailgun://noreply@sandbox.mailgun.org/apikey/?to=test@example.com,'
        'test2@example.com,, abcd@example.com',
        'mailgun://noreply@sandbox.another.mailgun.org/apikey/'
        '?to=hello@example.com,,hmmm@example.com,, abcd@example.com, ,',
        'windows://',
    ]

    # Commas and whitespace act as the delimiters, so they are not present
    # at the end of a URL once it has been split out.  This expression is
    # an rstrip() that removes both whitespace (\s) and commas so that the
    # expected value can be derived from each original entry.
    trailing_delims = re.compile(r'[\s,]+$')

    # Because a comma acts as a delimiter, a comma that is really part of a
    # URL is lost when it sits at the very end of that URL and another URL
    # follows it.
    parsed = utils.split_urls(', '.join(entries))
    assert isinstance(parsed, list)
    assert len(parsed) == len(entries)
    for entry in entries:
        assert trailing_delims.sub('', entry) in parsed

    # When there is just a single URL (no further match to hit), the comma
    # at the end of the password is saved and not lost
    single = 'http://hostname?password=,abcd,'
    parsed = utils.split_urls(single)
    assert isinstance(parsed, list)
    assert len(parsed) == 1
    assert single in parsed

    # With multiple entries the commas and spaces between the URLs are
    # lost; only the last URL gets to keep its trailing comma
    pair = [
        'schema1://hostname?password=,abcd,',
        'schema2://hostname?password=,abcd,',
    ]
    first, second = pair
    parsed = utils.split_urls(', '.join(pair))
    assert isinstance(parsed, list)
    assert len(parsed) == len(pair)

    # The first entry comes back without its trailing comma:
    #    schema1://hostname?password=,abcd
    assert first not in parsed
    assert first[:-1] in parsed

    # The second entry is returned intact, comma included:
    #    schema2://hostname?password=,abcd,
    assert second in parsed
def test_parse_list():
"utils: parse_list() testing """
"""utils: parse_list() testing """
# A simple single array entry (As str)
results = utils.parse_list(