From 44a3527221c51c7b1059cf1f849998ff966eca7f Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Fri, 15 Nov 2019 22:55:43 -0500 Subject: [PATCH] added cache control to attachments feature --- apprise/AppriseAttachment.py | 48 +++++++++++++-- apprise/attachment/AttachBase.py | 102 +++++++++++++++++++++++++++---- apprise/attachment/AttachFile.py | 6 ++ apprise/attachment/AttachHTTP.py | 48 ++++++++------- test/test_apprise_attachments.py | 47 ++++++++++++-- test/test_attach_file.py | 30 +++++++++ test/test_attach_http.py | 10 ++- test/test_email_plugin.py | 1 - test/test_utils.py | 2 +- 9 files changed, 244 insertions(+), 50 deletions(-) diff --git a/apprise/AppriseAttachment.py b/apprise/AppriseAttachment.py index ee046582..1a79f82f 100644 --- a/apprise/AppriseAttachment.py +++ b/apprise/AppriseAttachment.py @@ -38,18 +38,35 @@ class AppriseAttachment(object): """ - def __init__(self, paths=None, asset=None, **kwargs): + def __init__(self, paths=None, asset=None, cache=True, **kwargs): """ Loads all of the paths/urls specified (if any). The path can either be a single string identifying one explicit location, otherwise you can pass in a series of locations to scan via a list. + + By default we cache our responses so that subsiquent calls does not + cause the content to be retrieved again. For local file references + this makes no difference at all. But for remote content, this does + mean more then one call can be made to retrieve the (same) data. This + method can be somewhat inefficient if disabled. Only disable caching + if you understand the consequences. + + You can alternatively set the cache value to an int identifying the + number of seconds the previously retrieved can exist for before it + should be considered expired. + + It's also worth nothing that the cache value is only set to elements + that are not already of subclass AttachBase() """ # Initialize our attachment listings self.attachments = list() + # Set our cache flag + self.cache = cache + # Prepare our Asset Object self.asset = \ asset if isinstance(asset, AppriseAsset) else AppriseAsset() @@ -61,14 +78,30 @@ class AppriseAttachment(object): # Parse Source domain based on from_addr raise TypeError("One or more attachments could not be added.") - def add(self, attachments, asset=None, db=None): + def add(self, attachments, asset=None, cache=None): """ Adds one or more attachments into our list. + By default we cache our responses so that subsiquent calls does not + cause the content to be retrieved again. For local file references + this makes no difference at all. But for remote content, this does + mean more then one call can be made to retrieve the (same) data. This + method can be somewhat inefficient if disabled. Only disable caching + if you understand the consequences. + + You can alternatively set the cache value to an int identifying the + number of seconds the previously retrieved can exist for before it + should be considered expired. + + It's also worth nothing that the cache value is only set to elements + that are not already of subclass AttachBase() """ # Initialize our return status return_status = True + # Initialize our default cache value + cache = cache if cache is not None else self.cache + if isinstance(asset, AppriseAsset): # prepare default asset asset = self.asset @@ -107,7 +140,8 @@ class AppriseAttachment(object): # Instantiate ourselves an object, this function throws or # returns None if it fails - instance = AppriseAttachment.instantiate(_attachment, asset=asset) + instance = AppriseAttachment.instantiate( + _attachment, asset=asset, cache=cache) if not isinstance(instance, attachment.AttachBase): return_status = False continue @@ -119,12 +153,14 @@ class AppriseAttachment(object): return return_status @staticmethod - def instantiate(url, asset=None, suppress_exceptions=True): + def instantiate(url, asset=None, cache=None, suppress_exceptions=True): """ Returns the instance of a instantiated attachment plugin based on the provided Attachment URL. If the url fails to be parsed, then None is returned. + A specified cache value will over-ride anything set + """ # Attempt to acquire the schema at the very least to allow our # attachment based urls. @@ -156,6 +192,10 @@ class AppriseAttachment(object): results['asset'] = \ asset if isinstance(asset, AppriseAsset) else AppriseAsset() + if cache is not None: + # Force an over-ride of the cache value to what we have specified + results['cache'] = cache + if suppress_exceptions: try: # Attempt to create an instance of our plugin using the parsed diff --git a/apprise/attachment/AttachBase.py b/apprise/attachment/AttachBase.py index 8fdd53af..dce973f5 100644 --- a/apprise/attachment/AttachBase.py +++ b/apprise/attachment/AttachBase.py @@ -24,8 +24,10 @@ # THE SOFTWARE. import os +import time import mimetypes from ..URLBase import URLBase +from ..utils import parse_bool class AttachBase(URLBase): @@ -59,7 +61,7 @@ class AttachBase(URLBase): # 5 MB = 5242880 bytes max_file_size = 5242880 - def __init__(self, name=None, mimetype=None, **kwargs): + def __init__(self, name=None, mimetype=None, cache=True, **kwargs): """ Initialize some general logging and common server arguments that will keep things consistent when working with the configurations that @@ -70,6 +72,17 @@ class AttachBase(URLBase): The mime-type is automatically detected, but you can over-ride this by explicitly stating what it should be. + + By default we cache our responses so that subsiquent calls does not + cause the content to be retrieved again. For local file references + this makes no difference at all. But for remote content, this does + mean more then one call can be made to retrieve the (same) data. This + method can be somewhat inefficient if disabled. Only disable caching + if you understand the consequences. + + You can alternatively set the cache value to an int identifying the + number of seconds the previously retrieved can exist for before it + should be considered expired. """ super(AttachBase, self).__init__(**kwargs) @@ -96,6 +109,18 @@ class AttachBase(URLBase): # Absolute path to attachment self.download_path = None + # Set our cache flag + # it can be True, or an integer + try: + self.cache = cache if isinstance(cache, bool) else int(cache) + if self.cache < 0: + raise ValueError() + + except (ValueError, TypeError): + err = 'An invalid cache value ({}) was specified.'.format(cache) + self.logger.warning(err) + raise TypeError(err) + # Validate mimetype if specified if self._mimetype: if next((t for t in mimetypes.types_map.values() @@ -110,13 +135,13 @@ class AttachBase(URLBase): @property def path(self): """ - Returns the absolute path to the filename + Returns the absolute path to the filename. If this is not known or + is know but has been considered expired (due to cache setting), then + content is re-retrieved prior to returning. """ - if self.download_path: - # return our fixed content - return self.download_path - if not self.download(): + if not self.exists(): + # we could not obtain our path return None return self.download_path @@ -130,7 +155,7 @@ class AttachBase(URLBase): # return our fixed content return self._name - if not self.detected_name and not self.download(): + if not self.exists(): # we could not obtain our name return None @@ -157,8 +182,8 @@ class AttachBase(URLBase): # return our pre-calculated cached content return self._mimetype - if not self.detected_mimetype and not self.download(): - # we could not obtain our name + if not self.exists(): + # we could not obtain our attachment return None if not self.detected_mimetype: @@ -179,14 +204,58 @@ class AttachBase(URLBase): return self.detected_mimetype \ if self.detected_mimetype else self.unknown_mimetype + def exists(self): + """ + Simply returns true if the object has downloaded and stored the + attachment AND the attachment has not expired. + """ + if self.download_path and os.path.isfile(self.download_path) \ + and self.cache: + + # We have enough reason to look further into our cached value + if self.cache is True: + # return our fixed content as is; we will always cache it + return True + + # Verify our cache time to determine whether we will get our + # content again. + try: + age_in_sec = time.time() - os.stat(self.download_path).st_mtime + if age_in_sec <= self.cache: + return True + + except (OSError, IOError): + # The file is not present + pass + + return self.download() + + def invalidate(self): + """ + Release any temporary data that may be open by child classes. + Externally fetched content should be automatically cleaned up when + this function is called. + + This function should also reset the following entries to None: + - detected_name : Should identify a human readable filename + - download_path: Must contain a absolute path to content + - detected_mimetype: Should identify mimetype of content + """ + self.detected_name = None + self.download_path = None + self.detected_mimetype = None + return + def download(self): """ This function must be over-ridden by inheriting classes. - Inherited classes should populate: - - detected_name : Should identify a human readable filename + Inherited classes MUST populate: + - detected_name: Should identify a human readable filename - download_path: Must contain a absolute path to content - detected_mimetype: Should identify mimetype of content + + If a download fails, you should ensure these values are set to None. """ raise NotImplementedError( "download() is implimented by the child class.") @@ -226,6 +295,17 @@ class AttachBase(URLBase): results['name'] = results['qsd'].get('name', '') \ .strip().lower() + # Our cache value + if 'cache' in results['qsd']: + # First try to get it's integer value + try: + results['cache'] = int(results['qsd']['cache']) + + except (ValueError, TypeError): + # No problem, it just isn't an integer; now treat it as a bool + # instead: + results['cache'] = parse_bool(results['qsd']['cache']) + return results def __len__(self): diff --git a/apprise/attachment/AttachFile.py b/apprise/attachment/AttachFile.py index dfcf1226..478e3d6f 100644 --- a/apprise/attachment/AttachFile.py +++ b/apprise/attachment/AttachFile.py @@ -81,6 +81,9 @@ class AttachFile(AttachBase): validate it. """ + # Ensure any existing content set has been invalidated + self.invalidate() + if not os.path.isfile(self.dirty_path): return False @@ -100,6 +103,9 @@ class AttachFile(AttachBase): # a call do download() before returning a success self.download_path = self.dirty_path self.detected_name = os.path.basename(self.download_path) + + # We don't need to set our self.detected_mimetype as it can be + # pulled at the time it's needed based on the detected_name return True @staticmethod diff --git a/apprise/attachment/AttachHTTP.py b/apprise/attachment/AttachHTTP.py index cc45a427..f5986fbb 100644 --- a/apprise/attachment/AttachHTTP.py +++ b/apprise/attachment/AttachHTTP.py @@ -85,10 +85,8 @@ class AttachHTTP(AttachBase): Perform retrieval of the configuration based on the specified request """ - if self._temp_file is not None: - # There is nothing to do; we're already pointing at our downloaded - # content - return True + # Ensure any existing content set has been invalidated + self.invalidate() # prepare header headers = { @@ -188,13 +186,8 @@ class AttachHTTP(AttachBase): int(self.max_file_size / 1024), self.url(privacy=True))) - # Reset our temporary object - self._temp_file = None - - # Ensure our detected name and mimetype are - # reset - self.detected_name = None - self.detected_mimetype = None + # Invalidate any variables previously set + self.invalidate() # Return False (signifying a failure) return False @@ -220,12 +213,8 @@ class AttachHTTP(AttachBase): 'configuration from %s.' % self.host) self.logger.debug('Socket Exception: %s' % str(e)) - # Reset our temporary object - self._temp_file = None - - # Ensure our detected name and mimetype are reset - self.detected_name = None - self.detected_mimetype = None + # Invalidate any variables previously set + self.invalidate() # Return False (signifying a failure) return False @@ -239,12 +228,8 @@ class AttachHTTP(AttachBase): 'Could not write attachment to disk: {}'.format( self.url(privacy=True))) - # Reset our temporary object - self._temp_file = None - - # Ensure our detected name and mimetype are reset - self.detected_name = None - self.detected_mimetype = None + # Invalidate any variables previously set + self.invalidate() # Return False (signifying a failure) return False @@ -252,14 +237,31 @@ class AttachHTTP(AttachBase): # Return our success return True + def invalidate(self): + """ + Close our temporary file + """ + if self._temp_file: + self._temp_file.close() + self._temp_file = None + + super(AttachHTTP, self).invalidate() + def url(self, privacy=False, *args, **kwargs): """ Returns the URL built dynamically based on specified arguments. """ + # Prepare our cache value + if isinstance(self.cache, bool) or not self.cache: + cache = 'yes' if self.cache else 'no' + else: + cache = int(self.cache) + # Define any arguments set args = { 'verify': 'yes' if self.verify_certificate else 'no', + 'cache': cache, } if self._mimetype: diff --git a/test/test_apprise_attachments.py b/test/test_apprise_attachments.py index 7ac07826..7e78994a 100644 --- a/test/test_apprise_attachments.py +++ b/test/test_apprise_attachments.py @@ -58,7 +58,8 @@ def test_apprise_attachment(): assert not aa # An attachment object using a custom Apprise Asset object - aa = AppriseAttachment(asset=AppriseAsset()) + # Set a cache expiry of 5 minutes (300 seconds) + aa = AppriseAttachment(asset=AppriseAsset(), cache=300) # still no attachments added assert len(aa) == 0 @@ -70,6 +71,9 @@ def test_apprise_attachment(): # There is now 1 attachment assert len(aa) == 1 + # our attachment took on our cache value + assert aa[0].cache == 300 + # we can test the object as a boolean and get a value of True now assert aa @@ -81,23 +85,34 @@ def test_apprise_attachment(): # There is now 2 attachments assert len(aa) == 2 + # No cache set, so our cache defaults to True + assert aa[1].cache is True + # Reset our object aa = AppriseAttachment() # We can add by lists as well in a variety of formats attachments = ( path, - 'file://{}?name=newfilename.gif'.format(path), + 'file://{}?name=newfilename.gif?cache=120'.format(path), AppriseAttachment.instantiate( - 'file://{}?name=anotherfilename.gif'.format(path)), + 'file://{}?name=anotherfilename.gif'.format(path), cache=100), ) # Add them - assert aa.add(attachments) + assert aa.add(attachments, cache=False) # There is now 3 attachments assert len(aa) == 3 + # Take on our fixed cache value of False. + # The last entry will have our set value of 100 + assert aa[0].cache is False + # Even though we set a value of 120, we take on the value of False because + # it was forced on the instantiate call + assert aa[1].cache is False + assert aa[2].cache == 100 + # We can pop the last element off of the list as well attachment = aa.pop() assert isinstance(attachment, AttachBase) @@ -140,6 +155,30 @@ def test_apprise_attachment(): assert len(aa) == 0 assert not aa + assert aa.add(AppriseAttachment.instantiate( + 'file://{}?name=andanother.png&cache=Yes'.format(path))) + assert aa.add(AppriseAttachment.instantiate( + 'file://{}?name=andanother.png&cache=No'.format(path))) + AppriseAttachment.instantiate( + 'file://{}?name=andanother.png&cache=600'.format(path)) + assert aa.add(AppriseAttachment.instantiate( + 'file://{}?name=andanother.png&cache=600'.format(path))) + + assert len(aa) == 3 + assert aa[0].cache is True + assert aa[1].cache is False + assert aa[2].cache == 600 + + # Negative cache are not allowed + assert not aa.add(AppriseAttachment.instantiate( + 'file://{}?name=andanother.png&cache=-600'.format(path))) + + # No length change + assert len(aa) == 3 + + # Reset our object + aa.clear() + # if instantiating attachments from the class, it will throw a TypeError # if attachments couldn't be loaded with pytest.raises(TypeError): diff --git a/test/test_attach_file.py b/test/test_attach_file.py index 936534f0..fa3954ce 100644 --- a/test/test_attach_file.py +++ b/test/test_attach_file.py @@ -24,6 +24,7 @@ # THE SOFTWARE. import re +import time import mock from os.path import dirname from os.path import join @@ -51,6 +52,35 @@ def test_attach_file_parse_url(): assert AttachFile.parse_url('file://') is None +def test_file_expiry(tmpdir): + """ + API: AttachFile Expiry + """ + path = join(TEST_VAR_DIR, 'apprise-test.gif') + image = tmpdir.mkdir("apprise_file").join("test.jpg") + with open(path, 'rb') as data: + image.write(data) + + aa = AppriseAttachment.instantiate(str(image), cache=30) + + # Our file is now available + assert aa.exists() + + # Our second call has the file already downloaded, but now compares + # it's date against when we consider it to have expire. We're well + # under 30 seconds here (our set value), so this will succeed + assert aa.exists() + + with mock.patch('time.time', return_value=time.time() + 31): + # This will force a re-download as our cache will have + # expired + assert aa.exists() + + with mock.patch('time.time', side_effect=OSError): + # We will throw an exception + assert aa.exists() + + def test_attach_file(): """ API: AttachFile() diff --git a/test/test_attach_http.py b/test/test_attach_http.py index 277d60ed..991cf910 100644 --- a/test/test_attach_http.py +++ b/test/test_attach_http.py @@ -333,17 +333,15 @@ def test_attach_http(mock_get): # Handle edge-case where detected_name is None for whatever reason attachment.detected_name = None - assert attachment.mimetype == 'application/octet-stream' - assert attachment.name == '{}{}'.format( - AttachHTTP.unknown_filename, - mimetypes.guess_extension(attachment.mimetype) - ) + assert attachment.mimetype == attachment.unknown_mimetype + assert attachment.name.startswith(AttachHTTP.unknown_filename) assert len(attachment) == getsize(path) # Exception handling mock_get.return_value = None for _exception in REQUEST_EXCEPTIONS: - aa = AppriseAttachment.instantiate('http://localhost/exception.gif') + aa = AppriseAttachment.instantiate( + 'http://localhost/exception.gif?cache=30') assert isinstance(aa, AttachHTTP) mock_get.side_effect = _exception diff --git a/test/test_email_plugin.py b/test/test_email_plugin.py index c7ceb20e..c20bef42 100644 --- a/test/test_email_plugin.py +++ b/test/test_email_plugin.py @@ -43,7 +43,6 @@ logging.disable(logging.CRITICAL) # Attachment Directory TEST_VAR_DIR = os.path.join(os.path.dirname(__file__), 'var') - TEST_URLS = ( ################################## # NotifyEmail diff --git a/test/test_utils.py b/test/test_utils.py index a63936dc..cb85e8dd 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -736,7 +736,7 @@ def test_exclusive_match(): logic='match_me', data=data, match_all='match_me') is True -def test_apprise_validate_regex(tmpdir): +def test_apprise_validate_regex(): """ API: Apprise() Validate Regex tests