From 6a2b6130dbdb5df33cf9a5f4d89d02b5bf43ab3a Mon Sep 17 00:00:00 2001 From: Ryan Young Date: Thu, 27 Jan 2022 16:33:34 -0800 Subject: [PATCH] Add notification body conversion for HTML to plain text (#527) --- apprise/Apprise.py | 50 +--------------- apprise/conversion.py | 130 ++++++++++++++++++++++++++++++++++++++++ test/test_conversion.py | 58 ++++++++++++++++++ 3 files changed, 191 insertions(+), 47 deletions(-) create mode 100644 apprise/conversion.py create mode 100644 test/test_conversion.py diff --git a/apprise/Apprise.py b/apprise/Apprise.py index 8930b2a7..77d8a895 100644 --- a/apprise/Apprise.py +++ b/apprise/Apprise.py @@ -23,14 +23,12 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. -import re import os import six -from markdown import markdown from itertools import chain from .common import NotifyType -from .common import NotifyFormat from .common import MATCH_ALL_TAG +from .conversion import convert_between from .utils import is_exclusive_match from .utils import parse_list from .utils import parse_urls @@ -516,50 +514,8 @@ class Apprise(object): # was set to None), or we did define a tag and the logic above # determined we need to notify the service it's associated with if server.notify_format not in conversion_map: - if body_format == NotifyFormat.MARKDOWN and \ - server.notify_format == NotifyFormat.HTML: - - # Apply Markdown - conversion_map[server.notify_format] = markdown(body) - - elif body_format == NotifyFormat.TEXT and \ - server.notify_format == NotifyFormat.HTML: - - # Basic TEXT to HTML format map; supports keys only - re_map = { - # Support Ampersand - r'&': '&', - - # Spaces to   for formatting purposes since - # multiple spaces are treated as one an this may - # not be the callers intention - r' ': ' ', - - # Tab support - r'\t': '   ', - - # Greater than and Less than Characters - r'>': '>', - r'<': '<', - } - - # Compile our map - re_table = re.compile( - r'(' + '|'.join( - 
map(re.escape, re_map.keys())) + r')', - re.IGNORECASE, - ) - - # Execute our map against our body in addition to - # swapping out new lines and replacing them with
- conversion_map[server.notify_format] = \ - re.sub(r'\r*\n', '
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022 Chris Caron
# All rights reserved.
#
# This code is licensed under the MIT License.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files(the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions :
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""Conversion of notification bodies between text, Markdown and HTML."""

import re

try:
    from html.parser import HTMLParser
except ImportError:  # pragma: no cover - Python 2 fallback
    from HTMLParser import HTMLParser


# Characters that must be rewritten when plain text is embedded in HTML.
_TEXT_TO_HTML_MAP = {
    # Support Ampersand
    '&': '&amp;',

    # Spaces become non-breaking spaces: HTML collapses runs of ordinary
    # spaces into one and that may not be the caller's intention
    ' ': '&nbsp;',

    # Tab support
    '\t': '&nbsp;&nbsp;&nbsp;',

    # Greater than and Less than Characters
    '>': '&gt;',
    '<': '&lt;',
}

# Compiled once at import time; every key above is a single character.
_TEXT_TO_HTML_RE = re.compile(r'[&<> \t]')

# A line feed together with any carriage returns directly in front of it.
_NEWLINE_RE = re.compile(r'\r*\n')

# Whitespace as HTML defines it. The non-breaking space is deliberately not
# part of this set so that explicit `&nbsp;` padding survives conversion.
_HTML_WS_RE = re.compile(r'[ \t\r\n\f]+')


def _markdown_to_html(body):
    """
    Converts a notification body from Markdown to HTML.
    """
    # Imported at call time so the Markdown package is only loaded when a
    # Markdown conversion is actually requested.
    from markdown import markdown
    return markdown(body)


def convert_between(from_format, to_format, body):
    """
    Converts between different notification formats.

    `from_format` and `to_format` are NotifyFormat values and `body` is the
    notification text. When no converter is registered for the requested
    pair, `body` is returned unchanged. Errors raised by a converter are not
    caught here; they propagate to the caller.
    """
    # Resolved at call time so the stdlib-only converters in this module stay
    # importable on their own.
    from .common import NotifyFormat

    converters = {
        (NotifyFormat.MARKDOWN, NotifyFormat.HTML): _markdown_to_html,
        (NotifyFormat.TEXT, NotifyFormat.HTML): text_to_html,
        (NotifyFormat.HTML, NotifyFormat.TEXT): html_to_text,
    }

    convert = converters.get((from_format, to_format))
    return convert(body) if convert is not None else body


def text_to_html(body):
    """
    Converts a notification body from plain text to HTML.

    Ampersands, angle brackets, spaces and tabs are replaced by their HTML
    entities, then every line break (with any preceding carriage returns)
    becomes `<br/>` followed by CRLF.
    """
    escaped = _TEXT_TO_HTML_RE.sub(
        lambda match: _TEXT_TO_HTML_MAP[match.group()], body)

    return _NEWLINE_RE.sub('<br/>\r\n', escaped)


def html_to_text(body):
    """
    Converts a notification body from HTML to plain text.
    """

    parser = HTMLConverter()
    parser.feed(body)
    parser.close()
    return parser.converted


class HTMLConverter(HTMLParser, object):
    """
    An HTML to plain text converter tuned for email messages.

    Feed markup with `feed()`, then call `close()`; the plain text result is
    then available as `converted`.
    """

    # Elements whose content is never meant to be shown to the reader.
    IGNORE_TAGS = frozenset(('style', 'script'))

    # Elements that start on a fresh paragraph.
    BLOCK_TAGS = frozenset(('p', 'h1', 'h2', 'h3', 'h4', 'tr', 'th'))

    def __init__(self, **kwargs):
        super(HTMLConverter, self).__init__(**kwargs)

        # Final result; populated by close()
        self.converted = ""

        # Output fragments, joined once in close() rather than growing a
        # string one concatenation at a time
        self._parts = []

        # True while inside an element listed in IGNORE_TAGS
        self._skip_data = False

    def close(self):
        """
        Flushes the parser and assembles the converted text.
        """
        # The base parser may still be holding text back (for example a
        # trailing "&T" that could have been the start of an entity); without
        # this call that text would be silently dropped.
        super(HTMLConverter, self).close()

        text = ''.join(self._parts)

        # Adjacent fragments can each contribute a space
        text = re.sub(r' {2,}', ' ', text)

        # Spaces next to a line break or a blockquote indent carry no meaning
        text = re.sub(r' *([\n\t]) *', r'\1', text)

        self.converted = text.strip()

    def handle_data(self, data):
        if self._skip_data:
            return

        # Collapse whitespace the way a browser would rather than stripping
        # each fragment, so words separated by inline tags stay separated
        self._parts.append(_HTML_WS_RE.sub(' ', data))

    def handle_starttag(self, tag, attrs):
        if tag in self.IGNORE_TAGS:
            self._skip_data = True
        elif tag == 'li':
            self._parts.append('\n- ')
        elif tag == 'blockquote':
            self._parts.append('\n\n\t')
        elif tag in self.BLOCK_TAGS:
            self._parts.append('\n\n')
        elif tag == 'br':
            self._parts.append('\n')

    def handle_endtag(self, tag):
        if tag in self.IGNORE_TAGS:
            self._skip_data = False
        elif tag == 'blockquote':
            self._parts.append('\n\n')
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files(the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions :
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

from apprise import NotifyFormat
from apprise.conversion import convert_between

# Disable logging for a cleaner testing output
import logging
logging.disable(logging.CRITICAL)


def test_html_to_text():
    """conversion: Test HTML to plain text
    """
    # NOTE(review): the markup inside the string literals below was lost when
    # this patch was rendered; it has been reconstructed from the assertions
    # and from the tags HTMLConverter handles. Confirm against upstream.

    def convert(body):
        return convert_between(NotifyFormat.HTML, NotifyFormat.TEXT, body)

    # Text without markup passes through untouched
    assert convert("No HTML code here.") == "No HTML code here."

    # Every list item must survive the conversion
    clist = convert("<ul><li>Lots and lots</li><li>of lists.</li></ul>")
    assert "Lots and lots" in clist
    assert "of lists." in clist

    assert "To be or not to be." in convert(
        "<blockquote>To be or not to be.</blockquote>")

    # A line break inside a paragraph becomes a single newline
    cspace = convert(
        "<h2>Fancy heading</h2>"
        "<p>And a paragraph too.<br>Plus line break.</p>")
    assert "Fancy heading" in cspace
    assert "And a paragraph too.\nPlus line break." in cspace

    # Style sheet content must not leak into the text
    assert convert(
        "<style>body { font: 200%; }</style>"
        "<p>Some obnoxious text here.</p>") == "Some obnoxious text here."