Add notification body conversion for HTML to plain text (#527)

2025-02-07 13:59:31 +01:00 · 2022-01-27 16:33:34 -08:00 · 2022-01-27 16:33:34 -08:00 · 6a2b6130db
commit 6a2b6130db
parent 5e2a293195
3 changed files with 191 additions and 47 deletions
--- a/apprise/Apprise.py
+++ b/apprise/Apprise.py
@ -23,14 +23,12 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.

-import re
 import os
 import six
-from markdown import markdown
 from itertools import chain
 from .common import NotifyType
-from .common import NotifyFormat
 from .common import MATCH_ALL_TAG
+from .conversion import convert_between
 from .utils import is_exclusive_match
 from .utils import parse_list
 from .utils import parse_urls
@ -516,50 +514,8 @@ class Apprise(object):
            # was set to None), or we did define a tag and the logic above
            # determined we need to notify the service it's associated with
            if server.notify_format not in conversion_map:
-                if body_format == NotifyFormat.MARKDOWN and \
-                        server.notify_format == NotifyFormat.HTML:
-
-                    # Apply Markdown
-                    conversion_map[server.notify_format] = markdown(body)
-
-                elif body_format == NotifyFormat.TEXT and \
-                        server.notify_format == NotifyFormat.HTML:
-
-                    # Basic TEXT to HTML format map; supports keys only
-                    re_map = {
-                        # Support Ampersand
-                        r'&': '&amp;',
-
-                        # Spaces to &nbsp; for formatting purposes since
-                        # multiple spaces are treated as one an this may
-                        # not be the callers intention
-                        r' ': '&nbsp;',
-
-                        # Tab support
-                        r'\t': '&nbsp;&nbsp;&nbsp;',
-
-                        # Greater than and Less than Characters
-                        r'>': '&gt;',
-                        r'<': '&lt;',
-                    }
-
-                    # Compile our map
-                    re_table = re.compile(
-                        r'(' + '|'.join(
-                            map(re.escape, re_map.keys())) + r')',
-                        re.IGNORECASE,
-                    )
-
-                    # Execute our map against our body in addition to
-                    # swapping out new lines and replacing them with <br/>
-                    conversion_map[server.notify_format] = \
-                        re.sub(r'\r*\n', '<br/>\r\n',
-                               re_table.sub(
-                                   lambda x: re_map[x.group()], body))
-
-                else:
-                    # Store entry directly
-                    conversion_map[server.notify_format] = body
+                conversion_map[server.notify_format] = \
+                    convert_between(body_format, server.notify_format, body)

            if interpret_escapes:
                #
--- a/apprise/conversion.py
+++ b/apprise/conversion.py
@ -0,0 +1,130 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2022 Chris Caron <lead2gold@gmail.com>
+# All rights reserved.
+#
+# This code is licensed under the MIT License.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files(the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions :
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+
+import re
+import six
+from markdown import markdown
+from os import linesep
+from .common import NotifyFormat
+
+if six.PY2:
+    from HTMLParser import HTMLParser
+else:
+    from html.parser import HTMLParser
+
+
+def convert_between(from_format, to_format, body):
+    """
+    Converts between different notification formats. If no conversion exists
+    for the given pair of formats, the original text is returned unchanged.
+    """
+
+    converters = {
+        (NotifyFormat.MARKDOWN, NotifyFormat.HTML): markdown,
+        (NotifyFormat.TEXT, NotifyFormat.HTML): text_to_html,
+        (NotifyFormat.HTML, NotifyFormat.TEXT): html_to_text,
+    }
+
+    convert = converters.get((from_format, to_format))
+    return convert(body) if convert is not None else body
+
+
+def text_to_html(body):
+    """
+    Converts a notification body from plain text to HTML.
+    """
+
+    # Basic TEXT to HTML format map; supports keys only
+    re_map = {
+        # Support Ampersand
+        r'&': '&amp;',
+
+        # Spaces to &nbsp; for formatting purposes since
+        # multiple spaces are treated as one and this may
+        # not be the caller's intention
+        r' ': '&nbsp;',
+
+        # Tab support
+        r'\t': '&nbsp;&nbsp;&nbsp;',
+
+        # Greater than and Less than Characters
+        r'>': '&gt;',
+        r'<': '&lt;',
+    }
+
+    # Compile our map
+    re_table = re.compile(
+        r'(' + '|'.join(
+            map(re.escape, re_map.keys())) + r')',
+        re.IGNORECASE,
+    )
+
+    # Execute our map against our body in addition to
+    # swapping out new lines and replacing them with <br/>
+    return re.sub(
+        r'\r*\n', '<br/>\r\n', re_table.sub(lambda x: re_map[x.group()], body))
+
+
+def html_to_text(body):
+    """
+    Converts a notification body from HTML to plain text.
+    """
+
+    parser = HTMLConverter()
+    parser.feed(body)
+    parser.close()
+    return parser.converted
+
+
+class HTMLConverter(HTMLParser, object):
+    """An HTML to plain text converter tuned for email messages."""
+
+    def __init__(self, **kwargs):
+        super(HTMLConverter, self).__init__(**kwargs)
+
+        self.converted = ""
+
+    def close(self):
+        # Keep only the text after the last "}" so CSS rules picked up from
+        # <style> blocks are dropped (and any body text before that brace).
+        self.converted = str(self.converted).split('}')[-1].strip()
+
+    def handle_data(self, data):
+        self.converted += data.strip()
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'li':
+            self.converted += linesep + '- '
+        elif tag == 'blockquote':
+            self.converted += linesep + linesep + '\t'
+        elif tag in ('p', 'h1', 'h2', 'h3', 'h4', 'tr', 'th'):
+            self.converted += linesep + '\n'
+        elif tag == 'br':
+            self.converted += linesep
+
+    def handle_endtag(self, tag):
+        if tag == 'blockquote':
+            self.converted += linesep + linesep
--- a/test/test_conversion.py
+++ b/test/test_conversion.py
@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2022 Chris Caron <lead2gold@gmail.com>
+# All rights reserved.
+#
+# This code is licensed under the MIT License.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files(the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions :
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from apprise import NotifyFormat
+from apprise.conversion import convert_between
+
+# Disable logging for a cleaner testing output
+import logging
+logging.disable(logging.CRITICAL)
+
+
+def test_html_to_text():
+    """conversion: Test HTML to plain text
+    """
+
+    def convert(body):
+        return convert_between(NotifyFormat.HTML, NotifyFormat.TEXT, body)
+
+    assert convert("No HTML code here.") == "No HTML code here."
+
+    clist = convert("<ul><li>Lots and lots</li><li>of lists.</li></ul>")
+    assert "Lots and lots" in clist
+    assert "of lists." in clist
+
+    assert "To be or not to be." in convert(
+        "<blockquote>To be or not to be.</blockquote>")
+
+    cspace = convert(
+        "<h2>Fancy heading</h2>"
+        "<p>And a paragraph too.<br>Plus line break.</p>")
+    assert "Fancy heading" in cspace
+    assert "And a paragraph too.\nPlus line break." in cspace
+
+    assert convert(
+        "<style>body { font: 200%; }</style>"
+        "<p>Some obnoxious text here.</p>") == "Some obnoxious text here."