Telegram and HTML title conversion updates (#574)

2025-03-03 09:31:31 +01:00 · 2022-04-23 08:30:45 -04:00 · 2022-04-23 08:30:45 -04:00 · 8a3acc815d
commit 8a3acc815d
parent 1908b26668
5 changed files with 177 additions and 67 deletions
--- a/apprise/Apprise.py
+++ b/apprise/Apprise.py
@ -546,7 +546,7 @@ class Apprise(object):
                 conversion_body_map[server.notify_format]) = \
                    convert_between(
                        body_format, server.notify_format, body=body,
-                        title=title)
+                        title=title, title_format=server.title_format)

                if interpret_escapes:
                    #
--- a/apprise/conversion.py
+++ b/apprise/conversion.py
@ -36,7 +36,8 @@ else:
    from html.parser import HTMLParser


-def convert_between(from_format, to_format, body, title=None):
+def convert_between(from_format, to_format, body, title=None,
+                    title_format=NotifyFormat.TEXT):
    """
    Converts between different notification formats. If no conversion exists,
    or the selected one fails, the original text will be returned.
@ -60,27 +61,31 @@ def convert_between(from_format, to_format, body, title=None):
        title = '' if not title else title

    convert = converters.get((from_format, to_format))
-    title, body = convert(title=title, body=body) \
+    title, body = convert(title=title, body=body, title_format=title_format) \
        if convert is not None else (title, body)

    return (title, body)


-def markdown_to_html(body, title=None):
+def markdown_to_html(body, title=None, title_format=None):
    """
    Handle Markdown conversions
    """

+    if title_format == NotifyFormat.HTML and title:
+        # perform conversion if otherwise told to do so
+        title = markdown(title)
+
    return (
        # Title
-        '' if not title else markdown(title),
+        '' if not title else title,

        # Body
        markdown(body),
    )


-def text_to_html(body, title=None):
+def text_to_html(body, title=None, title_format=None):
    """
    Converts a notification body from plain text to HTML.
    """
@ -124,7 +129,7 @@ def text_to_html(body, title=None):
                lambda x: re_map[x.group()], body)))


-def html_to_text(body, title=None):
+def html_to_text(body, title=None, title_format=None):
    """
    Converts a notification body from HTML to plain text.
    """
--- a/apprise/plugins/NotifyBase.py
+++ b/apprise/plugins/NotifyBase.py
@ -121,6 +121,13 @@ class NotifyBase(BASE_OBJECT):
    # automatically placed into the body
    title_maxlen = 250

+    # Set this to HTML for services that support the conversion of HTML in
+    # the title. For example, services like Telegram support HTML in the
+    # title; however, services like Email (where this goes in the Subject
+    # line) do not (but the body does).  By default we do not convert titles
+    # but allow those who wish to override this to do so.
+    title_format = NotifyFormat.TEXT
+
    # Set the maximum line count; if this is set to anything larger then zero
    # the message (prior to it being sent) will be truncated to this number
    # of lines. Setting this to zero disables this feature.
--- a/apprise/plugins/NotifyTelegram.py
+++ b/apprise/plugins/NotifyTelegram.py
@ -105,6 +105,9 @@ class NotifyTelegram(NotifyBase):
    # The maximum allowable characters allowed in the body per message
    body_maxlen = 4096

+    # Allow the title to support HTML character sets
+    title_format = NotifyFormat.HTML
+
    # Telegram is limited to sending a maximum of 100 requests per second.
    request_rate_per_sec = 0.001

@ -541,7 +544,7 @@ class NotifyTelegram(NotifyBase):
            'disable_web_page_preview': not self.preview,
        }

-        # Prepare Email Message
+        # Prepare Message Body
        if self.notify_format == NotifyFormat.MARKDOWN:
            payload['parse_mode'] = 'MARKDOWN'

@ -550,7 +553,7 @@ class NotifyTelegram(NotifyBase):
                body,
            )

-        else:  # TEXT or HTML
+        elif self.notify_format == NotifyFormat.HTML:
            # Use Telegram's HTML mode
            payload['parse_mode'] = 'HTML'

@ -561,62 +564,71 @@ class NotifyTelegram(NotifyBase):
            telegram_escape_html_dict = {
                # HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
                # See https://core.telegram.org/bots/api#html-style
-                r'nbsp': ' ',
+                r'\&nbsp;?': ' ',

                # Tabs become 3 spaces
-                r'emsp': '   ',
+                r'\&emsp;?': '   ',

                # Some characters get re-escaped by the Telegram upstream
                # service so we need to convert these back,
-                r'apos': '\'',
-                r'quot': '"',
+                r'\&apos;?': '\'',
+                r'\&quot;?': '"',
+
+                # the following tags are not supported
+                r'<[ \t]*/?(br|p|div|span|body|script|meta|html|font'
+                r'|label|iframe|li|ol|ul)[^>]*>': '',
+
+                # The following tags can be altered to become supported
+                r'<[ \t]*pre[^>]*>': '<code>',
+                r'<[ \t]*/pre[^>]*>': '</code>',
+
+                # Bold
+                r'<[ \t]*(h[0-9]+|title|strong)[^>]*>': '<b>',
+                r'<[ \t]*/(h[0-9]+|title|strong)[^>]*>': '</b>',
+
+                # Italic
+                r'<[ \t]*(caption|em)[^>]*>': '<i>',
+                r'<[ \t]*/(caption|em)[^>]*>': '</i>',
+            }
+            # Pass re.I via flags=; the 4th positional argument is `count`
+            for k, v in telegram_escape_html_dict.items():
+                body = re.sub(k, v, body, flags=re.I)
+                if title:
+                    title = re.sub(k, v, title, flags=re.I)
+
+            # prepare our payload based on HTML or TEXT
+            payload['text'] = '{}{}'.format(
+                '<b>{}</b>\r\n'.format(title) if title else '',
+                body,
+            )
+
+        else:  # self.notify_format == NotifyFormat.TEXT:
+            # Use Telegram's HTML mode
+            payload['parse_mode'] = 'HTML'
+
+            # Further html escaping required...
+            telegram_escape_text_dict = {
+                # We need to escape characters that conflict with html
+                # entity blocks (< and >) when displaying text
+                r'>': '&gt;',
+                r'<': '&lt;',
            }

            # Create a regular expression from the dictionary keys
-            html_regex = re.compile("&(%s);?" % "|".join(
-                map(re.escape, telegram_escape_html_dict.keys())).lower(),
+            text_regex = re.compile("(%s)" % "|".join(
+                map(re.escape, telegram_escape_text_dict.keys())).lower(),
                re.I)

            # For each match, look-up corresponding value in dictionary
-            # we look +1 to ignore the & that does not appear in the index
-            # we only look at the first 4 characters because we don't want to
-            # fail on &apos; as it's accepted (along with &apos - no
-            # semi-colon)
-            body = html_regex.sub(  # pragma: no branch
-                lambda mo: telegram_escape_html_dict[
-                    mo.string[mo.start():mo.end()][1:5]], body)
+            body = text_regex.sub(  # pragma: no branch
+                lambda mo: telegram_escape_text_dict[
+                    mo.string[mo.start():mo.end()]], body)

            if title:
                # For each match, look-up corresponding value in dictionary
-                # Indexing is explained above (for how the body is parsed)
-                title = html_regex.sub(  # pragma: no branch
-                    lambda mo: telegram_escape_html_dict[
-                        mo.string[mo.start():mo.end()][1:5]], title)
-
-            if self.notify_format == NotifyFormat.TEXT:
-                # Further html escaping required...
-                telegram_escape_text_dict = {
-                    # We need to escape characters that conflict with html
-                    # entity blocks (< and >) when displaying text
-                    r'>': '&gt;',
-                    r'<': '&lt;',
-                }
-
-                # Create a regular expression from the dictionary keys
-                text_regex = re.compile("(%s)" % "|".join(
-                    map(re.escape, telegram_escape_text_dict.keys())).lower(),
-                    re.I)
-
-                # For each match, look-up corresponding value in dictionary
-                body = text_regex.sub(  # pragma: no branch
+                title = text_regex.sub(  # pragma: no branch
                    lambda mo: telegram_escape_text_dict[
-                        mo.string[mo.start():mo.end()]], body)
-
-                if title:
-                    # For each match, look-up corresponding value in dictionary
-                    title = text_regex.sub(  # pragma: no branch
-                        lambda mo: telegram_escape_text_dict[
-                            mo.string[mo.start():mo.end()]], title)
+                        mo.string[mo.start():mo.end()]], title)

            # prepare our payload based on HTML or TEXT
            payload['text'] = '{}{}'.format(
--- a/test/test_plugin_telegram.py
+++ b/test/test_plugin_telegram.py
@ -407,7 +407,7 @@ def test_plugin_telegram_general(mock_post):

    # Test our payload
    assert payload['text'] == \
-        '<b>special characters</b>\r\n<p>\'"This can\'t\t\r\nfail us"\'</p>'
+        '<b>special characters</b>\r\n\'"This can\'t\t\r\nfail us"\''

    # Test sending attachments
    attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif'))
@ -629,10 +629,10 @@ def test_plugin_telegram_formating_py3(mock_post):

    # Test that everything is escaped properly in a TEXT mode
    assert payload['text'] == \
-        '<b>🚨 Change detected for &lt;i&gt;Apprise Test Title&lt;/i&gt;</b>' \
-        '\r\n&lt;a href="http://localhost"&gt;&lt;i&gt;Apprise Body Title' \
-        '&lt;/i&gt;&lt;/a&gt; had &lt;a href="http://127.0.0.1"&gt;a change' \
-        '&lt;/a&gt;'
+        '<b>🚨 Change detected&nbsp;for&nbsp;&lt;i&gt;Apprise&nbsp;Test' \
+        '&nbsp;Title&lt;/i&gt;</b>\r\n&lt;a href="http://localhost"&gt;' \
+        '&lt;i&gt;Apprise Body&nbsp;Title&lt;/i&gt;&lt;/a&gt;&nbsp;had' \
+        '&nbsp;&lt;a&nbsp;href="http://127.0.0.1"&gt;a&nbsp;change&lt;/a&gt;'

    # Reset our values
    mock_post.reset_mock()
@ -716,9 +716,9 @@ def test_plugin_telegram_formating_py3(mock_post):

    # Test that everything is escaped properly in a HTML mode
    assert payload['text'] == \
-        '<b><p>🚨 Change detected for <em>Apprise Test Title</em></p></b>' \
-        '\r\n<p><em><a href="http://localhost">Apprise Body Title</a></em> ' \
-        'had <a href="http://127.0.0.1">a change</a></p>'
+        '<b>🚨 Change detected for <i>Apprise Test Title</i></b>\r\n<i>' \
+        '<a href="http://localhost">Apprise Body Title</a></i> ' \
+        'had <a href="http://127.0.0.1">a change</a>'


@pytest.mark.skipif(sys.version_info.major >= 3, reason="Requires Python 2.x+")
@ -809,11 +809,11 @@ def test_plugin_telegram_formating_py2(mock_post):

    # Test that everything is escaped properly in a TEXT mode
    assert payload['text'].encode('utf-8') == \
-        '<b>\xf0\x9f\x9a\xa8 Change detected for &lt;i&gt;' \
-        'Apprise Test Title&lt;/i&gt;</b>\r\n' \
-        '&lt;a href="http://localhost"&gt;&lt;i&gt;' \
-        'Apprise Body Title&lt;/i&gt;&lt;/a&gt; had &lt;a ' \
-        'href="http://127.0.0.1"&gt;a change&lt;/a&gt;'
+        '<b>\xf0\x9f\x9a\xa8 Change detected&nbsp;for&nbsp;' \
+        '&lt;i&gt;Apprise&nbsp;Test&nbsp;Title&lt;/i&gt;</b>\r\n' \
+        '&lt;a href="http://localhost"&gt;&lt;i&gt;Apprise Body&nbsp;' \
+        'Title&lt;/i&gt;&lt;/a&gt;&nbsp;had&nbsp;&lt;a&nbsp;' \
+        'href="http://127.0.0.1"&gt;a&nbsp;change&lt;/a&gt;'

    # Reset our values
    mock_post.reset_mock()
@ -897,10 +897,9 @@ def test_plugin_telegram_formating_py2(mock_post):

    # Test that everything is escaped properly in a HTML mode
    assert payload['text'].encode('utf-8') == \
-        '<b><p>\xf0\x9f\x9a\xa8 Change detected for ' \
-        '<em>Apprise Test Title</em></p></b>\r\n<p><em>' \
-        '<a href="http://localhost">Apprise Body Title</a></em>' \
-        ' had <a href="http://127.0.0.1">a change</a></p>'
+        '<b>\xf0\x9f\x9a\xa8 Change detected for <i>Apprise Test Title</i>' \
+        '</b>\r\n<i><a href="http://localhost">Apprise Body Title</a></i> ' \
+        'had <a href="http://127.0.0.1">a change</a>'

    # Reset our values
    mock_post.reset_mock()
@ -951,3 +950,90 @@ def test_plugin_telegram_formating_py2(mock_post):
        '<b>\xd7\x9b\xd7\x95\xd7\xaa\xd7\xa8\xd7\xaa '\
        '\xd7\xa0\xd7\xa4\xd7\x9c\xd7\x90\xd7\x94</b>\r\n[_[\xd7\x96\xd7\x95 '\
        '\xd7\x94\xd7\x95\xd7\x93\xd7\xa2\xd7\x94](http://localhost)_'
+
+
+@mock.patch('requests.post')
+def test_plugin_telegram_html_formatting(mock_post):
+    """
+    NotifyTelegram() HTML Formatting
+
+    """
+    # Don't send anything other than <b>, <i>, <a>, <code> and <pre>
+
+    # Disable Throttling to speed testing
+    plugins.NotifyTelegram.request_rate_per_sec = 0
+
+    # Prepare Mock
+    mock_post.return_value = requests.Request()
+    mock_post.return_value.status_code = requests.codes.ok
+    mock_post.return_value.content = '{}'
+
+    # Simple success response
+    mock_post.return_value.content = dumps({
+        "ok": True,
+        "result": [{
+            "update_id": 645421321,
+            "message": {
+                "message_id": 2,
+                "from": {
+                    "id": 532389719,
+                    "is_bot": False,
+                    "first_name": "Chris",
+                    "language_code": "en-US"
+                },
+                "chat": {
+                    "id": 532389719,
+                    "first_name": "Chris",
+                    "type": "private"
+                },
+                "date": 1519694394,
+                "text": "/start",
+                "entities": [{
+                    "offset": 0,
+                    "length": 6,
+                    "type": "bot_command",
+                }],
+            }},
+        ],
+    })
+    mock_post.return_value.status_code = requests.codes.ok
+
+    aobj = Apprise()
+    aobj.add('tgram://123456789:abcdefg_hijklmnop/')
+
+    assert len(aobj) == 1
+
+    assert isinstance(aobj[0], plugins.NotifyTelegram)
+
+    # Test our HTML Conversion
+    title = '<title>&apos;information&apos</title>'
+    body = '<em>&quot;This is in Italic&quot</em><br/>' \
+           '<h5>&emsp;&emspHeadings&nbsp;are dropped and' \
+           '&nbspconverted to bold</h5>'
+
+    assert aobj.notify(title=title, body=body, body_format=NotifyFormat.HTML)
+
+    # 1 call to look up bot owner, and second for notification
+    assert mock_post.call_count == 2
+
+    payload = loads(mock_post.call_args_list[1][1]['data'])
+
+    # Test that everything is escaped properly in a HTML mode
+    assert payload['text'] == \
+        '<b><b>\'information\'</b></b>\r\n<i>"This is in Italic"</i>' \
+        '<b>      Headings are dropped and converted to bold</b>'
+
+    mock_post.reset_mock()
+
+    assert aobj.notify(title=title, body=body, body_format=NotifyFormat.TEXT)
+
+    # owner has already been looked up, so only one call is made
+    assert mock_post.call_count == 1
+
+    payload = loads(mock_post.call_args_list[0][1]['data'])
+
+    assert payload['text'] == \
+        '<b>&lt;title&gt;&amp;apos;information&amp;apos&lt;/title&gt;</b>' \
+        '\r\n&lt;em&gt;&amp;quot;This is in&nbsp;Italic&amp;quot&lt;/em&gt;' \
+        '&lt;br/&gt;&lt;h5&gt;&amp;emsp;&amp;emspHeadings&amp;nbsp;are' \
+        '&nbsp;dropped&nbsp;and&amp;nbspconverted&nbsp;to&nbsp;bold&lt;/h5&gt;'