Telegram and HTML title conversion updates (#574)

This commit is contained in:
Chris Caron 2022-04-23 08:30:45 -04:00 committed by GitHub
parent 1908b26668
commit 8a3acc815d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 177 additions and 67 deletions

View File

@ -546,7 +546,7 @@ class Apprise(object):
conversion_body_map[server.notify_format]) = \
convert_between(
body_format, server.notify_format, body=body,
title=title)
title=title, title_format=server.title_format)
if interpret_escapes:
#

View File

@ -36,7 +36,8 @@ else:
from html.parser import HTMLParser
def convert_between(from_format, to_format, body, title=None):
def convert_between(from_format, to_format, body, title=None,
title_format=NotifyFormat.TEXT):
"""
Converts between different notification formats. If no conversion exists,
or the selected one fails, the original text will be returned.
@ -60,27 +61,31 @@ def convert_between(from_format, to_format, body, title=None):
title = '' if not title else title
convert = converters.get((from_format, to_format))
title, body = convert(title=title, body=body) \
title, body = convert(title=title, body=body, title_format=title_format) \
if convert is not None else (title, body)
return (title, body)
def markdown_to_html(body, title=None):
def markdown_to_html(body, title=None, title_format=None):
"""
Handle Markdown conversions
"""
if title_format == NotifyFormat.HTML and title:
# only convert a non-empty title, and only when the title format is HTML
title = markdown(title)
return (
# Title
'' if not title else markdown(title),
'' if not title else title,
# Body
markdown(body),
)
def text_to_html(body, title=None):
def text_to_html(body, title=None, title_format=None):
"""
Converts a notification body from plain text to HTML.
"""
@ -124,7 +129,7 @@ def text_to_html(body, title=None):
lambda x: re_map[x.group()], body)))
def html_to_text(body, title=None):
def html_to_text(body, title=None, title_format=None):
"""
Converts a notification body from HTML to plain text.
"""

View File

@ -121,6 +121,13 @@ class NotifyBase(BASE_OBJECT):
# automatically placed into the body
title_maxlen = 250
# Set this to HTML for services that support HTML markup in the title.
# For example, services like Telegram support HTML in the title, whereas
# services like Email (where the title goes in the Subject line) do not
# (although the body does). By default titles are not converted, but
# plugins that wish to override this may do so.
title_format = NotifyFormat.TEXT
# Set the maximum line count; if this is set to anything larger than zero
# the message (prior to it being sent) will be truncated to this number
# of lines. Setting this to zero disables this feature.

View File

@ -105,6 +105,9 @@ class NotifyTelegram(NotifyBase):
# The maximum number of characters allowed in the body per message
body_maxlen = 4096
# Allow the title to support HTML character sets
title_format = NotifyFormat.HTML
# Telegram is limited to sending a maximum of 100 requests per second.
request_rate_per_sec = 0.001
@ -541,7 +544,7 @@ class NotifyTelegram(NotifyBase):
'disable_web_page_preview': not self.preview,
}
# Prepare Email Message
# Prepare Message Body
if self.notify_format == NotifyFormat.MARKDOWN:
payload['parse_mode'] = 'MARKDOWN'
@ -550,7 +553,7 @@ class NotifyTelegram(NotifyBase):
body,
)
else: # TEXT or HTML
elif self.notify_format == NotifyFormat.HTML:
# Use Telegram's HTML mode
payload['parse_mode'] = 'HTML'
@ -561,39 +564,48 @@ class NotifyTelegram(NotifyBase):
telegram_escape_html_dict = {
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style
r'nbsp': ' ',
r'\ ?': ' ',
# Tabs become 3 spaces
r'emsp': ' ',
r'\ ?': ' ',
# Some characters get re-escaped by the Telegram upstream
# service so we need to convert these back,
r'apos': '\'',
r'quot': '"',
r'\'?': '\'',
r'\"?': '"',
# the following tags are not supported
r'<[ \t]*/?(br|p|div|span|body|script|meta|html|font'
r'|label|iframe|li|ol|ul)[^>]*>': '',
# The following characters can be altered to become supported
r'<[ \t]*pre[^>]*>': '<code>',
r'<[ \t]*/pre[^>]*>': '</code>',
# Bold
r'<[ \t]*(h[0-9]+|title|strong)[^>]*>': '<b>',
r'<[ \t]*/(h[0-9]+|title|strong)[^>]*>': '</b>',
# Italic
r'<[ \t]*(caption|em)[^>]*>': '<i>',
r'<[ \t]*/(caption|em)[^>]*>': '</i>',
}
# Create a regular expression from the dictionary keys
html_regex = re.compile("&(%s);?" % "|".join(
map(re.escape, telegram_escape_html_dict.keys())).lower(),
re.I)
# For each match, look-up corresponding value in dictionary
# we look +1 to ignore the & that does not appear in the index
# we only look at the first 4 characters because we don't want to
# fail on &apos; as it's accepted (along with &apos - no
# semi-colon)
body = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], body)
for k, v in telegram_escape_html_dict.items():
body = re.sub(k, v, body, re.I)
if title:
# For each match, look-up corresponding value in dictionary
# Indexing is explained above (for how the body is parsed)
title = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], title)
title = re.sub(k, v, title, re.I)
# prepare our payload based on HTML or TEXT
payload['text'] = '{}{}'.format(
'<b>{}</b>\r\n'.format(title) if title else '',
body,
)
else: # self.notify_format == NotifyFormat.TEXT:
# Use Telegram's HTML mode
payload['parse_mode'] = 'HTML'
if self.notify_format == NotifyFormat.TEXT:
# Further html escaping required...
telegram_escape_text_dict = {
# We need to escape characters that conflict with html

View File

@ -407,7 +407,7 @@ def test_plugin_telegram_general(mock_post):
# Test our payload
assert payload['text'] == \
'<b>special characters</b>\r\n<p>\'"This can\'t\t\r\nfail us"\'</p>'
'<b>special characters</b>\r\n\'"This can\'t\t\r\nfail us"\''
# Test sending attachments
attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif'))
@ -629,10 +629,10 @@ def test_plugin_telegram_formating_py3(mock_post):
# Test that everything is escaped properly in a TEXT mode
assert payload['text'] == \
'<b>🚨 Change detected for &lt;i&gt;Apprise Test Title&lt;/i&gt;</b>' \
'\r\n&lt;a href="http://localhost"&gt;&lt;i&gt;Apprise Body Title' \
'&lt;/i&gt;&lt;/a&gt; had &lt;a href="http://127.0.0.1"&gt;a change' \
'&lt;/a&gt;'
'<b>🚨 Change detected&nbsp;for&nbsp;&lt;i&gt;Apprise&nbsp;Test' \
'&nbsp;Title&lt;/i&gt;</b>\r\n&lt;a href="http://localhost"&gt;' \
'&lt;i&gt;Apprise Body&nbsp;Title&lt;/i&gt;&lt;/a&gt;&nbsp;had' \
'&nbsp;&lt;a&nbsp;href="http://127.0.0.1"&gt;a&nbsp;change&lt;/a&gt;'
# Reset our values
mock_post.reset_mock()
@ -716,9 +716,9 @@ def test_plugin_telegram_formating_py3(mock_post):
# Test that everything is escaped properly in a HTML mode
assert payload['text'] == \
'<b><p>🚨 Change detected for <em>Apprise Test Title</em></p></b>' \
'\r\n<p><em><a href="http://localhost">Apprise Body Title</a></em> ' \
'had <a href="http://127.0.0.1">a change</a></p>'
'<b>🚨 Change detected for <i>Apprise Test Title</i></b>\r\n<i>' \
'<a href="http://localhost">Apprise Body Title</a></i> ' \
'had <a href="http://127.0.0.1">a change</a>'
@pytest.mark.skipif(sys.version_info.major >= 3, reason="Requires Python 2.x+")
@ -809,11 +809,11 @@ def test_plugin_telegram_formating_py2(mock_post):
# Test that everything is escaped properly in a TEXT mode
assert payload['text'].encode('utf-8') == \
'<b>\xf0\x9f\x9a\xa8 Change detected for &lt;i&gt;' \
'Apprise Test Title&lt;/i&gt;</b>\r\n' \
'&lt;a href="http://localhost"&gt;&lt;i&gt;' \
'Apprise Body Title&lt;/i&gt;&lt;/a&gt; had &lt;a ' \
'href="http://127.0.0.1"&gt;a change&lt;/a&gt;'
'<b>\xf0\x9f\x9a\xa8 Change detected&nbsp;for&nbsp;' \
'&lt;i&gt;Apprise&nbsp;Test&nbsp;Title&lt;/i&gt;</b>\r\n' \
'&lt;a href="http://localhost"&gt;&lt;i&gt;Apprise Body&nbsp;' \
'Title&lt;/i&gt;&lt;/a&gt;&nbsp;had&nbsp;&lt;a&nbsp;' \
'href="http://127.0.0.1"&gt;a&nbsp;change&lt;/a&gt;'
# Reset our values
mock_post.reset_mock()
@ -897,10 +897,9 @@ def test_plugin_telegram_formating_py2(mock_post):
# Test that everything is escaped properly in a HTML mode
assert payload['text'].encode('utf-8') == \
'<b><p>\xf0\x9f\x9a\xa8 Change detected for ' \
'<em>Apprise Test Title</em></p></b>\r\n<p><em>' \
'<a href="http://localhost">Apprise Body Title</a></em>' \
' had <a href="http://127.0.0.1">a change</a></p>'
'<b>\xf0\x9f\x9a\xa8 Change detected for <i>Apprise Test Title</i>' \
'</b>\r\n<i><a href="http://localhost">Apprise Body Title</a></i> ' \
'had <a href="http://127.0.0.1">a change</a>'
# Reset our values
mock_post.reset_mock()
@ -951,3 +950,90 @@ def test_plugin_telegram_formating_py2(mock_post):
'<b>\xd7\x9b\xd7\x95\xd7\xaa\xd7\xa8\xd7\xaa '\
'\xd7\xa0\xd7\xa4\xd7\x9c\xd7\x90\xd7\x94</b>\r\n[_[\xd7\x96\xd7\x95 '\
'\xd7\x94\xd7\x95\xd7\x93\xd7\xa2\xd7\x94](http://localhost)_'
@mock.patch('requests.post')
def test_plugin_telegram_html_formatting(mock_post):
    """
    NotifyTelegram() HTML Formatting

    Sends the same title/body pair twice through a Telegram URL, first
    with ``body_format=NotifyFormat.HTML`` and then with
    ``body_format=NotifyFormat.TEXT``, and checks the ``text`` field of the
    payload that was posted for each notification.
    """

    # Don't send anything other than <b>, <i>, <a>, <code> and <pre>

    # Disable Throttling to speed testing
    plugins.NotifyTelegram.request_rate_per_sec = 0

    # Prepare Mock; every POST returns the same simple success response
    mock_post.return_value = requests.Request()
    mock_post.return_value.status_code = requests.codes.ok
    mock_post.return_value.content = dumps({
        "ok": True,
        "result": [
            {
                "update_id": 645421321,
                "message": {
                    "message_id": 2,
                    "from": {
                        "id": 532389719,
                        "is_bot": False,
                        "first_name": "Chris",
                        "language_code": "en-US"
                    },
                    "chat": {
                        "id": 532389719,
                        "first_name": "Chris",
                        "type": "private"
                    },
                    "date": 1519694394,
                    "text": "/start",
                    "entities": [{
                        "offset": 0,
                        "length": 6,
                        "type": "bot_command",
                    }],
                },
            },
        ],
    })

    aobj = Apprise()
    aobj.add('tgram://123456789:abcdefg_hijklmnop/')

    assert len(aobj) == 1
    assert isinstance(aobj[0], plugins.NotifyTelegram)

    # Test our HTML Conversion; some entities deliberately omit the
    # trailing semi-colon
    title = '<title>&apos;information&apos</title>'
    body = (
        '<em>&quot;This is in Italic&quot</em><br/>'
        '<h5>&emsp;&emspHeadings&nbsp;are dropped and'
        '&nbspconverted to bold</h5>'
    )

    assert aobj.notify(title=title, body=body, body_format=NotifyFormat.HTML)

    # 1 call to look up bot owner, and second for notification
    assert mock_post.call_count == 2

    payload = loads(mock_post.call_args_list[1][1]['data'])

    # Test that everything is escaped properly in a HTML mode
    # NOTE(review): runs of spaces inside the expected strings below may
    # have been collapsed when this listing was rendered - confirm them
    # against the plugin's actual output.
    assert payload['text'] == (
        '<b><b>\'information\'</b></b>\r\n<i>"This is in Italic"</i>'
        '<b> Headings are dropped and converted to bold</b>'
    )

    mock_post.reset_mock()

    assert aobj.notify(title=title, body=body, body_format=NotifyFormat.TEXT)

    # owner has already been looked up, so only one call is made
    assert mock_post.call_count == 1

    payload = loads(mock_post.call_args_list[0][1]['data'])

    # In TEXT mode the markup and entities are expected to arrive escaped
    assert payload['text'] == (
        '<b>&lt;title&gt;&amp;apos;information&amp;apos&lt;/title&gt;</b>'
        '\r\n&lt;em&gt;&amp;quot;This is in&nbsp;Italic&amp;quot&lt;/em&gt;'
        '&lt;br/&gt;&lt;h5&gt;&amp;emsp;&amp;emspHeadings&amp;nbsp;are'
        '&nbsp;dropped&nbsp;and&amp;nbspconverted&nbsp;to&nbsp;bold&lt;/h5&gt;'
    )