Telegram escaping completely refactored (#386)

This commit is contained in:
Chris Caron 2021-05-15 16:08:53 -04:00 committed by GitHub
parent 7f7ee043d9
commit 59aa5f5d10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 75 additions and 25 deletions

View File

@ -524,39 +524,73 @@ class NotifyTelegram(NotifyBase):
body,
)
elif self.notify_format == NotifyFormat.HTML:
else: # HTML or TEXT
# Use Telegram's HTML mode
payload['parse_mode'] = 'HTML'
# Telegram's HTML support doesn't like having HTML-escaped
# characters passed into it. To handle this situation, we need to
# search the body for these sequences and convert them to the
# output the user expected
telegram_escape_html_dict = {
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style
body = re.sub(' ?', ' ', body, re.I)
r'nbsp': ' ',
# Tabs become 3 spaces
body = re.sub(' ?', ' ', body, re.I)
r'emsp': ' ',
# Some characters get re-escaped by the Telegram upstream
# service so we need to convert these back,
r'apos': '\'',
r'quot': '"',
}
# Create a regular expression from the dictionary keys
html_regex = re.compile("&(%s);?" % "|".join(
map(re.escape, telegram_escape_html_dict.keys())).lower(),
re.I)
# For each match, look-up corresponding value in dictionary
# we look +1 to ignore the & that does not appear in the index
# we only look at the first 4 characters because we don't want to
# fail on &apos; as it's accepted (along with &apos - no
# semi-colon)
body = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], body)
if title:
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style
title = re.sub(' ?', ' ', title, re.I)
# For each match, look-up corresponding value in dictionary
# Indexing is explained above (for how the body is parsed)
title = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], title)
# Tabs become 3 spaces
title = re.sub(' ?', ' ', title, re.I)
if self.notify_format == NotifyFormat.TEXT:
telegram_escape_text_dict = {
# We need to escape characters that conflict with html
# entity blocks (< and >) when displaying text
r'>': '&gt;',
r'<': '&lt;',
}
payload['text'] = '{}{}'.format(
'<b>{}</b>\r\n'.format(title) if title else '',
body,
)
# Create a regular expression from the dictionary keys
text_regex = re.compile("(%s)" % "|".join(
map(re.escape, telegram_escape_text_dict.keys())).lower(),
re.I)
else: # pass directly as is...
payload['parse_mode'] = 'HTML'
# For each match, look-up corresponding value in dictionary
body = text_regex.sub( # pragma: no branch
lambda mo: telegram_escape_text_dict[
mo.string[mo.start():mo.end()]], body)
# Telegram strangely escapes all HTML characters for us already
# but to avoid causing issues with HTML, we escape the < and >
# characters
title = re.sub('>', '&gt;', title, re.I)
title = re.sub('<', '&lt;', title, re.I)
body = re.sub('>', '&gt;', body, re.I)
body = re.sub('<', '&lt;', body, re.I)
if title:
# For each match, look-up corresponding value in dictionary
title = text_regex.sub( # pragma: no branch
lambda mo: telegram_escape_text_dict[
mo.string[mo.start():mo.end()]], title)
payload['text'] = '{}{}'.format(
'<b>{}</b>\r\n'.format(title) if title else '',

View File

@ -29,6 +29,7 @@ import pytest
import mock
import requests
from json import dumps
from json import loads
from apprise import Apprise
from apprise import AppriseAttachment
from apprise import AppriseAsset
@ -202,11 +203,26 @@ def test_notify_telegram_plugin(mock_post, mock_get):
})
mock_post.return_value.status_code = requests.codes.ok
# Test sending attachments
obj = plugins.NotifyTelegram(bot_token=bot_token, targets='12345')
assert len(obj.targets) == 1
assert obj.targets[0] == '12345'
# Test the escaping of characters, since Telegram escapes some content
# for us and we need to account for that
mock_post.reset_mock()
body = "<p>\'\"This can't\t\r\nfail&nbsp;us\"\'</p>"
assert obj.notify(
body=body, title='special characters',
notify_type=NotifyType.INFO) is True
assert mock_post.call_count == 1
payload = loads(mock_post.call_args_list[0][1]['data'])
# Our special characters are escaped properly
assert payload['text'] == \
'<b>special characters</b>\r\n&lt;p&gt;'\
'\'"This can\'t\t\r\nfail us"\'&lt;/p&gt;'
# Test sending attachments
attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif'))
assert obj.notify(
body='body', title='title', notify_type=NotifyType.INFO,