mirror of
https://github.com/caronc/apprise.git
synced 2024-11-25 01:24:03 +01:00
Re-worked Telegram HTML/Markdown -> HTML Conversion (#579)
This commit is contained in:
parent
3c07d80975
commit
fd0cb3ffcc
@ -96,7 +96,9 @@ class HTMLConverter(HTMLParser, object):
|
||||
'div', 'td', 'th', 'code', 'pre', 'label', 'li',)
|
||||
|
||||
# the following tags ignore any internal text
|
||||
IGNORE_TAGS = ('style', 'link', 'meta', 'title', 'html', 'head', 'script')
|
||||
IGNORE_TAGS = (
|
||||
'form', 'input', 'textarea', 'select', 'ul', 'ol', 'style', 'link',
|
||||
'meta', 'title', 'html', 'head', 'script')
|
||||
|
||||
# Condense Whitespace
|
||||
WS_TRIM = re.compile(r'[\s]+', re.DOTALL | re.MULTILINE)
|
||||
|
@ -177,44 +177,85 @@ class NotifyTelegram(NotifyBase):
|
||||
# characters passed into it. to handle this situation, we need to
|
||||
# search the body for these sequences and convert them to the
|
||||
# output the user expected
|
||||
__telegram_escape_html_dict = {
|
||||
# New Lines
|
||||
re.compile(r'<\s*/?br\s*/?>\r*\n?', re.I): '\r\n',
|
||||
re.compile(r'<\s*/(br|p|div|li)[^>]*>\r*\n?', re.I): '\r\n',
|
||||
|
||||
# The following characters can be altered to become supported
|
||||
re.compile(r'<\s*pre[^>]*>', re.I): '<code>',
|
||||
re.compile(r'<\s*/pre[^>]*>', re.I): '</code>',
|
||||
__telegram_escape_html_entries = (
|
||||
# Comments
|
||||
(re.compile(
|
||||
r'\s*<!.+?-->\s*',
|
||||
(re.I | re.M | re.S)), '', {}),
|
||||
|
||||
# the following tags are not supported
|
||||
re.compile(
|
||||
r'<\s*(br|p|div|span|body|script|meta|html|font'
|
||||
r'|label|iframe|li|ol|ul|source|script)[^>]*>', re.I): '',
|
||||
(re.compile(
|
||||
r'\s*<\s*(!?DOCTYPE|p|div|span|body|script|link|'
|
||||
r'meta|html|font|head|label|form|input|textarea|select|iframe|'
|
||||
r'source|script)([^a-z0-9>][^>]*)?>\s*',
|
||||
(re.I | re.M | re.S)), '', {}),
|
||||
|
||||
re.compile(
|
||||
r'<\s*/(span|body|script|meta|html|font'
|
||||
r'|label|iframe|ol|ul|source|script)[^>]*>', re.I): '',
|
||||
|
||||
# Italic
|
||||
re.compile(r'<\s*(caption|em)[^>]*>', re.I): '<i>',
|
||||
re.compile(r'<\s*/(caption|em)[^>]*>', re.I): '</i>',
|
||||
# All closing tags to be removed are put here
|
||||
(re.compile(
|
||||
r'\s*<\s*/(span|body|script|meta|html|font|head|'
|
||||
r'label|form|input|textarea|select|ol|ul|link|'
|
||||
r'iframe|source|script)([^a-z0-9>][^>]*)?>\s*',
|
||||
(re.I | re.M | re.S)), '', {}),
|
||||
|
||||
# Bold
|
||||
re.compile(r'<\s*(h[1-6]|title|strong)[^>]*>', re.I): '<b>',
|
||||
re.compile(r'<\s*/(h[1-6]|title|strong)[^>]*>', re.I): '</b>',
|
||||
(re.compile(
|
||||
r'<\s*(strong)([^a-z0-9>][^>]*)?>',
|
||||
(re.I | re.M | re.S)), '<b>', {}),
|
||||
(re.compile(
|
||||
r'<\s*/\s*(strong)([^a-z0-9>][^>]*)?>',
|
||||
(re.I | re.M | re.S)), '</b>', {}),
|
||||
(re.compile(
|
||||
r'\s*<\s*(h[1-6]|title)([^a-z0-9>][^>]*)?>\s*',
|
||||
(re.I | re.M | re.S)), '{}<b>', {'html': '\r\n'}),
|
||||
(re.compile(
|
||||
r'\s*<\s*/\s*(h[1-6]|title)([^a-z0-9>][^>]*)?>\s*',
|
||||
(re.I | re.M | re.S)),
|
||||
'</b>{}', {'html': '<br/>'}),
|
||||
|
||||
# Italic
|
||||
(re.compile(
|
||||
r'<\s*(caption|em)([^a-z0-9>][^>]*)?>',
|
||||
(re.I | re.M | re.S)), '<i>', {}),
|
||||
(re.compile(
|
||||
r'<\s*/\s*(caption|em)([^a-z0-9>][^>]*)?>',
|
||||
(re.I | re.M | re.S)), '</i>', {}),
|
||||
|
||||
# Bullet Lists
|
||||
(re.compile(
|
||||
r'<\s*li([^a-z0-9>][^>]*)?>\s*',
|
||||
(re.I | re.M | re.S)), ' -', {}),
|
||||
|
||||
# convert pre tags to code (supported by Telegram)
|
||||
(re.compile(
|
||||
r'<\s*pre([^a-z0-9>][^>]*)?>',
|
||||
(re.I | re.M | re.S)), '{}<code>', {'html': '\r\n'}),
|
||||
(re.compile(
|
||||
r'<\s*/\s*pre([^a-z0-9>][^>]*)?>',
|
||||
(re.I | re.M | re.S)), '</code>{}', {'html': '\r\n'}),
|
||||
|
||||
# New Lines
|
||||
(re.compile(
|
||||
r'\s*<\s*/?\s*(ol|ul|br|hr)\s*/?>\s*',
|
||||
(re.I | re.M | re.S)), '\r\n', {}),
|
||||
(re.compile(
|
||||
r'\s*<\s*/\s*(br|p|hr|li|div)([^a-z0-9>][^>]*)?>\s*',
|
||||
(re.I | re.M | re.S)), '\r\n', {}),
|
||||
|
||||
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
|
||||
# See https://core.telegram.org/bots/api#html-style
|
||||
re.compile(r'\ ?', re.I): ' ',
|
||||
(re.compile(r'\ ?', re.I), ' ', {}),
|
||||
|
||||
# Tabs become 3 spaces
|
||||
re.compile(r'\ ?', re.I): ' ',
|
||||
(re.compile(r'\ ?', re.I), ' ', {}),
|
||||
|
||||
# Some characters get re-escaped by the Telegram upstream
|
||||
# service so we need to convert these back,
|
||||
re.compile(r'\'?', re.I): '\'',
|
||||
re.compile(r'\"?', re.I): '"',
|
||||
}
|
||||
(re.compile(r'\'?', re.I), '\'', {}),
|
||||
(re.compile(r'\"?', re.I), '"', {}),
|
||||
|
||||
# New line cleanup
|
||||
(re.compile(r'\r*\n[\r\n]+', re.I), '\r\n', {}),
|
||||
)
|
||||
|
||||
# Define our template tokens
|
||||
template_tokens = dict(NotifyBase.template_tokens, **{
|
||||
@ -597,38 +638,19 @@ class NotifyTelegram(NotifyBase):
|
||||
|
||||
# Use Telegram's HTML mode
|
||||
payload['parse_mode'] = 'HTML'
|
||||
for r, v in self.__telegram_escape_html_dict.items():
|
||||
body = r.sub(v, body, re.I)
|
||||
for r, v, m in self.__telegram_escape_html_entries:
|
||||
|
||||
if 'html' in m:
|
||||
# Handle special cases where we need to alter new lines
|
||||
# for presentation purposes
|
||||
v = v.format(m['html'] if body_format in (
|
||||
NotifyFormat.HTML, NotifyFormat.MARKDOWN) else '')
|
||||
|
||||
body = r.sub(v, body)
|
||||
|
||||
# Prepare our payload based on HTML or TEXT
|
||||
payload['text'] = body
|
||||
|
||||
# else: # self.notify_format == NotifyFormat.TEXT:
|
||||
# # Use Telegram's HTML mode
|
||||
# payload['parse_mode'] = 'HTML'
|
||||
|
||||
# # Further html escaping required...
|
||||
# telegram_escape_text_dict = {
|
||||
# # We need to escape characters that conflict with html
|
||||
# # entity blocks (< and >) when displaying text
|
||||
# r'>': '>',
|
||||
# r'<': '<',
|
||||
# r'\&': '&',
|
||||
# }
|
||||
|
||||
# # Create a regular expression from the dictionary keys
|
||||
# text_regex = re.compile("(%s)" % "|".join(
|
||||
# map(re.escape, telegram_escape_text_dict.keys())).lower(),
|
||||
# re.I)
|
||||
|
||||
# # For each match, look-up corresponding value in dictionary
|
||||
# body = text_regex.sub( # pragma: no branch
|
||||
# lambda mo: telegram_escape_text_dict[
|
||||
# mo.string[mo.start():mo.end()]], body)
|
||||
|
||||
# # prepare our payload based on HTML or TEXT
|
||||
# payload['text'] = body
|
||||
|
||||
# Create a copy of the chat_ids list
|
||||
targets = list(self.targets)
|
||||
while len(targets):
|
||||
|
@ -22,7 +22,6 @@
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
from apprise import NotifyFormat
|
||||
from apprise.conversion import convert_between
|
||||
import pytest
|
||||
|
@ -625,11 +625,10 @@ def test_plugin_telegram_formating_py3(mock_post):
|
||||
|
||||
# Test that everything is escaped properly in a TEXT mode
|
||||
assert payload['text'] == \
|
||||
'<b>🚨 Change detected for <i>Apprise ' \
|
||||
'Test Title</i></b>\r\n<a href=' \
|
||||
'"http://localhost"><i>Apprise Body Title<' \
|
||||
'/i></a> had <a href="http://' \
|
||||
'127.0.0.1">a change</a>'
|
||||
'<b>🚨 Change detected for <i>Apprise Test Title</i>' \
|
||||
'</b>\r\n<a href="http://localhost"><i>' \
|
||||
'Apprise Body Title</i></a> had <' \
|
||||
'a href="http://127.0.0.1">a change</a>'
|
||||
|
||||
# Reset our values
|
||||
mock_post.reset_mock()
|
||||
@ -718,8 +717,9 @@ def test_plugin_telegram_formating_py3(mock_post):
|
||||
|
||||
# Test that everything is escaped properly in a HTML mode
|
||||
assert payload['text'] == \
|
||||
'<b><b>🚨 Another Change detected for <i>Apprise Test Title</i>' \
|
||||
'</b></b>\r\n<i><a href="http://localhost">Apprise Body Title</a>' \
|
||||
'<b>\r\n<b>🚨 Another Change detected for ' \
|
||||
'<i>Apprise Test Title</i></b>\r\n</b>\r\n<i>' \
|
||||
'<a href="http://localhost">Apprise Body Title</a>' \
|
||||
'</i> had <a href="http://127.0.0.2">a change</a>\r\n'
|
||||
|
||||
# Now we'll test an edge case where a title was defined, but after
|
||||
@ -881,11 +881,11 @@ def test_plugin_telegram_formating_py2(mock_post):
|
||||
|
||||
# Test that everything is escaped properly in a TEXT mode
|
||||
assert payload['text'].encode('utf-8') == \
|
||||
'<b>\xf0\x9f\x9a\xa8 Change detected for <i>' \
|
||||
'Apprise Test Title</i></b>\r\n<a ' \
|
||||
'href="http://localhost"><i>Apprise Body ' \
|
||||
'Title</i></a> had <a href="' \
|
||||
'http://127.0.0.1">a change</a>'
|
||||
'<b>\xf0\x9f\x9a\xa8 Change detected for <i>' \
|
||||
'Apprise Test Title</i></b>\r\n<' \
|
||||
'a href="http://localhost"><i>Apprise Body Title' \
|
||||
'</i></a> had <a href="http://127.0.0.1"' \
|
||||
'>a change</a>'
|
||||
|
||||
# Reset our values
|
||||
mock_post.reset_mock()
|
||||
@ -969,9 +969,9 @@ def test_plugin_telegram_formating_py2(mock_post):
|
||||
|
||||
# Test that everything is escaped properly in a HTML mode
|
||||
assert payload['text'].encode('utf-8') == \
|
||||
'<b><b>\xf0\x9f\x9a\xa8 Change detected for ' \
|
||||
'<i>Apprise Test Title</i></b></b>\r\n<i>' \
|
||||
'<a href="http://localhost">Apprise Body Title</a>'\
|
||||
'<b>\r\n<b>\xf0\x9f\x9a\xa8 Change detected for ' \
|
||||
'<i>Apprise Test Title</i></b>\r\n</b>\r\n<i>' \
|
||||
'<a href="http://localhost">Apprise Body Title</a>' \
|
||||
'</i> had <a href="http://127.0.0.1">a change</a>\r\n'
|
||||
|
||||
# Reset our values
|
||||
@ -1163,8 +1163,8 @@ def test_plugin_telegram_html_formatting(mock_post):
|
||||
|
||||
# Test that everything is escaped properly in a HTML mode
|
||||
assert payload['text'] == \
|
||||
'<b><b>\'information\'</b></b>\r\n<i>"This is in Italic"' \
|
||||
'</i>\r\n<b> Headings are dropped and converted to bold</b>'
|
||||
'<b>\r\n<b>\'information\'</b>\r\n</b>\r\n<i>"This is in Italic"' \
|
||||
'</i>\r\n<b> Headings are dropped and converted to bold</b>\r\n'
|
||||
|
||||
mock_post.reset_mock()
|
||||
|
||||
@ -1177,7 +1177,28 @@ def test_plugin_telegram_html_formatting(mock_post):
|
||||
|
||||
assert payload['text'] == \
|
||||
'<b><title>&apos;information&apos</title></b>' \
|
||||
'\r\n<em>&quot;This is in Italic&quot</em' \
|
||||
'><br/><h5>&emsp;&emspHeadings&nbsp;' \
|
||||
'are dropped and&nbspconverted to bold<' \
|
||||
'/h5>'
|
||||
'\r\n<em>&quot;This is in Italic&quot</em><' \
|
||||
'br/><h5>&emsp;&emspHeadings&nbsp;are ' \
|
||||
'dropped and&nbspconverted to bold</h5>'
|
||||
|
||||
# Let's test more complex HTML examples now
|
||||
mock_post.reset_mock()
|
||||
|
||||
test_file_01 = os.path.join(
|
||||
TEST_VAR_DIR, '01_test_example.html')
|
||||
with open(test_file_01) as html_file:
|
||||
assert aobj.notify(
|
||||
body=html_file.read(), body_format=NotifyFormat.HTML)
|
||||
|
||||
# owner has already been looked up, so only one call is made
|
||||
assert mock_post.call_count == 1
|
||||
|
||||
payload = loads(mock_post.call_args_list[0][1]['data'])
|
||||
assert payload['text'] == \
|
||||
'\r\n<b>Bootstrap 101 Template</b>\r\n<b>My Title</b>\r\n' \
|
||||
'<b>Heading 1</b>\r\n-Bullet 1\r\n-Bullet 2\r\n-Bullet 3\r\n' \
|
||||
'-Bullet 1\r\n-Bullet 2\r\n-Bullet 3\r\n<b>Heading 2</b>\r\n' \
|
||||
'A div entry\r\nA div entry\r\n<code>A pre entry</code>\r\n' \
|
||||
'<b>Heading 3</b>\r\n<b>Heading 4</b>\r\n<b>Heading 5</b>\r\n' \
|
||||
'<b>Heading 6</b>\r\nA set of text\r\n' \
|
||||
'Another line after the set of text\r\nMore text\r\nlabel'
|
||||
|
66
test/var/01_test_example.html
Normal file
66
test/var/01_test_example.html
Normal file
@ -0,0 +1,66 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
|
||||
<title>Bootstrap 101 Template</title>
|
||||
|
||||
<!-- Bootstrap -->
|
||||
<link href="css/bootstrap.min.css" rel="stylesheet">
|
||||
|
||||
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
||||
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
|
||||
<!--[if lt IE 9]>
|
||||
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
|
||||
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
||||
<![endif]-->
|
||||
</head>
|
||||
<body>
|
||||
<h1>My Title</h1>
|
||||
|
||||
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
|
||||
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js"></script>
|
||||
<!-- Include all compiled plugins (below), or include individual files as needed -->
|
||||
<script src="js/bootstrap.min.js"></script>
|
||||
|
||||
<h1>Heading 1</h1>
|
||||
<p>
|
||||
<ul>
|
||||
<li>Bullet 1</li>
|
||||
<li>Bullet 2</li>
|
||||
<li>Bullet 3</li>
|
||||
</ul>
|
||||
|
||||
<ol>
|
||||
<li>Bullet 1</li>
|
||||
<li>Bullet 2</li>
|
||||
<li>Bullet 3</li>
|
||||
</ol>
|
||||
</p>
|
||||
|
||||
<h2>Heading 2</h2>
|
||||
<div>A div entry</div>
|
||||
<p>
|
||||
<span>A div entry</span>
|
||||
<pre>A pre entry</pre>
|
||||
</p>
|
||||
|
||||
<h3>Heading 3</h3>
|
||||
<h4>Heading 4</h4>
|
||||
<h5>Heading 5</h5>
|
||||
<h6>Heading 6</h6>
|
||||
|
||||
<p>
|
||||
A set of text <br/>Another line after the set of text
|
||||
<hr/>
|
||||
More text
|
||||
</p>
|
||||
<form>
|
||||
<label>label</label>
|
||||
<input/>
|
||||
<select/>
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue
Block a user