Add notification body conversion for HTML to plain text (#527)

This commit is contained in:
Ryan Young 2022-01-27 16:33:34 -08:00 committed by GitHub
parent 5e2a293195
commit 6a2b6130db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 191 additions and 47 deletions

View File

@ -23,14 +23,12 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE. # THE SOFTWARE.
import re
import os import os
import six import six
from markdown import markdown
from itertools import chain from itertools import chain
from .common import NotifyType from .common import NotifyType
from .common import NotifyFormat
from .common import MATCH_ALL_TAG from .common import MATCH_ALL_TAG
from .conversion import convert_between
from .utils import is_exclusive_match from .utils import is_exclusive_match
from .utils import parse_list from .utils import parse_list
from .utils import parse_urls from .utils import parse_urls
@ -516,50 +514,8 @@ class Apprise(object):
# was set to None), or we did define a tag and the logic above # was set to None), or we did define a tag and the logic above
# determined we need to notify the service it's associated with # determined we need to notify the service it's associated with
if server.notify_format not in conversion_map: if server.notify_format not in conversion_map:
if body_format == NotifyFormat.MARKDOWN and \ conversion_map[server.notify_format] = \
server.notify_format == NotifyFormat.HTML: convert_between(body_format, server.notify_format, body)
# Apply Markdown
conversion_map[server.notify_format] = markdown(body)
elif body_format == NotifyFormat.TEXT and \
server.notify_format == NotifyFormat.HTML:
# Basic TEXT to HTML format map; supports keys only
re_map = {
# Support Ampersand
r'&': '&',
# Spaces to   for formatting purposes since
# multiple spaces are treated as one an this may
# not be the callers intention
r' ': ' ',
# Tab support
r'\t': '   ',
# Greater than and Less than Characters
r'>': '>',
r'<': '&lt;',
}
# Compile our map
re_table = re.compile(
r'(' + '|'.join(
map(re.escape, re_map.keys())) + r')',
re.IGNORECASE,
)
# Execute our map against our body in addition to
# swapping out new lines and replacing them with <br/>
conversion_map[server.notify_format] = \
re.sub(r'\r*\n', '<br/>\r\n',
re_table.sub(
lambda x: re_map[x.group()], body))
else:
# Store entry directly
conversion_map[server.notify_format] = body
if interpret_escapes: if interpret_escapes:
# #

130
apprise/conversion.py Normal file
View File

@ -0,0 +1,130 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022 Chris Caron <lead2gold@gmail.com>
# All rights reserved.
#
# This code is licensed under the MIT License.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files(the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions :
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import re
import six
from markdown import markdown
from os import linesep
from .common import NotifyFormat
if six.PY2:
from HTMLParser import HTMLParser
else:
from html.parser import HTMLParser
def convert_between(from_format, to_format, body):
    """
    Translate a notification body from one NotifyFormat into another.

    The (from_format, to_format) pair selects the converter to run. Pairs
    that have no registered converter (including identical formats) hand
    the body back untouched. Nothing is caught here: an exception raised
    by the selected converter propagates to the caller.
    """
    # Dispatch table keyed on (source format, target format)
    handlers = {
        (NotifyFormat.MARKDOWN, NotifyFormat.HTML): markdown,
        (NotifyFormat.TEXT, NotifyFormat.HTML): text_to_html,
        (NotifyFormat.HTML, NotifyFormat.TEXT): html_to_text,
    }

    try:
        handler = handlers[(from_format, to_format)]

    except KeyError:
        # No conversion is defined for this pairing
        return body

    return handler(body)
def text_to_html(body):
    """
    Converts a notification body from plain text to HTML.

    The characters that carry meaning in HTML (&, < and >) are replaced by
    their entities, every space becomes a non-breaking space and every tab
    becomes three of them so that the layout of the text survives
    rendering. Finally each line break (LF, optionally preceded by any
    number of CRs) is rewritten as '<br/>' followed by CRLF.

    Returns the converted body as a string.
    """

    # Basic TEXT to HTML format map; supports single characters only
    re_map = {
        # Support Ampersand
        '&': '&amp;',

        # Spaces to &nbsp; for formatting purposes since
        # multiple spaces are treated as one and this may
        # not be the caller's intention
        ' ': '&nbsp;',

        # Tab support; this has to be the real TAB character. A raw
        # r'\t' key would instead match a backslash followed by the
        # letter 't' and mangle content such as Windows paths.
        '\t': '&nbsp;&nbsp;&nbsp;',

        # Greater than and Less than Characters
        '>': '&gt;',
        '<': '&lt;',
    }

    # Compile our map; matching is deliberately case sensitive so that
    # every possible match is guaranteed to be a key of re_map
    re_table = re.compile('|'.join(map(re.escape, re_map)))

    # Execute our map against our body; all entities are the result of a
    # single pass so the '&' we introduce is never escaped a second time
    escaped = re_table.sub(lambda match: re_map[match.group()], body)

    # Swap out new lines last so the <br/> we add is left untouched
    return re.sub(r'\r*\n', '<br/>\r\n', escaped)
def html_to_text(body):
    """
    Render an HTML notification body as plain text.

    The markup is pushed through an HTMLConverter; once the converter has
    been closed its `converted` attribute holds the resulting text, which
    is what gets returned.
    """
    converter = HTMLConverter()

    # Parse the markup, then let close() finalize the collected text
    converter.feed(body)
    converter.close()

    return converter.converted
class HTMLConverter(HTMLParser, object):
    """An HTML to plain text converter tuned for email messages.

    Usage: feed() the markup, call close() and then read the plain text
    from the `converted` attribute.

    List items become '- ' bullets, block quotes are indented with a tab,
    paragraphs/headings/table rows start after a blank line and <br> starts
    a new line. The content of <style> and <script> elements is dropped.
    """

    # Elements whose character data is meant for the renderer (CSS and
    # scripts) and must never end up in the plain text output
    IGNORED_CONTENT_TAGS = ('style', 'script')

    # Elements that start a new paragraph-like section
    SECTION_TAGS = ('p', 'h1', 'h2', 'h3', 'h4', 'tr', 'th')

    def __init__(self, **kwargs):
        super(HTMLConverter, self).__init__(**kwargs)

        # The plain text gathered so far
        self.converted = ""

        # True while we are inside one of the IGNORED_CONTENT_TAGS
        self._ignoring = False

        # True when whitespace was seen since the last text we stored; it
        # is turned into a single space if more text follows on the line
        self._pending_space = False

    def close(self):
        """Flush the parser and tidy up the `converted` result."""
        # Let the parser hand over any text it is still buffering; without
        # this a body such as 'AT&T' would be lost because the parser waits
        # to see whether the trailing '&...' is a character reference.
        super(HTMLConverter, self).close()

        # Drop the line breaks/indentation emitted at the outer edges
        self.converted = self.converted.strip()

    def handle_data(self, data):
        """Store the text found between tags."""
        if self._ignoring:
            # CSS/script content is not something a reader should see
            return

        text = data.strip()
        if not text:
            if data:
                # Whitespace only; remember it in case more text follows
                self._pending_space = True
            return

        # Whitespace trimmed from the edges still separates words (e.g.
        # 'Hello <b>world</b>'); never add it at the start of a line though
        if (self._pending_space or data[:1].isspace()) \
                and self.converted and not self.converted[-1].isspace():
            self.converted += ' '

        self.converted += text
        self._pending_space = data[-1:].isspace()

    def handle_starttag(self, tag, attrs):
        """Translate structural opening tags into plain text layout."""
        if tag in self.IGNORED_CONTENT_TAGS:
            self._ignoring = True
            return

        if tag == 'li':
            self._start_line(linesep + '- ')

        elif tag == 'blockquote':
            self._start_line(linesep + linesep + '\t')

        elif tag in self.SECTION_TAGS:
            self._start_line(linesep + '\n')

        elif tag == 'br':
            self._start_line(linesep)

    def handle_endtag(self, tag):
        """Translate structural closing tags into plain text layout."""
        if tag in self.IGNORED_CONTENT_TAGS:
            self._ignoring = False

        elif tag == 'blockquote':
            self._start_line(linesep + linesep)

    def _start_line(self, prefix):
        """Append a line break sequence and forget any pending space."""
        self.converted += prefix
        self._pending_space = False

58
test/test_conversion.py Normal file
View File

@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022 Chris Caron <lead2gold@gmail.com>
# All rights reserved.
#
# This code is licensed under the MIT License.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files(the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions :
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from apprise import NotifyFormat
from apprise.conversion import convert_between
# Disable logging for a cleaner testing output
import logging
logging.disable(logging.CRITICAL)
def test_html_to_text():
    """conversion: Test HTML to plain text
    """

    def to_text(html):
        # Shorthand for the one conversion direction exercised here
        return convert_between(NotifyFormat.HTML, NotifyFormat.TEXT, html)

    # A body without any markup passes through unchanged
    plain = "No HTML code here."
    assert to_text(plain) == plain

    # The text of every list item is retained
    listing = to_text("<ul><li>Lots and lots</li><li>of lists.</li></ul>")
    assert "Lots and lots" in listing
    assert "of lists." in listing

    # The text of a block quote is retained
    quoted = to_text("<blockquote>To be or not to be.</blockquote>")
    assert "To be or not to be." in quoted

    # Headings and paragraphs are retained; <br> becomes a line break
    spaced = to_text(
        "<h2>Fancy heading</h2>"
        "<p>And a paragraph too.<br>Plus line break.</p>")
    assert "Fancy heading" in spaced
    assert "And a paragraph too.\nPlus line break." in spaced

    # Style sheet content does not show up in the result
    styled = to_text(
        "<style>body { font: 200%; }</style>"
        "<p>Some obnoxious text here.</p>")
    assert styled == "Some obnoxious text here."