From 4e001c7f15dd266ffb1b0dc7e1aebaca07c63642 Mon Sep 17 00:00:00 2001
From: chrisbroderick
Date: Fri, 2 Sep 2022 22:11:19 +0100
Subject: [PATCH 01/16] test utility methods to simplify more complex tests
---
helpdesk/tests/utils.py | 122 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 122 insertions(+)
create mode 100644 helpdesk/tests/utils.py
diff --git a/helpdesk/tests/utils.py b/helpdesk/tests/utils.py
new file mode 100644
index 00000000..2e36f04d
--- /dev/null
+++ b/helpdesk/tests/utils.py
@@ -0,0 +1,122 @@
+"""Utility functions that make unit testing easier and less brittle."""
+
+import factory
+import faker
+import random
+import re
+import string
+import unicodedata
+from io import BytesIO
+from email.message import Message
+from email.mime.text import MIMEText
+from numpy.random import randint
+from PIL import Image
+from typing import Tuple, Any
+
+
+def strip_accents(text):
+ """
+ Strip accents from input String (only works on Python 3).
+
+ :param text: The input string.
+ :type text: String.
+
+ :returns: The processed String.
+ :rtype: String.
+ """
+ text = unicodedata.normalize('NFD', text)
+ text = text.encode('ascii', 'ignore')
+ text = text.decode("utf-8")
+ return str(text)
+
+
+def text_to_id(text):
+ """
+ Convert input text to id.
+
+ :param text: The input string.
+ :type text: String.
+
+ :returns: The processed String.
+ :rtype: String.
+ """
+ text = strip_accents(text.lower())
+ text = re.sub('[ ]+', '_', text)
+ text = re.sub('[^0-9a-zA-Z_-]', '', text)
+ return text
+
+
+def get_random_string(length: int=16) -> str:
+ return "".join(
+ [random.choice(string.ascii_letters + string.digits) for n in range(length)]
+ )
+
+
+def generate_random_image(image_format, array_dims):
+ """
+ Creates an image from a random array.
+
+ :param image_format: An image format (PNG or JPEG).
+ :param array_dims: A tuple with array dimensions.
+
+ :returns: A byte string with encoded image
+ :rtype: bytes
+ """
+ image_bytes = randint(low=0, high=255, size=array_dims, dtype='uint8')
+ io = BytesIO()
+ image_pil = Image.fromarray(image_bytes)
+ image_pil.save(io, image_format, subsampling=0, quality=100)
+ return io.getvalue()
+
+
+def get_random_image(image_format: str="PNG", size: int=5):
+ """
+ Returns a random image.
+
+ Args:
+ image_format: An image format (PNG or JPEG).
+
+ Returns:
+ A byte string with the encoded image
+ """
+ return generate_random_image(image_format, (size, size, 3))
+
+
+def get_fake(provider: str, locale: str = "en_US", min_length: int = 5) -> Any:
+ """
+ Generates a random string, float, integer etc based on provider
+ Provider can be 'text', 'sentence', etc.
+ e.g. `get_fake('name')` ==> 'Buzz Aldrin'
+ """
+ return factory.Faker(provider).evaluate({}, None, {'locale': locale,})
+
+
+def generate_email_address(locale: str="en_US") -> Tuple[str, str, str]:
+ """
+ Generate an email address making sure that the email address itself contains only ascii
+ """
+ fake = faker.Faker(locale=locale)
+ first_name = fake.first_name()
+ last_name = fake.last_name()
+ first_name.replace(' ', '').encode("ascii", "ignore").lower()
+ email_address = "{}.{}@{}".format(
+ first_name.replace(' ', '').encode("ascii", "ignore").lower().decode(),
+ last_name.replace(' ', '').encode("ascii", "ignore").lower().decode(),
+ get_random_string(5) + fake.domain_name()
+ )
+ return email_address, first_name, last_name
+
+
+def generate_email(locale: str="en_US", content_type: str="text/html", use_short_email: bool=False) -> Message:
+ """
+ Generates an email including headers
+ """
+ to_meta = generate_email_address(locale)
+ from_meta = generate_email_address(locale)
+ body = get_fake("text", locale=locale)
+
+ msg = MIMEText(body)
+ msg['Subject'] = get_fake("sentence", locale=locale)
+ msg['From'] = from_meta[0] if use_short_email else "{} {}<{}>".format(from_meta[1], from_meta[2], from_meta[0])
+ msg['To'] = to_meta[0] if use_short_email else "{} {}<{}>".format(to_meta[1], to_meta[2], to_meta[0])
+ return msg.as_string()
From 3c4c9ce533a86c6b8d9e6cbadd38381a39b4f0d7 Mon Sep 17 00:00:00 2001
From: chrisbroderick
Date: Fri, 2 Sep 2022 22:12:42 +0100
Subject: [PATCH 02/16] Additional test libraries for more detailed testing of
emails
---
requirements-testing.txt | 3 +++
1 file changed, 3 insertions(+)
diff --git a/requirements-testing.txt b/requirements-testing.txt
index 12adeae7..80872424 100644
--- a/requirements-testing.txt
+++ b/requirements-testing.txt
@@ -7,3 +7,6 @@ pbr
mock
freezegun
isort
+numpy
+factory_boy
+faker
\ No newline at end of file
From e1085cb370fd2b31a4a868c465a69da771acb317 Mon Sep 17 00:00:00 2001
From: chrisbroderick
Date: Sun, 4 Sep 2022 21:53:08 +0100
Subject: [PATCH 03/16] Custom exception to handle ignored messages explicitly.
---
helpdesk/exceptions.py | 5 +++++
1 file changed, 5 insertions(+)
create mode 100644 helpdesk/exceptions.py
diff --git a/helpdesk/exceptions.py b/helpdesk/exceptions.py
new file mode 100644
index 00000000..de35b9c4
--- /dev/null
+++ b/helpdesk/exceptions.py
@@ -0,0 +1,5 @@
+class IgnoreTicketException(Exception):
+ """
+ Raised when an email message is received from a sender who is marked to be ignored
+ """
+ pass
\ No newline at end of file
From 23c3b72a432a24f8f374881ffa80ffab57d5120f Mon Sep 17 00:00:00 2001
From: chrisbroderick
Date: Sun, 4 Sep 2022 22:01:32 +0100
Subject: [PATCH 04/16] Refactor object_from_message to make it more testable
and fix some bugs. Extract the from email using the email library instead of
pre-decoding which exposes the comma separator causing the email address to
be extracted incorrectly if the real name contains a comma. Raise an
exception when a message to be ignored is detected to process ignored
messages explicitly.
---
helpdesk/email.py | 253 ++++++++++++++++++++++++----------------------
1 file changed, 132 insertions(+), 121 deletions(-)
diff --git a/helpdesk/email.py b/helpdesk/email.py
index 1a0b7d50..7a4962e2 100644
--- a/helpdesk/email.py
+++ b/helpdesk/email.py
@@ -20,6 +20,7 @@ import email
from email.utils import getaddresses
from email_reply_parser import EmailReplyParser
from helpdesk import settings
+from helpdesk.exceptions import IgnoreTicketException
from helpdesk.lib import process_attachments, safe_template_context
from helpdesk.models import FollowUp, IgnoreEmail, Queue, Ticket
import imaplib
@@ -34,6 +35,8 @@ import ssl
import sys
from time import ctime
import typing
+from email.message import Message
+from typing import Tuple, List
# import User model, which may be a custom model
@@ -135,8 +138,11 @@ def pop3_sync(q, logger, server):
else:
full_message = encoding.force_str(
"\n".join(raw_content), errors='replace')
- ticket = object_from_message(
- message=full_message, queue=q, logger=logger)
+ try:
+ ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ except IgnoreTicketException:
+ logger.warn(
+ "Message %s was ignored and will be left on POP3 server" % msgNum)
if ticket:
server.dele(msgNum)
@@ -186,9 +192,12 @@ def imap_sync(q, logger, server):
data = server.fetch(num, '(RFC822)')[1]
full_message = encoding.force_str(data[0][1], errors='replace')
try:
- ticket = object_from_message(
- message=full_message, queue=q, logger=logger)
- except TypeError:
+ ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ except IgnoreTicketException:
+ logger.warn("Message %s was ignored and will be left on IMAP server" % num)
+ return
+ except TypeError as te:
+ logger.warn(f"Unexpected error processing message: {te}")
ticket = None # hotfix. Need to work out WHY.
if ticket:
server.store(num, '+FLAGS', '\\Deleted')
@@ -282,8 +291,11 @@ def process_queue(q, logger):
logger.info("Processing message %d" % i)
with open(m, 'r') as f:
full_message = encoding.force_str(f.read(), errors='replace')
- ticket = object_from_message(
- message=full_message, queue=q, logger=logger)
+ try:
+ ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ except IgnoreTicketException:
+ logger.warn("Message %d was ignored and will be left in local directory", i)
+ return
if ticket:
logger.info(
"Successfully processed message %d, ticket/comment created.", i)
@@ -573,38 +585,122 @@ def get_email_body_from_part_payload(part) -> str:
part.get_payload(decode=False)
)
+def attempt_body_extract_from_html(message: str) -> str:
+ mail = BeautifulSoup(str(message), "html.parser")
+ beautiful_body = mail.find('body')
+ body = None
+ full_body = None
+ if beautiful_body:
+ try:
+ body = beautiful_body.text
+ full_body = body
+ except AttributeError:
+ pass
+ if not body:
+ body = ""
+ return body, full_body
+
+def extract_part_data(
+ part: Message,
+ counter: int,
+ ticket_id: int,
+ files: List,
+ logger: logging.Logger
+ ) -> Tuple[str, str]:
+ name = part.get_filename()
+ if name:
+ name = email.utils.collapse_rfc2231_value(name)
+ part_body = None
+ part_full_body=None
+ if part.get_content_maintype() == 'text' and name is None:
+ if part.get_content_subtype() == 'plain':
+ part_body = part.get_payload(decode=True)
+ # https://github.com/django-helpdesk/django-helpdesk/issues/732
+ if part['Content-Transfer-Encoding'] == '8bit' and part.get_content_charset() == 'utf-8':
+ part_body = part_body.decode('unicode_escape')
+ part_body = decodeUnknown(part.get_content_charset(), part_body)
+ # have to use django_settings here so overwriting it works in tests
+ # the default value is False anyway
+ if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False):
+ # first message in thread, we save full body to avoid
+ # losing forwards and things like that
+ part_full_body = get_body_from_fragments(part_body)
+ part_body = EmailReplyParser.parse_reply(part_body)
+ else:
+ # second and other reply, save only first part of the
+ # message
+ part_body = EmailReplyParser.parse_reply(part_body)
+ part_full_body = part_body
+ # workaround to get unicode text out rather than escaped text
+ part_body = get_encoded_body(part_body)
+ logger.debug("Discovered plain text MIME part")
+ else:
+ email_body = get_email_body_from_part_payload(part)
+
+ if not part_body and not part_full_body:
+ # no text has been parsed so far - try such deep parsing
+ # for some messages
+ altered_body = email_body.replace(
+ "
", "\n").replace("
{email_body}