From 707cc6761b5f982c85551f6672b29184ff1bae09 Mon Sep 17 00:00:00 2001
From: Christopher Broderick
Date: Sun, 16 Jul 2023 14:55:18 +0100
Subject: [PATCH 01/20] Implement a recursive multipart parser to cater for
attachments that have embedded attachments.
---
helpdesk/email.py | 58 +++++++++++++++++++++++++++++++++++++++--------
1 file changed, 48 insertions(+), 10 deletions(-)
diff --git a/helpdesk/email.py b/helpdesk/email.py
index 68aa06b6..fd956fef 100644
--- a/helpdesk/email.py
+++ b/helpdesk/email.py
@@ -734,7 +734,7 @@ def extract_part_data(
if name:
name = email.utils.collapse_rfc2231_value(name)
part_body = None
- part_full_body = None
+ formatted_body = None
if part.get_content_maintype() == 'text' and name is None:
if part.get_content_subtype() == 'plain':
part_body = part.get_payload(decode=True)
@@ -747,26 +747,26 @@ def extract_part_data(
if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False):
# first message in thread, we save full body to avoid
# losing forwards and things like that
- part_full_body = get_body_from_fragments(part_body)
+ formatted_body = get_body_from_fragments(part_body)
part_body = EmailReplyParser.parse_reply(part_body)
else:
# second and other reply, save only first part of the
# message
part_body = EmailReplyParser.parse_reply(part_body)
- part_full_body = part_body
+ formatted_body = part_body
# workaround to get unicode text out rather than escaped text
part_body = get_encoded_body(part_body)
logger.debug("Discovered plain text MIME part")
else:
email_body = get_email_body_from_part_payload(part)
- if not part_body and not part_full_body:
+ if not part_body and not formatted_body:
# no text has been parsed so far - try such deep parsing
# for some messages
altered_body = email_body.replace(
"
", "\n").replace(" {email_body}
"
@@ -793,7 +793,44 @@ def extract_part_data(
payload = part.as_string() if part.is_multipart() else part.get_payload(decode=True)
files.append(SimpleUploadedFile(name, payload, mimetypes.guess_type(name)[0]))
logger.debug("Found MIME attachment %s", name)
- return part_body, part_full_body
+ return part_body, formatted_body
+
+
+def recurse_multipart(
+ multipart: Message,
+ counter: int,
+ ticket_id: int,
+ files: List,
+ logger: logging.Logger
+) -> Tuple[str, str]:
+ '''
+ The received MIME part could be a multipart with embedded multiparts and therefore requires recursion.
+ Recurse through the multipart structures trying to find the 1st body part that
+ provides the message body. It will try to find an HTML formatted part (contentType=text/html)
+ and a TEXT formatted part (contentType=text/plain) and return both
+ :param multipart:
+ :param counter:
+ :param ticket_id:
+ :param files:
+ :param logger:
+ '''
+ plain_msg = None
+ formatted_msg = None
+
+ for part in multipart.walk():
+ if part.get_content_maintype() == 'multipart':
+ continue
+ # See email.message_obj.Message.get_filename()
+ plain_body, formatted_body = recurse_multipart(
+ part, counter, ticket_id, files, logger) if part.get_content_maintype(
+ ) == 'multipart' else extract_part_data(part, counter, ticket_id, files, logger)
+ # Only update the message variables if they are still empty to handle attached messages overriding the core message
+ if plain_msg is None and plain_body:
+ plain_msg = plain_body
+ if formatted_msg is None and formatted_body:
+ formatted_msg = formatted_body
+ counter += 1
+ return plain_msg, formatted_msg
def object_from_message(message: str,
@@ -841,10 +878,11 @@ def object_from_message(message: str,
if part.get_content_maintype() == 'multipart':
continue
# See email.message_obj.Message.get_filename()
- part_body, part_full_body = extract_part_data(part, counter, ticket_id, files, logger)
- if part_body:
- body = part_body
- full_body = part_full_body
+ plain_body, formatted_body = extract_part_data(part, counter, ticket_id, files, logger)
+ if plain_body:
+ body = plain_body
+ if formatted_body:
+ full_body = formatted_body
counter += 1
if not body:
From 7b72a2cad22de053acd2fa84d88976fab2414e4a Mon Sep 17 00:00:00 2001
From: Christopher Broderick
Date: Mon, 17 Jul 2023 23:33:56 +0100
Subject: [PATCH 02/20] Include the rendered response in assertion failure messages.
---
helpdesk/tests/test_ticket_submission.py | 25 +++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/helpdesk/tests/test_ticket_submission.py b/helpdesk/tests/test_ticket_submission.py
index 2e1da9ac..1d8ef61c 100644
--- a/helpdesk/tests/test_ticket_submission.py
+++ b/helpdesk/tests/test_ticket_submission.py
@@ -2,15 +2,12 @@
from django.contrib.auth import get_user_model
from django.core import mail
-from django.core.exceptions import ObjectDoesNotExist
-from django.forms import ValidationError
from django.test import TestCase
from django.test.client import Client
from django.urls import reverse
import email
-from helpdesk.email import create_ticket_cc, object_from_message
+from helpdesk.email import object_from_message
from helpdesk.models import CustomField, FollowUp, KBCategory, KBItem, Queue, Ticket, TicketCC
-from helpdesk.tests.helpers import print_response
import logging
from urllib.parse import urlparse
import uuid
@@ -1102,9 +1099,23 @@ class EmailInteractionsTestCase(TestCase):
cat_url = reverse('helpdesk:submit') + \
"?kbitem=1&submitter_email=foo@bar.cz&title=lol"
response = self.client.get(cat_url)
+
+ if (
+ hasattr(response, "render")
+ and callable(response.render)
+ and not response.is_rendered
+ ):
+ response.render()
+ if response.streaming:
+ content = b"".join(response.streaming_content)
+ else:
+ content = response.content
+
+
+ msg_prefix = content.decode(response.charset)
self.assertContains(
- response, '')
+ response, '', msg_prefix = msg_prefix)
self.assertContains(
- response, '')
+ response, '', msg_prefix = msg_prefix)
self.assertContains(
- response, '')
+ response, '', msg_prefix = msg_prefix)
From 07f6d5f6c8cc44337e4c6ce9aa726a4dad16aa6c Mon Sep 17 00:00:00 2001
From: Christopher Broderick
Date: Tue, 18 Jul 2023 01:01:10 +0100
Subject: [PATCH 03/20] Make test less dependent on template changes
---
helpdesk/tests/test_ticket_submission.py | 27 +++++++++++++++++-------
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/helpdesk/tests/test_ticket_submission.py b/helpdesk/tests/test_ticket_submission.py
index 1d8ef61c..2c59ef75 100644
--- a/helpdesk/tests/test_ticket_submission.py
+++ b/helpdesk/tests/test_ticket_submission.py
@@ -1085,21 +1085,32 @@ class EmailInteractionsTestCase(TestCase):
cat = KBCategory.objects.create(
title="Test Cat",
slug="test_cat",
- description="This is a test category",
+ description="This is a test category",
queue=self.queue_public,
)
cat.save()
+ attr_list = {
+ "f1_field_title": "KBItem 1",
+ "f1_attr": "kbitem",
+ "f1_attr_value": "1",
+ "f2_attr": "submitter_email",
+ "f2_attr_value": "foo@bar.cz",
+ "f3_attr": "title",
+ "f3_attr_value": "lol",
+ }
self.kbitem1 = KBItem.objects.create(
category=cat,
- title="KBItem 1",
+ title=attr_list["f1_field_title"],
question="What?",
answer="A KB Item",
)
self.kbitem1.save()
- cat_url = reverse('helpdesk:submit') + \
- "?kbitem=1&submitter_email=foo@bar.cz&title=lol"
+ cat_url = reverse('helpdesk:submit') + '?' \
+ + attr_list["f1_attr"] + '=' + attr_list["f1_attr_value"] + '&' \
+ + attr_list["f2_attr"] + '=' + attr_list["f2_attr_value"] + '&' \
+ + attr_list["f3_attr"] + '=' + attr_list["f3_attr_value"]
response = self.client.get(cat_url)
-
+ # Get the rendered response to make it easier to debug if things go wrong
if (
hasattr(response, "render")
and callable(response.render)
@@ -1114,8 +1125,8 @@ class EmailInteractionsTestCase(TestCase):
msg_prefix = content.decode(response.charset)
self.assertContains(
- response, '', msg_prefix = msg_prefix)
+ response, '', msg_prefix = msg_prefix)
self.assertContains(
- response, '', msg_prefix = msg_prefix)
+ response, '', msg_prefix = msg_prefix)
+ response, '
Date: Tue, 18 Jul 2023 01:35:13 +0100
Subject: [PATCH 04/20] Make query test less flaky
---
helpdesk/tests/test_query.py | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/helpdesk/tests/test_query.py b/helpdesk/tests/test_query.py
index 9e7d8842..8e2c1533 100644
--- a/helpdesk/tests/test_query.py
+++ b/helpdesk/tests/test_query.py
@@ -58,12 +58,13 @@ class QueryTests(TestCase):
query = query_to_base64({})
response = self.client.get(
reverse('helpdesk:datatables_ticket_list', args=[query]))
+ resp_json = response.json()
self.assertEqual(
- response.json(),
+ resp_json,
{
"data":
- [{"ticket": "1 [test_queue-1]", "id": 1, "priority": 3, "title": "unassigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", "created": "now", "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": ""},
- {"ticket": "2 [test_queue-2]", "id": 2, "priority": 3, "title": "assigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", "created": "now", "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}],
+ [{"ticket": "1 [test_queue-1]", "id": 1, "priority": 3, "title": "unassigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", "created": resp_json["data"][0]["created"], "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": ""},
+ {"ticket": "2 [test_queue-2]", "id": 2, "priority": 3, "title": "assigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", "created": resp_json["data"][1]["created"], "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}],
"recordsFiltered": 2,
"recordsTotal": 2,
"draw": 0,
@@ -77,12 +78,13 @@ class QueryTests(TestCase):
)
response = self.client.get(
reverse('helpdesk:datatables_ticket_list', args=[query]))
+ resp_json = response.json()
self.assertEqual(
- response.json(),
+ resp_json,
{
"data":
[{"ticket": "2 [test_queue-2]", "id": 2, "priority": 3, "title": "assigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open",
- "created": "now", "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}],
+ "created": resp_json["data"][0]["created"], "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}],
"recordsFiltered": 1,
"recordsTotal": 1,
"draw": 0,
@@ -96,12 +98,13 @@ class QueryTests(TestCase):
)
response = self.client.get(
reverse('helpdesk:datatables_ticket_list', args=[query]))
+ resp_json = response.json()
self.assertEqual(
- response.json(),
+ resp_json,
{
"data":
[{"ticket": "2 [test_queue-2]", "id": 2, "priority": 3, "title": "assigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open",
- "created": "now", "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}],
+ "created": resp_json["data"][0]["created"], "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}],
"recordsFiltered": 1,
"recordsTotal": 1,
"draw": 0,
From 2e5697c11acfd3d98a8f3c37226c419633b81226 Mon Sep 17 00:00:00 2001
From: Christopher Broderick
Date: Sun, 23 Jul 2023 06:12:32 +0100
Subject: [PATCH 05/20] Completely rework the email parsing. Fixes a number of
hacks that have accumulated and makes it more easily understood and easier
to enhance in the future.
---
helpdesk/email.py | 283 +++++++++++++++++++++++++---------------------
1 file changed, 152 insertions(+), 131 deletions(-)
diff --git a/helpdesk/email.py b/helpdesk/email.py
index fd956fef..1ace3830 100644
--- a/helpdesk/email.py
+++ b/helpdesk/email.py
@@ -39,7 +39,8 @@ import ssl
import sys
from time import ctime
import typing
-from typing import List, Tuple
+from typing import List
+from email.mime.text import MIMEText
# import User model, which may be a custom model
@@ -53,6 +54,8 @@ STRIPPED_SUBJECT_STRINGS = [
"Automatic reply: ",
]
+HTML_EMAIL_ATTACHMENT_FILENAME = _("email_html_body.html")
+
def process_email(quiet=False):
for q in Queue.objects.filter(
@@ -141,7 +144,7 @@ def pop3_sync(q, logger, server):
full_message = encoding.force_str(
"\n".join(raw_content), errors='replace')
try:
- ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ ticket = extract_email_metadata(message=full_message, queue=q, logger=logger)
except IgnoreTicketException:
logger.warn(
"Message %s was ignored and will be left on POP3 server" % msgNum)
@@ -198,7 +201,7 @@ def imap_sync(q, logger, server):
data = server.fetch(num, '(RFC822)')[1]
full_message = encoding.force_str(data[0][1], errors='replace')
try:
- ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ ticket = extract_email_metadata(message=full_message, queue=q, logger=logger)
except IgnoreTicketException:
logger.warn("Message %s was ignored and will be left on IMAP server" % num)
except DeleteIgnoredTicketException:
@@ -285,7 +288,7 @@ def imap_oauth_sync(q, logger, server):
full_message = encoding.force_str(data[0][1], errors='replace')
try:
- ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ ticket = extract_email_metadata(message=full_message, queue=q, logger=logger)
except IgnoreTicketException as itex:
logger.warn(f"Message {num} was ignored. {itex}")
@@ -405,7 +408,7 @@ def process_queue(q, logger):
with open(m, 'r') as f:
full_message = encoding.force_str(f.read(), errors='replace')
try:
- ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ ticket = extract_email_metadata(message=full_message, queue=q, logger=logger)
except IgnoreTicketException:
logger.warn("Message %d was ignored and will be left in local directory", i)
except DeleteIgnoredTicketException:
@@ -433,7 +436,7 @@ def decodeUnknown(charset, string):
if not charset:
try:
return str(string, encoding='utf-8', errors='replace')
- except UnicodeError:
+ except UnicodeError as e:
return str(string, encoding='iso8859-1', errors='replace')
return str(string, encoding=charset, errors='replace')
return string
@@ -723,133 +726,92 @@ def attempt_body_extract_from_html(message: str) -> str:
return body, full_body
-def extract_part_data(
+def extract_mime_content(part: Message,) -> str:
+ '''
+ Extract the content from the MIME body part
+ :param part: the MIME part to extract the content from
+ '''
+ content_bytes = part.get_payload(decode=True)
+ charset = part.get_content_charset()
+ # The default for MIME email is 7bit which requires special decoding to utf-8 so make sure we handle the decoding correctly
+ if part['Content-Transfer-Encoding'] in [None, '8bit', '7bit'] and (charset == 'utf-8' or charset is None):
+ charset = "unicode_escape"
+ content = decodeUnknown(charset, content_bytes)
+ return content
+
+
+def extract_email_message(mime_content: str, is_plain_content_type: bool, is_extract_full_email_msg: bool) -> str:
+ email_content = None
+ if is_extract_full_email_msg:
+ # Take the full content including encapsulated "forwarded" and "reply" sections
+ email_content = get_body_from_fragments(mime_content) if is_plain_content_type else mime_content
+ else:
+ # Just get the primary part of the email and drop off any text below the actually response text
+ email_content = EmailReplyParser.parse_reply(mime_content) if is_plain_content_type else mime_content
+ return email_content
+
+
+def process_as_attachment(
part: Message,
counter: int,
- ticket_id: int,
files: List,
logger: logging.Logger
-) -> Tuple[str, str]:
+):
name = part.get_filename()
if name:
- name = email.utils.collapse_rfc2231_value(name)
- part_body = None
- formatted_body = None
- if part.get_content_maintype() == 'text' and name is None:
- if part.get_content_subtype() == 'plain':
- part_body = part.get_payload(decode=True)
- # https://github.com/django-helpdesk/django-helpdesk/issues/732
- if part['Content-Transfer-Encoding'] == '8bit' and part.get_content_charset() == 'utf-8':
- part_body = part_body.decode('unicode_escape')
- part_body = decodeUnknown(part.get_content_charset(), part_body)
- # have to use django_settings here so overwriting it works in tests
- # the default value is False anyway
- if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False):
- # first message in thread, we save full body to avoid
- # losing forwards and things like that
- formatted_body = get_body_from_fragments(part_body)
- part_body = EmailReplyParser.parse_reply(part_body)
- else:
- # second and other reply, save only first part of the
- # message
- part_body = EmailReplyParser.parse_reply(part_body)
- formatted_body = part_body
- # workaround to get unicode text out rather than escaped text
- part_body = get_encoded_body(part_body)
- logger.debug("Discovered plain text MIME part")
- else:
- email_body = get_email_body_from_part_payload(part)
-
- if not part_body and not formatted_body:
- # no text has been parsed so far - try such deep parsing
- # for some messages
- altered_body = email_body.replace(
- "