From 707cc6761b5f982c85551f6672b29184ff1bae09 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Sun, 16 Jul 2023 14:55:18 +0100 Subject: [PATCH 01/17] Implement a recursive multipart parser to cater for attachments that have embedded attachments. --- helpdesk/email.py | 58 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/helpdesk/email.py b/helpdesk/email.py index 68aa06b6..fd956fef 100644 --- a/helpdesk/email.py +++ b/helpdesk/email.py @@ -734,7 +734,7 @@ def extract_part_data( if name: name = email.utils.collapse_rfc2231_value(name) part_body = None - part_full_body = None + formatted_body = None if part.get_content_maintype() == 'text' and name is None: if part.get_content_subtype() == 'plain': part_body = part.get_payload(decode=True) @@ -747,26 +747,26 @@ def extract_part_data( if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False): # first message in thread, we save full body to avoid # losing forwards and things like that - part_full_body = get_body_from_fragments(part_body) + formatted_body = get_body_from_fragments(part_body) part_body = EmailReplyParser.parse_reply(part_body) else: # second and other reply, save only first part of the # message part_body = EmailReplyParser.parse_reply(part_body) - part_full_body = part_body + formatted_body = part_body # workaround to get unicode text out rather than escaped text part_body = get_encoded_body(part_body) logger.debug("Discovered plain text MIME part") else: email_body = get_email_body_from_part_payload(part) - if not part_body and not part_full_body: + if not part_body and not formatted_body: # no text has been parsed so far - try such deep parsing # for some messages altered_body = email_body.replace( "

", "

\n").replace("{email_body}" @@ -793,7 +793,44 @@ def extract_part_data( payload = part.as_string() if part.is_multipart() else part.get_payload(decode=True) files.append(SimpleUploadedFile(name, payload, mimetypes.guess_type(name)[0])) logger.debug("Found MIME attachment %s", name) - return part_body, part_full_body + return part_body, formatted_body + + +def recurse_multipart( + multipart: Message, + counter: int, + ticket_id: int, + files: List, + logger: logging.Logger +) -> Tuple[str, str]: + ''' + The received MIME part could be a multipart with embedded multiparts and therefore requires recursion. + Recurse through the multipart structures trying to find the 1st body part that + provides the message body. It will try to find an HTML formatted part (contentType=text/html) + and a TEXT formatted part (contentType=text/plain) and return both + :param multipart: + :param counter: + :param ticket_id: + :param files: + :param logger: + ''' + plain_msg = None + formatted_msg = None + + for part in multipart.walk(): + if part.get_content_maintype() == 'multipart': + continue + # See email.message_obj.Message.get_filename() + plain_body, formatted_body = recurse_multipart( + part, counter, ticket_id, files, logger) if part.get_content_maintype( + ) == 'multipart' else extract_part_data(part, counter, ticket_id, files, logger) + # Only update the message variables if they are still empty to handle attached messages overriding the core message + if plain_msg is None and plain_body: + plain_msg = plain_body + if formatted_msg is None and formatted_body: + formatted_msg = formatted_body + counter += 1 + return plain_msg, formatted_msg def object_from_message(message: str, @@ -841,10 +878,11 @@ def object_from_message(message: str, if part.get_content_maintype() == 'multipart': continue # See email.message_obj.Message.get_filename() - part_body, part_full_body = extract_part_data(part, counter, ticket_id, files, logger) - if part_body: - body = part_body - full_body = part_full_body + plain_body, formatted_body = extract_part_data(part, counter, ticket_id, files, logger) + if plain_body: + body = plain_body + if formatted_body: + full_body = formatted_body counter += 1 if not body: From 7b72a2cad22de053acd2fa84d88976fab2414e4a Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Mon, 17 Jul 2023 23:33:56 +0100 Subject: [PATCH 02/17] Allow identifying what exactly is not asserted. --- helpdesk/tests/test_ticket_submission.py | 25 +++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/helpdesk/tests/test_ticket_submission.py b/helpdesk/tests/test_ticket_submission.py index 2e1da9ac..1d8ef61c 100644 --- a/helpdesk/tests/test_ticket_submission.py +++ b/helpdesk/tests/test_ticket_submission.py @@ -2,15 +2,12 @@ from django.contrib.auth import get_user_model from django.core import mail -from django.core.exceptions import ObjectDoesNotExist -from django.forms import ValidationError from django.test import TestCase from django.test.client import Client from django.urls import reverse import email -from helpdesk.email import create_ticket_cc, object_from_message +from helpdesk.email import object_from_message from helpdesk.models import CustomField, FollowUp, KBCategory, KBItem, Queue, Ticket, TicketCC -from helpdesk.tests.helpers import print_response import logging from urllib.parse import urlparse import uuid @@ -1102,9 +1099,23 @@ class EmailInteractionsTestCase(TestCase): cat_url = reverse('helpdesk:submit') + \ "?kbitem=1&submitter_email=foo@bar.cz&title=lol" response = self.client.get(cat_url) + + if ( + hasattr(response, "render") + and callable(response.render) + and not response.is_rendered + ): + response.render() + if response.streaming: + content = b"".join(response.streaming_content) + else: + content = response.content + + + msg_prefix = content.decode(response.charset) self.assertContains( - response, '') + response, '', msg_prefix = msg_prefix) self.assertContains( - response, '') + response, '', msg_prefix = msg_prefix) self.assertContains( - response, '') + response, '', msg_prefix = msg_prefix) From 07f6d5f6c8cc44337e4c6ce9aa726a4dad16aa6c Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Tue, 18 Jul 2023 01:01:10 +0100 Subject: [PATCH 03/17] Make test less dependent on template changes --- helpdesk/tests/test_ticket_submission.py | 27 +++++++++++++++++------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/helpdesk/tests/test_ticket_submission.py b/helpdesk/tests/test_ticket_submission.py index 1d8ef61c..2c59ef75 100644 --- a/helpdesk/tests/test_ticket_submission.py +++ b/helpdesk/tests/test_ticket_submission.py @@ -1085,21 +1085,32 @@ class EmailInteractionsTestCase(TestCase): cat = KBCategory.objects.create( title="Test Cat", slug="test_cat", - description="This is a test category", + description="This is a test category", queue=self.queue_public, ) cat.save() + attr_list = { + "f1_field_title": "KBItem 1", + "f1_attr": "kbitem", + "f1_attr_value": "1", + "f2_attr": "submitter_email", + "f2_attr_value": "foo@bar.cz", + "f3_attr": "title", + "f3_attr_value": "lol", + } self.kbitem1 = KBItem.objects.create( category=cat, - title="KBItem 1", + title=attr_list["f1_field_title"], question="What?", answer="A KB Item", ) self.kbitem1.save() - cat_url = reverse('helpdesk:submit') + \ - "?kbitem=1&submitter_email=foo@bar.cz&title=lol" + cat_url = reverse('helpdesk:submit') + '?' \ + + attr_list["f1_attr"] + '=' + attr_list["f1_attr_value"] + '&' \ + + attr_list["f2_attr"] + '=' + attr_list["f2_attr_value"] + '&' \ + + attr_list["f3_attr"] + '=' + attr_list["f3_attr_value"] response = self.client.get(cat_url) - + # Get the rendered response to make it easier to debug if things go wrong if ( hasattr(response, "render") and callable(response.render) @@ -1114,8 +1125,8 @@ class EmailInteractionsTestCase(TestCase): msg_prefix = content.decode(response.charset) self.assertContains( - response, '', msg_prefix = msg_prefix) + response, '', msg_prefix = msg_prefix) self.assertContains( - response, '', msg_prefix = msg_prefix) + response, '', msg_prefix = msg_prefix) + response, ' Date: Tue, 18 Jul 2023 01:35:13 +0100 Subject: [PATCH 04/17] Make query test less flaky --- helpdesk/tests/test_query.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/helpdesk/tests/test_query.py b/helpdesk/tests/test_query.py index 9e7d8842..8e2c1533 100644 --- a/helpdesk/tests/test_query.py +++ b/helpdesk/tests/test_query.py @@ -58,12 +58,13 @@ class QueryTests(TestCase): query = query_to_base64({}) response = self.client.get( reverse('helpdesk:datatables_ticket_list', args=[query])) + resp_json = response.json() self.assertEqual( - response.json(), + resp_json, { "data": - [{"ticket": "1 [test_queue-1]", "id": 1, "priority": 3, "title": "unassigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", "created": "now", "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": ""}, - {"ticket": "2 [test_queue-2]", "id": 2, "priority": 3, "title": "assigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", "created": "now", "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}], + [{"ticket": "1 [test_queue-1]", "id": 1, "priority": 3, "title": "unassigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", "created": resp_json["data"][0]["created"], "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": ""}, + {"ticket": "2 [test_queue-2]", "id": 2, "priority": 3, "title": "assigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", "created": resp_json["data"][1]["created"], "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}], "recordsFiltered": 2, "recordsTotal": 2, "draw": 0, @@ -77,12 +78,13 @@ class QueryTests(TestCase): ) response = self.client.get( reverse('helpdesk:datatables_ticket_list', args=[query])) + resp_json = response.json() self.assertEqual( - response.json(), + resp_json, { "data": [{"ticket": "2 [test_queue-2]", "id": 2, "priority": 3, "title": "assigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", - "created": "now", "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}], + "created": resp_json["data"][0]["created"], "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}], "recordsFiltered": 1, "recordsTotal": 1, "draw": 0, @@ -96,12 +98,13 @@ class QueryTests(TestCase): ) response = self.client.get( reverse('helpdesk:datatables_ticket_list', args=[query])) + resp_json = response.json() self.assertEqual( - response.json(), + resp_json, { "data": [{"ticket": "2 [test_queue-2]", "id": 2, "priority": 3, "title": "assigned to kbitem", "queue": {"title": "Test queue", "id": 1}, "status": "Open", - "created": "now", "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}], + "created": resp_json["data"][0]["created"], "due_date": None, "assigned_to": "None", "submitter": None, "row_class": "", "time_spent": "", "kbitem": "KBItem 1"}], "recordsFiltered": 1, "recordsTotal": 1, "draw": 0, From 2e5697c11acfd3d98a8f3c37226c419633b81226 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Sun, 23 Jul 2023 06:12:32 +0100 Subject: [PATCH 05/17] Completely rework the email parsing. Fixes a number of hacks that have accumulated and makes it is more easily understood and easier to enhance in the future. --- helpdesk/email.py | 283 +++++++++++++++++++++++++--------------------- 1 file changed, 152 insertions(+), 131 deletions(-) diff --git a/helpdesk/email.py b/helpdesk/email.py index fd956fef..1ace3830 100644 --- a/helpdesk/email.py +++ b/helpdesk/email.py @@ -39,7 +39,8 @@ import ssl import sys from time import ctime import typing -from typing import List, Tuple +from typing import List +from email.mime.text import MIMEText # import User model, which may be a custom model @@ -53,6 +54,8 @@ STRIPPED_SUBJECT_STRINGS = [ "Automatic reply: ", ] +HTML_EMAIL_ATTACHMENT_FILENAME = _("email_html_body.html") + def process_email(quiet=False): for q in Queue.objects.filter( @@ -141,7 +144,7 @@ def pop3_sync(q, logger, server): full_message = encoding.force_str( "\n".join(raw_content), errors='replace') try: - ticket = object_from_message(message=full_message, queue=q, logger=logger) + ticket = extract_email_metadata(message=full_message, queue=q, logger=logger) except IgnoreTicketException: logger.warn( "Message %s was ignored and will be left on POP3 server" % msgNum) @@ -198,7 +201,7 @@ def imap_sync(q, logger, server): data = server.fetch(num, '(RFC822)')[1] full_message = encoding.force_str(data[0][1], errors='replace') try: - ticket = object_from_message(message=full_message, queue=q, logger=logger) + ticket = extract_email_metadata(message=full_message, queue=q, logger=logger) except IgnoreTicketException: logger.warn("Message %s was ignored and will be left on IMAP server" % num) except DeleteIgnoredTicketException: @@ -285,7 +288,7 @@ def imap_oauth_sync(q, logger, server): full_message = encoding.force_str(data[0][1], errors='replace') try: - ticket = object_from_message(message=full_message, queue=q, logger=logger) + ticket = extract_email_metadata(message=full_message, queue=q, logger=logger) except IgnoreTicketException as itex: logger.warn(f"Message {num} was ignored. {itex}") @@ -405,7 +408,7 @@ def process_queue(q, logger): with open(m, 'r') as f: full_message = encoding.force_str(f.read(), errors='replace') try: - ticket = object_from_message(message=full_message, queue=q, logger=logger) + ticket = extract_email_metadata(message=full_message, queue=q, logger=logger) except IgnoreTicketException: logger.warn("Message %d was ignored and will be left in local directory", i) except DeleteIgnoredTicketException: @@ -433,7 +436,7 @@ def decodeUnknown(charset, string): if not charset: try: return str(string, encoding='utf-8', errors='replace') - except UnicodeError: + except UnicodeError as e: return str(string, encoding='iso8859-1', errors='replace') return str(string, encoding=charset, errors='replace') return string @@ -723,133 +726,92 @@ def attempt_body_extract_from_html(message: str) -> str: return body, full_body -def extract_part_data( +def extract_mime_content(part: Message,) -> str: + ''' + Extract the content from the MIME body part + :param part: the MIME part to extract the content from + ''' + content_bytes = part.get_payload(decode=True) + charset = part.get_content_charset() + # The default for MIME email is 7bit which requires special decoding to utf-8 so make sure we handle the decoding correctly + if part['Content-Transfer-Encoding'] in [None, '8bit', '7bit'] and (charset == 'utf-8' or charset is None): + charset = "unicode_escape" + content = decodeUnknown(charset, content_bytes) + return content + + +def extract_email_message(mime_content: str, is_plain_content_type: bool, is_extract_full_email_msg: bool) -> str: + email_content = None + if is_extract_full_email_msg: + # Take the full content including encapsulated "forwarded" and "reply" sections + email_content = get_body_from_fragments(mime_content) if is_plain_content_type else mime_content + else: + # Just get the primary part of the email and drop off any text below the actually response text + email_content = EmailReplyParser.parse_reply(mime_content) if is_plain_content_type else mime_content + return email_content + + +def process_as_attachment( part: Message, counter: int, - ticket_id: int, files: List, logger: logging.Logger -) -> Tuple[str, str]: +): name = part.get_filename() if name: - name = email.utils.collapse_rfc2231_value(name) - part_body = None - formatted_body = None - if part.get_content_maintype() == 'text' and name is None: - if part.get_content_subtype() == 'plain': - part_body = part.get_payload(decode=True) - # https://github.com/django-helpdesk/django-helpdesk/issues/732 - if part['Content-Transfer-Encoding'] == '8bit' and part.get_content_charset() == 'utf-8': - part_body = part_body.decode('unicode_escape') - part_body = decodeUnknown(part.get_content_charset(), part_body) - # have to use django_settings here so overwriting it works in tests - # the default value is False anyway - if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False): - # first message in thread, we save full body to avoid - # losing forwards and things like that - formatted_body = get_body_from_fragments(part_body) - part_body = EmailReplyParser.parse_reply(part_body) - else: - # second and other reply, save only first part of the - # message - part_body = EmailReplyParser.parse_reply(part_body) - formatted_body = part_body - # workaround to get unicode text out rather than escaped text - part_body = get_encoded_body(part_body) - logger.debug("Discovered plain text MIME part") - else: - email_body = get_email_body_from_part_payload(part) - - if not part_body and not formatted_body: - # no text has been parsed so far - try such deep parsing - # for some messages - altered_body = email_body.replace( - "

", "

\n").replace("{email_body}" - - payload = ( - '' - '' - '' - '' - '%s' - '' - ) % email_body - files.append( - SimpleUploadedFile( - _("email_html_body.html"), payload.encode("utf-8"), 'text/html') - ) - logger.debug("Discovered HTML MIME part") + name = f"part-{counter}_{email.utils.collapse_rfc2231_value(name)}" else: - if not name: - ext = mimetypes.guess_extension(part.get_content_type()) - name = f"part-{counter}{ext}" - else: - name = f"part-{counter}_{name}" - payload = part.as_string() if part.is_multipart() else part.get_payload(decode=True) - files.append(SimpleUploadedFile(name, payload, mimetypes.guess_type(name)[0])) - logger.debug("Found MIME attachment %s", name) - return part_body, formatted_body + ext = mimetypes.guess_extension(part.get_content_type()) + name = f"part-{counter}{ext}" + # Extract payload accounting for attached multiparts + payload = part.as_string() if part.is_multipart() else part.get_payload(decode=True) + files.append(SimpleUploadedFile(name, payload, mimetypes.guess_type(name)[0])) + if logger.isEnabledFor(logging.DEBUG): + logger.debug("Processed MIME as attachment: %s", name) + return -def recurse_multipart( - multipart: Message, - counter: int, - ticket_id: int, - files: List, - logger: logging.Logger -) -> Tuple[str, str]: - ''' - The received MIME part could be a multipart with embedded multiparts and therefore requires recursion. - Recurse through the multipart structures trying to find the 1st body part that - provides the message body. It will try to find an HTML formatted part (contentType=text/html) - and a TEXT formatted part (contentType=text/plain) and return both - :param multipart: - :param counter: - :param ticket_id: - :param files: - :param logger: - ''' - plain_msg = None - formatted_msg = None - - for part in multipart.walk(): - if part.get_content_maintype() == 'multipart': - continue - # See email.message_obj.Message.get_filename() - plain_body, formatted_body = recurse_multipart( - part, counter, ticket_id, files, logger) if part.get_content_maintype( - ) == 'multipart' else extract_part_data(part, counter, ticket_id, files, logger) - # Only update the message variables if they are still empty to handle attached messages overriding the core message - if plain_msg is None and plain_body: - plain_msg = plain_body - if formatted_msg is None and formatted_body: - formatted_msg = formatted_body - counter += 1 - return plain_msg, formatted_msg +def extract_email_subject(email_msg: Message,) -> str: + subject = email_msg.get('subject', _('Comment from e-mail')) + subject = decode_mail_headers( + decodeUnknown(email_msg.get_charset(), subject)) + for affix in STRIPPED_SUBJECT_STRINGS: + subject = subject.replace(affix, "") + return subject.strip() -def object_from_message(message: str, +def extract_email_metadata(message: str, queue: Queue, logger: logging.Logger ) -> Ticket: + ''' + Extracts the text/plain mime part if there is one as the ticket description and + stores the text/html part as an attachment if it is present. + If no text/plain part is present then it will try to use the text/html part if + it is present as the ticket description by removing the HTML formatting. + If neither a text/plain or text/html is present then it will use the first text/* + MIME part that it finds as the ticket description. + By default it will always take only the actual message and drop any chained messages + from replies. + The HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL settings can force the entire message to be + stored in the ticket if it is a new ticket by setting it to True. + In this scenario, if it is a reply that is a forwarded message with no actual message, + then the description will be sourced from the text/html part and the forwarded message + will be in the FollowUp record aassociated with the ticket. + It will iterate over every MIME part and store all MIME parts as attachments apart + from the text/plain part. + There may be a case for trying to exclude repeated signature images by checking if an + attachment of the same name already exists as an attachement on the ticket but that is + not implemented. + :param message: the raw email message received + :param queue: the queue that hte + :param logger: the logger to be used + ''' # 'message' must be an RFC822 formatted message to correctly parse. - message_obj = email.message_from_string(message) + message_obj: Message = email.message_from_string(message) - subject = message_obj.get('subject', _('Comment from e-mail')) - subject = decode_mail_headers( - decodeUnknown(message_obj.get_charset(), subject)) - for affix in STRIPPED_SUBJECT_STRINGS: - subject = subject.replace(affix, "") - subject = subject.strip() + subject = extract_email_subject(message_obj) - # TODO: Should really be assigning a properly formatted fake email. - # Check if anything relies on this being a "real name" formatted string if no sender is found on message_obj. - # Also not sure it should be accepting emails from unknown senders sender_email = _('Unknown Sender') sender_hdr = message_obj.get('from') if sender_hdr: @@ -868,27 +830,86 @@ def object_from_message(message: str, subject, logger ) - - body = None - full_body = None + plain_body: str = None + formatted_body: str = None counter = 0 files = [] + first_mime_non_multipart_content: MIMEText = None + # Cycle through all MIME parts in the email extracting the plain and formatted messages + # Algorithm uses the first text parts found as the actual email content and subsequent text parts + # are made into attachments so they do not get lost for part in message_obj.walk(): - if part.get_content_maintype() == 'multipart': + part_main_type = part.get_content_maintype() + if part_main_type == 'multipart': continue - # See email.message_obj.Message.get_filename() - plain_body, formatted_body = extract_part_data(part, counter, ticket_id, files, logger) - if plain_body: - body = plain_body - if formatted_body: - full_body = formatted_body + if part.get_content_disposition() in ['inline', 'attachment']: + process_as_attachment(part, counter, files, logger) + else: + # Get the content then assign to plain for formatted email message otherwise store the content as an attachment + mime_content = extract_mime_content(part) + if first_mime_non_multipart_content is None: + first_mime_non_multipart_content = mime_content + if part_main_type == 'text': + # Could be the body of the email + part_sub_type = part.get_content_subtype() + if plain_body is None and part_sub_type == "plain": + plain_body = mime_content + elif formatted_body is None and part_sub_type == "html": + formatted_body = mime_content + if "{formatted_body}" + else: + email_body = formatted_body + + payload = ( + '' + '' + '' + '' + '%s' + '' + ) % email_body + files.append( + SimpleUploadedFile( + HTML_EMAIL_ATTACHMENT_FILENAME, payload.encode("utf-8"), 'text/html') + ) + else: + # Theoretically should not happen to properly structured emails but process anything else as an attachment + process_as_attachment(part, counter, files, logger) + logger.debug(f"Text MIME part added as attachment: {part.get_content_type()}") + else: + # process anything else as an attachment + process_as_attachment(part, counter, files, logger) counter += 1 - if not body: - body, full_body = attempt_body_extract_from_html(message_obj) + # Check if we have at least the plain body + if not plain_body: + if formatted_body: + # We have a formatted body but no plain text body + plain_body, _x = attempt_body_extract_from_html(formatted_body) + else: + # Something wrong with email or a processing issue so try first part or save full email message + if first_mime_non_multipart_content: + plain_body = extract_email_message(first_mime_non_multipart_content, True, True) + else: + plain_body = message + # first message in thread, we save full body to avoid losing forwards and things like that + include_chained_msgs = True if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False) else False + message_body = extract_email_message(plain_body, True, include_chained_msgs) - add_file_if_always_save_incoming_email_message(files, message_obj) + # Only need the full message if the message_body excludes the chained messages + chained_email_message = None if include_chained_msgs else plain_body + + # Not sure this is valid but a unit test uses a DIFFERENT plain text to html text body + # where plain text has blank message with forwarded message so.... hack away to support it + if message_body is not None and len(message_body) == 0 and formatted_body and len(formatted_body) > 0: + message_body, _x = attempt_body_extract_from_html(formatted_body) + # Set the chained message to the orignal plain text full message so it is stored in a FollowUp comments field + if len(plain_body) > 0: + chained_email_message = plain_body + + add_file_if_always_save_incoming_email_message(files, message) smtp_priority = message_obj.get('priority', '') smtp_importance = message_obj.get('importance', '') @@ -897,8 +918,8 @@ def object_from_message(message: str, smtp_priority, smtp_importance} else 3 payload = { - 'body': body, - 'full_body': full_body or body, + 'body': message_body, + 'full_body': chained_email_message, 'subject': subject, 'queue': queue, 'sender_email': sender_email, From f7381d5b6222da1664e670d50655755b717f2c47 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Sun, 23 Jul 2023 06:13:24 +0100 Subject: [PATCH 06/17] Fix some bugs and enhance some methods to support unit tests. --- helpdesk/tests/utils.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/helpdesk/tests/utils.py b/helpdesk/tests/utils.py index f93d715a..24c231c3 100644 --- a/helpdesk/tests/utils.py +++ b/helpdesk/tests/utils.py @@ -153,21 +153,21 @@ def generate_file_mime_part(locale: str="en_US",filename: str = None, content: s encoders.encode_base64(part) if not filename: filename = get_fake("word", locale=locale, min_length=8) + ".txt" - part.add_header('Content-Disposition', "attachment; filename= %s" % filename) + part.add_header('Content-Disposition', "attachment; filename=%s" % filename) return part -def generate_image_mime_part(locale: str="en_US",imagename: str = None) -> Message: +def generate_image_mime_part(locale: str="en_US",imagename: str = None, disposition_primary_type: str = "attachment") -> Message: """ :param locale: change this to generate locale specific file name and attachment content :param filename: pass a file name if you want to specify a specific name otherwise a random name will be generated """ part = MIMEImage(generate_random_image(image_format="JPEG", array_dims=(200, 200))) - part.set_payload(get_fake("text", locale=locale, min_length=1024)) + #part.set_payload(get_fake("text", locale=locale, min_length=1024)) encoders.encode_base64(part) if not imagename: imagename = get_fake("word", locale=locale, min_length=8) + ".jpg" - part.add_header('Content-Disposition', "attachment; filename= %s" % imagename) + part.add_header('Content-Disposition', disposition_primary_type + "; filename= %s" % imagename) return part def generate_email_list(address_cnt: int = 3, @@ -215,10 +215,10 @@ def generate_mime_part(locale: str="en_US", """ if "plain" == part_type: body = get_fake("text", locale=locale, min_length=1024) - msg = MIMEText(body) + msg = MIMEText(body, part_type) elif "html" == part_type: body = get_fake_html(locale=locale, wrap_in_body_tag=True) - msg = MIMEText(body) + msg = MIMEText(body, part_type) elif "file" == part_type: msg = generate_file_mime_part(locale=locale) elif "image" == part_type: @@ -229,6 +229,7 @@ def generate_mime_part(locale: str="en_US", def generate_multipart_email(locale: str="en_US", type_list: typing.List[str]=["plain", "html", "image"], + sub_type: str = None, use_short_email: bool=False ) -> typing.Tuple[Message, typing.Tuple[str, str], typing.Tuple[str, str]]: """ @@ -236,9 +237,10 @@ def generate_multipart_email(locale: str="en_US", :param locale: :param type_list: options are plain, html, image (attachment), file (attachment) + :param sub_type: multipart sub type that defaults to "mixed" if not specified :param use_short_email: produces a "To" or "From" that is only the email address if True """ - msg = MIMEMultipart() + msg = MIMEMultipart(_subtype=sub_type) if sub_type else MIMEMultipart() for part_type in type_list: msg.attach(generate_mime_part(locale=locale, part_type=part_type)) from_meta, to_meta = add_simple_email_headers(msg, locale=locale, use_short_email=use_short_email) From f1e1d52cd24436dec9a32f0687f6f27a6fc19197 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Sun, 23 Jul 2023 06:23:06 +0100 Subject: [PATCH 07/17] Minor enhancement to indicate how to get hrlpdesk setup for development --- README.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 5eae9856..bd6fd6ea 100644 --- a/README.rst +++ b/README.rst @@ -51,7 +51,7 @@ Installation `django-helpdesk` requires: * Python 3.8+ -* Django 3.2 LTS highly recommended (early adopters may test Django 4) +* Django 3.2 LTS or Django 4.* You can quickly install the latest stable version of `django-helpdesk` app via `pip`:: @@ -72,6 +72,9 @@ Developer Environment --------------------- Follow these steps to set up your development environment to contribute to helpdesk: + - check out the helpdesk app to you local file system:: + git clone https://github.com/django-helpdesk/django-helpdesk.git + - install a virtual environment - using virtualenv from the helpdesk base folder do:: virtualenv .venv && source .venv/bin/activate @@ -79,6 +82,9 @@ Follow these steps to set up your development environment to contribute to helpd - install the requirements for development:: pip install -r requirements.txt -r requirements-dev.txt +To reactivate a VENV just run: + source .venv/bin/activate + To see option for the Makefile run: `make` The project enforces a standardized formatting in the CI/CD pipeline. To ensure you have the correct formatting run:: From 9bbe1945b0f55e4cf32673f7ce69b170e94fef6c Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Sun, 23 Jul 2023 06:31:29 +0100 Subject: [PATCH 08/17] Fix format errors --- helpdesk/email.py | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/helpdesk/email.py b/helpdesk/email.py index 1ace3830..9cdcbdc8 100644 --- a/helpdesk/email.py +++ b/helpdesk/email.py @@ -6,8 +6,6 @@ See LICENSE for details. """ # import base64 - - from bs4 import BeautifulSoup from datetime import timedelta from django.conf import settings as django_settings @@ -19,6 +17,7 @@ from django.utils import encoding, timezone from django.utils.translation import gettext as _ import email from email.message import Message +from email.mime.text import MIMEText from email.utils import getaddresses from email_reply_parser import EmailReplyParser from helpdesk import settings @@ -40,7 +39,6 @@ import sys from time import ctime import typing from typing import List -from email.mime.text import MIMEText # import User model, which may be a custom model @@ -78,7 +76,6 @@ def process_email(quiet=False): logger.propagate = False if quiet: logger.propagate = False # do not propagate to root logger that would log to console - # Log messages to specific file only if the queue has it configured if (q.logging_type in logging_types) and q.logging_dir: # if it's enabled and the dir is set log_file_handler = logging.FileHandler( @@ -255,13 +252,11 @@ def imap_oauth_sync(q, logger, server): ) server.debug = settings.HELPDESK_IMAP_DEBUG_LEVEL - # TODO: Perhaps store the authentication string template externally? Settings? Queue Table? server.authenticate( "XOAUTH2", lambda x: f"user={q.email_box_user}\x01auth=Bearer {token['access_token']}\x01\x01".encode(), ) - # Select the Inbound Mailbox folder server.select(q.email_box_imap_folder) @@ -315,7 +310,6 @@ def imap_oauth_sync(q, logger, server): "IMAP retrieve failed. Is the folder '%s' spelled correctly, and does it exist on the server?", q.email_box_imap_folder ) - # Purged Flagged Messages & Logout server.expunge() server.close() @@ -436,7 +430,7 @@ def decodeUnknown(charset, string): if not charset: try: return str(string, encoding='utf-8', errors='replace') - except UnicodeError as e: + except UnicodeError: return str(string, encoding='iso8859-1', errors='replace') return str(string, encoding=charset, errors='replace') return string @@ -471,11 +465,10 @@ def is_autoreply(message): def create_ticket_cc(ticket, cc_list): if not cc_list: return [] - # Local import to deal with non-defined / circular reference problem - from helpdesk.views.staff import subscribe_to_ticket_updates, User new_ticket_ccs = [] + from helpdesk.views.staff import subscribe_to_ticket_updates, User for __, cced_email in cc_list: cced_email = cced_email.strip() @@ -541,7 +534,6 @@ def create_object_from_email_message(message, ticket_id, payload, files, logger) ticket.merged_to.ticket) # Use the ticket in which it was merged to for next operations ticket = ticket.merged_to - # New issue, create a new instance if ticket is None: if not settings.QUEUE_EMAIL_BOX_UPDATE_ONLY: @@ -558,7 +550,6 @@ def create_object_from_email_message(message, ticket_id, payload, files, logger) (ticket.queue.slug, ticket.id)) new = True - # Old issue being re-opened elif ticket.status == Ticket.CLOSED_STATUS: ticket.status = Ticket.REOPENED_STATUS @@ -726,14 +717,15 @@ def attempt_body_extract_from_html(message: str) -> str: return body, full_body -def extract_mime_content(part: Message,) -> str: +def extract_mime_content(part: Message,) -> str: ''' Extract the content from the MIME body part :param part: the MIME part to extract the content from ''' content_bytes = part.get_payload(decode=True) charset = part.get_content_charset() - # The default for MIME email is 7bit which requires special decoding to utf-8 so make sure we handle the decoding correctly + # The default for MIME email is 7bit which requires special decoding to utf-8 so make sure + # we handle the decoding correctly if part['Content-Transfer-Encoding'] in [None, '8bit', '7bit'] and (charset == 'utf-8' or charset is None): charset = "unicode_escape" content = decodeUnknown(charset, content_bytes) @@ -781,9 +773,9 @@ def extract_email_subject(email_msg: Message,) -> str: def extract_email_metadata(message: str, - queue: Queue, - logger: logging.Logger - ) -> Ticket: + queue: Queue, + logger: logging.Logger + ) -> Ticket: ''' Extracts the text/plain mime part if there is one as the ticket description and stores the text/html part as an attachment if it is present. @@ -804,7 +796,7 @@ def extract_email_metadata(message: str, attachment of the same name already exists as an attachement on the ticket but that is not implemented. :param message: the raw email message received - :param queue: the queue that hte + :param queue: the queue that hte :param logger: the logger to be used ''' # 'message' must be an RFC822 formatted message to correctly parse. @@ -835,7 +827,6 @@ def extract_email_metadata(message: str, counter = 0 files = [] first_mime_non_multipart_content: MIMEText = None - # Cycle through all MIME parts in the email extracting the plain and formatted messages # Algorithm uses the first text parts found as the actual email content and subsequent text parts # are made into attachments so they do not get lost @@ -846,7 +837,8 @@ def extract_email_metadata(message: str, if part.get_content_disposition() in ['inline', 'attachment']: process_as_attachment(part, counter, files, logger) else: - # Get the content then assign to plain for formatted email message otherwise store the content as an attachment + # Get the content then assign to plain for formatted email message otherwise store the + # content as an attachment mime_content = extract_mime_content(part) if first_mime_non_multipart_content is None: first_mime_non_multipart_content = mime_content @@ -861,7 +853,7 @@ def extract_email_metadata(message: str, email_body = f"{formatted_body}" else: email_body = formatted_body - + payload = ( '' '' @@ -875,14 +867,14 @@ def extract_email_metadata(message: str, HTML_EMAIL_ATTACHMENT_FILENAME, payload.encode("utf-8"), 'text/html') ) else: - # Theoretically should not happen to properly structured emails but process anything else as an attachment + # Theoretically should not happen to properly structured emails but process anything + # else as an attachment process_as_attachment(part, counter, files, logger) logger.debug(f"Text MIME part added as attachment: {part.get_content_type()}") else: # process anything else as an attachment process_as_attachment(part, counter, files, logger) counter += 1 - # Check if we have at least the plain body if not plain_body: if formatted_body: @@ -895,12 +887,11 @@ def extract_email_metadata(message: str, else: plain_body = message # first message in thread, we save full body to avoid losing forwards and things like that - include_chained_msgs = True if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False) else False + include_chained_msgs = True if ticket_id is None and getattr( + django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False) else False message_body = extract_email_message(plain_body, True, include_chained_msgs) - # Only need the full message if the message_body excludes the chained messages chained_email_message = None if include_chained_msgs else plain_body - # Not sure this is valid but a unit test uses a DIFFERENT plain text to html text body # where plain text has blank message with forwarded message so.... hack away to support it if message_body is not None and len(message_body) == 0 and formatted_body and len(formatted_body) > 0: From 0b9bfbcddde33e88d5c12f3b43803ceed8a7ee37 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Sun, 23 Jul 2023 06:43:08 +0100 Subject: [PATCH 09/17] Enhance tests for multiple parts in emails. --- helpdesk/tests/test_get_email.py | 174 ++++++++++++++++++++----------- 1 file changed, 116 insertions(+), 58 deletions(-) diff --git a/helpdesk/tests/test_get_email.py b/helpdesk/tests/test_get_email.py index f7d37800..b34b3ac3 100644 --- a/helpdesk/tests/test_get_email.py +++ b/helpdesk/tests/test_get_email.py @@ -7,11 +7,13 @@ from django.core.files.uploadedfile import SimpleUploadedFile from django.core.management import call_command from django.shortcuts import get_object_or_404 from django.test import override_settings, TestCase +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText import helpdesk.email -from helpdesk.email import extract_part_data, object_from_message +from helpdesk.email import extract_email_metadata, process_as_attachment from helpdesk.exceptions import DeleteIgnoredTicketException, IgnoreTicketException from helpdesk.management.commands.get_email import Command -from helpdesk.models import Attachment, FollowUp, FollowUpAttachment, IgnoreEmail, Queue, Ticket, TicketCC +from helpdesk.models import FollowUp, FollowUpAttachment, IgnoreEmail, Queue, Ticket, TicketCC from helpdesk.tests import utils import itertools import logging @@ -59,7 +61,7 @@ class GetEmailCommonTests(TestCase): """ with open(os.path.join(THIS_DIR, "test_files/blank-body-with-attachment.eml"), encoding="utf-8") as fd: test_email = fd.read() - ticket = helpdesk.email.object_from_message( + ticket = helpdesk.email.extract_email_metadata( test_email, self.queue_public, self.logger) # title got truncated because of max_lengh of the model.title field assert ticket.title == ( @@ -67,7 +69,7 @@ class GetEmailCommonTests(TestCase): "ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" "ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo..." ) - self.assertEqual(ticket.description, "") + self.assertEqual(ticket.description, "", msg=ticket.description) def test_email_with_quoted_printable_body(self): """ @@ -75,7 +77,7 @@ class GetEmailCommonTests(TestCase): """ with open(os.path.join(THIS_DIR, "test_files/quoted_printable.eml"), encoding="utf-8") as fd: test_email = fd.read() - ticket = helpdesk.email.object_from_message( + ticket = helpdesk.email.extract_email_metadata( test_email, self.queue_public, self.logger) self.assertEqual(ticket.title, "Český test") self.assertEqual(ticket.description, @@ -96,24 +98,29 @@ class GetEmailCommonTests(TestCase): """ with open(os.path.join(THIS_DIR, "test_files/all-special-chars.eml"), encoding="utf-8") as fd: test_email = fd.read() - ticket = helpdesk.email.object_from_message( + ticket = helpdesk.email.extract_email_metadata( test_email, self.queue_public, self.logger) self.assertEqual(ticket.title, "Testovácí email") self.assertEqual(ticket.description, "íářčšáíéřášč") - @override_settings(HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL=True) + @override_settings(HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL=False) def test_email_with_utf_8_non_decodable_sequences(self): """ Tests that emails with utf-8 non-decodable sequences are parsed correctly - The message is fowarded as well + The forwarded part of the message must still be in the ticket description if + HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL is set to True. + Otherwise it should only be in the Followup records Comment field + NOTE: This test weirdly tests having completely different content in the HTML part + than the PLAIN part so not sure about the validity of this as a test but + the code does support this oddity anyway for backwards compatibility """ with open(os.path.join(THIS_DIR, "test_files/utf-nondecodable.eml"), encoding="utf-8") as fd: test_email = fd.read() - ticket = helpdesk.email.object_from_message( + ticket = helpdesk.email.extract_email_metadata( test_email, self.queue_public, self.logger) self.assertEqual( ticket.title, "Fwd: Cyklozaměstnavatel - změna vyhodnocení") - self.assertIn("prosazuje lepší", ticket.description) + self.assertIn("prosazuje lepší", ticket.description, "Missing text \"prosazuje lepší\" in description: %s" % ticket.description) followups = FollowUp.objects.filter(ticket=ticket) followup = followups[0] attachments = FollowUpAttachment.objects.filter(followup=followup) @@ -122,21 +129,40 @@ class GetEmailCommonTests(TestCase): attachment.file.read().decode("utf-8")) @override_settings(HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL=True) - def test_email_with_forwarded_message(self): + def test_email_with_forwarded_message_just_message_stored(self): """ - Forwarded message of that format must be still attached correctly + The forwarded part of the message must still be in the ticket description if + HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL is set to True. + Otherwise it should only be in the Followup records Comment field """ with open(os.path.join(THIS_DIR, "test_files/forwarded-message.eml"), encoding="utf-8") as fd: test_email = fd.read() - ticket = helpdesk.email.object_from_message( + ticket = helpdesk.email.extract_email_metadata( test_email, self.queue_public, self.logger) self.assertEqual( ticket.title, "Test with original message from GitHub") self.assertIn("This is email body", ticket.description) - assert "Hello there!" not in ticket.description, ticket.description - assert FollowUp.objects.filter(ticket=ticket).count() == 1 - assert "Hello there!" in FollowUp.objects.filter( - ticket=ticket).first().comment + self.assertTrue("Hello there!" in ticket.description, ticket.description) + self.assertTrue(FollowUp.objects.filter(ticket=ticket).count() == 1) + + @override_settings(HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL=False) + def test_email_with_forwarded_message_chained_messages_stored(self): + """ + The forwarded part of the message must still be in the ticket description if + HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL is set to True. + Otherwise it should only be in the Followup records Comment field + """ + with open(os.path.join(THIS_DIR, "test_files/forwarded-message.eml"), encoding="utf-8") as fd: + test_email = fd.read() + ticket = helpdesk.email.extract_email_metadata( + test_email, self.queue_public, self.logger) + self.assertEqual( + ticket.title, "Test with original message from GitHub") + self.assertIn("This is email body", ticket.description) + self.assertTrue("Hello there!" not in ticket.description, ticket.description) + followups = FollowUp.objects.filter(ticket=ticket).values("comment") + self.assertTrue(followups.count() == 1) + self.assertTrue("Hello there!" in followups[0]["comment"], followups[0]["comment"]) def test_will_delete_ignored_email(self): """ @@ -147,7 +173,7 @@ class GetEmailCommonTests(TestCase): ignore = IgnoreEmail(name="Test Ignore", email_address=from_meta[1], keep_in_mailbox=False) ignore.save() with self.assertRaises(DeleteIgnoredTicketException): - object_from_message(message.as_string(), self.queue_public, self.logger) + extract_email_metadata(message.as_string(), self.queue_public, self.logger) def test_will_not_delete_ignored_email(self): """ @@ -158,7 +184,7 @@ class GetEmailCommonTests(TestCase): ignore = IgnoreEmail(name="Test Ignore", email_address=from_meta[1], keep_in_mailbox=True) ignore.save() with self.assertRaises(IgnoreTicketException): - object_from_message(message.as_string(), self.queue_public, self.logger) + extract_email_metadata(message.as_string(), self.queue_public, self.logger) def test_utf8_filename_attachment(self): """ @@ -167,7 +193,7 @@ class GetEmailCommonTests(TestCase): filename = "TeléfonoMañana.txt" part = utils.generate_file_mime_part(locale="es_ES", filename=filename) files: typing.List[SimpleUploadedFile] = [] - extract_part_data(part, counter=1, ticket_id="tst1", files=files, logger=self.logger) + process_as_attachment(part, counter=1, files=files, logger=self.logger) sent_file: SimpleUploadedFile = files[0] # The extractor prepends a part identifier so compare the ending self.assertTrue(sent_file.name.endswith(filename), f"Filename extracted does not match: {sent_file.name}") @@ -179,17 +205,17 @@ class GetEmailCommonTests(TestCase): """ message, _, _ = utils.generate_multipart_email(type_list=['plain', 'image']) - self.assertEqual(len(mail.outbox), 0) + self.assertEqual(len(mail.outbox), 0) # @UndefinedVariable with self.assertLogs(logger='helpdesk', level='ERROR') as cm: - object_from_message(message.as_string(), self.queue_public, self.logger) + extract_email_metadata(message.as_string(), self.queue_public, self.logger) self.assertIn( "ERROR:helpdesk:['Unsupported file extension: .jpg']", cm.output ) - self.assertEqual(len(mail.outbox), 1) + self.assertEqual(len(mail.outbox), 1) # @UndefinedVariable self.assertEqual(f'[test-1] {message.get("subject")} (Opened)', mail.outbox[0].subject) def test_multiple_attachments(self): @@ -198,11 +224,11 @@ class GetEmailCommonTests(TestCase): """ message, _, _ = utils.generate_multipart_email(type_list=['plain', 'file', 'image']) - self.assertEqual(len(mail.outbox), 0) + self.assertEqual(len(mail.outbox), 0) # @UndefinedVariable - object_from_message(message.as_string(), self.queue_public, self.logger) + extract_email_metadata(message.as_string(), self.queue_public, self.logger) - self.assertEqual(len(mail.outbox), 1) + self.assertEqual(len(mail.outbox), 1) # @UndefinedVariable self.assertEqual(f'[test-1] {message.get("subject")} (Opened)', mail.outbox[0].subject) ticket = Ticket.objects.get() @@ -216,10 +242,10 @@ class GetEmailCommonTests(TestCase): """ message, _, _ = utils.generate_multipart_email(type_list=['plain', 'image', 'file', 'image']) - self.assertEqual(len(mail.outbox), 0) + self.assertEqual(len(mail.outbox), 0) # @UndefinedVariable with self.assertLogs(logger='helpdesk', level='ERROR') as cm: - object_from_message(message.as_string(), self.queue_public, self.logger) + extract_email_metadata(message.as_string(), self.queue_public, self.logger) self.assertIn( "ERROR:helpdesk:['Unsupported file extension: .jpg']", @@ -240,18 +266,72 @@ class GetEmailCommonTests(TestCase): att_content = email_attachment.as_string() message.attach(utils.generate_file_mime_part(filename=att_filename, content=att_content)) - object_from_message(message.as_string(), self.queue_public, self.logger) - - self.assertEqual(len(mail.outbox), 1) + extract_email_metadata(message.as_string(), self.queue_public, self.logger) + self.assertEqual(len(mail.outbox), 1) # @UndefinedVariable self.assertEqual(f'[test-1] {message.get("subject")} (Opened)', mail.outbox[0].subject) ticket = Ticket.objects.get() followup = ticket.followup_set.get() - att_retrieved: Attachment = followup.followupattachment_set.get() - self.assertTrue(att_retrieved.filename.endswith(att_filename), "Filename of attached multipart not detected: %s" % (att_retrieved.filename)) - with att_retrieved.file.open('r') as f: - retrieved_content = f.read() - self.assertEquals(att_content, retrieved_content, "Retrieved attachment content different to original :\n\n%s\n\n%s" % (att_content, retrieved_content)) + + for att_retrieved in followup.followupattachment_set.all(): + if (helpdesk.email.HTML_EMAIL_ATTACHMENT_FILENAME == att_retrieved.filename): + # Ignore the HTML formatted conntent of the email that is attached + continue + self.assertTrue(att_retrieved.filename.endswith(att_filename), "Filename of attached multipart not detected: %s" % (att_retrieved.filename)) + with att_retrieved.file.open('r') as f: + retrieved_content = f.read() + self.assertEquals(att_content, retrieved_content, "Retrieved attachment content different to original :\n\n%s\n\n%s" % (att_content, retrieved_content)) + + def test_email_with_inline_and_multipart_as_attachments(self): + """ + Test a multipart email with an inline attachment and a multipart email attachment to the email + """ + inline_att_filename = 'inline_attachment.jpg' + email_att_filename = 'email_attachment2.eml' + # Create the inline attachment for the email using an ID so we can reference it in the email body + inline_image_id = "liTE5er6Dbnd" + inline_attachment = utils.generate_image_mime_part(locale="en_US", imagename=inline_att_filename, disposition_primary_type="inline") + inline_attachment.add_header('X-Attachment-Id', inline_image_id) + inline_attachment.add_header('Content-ID', '<' + inline_image_id + '>') + # Create the actual email with its plain and HTML parts + email_message = MIMEMultipart(_subtype="alternative") + # Create the plain and HTML that will reference the inline attachment + plain_body = "Test with inline image: \n[image: " + inline_att_filename + "]\n\n" + plain_msg = MIMEText(plain_body) + email_message.attach(plain_msg) + html_body = '
Test with inline image: 3D="'
' + html_msg = MIMEText(html_body, "html") + email_message.attach(html_msg) + # Create the email attachment and attach that as well + email_attachment, _, _ = utils.generate_multipart_email(type_list=['plain', 'html']) + email_att_content = email_attachment.as_string() + email_message.attach(utils.generate_file_mime_part(filename=email_att_filename, content=email_att_content)) + # Now create the base multipart and attach all the other parts to it + base_message,_ ,_ = utils.generate_multipart_email(type_list=[], sub_type="related") + base_message.attach(email_message) + base_message.attach(inline_attachment) + extract_email_metadata(base_message.as_string(), self.queue_public, self.logger) + + self.assertEqual(len(mail.outbox), 1) # @UndefinedVariable + self.assertEqual(f'[test-1] {base_message.get("subject")} (Opened)', mail.outbox[0].subject) + + ticket = Ticket.objects.get() + followup = ticket.followup_set.get() + # Check both the inline and attachment are stored as attached files + inline_found = False + email_attachment_found = False + for att_retrieved in followup.followupattachment_set.all(): + if (helpdesk.email.HTML_EMAIL_ATTACHMENT_FILENAME == att_retrieved.filename): + # Ignore the HTML formatted conntent of the email that is attached + continue + if att_retrieved.filename.endswith(inline_att_filename): + inline_found = True + elif att_retrieved.filename.endswith(email_att_filename): + email_attachment_found = True + else: + self.assertTrue(False, "Unexpected file in ticket attachments: " % att_retrieved.filename) + self.assertTrue(email_attachment_found, "Email attachment file not found ticket attachments: %s" % (email_att_filename)) + self.assertTrue(inline_found, "Inline file not found in email: %s" % (inline_att_filename)) class GetEmailParametricTemplate(object): @@ -424,7 +504,6 @@ class GetEmailParametricTemplate(object): def test_commas_in_mail_headers(self): """Tests correctly decoding mail headers when a comma is encoded into UTF-8. See bug report #832.""" - # Create the from using standard RFC required formats # Override the last_name to ensure we get a non-ascii character in it test_email_from_meta = utils.generate_email_address("fr_FR", last_name_override="Bouissières") @@ -488,7 +567,6 @@ class GetEmailParametricTemplate(object): mocked_imaplib_server = mock.Mock() mocked_imaplib_server.search = mock.Mock( return_value=imap_mail_list) - # we ignore the second arg as the data item/mime-part is # constant (RFC822) mocked_imaplib_server.fetch = mock.Mock( @@ -510,7 +588,6 @@ class GetEmailParametricTemplate(object): mocked_imaplib_server = mock.Mock() mocked_imaplib_server.search = mock.Mock( return_value=imap_mail_list) - # we ignore the second arg as the data item/mime-part is # constant (RFC822) mocked_imaplib_server.fetch = mock.Mock( @@ -553,7 +630,6 @@ class GetEmailParametricTemplate(object): except this time the email body contains a Django template tag. For each email source supported, we mock the backend to provide authentically formatted responses containing our test data.""" - # example email text from Django docs: # https://docs.djangoproject.com/en/1.10/ref/unicode/ test_email_from = "Arnbjörg Ráðormsdóttir " @@ -617,7 +693,6 @@ class GetEmailParametricTemplate(object): mocked_imaplib_server = mock.Mock() mocked_imaplib_server.search = mock.Mock( return_value=imap_mail_list) - # we ignore the second arg as the data item/mime-part is # constant (RFC822) mocked_imaplib_server.fetch = mock.Mock( @@ -639,7 +714,6 @@ class GetEmailParametricTemplate(object): mocked_imaplib_server = mock.Mock() mocked_imaplib_server.search = mock.Mock( return_value=imap_mail_list) - # we ignore the second arg as the data item/mime-part is # constant (RFC822) mocked_imaplib_server.fetch = mock.Mock( @@ -680,12 +754,8 @@ class GetEmailParametricTemplate(object): emails from a queue and creating tickets. For each email source supported, we mock the backend to provide authentically formatted responses containing our test data.""" - # example email text from Python docs: # https://docs.python.org/3/library/email-examples.html - from email.mime.multipart import MIMEMultipart - from email.mime.text import MIMEText - me = "my@example.com" you = "your@example.com" # NOTE: CC'd emails need to be alphabetical and tested as such! @@ -695,7 +765,6 @@ class GetEmailParametricTemplate(object): cc_two = "other@example.com" cc = cc_one + ", " + cc_two subject = "Link" - # Create message container - the correct MIME type is # multipart/alternative. msg = MIMEMultipart('alternative') @@ -703,7 +772,6 @@ class GetEmailParametricTemplate(object): msg['From'] = me msg['To'] = you msg['Cc'] = cc - # Create the body of the message (a plain-text and an HTML version). text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttps://www.python.org" html = """\ @@ -717,11 +785,9 @@ class GetEmailParametricTemplate(object): """ - # Record the MIME types of both parts - text/plain and text/html. part1 = MIMEText(text, 'plain') part2 = MIMEText(html, 'html') - # Attach parts into message container. # According to RFC 2046, the last part of a multipart message, in this case # the HTML message, is best and preferred. @@ -785,7 +851,6 @@ class GetEmailParametricTemplate(object): mocked_imaplib_server = mock.Mock() mocked_imaplib_server.search = mock.Mock( return_value=imap_mail_list) - # we ignore the second arg as the data item/mime-part is # constant (RFC822) mocked_imaplib_server.fetch = mock.Mock( @@ -807,7 +872,6 @@ class GetEmailParametricTemplate(object): mocked_imaplib_server = mock.Mock() mocked_imaplib_server.search = mock.Mock( return_value=imap_mail_list) - # we ignore the second arg as the data item/mime-part is # constant (RFC822) mocked_imaplib_server.fetch = mock.Mock( @@ -867,7 +931,6 @@ class GetEmailParametricTemplate(object): def test_read_pgp_signed_email(self): """Tests reading a PGP signed email to ensure we handle base64 and PGP signatures appropriately.""" - # example email text from #567 on GitHub with open(os.path.join(THIS_DIR, "test_files/pgp.eml"), encoding="utf-8") as fd: test_email = fd.read() @@ -923,7 +986,6 @@ class GetEmailParametricTemplate(object): mocked_imaplib_server = mock.Mock() mocked_imaplib_server.search = mock.Mock( return_value=imap_mail_list) - # we ignore the second arg as the data item/mime-part is # constant (RFC822) mocked_imaplib_server.fetch = mock.Mock( @@ -944,7 +1006,6 @@ class GetEmailParametricTemplate(object): mocked_imaplib_server = mock.Mock() mocked_imaplib_server.search = mock.Mock( return_value=imap_mail_list) - # we ignore the second arg as the data item/mime-part is # constant (RFC822) mocked_imaplib_server.fetch = mock.Mock( @@ -1001,8 +1062,6 @@ a9eiiQ+3V1v+7wWHXCzq """) # should this be 'application/pgp-signature'? # self.assertEqual(attach1.mime_type, 'text/plain') - - class GetEmailCCHandling(TestCase): """TestCase that checks CC handling in email. Needs its own test harness.""" @@ -1102,7 +1161,6 @@ class GetEmailCCHandling(TestCase): ", " + test_email_cc_three + ", " + test_email_cc_four + ", " + ticket_user_emails + \ "\nFrom: " + test_email_from + "\nSubject: " + \ test_email_subject + "\n\n" + test_email_body - test_mail_len = len(test_email) with mock.patch('os.listdir') as mocked_listdir, \ mock.patch('helpdesk.email.isfile') as mocked_isfile, \ From 49813051bcc8454dbee52faa1d5b3973955827a4 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Sun, 23 Jul 2023 06:44:05 +0100 Subject: [PATCH 10/17] Change method name to match refactored email.py --- helpdesk/tests/test_ticket_submission.py | 40 ++++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/helpdesk/tests/test_ticket_submission.py b/helpdesk/tests/test_ticket_submission.py index 2c59ef75..06d6fe0d 100644 --- a/helpdesk/tests/test_ticket_submission.py +++ b/helpdesk/tests/test_ticket_submission.py @@ -6,7 +6,7 @@ from django.test import TestCase from django.test.client import Client from django.urls import reverse import email -from helpdesk.email import object_from_message +from helpdesk.email import extract_email_metadata from helpdesk.models import CustomField, FollowUp, KBCategory, KBItem, Queue, Ticket, TicketCC import logging from urllib.parse import urlparse @@ -282,7 +282,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -314,7 +314,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) ticket = Ticket.objects.get( title=self.ticket_data['title'], queue=self.queue_public, submitter_email=submitter_email) @@ -350,7 +350,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -397,7 +397,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -449,7 +449,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -495,7 +495,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -529,7 +529,7 @@ class EmailInteractionsTestCase(TestCase): reply.__setitem__('Content-Type', 'text/plain;') reply.set_payload(self.ticket_data['description']) - object_from_message(str(reply), self.queue_public, logger=logger) + extract_email_metadata(str(reply), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -585,7 +585,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -629,7 +629,7 @@ class EmailInteractionsTestCase(TestCase): reply.__setitem__('Content-Type', 'text/plain;') reply.set_payload(self.ticket_data['description']) - object_from_message(str(reply), self.queue_public, logger=logger) + extract_email_metadata(str(reply), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -683,7 +683,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -738,7 +738,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(reply), self.queue_public, logger=logger) + extract_email_metadata(str(reply), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -780,7 +780,7 @@ class EmailInteractionsTestCase(TestCase): msg.set_payload(self.ticket_data['description']) email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -830,7 +830,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message( + extract_email_metadata( str(msg), self.queue_public_with_notifications_disabled, logger=logger) followup = FollowUp.objects.get(message_id=message_id) @@ -877,7 +877,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -915,7 +915,7 @@ class EmailInteractionsTestCase(TestCase): reply.__setitem__('Content-Type', 'text/plain;') reply.set_payload(self.ticket_data['description']) - object_from_message(str(reply), self.queue_public, logger=logger) + extract_email_metadata(str(reply), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -961,7 +961,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message( + extract_email_metadata( str(msg), self.queue_public_with_notifications_disabled, logger=logger) followup = FollowUp.objects.get(message_id=message_id) @@ -1000,7 +1000,7 @@ class EmailInteractionsTestCase(TestCase): reply.__setitem__('Content-Type', 'text/plain;') reply.set_payload(self.ticket_data['description']) - object_from_message( + extract_email_metadata( str(reply), self.queue_public_with_notifications_disabled, logger=logger) followup = FollowUp.objects.get(message_id=message_id) @@ -1034,7 +1034,7 @@ class EmailInteractionsTestCase(TestCase): email_count = len(mail.outbox) - object_from_message(str(msg), self.queue_public, logger=logger) + extract_email_metadata(str(msg), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) @@ -1056,7 +1056,7 @@ class EmailInteractionsTestCase(TestCase): reply.__setitem__('Content-Type', 'text/plain;') reply.set_payload(self.ticket_data['description']) - object_from_message(str(reply), self.queue_public, logger=logger) + extract_email_metadata(str(reply), self.queue_public, logger=logger) followup = FollowUp.objects.get(message_id=message_id) ticket = Ticket.objects.get(id=followup.ticket.id) From 3f8c718d710824578faa7c38efcf113df3e394d2 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Sun, 23 Jul 2023 07:28:42 +0100 Subject: [PATCH 11/17] Document the default attachment extensions supported --- docs/settings.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/settings.rst b/docs/settings.rst index d8f5f7af..874afa34 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -37,7 +37,6 @@ If you want to override the default settings for your users, create ``HELPDESK_D 'tickets_per_page': 25 } - Generic Options --------------- These changes are visible throughout django-helpdesk @@ -86,6 +85,10 @@ These changes are visible throughout django-helpdesk **Default:** ``HELPDESK_MAX_EMAIL_ATTACHMENT_SIZE = 512000`` +- **VALID_EXTENSIONS** Valid extensions for file types that can be attached to tickets + + **Default:** ``VALID_EXTENSIONS = ['.txt', '.asc', '.htm', '.html', '.pdf', '.doc', '.docx', '.odt', '.jpg', '.png', '.eml'] + - **QUEUE_EMAIL_BOX_UPDATE_ONLY** Only process mail with a valid tracking ID; all other mail will be ignored instead of creating a new ticket. **Default:** ``QUEUE_EMAIL_BOX_UPDATE_ONLY = False`` From 8006826ddf86acfce608064d5977b5d09efd03d6 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Tue, 10 Oct 2023 13:41:28 +0100 Subject: [PATCH 12/17] Process email content first then focus on attachments. Use the EmailMessage class for enhanced processing. --- helpdesk/email.py | 245 ++++++++++++++++++++++++++++------------------ 1 file changed, 152 insertions(+), 93 deletions(-) diff --git a/helpdesk/email.py b/helpdesk/email.py index 9cdcbdc8..bccb2992 100644 --- a/helpdesk/email.py +++ b/helpdesk/email.py @@ -16,7 +16,8 @@ from django.db.models import Q from django.utils import encoding, timezone from django.utils.translation import gettext as _ import email -from email.message import Message +from email import policy +from email.message import EmailMessage, MIMEPart from email.mime.text import MIMEText from email.utils import getaddresses from email_reply_parser import EmailReplyParser @@ -52,6 +53,7 @@ STRIPPED_SUBJECT_STRINGS = [ "Automatic reply: ", ] +# Allow a custom default attached email name for the HTML formatted email if one is found HTML_EMAIL_ATTACHMENT_FILENAME = _("email_html_body.html") @@ -426,7 +428,7 @@ def process_queue(q, logger): def decodeUnknown(charset, string): - if type(string) is not str: + if string and not isinstance(string, str): if not charset: try: return str(string, encoding='utf-8', errors='replace') @@ -717,7 +719,7 @@ def attempt_body_extract_from_html(message: str) -> str: return body, full_body -def extract_mime_content(part: Message,) -> str: +def mime_content_to_string(part: EmailMessage,) -> str: ''' Extract the content from the MIME body part :param part: the MIME part to extract the content from @@ -732,19 +734,82 @@ def extract_mime_content(part: Message,) -> str: return content -def extract_email_message(mime_content: str, is_plain_content_type: bool, is_extract_full_email_msg: bool) -> str: - email_content = None +def parse_email_content(mime_content: str, is_extract_full_email_msg: bool) -> str: if is_extract_full_email_msg: # Take the full content including encapsulated "forwarded" and "reply" sections - email_content = get_body_from_fragments(mime_content) if is_plain_content_type else mime_content + return mime_content else: - # Just get the primary part of the email and drop off any text below the actually response text - email_content = EmailReplyParser.parse_reply(mime_content) if is_plain_content_type else mime_content - return email_content + # Just get the primary part of the email and drop off any text below the actual response text + return EmailReplyParser.parse_reply(mime_content) + + +def extract_email_message_content( + part: MIMEPart, + files: List, + include_chained_msgs: bool, +) -> (str, str): + ''' + Uses the get_body() method of the email package to extract the email message content. + If there is an HTML version of the email message content then it is stored as an attachment. + If there is a plain text part then that is used for storing the email content aginst the ticket. + Otherwise if there is just an HTML part then the HTML is parsed to extract a simple text message. + There is special handling for the case when a multipart/related part holds the message content when + there are multiple attachments to the email. + :param part: the base email MIME part to be searched + :param files: any MIME parts to be attached are added to this list + :param include_chained_msgs: flag to indicate if the entire email message content including past replies must be extracted + ''' + message_part:MIMEPart = part.get_body() + parent_part:MIMEPart = part + content_type = message_part.get_content_type() + # Handle the possibility of a related part formatted email + if "multipart/related" == content_type: + # We want the actual message text so try again on the related MIME part + parent_part = message_part + message_part = message_part.get_body(preferencelist=["html", "plain",]) + content_type = message_part.get_content_type() + mime_content = None + formatted_body = None # Retain the original content by using a secondary variable if the HTML needs wrapping + if "text/html" == content_type: + # add the HTML message as an attachment wrapping if necessary + mime_content = mime_content_to_string(message_part) + if "{mime_content}" + if "{mime_content if formatted_body is None else formatted_body}" + files.append( + SimpleUploadedFile( + HTML_EMAIL_ATTACHMENT_FILENAME, (mime_content if formatted_body is None else formatted_body).encode("utf-8"), 'text/html') + ) + # Try to get a plain part message + plain_message_part = parent_part.get_body(preferencelist=["plain",]) + if plain_message_part: + # Replace mime_content with the plain text part content + mime_content = mime_content_to_string(plain_message_part) + message_part = plain_message_part + content_type = "text/plain" + else: + # Try to constitute the HTML response as plain text + mime_content, _x = attempt_body_extract_from_html(mime_content if formatted_body is None else formatted_body) + else: + # Is either text/plain or some random content-type so just decode the part content and store as is + mime_content = mime_content_to_string(message_part) + # We should now have the mime content + filtered_body = parse_email_content(mime_content, include_chained_msgs) + if not filtered_body or "" == filtered_body.strip(): + # A unit test that has a different HTML content to plain text which seems an invalid case as email + # tools should retain the HTML to be consistent with the plain text but manage this as a special case + # Try to constitute the HTML response as plain text + if formatted_body: + filtered_body, _x = attempt_body_extract_from_html(formatted_body) + else: + filtered_body = mime_content + # Only need the full message if the message_body excludes the chained messages + return filtered_body, mime_content def process_as_attachment( - part: Message, + part: MIMEPart, counter: int, files: List, logger: logging.Logger @@ -756,14 +821,14 @@ def process_as_attachment( ext = mimetypes.guess_extension(part.get_content_type()) name = f"part-{counter}{ext}" # Extract payload accounting for attached multiparts - payload = part.as_string() if part.is_multipart() else part.get_payload(decode=True) - files.append(SimpleUploadedFile(name, payload, mimetypes.guess_type(name)[0])) + payload_bytes = part.as_bytes() if part.is_multipart() else part.get_payload(decode=True) + files.append(SimpleUploadedFile(name, payload_bytes, mimetypes.guess_type(name)[0])) if logger.isEnabledFor(logging.DEBUG): logger.debug("Processed MIME as attachment: %s", name) return -def extract_email_subject(email_msg: Message,) -> str: +def extract_email_subject(email_msg: EmailMessage,) -> str: subject = email_msg.get('subject', _('Comment from e-mail')) subject = decode_mail_headers( decodeUnknown(email_msg.get_charset(), subject)) @@ -772,6 +837,62 @@ def extract_email_subject(email_msg: Message,) -> str: return subject.strip() +def extract_attachments( + target_part: MIMEPart, + files: List, + logger: logging.Logger, + counter: int = 1, + content_parts_excluded: bool = False, +) -> (int, bool): + ''' + if the MIME part is a multipart and not identified as inline or attachment then + iterate over the sub parts recursively. + Otherwise extract \MIME part content and add as an attachment. + It will recursively descend as appropriate ensuring that all parts not part of the message content + are added to the list of files to be attached. To cater for the possibility of text/plain and text/html parts + that are further down in the multipart heirarchy than the ones that ones meant to provide that content, + iterators are selectively used. + :param part: the email MIME part to be processed + :param files: any MIME part content or MIME parts to be attached are added to this list + :param logger: the logger to use for this MIME part processing + :param counter: the count of MIME parts added as attachment + :param content_parts_excluded: the MIME part(s) that provided the message content have been excluded + :returns the count of mime parts added as attachments and a boolean if the content parts have been excluded + ''' + content_type = target_part.get_content_type() + content_maintype = target_part.get_content_maintype() + if "multipart" == content_maintype and target_part.get_content_disposition() not in ['inline', 'attachment']: + # Cycle through all MIME parts in the email extracting the attachments that were not part of the message body + # If this is a "related" multipart then we can use the message part excluder iterator directly + if "multipart/related" == content_type: + if content_parts_excluded: + # This should really never happen in a properly constructed email message but... + logger.warn("WARNING! Content type MIME parts have been excluded but a multipart/related has been encountered. there may be missing information in attachments.") + else: + content_parts_excluded = True + # Use the iterator that automatically excludes message content parts + for part in target_part.iter_attachments(): + counter, content_parts_excluded = extract_attachments(part, files, logger, counter, content_parts_excluded) + # The iterator must be different depending on whether we have already excluded message content parts + else: + # Content part might be 1 or 2 parts but will be at same level so track finding at least 1 + content_part_detected = False + for part in target_part.iter_parts(): + if not content_parts_excluded and part.get_content_type() in ["text/plain", "text/html"]: + content_part_detected = True + continue + # Recurse into the part to process embedded parts + counter, content_parts_excluded = extract_attachments(part, files, logger, counter, content_parts_excluded) + # If we have found 1 or more content parts then flag that the content parts have been ommitted + # to ensure that other text/* parts are attached + if content_part_detected: + content_parts_excluded = True + else: + process_as_attachment(target_part, counter, files, logger) + counter = counter + 1 + return (counter, content_parts_excluded) + + def extract_email_metadata(message: str, queue: Queue, logger: logging.Logger @@ -789,18 +910,19 @@ def extract_email_metadata(message: str, stored in the ticket if it is a new ticket by setting it to True. In this scenario, if it is a reply that is a forwarded message with no actual message, then the description will be sourced from the text/html part and the forwarded message - will be in the FollowUp record aassociated with the ticket. + will be in the FollowUp record associated with the ticket. It will iterate over every MIME part and store all MIME parts as attachments apart from the text/plain part. There may be a case for trying to exclude repeated signature images by checking if an - attachment of the same name already exists as an attachement on the ticket but that is + attachment of the same name already exists as an attachment on the ticket but that is not implemented. :param message: the raw email message received - :param queue: the queue that hte + :param queue: the queue that the message is assigned to :param logger: the logger to be used ''' # 'message' must be an RFC822 formatted message to correctly parse. - message_obj: Message = email.message_from_string(message) + # NBot sure why but policy explicitly set to default is required for any messages with attachments in them + message_obj: EmailMessage = email.message_from_string(message, EmailMessage, policy=policy.default) subject = extract_email_subject(message_obj) @@ -822,84 +944,21 @@ def extract_email_metadata(message: str, subject, logger ) - plain_body: str = None - formatted_body: str = None - counter = 0 files = [] - first_mime_non_multipart_content: MIMEText = None - # Cycle through all MIME parts in the email extracting the plain and formatted messages - # Algorithm uses the first text parts found as the actual email content and subsequent text parts - # are made into attachments so they do not get lost - for part in message_obj.walk(): - part_main_type = part.get_content_maintype() - if part_main_type == 'multipart': - continue - if part.get_content_disposition() in ['inline', 'attachment']: - process_as_attachment(part, counter, files, logger) - else: - # Get the content then assign to plain for formatted email message otherwise store the - # content as an attachment - mime_content = extract_mime_content(part) - if first_mime_non_multipart_content is None: - first_mime_non_multipart_content = mime_content - if part_main_type == 'text': - # Could be the body of the email - part_sub_type = part.get_content_subtype() - if plain_body is None and part_sub_type == "plain": - plain_body = mime_content - elif formatted_body is None and part_sub_type == "html": - formatted_body = mime_content - if "{formatted_body}" - else: - email_body = formatted_body - - payload = ( - '' - '' - '' - '' - '%s' - '' - ) % email_body - files.append( - SimpleUploadedFile( - HTML_EMAIL_ATTACHMENT_FILENAME, payload.encode("utf-8"), 'text/html') - ) - else: - # Theoretically should not happen to properly structured emails but process anything - # else as an attachment - process_as_attachment(part, counter, files, logger) - logger.debug(f"Text MIME part added as attachment: {part.get_content_type()}") - else: - # process anything else as an attachment - process_as_attachment(part, counter, files, logger) - counter += 1 - # Check if we have at least the plain body - if not plain_body: - if formatted_body: - # We have a formatted body but no plain text body - plain_body, _x = attempt_body_extract_from_html(formatted_body) - else: - # Something wrong with email or a processing issue so try first part or save full email message - if first_mime_non_multipart_content: - plain_body = extract_email_message(first_mime_non_multipart_content, True, True) - else: - plain_body = message # first message in thread, we save full body to avoid losing forwards and things like that include_chained_msgs = True if ticket_id is None and getattr( django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False) else False - message_body = extract_email_message(plain_body, True, include_chained_msgs) - # Only need the full message if the message_body excludes the chained messages - chained_email_message = None if include_chained_msgs else plain_body - # Not sure this is valid but a unit test uses a DIFFERENT plain text to html text body - # where plain text has blank message with forwarded message so.... hack away to support it - if message_body is not None and len(message_body) == 0 and formatted_body and len(formatted_body) > 0: - message_body, _x = attempt_body_extract_from_html(formatted_body) - # Set the chained message to the orignal plain text full message so it is stored in a FollowUp comments field - if len(plain_body) > 0: - chained_email_message = plain_body - + filtered_body, full_body = extract_email_message_content(message_obj, files, include_chained_msgs) + # If the base part is not a multipart then it will have already been processed as the vbody content so + # no need to process attachments + if "multipart" == message_obj.get_content_maintype(): + # Find and attach all other parts or part contents as attachments + counter, content_parts_excluded = extract_attachments(message_obj, files, logger) + if not content_parts_excluded: + # Unexpected situation and may mean there is a hole in the email processing logic + logger.warning("Failed to exclude email content when parsing all MIME parts in the multipart. Verify that there were no text/* parts containing message content.") + if logger.isEnabledFor(logging.DEBUG): + logger.debug("Email parsed and %s attachments were found and attached.", counter) add_file_if_always_save_incoming_email_message(files, message) smtp_priority = message_obj.get('priority', '') @@ -909,8 +968,8 @@ def extract_email_metadata(message: str, smtp_priority, smtp_importance} else 3 payload = { - 'body': message_body, - 'full_body': chained_email_message, + 'body': filtered_body, + 'full_body': full_body, 'subject': subject, 'queue': queue, 'sender_email': sender_email, From 7e7a38cc3cc9f8769bf1ae95e97347624a5688ea Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Tue, 10 Oct 2023 13:42:40 +0100 Subject: [PATCH 13/17] Fix to avoid using "hidden" variable as kwarg. --- helpdesk/tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helpdesk/tests/utils.py b/helpdesk/tests/utils.py index 24c231c3..e3011b2b 100644 --- a/helpdesk/tests/utils.py +++ b/helpdesk/tests/utils.py @@ -240,7 +240,7 @@ def generate_multipart_email(locale: str="en_US", :param sub_type: multipart sub type that defaults to "mixed" if not specified :param use_short_email: produces a "To" or "From" that is only the email address if True """ - msg = MIMEMultipart(_subtype=sub_type) if sub_type else MIMEMultipart() + msg = MIMEMultipart(sub_type) if sub_type else MIMEMultipart() for part_type in type_list: msg.attach(generate_mime_part(locale=locale, part_type=part_type)) from_meta, to_meta = add_simple_email_headers(msg, locale=locale, use_short_email=use_short_email) From 6fc7a8dd54684ff6c593c1d2934f72033d3d33c0 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Tue, 10 Oct 2023 13:45:30 +0100 Subject: [PATCH 14/17] Minor addition to documentation for developers --- README.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index bd6fd6ea..79234c19 100644 --- a/README.rst +++ b/README.rst @@ -72,7 +72,7 @@ Developer Environment --------------------- Follow these steps to set up your development environment to contribute to helpdesk: - - check out the helpdesk app to you local file system:: + - check out the helpdesk app to your local file system:: git clone https://github.com/django-helpdesk/django-helpdesk.git - install a virtual environment @@ -82,6 +82,9 @@ Follow these steps to set up your development environment to contribute to helpd - install the requirements for development:: pip install -r requirements.txt -r requirements-dev.txt + - install the requirements for testing as well:: + pip install -r requirements.txt -r requirements-dev.txt -r requirements-testing.txt + To reactivate a VENV just run: source .venv/bin/activate From 276af1c03cbb8e0eea52206734cacda3b566011a Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Tue, 10 Oct 2023 13:47:18 +0100 Subject: [PATCH 15/17] Fix inline with multipart message attachment test --- helpdesk/tests/test_get_email.py | 33 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/helpdesk/tests/test_get_email.py b/helpdesk/tests/test_get_email.py index b34b3ac3..abd346ba 100644 --- a/helpdesk/tests/test_get_email.py +++ b/helpdesk/tests/test_get_email.py @@ -7,6 +7,7 @@ from django.core.files.uploadedfile import SimpleUploadedFile from django.core.management import call_command from django.shortcuts import get_object_or_404 from django.test import override_settings, TestCase +from email.mime.message import MIMEMessage from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import helpdesk.email @@ -69,7 +70,7 @@ class GetEmailCommonTests(TestCase): "ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" "ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo..." ) - self.assertEqual(ticket.description, "", msg=ticket.description) + self.assertEqual(ticket.description.strip(), "", msg=ticket.description) def test_email_with_quoted_printable_body(self): """ @@ -294,22 +295,27 @@ class GetEmailCommonTests(TestCase): inline_attachment.add_header('X-Attachment-Id', inline_image_id) inline_attachment.add_header('Content-ID', '<' + inline_image_id + '>') # Create the actual email with its plain and HTML parts - email_message = MIMEMultipart(_subtype="alternative") + alt_email_message = MIMEMultipart("alternative") # Create the plain and HTML that will reference the inline attachment plain_body = "Test with inline image: \n[image: " + inline_att_filename + "]\n\n" plain_msg = MIMEText(plain_body) - email_message.attach(plain_msg) + alt_email_message.attach(plain_msg) html_body = '
Test with inline image: 3D="'
' html_msg = MIMEText(html_body, "html") - email_message.attach(html_msg) - # Create the email attachment and attach that as well - email_attachment, _, _ = utils.generate_multipart_email(type_list=['plain', 'html']) - email_att_content = email_attachment.as_string() - email_message.attach(utils.generate_file_mime_part(filename=email_att_filename, content=email_att_content)) + alt_email_message.attach(html_msg) + # Create the email to be attached and attach that as well + email_to_be_attached, _, _ = utils.generate_multipart_email(type_list=['plain', 'html']) + email_as_attachment = MIMEMessage(email_to_be_attached) + email_as_attachment.add_header('Content-Disposition', 'attachment', filename=email_att_filename) # Now create the base multipart and attach all the other parts to it - base_message,_ ,_ = utils.generate_multipart_email(type_list=[], sub_type="related") - base_message.attach(email_message) - base_message.attach(inline_attachment) + related_message = MIMEMultipart("related") + related_message.attach(alt_email_message) + related_message.attach(inline_attachment) + base_message = MIMEMultipart("mixed") + base_message.attach(related_message) + base_message.attach(email_as_attachment) + utils.add_simple_email_headers(base_message, locale="en_US", use_short_email=True) + # Now send the part to the email workflow extract_email_metadata(base_message.as_string(), self.queue_public, self.logger) self.assertEqual(len(mail.outbox), 1) # @UndefinedVariable @@ -322,14 +328,15 @@ class GetEmailCommonTests(TestCase): email_attachment_found = False for att_retrieved in followup.followupattachment_set.all(): if (helpdesk.email.HTML_EMAIL_ATTACHMENT_FILENAME == att_retrieved.filename): - # Ignore the HTML formatted conntent of the email that is attached + # Ignore the HTML formatted content of the email that is attached continue if att_retrieved.filename.endswith(inline_att_filename): inline_found = True elif att_retrieved.filename.endswith(email_att_filename): email_attachment_found = True else: - self.assertTrue(False, "Unexpected file in ticket attachments: " % att_retrieved.filename) + print(f"\n\n%%%%%% {att_retrieved}") + self.assertTrue(False, "Unexpected file in ticket attachments: %s" % att_retrieved.filename) self.assertTrue(email_attachment_found, "Email attachment file not found ticket attachments: %s" % (email_att_filename)) self.assertTrue(inline_found, "Inline file not found in email: %s" % (inline_att_filename)) From 104a849d9f57f4830c7d21174d2193bfa10955da Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Tue, 10 Oct 2023 14:08:47 +0100 Subject: [PATCH 16/17] Fix flake8 issues --- helpdesk/lib.py | 6 +++--- helpdesk/templated_email.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/helpdesk/lib.py b/helpdesk/lib.py index d265963b..ac0d46bc 100644 --- a/helpdesk/lib.py +++ b/helpdesk/lib.py @@ -186,11 +186,11 @@ def format_time_spent(time_spent): def convert_value(value): """ Convert date/time data type to known fixed format string """ - if type(value) == datetime: + if type(value) is datetime: return value.strftime(CUSTOMFIELD_DATETIME_FORMAT) - elif type(value) == date: + elif type(value) is date: return value.strftime(CUSTOMFIELD_DATE_FORMAT) - elif type(value) == time: + elif type(value) is time: return value.strftime(CUSTOMFIELD_TIME_FORMAT) else: return value diff --git a/helpdesk/templated_email.py b/helpdesk/templated_email.py index 5c98cdc7..e68489a9 100644 --- a/helpdesk/templated_email.py +++ b/helpdesk/templated_email.py @@ -97,7 +97,7 @@ def send_templated_mail(template_name, if isinstance(recipients, str): if recipients.find(','): recipients = recipients.split(',') - elif type(recipients) != list: + elif type(recipients) is not list: recipients = [recipients] msg = EmailMultiAlternatives(subject_part, text_part, From e90739a07a912c76a9e7053dbb03033c837d0f30 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Tue, 10 Oct 2023 14:08:57 +0100 Subject: [PATCH 17/17] Fix formatting issues --- helpdesk/email.py | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/helpdesk/email.py b/helpdesk/email.py index bccb2992..08e14f96 100644 --- a/helpdesk/email.py +++ b/helpdesk/email.py @@ -18,7 +18,6 @@ from django.utils.translation import gettext as _ import email from email import policy from email.message import EmailMessage, MIMEPart -from email.mime.text import MIMEText from email.utils import getaddresses from email_reply_parser import EmailReplyParser from helpdesk import settings @@ -757,10 +756,11 @@ def extract_email_message_content( there are multiple attachments to the email. :param part: the base email MIME part to be searched :param files: any MIME parts to be attached are added to this list - :param include_chained_msgs: flag to indicate if the entire email message content including past replies must be extracted + :param include_chained_msgs: flag to indicate if the entire email message content including past + replies must be extracted ''' - message_part:MIMEPart = part.get_body() - parent_part:MIMEPart = part + message_part: MIMEPart = part.get_body() + parent_part: MIMEPart = part content_type = message_part.get_content_type() # Handle the possibility of a related part formatted email if "multipart/related" == content_type: @@ -769,17 +769,20 @@ def extract_email_message_content( message_part = message_part.get_body(preferencelist=["html", "plain",]) content_type = message_part.get_content_type() mime_content = None - formatted_body = None # Retain the original content by using a secondary variable if the HTML needs wrapping + formatted_body = None # Retain the original content by using a secondary variable if the HTML needs wrapping if "text/html" == content_type: # add the HTML message as an attachment wrapping if necessary mime_content = mime_content_to_string(message_part) if "{mime_content}" if "{mime_content if formatted_body is None else formatted_body}" + formatted_body = f"\ + {mime_content if formatted_body is None else formatted_body}" files.append( SimpleUploadedFile( - HTML_EMAIL_ATTACHMENT_FILENAME, (mime_content if formatted_body is None else formatted_body).encode("utf-8"), 'text/html') + HTML_EMAIL_ATTACHMENT_FILENAME, + (mime_content if formatted_body is None else formatted_body).encode("utf-8"), 'text/html', + ) ) # Try to get a plain part message plain_message_part = parent_part.get_body(preferencelist=["plain",]) @@ -790,7 +793,8 @@ def extract_email_message_content( content_type = "text/plain" else: # Try to constitute the HTML response as plain text - mime_content, _x = attempt_body_extract_from_html(mime_content if formatted_body is None else formatted_body) + mime_content, _x = attempt_body_extract_from_html( + mime_content if formatted_body is None else formatted_body) else: # Is either text/plain or some random content-type so just decode the part content and store as is mime_content = mime_content_to_string(message_part) @@ -845,12 +849,12 @@ def extract_attachments( content_parts_excluded: bool = False, ) -> (int, bool): ''' - if the MIME part is a multipart and not identified as inline or attachment then + If the MIME part is a multipart and not identified as "inline" or "attachment" then iterate over the sub parts recursively. - Otherwise extract \MIME part content and add as an attachment. + Otherwise extract MIME part content and add as an attachment. It will recursively descend as appropriate ensuring that all parts not part of the message content are added to the list of files to be attached. To cater for the possibility of text/plain and text/html parts - that are further down in the multipart heirarchy than the ones that ones meant to provide that content, + that are further down in the multipart hierarchy than the ones that ones meant to provide that content, iterators are selectively used. :param part: the email MIME part to be processed :param files: any MIME part content or MIME parts to be attached are added to this list @@ -867,12 +871,15 @@ def extract_attachments( if "multipart/related" == content_type: if content_parts_excluded: # This should really never happen in a properly constructed email message but... - logger.warn("WARNING! Content type MIME parts have been excluded but a multipart/related has been encountered. there may be missing information in attachments.") + logger.warn( + "WARNING! Content type MIME parts have been excluded but a multipart/related has been encountered.\ + There may be missing information in attachments.") else: content_parts_excluded = True # Use the iterator that automatically excludes message content parts for part in target_part.iter_attachments(): - counter, content_parts_excluded = extract_attachments(part, files, logger, counter, content_parts_excluded) + counter, content_parts_excluded = extract_attachments( + part, files, logger, counter, content_parts_excluded) # The iterator must be different depending on whether we have already excluded message content parts else: # Content part might be 1 or 2 parts but will be at same level so track finding at least 1 @@ -882,7 +889,8 @@ def extract_attachments( content_part_detected = True continue # Recurse into the part to process embedded parts - counter, content_parts_excluded = extract_attachments(part, files, logger, counter, content_parts_excluded) + counter, content_parts_excluded = extract_attachments( + part, files, logger, counter, content_parts_excluded) # If we have found 1 or more content parts then flag that the content parts have been ommitted # to ensure that other text/* parts are attached if content_part_detected: @@ -891,7 +899,7 @@ def extract_attachments( process_as_attachment(target_part, counter, files, logger) counter = counter + 1 return (counter, content_parts_excluded) - + def extract_email_metadata(message: str, queue: Queue, @@ -956,7 +964,9 @@ def extract_email_metadata(message: str, counter, content_parts_excluded = extract_attachments(message_obj, files, logger) if not content_parts_excluded: # Unexpected situation and may mean there is a hole in the email processing logic - logger.warning("Failed to exclude email content when parsing all MIME parts in the multipart. Verify that there were no text/* parts containing message content.") + logger.warning( + "Failed to exclude email content when parsing all MIME parts in the multipart.\ + Verify that there were no text/* parts containing message content.") if logger.isEnabledFor(logging.DEBUG): logger.debug("Email parsed and %s attachments were found and attached.", counter) add_file_if_always_save_incoming_email_message(files, message)