From 23c3b72a432a24f8f374881ffa80ffab57d5120f Mon Sep 17 00:00:00 2001
From: chrisbroderick
Date: Sun, 4 Sep 2022 22:01:32 +0100
Subject: [PATCH] Refactor object_from_message to make it more testable and fix
some bugs. Extract the sender's email address using the email library instead
of pre-decoding the header, which exposes the comma separator and causes the
address to be extracted incorrectly if the real name contains a comma. Raise
an exception when a message that should be ignored is detected, so that
ignored messages are handled explicitly.
---
helpdesk/email.py | 253 ++++++++++++++++++++++++----------------------
1 file changed, 132 insertions(+), 121 deletions(-)
diff --git a/helpdesk/email.py b/helpdesk/email.py
index 1a0b7d50..7a4962e2 100644
--- a/helpdesk/email.py
+++ b/helpdesk/email.py
@@ -20,6 +20,7 @@ import email
from email.utils import getaddresses
from email_reply_parser import EmailReplyParser
from helpdesk import settings
+from helpdesk.exceptions import IgnoreTicketException
from helpdesk.lib import process_attachments, safe_template_context
from helpdesk.models import FollowUp, IgnoreEmail, Queue, Ticket
import imaplib
@@ -34,6 +35,8 @@ import ssl
import sys
from time import ctime
import typing
+from email.message import Message
+from typing import Tuple, List
# import User model, which may be a custom model
@@ -135,8 +138,11 @@ def pop3_sync(q, logger, server):
else:
full_message = encoding.force_str(
"\n".join(raw_content), errors='replace')
- ticket = object_from_message(
- message=full_message, queue=q, logger=logger)
+ try:
+ ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ except IgnoreTicketException:
+ logger.warn(
+ "Message %s was ignored and will be left on POP3 server" % msgNum)
if ticket:
server.dele(msgNum)
@@ -186,9 +192,12 @@ def imap_sync(q, logger, server):
data = server.fetch(num, '(RFC822)')[1]
full_message = encoding.force_str(data[0][1], errors='replace')
try:
- ticket = object_from_message(
- message=full_message, queue=q, logger=logger)
- except TypeError:
+ ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ except IgnoreTicketException:
+ logger.warn("Message %s was ignored and will be left on IMAP server" % num)
+ return
+ except TypeError as te:
+ logger.warn(f"Unexpected error processing message: {te}")
ticket = None # hotfix. Need to work out WHY.
if ticket:
server.store(num, '+FLAGS', '\\Deleted')
@@ -282,8 +291,11 @@ def process_queue(q, logger):
logger.info("Processing message %d" % i)
with open(m, 'r') as f:
full_message = encoding.force_str(f.read(), errors='replace')
- ticket = object_from_message(
- message=full_message, queue=q, logger=logger)
+ try:
+ ticket = object_from_message(message=full_message, queue=q, logger=logger)
+ except IgnoreTicketException:
+ logger.warn("Message %d was ignored and will be left in local directory", i)
+ return
if ticket:
logger.info(
"Successfully processed message %d, ticket/comment created.", i)
@@ -573,38 +585,122 @@ def get_email_body_from_part_payload(part) -> str:
part.get_payload(decode=False)
)
+def attempt_body_extract_from_html(message: str) -> Tuple[str, typing.Optional[str]]:
+    """Return ``(body, full_body)`` taken from the text of the HTML ``<body>`` tag.
+
+    ``body`` falls back to "" and ``full_body`` to None when no text is found.
+    """
+    mail = BeautifulSoup(str(message), "html.parser")
+    beautiful_body = mail.find('body')
+    body = None
+    full_body = None
+    if beautiful_body:
+        # a match without a usable .text attribute is treated as "no body"
+        body = getattr(beautiful_body, 'text', None)
+        full_body = body
+    return body or "", full_body
+
+def extract_part_data(
+ part: Message,
+ counter: int,
+ ticket_id: int,
+ files: List,
+ logger: logging.Logger
+ ) -> Tuple[str, str]:
+ name = part.get_filename()
+ if name:
+ name = email.utils.collapse_rfc2231_value(name)
+ part_body = None
+ part_full_body=None
+ if part.get_content_maintype() == 'text' and name is None:
+ if part.get_content_subtype() == 'plain':
+ part_body = part.get_payload(decode=True)
+ # https://github.com/django-helpdesk/django-helpdesk/issues/732
+ if part['Content-Transfer-Encoding'] == '8bit' and part.get_content_charset() == 'utf-8':
+ part_body = part_body.decode('unicode_escape')
+ part_body = decodeUnknown(part.get_content_charset(), part_body)
+ # have to use django_settings here so overwriting it works in tests
+ # the default value is False anyway
+ if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False):
+ # first message in thread, we save full body to avoid
+ # losing forwards and things like that
+ part_full_body = get_body_from_fragments(part_body)
+ part_body = EmailReplyParser.parse_reply(part_body)
+ else:
+ # second and other reply, save only first part of the
+ # message
+ part_body = EmailReplyParser.parse_reply(part_body)
+ part_full_body = part_body
+ # workaround to get unicode text out rather than escaped text
+ part_body = get_encoded_body(part_body)
+ logger.debug("Discovered plain text MIME part")
+ else:
+ email_body = get_email_body_from_part_payload(part)
+
+ if not part_body and not part_full_body:
+ # no text has been parsed so far - try such deep parsing
+ # for some messages
+ altered_body = email_body.replace(
+ "
", "\n").replace("
{email_body}