diff --git a/helpdesk/email.py b/helpdesk/email.py index fd956fef..1ace3830 100644 --- a/helpdesk/email.py +++ b/helpdesk/email.py @@ -39,7 +39,8 @@ import ssl import sys from time import ctime import typing -from typing import List, Tuple +from typing import List +from email.mime.text import MIMEText # import User model, which may be a custom model @@ -53,6 +54,8 @@ STRIPPED_SUBJECT_STRINGS = [ "Automatic reply: ", ] +HTML_EMAIL_ATTACHMENT_FILENAME = _("email_html_body.html") + def process_email(quiet=False): for q in Queue.objects.filter( @@ -141,7 +144,7 @@ def pop3_sync(q, logger, server): full_message = encoding.force_str( "\n".join(raw_content), errors='replace') try: - ticket = object_from_message(message=full_message, queue=q, logger=logger) + ticket = extract_email_metadata(message=full_message, queue=q, logger=logger) except IgnoreTicketException: logger.warn( "Message %s was ignored and will be left on POP3 server" % msgNum) @@ -198,7 +201,7 @@ def imap_sync(q, logger, server): data = server.fetch(num, '(RFC822)')[1] full_message = encoding.force_str(data[0][1], errors='replace') try: - ticket = object_from_message(message=full_message, queue=q, logger=logger) + ticket = extract_email_metadata(message=full_message, queue=q, logger=logger) except IgnoreTicketException: logger.warn("Message %s was ignored and will be left on IMAP server" % num) except DeleteIgnoredTicketException: @@ -285,7 +288,7 @@ def imap_oauth_sync(q, logger, server): full_message = encoding.force_str(data[0][1], errors='replace') try: - ticket = object_from_message(message=full_message, queue=q, logger=logger) + ticket = extract_email_metadata(message=full_message, queue=q, logger=logger) except IgnoreTicketException as itex: logger.warn(f"Message {num} was ignored. {itex}") @@ -405,7 +408,7 @@ def process_queue(q, logger): with open(m, 'r') as f: full_message = encoding.force_str(f.read(), errors='replace') try: - ticket = object_from_message(message=full_message, queue=q, logger=logger) + ticket = extract_email_metadata(message=full_message, queue=q, logger=logger) except IgnoreTicketException: logger.warn("Message %d was ignored and will be left in local directory", i) except DeleteIgnoredTicketException: @@ -433,7 +436,7 @@ def decodeUnknown(charset, string): if not charset: try: return str(string, encoding='utf-8', errors='replace') - except UnicodeError: + except UnicodeError as e: return str(string, encoding='iso8859-1', errors='replace') return str(string, encoding=charset, errors='replace') return string @@ -723,133 +726,92 @@ def attempt_body_extract_from_html(message: str) -> str: return body, full_body -def extract_part_data( +def extract_mime_content(part: Message,) -> str: + ''' + Extract the content from the MIME body part + :param part: the MIME part to extract the content from + ''' + content_bytes = part.get_payload(decode=True) + charset = part.get_content_charset() + # The default for MIME email is 7bit which requires special decoding to utf-8 so make sure we handle the decoding correctly + if part['Content-Transfer-Encoding'] in [None, '8bit', '7bit'] and (charset == 'utf-8' or charset is None): + charset = "unicode_escape" + content = decodeUnknown(charset, content_bytes) + return content + + +def extract_email_message(mime_content: str, is_plain_content_type: bool, is_extract_full_email_msg: bool) -> str: + email_content = None + if is_extract_full_email_msg: + # Take the full content including encapsulated "forwarded" and "reply" sections + email_content = get_body_from_fragments(mime_content) if is_plain_content_type else mime_content + else: + # Just get the primary part of the email and drop off any text below the actually response text + email_content = EmailReplyParser.parse_reply(mime_content) if is_plain_content_type else mime_content + return email_content + + +def process_as_attachment( part: Message, counter: int, - ticket_id: int, files: List, logger: logging.Logger -) -> Tuple[str, str]: +): name = part.get_filename() if name: - name = email.utils.collapse_rfc2231_value(name) - part_body = None - formatted_body = None - if part.get_content_maintype() == 'text' and name is None: - if part.get_content_subtype() == 'plain': - part_body = part.get_payload(decode=True) - # https://github.com/django-helpdesk/django-helpdesk/issues/732 - if part['Content-Transfer-Encoding'] == '8bit' and part.get_content_charset() == 'utf-8': - part_body = part_body.decode('unicode_escape') - part_body = decodeUnknown(part.get_content_charset(), part_body) - # have to use django_settings here so overwriting it works in tests - # the default value is False anyway - if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False): - # first message in thread, we save full body to avoid - # losing forwards and things like that - formatted_body = get_body_from_fragments(part_body) - part_body = EmailReplyParser.parse_reply(part_body) - else: - # second and other reply, save only first part of the - # message - part_body = EmailReplyParser.parse_reply(part_body) - formatted_body = part_body - # workaround to get unicode text out rather than escaped text - part_body = get_encoded_body(part_body) - logger.debug("Discovered plain text MIME part") - else: - email_body = get_email_body_from_part_payload(part) - - if not part_body and not formatted_body: - # no text has been parsed so far - try such deep parsing - # for some messages - altered_body = email_body.replace( - "
", "\n").replace("" - - payload = ( - '' - '
' - '' - '' - '%s' - '