diff --git a/helpdesk/email.py b/helpdesk/email.py index 1a0b7d50..7a4962e2 100644 --- a/helpdesk/email.py +++ b/helpdesk/email.py @@ -20,6 +20,7 @@ import email from email.utils import getaddresses from email_reply_parser import EmailReplyParser from helpdesk import settings +from helpdesk.exceptions import IgnoreTicketException from helpdesk.lib import process_attachments, safe_template_context from helpdesk.models import FollowUp, IgnoreEmail, Queue, Ticket import imaplib @@ -34,6 +35,8 @@ import ssl import sys from time import ctime import typing +from email.message import Message +from typing import Tuple, List # import User model, which may be a custom model @@ -135,8 +138,11 @@ def pop3_sync(q, logger, server): else: full_message = encoding.force_str( "\n".join(raw_content), errors='replace') - ticket = object_from_message( - message=full_message, queue=q, logger=logger) + try: + ticket = object_from_message(message=full_message, queue=q, logger=logger) + except IgnoreTicketException: + logger.warn( + "Message %s was ignored and will be left on POP3 server" % msgNum) if ticket: server.dele(msgNum) @@ -186,9 +192,12 @@ def imap_sync(q, logger, server): data = server.fetch(num, '(RFC822)')[1] full_message = encoding.force_str(data[0][1], errors='replace') try: - ticket = object_from_message( - message=full_message, queue=q, logger=logger) - except TypeError: + ticket = object_from_message(message=full_message, queue=q, logger=logger) + except IgnoreTicketException: + logger.warn("Message %s was ignored and will be left on IMAP server" % num) + return + except TypeError as te: + logger.warn(f"Unexpected error processing message: {te}") ticket = None # hotfix. Need to work out WHY. if ticket: server.store(num, '+FLAGS', '\\Deleted') @@ -282,8 +291,11 @@ def process_queue(q, logger): logger.info("Processing message %d" % i) with open(m, 'r') as f: full_message = encoding.force_str(f.read(), errors='replace') - ticket = object_from_message( - message=full_message, queue=q, logger=logger) + try: + ticket = object_from_message(message=full_message, queue=q, logger=logger) + except IgnoreTicketException: + logger.warn("Message %d was ignored and will be left in local directory", i) + return if ticket: logger.info( "Successfully processed message %d, ticket/comment created.", i) @@ -573,38 +585,122 @@ def get_email_body_from_part_payload(part) -> str: part.get_payload(decode=False) ) +def attempt_body_extract_from_html(message: str) -> str: + mail = BeautifulSoup(str(message), "html.parser") + beautiful_body = mail.find('body') + body = None + full_body = None + if beautiful_body: + try: + body = beautiful_body.text + full_body = body + except AttributeError: + pass + if not body: + body = "" + return body, full_body + +def extract_part_data( + part: Message, + counter: int, + ticket_id: int, + files: List, + logger: logging.Logger + ) -> Tuple[str, str]: + name = part.get_filename() + if name: + name = email.utils.collapse_rfc2231_value(name) + part_body = None + part_full_body=None + if part.get_content_maintype() == 'text' and name is None: + if part.get_content_subtype() == 'plain': + part_body = part.get_payload(decode=True) + # https://github.com/django-helpdesk/django-helpdesk/issues/732 + if part['Content-Transfer-Encoding'] == '8bit' and part.get_content_charset() == 'utf-8': + part_body = part_body.decode('unicode_escape') + part_body = decodeUnknown(part.get_content_charset(), part_body) + # have to use django_settings here so overwritting it works in tests + # the default value is False anyway + if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False): + # first message in thread, we save full body to avoid + # losing forwards and things like that + part_full_body = get_body_from_fragments(part_body) + part_body = EmailReplyParser.parse_reply(part_body) + else: + # second and other reply, save only first part of the + # message + part_body = EmailReplyParser.parse_reply(part_body) + part_full_body = part_body + # workaround to get unicode text out rather than escaped text + part_body = get_encoded_body(part_body) + logger.debug("Discovered plain text MIME part") + else: + email_body = get_email_body_from_part_payload(part) + + if not part_body and not part_full_body: + # no text has been parsed so far - try such deep parsing + # for some messages + altered_body = email_body.replace( + "
", "\n").replace("" + + payload = ( + '' + '
' + '' + '' + '%s' + '