mirror of
https://gitea.mueller.network/extern/django-helpdesk.git
synced 2024-11-29 03:13:15 +01:00
Refactor object_from_message to make it more testable and fix some bugs.
Extract the from email using the email library instead of pre-decoding which exposes the comma separator causing the email address to be extracted incorrectly if the real name contains a comma. Raise an exception when a message to be ignored is detected to process ignored messages explicitly.
This commit is contained in:
parent
e1085cb370
commit
23c3b72a43
@ -20,6 +20,7 @@ import email
|
|||||||
from email.utils import getaddresses
|
from email.utils import getaddresses
|
||||||
from email_reply_parser import EmailReplyParser
|
from email_reply_parser import EmailReplyParser
|
||||||
from helpdesk import settings
|
from helpdesk import settings
|
||||||
|
from helpdesk.exceptions import IgnoreTicketException
|
||||||
from helpdesk.lib import process_attachments, safe_template_context
|
from helpdesk.lib import process_attachments, safe_template_context
|
||||||
from helpdesk.models import FollowUp, IgnoreEmail, Queue, Ticket
|
from helpdesk.models import FollowUp, IgnoreEmail, Queue, Ticket
|
||||||
import imaplib
|
import imaplib
|
||||||
@ -34,6 +35,8 @@ import ssl
|
|||||||
import sys
|
import sys
|
||||||
from time import ctime
|
from time import ctime
|
||||||
import typing
|
import typing
|
||||||
|
from email.message import Message
|
||||||
|
from typing import Tuple, List
|
||||||
|
|
||||||
|
|
||||||
# import User model, which may be a custom model
|
# import User model, which may be a custom model
|
||||||
@ -135,8 +138,11 @@ def pop3_sync(q, logger, server):
|
|||||||
else:
|
else:
|
||||||
full_message = encoding.force_str(
|
full_message = encoding.force_str(
|
||||||
"\n".join(raw_content), errors='replace')
|
"\n".join(raw_content), errors='replace')
|
||||||
ticket = object_from_message(
|
try:
|
||||||
message=full_message, queue=q, logger=logger)
|
ticket = object_from_message(message=full_message, queue=q, logger=logger)
|
||||||
|
except IgnoreTicketException:
|
||||||
|
logger.warn(
|
||||||
|
"Message %s was ignored and will be left on POP3 server" % msgNum)
|
||||||
|
|
||||||
if ticket:
|
if ticket:
|
||||||
server.dele(msgNum)
|
server.dele(msgNum)
|
||||||
@ -186,9 +192,12 @@ def imap_sync(q, logger, server):
|
|||||||
data = server.fetch(num, '(RFC822)')[1]
|
data = server.fetch(num, '(RFC822)')[1]
|
||||||
full_message = encoding.force_str(data[0][1], errors='replace')
|
full_message = encoding.force_str(data[0][1], errors='replace')
|
||||||
try:
|
try:
|
||||||
ticket = object_from_message(
|
ticket = object_from_message(message=full_message, queue=q, logger=logger)
|
||||||
message=full_message, queue=q, logger=logger)
|
except IgnoreTicketException:
|
||||||
except TypeError:
|
logger.warn("Message %s was ignored and will be left on IMAP server" % num)
|
||||||
|
return
|
||||||
|
except TypeError as te:
|
||||||
|
logger.warn(f"Unexpected error processing message: {te}")
|
||||||
ticket = None # hotfix. Need to work out WHY.
|
ticket = None # hotfix. Need to work out WHY.
|
||||||
if ticket:
|
if ticket:
|
||||||
server.store(num, '+FLAGS', '\\Deleted')
|
server.store(num, '+FLAGS', '\\Deleted')
|
||||||
@ -282,8 +291,11 @@ def process_queue(q, logger):
|
|||||||
logger.info("Processing message %d" % i)
|
logger.info("Processing message %d" % i)
|
||||||
with open(m, 'r') as f:
|
with open(m, 'r') as f:
|
||||||
full_message = encoding.force_str(f.read(), errors='replace')
|
full_message = encoding.force_str(f.read(), errors='replace')
|
||||||
ticket = object_from_message(
|
try:
|
||||||
message=full_message, queue=q, logger=logger)
|
ticket = object_from_message(message=full_message, queue=q, logger=logger)
|
||||||
|
except IgnoreTicketException:
|
||||||
|
logger.warn("Message %d was ignored and will be left in local directory", i)
|
||||||
|
return
|
||||||
if ticket:
|
if ticket:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Successfully processed message %d, ticket/comment created.", i)
|
"Successfully processed message %d, ticket/comment created.", i)
|
||||||
@ -573,38 +585,122 @@ def get_email_body_from_part_payload(part) -> str:
|
|||||||
part.get_payload(decode=False)
|
part.get_payload(decode=False)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def attempt_body_extract_from_html(message: str) -> str:
|
||||||
|
mail = BeautifulSoup(str(message), "html.parser")
|
||||||
|
beautiful_body = mail.find('body')
|
||||||
|
body = None
|
||||||
|
full_body = None
|
||||||
|
if beautiful_body:
|
||||||
|
try:
|
||||||
|
body = beautiful_body.text
|
||||||
|
full_body = body
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
if not body:
|
||||||
|
body = ""
|
||||||
|
return body, full_body
|
||||||
|
|
||||||
|
def extract_part_data(
|
||||||
|
part: Message,
|
||||||
|
counter: int,
|
||||||
|
ticket_id: int,
|
||||||
|
files: List,
|
||||||
|
logger: logging.Logger
|
||||||
|
) -> Tuple[str, str]:
|
||||||
|
name = part.get_filename()
|
||||||
|
if name:
|
||||||
|
name = email.utils.collapse_rfc2231_value(name)
|
||||||
|
part_body = None
|
||||||
|
part_full_body=None
|
||||||
|
if part.get_content_maintype() == 'text' and name is None:
|
||||||
|
if part.get_content_subtype() == 'plain':
|
||||||
|
part_body = part.get_payload(decode=True)
|
||||||
|
# https://github.com/django-helpdesk/django-helpdesk/issues/732
|
||||||
|
if part['Content-Transfer-Encoding'] == '8bit' and part.get_content_charset() == 'utf-8':
|
||||||
|
part_body = part_body.decode('unicode_escape')
|
||||||
|
part_body = decodeUnknown(part.get_content_charset(), part_body)
|
||||||
|
# have to use django_settings here so overwritting it works in tests
|
||||||
|
# the default value is False anyway
|
||||||
|
if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False):
|
||||||
|
# first message in thread, we save full body to avoid
|
||||||
|
# losing forwards and things like that
|
||||||
|
part_full_body = get_body_from_fragments(part_body)
|
||||||
|
part_body = EmailReplyParser.parse_reply(part_body)
|
||||||
|
else:
|
||||||
|
# second and other reply, save only first part of the
|
||||||
|
# message
|
||||||
|
part_body = EmailReplyParser.parse_reply(part_body)
|
||||||
|
part_full_body = part_body
|
||||||
|
# workaround to get unicode text out rather than escaped text
|
||||||
|
part_body = get_encoded_body(part_body)
|
||||||
|
logger.debug("Discovered plain text MIME part")
|
||||||
|
else:
|
||||||
|
email_body = get_email_body_from_part_payload(part)
|
||||||
|
|
||||||
|
if not part_body and not part_full_body:
|
||||||
|
# no text has been parsed so far - try such deep parsing
|
||||||
|
# for some messages
|
||||||
|
altered_body = email_body.replace(
|
||||||
|
"</p>", "</p>\n").replace("<br", "\n<br")
|
||||||
|
mail = BeautifulSoup(str(altered_body), "html.parser")
|
||||||
|
part_full_body = mail.get_text()
|
||||||
|
|
||||||
|
if "<body" not in email_body:
|
||||||
|
email_body = f"<body>{email_body}</body>"
|
||||||
|
|
||||||
|
payload = (
|
||||||
|
'<html>'
|
||||||
|
'<head>'
|
||||||
|
'<meta charset="utf-8" />'
|
||||||
|
'</head>'
|
||||||
|
'%s'
|
||||||
|
'</html>'
|
||||||
|
) % email_body
|
||||||
|
files.append(
|
||||||
|
SimpleUploadedFile(
|
||||||
|
_("email_html_body.html"), payload.encode("utf-8"), 'text/html')
|
||||||
|
)
|
||||||
|
logger.debug("Discovered HTML MIME part")
|
||||||
|
else:
|
||||||
|
if not name:
|
||||||
|
ext = mimetypes.guess_extension(part.get_content_type())
|
||||||
|
name = "part-%i%s" % (counter, ext)
|
||||||
|
else:
|
||||||
|
name = ("part-%i_" % counter) + name
|
||||||
|
|
||||||
|
files.append(SimpleUploadedFile(name, part.get_payload(decode=True), mimetypes.guess_type(name)[0]))
|
||||||
|
logger.debug("Found MIME attachment %s" % name)
|
||||||
|
return part_body, part_full_body
|
||||||
|
|
||||||
def object_from_message(message: str,
|
def object_from_message(message: str,
|
||||||
queue: Queue,
|
queue: Queue,
|
||||||
logger: logging.Logger
|
logger: logging.Logger
|
||||||
) -> Ticket:
|
) -> Ticket:
|
||||||
# 'message' must be an RFC822 formatted message.
|
# 'message' must be an RFC822 formatted message to correctly parse.
|
||||||
message = email.message_from_string(message)
|
message_obj = email.message_from_string(message)
|
||||||
|
|
||||||
subject = message.get('subject', _('Comment from e-mail'))
|
subject = message_obj.get('subject', _('Comment from e-mail'))
|
||||||
subject = decode_mail_headers(
|
subject = decode_mail_headers(
|
||||||
decodeUnknown(message.get_charset(), subject))
|
decodeUnknown(message_obj.get_charset(), subject))
|
||||||
for affix in STRIPPED_SUBJECT_STRINGS:
|
for affix in STRIPPED_SUBJECT_STRINGS:
|
||||||
subject = subject.replace(affix, "")
|
subject = subject.replace(affix, "")
|
||||||
subject = subject.strip()
|
subject = subject.strip()
|
||||||
|
|
||||||
sender = message.get('from', _('Unknown Sender'))
|
# TODO: Should really be assigning a properly formatted fake email.
|
||||||
sender = decode_mail_headers(decodeUnknown(message.get_charset(), sender))
|
# Check if anything relies on this being a "real name" formatted string if no sender is found on message_obj.
|
||||||
|
# Also not sure it should be accepting emails from unknown senders
|
||||||
# to address bug #832, we wrap all the text in front of the email address in
|
sender_email = _('Unknown Sender')
|
||||||
# double quotes by using replace() on the email string. Then,
|
sender_hdr = message_obj.get('from')
|
||||||
# take first item of list, second item of tuple is the actual email address.
|
if sender_hdr:
|
||||||
# Note that the replace won't work on just an email with no real name,
|
# Parse the header which extracts the first email address in the list if more than one
|
||||||
# but the getaddresses() function seems to be able to handle just unclosed quotes
|
# The parseaddr method returns a tuple in the form <real name> <email address>
|
||||||
# correctly. Not ideal, but this seems to work for now.
|
# Only need the actual email address from the tuple not the "real name"
|
||||||
sender_email = email.utils.getaddresses(
|
# Since the spec requires that all email addresses are ASCII, they will not be encoded
|
||||||
['\"' + sender.replace('<', '\" <')])[0][1]
|
sender_email = email.utils.parseaddr(sender_hdr)[1]
|
||||||
|
|
||||||
for ignore in IgnoreEmail.objects.filter(Q(queues=queue) | Q(queues__isnull=True)):
|
for ignore in IgnoreEmail.objects.filter(Q(queues=queue) | Q(queues__isnull=True)):
|
||||||
if ignore.test(sender_email):
|
if ignore.test(sender_email):
|
||||||
# By returning 'False' the message will be kept in the mailbox,
|
raise IgnoreTicketException()
|
||||||
# and the 'True' will cause the message to be deleted.
|
|
||||||
return not ignore.keep_in_mailbox
|
|
||||||
|
|
||||||
ticket_id: typing.Optional[int] = get_ticket_id_from_subject_slug(
|
ticket_id: typing.Optional[int] = get_ticket_id_from_subject_slug(
|
||||||
queue.slug,
|
queue.slug,
|
||||||
@ -617,108 +713,23 @@ def object_from_message(message: str,
|
|||||||
counter = 0
|
counter = 0
|
||||||
files = []
|
files = []
|
||||||
|
|
||||||
for part in message.walk():
|
for part in message_obj.walk():
|
||||||
if part.get_content_maintype() == 'multipart':
|
if part.get_content_maintype() == 'multipart':
|
||||||
continue
|
continue
|
||||||
|
# See email.message_obj.Message.get_filename()
|
||||||
name = part.get_param("name")
|
part_body, part_full_body = extract_part_data(part, counter, ticket_id, files, logger)
|
||||||
if name:
|
if part_body:
|
||||||
name = email.utils.collapse_rfc2231_value(name)
|
body = part_body
|
||||||
|
full_body = part_full_body
|
||||||
if part.get_content_maintype() == 'text' and name is None:
|
|
||||||
if part.get_content_subtype() == 'plain':
|
|
||||||
body = part.get_payload(decode=True)
|
|
||||||
# https://github.com/django-helpdesk/django-helpdesk/issues/732
|
|
||||||
if part['Content-Transfer-Encoding'] == '8bit' and part.get_content_charset() == 'utf-8':
|
|
||||||
body = body.decode('unicode_escape')
|
|
||||||
body = decodeUnknown(part.get_content_charset(), body)
|
|
||||||
# have to use django_settings here so overwritting it works in tests
|
|
||||||
# the default value is False anyway
|
|
||||||
if ticket_id is None and getattr(django_settings, 'HELPDESK_FULL_FIRST_MESSAGE_FROM_EMAIL', False):
|
|
||||||
# first message in thread, we save full body to avoid
|
|
||||||
# losing forwards and things like that
|
|
||||||
full_body = get_body_from_fragments(body)
|
|
||||||
body = EmailReplyParser.parse_reply(body)
|
|
||||||
else:
|
|
||||||
# second and other reply, save only first part of the
|
|
||||||
# message
|
|
||||||
body = EmailReplyParser.parse_reply(body)
|
|
||||||
full_body = body
|
|
||||||
# workaround to get unicode text out rather than escaped text
|
|
||||||
body = get_encoded_body(body)
|
|
||||||
logger.debug("Discovered plain text MIME part")
|
|
||||||
else:
|
|
||||||
email_body = get_email_body_from_part_payload(part)
|
|
||||||
|
|
||||||
if not body and not full_body:
|
|
||||||
# no text has been parsed so far - try such deep parsing
|
|
||||||
# for some messages
|
|
||||||
altered_body = email_body.replace(
|
|
||||||
"</p>", "</p>\n").replace("<br", "\n<br")
|
|
||||||
mail = BeautifulSoup(str(altered_body), "html.parser")
|
|
||||||
full_body = mail.get_text()
|
|
||||||
|
|
||||||
if "<body" not in email_body:
|
|
||||||
email_body = f"<body>{email_body}</body>"
|
|
||||||
|
|
||||||
payload = (
|
|
||||||
'<html>'
|
|
||||||
'<head>'
|
|
||||||
'<meta charset="utf-8" />'
|
|
||||||
'</head>'
|
|
||||||
'%s'
|
|
||||||
'</html>'
|
|
||||||
) % email_body
|
|
||||||
files.append(
|
|
||||||
SimpleUploadedFile(
|
|
||||||
_("email_html_body.html"), payload.encode("utf-8"), 'text/html')
|
|
||||||
)
|
|
||||||
logger.debug("Discovered HTML MIME part")
|
|
||||||
else:
|
|
||||||
if not name:
|
|
||||||
ext = mimetypes.guess_extension(part.get_content_type())
|
|
||||||
name = "part-%i%s" % (counter, ext)
|
|
||||||
else:
|
|
||||||
name = ("part-%i_" % counter) + name
|
|
||||||
|
|
||||||
# # FIXME: this code gets the paylods, then does something with it and then completely ignores it
|
|
||||||
# # writing the part.get_payload(decode=True) instead; and then the payload variable is
|
|
||||||
# # replaced by some dict later.
|
|
||||||
# # the `payloadToWrite` has been also ignored so was commented
|
|
||||||
# payload = part.get_payload()
|
|
||||||
# if isinstance(payload, list):
|
|
||||||
# payload = payload.pop().as_string()
|
|
||||||
# # payloadToWrite = payload
|
|
||||||
# # check version of python to ensure use of only the correct error type
|
|
||||||
# non_b64_err = TypeError
|
|
||||||
# try:
|
|
||||||
# logger.debug("Try to base64 decode the attachment payload")
|
|
||||||
# # payloadToWrite = base64.decodebytes(payload)
|
|
||||||
# except non_b64_err:
|
|
||||||
# logger.debug("Payload was not base64 encoded, using raw bytes")
|
|
||||||
# # payloadToWrite = payload
|
|
||||||
files.append(SimpleUploadedFile(name, part.get_payload(
|
|
||||||
decode=True), mimetypes.guess_type(name)[0]))
|
|
||||||
logger.debug("Found MIME attachment %s" % name)
|
|
||||||
|
|
||||||
counter += 1
|
counter += 1
|
||||||
|
|
||||||
if not body:
|
if not body:
|
||||||
mail = BeautifulSoup(str(message), "html.parser")
|
body, full_body = attempt_body_extract_from_html(message_obj)
|
||||||
beautiful_body = mail.find('body')
|
|
||||||
if beautiful_body:
|
|
||||||
try:
|
|
||||||
body = beautiful_body.text
|
|
||||||
full_body = body
|
|
||||||
except AttributeError:
|
|
||||||
pass
|
|
||||||
if not body:
|
|
||||||
body = ""
|
|
||||||
|
|
||||||
add_file_if_always_save_incoming_email_message(files, message)
|
add_file_if_always_save_incoming_email_message(files, message_obj)
|
||||||
|
|
||||||
smtp_priority = message.get('priority', '')
|
smtp_priority = message_obj.get('priority', '')
|
||||||
smtp_importance = message.get('importance', '')
|
smtp_importance = message_obj.get('importance', '')
|
||||||
high_priority_types = {'high', 'important', '1', 'urgent'}
|
high_priority_types = {'high', 'important', '1', 'urgent'}
|
||||||
priority = 2 if high_priority_types & {
|
priority = 2 if high_priority_types & {
|
||||||
smtp_priority, smtp_importance} else 3
|
smtp_priority, smtp_importance} else 3
|
||||||
@ -733,4 +744,4 @@ def object_from_message(message: str,
|
|||||||
'files': files,
|
'files': files,
|
||||||
}
|
}
|
||||||
|
|
||||||
return create_object_from_email_message(message, ticket_id, payload, files, logger=logger)
|
return create_object_from_email_message(message_obj, ticket_id, payload, files, logger=logger)
|
||||||
|
Loading…
Reference in New Issue
Block a user