Fix UTF-8 decoding bug in email parsing code

For some reason, Mozilla Thunderbird sometimes marks email parts as 8bit even though they are UTF-8 encoded. The best way to work around this seems to be wrapping the decoding in a try/except block, because this really cannot be predicted.
2025-08-09 08:05:13 +02:00 · 2020-01-23 15:30:08 +01:00
parent af2d0d59b7
commit 9963a3fe5d
3 changed files with 93 additions and 1 deletions
--- a/helpdesk/tests/test_get_email.py
+++ b/helpdesk/tests/test_get_email.py
@ -84,6 +84,21 @@ class GetEmailCommonTests(TestCase):
        self.assertEqual(ticket.title, "Testovácí email")
        self.assertEqual(ticket.description, "íářčšáíéřášč")

+    def test_email_with_utf_8_non_decodable_sequences(self):
+        """
+        Tests that an email with byte sequences not decodable as UTF-8 is still parsed into the expected ticket
+        """
+        with open(os.path.join(THIS_DIR, "test_files/utf-nondecodable.eml")) as fd:  # fixture is read in text mode
+            test_email = fd.read()
+        ticket = helpdesk.email.object_from_message(test_email, self.queue_public, self.logger)
+        self.assertEqual(ticket.title, "Fwd: Cyklozaměstnavatel - změna vyhodnocení")
+        self.assertIn("prosazuje lepší", ticket.description)
+        followups = FollowUp.objects.filter(ticket=ticket)
+        followup = followups[0]  # only the first follow-up of the ticket is inspected
+        attachments = FollowUpAttachment.objects.filter(followup=followup)
+        attachment = attachments[0]
+        self.assertIn('prosazuje lepší', attachment.file.read().decode("utf-8"))  # stored attachment bytes must decode as UTF-8
+

 class GetEmailParametricTemplate(object):
    """TestCase that checks basic email functionality across methods and socks configs."""