Merge pull request #1269 from django-helpdesk/fix_markdown_javascript_injection_prevention

Fix markdown javascript injection prevention
2025-08-15 02:22:36 +02:00 · 2025-05-17 13:36:36 +01:00
parent 185dc48232 acebf8610c
commit 8bdf3ce525
2 changed files with 44 additions and 15 deletions
--- a/helpdesk/models.py
+++ b/helpdesk/models.py
@ -38,22 +38,31 @@ class EscapeHtml(Extension):


 def get_markdown(text):
+    """
+    This algorithm will check for illegal schemes used in markdown clickable links
+    and remove the scheme. It does an iterative retry until no replacements done to
+    account for embedded schemes in the replacement text.
+    It will then do markdown processing to ensure safe markdown and return the safe string.
+    """
    if not text:
        return ""

-    pattern = r"([\[\s\S\]]*?)\(([\s\S]*?):([\s\S]*?)\)"
-    # Regex check
-    if re.match(pattern, text):
-        # get get value of group regex
-        scheme = re.search(pattern, text, re.IGNORECASE).group(2)
-        # scheme check
-        if scheme in helpdesk_settings.ALLOWED_URL_SCHEMES:
-            replacement = "\\1(\\2:\\3)"
-        else:
-            replacement = "\\1(\\3)"
-
-        text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
-
+    # Search for markdown that creates a clickable link and remove the undesirable ones
+    pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE)
+    rerun_scheme_check = (
+        True  # Used to decide if a re-check should be done after last pass
+    )
+    while rerun_scheme_check:
+        has_illegal_scheme = False
+        for m in re.finditer(pattern, text):
+            # check if scheme is allowed
+            if m.group(2).lower() in helpdesk_settings.ALLOWED_URL_SCHEMES:
+                # Considered safe so don't change it.
+                continue
+            # Remove the scheme and leave the rest
+            text = text.replace(m.group(0), f"{m.group(1)}({m.group(3)})")
+            has_illegal_scheme = True
+        rerun_scheme_check = has_illegal_scheme
    return mark_safe(
        markdown(
            text,
--- a/helpdesk/tests/test_markdown.py
+++ b/helpdesk/tests/test_markdown.py
@ -31,14 +31,34 @@ class MarkDown(SimpleTestCase):
        output_value = get_markdown(input_value)
        self.assertEqual(output_value, expected_value)

-    def test_markdown_link_correct_protokol(self):
+    def test_markdown_link_correct_protocol(self):
        expected_value = '<p><a href="http://www.yahoo.ru">www.google.com</a></p>'
        input_value = "[www.google.com](http://www.yahoo.ru)"
        output_value = get_markdown(input_value)
        self.assertEqual(output_value, expected_value)

-    def test_markdown_link_not_correct_protokol(self):
+    def test_markdown_link_not_correct_protocol(self):
        expected_value = '<p><a href="//www.yahoo.ru">www.google.com</a></p>'
        input_value = "[www.google.com](aaaa://www.yahoo.ru)"
        output_value = get_markdown(input_value)
        self.assertEqual(output_value, expected_value)
+
+    def test_multiline_markdown_link_with_correct_and_incorrect_protocol(self):
+        expected_value = '<p>This<a href="http://alert.javascript.test">XSS</a></p>\n<p>Line 2: <a href="alert(document.domain);">TEST</a></p>'
+        input_value = "This[XSS](http://alert.javascript.test)\n\nLine 2: [TEST](javascript:alert(document.domain);)"
+        output_value = get_markdown(input_value)
+        self.assertEqual(output_value, expected_value)
+
+    def test_multiline_markdown_link_with_correct_and_incorrect_protocol_twice_declared(
+        self,
+    ):
+        expected_value = '<p>This<a href="http://alert.javascript.test">XSS</a></p>\n<p>FAKE IT TILL YOU MAKE IT: <a href="alert(document.domain);">TEST</a></p>'
+        input_value = "This[XSS](http://alert.javascript.test)\n\nFAKE IT TILL YOU MAKE IT: [TEST](javascript:javascript:alert(document.domain);)"
+        output_value = get_markdown(input_value)
+        self.assertEqual(output_value, expected_value)
+
+    def test_markdown_link_with__multiple_incorrect_protocols(self):
+        expected_value = '<p>First one:<a href="alert(document.domain);">XSS1</a> ...try again: <a href="alert(document.domain);">XSS2</a></p>'
+        input_value = "First one:[XSS1](javascript:alert(document.domain);) ...try again: [XSS2](javascript:javascript:alert(document.domain);)"
+        output_value = get_markdown(input_value)
+        self.assertEqual(output_value, expected_value)