From 326cf226ddd2e7e0f49d223f244089ed6797ebff Mon Sep 17 00:00:00 2001
From: Christopher Broderick <uhurusurfa@gmail.com>
Date: Fri, 16 May 2025 20:10:17 +0100
Subject: [PATCH 1/5] Fix get_markdown function

---
 helpdesk/models.py | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/helpdesk/models.py b/helpdesk/models.py
index 11b7cdd7..9ab2224e 100644
--- a/helpdesk/models.py
+++ b/helpdesk/models.py
@@ -38,22 +38,29 @@ class EscapeHtml(Extension):
 
 
 def get_markdown(text):
+    """
+    This algorithm will check for illegal schemes used in markdown clickable links
+    and remove the scheme. It does an iterative retry until no replacements done to
+    account for embedded schemes in the replacement text.
+    It will then do markdown processing to ensure safe markdown and return the safe string.
+    """
     if not text:
         return ""
 
-    pattern = r"([\[\s\S\]]*?)\(([\s\S]*?):([\s\S]*?)\)"
-    # Regex check
-    if re.match(pattern, text):
-        # get get value of group regex
-        scheme = re.search(pattern, text, re.IGNORECASE).group(2)
-        # scheme check
-        if scheme in helpdesk_settings.ALLOWED_URL_SCHEMES:
-            replacement = "\\1(\\2:\\3)"
-        else:
-            replacement = "\\1(\\3)"
-
-        text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
-
+    # Search for markdown that creates a clickable link and remove the undesirable ones
+    pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE)
+    rerun_scheme_check = True # Used to decided to re-check the text after each parse 
+    while rerun_scheme_check:
+        has_illegal_scheme = False
+        for m in re.finditer(pattern, text):
+            # check if scheme is allowed
+            if m.group(2).lower() in helpdesk_settings.ALLOWED_URL_SCHEMES:
+                # Considered safe so dn't change it.
+                continue
+            # Remove the scheme and leave the rest
+            text = text.replace(m.group(0), f"{m.group(1)}({m.group(3)})")
+            has_illegal_scheme = True
+        rerun_scheme_check = has_illegal_scheme
     return mark_safe(
         markdown(
             text,

From 8c11758496f9d9e365d818ad791e737134d395b0 Mon Sep 17 00:00:00 2001
From: Christopher Broderick <uhurusurfa@gmail.com>
Date: Fri, 16 May 2025 20:10:50 +0100
Subject: [PATCH 2/5] Add tests for imp[roved markdown link cleaning

---
 helpdesk/tests/test_markdown.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/helpdesk/tests/test_markdown.py b/helpdesk/tests/test_markdown.py
index e3a860ab..6c11864c 100644
--- a/helpdesk/tests/test_markdown.py
+++ b/helpdesk/tests/test_markdown.py
@@ -31,14 +31,32 @@ class MarkDown(SimpleTestCase):
         output_value = get_markdown(input_value)
         self.assertEqual(output_value, expected_value)
 
-    def test_markdown_link_correct_protokol(self):
+    def test_markdown_link_correct_protocol(self):
         expected_value = '<p><a href="http://www.yahoo.ru">www.google.com</a></p>'
         input_value = "[www.google.com](http://www.yahoo.ru)"
         output_value = get_markdown(input_value)
         self.assertEqual(output_value, expected_value)
 
-    def test_markdown_link_not_correct_protokol(self):
+    def test_markdown_link_not_correct_protocol(self):
         expected_value = '<p><a href="//www.yahoo.ru">www.google.com</a></p>'
         input_value = "[www.google.com](aaaa://www.yahoo.ru)"
         output_value = get_markdown(input_value)
         self.assertEqual(output_value, expected_value)
+
+    def test_multiline_markdown_link_with_correct_and_incorrect_protocol(self):
+        expected_value = '<p>This<a href="http://alert.javascript.test">XSS</a></p>\n<p>Line 2: <a href="alert(document.domain);">TEST</a></p>'
+        input_value = "This[XSS](http://alert.javascript.test)\n\nLine 2: [TEST](javascript:alert(document.domain);)"
+        output_value = get_markdown(input_value)
+        self.assertEqual(output_value, expected_value)
+
+    def test_multiline_markdown_link_with_correct_and_incorrect_protocol_twice_declared(self):
+        expected_value = '<p>This<a href="http://alert.javascript.test">XSS</a></p>\n<p>FAKE IT TILL YOU MAKE IT: <a href="alert(document.domain);">TEST</a></p>'
+        input_value = "This[XSS](http://alert.javascript.test)\n\nFAKE IT TILL YOU MAKE IT: [TEST](javascript:javascript:alert(document.domain);)"
+        output_value = get_markdown(input_value)
+        self.assertEqual(output_value, expected_value)
+        
+    def test_markdown_link_with__multiple_incorrect_protocols(self):
+        expected_value = '<p>First one:<a href="alert(document.domain);">XSS1</a> ...try again: <a href="alert(document.domain);">XSS2</a></p>'
+        input_value = "First one:[XSS1](javascript:alert(document.domain);) ...try again: [XSS2](javascript:javascript:alert(document.domain);)"
+        output_value = get_markdown(input_value)
+        self.assertEqual(output_value, expected_value)

From 2bba70f3cc789932af343d41770adea37d75cf6c Mon Sep 17 00:00:00 2001
From: Christopher Broderick <uhurusurfa@gmail.com>
Date: Fri, 16 May 2025 20:46:18 +0100
Subject: [PATCH 3/5] Fix formatting

---
 helpdesk/models.py              | 2 +-
 helpdesk/tests/test_markdown.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/helpdesk/models.py b/helpdesk/models.py
index 9ab2224e..2c1b5f14 100644
--- a/helpdesk/models.py
+++ b/helpdesk/models.py
@@ -49,7 +49,7 @@ def get_markdown(text):
 
     # Search for markdown that creates a clickable link and remove the undesirable ones
     pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE)
-    rerun_scheme_check = True # Used to decided to re-check the text after each parse 
+    rerun_scheme_check = True  # Used to decided to re-check the text after each parse
     while rerun_scheme_check:
         has_illegal_scheme = False
         for m in re.finditer(pattern, text):
diff --git a/helpdesk/tests/test_markdown.py b/helpdesk/tests/test_markdown.py
index 6c11864c..3d281ecc 100644
--- a/helpdesk/tests/test_markdown.py
+++ b/helpdesk/tests/test_markdown.py
@@ -49,12 +49,14 @@ class MarkDown(SimpleTestCase):
         output_value = get_markdown(input_value)
         self.assertEqual(output_value, expected_value)
 
-    def test_multiline_markdown_link_with_correct_and_incorrect_protocol_twice_declared(self):
+    def test_multiline_markdown_link_with_correct_and_incorrect_protocol_twice_declared(
+        self,
+    ):
         expected_value = '<p>This<a href="http://alert.javascript.test">XSS</a></p>\n<p>FAKE IT TILL YOU MAKE IT: <a href="alert(document.domain);">TEST</a></p>'
         input_value = "This[XSS](http://alert.javascript.test)\n\nFAKE IT TILL YOU MAKE IT: [TEST](javascript:javascript:alert(document.domain);)"
         output_value = get_markdown(input_value)
         self.assertEqual(output_value, expected_value)
-        
+
     def test_markdown_link_with__multiple_incorrect_protocols(self):
         expected_value = '<p>First one:<a href="alert(document.domain);">XSS1</a> ...try again: <a href="alert(document.domain);">XSS2</a></p>'
         input_value = "First one:[XSS1](javascript:alert(document.domain);) ...try again: [XSS2](javascript:javascript:alert(document.domain);)"

From 00952197d19ba4130979bd835db13ce71d1088d1 Mon Sep 17 00:00:00 2001
From: Christopher Broderick <uhurusurfa@gmail.com>
Date: Fri, 16 May 2025 23:07:48 +0100
Subject: [PATCH 4/5] fix spelling

---
 helpdesk/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/helpdesk/models.py b/helpdesk/models.py
index 2c1b5f14..36a95582 100644
--- a/helpdesk/models.py
+++ b/helpdesk/models.py
@@ -49,13 +49,13 @@ def get_markdown(text):
 
     # Search for markdown that creates a clickable link and remove the undesirable ones
     pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE)
-    rerun_scheme_check = True  # Used to decided to re-check the text after each parse
+    rerun_scheme_check = True  # Used to decide if a re-check should be done after last pass
     while rerun_scheme_check:
         has_illegal_scheme = False
         for m in re.finditer(pattern, text):
             # check if scheme is allowed
             if m.group(2).lower() in helpdesk_settings.ALLOWED_URL_SCHEMES:
-                # Considered safe so dn't change it.
+                # Considered safe so don't change it.
                 continue
             # Remove the scheme and leave the rest
             text = text.replace(m.group(0), f"{m.group(1)}({m.group(3)})")

From acebf8610cffdb30f6a3f858462bdf5af838f5d0 Mon Sep 17 00:00:00 2001
From: Christopher Broderick <uhurusurfa@gmail.com>
Date: Fri, 16 May 2025 23:10:18 +0100
Subject: [PATCH 5/5] formatting

---
 helpdesk/models.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/helpdesk/models.py b/helpdesk/models.py
index 36a95582..8a54b242 100644
--- a/helpdesk/models.py
+++ b/helpdesk/models.py
@@ -49,7 +49,9 @@ def get_markdown(text):
 
     # Search for markdown that creates a clickable link and remove the undesirable ones
     pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE)
-    rerun_scheme_check = True  # Used to decide if a re-check should be done after last pass
+    rerun_scheme_check = (
+        True  # Used to decide if a re-check should be done after last pass
+    )
     while rerun_scheme_check:
         has_illegal_scheme = False
         for m in re.finditer(pattern, text):