From 326cf226ddd2e7e0f49d223f244089ed6797ebff Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Fri, 16 May 2025 20:10:17 +0100 Subject: [PATCH 1/5] Fix get_markdown function --- helpdesk/models.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/helpdesk/models.py b/helpdesk/models.py index 11b7cdd7..9ab2224e 100644 --- a/helpdesk/models.py +++ b/helpdesk/models.py @@ -38,22 +38,29 @@ class EscapeHtml(Extension): def get_markdown(text): + """ + This algorithm will check for illegal schemes used in markdown clickable links + and remove the scheme. It does an iterative retry until no replacements done to + account for embedded schemes in the replacement text. + It will then do markdown processing to ensure safe markdown and return the safe string. + """ if not text: return "" - pattern = r"([\[\s\S\]]*?)\(([\s\S]*?):([\s\S]*?)\)" - # Regex check - if re.match(pattern, text): - # get get value of group regex - scheme = re.search(pattern, text, re.IGNORECASE).group(2) - # scheme check - if scheme in helpdesk_settings.ALLOWED_URL_SCHEMES: - replacement = "\\1(\\2:\\3)" - else: - replacement = "\\1(\\3)" - - text = re.sub(pattern, replacement, text, flags=re.IGNORECASE) - + # Search for markdown that creates a clickable link and remove the undesirable ones + pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE) + rerun_scheme_check = True # Used to decided to re-check the text after each parse + while rerun_scheme_check: + has_illegal_scheme = False + for m in re.finditer(pattern, text): + # check if scheme is allowed + if m.group(2).lower() in helpdesk_settings.ALLOWED_URL_SCHEMES: + # Considered safe so dn't change it. + continue + # Remove the scheme and leave the rest + text = text.replace(m.group(0), f"{m.group(1)}({m.group(3)})") + has_illegal_scheme = True + rerun_scheme_check = has_illegal_scheme return mark_safe( markdown( text, From 8c11758496f9d9e365d818ad791e737134d395b0 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Fri, 16 May 2025 20:10:50 +0100 Subject: [PATCH 2/5] Add tests for imp[roved markdown link cleaning --- helpdesk/tests/test_markdown.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/helpdesk/tests/test_markdown.py b/helpdesk/tests/test_markdown.py index e3a860ab..6c11864c 100644 --- a/helpdesk/tests/test_markdown.py +++ b/helpdesk/tests/test_markdown.py @@ -31,14 +31,32 @@ class MarkDown(SimpleTestCase): output_value = get_markdown(input_value) self.assertEqual(output_value, expected_value) - def test_markdown_link_correct_protokol(self): + def test_markdown_link_correct_protocol(self): expected_value = '

www.google.com

' input_value = "[www.google.com](http://www.yahoo.ru)" output_value = get_markdown(input_value) self.assertEqual(output_value, expected_value) - def test_markdown_link_not_correct_protokol(self): + def test_markdown_link_not_correct_protocol(self): expected_value = '

www.google.com

' input_value = "[www.google.com](aaaa://www.yahoo.ru)" output_value = get_markdown(input_value) self.assertEqual(output_value, expected_value) + + def test_multiline_markdown_link_with_correct_and_incorrect_protocol(self): + expected_value = '

ThisXSS

\n

Line 2: TEST

' + input_value = "This[XSS](http://alert.javascript.test)\n\nLine 2: [TEST](javascript:alert(document.domain);)" + output_value = get_markdown(input_value) + self.assertEqual(output_value, expected_value) + + def test_multiline_markdown_link_with_correct_and_incorrect_protocol_twice_declared(self): + expected_value = '

ThisXSS

\n

FAKE IT TILL YOU MAKE IT: TEST

' + input_value = "This[XSS](http://alert.javascript.test)\n\nFAKE IT TILL YOU MAKE IT: [TEST](javascript:javascript:alert(document.domain);)" + output_value = get_markdown(input_value) + self.assertEqual(output_value, expected_value) + + def test_markdown_link_with__multiple_incorrect_protocols(self): + expected_value = '

First one:XSS1 ...try again: XSS2

' + input_value = "First one:[XSS1](javascript:alert(document.domain);) ...try again: [XSS2](javascript:javascript:alert(document.domain);)" + output_value = get_markdown(input_value) + self.assertEqual(output_value, expected_value) From 2bba70f3cc789932af343d41770adea37d75cf6c Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Fri, 16 May 2025 20:46:18 +0100 Subject: [PATCH 3/5] Fix formatting --- helpdesk/models.py | 2 +- helpdesk/tests/test_markdown.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/helpdesk/models.py b/helpdesk/models.py index 9ab2224e..2c1b5f14 100644 --- a/helpdesk/models.py +++ b/helpdesk/models.py @@ -49,7 +49,7 @@ def get_markdown(text): # Search for markdown that creates a clickable link and remove the undesirable ones pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE) - rerun_scheme_check = True # Used to decided to re-check the text after each parse + rerun_scheme_check = True # Used to decided to re-check the text after each parse while rerun_scheme_check: has_illegal_scheme = False for m in re.finditer(pattern, text): diff --git a/helpdesk/tests/test_markdown.py b/helpdesk/tests/test_markdown.py index 6c11864c..3d281ecc 100644 --- a/helpdesk/tests/test_markdown.py +++ b/helpdesk/tests/test_markdown.py @@ -49,12 +49,14 @@ class MarkDown(SimpleTestCase): output_value = get_markdown(input_value) self.assertEqual(output_value, expected_value) - def test_multiline_markdown_link_with_correct_and_incorrect_protocol_twice_declared(self): + def test_multiline_markdown_link_with_correct_and_incorrect_protocol_twice_declared( + self, + ): expected_value = '

ThisXSS

\n

FAKE IT TILL YOU MAKE IT: TEST

' input_value = "This[XSS](http://alert.javascript.test)\n\nFAKE IT TILL YOU MAKE IT: [TEST](javascript:javascript:alert(document.domain);)" output_value = get_markdown(input_value) self.assertEqual(output_value, expected_value) - + def test_markdown_link_with__multiple_incorrect_protocols(self): expected_value = '

First one:XSS1 ...try again: XSS2

' input_value = "First one:[XSS1](javascript:alert(document.domain);) ...try again: [XSS2](javascript:javascript:alert(document.domain);)" From 00952197d19ba4130979bd835db13ce71d1088d1 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Fri, 16 May 2025 23:07:48 +0100 Subject: [PATCH 4/5] fix spelling --- helpdesk/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helpdesk/models.py b/helpdesk/models.py index 2c1b5f14..36a95582 100644 --- a/helpdesk/models.py +++ b/helpdesk/models.py @@ -49,13 +49,13 @@ def get_markdown(text): # Search for markdown that creates a clickable link and remove the undesirable ones pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE) - rerun_scheme_check = True # Used to decided to re-check the text after each parse + rerun_scheme_check = True # Used to decide if a re-check should be done after last pass while rerun_scheme_check: has_illegal_scheme = False for m in re.finditer(pattern, text): # check if scheme is allowed if m.group(2).lower() in helpdesk_settings.ALLOWED_URL_SCHEMES: - # Considered safe so dn't change it. + # Considered safe so don't change it. continue # Remove the scheme and leave the rest text = text.replace(m.group(0), f"{m.group(1)}({m.group(3)})") From acebf8610cffdb30f6a3f858462bdf5af838f5d0 Mon Sep 17 00:00:00 2001 From: Christopher Broderick Date: Fri, 16 May 2025 23:10:18 +0100 Subject: [PATCH 5/5] formatting --- helpdesk/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/helpdesk/models.py b/helpdesk/models.py index 36a95582..8a54b242 100644 --- a/helpdesk/models.py +++ b/helpdesk/models.py @@ -49,7 +49,9 @@ def get_markdown(text): # Search for markdown that creates a clickable link and remove the undesirable ones pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE) - rerun_scheme_check = True # Used to decide if a re-check should be done after last pass + rerun_scheme_check = ( + True # Used to decide if a re-check should be done after last pass + ) while rerun_scheme_check: has_illegal_scheme = False for m in re.finditer(pattern, text):