Fix get_markdown function

This commit is contained in:
Christopher Broderick 2025-05-16 20:10:17 +01:00
parent 185dc48232
commit 326cf226dd

View File

@ -38,22 +38,29 @@ class EscapeHtml(Extension):
def get_markdown(text): def get_markdown(text):
"""
This algorithm checks for illegal schemes used in markdown clickable links
and removes the scheme. It retries iteratively until no replacements are made, to
account for schemes embedded in the replacement text.
It then performs markdown processing to ensure safe markdown and returns the safe string.
"""
if not text: if not text:
return "" return ""
pattern = r"([\[\s\S\]]*?)\(([\s\S]*?):([\s\S]*?)\)" # Search for markdown that creates a clickable link and remove the undesirable ones
# Regex check pattern = re.compile(r"(\[[\s\S]*?\])\(([\w]*?):([\s\S]*?)\)", flags=re.MULTILINE)
if re.match(pattern, text): rerun_scheme_check = True # Used to decide whether to re-check the text after each parse
# get get value of group regex while rerun_scheme_check:
scheme = re.search(pattern, text, re.IGNORECASE).group(2) has_illegal_scheme = False
# scheme check for m in re.finditer(pattern, text):
if scheme in helpdesk_settings.ALLOWED_URL_SCHEMES: # check if scheme is allowed
replacement = "\\1(\\2:\\3)" if m.group(2).lower() in helpdesk_settings.ALLOWED_URL_SCHEMES:
else: # Considered safe so don't change it.
replacement = "\\1(\\3)" continue
# Remove the scheme and leave the rest
text = re.sub(pattern, replacement, text, flags=re.IGNORECASE) text = text.replace(m.group(0), f"{m.group(1)}({m.group(3)})")
has_illegal_scheme = True
rerun_scheme_check = has_illegal_scheme
return mark_safe( return mark_safe(
markdown( markdown(
text, text,