From 3b96ec1c9e6974229796bc36aa9bc6fad6829b29 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 14 May 2026 22:23:12 +0100 Subject: [PATCH 1/5] Fix XSS from HTML encoded colons in hrefs --- lib/markdown2.py | 4 +++- test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 2 ++ test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index dc698970..6683df7c 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1537,8 +1537,10 @@ def _safe_href(self): safe = r'-\w' # omitted ['"<>] for XSS reasons less_safe = r'#/\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~' + # html encoded colon in a URL still functions as a normal colon, so need to detect those + protocol_seperators = [':', ':', ':', ':'] # dot seperated hostname, optional port number, not followed by protocol seperator - domain = r'(?:[{}]+(?:\.[{}]+)*)(?:(?<code>" onerror="alert(1)//</code>

A

+ +

x

diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 4a5c25a8..12d54edb 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -2,4 +2,6 @@ ![`" onerror="alert(1)//`]() -![A](B "") \ No newline at end of file +![A](B "") + +[x](javascript:alert(origin)) \ No newline at end of file From a11ce82fbb99c3f8b72711a141ee1a511a90846b Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 14 May 2026 22:24:25 +0100 Subject: [PATCH 2/5] Fix XSS from making javascript: hrefs look like domains with ports --- lib/markdown2.py | 2 +- test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 2 ++ test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 6683df7c..745d91f6 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1540,7 +1540,7 @@ def _safe_href(self): # html encoded colon in a URL still functions as a normal colon, so need to detect those protocol_seperators = [':', ':', ':', ':'] # dot seperated hostname, optional port number, not followed by protocol seperator - domain = r'(?:[{}]+(?:\.[{}]+)*)(?:(?A

x

+ +

x

diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 12d54edb..26edae4e 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -4,4 +4,6 @@ ![A](B "") -[x](javascript:alert(origin)) \ No newline at end of file +[x](javascript:alert(origin)) + +[x](javascript:1/alert(origin)) \ No newline at end of file From 82b4482b70a1718eef9a4d4fb2449c059949a5f0 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 14 May 2026 22:39:11 +0100 Subject: [PATCH 3/5] Fix onerror XSS in image title attr --- lib/markdown2.py | 2 ++ test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 7 +++++++ test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 745d91f6..4ba78a4f 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -3271,6 +3271,8 @@ def run(self, text: str): .replace('*', self.md._escape_table['*']) .replace('_', self.md._escape_table['_']) ) + if self.md.safe_mode: + title = self.md._hash_span(title) title_str = f' title="{title}"' else: title_str = '' diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html index 20e0cd4d..ccd398e7 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html @@ -7,3 +7,10 @@

x

x

+ +
    +
  • +
      +
    • onerror=alert(origin) )
    • +
  • +
diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 26edae4e..3f025a00 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -6,4 +6,7 @@ [x](javascript:alert(origin)) -[x](javascript:1/alert(origin)) \ No newline at end of file +[x](javascript:1/alert(origin)) + +- +- ![](x '`![](`') onerror=alert(origin) ) \ No newline at end of file From 456f8a97fa105b887e3c287ccdb0cc6eb53baa46 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 23 May 2026 10:36:52 +0100 Subject: [PATCH 4/5] Fix incomplete recursive unhashing of spans Issue was a while loop comparison. We did `orig != text` but assigned `orig = text` at the end of the loop, where it should have been at the start, before any transformations take place --- lib/markdown2.py | 2 +- test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 3 +++ test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 5 ++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 4ba78a4f..b09fd352 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1421,13 +1421,13 @@ def _unhash_html_spans(self, text: str, spans=True, code=False) -> str: ''' orig = '' while text != orig: + orig = text if spans: for key, sanitized in list(self.html_spans.items()): text = text.replace(key, sanitized) if code: for code, key in list(self._code_table.items()): text = text.replace(key, code) - orig = text return text def _sanitize_html(self, s: str) -> str: diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html index ccd398e7..47abd2f8 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html @@ -14,3 +14,6 @@
  • onerror=alert(origin) )
  • + +

    diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 3f025a00..5b2eeb35 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -9,4 +9,7 @@ [x](javascript:1/alert(origin)) - -- ![](x '`![](`') onerror=alert(origin) ) \ No newline at end of file +- ![](x '`![](`') onerror=alert(origin) ) + +![](``) From c173c1274419bc4a8a685ea180aa002bf172c68a Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 23 May 2026 10:56:09 +0100 Subject: [PATCH 5/5] Update github actions versions --- .github/workflows/python.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 40ce721a..2ca433a4 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -15,9 +15,9 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies