diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 40ce721a..2ca433a4 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -15,9 +15,9 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/lib/markdown2.py b/lib/markdown2.py index dc698970..b09fd352 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1421,13 +1421,13 @@ def _unhash_html_spans(self, text: str, spans=True, code=False) -> str: ''' orig = '' while text != orig: + orig = text if spans: for key, sanitized in list(self.html_spans.items()): text = text.replace(key, sanitized) if code: for code, key in list(self._code_table.items()): text = text.replace(key, code) - orig = text return text def _sanitize_html(self, s: str) -> str: @@ -1537,8 +1537,10 @@ def _safe_href(self): safe = r'-\w' # omitted ['"<>] for XSS reasons less_safe = r'#/\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~' + # html encoded colon in a URL still functions as a normal colon, so need to detect those + protocol_seperators = [':', ':', ':', ':'] # dot seperated hostname, optional port number, not followed by protocol seperator - domain = r'(?:[{}]+(?:\.[{}]+)*)(?:(?<code>" onerror="alert(1)//</code>

A

+ +

x

+ +

x

+ + + +

diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 4a5c25a8..5b2eeb35 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -2,4 +2,14 @@ ![`" onerror="alert(1)//`]() -![A](B "") \ No newline at end of file +![A](B "") + +[x](javascript:alert(origin)) + +[x](javascript:1/alert(origin)) + +- +- ![](x '`![](`') onerror=alert(origin) ) + +![](``)