From 42ca4df4334e39160fd627901ecf352b6be39776 Mon Sep 17 00:00:00 2001 From: Synrom Date: Thu, 7 May 2026 13:05:42 +0000 Subject: [PATCH 1/2] Fix: Replace surrogates in Unicode code points --- cssselect/parser.py | 2 +- tests/test_cssselect.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cssselect/parser.py b/cssselect/parser.py index f969769..2c0a55e 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -919,7 +919,7 @@ def _compile(pattern: str) -> MatchFunc: def _replace_unicode(match: re.Match[str]) -> str: codepoint = int(match.group(1), 16) - if codepoint > sys.maxunicode: + if codepoint > sys.maxunicode or 0xD800 <= codepoint <= 0xDFFF: codepoint = 0xFFFD return chr(codepoint) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index dc67bb7..cd82d05 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -213,6 +213,7 @@ def test_pseudo_repr(css: str) -> str: assert parse_one("*") == ("Element[*]", None) assert parse_one(":empty") == ("Pseudo[Element[*]:empty]", None) assert parse_one(":scope") == ("Pseudo[Element[*]:scope]", None) + assert parse_one(":\\DDDD") == ('Pseudo[Element[*]:\ufffd]', None) # Special cases for CSS 2.1 pseudo-elements assert parse_one(":BEfore") == ("Element[*]", "before") From 0109175b12fd165a511285baac9583e5dd4f9879 Mon Sep 17 00:00:00 2001 From: Synrom Date: Thu, 7 May 2026 13:34:38 +0000 Subject: [PATCH 2/2] Ruff format --- tests/test_cssselect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index cd82d05..a1164e7 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -213,7 +213,7 @@ def test_pseudo_repr(css: str) -> str: assert parse_one("*") == ("Element[*]", None) assert parse_one(":empty") == ("Pseudo[Element[*]:empty]", None) assert parse_one(":scope") == ("Pseudo[Element[*]:scope]", None) - assert parse_one(":\\DDDD") == ('Pseudo[Element[*]:\ufffd]', None) + assert parse_one(":\\DDDD") == ("Pseudo[Element[*]:\ufffd]", None) # Special cases for CSS 2.1 pseudo-elements assert parse_one(":BEfore") == ("Element[*]", "before")