# Patch summary (two files touched; the patch text below is unchanged).
#
# cssselect/parser.py, _replace_unicode():
#   The hex digits captured in match.group(1) are parsed with int(..., 16).
#   Before this change the resulting code point was replaced by 0xFFFD only
#   when it exceeded sys.maxunicode. The changed line also replaces it when it
#   lies in 0xD800..0xDFFF (the UTF-16 surrogate range). The function then
#   returns chr(codepoint).
#
# tests/test_cssselect.py:
#   Adds one assertion that parse_one(":\\DDDD") returns
#   ("Pseudo[Element[*]:\ufffd]", None), i.e. an escape inside the newly
#   covered range is expected to come back as U+FFFD.
#
# NOTE(review): a code point of 0 is not covered by the new condition and is
# still passed to chr() as-is -- confirm whether it should also map to 0xFFFD.
diff --git a/cssselect/parser.py b/cssselect/parser.py index f969769..2c0a55e 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -919,7 +919,7 @@ def _compile(pattern: str) -> MatchFunc: def _replace_unicode(match: re.Match[str]) -> str: codepoint = int(match.group(1), 16) - if codepoint > sys.maxunicode: + if codepoint > sys.maxunicode or 0xD800 <= codepoint <= 0xDFFF: codepoint = 0xFFFD return chr(codepoint) diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index dc67bb7..a1164e7 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -213,6 +213,7 @@ def test_pseudo_repr(css: str) -> str: assert parse_one("*") == ("Element[*]", None) assert parse_one(":empty") == ("Pseudo[Element[*]:empty]", None) assert parse_one(":scope") == ("Pseudo[Element[*]:scope]", None) + assert parse_one(":\\DDDD") == ("Pseudo[Element[*]:\ufffd]", None) # Special cases for CSS 2.1 pseudo-elements assert parse_one(":BEfore") == ("Element[*]", "before")