From 2214484e4f91d045bce2347ff17a83291e606847 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Fri, 26 Jun 2026 04:34:25 +0000 Subject: [PATCH 1/5] test: add digraph normalization coverage --- tests/test_digraphs.py | 76 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 tests/test_digraphs.py diff --git a/tests/test_digraphs.py b/tests/test_digraphs.py new file mode 100644 index 0000000..c1f92dd --- /dev/null +++ b/tests/test_digraphs.py @@ -0,0 +1,76 @@ +import typing + +from cxxheaderparser.lexer import LexerTokenStream +from cxxheaderparser.simple import Include, NamespaceScope, ParsedData, parse_string + + +def _token_pairs(content: str) -> typing.List[typing.Tuple[str, str]]: + stream = LexerTokenStream("", content) + pairs = [] + + while True: + tok = stream.token_eof_ok() + if tok is None: + break + pairs.append((tok.type, tok.value)) + + return pairs + + +def test_digraph_tokens_normalize_to_brackets_and_braces() -> None: + assert _token_pairs("struct S <% int a<:3:>; %>;") == [ + ("struct", "struct"), + ("NAME", "S"), + ("{", "{"), + ("int", "int"), + ("NAME", "a"), + ("[", "["), + ("INT_CONST_DEC", "3"), + ("]", "]"), + (";", ";"), + ("}", "}"), + (";", ";"), + ] + + +def test_digraph_double_square_brackets_normalize_to_attribute_tokens() -> None: + assert _token_pairs("<:<:deprecated:>:> int x;") == [ + ("DBL_LBRACKET", "[["), + ("NAME", "deprecated"), + ("DBL_RBRACKET", "]]"), + ("int", "int"), + ("NAME", "x"), + (";", ";"), + ] + + +def test_less_colon_colon_stays_template_less_and_scope_operator() -> None: + assert _token_pairs("std::vector<::Foo> value;") == [ + ("NAME", "std"), + ("DBL_COLON", "::"), + ("NAME", "vector"), + ("<", "<"), + ("DBL_COLON", "::"), + ("NAME", "Foo"), + (">", ">"), + ("NAME", "value"), + (";", ";"), + ] + + +def test_parser_accepts_digraph_braces_and_array_bounds() -> None: + assert parse_string("struct S <% int a<:3:>; %>;") == parse_string( + "struct S { int a[3]; };" + ) + 
+ +def test_parser_accepts_digraph_attribute_brackets() -> None: + assert parse_string("<:<:deprecated:>:> int x;") == parse_string( + "[[deprecated]] int x;" + ) + + +def test_digraph_include_directive_is_normalized() -> None: + assert parse_string("%:include ") == ParsedData( + namespace=NamespaceScope(), includes=[Include(filename="")] + ) From 3d865ea9a4075eaf38cfa57745c364d547133ff8 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Fri, 26 Jun 2026 04:34:59 +0000 Subject: [PATCH 2/5] feat: normalize punctuation digraph tokens --- cxxheaderparser/lexer.py | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/cxxheaderparser/lexer.py b/cxxheaderparser/lexer.py index ff071af..066cc93 100644 --- a/cxxheaderparser/lexer.py +++ b/cxxheaderparser/lexer.py @@ -194,6 +194,12 @@ class PlyLexer: "ELLIPSIS", "DBL_LBRACKET", "DBL_RBRACKET", + "DIGRAPH_DBL_LBRACKET", + "DIGRAPH_DBL_RBRACKET", + "DIGRAPH_LBRACKET", + "DIGRAPH_RBRACKET", + "DIGRAPH_LBRACE", + "DIGRAPH_RBRACE", "DBL_COLON", "DBL_AMP", "DBL_PIPE", @@ -476,6 +482,42 @@ def t_PP_DIRECTIVE(self, t: LexToken): t, ) + @TOKEN(r"<:<:") + def t_DIGRAPH_DBL_LBRACKET(self, t: LexToken) -> LexToken: + t.type = "DBL_LBRACKET" + t.value = "[[" + return t + + @TOKEN(r":>:>") + def t_DIGRAPH_DBL_RBRACKET(self, t: LexToken) -> LexToken: + t.type = "DBL_RBRACKET" + t.value = "]]" + return t + + @TOKEN(r"<:(?!:[^:>])") + def t_DIGRAPH_LBRACKET(self, t: LexToken) -> LexToken: + t.type = "[" + t.value = "[" + return t + + @TOKEN(r":>") + def t_DIGRAPH_RBRACKET(self, t: LexToken) -> LexToken: + t.type = "]" + t.value = "]" + return t + + @TOKEN(r"<%") + def t_DIGRAPH_LBRACE(self, t: LexToken) -> LexToken: + t.type = "{" + t.value = "{" + return t + + @TOKEN(r"%>") + def t_DIGRAPH_RBRACE(self, t: LexToken) -> LexToken: + t.type = "}" + t.value = "}" + return t + t_DIVIDE = r"/(?!/)" t_ELLIPSIS = r"\.\.\." 
t_DBL_LBRACKET = r"\[\[" From d08603a7e9c56f68caff975d4eb597d77f77e296 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Fri, 26 Jun 2026 04:35:20 +0000 Subject: [PATCH 3/5] feat: normalize digraph preprocessor directives --- cxxheaderparser/lexer.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/cxxheaderparser/lexer.py b/cxxheaderparser/lexer.py index 066cc93..748a186 100644 --- a/cxxheaderparser/lexer.py +++ b/cxxheaderparser/lexer.py @@ -18,6 +18,7 @@ class LexError(CxxParseError): Protocol = object _line_re = re.compile(r'^\#[\t ]*(line)? (\d+) "(.*)"') +_pp_directive_prefix = r"(?:\#|%:)" _multicomment_re = re.compile("\n[\\s]+\\*") @@ -452,16 +453,17 @@ def t_NAME(self, t: LexToken) -> LexToken: t.type = t.value return t - @TOKEN(r"\#[\t ]*pragma") + @TOKEN(_pp_directive_prefix + r"[\t ]*pragma") def t_PRAGMA_DIRECTIVE(self, t: LexToken) -> LexToken: - return t + return self._normalize_pp_directive(t) - @TOKEN(r"\#[\t ]*include (.*)") + @TOKEN(_pp_directive_prefix + r"[\t ]*include (.*)") def t_INCLUDE_DIRECTIVE(self, t: LexToken) -> LexToken: - return t + return self._normalize_pp_directive(t) - @TOKEN(r"\#(.*)") + @TOKEN(_pp_directive_prefix + r"(.*)") def t_PP_DIRECTIVE(self, t: LexToken): + t = self._normalize_pp_directive(t) # handle line macros m = _line_re.match(t.value) if m: @@ -482,6 +484,11 @@ def t_PP_DIRECTIVE(self, t: LexToken): t, ) + def _normalize_pp_directive(self, t: LexToken) -> LexToken: + if t.value.startswith("%:"): + t.value = "#" + t.value[2:] + return t + @TOKEN(r"<:<:") def t_DIGRAPH_DBL_LBRACKET(self, t: LexToken) -> LexToken: t.type = "DBL_LBRACKET" From 9a298bdb9df5d26b10d1ae3e826c65b6fb841c83 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Fri, 26 Jun 2026 04:36:41 +0000 Subject: [PATCH 4/5] test: cover digraph line directives --- tests/test_digraphs.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_digraphs.py b/tests/test_digraphs.py index c1f92dd..49e54cd 
100644 --- a/tests/test_digraphs.py +++ b/tests/test_digraphs.py @@ -74,3 +74,9 @@ def test_digraph_include_directive_is_normalized() -> None: assert parse_string("%:include ") == ParsedData( namespace=NamespaceScope(), includes=[Include(filename="")] ) + + +def test_digraph_line_directive_is_normalized_for_locations() -> None: + data = parse_string('%: 42 "generated.hpp"\nint value;') + + assert data == parse_string('# 42 "generated.hpp"\nint value;') From 7b923389c25a40428e1d0e583a1e559ffd02b6a8 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Fri, 26 Jun 2026 04:51:15 +0000 Subject: [PATCH 5/5] test: add additional digraph cases C++ test inputs extracted from Hayden Michel's add_digraphs branch; assertions regenerated with python -m cxxheaderparser.gentest. Co-authored-by: Hayden Michel --- tests/test_digraphs.py | 542 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 541 insertions(+), 1 deletion(-) diff --git a/tests/test_digraphs.py b/tests/test_digraphs.py index 49e54cd..bb8a8a7 100644 --- a/tests/test_digraphs.py +++ b/tests/test_digraphs.py @@ -1,7 +1,33 @@ import typing from cxxheaderparser.lexer import LexerTokenStream -from cxxheaderparser.simple import Include, NamespaceScope, ParsedData, parse_string +from cxxheaderparser.simple import ( + ClassScope, + Include, + NamespaceScope, + ParsedData, + parse_string, +) +from cxxheaderparser.types import ( + Array, + ClassDecl, + EnumDecl, + Enumerator, + Field, + Function, + FundamentalSpecifier, + Method, + NameSpecifier, + Parameter, + PQName, + TemplateDecl, + TemplateNonTypeParam, + TemplateTypeParam, + Token, + Type, + Value, + Variable, +) def _token_pairs(content: str) -> typing.List[typing.Tuple[str, str]]: @@ -80,3 +106,517 @@ def test_digraph_line_directive_is_normalized_for_locations() -> None: data = parse_string('%: 42 "generated.hpp"\nint value;') assert data == parse_string('# 42 "generated.hpp"\nint value;') + + +def test_canonical_tokens_unaffected() -> None: + content = """ + 
namespace ns { + struct Foo { + int arr[5]; + }; + } + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + namespaces={ + "ns": NamespaceScope( + name="ns", + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Foo")], + classkey="struct", + ) + ), + fields=[ + Field( + access="public", + type=Array( + array_of=Type( + typename=PQName( + segments=[ + FundamentalSpecifier(name="int") + ] + ) + ), + size=Value(tokens=[Token(value="5")]), + ), + name="arr", + ) + ], + ) + ], + ) + } + ) + ) + + +def test_digraph_array_and_brace() -> None: + content = """ + struct Grid + <% + float data<:4:>; + %>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Grid")], classkey="struct" + ) + ), + fields=[ + Field( + access="public", + type=Array( + array_of=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="float")] + ) + ), + size=Value(tokens=[Token(value="4")]), + ), + name="data", + ) + ], + ) + ] + ) + ) + + +def test_digraph_array_subscript() -> None: + content = """ + int arr<:10:>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + variables=[ + Variable( + name=PQName(segments=[NameSpecifier(name="arr")]), + type=Array( + array_of=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + size=Value(tokens=[Token(value="10")]), + ), + ) + ] + ) + ) + + +def test_digraph_brace_open_close_function() -> None: + content = """ + #include <iostream> + int main() + <% + std::cout << "Hello, World!" 
<< std::endl; + return 0; + %> + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + functions=[ + Function( + return_type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + name=PQName(segments=[NameSpecifier(name="main")]), + parameters=[], + has_body=True, + ) + ] + ), + includes=[Include(filename="<iostream>")], + ) + + +def test_digraph_class_with_methods() -> None: + content = """ + class MyClass + <% + public: + MyClass(); + ~MyClass(); + int getValue() const; + %>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="MyClass")], classkey="class" + ) + ), + methods=[ + Method( + return_type=None, + name=PQName(segments=[NameSpecifier(name="MyClass")]), + parameters=[], + access="public", + constructor=True, + ), + Method( + return_type=None, + name=PQName(segments=[NameSpecifier(name="~MyClass")]), + parameters=[], + access="public", + destructor=True, + ), + Method( + return_type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name=PQName(segments=[NameSpecifier(name="getValue")]), + parameters=[], + access="public", + const=True, + ), + ], + ) + ] + ) + ) + + +def test_digraph_enum() -> None: + content = """ + enum Color + <% + Red, + Green, + Blue + %>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + enums=[ + EnumDecl( + typename=PQName( + segments=[NameSpecifier(name="Color")], classkey="enum" + ), + values=[ + Enumerator(name="Red"), + Enumerator(name="Green"), + Enumerator(name="Blue"), + ], + ) + ] + ) + ) + + +def test_digraph_mixed_braces() -> None: + content = """ + namespace ns + <% + struct Foo { + int a; + }; + %> + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + 
namespace=NamespaceScope( + namespaces={ + "ns": NamespaceScope( + name="ns", + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Foo")], + classkey="struct", + ) + ), + fields=[ + Field( + access="public", + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="a", + ) + ], + ) + ], + ) + } + ) + ) + + +def test_digraph_namespace_body() -> None: + content = """ + namespace myns + <% + int value; + %> + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + namespaces={ + "myns": NamespaceScope( + name="myns", + variables=[ + Variable( + name=PQName(segments=[NameSpecifier(name="value")]), + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + ) + ], + ) + } + ) + ) + + +def test_digraph_nested_braces() -> None: + content = """ + namespace outer + <% + struct Inner + <% + int val; + %>; + %> + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + namespaces={ + "outer": NamespaceScope( + name="outer", + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Inner")], + classkey="struct", + ) + ), + fields=[ + Field( + access="public", + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="val", + ) + ], + ) + ], + ) + } + ) + ) + + +def test_digraph_struct_body() -> None: + content = """ + struct Point + <% + int x; + int y; + %>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Point")], classkey="struct" + ) + ), + fields=[ + Field( + access="public", + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="x", + ), + Field( + access="public", + 
+ type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="y", + ), + ], + ) + ] + ) + ) + + +def test_percent_operator_unaffected() -> None: + content = """ + void fn(int x = 10 % 3); + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + functions=[ + Function( + return_type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="void")]) + ), + name=PQName(segments=[NameSpecifier(name="fn")]), + parameters=[ + Parameter( + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="x", + default=Value( + tokens=[ + Token(value="10"), + Token(value="%"), + Token(value="3"), + ] + ), + ) + ], + ) + ] + ) + ) + + +def test_shift_left_unaffected() -> None: + content = """ + template <typename T> + void fn(T x); + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + functions=[ + Function( + return_type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="void")]) + ), + name=PQName(segments=[NameSpecifier(name="fn")]), + parameters=[ + Parameter( + type=Type( + typename=PQName(segments=[NameSpecifier(name="T")]) + ), + name="x", + ) + ], + template=TemplateDecl( + params=[TemplateTypeParam(typekey="typename", name="T")] + ), + ) + ] + ) + ) + + +def test_template_angle_brackets_unaffected() -> None: + content = """ + template <typename T, int N> + struct Container + { + T data[N]; + }; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Container")], + classkey="struct", + ), + template=TemplateDecl( + params=[ + TemplateTypeParam(typekey="typename", name="T"), + TemplateNonTypeParam( + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="N", + ), + ] + ), + ), + fields=[ + Field( + access="public", +
type=Array( + array_of=Type( + typename=PQName(segments=[NameSpecifier(name="T")]) + ), + size=Value(tokens=[Token(value="N")]), + ), + name="data", + ) + ], + ) + ] + ) + )