diff --git a/cxxheaderparser/lexer.py b/cxxheaderparser/lexer.py index ff071af..748a186 100644 --- a/cxxheaderparser/lexer.py +++ b/cxxheaderparser/lexer.py @@ -18,6 +18,7 @@ class LexError(CxxParseError): Protocol = object _line_re = re.compile(r'^\#[\t ]*(line)? (\d+) "(.*)"') +_pp_directive_prefix = r"(?:\#|%:)" _multicomment_re = re.compile("\n[\\s]+\\*") @@ -194,6 +195,12 @@ class PlyLexer: "ELLIPSIS", "DBL_LBRACKET", "DBL_RBRACKET", + "DIGRAPH_DBL_LBRACKET", + "DIGRAPH_DBL_RBRACKET", + "DIGRAPH_LBRACKET", + "DIGRAPH_RBRACKET", + "DIGRAPH_LBRACE", + "DIGRAPH_RBRACE", "DBL_COLON", "DBL_AMP", "DBL_PIPE", @@ -446,16 +453,17 @@ def t_NAME(self, t: LexToken) -> LexToken: t.type = t.value return t - @TOKEN(r"\#[\t ]*pragma") + @TOKEN(_pp_directive_prefix + r"[\t ]*pragma") def t_PRAGMA_DIRECTIVE(self, t: LexToken) -> LexToken: - return t + return self._normalize_pp_directive(t) - @TOKEN(r"\#[\t ]*include (.*)") + @TOKEN(_pp_directive_prefix + r"[\t ]*include (.*)") def t_INCLUDE_DIRECTIVE(self, t: LexToken) -> LexToken: - return t + return self._normalize_pp_directive(t) - @TOKEN(r"\#(.*)") + @TOKEN(_pp_directive_prefix + r"(.*)") def t_PP_DIRECTIVE(self, t: LexToken): + t = self._normalize_pp_directive(t) # handle line macros m = _line_re.match(t.value) if m: @@ -476,6 +484,47 @@ def t_PP_DIRECTIVE(self, t: LexToken): t, ) + def _normalize_pp_directive(self, t: LexToken) -> LexToken: + if t.value.startswith("%:"): + t.value = "#" + t.value[2:] + return t + + @TOKEN(r"<:<:") + def t_DIGRAPH_DBL_LBRACKET(self, t: LexToken) -> LexToken: + t.type = "DBL_LBRACKET" + t.value = "[[" + return t + + @TOKEN(r":>:>") + def t_DIGRAPH_DBL_RBRACKET(self, t: LexToken) -> LexToken: + t.type = "DBL_RBRACKET" + t.value = "]]" + return t + + @TOKEN(r"<:(?!:[^:>])") + def t_DIGRAPH_LBRACKET(self, t: LexToken) -> LexToken: + t.type = "[" + t.value = "[" + return t + + @TOKEN(r":>") + def t_DIGRAPH_RBRACKET(self, t: LexToken) -> LexToken: + t.type = "]" + t.value = "]" + return t + + @TOKEN(r"<%") + def t_DIGRAPH_LBRACE(self, t: LexToken) -> LexToken: + t.type = "{" + t.value = "{" + return t + + @TOKEN(r"%>") + def t_DIGRAPH_RBRACE(self, t: LexToken) -> LexToken: + t.type = "}" + t.value = "}" + return t + t_DIVIDE = r"/(?!/)" t_ELLIPSIS = r"\.\.\." t_DBL_LBRACKET = r"\[\[" diff --git a/tests/test_digraphs.py b/tests/test_digraphs.py new file mode 100644 index 0000000..bb8a8a7 --- /dev/null +++ b/tests/test_digraphs.py @@ -0,0 +1,622 @@ +import typing + +from cxxheaderparser.lexer import LexerTokenStream +from cxxheaderparser.simple import ( + ClassScope, + Include, + NamespaceScope, + ParsedData, + parse_string, +) +from cxxheaderparser.types import ( + Array, + ClassDecl, + EnumDecl, + Enumerator, + Field, + Function, + FundamentalSpecifier, + Method, + NameSpecifier, + Parameter, + PQName, + TemplateDecl, + TemplateNonTypeParam, + TemplateTypeParam, + Token, + Type, + Value, + Variable, +) + + +def _token_pairs(content: str) -> typing.List[typing.Tuple[str, str]]: + stream = LexerTokenStream("", content) + pairs = [] + + while True: + tok = stream.token_eof_ok() + if tok is None: + break + pairs.append((tok.type, tok.value)) + + return pairs + + +def test_digraph_tokens_normalize_to_brackets_and_braces() -> None: + assert _token_pairs("struct S <% int a<:3:>; %>;") == [ + ("struct", "struct"), + ("NAME", "S"), + ("{", "{"), + ("int", "int"), + ("NAME", "a"), + ("[", "["), + ("INT_CONST_DEC", "3"), + ("]", "]"), + (";", ";"), + ("}", "}"), + (";", ";"), + ] + + +def test_digraph_double_square_brackets_normalize_to_attribute_tokens() -> None: + assert _token_pairs("<:<:deprecated:>:> int x;") == [ + ("DBL_LBRACKET", "[["), + ("NAME", "deprecated"), + ("DBL_RBRACKET", "]]"), + ("int", "int"), + ("NAME", "x"), + (";", ";"), + ] + + +def test_less_colon_colon_stays_template_less_and_scope_operator() -> None: + assert _token_pairs("std::vector<::Foo> value;") == [ + ("NAME", "std"), + ("DBL_COLON", "::"), + ("NAME", "vector"), + ("<", "<"), + ("DBL_COLON", "::"), + ("NAME", "Foo"), + (">", ">"), + ("NAME", "value"), + (";", ";"), + ] + + +def test_parser_accepts_digraph_braces_and_array_bounds() -> None: + assert parse_string("struct S <% int a<:3:>; %>;") == parse_string( + "struct S { int a[3]; };" + ) + + +def test_parser_accepts_digraph_attribute_brackets() -> None: + assert parse_string("<:<:deprecated:>:> int x;") == parse_string( + "[[deprecated]] int x;" + ) + + +def test_digraph_include_directive_is_normalized() -> None: + assert parse_string("%:include ") == ParsedData( + namespace=NamespaceScope(), includes=[Include(filename="")] + ) + + +def test_digraph_line_directive_is_normalized_for_locations() -> None: + data = parse_string('%: 42 "generated.hpp"\nint value;') + + assert data == parse_string('# 42 "generated.hpp"\nint value;') + + +def test_canonical_tokens_unaffected() -> None: + content = """ + namespace ns { + struct Foo { + int arr[5]; + }; + } + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + namespaces={ + "ns": NamespaceScope( + name="ns", + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Foo")], + classkey="struct", + ) + ), + fields=[ + Field( + access="public", + type=Array( + array_of=Type( + typename=PQName( + segments=[ + FundamentalSpecifier(name="int") + ] + ) + ), + size=Value(tokens=[Token(value="5")]), + ), + name="arr", + ) + ], + ) + ], + ) + } + ) + ) + + +def test_digraph_array_and_brace() -> None: + content = """ + struct Grid + <% + float data<:4:>; + %>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Grid")], classkey="struct" + ) + ), + fields=[ + Field( + access="public", + type=Array( + array_of=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="float")] + ) + ), + size=Value(tokens=[Token(value="4")]), + ), + name="data", + ) + ], + ) + ] + ) + ) + + +def test_digraph_array_subscript() -> None: + content = """ + int arr<:10:>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + variables=[ + Variable( + name=PQName(segments=[NameSpecifier(name="arr")]), + type=Array( + array_of=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + size=Value(tokens=[Token(value="10")]), + ), + ) + ] + ) + ) + + +def test_digraph_brace_open_close_function() -> None: + content = """ + #include + int main() + <% + std::cout << "Hello, World!" << std::endl; + return 0; + %> + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + functions=[ + Function( + return_type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="int")]) + ), + name=PQName(segments=[NameSpecifier(name="main")]), + parameters=[], + has_body=True, + ) + ] + ), + includes=[Include(filename="")], + ) + + +def test_digraph_class_with_methods() -> None: + content = """ + class MyClass + <% + public: + MyClass(); + ~MyClass(); + int getValue() const; + %>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="MyClass")], classkey="class" + ) + ), + methods=[ + Method( + return_type=None, + name=PQName(segments=[NameSpecifier(name="MyClass")]), + parameters=[], + access="public", + constructor=True, + ), + Method( + return_type=None, + name=PQName(segments=[NameSpecifier(name="~MyClass")]), + parameters=[], + access="public", + destructor=True, + ), + Method( + return_type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name=PQName(segments=[NameSpecifier(name="getValue")]), + parameters=[], + access="public", + const=True, + ), + ], + ) + ] + ) + ) + + +def test_digraph_enum() -> None: + content = """ + enum Color + <% + Red, + Green, + Blue + %>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + enums=[ + EnumDecl( + typename=PQName( + segments=[NameSpecifier(name="Color")], classkey="enum" + ), + values=[ + Enumerator(name="Red"), + Enumerator(name="Green"), + Enumerator(name="Blue"), + ], + ) + ] + ) + ) + + +def test_digraph_mixed_braces() -> None: + content = """ + namespace ns + <% + struct Foo { + int a; + }; + %> + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + namespaces={ + "ns": NamespaceScope( + name="ns", + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Foo")], + classkey="struct", + ) + ), + fields=[ + Field( + access="public", + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="a", + ) + ], + ) + ], + ) + } + ) + ) + + +def test_digraph_namespace_body() -> None: + content = """ + namespace myns + <% + int value; + %> + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + namespaces={ + "myns": NamespaceScope( + name="myns", + variables=[ + Variable( + name=PQName(segments=[NameSpecifier(name="value")]), + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + ) + ], + ) + } + ) + ) + + +def test_digraph_nested_braces() -> None: + content = """ + namespace outer + <% + struct Inner + <% + int val; + %>; + %> + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + namespaces={ + "outer": NamespaceScope( + name="outer", + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Inner")], + classkey="struct", + ) + ), + fields=[ + Field( + access="public", + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="val", + ) + ], + ) + ], + ) + } + ) + ) + + +def test_digraph_struct_body() -> None: + content = """ + struct Point + <% + int x; + int y; + %>; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Point")], classkey="struct" + ) + ), + fields=[ + Field( + access="public", + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="x", + ), + Field( + access="public", + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="y", + ), + ], + ) + ] + ) + ) + + +def test_percent_operator_unaffected() -> None: + content = """ + void fn(int x = 10 % 3); + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + functions=[ + Function( + return_type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="void")]) + ), + name=PQName(segments=[NameSpecifier(name="fn")]), + parameters=[ + Parameter( + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="x", + default=Value( + tokens=[ + Token(value="10"), + Token(value="%"), + Token(value="3"), + ] + ), + ) + ], + ) + ] + ) + ) + + +def test_shift_left_unaffected() -> None: + content = """ + template + void fn(T x); + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + functions=[ + Function( + return_type=Type( + typename=PQName(segments=[FundamentalSpecifier(name="void")]) + ), + name=PQName(segments=[NameSpecifier(name="fn")]), + parameters=[ + Parameter( + type=Type( + typename=PQName(segments=[NameSpecifier(name="T")]) + ), + name="x", + ) + ], + template=TemplateDecl( + params=[TemplateTypeParam(typekey="typename", name="T")] + ), + ) + ] + ) + ) + + +def test_template_angle_brackets_unaffected() -> None: + content = """ + template + struct Container + { + T data[N]; + }; + """ + + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + classes=[ + ClassScope( + class_decl=ClassDecl( + typename=PQName( + segments=[NameSpecifier(name="Container")], + classkey="struct", + ), + template=TemplateDecl( + params=[ + TemplateTypeParam(typekey="typename", name="T"), + TemplateNonTypeParam( + type=Type( + typename=PQName( + segments=[FundamentalSpecifier(name="int")] + ) + ), + name="N", + ), + ] + ), + ), + fields=[ + Field( + access="public", + type=Array( + array_of=Type( + typename=PQName(segments=[NameSpecifier(name="T")]) + ), + size=Value(tokens=[Token(value="N")]), + ), + name="data", + ) + ], + ) + ] + ) + )