@@ -563,6 +563,36 @@ def test_sanitize() -> None:
563563 assert sanitize_column_names (before_sanitized ) == expected_schema
564564
565565
def test_sanitize_special_chars() -> None:
    """Verify sanitize_column_names on StringType fields whose names contain special characters."""
    # Each pair is (original field name, name expected after sanitizing):
    #   - a leading digit is prefixed with "_"
    #   - an already valid name is left unchanged
    #   - "." is rewritten to "_x2E"
    #   - a unicode character is rewritten to "_x<hex code point>"
    #   - "#" is rewritten to "_x23"
    cases = [
        ("9x", "_9x"),
        ("x_", "x_"),
        ("a.b", "a_x2Eb"),
        ("☃", "_x2603"),
        ("a#b", "a_x23b"),
    ]

    # Field ids are assigned 1..5 in declaration order for both schemas.
    raw_fields = [
        NestedField(field_id=position, name=raw_name, field_type=StringType(), required=True)
        for position, (raw_name, _) in enumerate(cases, start=1)
    ]
    sanitized_fields = [
        NestedField(field_id=position, name=clean_name, field_type=StringType(), required=True)
        for position, (_, clean_name) in enumerate(cases, start=1)
    ]

    unsanitized_schema = Schema(*raw_fields, schema_id=1, identifier_field_ids=[1])
    sanitized_schema = Schema(*sanitized_fields, schema_id=1, identifier_field_ids=[1])

    assert sanitize_column_names(unsanitized_schema) == sanitized_schema
594+
595+
566596def test_prune_columns_string (table_schema_nested_with_struct_key_map : Schema ) -> None :
567597 assert prune_columns (table_schema_nested_with_struct_key_map , {1 }, False ) == Schema (
568598 NestedField (field_id = 1 , name = "foo" , field_type = StringType (), required = True ), schema_id = 1 , identifier_field_ids = [1 ]
0 commit comments