@@ -511,8 +511,51 @@ def test_upsert_without_identifier_fields(catalog: Catalog) -> None:
511511 tbl .upsert (df )
512512
513513
514- def test_upsert_struct_field (catalog : Catalog ) -> None :
515- identifier = "default.test_upsert_struct_field"
def test_upsert_with_nulls(catalog: Catalog) -> None:
    """Check that an upsert replaces a null value in the matched row only.

    The table starts with two rows whose ``bar`` column is null. A single
    row keyed on ``foo`` is then upserted with a non-null ``bar``; the test
    expects exactly one update, no inserts, and the other row left as is.
    """
    identifier = "default.test_upsert_with_nulls"
    _drop_table(catalog, identifier)

    schema = pa.schema(
        [
            ("foo", pa.string()),
            ("bar", pa.int32()),
            ("baz", pa.bool_()),
        ]
    )

    # create table with null value
    table = catalog.create_table(identifier, schema)
    data_with_null = pa.Table.from_pylist(
        [
            {"foo": "apple", "bar": None, "baz": False},
            {"foo": "banana", "bar": None, "baz": False},
        ],
        schema=schema,
    )
    table.append(data_with_null)

    # `is_null()` yields a boolean array; asserting on that object does not
    # inspect its values, so count the nulls explicitly instead.
    bar_column = table.scan().to_arrow()["bar"]
    assert bar_column.null_count == len(bar_column) == 2

    # upsert table with non-null value
    data_without_null = pa.Table.from_pylist(
        [
            {"foo": "apple", "bar": 7, "baz": False},
        ],
        schema=schema,
    )
    upd = table.upsert(data_without_null, join_cols=["foo"])
    assert upd.rows_updated == 1
    assert upd.rows_inserted == 0
    assert table.scan().to_arrow() == pa.Table.from_pylist(
        [
            {"foo": "apple", "bar": 7, "baz": False},
            {"foo": "banana", "bar": None, "baz": False},
        ],
        schema=schema,
    )
555+
556+
557+ def test_upsert_with_struct_field (catalog : Catalog ) -> None :
558+ identifier = "default.test_upsert_with_struct_field"
516559 _drop_table (catalog , identifier )
517560
518561 schema = Schema (
@@ -578,44 +621,68 @@ def test_upsert_struct_field(catalog: Catalog) -> None:
578621 assert upd .rows_inserted == 1
579622
580623
def test_upsert_with_struct_field_as_join_key(catalog: Catalog) -> None:
    """Check that upserting with a struct column as the join key is rejected.

    The table has an integer ``id`` and an optional struct ``nested_type``.
    After one row is appended, an upsert joined on ``nested_type`` is
    expected to raise ``pa.lib.ArrowNotImplementedError``.
    """
    identifier = "default.test_upsert_with_struct_field_as_join_key"
    _drop_table(catalog, identifier)

    schema = Schema(
        NestedField(1, "id", IntegerType(), required=True),
        NestedField(
            2,
            "nested_type",
            StructType(
                NestedField(3, "sub1", StringType(), required=True),
                NestedField(4, "sub2", StringType(), required=True),
            ),
            required=False,
        ),
        identifier_field_ids=[1],
    )

    tbl = catalog.create_table(identifier, schema=schema)

    # Arrow-side schema used to build both the initial and the update data.
    arrow_schema = pa.schema(
        [
            pa.field("id", pa.int32(), nullable=False),
            pa.field(
                "nested_type",
                pa.struct(
                    [
                        pa.field("sub1", pa.large_string(), nullable=False),
                        pa.field("sub2", pa.large_string(), nullable=False),
                    ]
                ),
                nullable=True,
            ),
        ]
    )

    initial_data = pa.Table.from_pylist(
        [
            {
                "id": 1,
                "nested_type": {"sub1": "bla1", "sub2": "bla"},
            }
        ],
        schema=arrow_schema,
    )
    tbl.append(initial_data)

    update_data = pa.Table.from_pylist(
        [
            {
                "id": 2,
                "nested_type": {"sub1": "bla1", "sub2": "bla"},
            },
            {
                "id": 1,
                "nested_type": {"sub1": "bla1", "sub2": "bla"},
            },
        ],
        schema=arrow_schema,
    )

    # Joining on the struct column is expected to fail with this Arrow error.
    with pytest.raises(
        pa.lib.ArrowNotImplementedError,
        match="Keys of type struct<sub1: large_string not null, sub2: large_string not null>",
    ):
        tbl.upsert(update_data, join_cols=["nested_type"])
0 commit comments