@@ -892,6 +892,7 @@ def test_coarse_match_filter_composite_key() -> None:
892892 Test that create_coarse_match_filter produces efficient In() predicates for composite keys.
893893 """
894894 from pyiceberg .table .upsert_util import create_coarse_match_filter , create_match_filter
895+ from pyiceberg .expressions import Or , And , In
895896
896897 # Create a table with composite key that has overlapping values
897898 # (1, 'x'), (2, 'y'), (1, 'z') - exact filter should have 3 conditions
@@ -908,10 +909,10 @@ def test_coarse_match_filter_composite_key() -> None:
908909 coarse_filter = create_coarse_match_filter (table , ["a" , "b" ])
909910
910911 # Exact filter is an Or of And conditions
911- assert "Or" in str (exact_filter )
912+ assert isinstance (exact_filter , Or )
912913
913914 # Coarse filter is an And of In conditions
914- assert "And" in str (coarse_filter )
915+ assert isinstance (coarse_filter , And )
915916 assert "In" in str (coarse_filter )
916917
917918
@@ -1071,3 +1072,48 @@ def test_upsert_with_list_field(catalog: Catalog) -> None:
10711072 res = tbl .upsert (update_data , join_cols = ["id" ])
10721073 assert res .rows_updated == 1
10731074 assert res .rows_inserted == 1
1075+
1076+
def test_vectorized_comparison_struct_level_nulls() -> None:
    """Check that a null struct is compared as a value of its own, not only its fields.

    Each case pairs a source column with a target column and lists the
    per-row "differs" flags that ``_compare_columns_vectorized`` must report.
    """
    from pyiceberg.table.upsert_util import _compare_columns_vectorized

    struct_type = pa.struct([("x", pa.int32()), ("y", pa.string())])

    row_a = {"x": 1, "y": "a"}
    row_b = {"x": 2, "y": "b"}
    row_c = {"x": 3, "y": "c"}

    # (source rows, target rows, expected per-row difference flags)
    cases = [
        # Null struct in the source, populated struct in the target: different.
        ([row_a, None, row_c], [row_a, row_b, row_c], [False, True, False]),
        # Populated struct in the source, null struct in the target: different.
        ([row_a, row_b, row_c], [row_a, None, row_c], [False, True, False]),
        # Null struct on both sides: same, so no update is needed.
        ([row_a, None, row_c], [row_a, None, row_c], [False, False, False]),
    ]

    for source_rows, target_rows, expected in cases:
        source = pa.array(source_rows, type=struct_type)
        target = pa.array(target_rows, type=struct_type)
        diff = _compare_columns_vectorized(source, target)
        assert diff.to_pylist() == expected
1100+
1101+
def test_vectorized_comparison_empty_struct_with_nulls() -> None:
    """Check that columns of field-less structs are compared by struct-level nullness.

    With no child fields to inspect, the only thing that can distinguish two
    rows is whether the struct itself is null.
    """
    from pyiceberg.table.upsert_util import _compare_columns_vectorized

    # A struct type with zero fields: only struct-level null handling matters.
    empty_struct_type = pa.struct([])

    # (source rows, target rows, expected per-row difference flags)
    cases = [
        # Null versus non-null empty struct: different.
        ([{}, None, {}], [{}, {}, {}], [False, True, False]),
        # Null versus null empty struct: same.
        ([None, None], [None, None], [False, False]),
    ]

    for source_rows, target_rows, expected in cases:
        source = pa.array(source_rows, type=empty_struct_type)
        target = pa.array(target_rows, type=empty_struct_type)
        diff = _compare_columns_vectorized(source, target)
        assert diff.to_pylist() == expected
0 commit comments