Skip to content

Commit 1296116

Browse files
committed
strict=True in tests
1 parent 96b0edc commit 1296116

7 files changed

Lines changed: 40 additions & 37 deletions

File tree

tests/integration/test_add_files.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,7 @@ def test_add_file_with_valid_nullability_diff(spark: SparkSession, session_catal
713713
rhs = written_arrow_table.to_pandas()
714714

715715
for column in written_arrow_table.column_names:
716-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
716+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
717717
assert left == right
718718

719719

@@ -755,7 +755,7 @@ def test_add_files_with_valid_upcast(
755755
rhs = written_arrow_table.to_pandas()
756756

757757
for column in written_arrow_table.column_names:
758-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
758+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
759759
if column == "map":
760760
# Arrow returns a list of tuples, instead of a dict
761761
right = dict(right)
@@ -802,7 +802,7 @@ def test_add_files_subset_of_schema(spark: SparkSession, session_catalog: Catalo
802802
rhs = written_arrow_table.to_pandas()
803803

804804
for column in written_arrow_table.column_names:
805-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
805+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
806806
assert left == right
807807

808808

tests/integration/test_inspect_table.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def _inspect_files_asserts(df: pa.Table, spark_df: DataFrame) -> None:
152152
if column == "partition":
153153
# Spark leaves out the partition if the table is unpartitioned
154154
continue
155-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
155+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
156156
if isinstance(left, float) and math.isnan(left) and isinstance(right, float) and math.isnan(right):
157157
# NaN != NaN in Python
158158
continue
@@ -209,7 +209,7 @@ def _check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> No
209209
lhs = df.to_pandas().sort_values("last_updated_at")
210210
rhs = spark_df.toPandas().sort_values("last_updated_at")
211211
for column in df.column_names:
212-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
212+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
213213
assert left == right, f"Difference in column {column}: {left} != {right}"
214214

215215

@@ -284,7 +284,7 @@ def test_inspect_snapshots(
284284
lhs = spark.table(f"{identifier}.snapshots").toPandas()
285285
rhs = df.to_pandas()
286286
for column in df.column_names:
287-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
287+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
288288
if column == "summary":
289289
# Arrow returns a list of tuples, instead of a dict
290290
right = dict(right)
@@ -332,7 +332,7 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non
332332
assert len(lhs) == len(rhs)
333333

334334
for column in df.column_names:
335-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
335+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
336336
if column == "data_file":
337337
for df_column in left.keys():
338338
if df_column == "partition":
@@ -485,7 +485,7 @@ def test_inspect_refs(
485485
lhs = spark.table(f"{identifier}.refs").toPandas()
486486
rhs = df.to_pandas()
487487
for column in df.column_names:
488-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
488+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
489489
if isinstance(left, float) and math.isnan(left) and isinstance(right, float) and math.isnan(right):
490490
# NaN != NaN in Python
491491
continue
@@ -535,7 +535,7 @@ def test_inspect_partitions_unpartitioned(
535535
lhs = df.to_pandas()
536536
rhs = spark.table(f"{identifier}.partitions").toPandas()
537537
for column in df.column_names:
538-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
538+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
539539
assert left == right, f"Difference in column {column}: {left} != {right}"
540540

541541

@@ -755,7 +755,7 @@ def test_inspect_manifests(spark: SparkSession, session_catalog: Catalog, format
755755
lhs = spark.table(f"{identifier}.manifests").toPandas()
756756
rhs = df.to_pandas()
757757
for column in df.column_names:
758-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
758+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
759759
assert left == right, f"Difference in column {column}: {left} != {right}"
760760

761761

@@ -793,7 +793,7 @@ def test_inspect_metadata_log_entries(
793793
assert_frame_equal(left_before_last, right_before_last, check_dtype=False)
794794
# compare the last row, except for the timestamp
795795
for column in df.column_names:
796-
for left, right in zip(left_last[column], right_last[column]):
796+
for left, right in zip(left_last[column], right_last[column], strict=True):
797797
if column == "timestamp":
798798
continue
799799
assert left == right, f"Difference in column {column}: {left} != {right}"
@@ -861,7 +861,7 @@ def test_inspect_history(spark: SparkSession, session_catalog: Catalog, format_v
861861
lhs = spark.table(f"{identifier}.history").toPandas()
862862
rhs = df.to_pandas()
863863
for column in df.column_names:
864-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
864+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
865865
if isinstance(left, float) and math.isnan(left) and isinstance(right, float) and math.isnan(right):
866866
# NaN != NaN in Python
867867
continue

tests/integration/test_partitioning_key.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -737,7 +737,7 @@ def test_partition_key(
737737
) -> None:
738738
field_values = [
739739
PartitionFieldValue(field, field.transform.transform(TABLE_SCHEMA.find_field(field.source_id).field_type)(value))
740-
for field, value in zip(partition_fields, partition_values)
740+
for field, value in zip(partition_fields, partition_values, strict=True)
741741
]
742742
spec = PartitionSpec(*partition_fields)
743743

tests/integration/test_rest_manifest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
# direct comparison with the dicts returned by fastavro
3939
def todict(obj: Any, spec_keys: List[str]) -> Any:
4040
if type(obj) is Record:
41-
return {key: obj[pos] for key, pos in zip(spec_keys, range(len(obj)))}
41+
return {key: obj[pos] for key, pos in zip(spec_keys, range(len(obj)), strict=True)}
4242
if isinstance(obj, dict) or isinstance(obj, LazyDict):
4343
data = []
4444
for k, v in obj.items():

tests/integration/test_writes/test_writes.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,9 @@ def test_spark_writes_orc_pyiceberg_reads(spark: SparkSession, session_catalog:
759759
]
760760

761761
# Verify PyIceberg results contain the expected data (appears twice due to create + append)
762-
pyiceberg_data = list(zip(pyiceberg_df["id"], pyiceberg_df["name"], pyiceberg_df["age"], pyiceberg_df["is_active"]))
762+
pyiceberg_data = list(
763+
zip(pyiceberg_df["id"], pyiceberg_df["name"], pyiceberg_df["age"], pyiceberg_df["is_active"], strict=True)
764+
)
763765
assert pyiceberg_data == expected_data + expected_data # Data should appear twice
764766

765767
# Verify PyIceberg data types are correct
@@ -1170,7 +1172,7 @@ def test_inspect_snapshots(
11701172
lhs = spark.table(f"{identifier}.snapshots").toPandas()
11711173
rhs = df.to_pandas()
11721174
for column in df.column_names:
1173-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
1175+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
11741176
if column == "summary":
11751177
# Arrow returns a list of tuples, instead of a dict
11761178
right = dict(right)
@@ -1466,7 +1468,7 @@ def test_table_write_schema_with_valid_nullability_diff(
14661468
rhs = written_arrow_table.to_pandas()
14671469

14681470
for column in written_arrow_table.column_names:
1469-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
1471+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
14701472
assert left == right
14711473

14721474

@@ -1506,7 +1508,7 @@ def test_table_write_schema_with_valid_upcast(
15061508
rhs = written_arrow_table.to_pandas()
15071509

15081510
for column in written_arrow_table.column_names:
1509-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
1511+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
15101512
if column == "map":
15111513
# Arrow returns a list of tuples, instead of a dict
15121514
right = dict(right)
@@ -1552,7 +1554,7 @@ def test_write_all_timestamp_precision(
15521554
rhs = written_arrow_table.to_pandas()
15531555

15541556
for column in written_arrow_table.column_names:
1555-
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
1557+
for left, right in zip(lhs[column].to_list(), rhs[column].to_list(), strict=True):
15561558
if pd.isnull(left):
15571559
assert pd.isnull(right)
15581560
else:

tests/io/test_pyarrow.py

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,16 +1076,16 @@ def test_projection_add_column(file_int: str) -> None:
10761076
for col in result_table.columns:
10771077
assert len(col) == 3
10781078

1079-
for actual, expected in zip(result_table.columns[0], [None, None, None]):
1079+
for actual, expected in zip(result_table.columns[0], [None, None, None], strict=True):
10801080
assert actual.as_py() == expected
10811081

1082-
for actual, expected in zip(result_table.columns[1], [None, None, None]):
1082+
for actual, expected in zip(result_table.columns[1], [None, None, None], strict=True):
10831083
assert actual.as_py() == expected
10841084

1085-
for actual, expected in zip(result_table.columns[2], [None, None, None]):
1085+
for actual, expected in zip(result_table.columns[2], [None, None, None], strict=True):
10861086
assert actual.as_py() == expected
10871087

1088-
for actual, expected in zip(result_table.columns[3], [None, None, None]):
1088+
for actual, expected in zip(result_table.columns[3], [None, None, None], strict=True):
10891089
assert actual.as_py() == expected
10901090
assert (
10911091
repr(result_table.schema)
@@ -1106,7 +1106,9 @@ def test_read_list(schema_list: Schema, file_list: str) -> None:
11061106
result_table = project(schema_list, [file_list])
11071107

11081108
assert len(result_table.columns[0]) == 3
1109-
for actual, expected in zip(result_table.columns[0], [list(range(1, 10)), list(range(2, 20)), list(range(3, 30))]):
1109+
for actual, expected in zip(
1110+
result_table.columns[0], [list(range(1, 10)), list(range(2, 20)), list(range(3, 30))], strict=True
1111+
):
11101112
assert actual.as_py() == expected
11111113

11121114
assert (
@@ -1120,7 +1122,7 @@ def test_read_map(schema_map: Schema, file_map: str) -> None:
11201122
result_table = project(schema_map, [file_map])
11211123

11221124
assert len(result_table.columns[0]) == 3
1123-
for actual, expected in zip(result_table.columns[0], [[("a", "b")], [("c", "d")], [("e", "f"), ("g", "h")]]):
1125+
for actual, expected in zip(result_table.columns[0], [[("a", "b")], [("c", "d")], [("e", "f"), ("g", "h")]], strict=True):
11241126
assert actual.as_py() == expected
11251127

11261128
assert (
@@ -1177,7 +1179,7 @@ def test_projection_rename_column(schema_int: Schema, file_int: str) -> None:
11771179
)
11781180
result_table = project(schema, [file_int])
11791181
assert len(result_table.columns[0]) == 3
1180-
for actual, expected in zip(result_table.columns[0], [0, 1, 2]):
1182+
for actual, expected in zip(result_table.columns[0], [0, 1, 2], strict=True):
11811183
assert actual.as_py() == expected
11821184

11831185
assert repr(result_table.schema) == "other_name: int32 not null"
@@ -1186,7 +1188,7 @@ def test_projection_rename_column(schema_int: Schema, file_int: str) -> None:
11861188
def test_projection_concat_files(schema_int: Schema, file_int: str) -> None:
11871189
result_table = project(schema_int, [file_int, file_int])
11881190

1189-
for actual, expected in zip(result_table.columns[0], [0, 1, 2, 0, 1, 2]):
1191+
for actual, expected in zip(result_table.columns[0], [0, 1, 2, 0, 1, 2], strict=True):
11901192
assert actual.as_py() == expected
11911193
assert len(result_table.columns[0]) == 6
11921194
assert repr(result_table.schema) == "id: int32"
@@ -1350,7 +1352,7 @@ def test_projection_filter_add_column(schema_int: Schema, file_int: str, file_st
13501352
"""We have one file that has the column, and the other one doesn't"""
13511353
result_table = project(schema_int, [file_int, file_string])
13521354

1353-
for actual, expected in zip(result_table.columns[0], [0, 1, 2, None, None, None]):
1355+
for actual, expected in zip(result_table.columns[0], [0, 1, 2, None, None, None], strict=True):
13541356
assert actual.as_py() == expected
13551357
assert len(result_table.columns[0]) == 6
13561358
assert repr(result_table.schema) == "id: int32"
@@ -1360,7 +1362,7 @@ def test_projection_filter_add_column_promote(file_int: str) -> None:
13601362
schema_long = Schema(NestedField(1, "id", LongType(), required=True))
13611363
result_table = project(schema_long, [file_int])
13621364

1363-
for actual, expected in zip(result_table.columns[0], [0, 1, 2]):
1365+
for actual, expected in zip(result_table.columns[0], [0, 1, 2], strict=True):
13641366
assert actual.as_py() == expected
13651367
assert len(result_table.columns[0]) == 3
13661368
assert repr(result_table.schema) == "id: int64 not null"
@@ -1388,7 +1390,7 @@ def test_projection_nested_struct_subset(file_struct: str) -> None:
13881390

13891391
result_table = project(schema, [file_struct])
13901392

1391-
for actual, expected in zip(result_table.columns[0], [52.371807, 52.387386, 52.078663]):
1393+
for actual, expected in zip(result_table.columns[0], [52.371807, 52.387386, 52.078663], strict=True):
13921394
assert actual.as_py() == {"lat": expected}
13931395

13941396
assert len(result_table.columns[0]) == 3
@@ -1413,7 +1415,7 @@ def test_projection_nested_new_field(file_struct: str) -> None:
14131415

14141416
result_table = project(schema, [file_struct])
14151417

1416-
for actual, expected in zip(result_table.columns[0], [None, None, None]):
1418+
for actual, expected in zip(result_table.columns[0], [None, None, None], strict=True):
14171419
assert actual.as_py() == {"null": expected}
14181420
assert len(result_table.columns[0]) == 3
14191421
assert (
@@ -1445,6 +1447,7 @@ def test_projection_nested_struct(schema_struct: Schema, file_struct: str) -> No
14451447
{"lat": 52.387386, "long": 4.646219, "null": None},
14461448
{"lat": 52.078663, "long": 4.288788, "null": None},
14471449
],
1450+
strict=True,
14481451
):
14491452
assert actual.as_py() == expected
14501453
assert len(result_table.columns[0]) == 3
@@ -1536,6 +1539,7 @@ def test_projection_maps_of_structs(schema_map_of_structs: Schema, file_map_of_s
15361539
("4", {"latitude": 52.387386, "longitude": 4.646219, "altitude": None}),
15371540
],
15381541
],
1542+
strict=True,
15391543
):
15401544
assert actual.as_py() == expected
15411545
assert (
@@ -1563,7 +1567,7 @@ def test_projection_nested_struct_different_parent_id(file_struct: str) -> None:
15631567
)
15641568

15651569
result_table = project(schema, [file_struct])
1566-
for actual, expected in zip(result_table.columns[0], [None, None, None]):
1570+
for actual, expected in zip(result_table.columns[0], [None, None, None], strict=True):
15671571
assert actual.as_py() == expected
15681572
assert len(result_table.columns[0]) == 3
15691573
assert (
@@ -1579,10 +1583,7 @@ def test_projection_filter_on_unprojected_field(schema_int_str: Schema, file_int
15791583

15801584
result_table = project(schema, [file_int_str], GreaterThan("data", "1"), schema_int_str)
15811585

1582-
for actual, expected in zip(
1583-
result_table.columns[0],
1584-
[2],
1585-
):
1586+
for actual, expected in zip(result_table.columns[0], [2], strict=True):
15861587
assert actual.as_py() == expected
15871588
assert len(result_table.columns[0]) == 1
15881589
assert repr(result_table.schema) == "id: int32 not null"

tests/test_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ def test_deepcopy_of_singleton_fixed_type() -> None:
698698
list_of_fixed_types = [FixedType(22), FixedType(19)]
699699
copied_list = deepcopy(list_of_fixed_types)
700700

701-
for lhs, rhs in zip(list_of_fixed_types, copied_list):
701+
for lhs, rhs in zip(list_of_fixed_types, copied_list, strict=True):
702702
assert id(lhs) == id(rhs)
703703

704704

0 commit comments

Comments (0)