Skip to content

Commit 0500478

Browse files
committed
update error handling for StatsAggregator
1 parent a7828f0 commit 0500478

2 files changed

Lines changed: 10 additions & 9 deletions

File tree

pyiceberg/io/pyarrow.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,13 +1982,11 @@ class StatsAggregator:
19821982
current_min: Any
19831983
current_max: Any
19841984
trunc_length: Optional[int]
1985-
column_name: Optional[str]
19861985

1987-
def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc_length: Optional[int] = None, column_name: Optional[str] = None) -> None:
1986+
def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc_length: Optional[int] = None) -> None:
19881987
self.current_min = None
19891988
self.current_max = None
19901989
self.trunc_length = trunc_length
1991-
self.column_name = column_name
19921990

19931991
expected_physical_type = _primitive_to_physical(iceberg_type)
19941992
if expected_physical_type != physical_type_string:
@@ -2000,7 +1998,7 @@ def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc
20001998
pass
20011999
else:
20022000
raise ValueError(
2003-
f"Unexpected physical type {physical_type_string} for {self.column_name or '<unknown column>'} with iceberg type {iceberg_type}, expected {expected_physical_type}"
2001+
f"Unexpected physical type {physical_type_string} for {iceberg_type}, expected {expected_physical_type}"
20042002
)
20052003

20062004
self.primitive_type = iceberg_type
@@ -2406,9 +2404,12 @@ def data_file_statistics_from_parquet_metadata(
24062404
continue
24072405

24082406
if field_id not in col_aggs:
2409-
col_aggs[field_id] = StatsAggregator(
2410-
stats_col.iceberg_type, statistics.physical_type, stats_col.mode.length, stats_col.column_name
2411-
)
2407+
try:
2408+
col_aggs[field_id] = StatsAggregator(
2409+
stats_col.iceberg_type, statistics.physical_type, stats_col.mode.length
2410+
)
2411+
except ValueError as e:
2412+
raise ValueError(f"{e} for column '{stats_col.column_name}'") from e
24122413

24132414
if isinstance(stats_col.iceberg_type, DecimalType) and statistics.physical_type != "FIXED_LEN_BYTE_ARRAY":
24142415
scale = stats_col.iceberg_type.scale

tests/io/test_pyarrow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2054,7 +2054,7 @@ def test_make_compatible_name() -> None:
20542054
],
20552055
)
20562056
def test_stats_aggregator_update_min(vals: List[Any], primitive_type: PrimitiveType, expected_result: Any) -> None:
2057-
stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type), column_name="test_col")
2057+
stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type))
20582058

20592059
for val in vals:
20602060
stats.update_min(val)
@@ -2074,7 +2074,7 @@ def test_stats_aggregator_update_min(vals: List[Any], primitive_type: PrimitiveT
20742074
],
20752075
)
20762076
def test_stats_aggregator_update_max(vals: List[Any], primitive_type: PrimitiveType, expected_result: Any) -> None:
2077-
stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type), column_name="test_col")
2077+
stats = StatsAggregator(primitive_type, _primitive_to_physical(primitive_type))
20782078

20792079
for val in vals:
20802080
stats.update_max(val)

0 commit comments

Comments
 (0)