Skip to content

Commit 0bac388

Browse files
committed
Type promotion - metadata file reading
1 parent df258f5 commit 0bac388

2 files changed

Lines changed: 48 additions & 0 deletions

File tree

pyiceberg/conversions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,8 @@ def _(_: PrimitiveType, b: bytes) -> int:
350350
@from_bytes.register(TimestampNanoType)
@from_bytes.register(TimestamptzNanoType)
def _(_: PrimitiveType, b: bytes) -> int:
    """Decode an integer value from its serialized byte form.

    A buffer of exactly four bytes is unpacked with ``_INT_STRUCT``; every
    other buffer is unpacked with ``_LONG_STRUCT``.

    Args:
        _: The primitive type being decoded (only used for dispatch).
        b: The serialized value.

    Returns:
        The first element of the unpacked struct.
    """
    # NOTE(review): the 4-byte path presumably exists for values written
    # while the column was still a 32-bit int (int -> long promotion).
    # This overload is also registered for timestamp types; confirm that a
    # 4-byte buffer is meaningful for those as well.
    codec = _INT_STRUCT if len(b) == 4 else _LONG_STRUCT
    return codec.unpack(b)[0]
354356

355357

@@ -360,6 +362,8 @@ def _(_: FloatType, b: bytes) -> float:
360362

361363
@from_bytes.register(DoubleType)
def _(_: DoubleType, b: bytes) -> float:
    """Decode a floating-point value from its serialized byte form.

    A buffer of exactly four bytes is unpacked with ``_FLOAT_STRUCT``; every
    other buffer is unpacked with ``_DOUBLE_STRUCT``.

    Args:
        _: The double type being decoded (only used for dispatch).
        b: The serialized value.

    Returns:
        The first element of the unpacked struct.
    """
    # NOTE(review): the 4-byte path presumably covers values written while
    # the column was still a float (float -> double promotion) -- confirm.
    codec = _FLOAT_STRUCT if len(b) == 4 else _DOUBLE_STRUCT
    return codec.unpack(b)[0]
364368

365369

tests/expressions/test_evaluator.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
FloatType,
5151
IcebergType,
5252
IntegerType,
53+
LongType,
5354
NestedField,
5455
PrimitiveType,
5556
StringType,
@@ -1463,3 +1464,46 @@ def test_strict_integer_not_in(strict_data_file_schema: Schema, strict_data_file
14631464

14641465
should_read = _StrictMetricsEvaluator(strict_data_file_schema, NotIn("no_nulls", {"abc", "def"})).eval(strict_data_file_1)
14651466
assert not should_read, "Should not match: no_nulls field does not have bounds"
1467+
1468+
1469+
def test_inclusive_metrics_evaluator_with_type_promotion_crash() -> None:
    """Inclusive evaluation prunes a file whose long column has int-encoded bounds."""
    # The table schema has already evolved: 'id' is a LongType.
    promoted_schema = Schema(NestedField(1, "id", LongType(), required=True))

    # The manifest entry still carries bounds serialized as IntegerType values.
    int_lower = to_bytes(IntegerType(), 30)
    int_upper = to_bytes(IntegerType(), 79)
    legacy_file = DataFile.from_args(
        file_path="file_1.parquet",
        file_format=FileFormat.PARQUET,
        partition={},
        record_count=100,
        file_size_in_bytes=1024,
        lower_bounds={1: int_lower},
        upper_bounds={1: int_upper},
    )

    # id > 100 cannot match values bounded by [30, 79], so the file is skipped.
    predicate = GreaterThan("id", 100)
    might_match = _InclusiveMetricsEvaluator(promoted_schema, predicate).eval(legacy_file)
    assert not might_match
1488+
1489+
1490+
def test_inclusive_metrics_evaluator_with_float_to_double_promotion() -> None:
    """Inclusive evaluation keeps a file whose double column has float-encoded bounds."""
    # The table schema has already evolved: 'val' is a DoubleType.
    promoted_schema = Schema(NestedField(1, "val", DoubleType(), required=True))

    # The manifest entry still carries bounds serialized as FloatType values.
    float_lower = to_bytes(FloatType(), 30.0)
    float_upper = to_bytes(FloatType(), 79.0)
    legacy_file = DataFile.from_args(
        file_path="file_1.parquet",
        file_format=FileFormat.PARQUET,
        partition={},
        record_count=100,
        file_size_in_bytes=1024,
        lower_bounds={1: float_lower},
        upper_bounds={1: float_upper},
    )

    # val < 50.0 overlaps the [30.0, 79.0] bounds, so evaluation must not
    # crash and must report that the file might contain matching rows.
    predicate = LessThan("val", 50.0)
    might_match = _InclusiveMetricsEvaluator(promoted_schema, predicate).eval(legacy_file)
    assert might_match

0 commit comments

Comments
 (0)