Reading upper/lower bounds values with type promotions (#3293)

rambleraptor · web-flow · commit 2558dcdb4c90 · 2026-04-30T18:01:33.000-07:00
diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
@@ -343,7 +343,6 @@ def _(_: PrimitiveType, b: bytes) -> int:
     return _INT_STRUCT.unpack(b)[0]
 
 
-@from_bytes.register(LongType)
 @from_bytes.register(TimeType)
 @from_bytes.register(TimestampType)
 @from_bytes.register(TimestamptzType)
@@ -353,13 +352,24 @@ def _(_: PrimitiveType, b: bytes) -> int:
     return _LONG_STRUCT.unpack(b)[0]
 
 
+@from_bytes.register(LongType)
+def _(_: PrimitiveType, b: bytes) -> int:
+    if len(b) < 8:
+        # If the length is 4 bytes, it is a promoted IntegerType
+        return _INT_STRUCT.unpack(b)[0]
+    return _LONG_STRUCT.unpack(b)[0]
+
+
 @from_bytes.register(FloatType)
 def _(_: FloatType, b: bytes) -> float:
     return _FLOAT_STRUCT.unpack(b)[0]
 
 
 @from_bytes.register(DoubleType)
 def _(_: DoubleType, b: bytes) -> float:
+    if len(b) < 8:
+        # If the length is 4 bytes, it is a promoted FloatType
+        return _FLOAT_STRUCT.unpack(b)[0]
     return _DOUBLE_STRUCT.unpack(b)[0]
 
 
diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py
@@ -41,7 +41,14 @@
     Or,
     StartsWith,
 )
-from pyiceberg.expressions.visitors import _InclusiveMetricsEvaluator, _StrictMetricsEvaluator
+from pyiceberg.expressions.visitors import (
+    ROWS_CANNOT_MATCH,
+    ROWS_MIGHT_MATCH,
+    ROWS_MIGHT_NOT_MATCH,
+    ROWS_MUST_MATCH,
+    _InclusiveMetricsEvaluator,
+    _StrictMetricsEvaluator,
+)
 from pyiceberg.manifest import DataFile, FileFormat
 from pyiceberg.schema import Schema
 from pyiceberg.typedef import Record
@@ -50,6 +57,7 @@
     FloatType,
     IcebergType,
     IntegerType,
+    LongType,
     NestedField,
     PrimitiveType,
     StringType,
@@ -1463,3 +1471,81 @@ def test_strict_integer_not_in(strict_data_file_schema: Schema, strict_data_file
 
     should_read = _StrictMetricsEvaluator(strict_data_file_schema, NotIn("no_nulls", {"abc", "def"})).eval(strict_data_file_1)
     assert not should_read, "Should not match: no_nulls field does not have bounds"
+
+
+@pytest.mark.parametrize(
+    "file_type, evolved_type, lower_bound, upper_bound, op, lit, expected",
+    [
+        # Int -> Long
+        (IntegerType(), LongType(), 30, 79, GreaterThan, 100, ROWS_CANNOT_MATCH),
+        (IntegerType(), LongType(), 30, 79, LessThan, 50, ROWS_MIGHT_MATCH),
+        # Float -> Double
+        (FloatType(), DoubleType(), 30.0, 79.0, GreaterThan, 100.0, ROWS_CANNOT_MATCH),
+        (FloatType(), DoubleType(), 30.0, 79.0, LessThan, 50.0, ROWS_MIGHT_MATCH),
+    ],
+)
+def test_inclusive_metrics_eval_bounds_after_promotion(
+    file_type: PrimitiveType,
+    evolved_type: PrimitiveType,
+    lower_bound: Any,
+    upper_bound: Any,
+    op: Any,
+    lit: Any,
+    expected: bool,
+) -> None:
+    schema = Schema(NestedField(1, "col", evolved_type, required=True))
+
+    data_file = DataFile.from_args(
+        file_path="file_1.parquet",
+        file_format=FileFormat.PARQUET,
+        partition={},
+        record_count=100,
+        file_size_in_bytes=1024,
+        lower_bounds={1: to_bytes(file_type, lower_bound)},
+        upper_bounds={1: to_bytes(file_type, upper_bound)},
+    )
+
+    evaluator = _InclusiveMetricsEvaluator(schema, op("col", lit))
+    assert evaluator.eval(data_file) == expected
+
+
+@pytest.mark.parametrize(
+    "file_type, evolved_type, lower_bound, upper_bound, op, lit, expected",
+    [
+        # Int -> Long
+        (IntegerType(), LongType(), 30, 79, GreaterThan, 20, ROWS_MUST_MATCH),
+        (IntegerType(), LongType(), 30, 79, GreaterThan, 100, ROWS_MIGHT_NOT_MATCH),
+        (IntegerType(), LongType(), 30, 79, LessThan, 100, ROWS_MUST_MATCH),
+        (IntegerType(), LongType(), 30, 79, LessThan, 20, ROWS_MIGHT_NOT_MATCH),
+        # Float -> Double
+        (FloatType(), DoubleType(), 30.0, 79.0, GreaterThan, 20.0, ROWS_MUST_MATCH),
+        (FloatType(), DoubleType(), 30.0, 79.0, GreaterThan, 100.0, ROWS_MIGHT_NOT_MATCH),
+        (FloatType(), DoubleType(), 30.0, 79.0, LessThan, 100.0, ROWS_MUST_MATCH),
+        (FloatType(), DoubleType(), 30.0, 79.0, LessThan, 20.0, ROWS_MIGHT_NOT_MATCH),
+    ],
+)
+def test_strict_metrics_eval_bounds_after_promotion(
+    file_type: PrimitiveType,
+    evolved_type: PrimitiveType,
+    lower_bound: Any,
+    upper_bound: Any,
+    op: Any,
+    lit: Any,
+    expected: bool,
+) -> None:
+    schema = Schema(NestedField(1, "col", evolved_type, required=True))
+
+    data_file = DataFile.from_args(
+        file_path="file_1.parquet",
+        file_format=FileFormat.PARQUET,
+        partition={},
+        record_count=100,
+        file_size_in_bytes=1024,
+        lower_bounds={1: to_bytes(file_type, lower_bound)},
+        upper_bounds={1: to_bytes(file_type, upper_bound)},
+        null_value_counts={1: 0},
+        nan_value_counts={1: 0},
+    )
+
+    evaluator = _StrictMetricsEvaluator(schema, op("col", lit))
+    assert evaluator.eval(data_file) == expected