|
50 | 50 | FloatType, |
51 | 51 | IcebergType, |
52 | 52 | IntegerType, |
| 53 | + LongType, |
53 | 54 | NestedField, |
54 | 55 | PrimitiveType, |
55 | 56 | StringType, |
@@ -1463,3 +1464,46 @@ def test_strict_integer_not_in(strict_data_file_schema: Schema, strict_data_file |
1463 | 1464 |
|
1464 | 1465 | should_read = _StrictMetricsEvaluator(strict_data_file_schema, NotIn("no_nulls", {"abc", "def"})).eval(strict_data_file_1) |
1465 | 1466 | assert not should_read, "Should not match: no_nulls field does not have bounds" |
| 1467 | + |
| 1468 | + |
def test_inclusive_metrics_evaluator_with_type_promotion_crash() -> None:
    """Check that int-encoded bounds are usable after an int -> long promotion.

    The data file carries bounds serialized with ``IntegerType`` while the
    current schema declares the column as ``LongType``. The evaluator must
    decode both bounds without raising and use them for pruning decisions.
    """
    # Schema defines 'id' as LongType (evolved state)
    schema = Schema(NestedField(1, "id", LongType(), required=True))

    # Historical manifest contains 4-byte integer bounds
    data_file = DataFile.from_args(
        file_path="file_1.parquet",
        file_format=FileFormat.PARQUET,
        partition={},
        record_count=100,
        file_size_in_bytes=1024,
        lower_bounds={1: to_bytes(IntegerType(), 30)},
        upper_bounds={1: to_bytes(IntegerType(), 79)},
    )

    # Predicate: id > 100
    # Decodes the 4-byte upper bound correctly and skips the file
    evaluator_pruning = _InclusiveMetricsEvaluator(schema, GreaterThan("id", 100))
    assert not evaluator_pruning.eval(data_file), "Should not read: id range above upper bound (100 > 79)"

    # Predicate: id < 10
    # Exercises the lower bound, which the predicate above never consults
    should_read = _InclusiveMetricsEvaluator(schema, LessThan("id", 10)).eval(data_file)
    assert not should_read, "Should not read: id range below lower bound (10 < 30)"

    # Predicate: id > 50
    # Falls inside [30, 79], so the file cannot be pruned
    should_read = _InclusiveMetricsEvaluator(schema, GreaterThan("id", 50)).eval(data_file)
    assert should_read, "Should read: id range overlaps the bounds (30 <= 50 < 79)"
| 1488 | + |
| 1489 | + |
def test_inclusive_metrics_evaluator_with_float_to_double_promotion() -> None:
    """Check that float-encoded bounds are usable after a float -> double promotion.

    The data file carries bounds serialized with ``FloatType`` while the
    current schema declares the column as ``DoubleType``. Besides not raising,
    the evaluator must decode the bounds to the right values; the pruning
    assertions below would fail if the bytes were misinterpreted.
    """
    # Schema defines 'val' as DoubleType (evolved state)
    schema = Schema(NestedField(1, "val", DoubleType(), required=True))

    # Historical manifest contains 4-byte float bounds
    # (30.0 and 79.0 are exactly representable as 32-bit floats)
    data_file = DataFile.from_args(
        file_path="file_1.parquet",
        file_format=FileFormat.PARQUET,
        partition={},
        record_count=100,
        file_size_in_bytes=1024,
        lower_bounds={1: to_bytes(FloatType(), 30.0)},
        upper_bounds={1: to_bytes(FloatType(), 79.0)},
    )

    # Predicate: val < 50.0
    evaluator = _InclusiveMetricsEvaluator(schema, LessThan("val", 50.0))

    # Should not crash and should correctly identify that the file might match
    assert evaluator.eval(data_file), "Should read: val range overlaps the bounds (30.0 < 50.0)"

    # A truthy result alone does not prove the bounds were decoded correctly,
    # because "might match" is also the answer when no pruning is possible.
    # The two predicates below can only be rejected via the decoded bounds.

    # Predicate: val < 10.0 (entirely below the lower bound)
    should_read = _InclusiveMetricsEvaluator(schema, LessThan("val", 10.0)).eval(data_file)
    assert not should_read, "Should not read: val range below lower bound (10.0 < 30.0)"

    # Predicate: val > 100.0 (entirely above the upper bound)
    should_read = _InclusiveMetricsEvaluator(schema, GreaterThan("val", 100.0)).eval(data_file)
    assert not should_read, "Should not read: val range above upper bound (100.0 > 79.0)"
0 commit comments