5959from pyiceberg .schema import Schema
6060from pyiceberg .typedef import EMPTY_DICT , L , LiteralValue , Record , StructProtocol
6161from pyiceberg .types import (
62+ DateType ,
6263 DoubleType ,
6364 FloatType ,
6465 IcebergType ,
66+ IntegerType ,
67+ LongType ,
6568 NestedField ,
6669 PrimitiveType ,
6770 StructType ,
71+ TimestampNanoType ,
6872 TimestampType ,
6973 TimestamptzType ,
7074)
7377T = TypeVar ("T" )
7478
7579
def _from_bytes_with_promotion(field_type: PrimitiveType, b: bytes) -> Any:
    """Decode a serialized bound, accounting for type promotion of the column.

    Bounds stored in file metadata are not rewritten when a column's type is
    promoted, so a bound may still be encoded with the narrower type that was
    current when the file was written. A 4-byte payload for a field whose
    current type is normally 8 bytes wide is therefore decoded with the
    narrower source type:

    * ``LongType``          <- ``IntegerType``
    * ``DoubleType``        <- ``FloatType``
    * ``TimestampType``     <- ``DateType``
    * ``TimestampNanoType`` <- ``DateType``

    For the date-to-timestamp cases the decoded value is additionally scaled
    to the unit of the promoted type so that it can be compared directly with
    literals of the current field type.

    Args:
        field_type: The current (possibly promoted) type of the field.
        b: The serialized bound.

    Returns:
        The decoded bound value.
    """
    if len(b) != 4:
        # Not a narrow payload: decode with the field's current type.
        return from_bytes(field_type, b)

    if isinstance(field_type, LongType):
        return from_bytes(IntegerType(), b)
    if isinstance(field_type, DoubleType):
        return from_bytes(FloatType(), b)

    # NOTE(review): assumes from_bytes(DateType(), ...) yields days since epoch
    # as an int, and that timestamp / timestamp_ns values are micro- /
    # nanoseconds since epoch (as the Iceberg spec defines them) -- confirm
    # against pyiceberg.conversions.
    # The nano type is tested first so the result stays correct even if the
    # two timestamp types were ever related by inheritance.
    if isinstance(field_type, TimestampNanoType):
        nanos_per_day = 86_400 * 1_000_000_000
        return from_bytes(DateType(), b) * nanos_per_day
    if isinstance(field_type, TimestampType):
        micros_per_day = 86_400 * 1_000_000
        return from_bytes(DateType(), b) * micros_per_day

    # Any other 4-byte payload is decoded with the field's own type.
    return from_bytes(field_type, b)
89+
90+
7691class BooleanExpressionVisitor (Generic [T ], ABC ):
7792 @abstractmethod
7893 def visit_true (self ) -> T :
@@ -1242,7 +1257,7 @@ def visit_less_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
12421257 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
12431258
12441259 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1245- lower_bound = from_bytes (field .field_type , lower_bound_bytes )
1260+ lower_bound = _from_bytes_with_promotion (field .field_type , lower_bound_bytes )
12461261
12471262 if self ._is_nan (lower_bound ):
12481263 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1264,7 +1279,7 @@ def visit_less_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> bo
12641279 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
12651280
12661281 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1267- lower_bound = from_bytes (field .field_type , lower_bound_bytes )
1282+ lower_bound = _from_bytes_with_promotion (field .field_type , lower_bound_bytes )
12681283 if self ._is_nan (lower_bound ):
12691284 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
12701285 return ROWS_MIGHT_MATCH
@@ -1285,7 +1300,7 @@ def visit_greater_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
12851300 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
12861301
12871302 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1288- upper_bound = from_bytes (field .field_type , upper_bound_bytes )
1303+ upper_bound = _from_bytes_with_promotion (field .field_type , upper_bound_bytes )
12891304 if upper_bound <= literal .value :
12901305 if self ._is_nan (upper_bound ):
12911306 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1306,7 +1321,7 @@ def visit_greater_than_or_equal(self, term: BoundTerm, literal: LiteralValue) ->
13061321 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
13071322
13081323 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1309- upper_bound = from_bytes (field .field_type , upper_bound_bytes )
1324+ upper_bound = _from_bytes_with_promotion (field .field_type , upper_bound_bytes )
13101325 if upper_bound < literal .value :
13111326 if self ._is_nan (upper_bound ):
13121327 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1327,7 +1342,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
13271342 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
13281343
13291344 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1330- lower_bound = from_bytes (field .field_type , lower_bound_bytes )
1345+ lower_bound = _from_bytes_with_promotion (field .field_type , lower_bound_bytes )
13311346 if self ._is_nan (lower_bound ):
13321347 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13331348 return ROWS_MIGHT_MATCH
@@ -1336,7 +1351,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
13361351 return ROWS_CANNOT_MATCH
13371352
13381353 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1339- upper_bound = from_bytes (field .field_type , upper_bound_bytes )
1354+ upper_bound = _from_bytes_with_promotion (field .field_type , upper_bound_bytes )
13401355 if self ._is_nan (upper_bound ):
13411356 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13421357 return ROWS_MIGHT_MATCH
@@ -1364,22 +1379,22 @@ def visit_in(self, term: BoundTerm, literals: set[L]) -> bool:
13641379 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
13651380
13661381 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1367- lower_bound = from_bytes (field .field_type , lower_bound_bytes )
1382+ lower_bound = _from_bytes_with_promotion (field .field_type , lower_bound_bytes )
13681383 if self ._is_nan (lower_bound ):
13691384 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13701385 return ROWS_MIGHT_MATCH
13711386
1372- literals = {lit for lit in literals if lower_bound <= lit } # type: ignore[operator]
1387+ literals = {lit for lit in literals if lower_bound <= lit }
13731388 if len (literals ) == 0 :
13741389 return ROWS_CANNOT_MATCH
13751390
13761391 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1377- upper_bound = from_bytes (field .field_type , upper_bound_bytes )
1392+ upper_bound = _from_bytes_with_promotion (field .field_type , upper_bound_bytes )
13781393 # this is different from Java, here NaN is always larger
13791394 if self ._is_nan (upper_bound ):
13801395 return ROWS_MIGHT_MATCH
13811396
1382- literals = {lit for lit in literals if upper_bound >= lit } # type: ignore[operator]
1397+ literals = {lit for lit in literals if upper_bound >= lit }
13831398 if len (literals ) == 0 :
13841399 return ROWS_CANNOT_MATCH
13851400
@@ -1404,14 +1419,14 @@ def visit_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
14041419 len_prefix = len (prefix )
14051420
14061421 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1407- lower_bound = str (from_bytes (field .field_type , lower_bound_bytes ))
1422+ lower_bound = str (_from_bytes_with_promotion (field .field_type , lower_bound_bytes ))
14081423
14091424 # truncate lower bound so that its length is not greater than the length of prefix
14101425 if lower_bound and lower_bound [:len_prefix ] > prefix :
14111426 return ROWS_CANNOT_MATCH
14121427
14131428 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1414- upper_bound = str (from_bytes (field .field_type , upper_bound_bytes ))
1429+ upper_bound = str (_from_bytes_with_promotion (field .field_type , upper_bound_bytes ))
14151430
14161431 # truncate upper bound so that its length is not greater than the length of prefix
14171432 if upper_bound is not None and upper_bound [:len_prefix ] < prefix :
@@ -1435,8 +1450,8 @@ def visit_not_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
14351450 # not_starts_with will match unless all values must start with the prefix. This happens when
14361451 # the lower and upper bounds both start with the prefix.
14371452 if (lower_bound_bytes := self .lower_bounds .get (field_id )) and (upper_bound_bytes := self .upper_bounds .get (field_id )):
1438- lower_bound = str (from_bytes (field .field_type , lower_bound_bytes ))
1439- upper_bound = str (from_bytes (field .field_type , upper_bound_bytes ))
1453+ lower_bound = str (_from_bytes_with_promotion (field .field_type , lower_bound_bytes ))
1454+ upper_bound = str (_from_bytes_with_promotion (field .field_type , upper_bound_bytes ))
14401455
14411456 # if lower is shorter than the prefix then lower doesn't start with the prefix
14421457 if len (lower_bound ) < len_prefix :
0 commit comments