6262 DoubleType ,
6363 FloatType ,
6464 IcebergType ,
65- IntegerType ,
66- LongType ,
6765 NestedField ,
6866 PrimitiveType ,
6967 StructType ,
7573T = TypeVar ("T" )
7674
7775
78- def _from_bytes_with_promotion (field_type : PrimitiveType , b : bytes ) -> Any :
79- # Integer, Float, Date are 4 bytes
80- # Long, Double, Timestamps are 8 bytes
81- # If we have 4 bytes, we may have to handle type promotion.
82- if len (b ) == 4 :
83- if isinstance (field_type , LongType ):
84- return from_bytes (IntegerType (), b )
85- elif isinstance (field_type , DoubleType ):
86- return from_bytes (FloatType (), b )
87- return from_bytes (field_type , b )
88-
89-
9076class BooleanExpressionVisitor (Generic [T ], ABC ):
9177 @abstractmethod
9278 def visit_true (self ) -> T :
@@ -554,7 +540,7 @@ def visit_or(self, left_result: bool, right_result: bool) -> bool:
554540def _from_byte_buffer (field_type : IcebergType , val : bytes ) -> Any :
555541 if not isinstance (field_type , PrimitiveType ):
556542 raise ValueError (f"Expected a PrimitiveType, got: { type (field_type )} " )
557- return _from_bytes_with_promotion (field_type , val )
543+ return from_bytes (field_type , val )
558544
559545
560546class _ManifestEvalVisitor (BoundBooleanExpressionVisitor [bool ]):
@@ -1256,7 +1242,7 @@ def visit_less_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
12561242 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
12571243
12581244 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1259- lower_bound = _from_bytes_with_promotion (field .field_type , lower_bound_bytes )
1245+ lower_bound = from_bytes (field .field_type , lower_bound_bytes )
12601246
12611247 if self ._is_nan (lower_bound ):
12621248 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1278,7 +1264,7 @@ def visit_less_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> bo
12781264 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
12791265
12801266 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1281- lower_bound = _from_bytes_with_promotion (field .field_type , lower_bound_bytes )
1267+ lower_bound = from_bytes (field .field_type , lower_bound_bytes )
12821268 if self ._is_nan (lower_bound ):
12831269 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
12841270 return ROWS_MIGHT_MATCH
@@ -1299,7 +1285,7 @@ def visit_greater_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
12991285 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
13001286
13011287 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1302- upper_bound = _from_bytes_with_promotion (field .field_type , upper_bound_bytes )
1288+ upper_bound = from_bytes (field .field_type , upper_bound_bytes )
13031289 if upper_bound <= literal .value :
13041290 if self ._is_nan (upper_bound ):
13051291 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1320,7 +1306,7 @@ def visit_greater_than_or_equal(self, term: BoundTerm, literal: LiteralValue) ->
13201306 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
13211307
13221308 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1323- upper_bound = _from_bytes_with_promotion (field .field_type , upper_bound_bytes )
1309+ upper_bound = from_bytes (field .field_type , upper_bound_bytes )
13241310 if upper_bound < literal .value :
13251311 if self ._is_nan (upper_bound ):
13261312 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1341,7 +1327,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
13411327 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
13421328
13431329 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1344- lower_bound = _from_bytes_with_promotion (field .field_type , lower_bound_bytes )
1330+ lower_bound = from_bytes (field .field_type , lower_bound_bytes )
13451331 if self ._is_nan (lower_bound ):
13461332 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13471333 return ROWS_MIGHT_MATCH
@@ -1350,7 +1336,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
13501336 return ROWS_CANNOT_MATCH
13511337
13521338 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1353- upper_bound = _from_bytes_with_promotion (field .field_type , upper_bound_bytes )
1339+ upper_bound = from_bytes (field .field_type , upper_bound_bytes )
13541340 if self ._is_nan (upper_bound ):
13551341 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13561342 return ROWS_MIGHT_MATCH
@@ -1378,22 +1364,22 @@ def visit_in(self, term: BoundTerm, literals: set[L]) -> bool:
13781364 raise ValueError (f"Expected PrimitiveType: { field .field_type } " )
13791365
13801366 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1381- lower_bound = _from_bytes_with_promotion (field .field_type , lower_bound_bytes )
1367+ lower_bound = from_bytes (field .field_type , lower_bound_bytes )
13821368 if self ._is_nan (lower_bound ):
13831369 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13841370 return ROWS_MIGHT_MATCH
13851371
1386- literals = {lit for lit in literals if lower_bound <= lit }
1372+ literals = {lit for lit in literals if lower_bound <= lit } # type: ignore[operator]
13871373 if len (literals ) == 0 :
13881374 return ROWS_CANNOT_MATCH
13891375
13901376 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1391- upper_bound = _from_bytes_with_promotion (field .field_type , upper_bound_bytes )
1377+ upper_bound = from_bytes (field .field_type , upper_bound_bytes )
13921378 # this is different from Java, here NaN is always larger
13931379 if self ._is_nan (upper_bound ):
13941380 return ROWS_MIGHT_MATCH
13951381
1396- literals = {lit for lit in literals if upper_bound >= lit }
1382+ literals = {lit for lit in literals if upper_bound >= lit } # type: ignore[operator]
13971383 if len (literals ) == 0 :
13981384 return ROWS_CANNOT_MATCH
13991385
@@ -1418,14 +1404,14 @@ def visit_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
14181404 len_prefix = len (prefix )
14191405
14201406 if lower_bound_bytes := self .lower_bounds .get (field_id ):
1421- lower_bound = str (_from_bytes_with_promotion (field .field_type , lower_bound_bytes ))
1407+ lower_bound = str (from_bytes (field .field_type , lower_bound_bytes ))
14221408
14231409 # truncate lower bound so that its length is not greater than the length of prefix
14241410 if lower_bound and lower_bound [:len_prefix ] > prefix :
14251411 return ROWS_CANNOT_MATCH
14261412
14271413 if upper_bound_bytes := self .upper_bounds .get (field_id ):
1428- upper_bound = str (_from_bytes_with_promotion (field .field_type , upper_bound_bytes ))
1414+ upper_bound = str (from_bytes (field .field_type , upper_bound_bytes ))
14291415
14301416 # truncate upper bound so that its length is not greater than the length of prefix
14311417 if upper_bound is not None and upper_bound [:len_prefix ] < prefix :
@@ -1449,8 +1435,8 @@ def visit_not_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
14491435 # not_starts_with will match unless all values must start with the prefix. This happens when
14501436 # the lower and upper bounds both start with the prefix.
14511437 if (lower_bound_bytes := self .lower_bounds .get (field_id )) and (upper_bound_bytes := self .upper_bounds .get (field_id )):
1452- lower_bound = str (_from_bytes_with_promotion (field .field_type , lower_bound_bytes ))
1453- upper_bound = str (_from_bytes_with_promotion (field .field_type , upper_bound_bytes ))
1438+ lower_bound = str (from_bytes (field .field_type , lower_bound_bytes ))
1439+ upper_bound = str (from_bytes (field .field_type , upper_bound_bytes ))
14541440
14551441 # if lower is shorter than the prefix then lower doesn't start with the prefix
14561442 if len (lower_bound ) < len_prefix :
0 commit comments