Skip to content

Commit ef8e8c9

Browse files
committed
PR comments
1 parent 803ac61 commit ef8e8c9

2 files changed

Lines changed: 26 additions & 30 deletions

File tree

pyiceberg/conversions.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,6 @@ def _(_: PrimitiveType, b: bytes) -> int:
343343
return _INT_STRUCT.unpack(b)[0]
344344

345345

346-
@from_bytes.register(LongType)
347346
@from_bytes.register(TimeType)
348347
@from_bytes.register(TimestampType)
349348
@from_bytes.register(TimestamptzType)
@@ -353,13 +352,24 @@ def _(_: PrimitiveType, b: bytes) -> int:
353352
return _LONG_STRUCT.unpack(b)[0]
354353

355354

355+
@from_bytes.register(LongType)
356+
def _(_: PrimitiveType, b: bytes) -> int:
357+
if len(b) == 4:
358+
# If the length is 4 bytes, it is a promoted IntegerType
359+
return _INT_STRUCT.unpack(b)[0]
360+
return _LONG_STRUCT.unpack(b)[0]
361+
362+
356363
@from_bytes.register(FloatType)
357364
def _(_: FloatType, b: bytes) -> float:
358365
return _FLOAT_STRUCT.unpack(b)[0]
359366

360367

361368
@from_bytes.register(DoubleType)
362369
def _(_: DoubleType, b: bytes) -> float:
370+
if len(b) == 4:
371+
# If the length is 4 bytes, it is a promoted FloatType
372+
return _FLOAT_STRUCT.unpack(b)[0]
363373
return _DOUBLE_STRUCT.unpack(b)[0]
364374

365375

pyiceberg/expressions/visitors.py

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@
6262
DoubleType,
6363
FloatType,
6464
IcebergType,
65-
IntegerType,
66-
LongType,
6765
NestedField,
6866
PrimitiveType,
6967
StructType,
@@ -75,18 +73,6 @@
7573
T = TypeVar("T")
7674

7775

78-
def _from_bytes_with_promotion(field_type: PrimitiveType, b: bytes) -> Any:
79-
# Integer, Float, Date are 4 bytes
80-
# Long, Double, Timestamps are 8 bytes
81-
# If we have 4 bytes, we may have to handle type promotion.
82-
if len(b) == 4:
83-
if isinstance(field_type, LongType):
84-
return from_bytes(IntegerType(), b)
85-
elif isinstance(field_type, DoubleType):
86-
return from_bytes(FloatType(), b)
87-
return from_bytes(field_type, b)
88-
89-
9076
class BooleanExpressionVisitor(Generic[T], ABC):
9177
@abstractmethod
9278
def visit_true(self) -> T:
@@ -554,7 +540,7 @@ def visit_or(self, left_result: bool, right_result: bool) -> bool:
554540
def _from_byte_buffer(field_type: IcebergType, val: bytes) -> Any:
555541
if not isinstance(field_type, PrimitiveType):
556542
raise ValueError(f"Expected a PrimitiveType, got: {type(field_type)}")
557-
return _from_bytes_with_promotion(field_type, val)
543+
return from_bytes(field_type, val)
558544

559545

560546
class _ManifestEvalVisitor(BoundBooleanExpressionVisitor[bool]):
@@ -1256,7 +1242,7 @@ def visit_less_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
12561242
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
12571243

12581244
if lower_bound_bytes := self.lower_bounds.get(field_id):
1259-
lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
1245+
lower_bound = from_bytes(field.field_type, lower_bound_bytes)
12601246

12611247
if self._is_nan(lower_bound):
12621248
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1278,7 +1264,7 @@ def visit_less_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> bo
12781264
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
12791265

12801266
if lower_bound_bytes := self.lower_bounds.get(field_id):
1281-
lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
1267+
lower_bound = from_bytes(field.field_type, lower_bound_bytes)
12821268
if self._is_nan(lower_bound):
12831269
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
12841270
return ROWS_MIGHT_MATCH
@@ -1299,7 +1285,7 @@ def visit_greater_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
12991285
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
13001286

13011287
if upper_bound_bytes := self.upper_bounds.get(field_id):
1302-
upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
1288+
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
13031289
if upper_bound <= literal.value:
13041290
if self._is_nan(upper_bound):
13051291
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1320,7 +1306,7 @@ def visit_greater_than_or_equal(self, term: BoundTerm, literal: LiteralValue) ->
13201306
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
13211307

13221308
if upper_bound_bytes := self.upper_bounds.get(field_id):
1323-
upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
1309+
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
13241310
if upper_bound < literal.value:
13251311
if self._is_nan(upper_bound):
13261312
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1341,7 +1327,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
13411327
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
13421328

13431329
if lower_bound_bytes := self.lower_bounds.get(field_id):
1344-
lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
1330+
lower_bound = from_bytes(field.field_type, lower_bound_bytes)
13451331
if self._is_nan(lower_bound):
13461332
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13471333
return ROWS_MIGHT_MATCH
@@ -1350,7 +1336,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
13501336
return ROWS_CANNOT_MATCH
13511337

13521338
if upper_bound_bytes := self.upper_bounds.get(field_id):
1353-
upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
1339+
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
13541340
if self._is_nan(upper_bound):
13551341
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13561342
return ROWS_MIGHT_MATCH
@@ -1378,22 +1364,22 @@ def visit_in(self, term: BoundTerm, literals: set[L]) -> bool:
13781364
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
13791365

13801366
if lower_bound_bytes := self.lower_bounds.get(field_id):
1381-
lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
1367+
lower_bound = from_bytes(field.field_type, lower_bound_bytes)
13821368
if self._is_nan(lower_bound):
13831369
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13841370
return ROWS_MIGHT_MATCH
13851371

1386-
literals = {lit for lit in literals if lower_bound <= lit}
1372+
literals = {lit for lit in literals if lower_bound <= lit} # type: ignore[operator]
13871373
if len(literals) == 0:
13881374
return ROWS_CANNOT_MATCH
13891375

13901376
if upper_bound_bytes := self.upper_bounds.get(field_id):
1391-
upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
1377+
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
13921378
# this is different from Java, here NaN is always larger
13931379
if self._is_nan(upper_bound):
13941380
return ROWS_MIGHT_MATCH
13951381

1396-
literals = {lit for lit in literals if upper_bound >= lit}
1382+
literals = {lit for lit in literals if upper_bound >= lit} # type: ignore[operator]
13971383
if len(literals) == 0:
13981384
return ROWS_CANNOT_MATCH
13991385

@@ -1418,14 +1404,14 @@ def visit_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
14181404
len_prefix = len(prefix)
14191405

14201406
if lower_bound_bytes := self.lower_bounds.get(field_id):
1421-
lower_bound = str(_from_bytes_with_promotion(field.field_type, lower_bound_bytes))
1407+
lower_bound = str(from_bytes(field.field_type, lower_bound_bytes))
14221408

14231409
# truncate lower bound so that its length is not greater than the length of prefix
14241410
if lower_bound and lower_bound[:len_prefix] > prefix:
14251411
return ROWS_CANNOT_MATCH
14261412

14271413
if upper_bound_bytes := self.upper_bounds.get(field_id):
1428-
upper_bound = str(_from_bytes_with_promotion(field.field_type, upper_bound_bytes))
1414+
upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
14291415

14301416
# truncate upper bound so that its length is not greater than the length of prefix
14311417
if upper_bound is not None and upper_bound[:len_prefix] < prefix:
@@ -1449,8 +1435,8 @@ def visit_not_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
14491435
# not_starts_with will match unless all values must start with the prefix. This happens when
14501436
# the lower and upper bounds both start with the prefix.
14511437
if (lower_bound_bytes := self.lower_bounds.get(field_id)) and (upper_bound_bytes := self.upper_bounds.get(field_id)):
1452-
lower_bound = str(_from_bytes_with_promotion(field.field_type, lower_bound_bytes))
1453-
upper_bound = str(_from_bytes_with_promotion(field.field_type, upper_bound_bytes))
1438+
lower_bound = str(from_bytes(field.field_type, lower_bound_bytes))
1439+
upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
14541440

14551441
# if lower is shorter than the prefix then lower doesn't start with the prefix
14561442
if len(lower_bound) < len_prefix:

0 commit comments

Comments
 (0)