Skip to content

Commit 095fd76

Browse files
committed
linter fix
1 parent 0bac388 commit 095fd76

2 files changed

Lines changed: 29 additions & 18 deletions

File tree

pyiceberg/conversions.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,8 +350,6 @@ def _(_: PrimitiveType, b: bytes) -> int:
350350
@from_bytes.register(TimestampNanoType)
351351
@from_bytes.register(TimestamptzNanoType)
352352
def _(_: PrimitiveType, b: bytes) -> int:
353-
if len(b) == 4:
354-
return _INT_STRUCT.unpack(b)[0]
355353
return _LONG_STRUCT.unpack(b)[0]
356354

357355

@@ -362,8 +360,6 @@ def _(_: FloatType, b: bytes) -> float:
362360

363361
@from_bytes.register(DoubleType)
364362
def _(_: DoubleType, b: bytes) -> float:
365-
if len(b) == 4:
366-
return _FLOAT_STRUCT.unpack(b)[0]
367363
return _DOUBLE_STRUCT.unpack(b)[0]
368364

369365

pyiceberg/expressions/visitors.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,16 @@
5959
from pyiceberg.schema import Schema
6060
from pyiceberg.typedef import EMPTY_DICT, L, LiteralValue, Record, StructProtocol
6161
from pyiceberg.types import (
62+
DateType,
6263
DoubleType,
6364
FloatType,
6465
IcebergType,
66+
IntegerType,
67+
LongType,
6568
NestedField,
6669
PrimitiveType,
6770
StructType,
71+
TimestampNanoType,
6872
TimestampType,
6973
TimestamptzType,
7074
)
@@ -73,6 +77,17 @@
7377
T = TypeVar("T")
7478

7579

80+
def _from_bytes_with_promotion(field_type: PrimitiveType, b: bytes) -> Any:
    """Decode a serialized column bound, tolerating a narrower pre-promotion encoding.

    A bound that is exactly 4 bytes long for a field whose current type is
    ``LongType``, ``DoubleType``, ``TimestampType`` or ``TimestampNanoType`` is
    decoded with the corresponding 4-byte source type (``IntegerType``,
    ``FloatType`` or ``DateType``) instead of the field's current type. Every
    other input is passed straight to ``from_bytes`` with ``field_type``.

    Args:
        field_type: The current (possibly promoted) primitive type of the field.
        b: The serialized lower or upper bound.

    Returns:
        The decoded bound, expressed in the unit of ``field_type`` so that it
        can be compared directly with a literal bound to that field.
    """
    if len(b) == 4:
        if isinstance(field_type, LongType):
            return from_bytes(IntegerType(), b)
        if isinstance(field_type, DoubleType):
            return from_bytes(FloatType(), b)
        # NOTE(review): the two branches below assume from_bytes(DateType(), b)
        # yields an integer number of days since the epoch, and that timestamp
        # literals are micros (TimestampType) / nanos (TimestampNanoType) since
        # the epoch -- confirm against pyiceberg.conversions. Without rescaling,
        # a day count would be compared against a micro/nanosecond literal and
        # an upper bound would almost always look "smaller than the literal",
        # wrongly pruning files that contain matching rows.
        if isinstance(field_type, TimestampNanoType):
            return from_bytes(DateType(), b) * 86_400_000_000_000  # days -> nanoseconds
        if isinstance(field_type, TimestampType):
            return from_bytes(DateType(), b) * 86_400_000_000  # days -> microseconds
    return from_bytes(field_type, b)
89+
90+
7691
class BooleanExpressionVisitor(Generic[T], ABC):
7792
@abstractmethod
7893
def visit_true(self) -> T:
@@ -1242,7 +1257,7 @@ def visit_less_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
12421257
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
12431258

12441259
if lower_bound_bytes := self.lower_bounds.get(field_id):
1245-
lower_bound = from_bytes(field.field_type, lower_bound_bytes)
1260+
lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
12461261

12471262
if self._is_nan(lower_bound):
12481263
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1264,7 +1279,7 @@ def visit_less_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> bo
12641279
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
12651280

12661281
if lower_bound_bytes := self.lower_bounds.get(field_id):
1267-
lower_bound = from_bytes(field.field_type, lower_bound_bytes)
1282+
lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
12681283
if self._is_nan(lower_bound):
12691284
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
12701285
return ROWS_MIGHT_MATCH
@@ -1285,7 +1300,7 @@ def visit_greater_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
12851300
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
12861301

12871302
if upper_bound_bytes := self.upper_bounds.get(field_id):
1288-
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
1303+
upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
12891304
if upper_bound <= literal.value:
12901305
if self._is_nan(upper_bound):
12911306
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1306,7 +1321,7 @@ def visit_greater_than_or_equal(self, term: BoundTerm, literal: LiteralValue) ->
13061321
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
13071322

13081323
if upper_bound_bytes := self.upper_bounds.get(field_id):
1309-
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
1324+
upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
13101325
if upper_bound < literal.value:
13111326
if self._is_nan(upper_bound):
13121327
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1327,7 +1342,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
13271342
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
13281343

13291344
if lower_bound_bytes := self.lower_bounds.get(field_id):
1330-
lower_bound = from_bytes(field.field_type, lower_bound_bytes)
1345+
lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
13311346
if self._is_nan(lower_bound):
13321347
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13331348
return ROWS_MIGHT_MATCH
@@ -1336,7 +1351,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
13361351
return ROWS_CANNOT_MATCH
13371352

13381353
if upper_bound_bytes := self.upper_bounds.get(field_id):
1339-
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
1354+
upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
13401355
if self._is_nan(upper_bound):
13411356
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13421357
return ROWS_MIGHT_MATCH
@@ -1364,22 +1379,22 @@ def visit_in(self, term: BoundTerm, literals: set[L]) -> bool:
13641379
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
13651380

13661381
if lower_bound_bytes := self.lower_bounds.get(field_id):
1367-
lower_bound = from_bytes(field.field_type, lower_bound_bytes)
1382+
lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
13681383
if self._is_nan(lower_bound):
13691384
# NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
13701385
return ROWS_MIGHT_MATCH
13711386

1372-
literals = {lit for lit in literals if lower_bound <= lit} # type: ignore[operator]
1387+
literals = {lit for lit in literals if lower_bound <= lit}
13731388
if len(literals) == 0:
13741389
return ROWS_CANNOT_MATCH
13751390

13761391
if upper_bound_bytes := self.upper_bounds.get(field_id):
1377-
upper_bound = from_bytes(field.field_type, upper_bound_bytes)
1392+
upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
13781393
# this is different from Java, here NaN is always larger
13791394
if self._is_nan(upper_bound):
13801395
return ROWS_MIGHT_MATCH
13811396

1382-
literals = {lit for lit in literals if upper_bound >= lit} # type: ignore[operator]
1397+
literals = {lit for lit in literals if upper_bound >= lit}
13831398
if len(literals) == 0:
13841399
return ROWS_CANNOT_MATCH
13851400

@@ -1404,14 +1419,14 @@ def visit_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
14041419
len_prefix = len(prefix)
14051420

14061421
if lower_bound_bytes := self.lower_bounds.get(field_id):
1407-
lower_bound = str(from_bytes(field.field_type, lower_bound_bytes))
1422+
lower_bound = str(_from_bytes_with_promotion(field.field_type, lower_bound_bytes))
14081423

14091424
# truncate lower bound so that its length is not greater than the length of prefix
14101425
if lower_bound and lower_bound[:len_prefix] > prefix:
14111426
return ROWS_CANNOT_MATCH
14121427

14131428
if upper_bound_bytes := self.upper_bounds.get(field_id):
1414-
upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
1429+
upper_bound = str(_from_bytes_with_promotion(field.field_type, upper_bound_bytes))
14151430

14161431
# truncate upper bound so that its length is not greater than the length of prefix
14171432
if upper_bound is not None and upper_bound[:len_prefix] < prefix:
@@ -1435,8 +1450,8 @@ def visit_not_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
14351450
# not_starts_with will match unless all values must start with the prefix. This happens when
14361451
# the lower and upper bounds both start with the prefix.
14371452
if (lower_bound_bytes := self.lower_bounds.get(field_id)) and (upper_bound_bytes := self.upper_bounds.get(field_id)):
1438-
lower_bound = str(from_bytes(field.field_type, lower_bound_bytes))
1439-
upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
1453+
lower_bound = str(_from_bytes_with_promotion(field.field_type, lower_bound_bytes))
1454+
upper_bound = str(_from_bytes_with_promotion(field.field_type, upper_bound_bytes))
14401455

14411456
# if lower is shorter than the prefix then lower doesn't start with the prefix
14421457
if len(lower_bound) < len_prefix:

0 commit comments

Comments
 (0)