Skip to content

Commit d7b8779

Browse files
goutamvenkat-anyscale authored and peterxcli committed
[Data] - Upgrade to pyiceberg 0.11.0 (ray-project#61062)
## Description

As the title states. One more note: apache/iceberg-python#2777 removed the `length` parameter from `FileScanTask`.

## Related issues

> Link related issues: "Fixes ray-project#1234", "Closes ray-project#1234", or "Related to ray-project#1234".

## Additional information

> Optional: Add implementation details, API changes, usage examples, screenshots, etc.

---------

Signed-off-by: Goutam <goutam@anyscale.com>
Signed-off-by: peterxcli <peterxcli@gmail.com>
1 parent 9198a0c commit d7b8779

5 files changed

Lines changed: 16 additions & 17 deletions

File tree

python/ray/data/_internal/datasource/iceberg_datasource.py

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@
8484

8585

8686
class _IcebergExpressionVisitor(
87-
_ExprVisitor["BooleanExpression | UnboundTerm[Any] | Literal[Any]"]
87+
_ExprVisitor["BooleanExpression | UnboundTerm | Literal"]
8888
):
8989
"""
9090
Visitor that converts Ray Data expressions to PyIceberg expressions.
@@ -99,11 +99,11 @@ class _IcebergExpressionVisitor(
9999
>>> # iceberg_expr can now be used with PyIceberg's filter APIs
100100
"""
101101

102-
def visit_column(self, expr: "ColumnExpr") -> "UnboundTerm[Any]":
102+
def visit_column(self, expr: "ColumnExpr") -> "UnboundTerm":
103103
"""Convert a column reference to an Iceberg reference."""
104104
return Reference(expr.name)
105105

106-
def visit_literal(self, expr: "LiteralExpr") -> "Literal[Any]":
106+
def visit_literal(self, expr: "LiteralExpr") -> "Literal":
107107
"""Convert a literal value to an Iceberg literal."""
108108
return literal(expr.value)
109109

@@ -148,13 +148,11 @@ def visit_unary(self, expr: "UnaryExpr") -> "BooleanExpression":
148148

149149
def visit_alias(
150150
self, expr: "AliasExpr"
151-
) -> "BooleanExpression | UnboundTerm[Any] | Literal[Any]":
151+
) -> "BooleanExpression | UnboundTerm | Literal":
152152
"""Convert an aliased expression (just unwrap the alias)."""
153153
return self.visit(expr.expr)
154154

155-
def visit_udf(
156-
self, expr: "UDFExpr"
157-
) -> "BooleanExpression | UnboundTerm[Any] | Literal[Any]":
155+
def visit_udf(self, expr: "UDFExpr") -> "BooleanExpression | UnboundTerm | Literal":
158156
"""UDF expressions cannot be converted to Iceberg expressions."""
159157
raise TypeError(
160158
"UDF expressions cannot be converted to Iceberg expressions. "
@@ -163,23 +161,23 @@ def visit_udf(
163161

164162
def visit_download(
165163
self, expr: "DownloadExpr"
166-
) -> "BooleanExpression | UnboundTerm[Any] | Literal[Any]":
164+
) -> "BooleanExpression | UnboundTerm | Literal":
167165
"""Download expressions cannot be converted to Iceberg expressions."""
168166
raise TypeError(
169167
"Download expressions cannot be converted to Iceberg expressions."
170168
)
171169

172170
def visit_star(
173171
self, expr: "StarExpr"
174-
) -> "BooleanExpression | UnboundTerm[Any] | Literal[Any]":
172+
) -> "BooleanExpression | UnboundTerm | Literal":
175173
"""Star expressions cannot be converted to Iceberg expressions."""
176174
raise TypeError(
177175
"Star expressions cannot be converted to Iceberg filter expressions."
178176
)
179177

180178
def visit_monotonically_increasing_id(
181179
self, expr: "MonotonicallyIncreasingIdExpr"
182-
) -> "BooleanExpression | UnboundTerm[Any] | Literal[Any]":
180+
) -> "BooleanExpression | UnboundTerm | Literal":
183181
"""Monotonically increasing ID expressions cannot be converted to Iceberg expressions."""
184182
raise TypeError(
185183
"monotonically_increasing_id expressions cannot be converted to Iceberg filter expressions."
@@ -490,7 +488,7 @@ def get_read_tasks(
490488
metadata = BlockMetadata(
491489
num_rows=sum(task.file.record_count for task in chunk_tasks)
492490
- position_delete_count,
493-
size_bytes=sum(task.length for task in chunk_tasks),
491+
size_bytes=sum(task.file.file_size_in_bytes for task in chunk_tasks),
494492
input_files=[task.file.file_path for task in chunk_tasks],
495493
exec_stats=None,
496494
)

python/ray/data/tests/datasource/test_iceberg.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,7 @@ def test_write_basic():
287287
table_p = (
288288
ds.to_pandas().sort_values(["col_a", "col_b", "col_c"]).reset_index(drop=True)
289289
)
290-
assert orig_table_p.equals(table_p)
290+
assert rows_same(table_p, orig_table_p)
291291

292292

293293
@pytest.mark.skipif(

python/requirements/ml/data-test-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ deltalake==0.9.0
1919
pytest-mock
2020
decord
2121
snowflake-connector-python>=3.15.0
22-
pyiceberg[sql-sqlite]==0.10.0
22+
pyiceberg[sql-sqlite]==0.11.0
2323
clickhouse-connect
2424
kafka-python
2525
pybase64

python/requirements_compiled.txt

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1656,7 +1656,7 @@ pygments==2.18.0
16561656
# nbconvert
16571657
# rich
16581658
# sphinx
1659-
pyiceberg==0.10.0
1659+
pyiceberg==0.11.0
16601660
# via -r python/requirements/ml/data-test-requirements.txt
16611661
pyjwt==2.8.0
16621662
# via
@@ -2107,7 +2107,6 @@ snowflake-connector-python==3.15.0
21072107
sortedcontainers==2.4.0
21082108
# via
21092109
# distributed
2110-
# pyiceberg
21112110
# snowflake-connector-python
21122111
soundfile==0.12.1
21132112
# via -r python/requirements/ml/data-test-requirements.txt
@@ -2589,7 +2588,9 @@ zipp==3.19.2
25892588
zoopt==0.4.1
25902589
# via -r python/requirements/ml/tune-test-requirements.txt
25912590
zstandard==0.23.0
2592-
# via clickhouse-connect
2591+
# via
2592+
# clickhouse-connect
2593+
# pyiceberg
25932594

25942595
# The following packages are considered to be unsafe in a requirements file:
25952596
# pip

python/requirements_compiled_py3.13.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1699,7 +1699,7 @@ pygments==2.18.0
16991699
# nbconvert
17001700
# rich
17011701
# sphinx
1702-
pyiceberg==0.10.0
1702+
pyiceberg==0.11.0
17031703
# via -r python/requirements/ml/data-test-requirements.txt
17041704
pyjwt==2.8.0
17051705
# via

0 commit comments

Comments
 (0)