Skip to content

Commit 19841dc

Browse files
sumedhsakdeo and claude
committed
test: Update tests for new ScanOrder class hierarchy
Replace ScanOrder.TASK/ARRIVAL with TaskOrder()/ArrivalOrder() instances. Update concurrent_files → concurrent_streams parameter usage. All existing test scenarios preserved with new type-safe API. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 03bda3d commit 19841dc

2 files changed

Lines changed: 20 additions & 20 deletions

File tree

tests/io/test_bounded_concurrent_batches.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import pytest
2626

2727
from pyiceberg.io.pyarrow import _bounded_concurrent_batches
28-
from pyiceberg.table import FileScanTask, ScanOrder
28+
from pyiceberg.table import FileScanTask, ScanOrder, TaskOrder, ArrivalOrder
2929

3030

3131
def _make_task() -> FileScanTask:
@@ -253,6 +253,6 @@ def test_concurrent_with_limit_via_arrowscan(tmpdir: str) -> None:
253253
limit=150,
254254
)
255255

256-
batches = list(scan.to_record_batches(tasks, order=ScanOrder.ARRIVAL, concurrent_files=2))
256+
batches = list(scan.to_record_batches(tasks, order=ArrivalOrder(concurrent_streams=2)))
257257
total_rows = sum(len(b) for b in batches)
258258
assert total_rows == 150

tests/io/test_pyarrow.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
from pyiceberg.manifest import DataFile, DataFileContent, FileFormat
8787
from pyiceberg.partitioning import PartitionField, PartitionSpec
8888
from pyiceberg.schema import Schema, make_compatible_name, visit
89-
from pyiceberg.table import FileScanTask, ScanOrder, TableProperties
89+
from pyiceberg.table import FileScanTask, ScanOrder, TaskOrder, ArrivalOrder, TableProperties
9090
from pyiceberg.table.metadata import TableMetadataV2
9191
from pyiceberg.table.name_mapping import create_mapping_from_schema
9292
from pyiceberg.transforms import HourTransform, IdentityTransform
@@ -3170,13 +3170,13 @@ def _create_scan_and_tasks(
31703170

31713171

31723172
def test_task_order_produces_same_results(tmpdir: str) -> None:
3173-
"""Test that order=ScanOrder.TASK produces the same results as the default behavior."""
3173+
"""Test that order=TaskOrder() produces the same results as the default behavior."""
31743174
scan, tasks = _create_scan_and_tasks(tmpdir, num_files=3, rows_per_file=100)
31753175

3176-
batches_default = list(scan.to_record_batches(tasks, order=ScanOrder.TASK))
3176+
batches_default = list(scan.to_record_batches(tasks, order=TaskOrder()))
31773177
# Re-create tasks since iterators are consumed
31783178
_, tasks2 = _create_scan_and_tasks(tmpdir, num_files=3, rows_per_file=100)
3179-
batches_task_order = list(scan.to_record_batches(tasks2, order=ScanOrder.TASK))
3179+
batches_task_order = list(scan.to_record_batches(tasks2, order=TaskOrder()))
31803180

31813181
total_default = sum(len(b) for b in batches_default)
31823182
total_task_order = sum(len(b) for b in batches_task_order)
@@ -3185,10 +3185,10 @@ def test_task_order_produces_same_results(tmpdir: str) -> None:
31853185

31863186

31873187
def test_arrival_order_yields_all_batches(tmpdir: str) -> None:
3188-
"""Test that order=ScanOrder.ARRIVAL yields all batches correctly."""
3188+
"""Test that order=ArrivalOrder() yields all batches correctly."""
31893189
scan, tasks = _create_scan_and_tasks(tmpdir, num_files=3, rows_per_file=100)
31903190

3191-
batches = list(scan.to_record_batches(tasks, order=ScanOrder.ARRIVAL))
3191+
batches = list(scan.to_record_batches(tasks, order=ArrivalOrder()))
31923192

31933193
total_rows = sum(len(b) for b in batches)
31943194
assert total_rows == 300
@@ -3198,10 +3198,10 @@ def test_arrival_order_yields_all_batches(tmpdir: str) -> None:
31983198

31993199

32003200
def test_arrival_order_with_limit(tmpdir: str) -> None:
3201-
"""Test that order=ScanOrder.ARRIVAL respects the row limit."""
3201+
"""Test that order=ArrivalOrder() respects the row limit."""
32023202
scan, tasks = _create_scan_and_tasks(tmpdir, num_files=3, rows_per_file=100, limit=150)
32033203

3204-
batches = list(scan.to_record_batches(tasks, order=ScanOrder.ARRIVAL))
3204+
batches = list(scan.to_record_batches(tasks, order=ArrivalOrder()))
32053205

32063206
total_rows = sum(len(b) for b in batches)
32073207
assert total_rows == 150
@@ -3211,15 +3211,15 @@ def test_arrival_order_within_file_ordering_preserved(tmpdir: str) -> None:
32113211
"""Test that within-file row ordering is preserved in arrival order mode."""
32123212
scan, tasks = _create_scan_and_tasks(tmpdir, num_files=3, rows_per_file=100)
32133213

3214-
batches = list(scan.to_record_batches(tasks, order=ScanOrder.ARRIVAL))
3214+
batches = list(scan.to_record_batches(tasks, order=ArrivalOrder()))
32153215
all_values = sorted([v for b in batches for v in b.column("col").to_pylist()])
32163216

32173217
# All values should be present, within-file ordering is preserved
32183218
assert all_values == list(range(300))
32193219

32203220

32213221
def test_arrival_order_with_positional_deletes(tmpdir: str) -> None:
3222-
"""Test that order=ScanOrder.ARRIVAL correctly applies positional deletes."""
3222+
"""Test that order=ArrivalOrder() correctly applies positional deletes."""
32233223
# 3 files, 10 rows each; delete rows 0,5 from file 0, row 3 from file 1, nothing from file 2
32243224
scan, tasks = _create_scan_and_tasks(
32253225
tmpdir,
@@ -3228,7 +3228,7 @@ def test_arrival_order_with_positional_deletes(tmpdir: str) -> None:
32283228
delete_rows_per_file=[[0, 5], [3], []],
32293229
)
32303230

3231-
batches = list(scan.to_record_batches(tasks, order=ScanOrder.ARRIVAL))
3231+
batches = list(scan.to_record_batches(tasks, order=ArrivalOrder()))
32323232

32333233
total_rows = sum(len(b) for b in batches)
32343234
assert total_rows == 27 # 30 - 3 deletes
@@ -3241,7 +3241,7 @@ def test_arrival_order_with_positional_deletes(tmpdir: str) -> None:
32413241

32423242

32433243
def test_arrival_order_with_positional_deletes_and_limit(tmpdir: str) -> None:
3244-
"""Test that order=ScanOrder.ARRIVAL with positional deletes respects the row limit."""
3244+
"""Test that order=ArrivalOrder() with positional deletes respects the row limit."""
32453245
# 3 files, 10 rows each; delete row 0 from each file
32463246
scan, tasks = _create_scan_and_tasks(
32473247
tmpdir,
@@ -3251,7 +3251,7 @@ def test_arrival_order_with_positional_deletes_and_limit(tmpdir: str) -> None:
32513251
delete_rows_per_file=[[0], [0], [0]],
32523252
)
32533253

3254-
batches = list(scan.to_record_batches(tasks, order=ScanOrder.ARRIVAL))
3254+
batches = list(scan.to_record_batches(tasks, order=ArrivalOrder()))
32553255

32563256
total_rows = sum(len(b) for b in batches)
32573257
assert total_rows == 15
@@ -3267,7 +3267,7 @@ def test_task_order_with_positional_deletes(tmpdir: str) -> None:
32673267
delete_rows_per_file=[[0, 5], [3], []],
32683268
)
32693269

3270-
batches = list(scan.to_record_batches(tasks, order=ScanOrder.TASK))
3270+
batches = list(scan.to_record_batches(tasks, order=TaskOrder()))
32713271

32723272
total_rows = sum(len(b) for b in batches)
32733273
assert total_rows == 27 # 30 - 3 deletes
@@ -3277,7 +3277,7 @@ def test_task_order_with_positional_deletes(tmpdir: str) -> None:
32773277

32783278

32793279
def test_concurrent_files_with_positional_deletes(tmpdir: str) -> None:
3280-
"""Test that order=ScanOrder.ARRIVAL with concurrent_files correctly applies positional deletes."""
3280+
"""Test that order=ArrivalOrder() with concurrent_files correctly applies positional deletes."""
32813281
# 4 files, 10 rows each; delete different rows per file
32823282
scan, tasks = _create_scan_and_tasks(
32833283
tmpdir,
@@ -3286,7 +3286,7 @@ def test_concurrent_files_with_positional_deletes(tmpdir: str) -> None:
32863286
delete_rows_per_file=[[0, 9], [4, 5], [0, 1, 2], []],
32873287
)
32883288

3289-
batches = list(scan.to_record_batches(tasks, order=ScanOrder.ARRIVAL, concurrent_files=2))
3289+
batches = list(scan.to_record_batches(tasks, order=ArrivalOrder(concurrent_streams=2)))
32903290

32913291
total_rows = sum(len(b) for b in batches)
32923292
assert total_rows == 33 # 40 - 7 deletes
@@ -3310,7 +3310,7 @@ def test_concurrent_files_with_positional_deletes_and_limit(tmpdir: str) -> None
33103310
delete_rows_per_file=[[0], [0], [0], [0]],
33113311
)
33123312

3313-
batches = list(scan.to_record_batches(tasks, order=ScanOrder.ARRIVAL, concurrent_files=2))
3313+
batches = list(scan.to_record_batches(tasks, order=ArrivalOrder(concurrent_streams=2)))
33143314

33153315
total_rows = sum(len(b) for b in batches)
33163316
assert total_rows == 20
@@ -3321,7 +3321,7 @@ def test_concurrent_files_invalid_value(tmpdir: str) -> None:
33213321
scan, tasks = _create_scan_and_tasks(tmpdir, num_files=1, rows_per_file=10)
33223322

33233323
with pytest.raises(ValueError, match="concurrent_files must be >= 1"):
3324-
list(scan.to_record_batches(tasks, order=ScanOrder.ARRIVAL, concurrent_files=0))
3324+
list(scan.to_record_batches(tasks, order=ArrivalOrder(concurrent_streams=0)))
33253325

33263326

33273327
def test_parse_location_defaults() -> None:

0 commit comments

Comments (0)