Skip to content

Commit ad53ec2

Browse files
committed
make PYARROW_USE_LARGE_TYPES_ON_READ work
1 parent 6a01ce6 commit ad53ec2

2 files changed

Lines changed: 10 additions & 7 deletions

File tree

pyiceberg/io/pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1609,7 +1609,7 @@ def _table_from_scan_task(task: FileScanTask) -> pa.Table:
1609 1609   removed_in="0.11.0",
1610 1610   help_message=f"Property `{PYARROW_USE_LARGE_TYPES_ON_READ}` will be removed.",
1611 1611   )
1612      - result = result.cast(arrow_schema)
     1612 + result = result.cast(_pyarrow_schema_ensure_large_types(arrow_schema))
1613 1613
1614 1614   if self._limit is not None:
1615 1615   return result.slice(0, self._limit)

tests/integration/test_reads.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -872,9 +872,12 @@ def test_table_scan_keep_types(catalog: Catalog) -> None:
872 872
873 873
874 874   @pytest.mark.integration
    875 + @pytest.mark.filterwarnings(
    876 + "ignore:Deprecated in 0.10.0, will be removed in 0.11.0. Property `pyarrow.use-large-types-on-read` will be removed.:DeprecationWarning"
    877 + )
875 878   @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
876     - def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
877     - identifier = "default.test_table_scan_override_with_small_types"
    879 + def test_table_scan_override_with_large_types(catalog: Catalog) -> None:
    880 + identifier = "default.test_table_scan_override_with_large_types"
878 881   arrow_table = pa.Table.from_arrays(
879 882   [
880 883   pa.array(["a", "b", "c"]),
@@ -900,15 +903,15 @@ def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
900 903   with tbl.update_schema() as update_schema:
901 904   update_schema.update_column("string-to-binary", BinaryType())
902 905
903     - tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "False"
    906 + tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "True"
904 907   result_table = tbl.scan().to_arrow()
905 908
906 909   expected_schema = pa.schema(
907 910   [
908     - pa.field("string", pa.string()),
    911 + pa.field("string", pa.large_string()),
909 912   pa.field("string-to-binary", pa.large_binary()),
910     - pa.field("binary", pa.binary()),
911     - pa.field("list", pa.list_(pa.string())),
    913 + pa.field("binary", pa.large_binary()),
    914 + pa.field("list", pa.large_list(pa.large_string())),
912 915   ]
913 916   )
914 917   assert result_table.schema.equals(expected_schema)

0 commit comments

Comments
 (0)