Skip to content

Commit c63b4d2

Browse files
committed
fix small types
1 parent ff76144 commit c63b4d2

4 files changed

Lines changed: 51 additions & 51 deletions

File tree

pyiceberg/io/pyarrow.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ def field(self, field: NestedField, field_result: pa.DataType) -> pa.Field:
626626

627627
def list(self, list_type: ListType, element_result: pa.DataType) -> pa.DataType:
628628
element_field = self.field(list_type.element_field, element_result)
629-
return pa.large_list(value_type=element_field)
629+
return pa.list_(value_type=element_field)
630630

631631
def map(self, map_type: MapType, key_result: pa.DataType, value_result: pa.DataType) -> pa.DataType:
632632
key_field = self.field(map_type.key_field, key_result)
@@ -676,7 +676,7 @@ def visit_timestamptz_ns(self, _: TimestamptzNanoType) -> pa.DataType:
676676
return pa.timestamp(unit="ns", tz="UTC")
677677

678678
def visit_string(self, _: StringType) -> pa.DataType:
679-
return pa.large_string()
679+
return pa.string()
680680

681681
def visit_uuid(self, _: UUIDType) -> pa.DataType:
682682
return pa.binary(16)
@@ -685,7 +685,7 @@ def visit_unknown(self, _: UnknownType) -> pa.DataType:
685685
return pa.null()
686686

687687
def visit_binary(self, _: BinaryType) -> pa.DataType:
688-
return pa.large_binary()
688+
return pa.binary()
689689

690690

691691
def _convert_scalar(value: Any, iceberg_type: IcebergType) -> pa.scalar:

tests/io/test_pyarrow.py

Lines changed: 45 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ def test_pyarrow_unified_session_properties() -> None:
406406

407407
def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema) -> None:
408408
actual = schema_to_pyarrow(table_schema_nested)
409-
expected = """foo: large_string
409+
expected = """foo: string
410410
-- field metadata --
411411
PARQUET:field_id: '1'
412412
bar: int32 not null
@@ -415,20 +415,20 @@ def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema)
415415
baz: bool
416416
-- field metadata --
417417
PARQUET:field_id: '3'
418-
qux: large_list<element: large_string not null> not null
419-
child 0, element: large_string not null
418+
qux: list<element: string not null> not null
419+
child 0, element: string not null
420420
-- field metadata --
421421
PARQUET:field_id: '5'
422422
-- field metadata --
423423
PARQUET:field_id: '4'
424-
quux: map<large_string, map<large_string, int32>> not null
425-
child 0, entries: struct<key: large_string not null, value: map<large_string, int32> not null> not null
426-
child 0, key: large_string not null
424+
quux: map<string, map<string, int32>> not null
425+
child 0, entries: struct<key: string not null, value: map<string, int32> not null> not null
426+
child 0, key: string not null
427427
-- field metadata --
428428
PARQUET:field_id: '7'
429-
child 1, value: map<large_string, int32> not null
430-
child 0, entries: struct<key: large_string not null, value: int32 not null> not null
431-
child 0, key: large_string not null
429+
child 1, value: map<string, int32> not null
430+
child 0, entries: struct<key: string not null, value: int32 not null> not null
431+
child 0, key: string not null
432432
-- field metadata --
433433
PARQUET:field_id: '9'
434434
child 1, value: int32 not null
@@ -438,7 +438,7 @@ def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema)
438438
PARQUET:field_id: '8'
439439
-- field metadata --
440440
PARQUET:field_id: '6'
441-
location: large_list<element: struct<latitude: float, longitude: float> not null> not null
441+
location: list<element: struct<latitude: float, longitude: float> not null> not null
442442
child 0, element: struct<latitude: float, longitude: float> not null
443443
child 0, latitude: float
444444
-- field metadata --
@@ -450,8 +450,8 @@ def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema)
450450
PARQUET:field_id: '12'
451451
-- field metadata --
452452
PARQUET:field_id: '11'
453-
person: struct<name: large_string, age: int32 not null>
454-
child 0, name: large_string
453+
person: struct<name: string, age: int32 not null>
454+
child 0, name: string
455455
-- field metadata --
456456
PARQUET:field_id: '16'
457457
child 1, age: int32 not null
@@ -464,24 +464,24 @@ def test_schema_to_pyarrow_schema_include_field_ids(table_schema_nested: Schema)
464464

465465
def test_schema_to_pyarrow_schema_exclude_field_ids(table_schema_nested: Schema) -> None:
466466
actual = schema_to_pyarrow(table_schema_nested, include_field_ids=False)
467-
expected = """foo: large_string
467+
expected = """foo: string
468468
bar: int32 not null
469469
baz: bool
470-
qux: large_list<element: large_string not null> not null
471-
child 0, element: large_string not null
472-
quux: map<large_string, map<large_string, int32>> not null
473-
child 0, entries: struct<key: large_string not null, value: map<large_string, int32> not null> not null
474-
child 0, key: large_string not null
475-
child 1, value: map<large_string, int32> not null
476-
child 0, entries: struct<key: large_string not null, value: int32 not null> not null
477-
child 0, key: large_string not null
470+
qux: list<element: string not null> not null
471+
child 0, element: string not null
472+
quux: map<string, map<string, int32>> not null
473+
child 0, entries: struct<key: string not null, value: map<string, int32> not null> not null
474+
child 0, key: string not null
475+
child 1, value: map<string, int32> not null
476+
child 0, entries: struct<key: string not null, value: int32 not null> not null
477+
child 0, key: string not null
478478
child 1, value: int32 not null
479-
location: large_list<element: struct<latitude: float, longitude: float> not null> not null
479+
location: list<element: struct<latitude: float, longitude: float> not null> not null
480480
child 0, element: struct<latitude: float, longitude: float> not null
481481
child 0, latitude: float
482482
child 1, longitude: float
483-
person: struct<name: large_string, age: int32 not null>
484-
child 0, name: large_string
483+
person: struct<name: string, age: int32 not null>
484+
child 0, name: string
485485
child 1, age: int32 not null"""
486486
assert repr(actual) == expected
487487

@@ -546,18 +546,18 @@ def test_timestamptz_type_to_pyarrow() -> None:
546546

547547
def test_string_type_to_pyarrow() -> None:
548548
iceberg_type = StringType()
549-
assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.large_string()
549+
assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.string()
550550

551551

552552
def test_binary_type_to_pyarrow() -> None:
553553
iceberg_type = BinaryType()
554-
assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()
554+
assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.binary()
555555

556556

557557
def test_struct_type_to_pyarrow(table_schema_simple: Schema) -> None:
558558
expected = pa.struct(
559559
[
560-
pa.field("foo", pa.large_string(), nullable=True, metadata={"field_id": "1"}),
560+
pa.field("foo", pa.string(), nullable=True, metadata={"field_id": "1"}),
561561
pa.field("bar", pa.int32(), nullable=False, metadata={"field_id": "2"}),
562562
pa.field("baz", pa.bool_(), nullable=True, metadata={"field_id": "3"}),
563563
]
@@ -575,7 +575,7 @@ def test_map_type_to_pyarrow() -> None:
575575
)
576576
assert visit(iceberg_map, _ConvertToArrowSchema()) == pa.map_(
577577
pa.field("key", pa.int32(), nullable=False, metadata={"field_id": "1"}),
578-
pa.field("value", pa.large_string(), nullable=False, metadata={"field_id": "2"}),
578+
pa.field("value", pa.string(), nullable=False, metadata={"field_id": "2"}),
579579
)
580580

581581

@@ -585,7 +585,7 @@ def test_list_type_to_pyarrow() -> None:
585585
element_type=IntegerType(),
586586
element_required=True,
587587
)
588-
assert visit(iceberg_map, _ConvertToArrowSchema()) == pa.large_list(
588+
assert visit(iceberg_map, _ConvertToArrowSchema()) == pa.list_(
589589
pa.field("element", pa.int32(), nullable=False, metadata={"field_id": "1"})
590590
)
591591

@@ -668,11 +668,11 @@ def test_expr_less_than_or_equal_to_pyarrow(bound_reference: BoundReference[str]
668668

669669
def test_expr_in_to_pyarrow(bound_reference: BoundReference[str]) -> None:
670670
assert repr(expression_to_pyarrow(BoundIn(bound_reference, {literal("hello"), literal("world")}))) in (
671-
"""<pyarrow.compute.Expression is_in(foo, {value_set=large_string:[
671+
"""<pyarrow.compute.Expression is_in(foo, {value_set=string:[
672672
"hello",
673673
"world"
674674
], null_matching_behavior=MATCH})>""",
675-
"""<pyarrow.compute.Expression is_in(foo, {value_set=large_string:[
675+
"""<pyarrow.compute.Expression is_in(foo, {value_set=string:[
676676
"world",
677677
"hello"
678678
], null_matching_behavior=MATCH})>""",
@@ -681,11 +681,11 @@ def test_expr_in_to_pyarrow(bound_reference: BoundReference[str]) -> None:
681681

682682
def test_expr_not_in_to_pyarrow(bound_reference: BoundReference[str]) -> None:
683683
assert repr(expression_to_pyarrow(BoundNotIn(bound_reference, {literal("hello"), literal("world")}))) in (
684-
"""<pyarrow.compute.Expression invert(is_in(foo, {value_set=large_string:[
684+
"""<pyarrow.compute.Expression invert(is_in(foo, {value_set=string:[
685685
"hello",
686686
"world"
687687
], null_matching_behavior=MATCH}))>""",
688-
"""<pyarrow.compute.Expression invert(is_in(foo, {value_set=large_string:[
688+
"""<pyarrow.compute.Expression invert(is_in(foo, {value_set=string:[
689689
"world",
690690
"hello"
691691
], null_matching_behavior=MATCH}))>""",
@@ -1030,12 +1030,12 @@ def test_projection_add_column(file_int: str) -> None:
10301030
assert (
10311031
repr(result_table.schema)
10321032
== """id: int32
1033-
list: large_list<element: int32>
1033+
list: list<element: int32>
10341034
child 0, element: int32
1035-
map: map<int32, large_string>
1036-
child 0, entries: struct<key: int32 not null, value: large_string> not null
1035+
map: map<int32, string>
1036+
child 0, entries: struct<key: int32 not null, value: string> not null
10371037
child 0, key: int32 not null
1038-
child 1, value: large_string
1038+
child 1, value: string
10391039
location: struct<lat: double, lon: double>
10401040
child 0, lat: double
10411041
child 1, lon: double"""
@@ -1051,7 +1051,7 @@ def test_read_list(schema_list: Schema, file_list: str) -> None:
10511051

10521052
assert (
10531053
repr(result_table.schema)
1054-
== """ids: large_list<element: int32>
1054+
== """ids: list<element: int32>
10551055
child 0, element: int32"""
10561056
)
10571057

@@ -1088,10 +1088,10 @@ def test_projection_add_column_struct(schema_int: Schema, file_int: str) -> None
10881088
assert r.as_py() is None
10891089
assert (
10901090
repr(result_table.schema)
1091-
== """id: map<int32, large_string>
1092-
child 0, entries: struct<key: int32 not null, value: large_string> not null
1091+
== """id: map<int32, string>
1092+
child 0, entries: struct<key: int32 not null, value: string> not null
10931093
child 0, key: int32 not null
1094-
child 1, value: large_string"""
1094+
child 1, value: string"""
10951095
)
10961096

10971097

@@ -1422,7 +1422,7 @@ def test_projection_list_of_structs(schema_list_of_structs: Schema, file_list_of
14221422
]
14231423
assert (
14241424
repr(result_table.schema)
1425-
== """locations: large_list<element: struct<latitude: double not null, longitude: double not null, altitude: double>>
1425+
== """locations: list<element: struct<latitude: double not null, longitude: double not null, altitude: double>>
14261426
child 0, element: struct<latitude: double not null, longitude: double not null, altitude: double>
14271427
child 0, latitude: double not null
14281428
child 1, longitude: double not null
@@ -1567,7 +1567,7 @@ def test_delete(deletes_file: str, example_task: FileScanTask, table_schema_simp
15671567
assert (
15681568
str(with_deletes)
15691569
== """pyarrow.Table
1570-
foo: large_string
1570+
foo: string
15711571
bar: int32 not null
15721572
baz: bool
15731573
----
@@ -1604,7 +1604,7 @@ def test_delete_duplicates(deletes_file: str, example_task: FileScanTask, table_
16041604
assert (
16051605
str(with_deletes)
16061606
== """pyarrow.Table
1607-
foo: large_string
1607+
foo: string
16081608
bar: int32 not null
16091609
baz: bool
16101610
----
@@ -1635,7 +1635,7 @@ def test_pyarrow_wrap_fsspec(example_task: FileScanTask, table_schema_simple: Sc
16351635
assert (
16361636
str(projection)
16371637
== """pyarrow.Table
1638-
foo: large_string
1638+
foo: string
16391639
bar: int32 not null
16401640
baz: bool
16411641
----

tests/io/test_pyarrow_visitor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,14 +229,14 @@ def test_pyarrow_timestamp_tz_invalid_tz() -> None:
229229
def test_pyarrow_string_to_iceberg(pyarrow_type: pa.DataType) -> None:
230230
converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
231231
assert converted_iceberg_type == StringType()
232-
assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_string()
232+
assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.string()
233233

234234

235235
@pytest.mark.parametrize("pyarrow_type", [pa.binary(), pa.large_binary(), pa.binary_view()])
236236
def test_pyarrow_variable_binary_to_iceberg(pyarrow_type: pa.DataType) -> None:
237237
converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
238238
assert converted_iceberg_type == BinaryType()
239-
assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()
239+
assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.binary()
240240

241241

242242
def test_pyarrow_struct_to_iceberg() -> None:

tests/test_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1648,7 +1648,7 @@ def test_arrow_schema() -> None:
16481648

16491649
expected_schema = pa.schema(
16501650
[
1651-
pa.field("foo", pa.large_string(), nullable=False),
1651+
pa.field("foo", pa.string(), nullable=False),
16521652
pa.field("bar", pa.int32(), nullable=True),
16531653
pa.field("baz", pa.bool_(), nullable=True),
16541654
]

0 commit comments

Comments
 (0)