|
30 | 30 | from pyiceberg.table import UpsertResult |
31 | 31 | from pyiceberg.table.snapshots import Operation |
32 | 32 | from pyiceberg.table.upsert_util import create_match_filter |
33 | | -from pyiceberg.types import IntegerType, NestedField, StringType |
| 33 | +from pyiceberg.types import IntegerType, NestedField, StringType, StructType |
34 | 34 | from tests.catalog.test_base import InMemoryCatalog, Table |
35 | 35 |
|
36 | 36 |
|
@@ -509,3 +509,69 @@ def test_upsert_without_identifier_fields(catalog: Catalog) -> None: |
509 | 509 | ValueError, match="Join columns could not be found, please set identifier-field-ids or pass in explicitly." |
510 | 510 | ): |
511 | 511 | tbl.upsert(df) |
| 512 | + |
| 513 | + |
| 514 | +def test_upsert_struct_field_fails_in_join(catalog: Catalog) -> None: |
| 515 | + identifier = "default.test_upsert_struct_field_fails" |
| 516 | + _drop_table(catalog, identifier) |
| 517 | + |
| 518 | + schema = Schema( |
| 519 | + NestedField(1, "id", IntegerType(), required=True), |
| 520 | + NestedField( |
| 521 | + 2, |
| 522 | + "nested_type", |
| 523 | + # Struct<sub1: string, sub2: string> |
| 524 | + StructType( |
| 525 | + NestedField(3, "sub1", StringType(), required=True), |
| 526 | + NestedField(4, "sub2", StringType(), required=True), |
| 527 | + ), |
| 528 | + required=False, |
| 529 | + ), |
| 530 | + identifier_field_ids=[1], |
| 531 | + ) |
| 532 | + |
| 533 | + tbl = catalog.create_table(identifier, schema=schema) |
| 534 | + |
| 535 | + arrow_schema = pa.schema( |
| 536 | + [ |
| 537 | + pa.field("id", pa.int32(), nullable=False), |
| 538 | + pa.field( |
| 539 | + "nested_type", |
| 540 | + pa.struct( |
| 541 | + [ |
| 542 | + pa.field("sub1", pa.large_string(), nullable=False), |
| 543 | + pa.field("sub2", pa.large_string(), nullable=False), |
| 544 | + ] |
| 545 | + ), |
| 546 | + nullable=True, |
| 547 | + ), |
| 548 | + ] |
| 549 | + ) |
| 550 | + |
| 551 | + initial_data = pa.Table.from_pylist( |
| 552 | + [ |
| 553 | + { |
| 554 | + "id": 1, |
| 555 | + "nested_type": {"sub1": "bla1", "sub2": "bla"}, |
| 556 | + } |
| 557 | + ], |
| 558 | + schema=arrow_schema, |
| 559 | + ) |
| 560 | + tbl.append(initial_data) |
| 561 | + |
| 562 | + update_data = pa.Table.from_pylist( |
| 563 | + [ |
| 564 | + { |
| 565 | + "id": 1, |
| 566 | + "nested_type": {"sub1": "bla1", "sub2": "bla"}, |
| 567 | + } |
| 568 | + ], |
| 569 | + schema=arrow_schema, |
| 570 | + ) |
| 571 | + |
| 572 | + upd = tbl.upsert(update_data, join_cols=["id"]) |
| 573 | + |
| 574 | + # The row must be reported as updated even though its values are unchanged: |
| 575 | + # when pyarrow isn't able to compare rows, everything is simply updated. |
| 576 | + assert upd.rows_updated == 1 |
| 577 | + assert upd.rows_inserted == 0 |
0 commit comments