|
16 | 16 | # under the License. |
17 | 17 | # pylint:disable=redefined-outer-name |
18 | 18 | import json |
| 19 | +from unittest.mock import Mock |
19 | 20 | import uuid |
20 | 21 | from copy import copy |
21 | 22 | from typing import Any, Dict |
|
43 | 44 | from pyiceberg.partitioning import PartitionField, PartitionSpec |
44 | 45 | from pyiceberg.schema import Schema |
45 | 46 | from pyiceberg.table import ( |
| 47 | + ALWAYS_TRUE, |
46 | 48 | CommitTableRequest, |
47 | 49 | StaticTable, |
48 | 50 | Table, |
|
94 | 96 | BucketTransform, |
95 | 97 | IdentityTransform, |
96 | 98 | ) |
| 99 | +from pyiceberg.typedef import Record |
97 | 100 | from pyiceberg.types import ( |
98 | 101 | BinaryType, |
99 | 102 | BooleanType, |
@@ -1378,3 +1381,91 @@ def test_remove_statistics_update(table_v2_with_statistics: Table) -> None: |
1378 | 1381 | table_v2_with_statistics.metadata, |
1379 | 1382 | (RemoveStatisticsUpdate(snapshot_id=123456789),), |
1380 | 1383 | ) |
| 1384 | + |
| 1385 | + |
def test_transaction_commit_retry(table_v1: Table, mocker: Mock) -> None:
    """Check that a failed transaction commit is retried with an equivalent payload.

    All data, manifest and metadata IO is patched out, and ``Table._do_commit`` is
    replaced by a mock whose first call raises ``CommitFailedException`` and whose
    second call succeeds. The test then compares the ``updates`` and ``requirements``
    keyword arguments of both attempts by count and by type.
    """
    import pyarrow as pa

    mock_data_file = DataFile(
        content=DataFileContent.DATA,
        file_path="s3://some-path/some-file.parquet",
        file_format=FileFormat.PARQUET,
        partition=Record(),
        record_count=131327,
        file_size_in_bytes=220669226,
        column_sizes={1: 220661854},
        value_counts={1: 131327},
        null_value_counts={1: 0},
        nan_value_counts={},
        lower_bounds={1: b"aaaaaaaaaaaaaaaa"},
        upper_bounds={1: b"zzzzzzzzzzzzzzzz"},
        key_metadata=b"\xde\xad\xbe\xef",
        split_offsets=[4, 133697593],
        equality_ids=[],
        sort_order_id=4,
    )

    # Patch out IO of data, manifests, and metadata
    mocker.patch("pyiceberg.io.pyarrow.write_file", return_value=[mock_data_file])
    mocker.patch("pyiceberg.table.update.snapshot.write_manifest")
    mocker.patch("pyiceberg.table.update.snapshot.write_manifest_list")
    mocker.patch("pyiceberg.catalog.noop.NoopCatalog.load_table", return_value=table_v1)

    # The first attempt raises and the second returns None. The mock records the
    # arguments of every call (including the one that raises), so no hand-written
    # counter or capture list is needed.
    do_commit = mocker.patch(
        "pyiceberg.table.Table._do_commit",
        side_effect=[CommitFailedException("Test"), None],
    )

    schema = pa.schema(
        [
            pa.field("x", pa.int64(), nullable=False),
            pa.field("y", pa.int64(), nullable=False),
            pa.field("z", pa.int64(), nullable=False),
        ]
    )

    trx = table_v1.transaction()
    with pytest.warns(UserWarning):
        trx.delete(ALWAYS_TRUE)
    trx.append(pa.Table.from_pydict({"x": [1, 2, 3], "y": [4, 5, 6], "z": [7, 8, 9]}, schema=schema))
    trx.commit_transaction()

    # Verify that _do_commit was called twice (first failed, second succeeded)
    assert do_commit.call_count == 2, f"Expected 2 calls to _do_commit, got {do_commit.call_count}"

    first_call, second_call = do_commit.call_args_list

    # Index the keyword arguments directly: a silent default of `()` would make
    # every comparison below pass vacuously if the payload were missing.
    first_updates = first_call.kwargs["updates"]
    first_requirements = first_call.kwargs["requirements"]
    second_updates = second_call.kwargs["updates"]
    second_requirements = second_call.kwargs["requirements"]

    assert first_updates, "First commit attempt carried no updates"
    assert first_requirements, "First commit attempt carried no requirements"

    # Assert retry has same number of updates and requirements as first attempt
    assert len(first_updates) == len(second_updates), f"Updates count mismatch: {len(first_updates)} vs {len(second_updates)}"
    assert len(first_requirements) == len(second_requirements), (
        f"Requirements count mismatch: {len(first_requirements)} vs {len(second_requirements)}"
    )

    # Assert retry has same types of updates as first attempt
    first_update_types = [type(update).__name__ for update in first_updates]
    second_update_types = [type(update).__name__ for update in second_updates]
    assert first_update_types == second_update_types, f"Update types mismatch: {first_update_types} vs {second_update_types}"

    # Assert retry has same types of requirements as first attempt
    first_requirement_types = [type(req).__name__ for req in first_requirements]
    second_requirement_types = [type(req).__name__ for req in second_requirements]
    assert first_requirement_types == second_requirement_types, (
        f"Requirement types mismatch: {first_requirement_types} vs {second_requirement_types}"
    )
0 commit comments