Skip to content

Commit e2f97cd

Browse files
committed
fix: Add table UUID check to detect table replacement
1 parent 3855f64 commit e2f97cd

3 files changed

Lines changed: 117 additions & 0 deletions

File tree

pyiceberg/table/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,6 +1111,7 @@ def refresh(self) -> Table:
11111111
An updated instance of the same Iceberg table
11121112
"""
11131113
fresh = self.catalog.load_table(self._identifier)
1114+
self._check_uuid(self.metadata, fresh.metadata)
11141115
self.metadata = fresh.metadata
11151116
self.io = fresh.io
11161117
self.metadata_location = fresh.metadata_location
@@ -1491,9 +1492,22 @@ def refs(self) -> dict[str, SnapshotRef]:
14911492
"""Return the snapshot references in the table."""
14921493
return self.metadata.refs
14931494

1495+
@staticmethod
1496+
def _check_uuid(current_metadata: TableMetadata, new_metadata: TableMetadata) -> None:
1497+
"""Validate that two table metadata objects carry the same table UUID.

Called with the table's current metadata and the metadata obtained from a
refresh or from a commit response; a mismatch is treated as a sign that the
table was replaced.

Args:
    current_metadata: Metadata currently held by the table.
    new_metadata: Newly obtained metadata to compare against.

Raises:
    ValueError: If the two ``table_uuid`` values are not equal.
"""
1498+
current = current_metadata.table_uuid
1499+
refreshed = new_metadata.table_uuid
1500+
1501+
if current != refreshed:
1502+
raise ValueError(f"Table UUID does not match: current={current} != refreshed={refreshed}")
1503+
14941504
def _do_commit(self, updates: tuple[TableUpdate, ...], requirements: tuple[TableRequirement, ...]) -> None:
14951505
response = self.catalog.commit_table(self, requirements, updates)
14961506

1507+
# Check UUID to detect table replacement (matches Java's RESTTableOperations.updateCurrentMetadata)
1508+
if not isinstance(self, StagedTable):
1509+
self._check_uuid(self.metadata, response.metadata)
1510+
14971511
# https://github.com/apache/iceberg/blob/f6faa58/core/src/main/java/org/apache/iceberg/CatalogUtil.java#L527
14981512
# delete old metadata if METADATA_DELETE_AFTER_COMMIT_ENABLED is set to true and uses
14991513
# TableProperties.METADATA_PREVIOUS_VERSIONS_MAX to determine how many previous versions to keep -

tests/catalog/test_rest.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from pyiceberg.table.sorting import SortField, SortOrder
4949
from pyiceberg.transforms import IdentityTransform, TruncateTransform
5050
from pyiceberg.typedef import RecursiveDict
51+
from pyiceberg.types import StringType
5152
from pyiceberg.utils.config import Config
5253

5354
TEST_URI = "https://iceberg-test-catalog/"
@@ -2155,3 +2156,87 @@ def test_view_endpoints_enabled_with_config(self, requests_mock: Mocker) -> None
21552156
# View endpoints should be supported when enabled
21562157
catalog._check_endpoint(Capability.V1_LIST_VIEWS)
21572158
catalog._check_endpoint(Capability.V1_DELETE_VIEW)
2159+
2160+
2161+
def test_table_uuid_check_on_commit(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
2162+
"""Test that UUID mismatch is detected on commit response (matches Java RESTTableOperations behavior)."""
2163+
original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
2164+
different_uuid = "550e8400-e29b-41d4-a716-446655440000"
2165+
metadata_location = "s3://warehouse/database/table/metadata.json"
2166+
2167+
# Mock the GET on the table endpoint so load_table() returns the fixture metadata.
rest_mock.get(
2168+
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
2169+
json={
2170+
"metadata-location": metadata_location,
2171+
"metadata": example_table_metadata_v2,
2172+
"config": {},
2173+
},
2174+
status_code=200,
2175+
request_headers=TEST_HEADERS,
2176+
)
2177+
2178+
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
2179+
table = catalog.load_table(("namespace", "table_name"))
2180+
2181+
# Sanity check: the loaded table carries the UUID this test treats as the original.
assert str(table.metadata.table_uuid) == original_uuid
2182+
2183+
# Same metadata as the fixture, with only the table UUID swapped out.
metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
2184+
2185+
# Mock the POST on the table endpoint so the commit below receives metadata with the mismatching UUID.
rest_mock.post(
2186+
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
2187+
json={
2188+
"metadata-location": metadata_location,
2189+
"metadata": metadata_with_different_uuid,
2190+
},
2191+
status_code=200,
2192+
request_headers=TEST_HEADERS,
2193+
)
2194+
2195+
# The commit is expected to fail because the response UUID differs from the table's UUID.
with pytest.raises(ValueError) as exc_info:
2196+
table.update_schema().add_column("new_col", StringType()).commit()
2197+
2198+
# The error message must report both the current and the received UUID.
assert "Table UUID does not match" in str(exc_info.value)
2199+
assert f"current={original_uuid}" in str(exc_info.value)
2200+
assert f"refreshed={different_uuid}" in str(exc_info.value)
2201+
2202+
2203+
def test_table_uuid_check_on_refresh(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
"""Test that UUID mismatch is detected when refreshing a table."""
2204+
original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
2205+
different_uuid = "550e8400-e29b-41d4-a716-446655440000"
2206+
metadata_location = "s3://warehouse/database/table/metadata.json"
2207+
2208+
# Mock the GET on the table endpoint so the initial load_table() returns the fixture metadata.
rest_mock.get(
2209+
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
2210+
json={
2211+
"metadata-location": metadata_location,
2212+
"metadata": example_table_metadata_v2,
2213+
"config": {},
2214+
},
2215+
status_code=200,
2216+
request_headers=TEST_HEADERS,
2217+
)
2218+
2219+
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
2220+
table = catalog.load_table(("namespace", "table_name"))
2221+
2222+
# Sanity check: the loaded table carries the UUID this test treats as the original.
assert str(table.metadata.table_uuid) == original_uuid
2223+
2224+
# Same metadata as the fixture, with only the table UUID swapped out.
metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
2225+
2226+
# Register the same GET endpoint again, now answering with the mismatching UUID for the refresh below.
rest_mock.get(
2227+
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
2228+
json={
2229+
"metadata-location": metadata_location,
2230+
"metadata": metadata_with_different_uuid,
2231+
"config": {},
2232+
},
2233+
status_code=200,
2234+
request_headers=TEST_HEADERS,
2235+
)
2236+
2237+
# The refresh is expected to fail because the reloaded UUID differs from the table's UUID.
with pytest.raises(ValueError) as exc_info:
2238+
table.refresh()
2239+
2240+
# The error message must report both the current and the reloaded UUID.
assert "Table UUID does not match" in str(exc_info.value)
2241+
assert f"current={original_uuid}" in str(exc_info.value)
2242+
assert f"refreshed={different_uuid}" in str(exc_info.value)

tests/table/test_init.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,3 +1639,21 @@ def model_roundtrips(model: BaseModel) -> bool:
16391639
if model != type(model).model_validate(model_data):
16401640
pytest.fail(f"model {type(model)} did not roundtrip successfully")
16411641
return True
1642+
1643+
1644+
def test_check_uuid_raises_when_mismatch(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
"""Test that Table._check_uuid raises ValueError when the new metadata has a different table UUID."""
1645+
different_uuid = "550e8400-e29b-41d4-a716-446655440000"
1646+
# Same metadata as the fixture, with only the table UUID swapped out.
metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
1647+
new_metadata = TableMetadataV2(**metadata_with_different_uuid)
1648+
1649+
with pytest.raises(ValueError) as exc_info:
1650+
Table._check_uuid(table_v2.metadata, new_metadata)
1651+
1652+
# The error message must name the mismatching UUID.
assert "Table UUID does not match" in str(exc_info.value)
1653+
assert different_uuid in str(exc_info.value)
1654+
1655+
1656+
def test_check_uuid_passes_when_match(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
"""Test that Table._check_uuid does not raise when both metadata objects have the same table UUID."""
1657+
# NOTE(review): relies on the table_v2 fixture sharing its UUID with example_table_metadata_v2 - confirm in conftest.
new_metadata = TableMetadataV2(**example_table_metadata_v2)
1658+
# Should not raise with same uuid
1659+
Table._check_uuid(table_v2.metadata, new_metadata)

0 commit comments

Comments
 (0)