Skip to content

Commit 3db6043

Browse files
committed
fix: Add check table UUID to detect table replacement
1 parent 3855f64 commit 3db6043

3 files changed

Lines changed: 115 additions & 0 deletions

File tree

pyiceberg/table/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,6 +1111,7 @@ def refresh(self) -> Table:
11111111
An updated instance of the same Iceberg table
11121112
"""
11131113
fresh = self.catalog.load_table(self._identifier)
1114+
self._check_uuid(fresh.metadata)
11141115
self.metadata = fresh.metadata
11151116
self.io = fresh.io
11161117
self.metadata_location = fresh.metadata_location
@@ -1491,9 +1492,21 @@ def refs(self) -> dict[str, SnapshotRef]:
14911492
"""Return the snapshot references in the table."""
14921493
return self.metadata.refs
14931494

1495+
def _check_uuid(self, new_metadata: TableMetadata) -> None:
    """Ensure freshly obtained metadata still describes the same table.

    Compares the ``table_uuid`` of the metadata currently held by this table
    with the ``table_uuid`` of ``new_metadata``.

    Args:
        new_metadata: Metadata to compare against the metadata held by this table.

    Raises:
        ValueError: If both UUIDs are truthy and they are not equal.
    """
    known_uuid = self.metadata.table_uuid
    incoming_uuid = new_metadata.table_uuid

    # Nothing to compare when either side has no (truthy) UUID.
    if not (known_uuid and incoming_uuid):
        return

    if known_uuid != incoming_uuid:
        raise ValueError(f"Table UUID does not match: current={known_uuid} != refreshed={incoming_uuid}")
1502+
14941503
def _do_commit(self, updates: tuple[TableUpdate, ...], requirements: tuple[TableRequirement, ...]) -> None:
14951504
response = self.catalog.commit_table(self, requirements, updates)
14961505

1506+
# Only check UUID for existing tables, not new tables
1507+
if not isinstance(self, StagedTable):
1508+
self._check_uuid(response.metadata)
1509+
14971510
# https://github.com/apache/iceberg/blob/f6faa58/core/src/main/java/org/apache/iceberg/CatalogUtil.java#L527
14981511
# delete old metadata if METADATA_DELETE_AFTER_COMMIT_ENABLED is set to true and uses
14991512
# TableProperties.METADATA_PREVIOUS_VERSIONS_MAX to determine how many previous versions to keep -

tests/catalog/test_rest.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from pyiceberg.table.sorting import SortField, SortOrder
4949
from pyiceberg.transforms import IdentityTransform, TruncateTransform
5050
from pyiceberg.typedef import RecursiveDict
51+
from pyiceberg.types import StringType
5152
from pyiceberg.utils.config import Config
5253

5354
TEST_URI = "https://iceberg-test-catalog/"
@@ -2155,3 +2156,86 @@ def test_view_endpoints_enabled_with_config(self, requests_mock: Mocker) -> None
21552156
# View endpoints should be supported when enabled
21562157
catalog._check_endpoint(Capability.V1_LIST_VIEWS)
21572158
catalog._check_endpoint(Capability.V1_DELETE_VIEW)
2159+
2160+
2161+
def test_table_uuid_check_on_commit(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
    """A commit whose response carries a different table UUID raises ``ValueError``."""
    original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
    different_uuid = "550e8400-e29b-41d4-a716-446655440000"
    metadata_location = "s3://warehouse/database/table/metadata.json"
    table_url = f"{TEST_URI}v1/namespaces/namespace/tables/table_name"

    # Load-table response: serves the fixture metadata unchanged.
    load_response = {
        "metadata-location": metadata_location,
        "metadata": example_table_metadata_v2,
        "config": {},
    }
    rest_mock.get(table_url, json=load_response, status_code=200, request_headers=TEST_HEADERS)

    catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
    table = catalog.load_table(("namespace", "table_name"))

    assert str(table.metadata.table_uuid) == original_uuid

    # Commit response: same metadata, but with the table UUID swapped out.
    metadata_with_different_uuid = dict(example_table_metadata_v2)
    metadata_with_different_uuid["table-uuid"] = different_uuid
    commit_response = {
        "metadata-location": metadata_location,
        "metadata": metadata_with_different_uuid,
    }
    rest_mock.post(table_url, json=commit_response, status_code=200, request_headers=TEST_HEADERS)

    with pytest.raises(ValueError) as exc_info:
        table.update_schema().add_column("new_col", StringType()).commit()

    message = str(exc_info.value)
    assert "Table UUID does not match" in message
    assert f"current={original_uuid}" in message
    assert f"refreshed={different_uuid}" in message
2200+
2201+
2202+
def test_table_uuid_check_on_refresh(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
    """Refreshing a table whose reloaded metadata has a different UUID raises ``ValueError``."""
    original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
    different_uuid = "550e8400-e29b-41d4-a716-446655440000"
    metadata_location = "s3://warehouse/database/table/metadata.json"
    table_url = f"{TEST_URI}v1/namespaces/namespace/tables/table_name"

    # First load-table response: serves the fixture metadata unchanged.
    initial_response = {
        "metadata-location": metadata_location,
        "metadata": example_table_metadata_v2,
        "config": {},
    }
    rest_mock.get(table_url, json=initial_response, status_code=200, request_headers=TEST_HEADERS)

    catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
    table = catalog.load_table(("namespace", "table_name"))

    assert str(table.metadata.table_uuid) == original_uuid

    # Re-register the same endpoint so the next load returns a different table UUID.
    metadata_with_different_uuid = dict(example_table_metadata_v2)
    metadata_with_different_uuid["table-uuid"] = different_uuid
    replaced_response = {
        "metadata-location": metadata_location,
        "metadata": metadata_with_different_uuid,
        "config": {},
    }
    rest_mock.get(table_url, json=replaced_response, status_code=200, request_headers=TEST_HEADERS)

    with pytest.raises(ValueError) as exc_info:
        table.refresh()

    message = str(exc_info.value)
    assert "Table UUID does not match" in message
    assert f"current={original_uuid}" in message
    assert f"refreshed={different_uuid}" in message

tests/table/test_init.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,3 +1639,21 @@ def model_roundtrips(model: BaseModel) -> bool:
16391639
if model != type(model).model_validate(model_data):
16401640
pytest.fail(f"model {type(model)} did not roundtrip successfully")
16411641
return True
1642+
1643+
1644+
def test_check_uuid_raises_when_mismatch(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
    """``_check_uuid`` raises ``ValueError`` for metadata whose ``table-uuid`` was replaced."""
    different_uuid = "550e8400-e29b-41d4-a716-446655440000"

    # Copy the fixture metadata and overwrite only the table UUID.
    metadata_with_different_uuid = dict(example_table_metadata_v2)
    metadata_with_different_uuid["table-uuid"] = different_uuid
    new_metadata = TableMetadataV2(**metadata_with_different_uuid)

    with pytest.raises(ValueError) as exc_info:
        table_v2._check_uuid(new_metadata)

    message = str(exc_info.value)
    assert "Table UUID does not match" in message
    assert different_uuid in message
1654+
1655+
1656+
def test_check_uuid_passes_when_match(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
    """``_check_uuid`` does not raise for metadata built from the unmodified fixture."""
    unchanged_metadata = TableMetadataV2(**example_table_metadata_v2)

    # The test passes as long as this call completes without raising.
    table_v2._check_uuid(unchanged_metadata)

0 commit comments

Comments
 (0)