|
21 | 21 | import pytest |
22 | 22 |
|
23 | 23 | from pyiceberg.table import CommitTableResponse, Table |
| 24 | +from pyiceberg.table.update.snapshot import ExpireSnapshots |
24 | 25 |
|
25 | 26 |
|
26 | 27 | def test_cannot_expire_protected_head_snapshot(table_v2: Table) -> None: |
@@ -223,3 +224,173 @@ def test_expire_snapshots_by_ids(table_v2: Table) -> None: |
223 | 224 | assert EXPIRE_SNAPSHOT_1 not in remaining_snapshots |
224 | 225 | assert EXPIRE_SNAPSHOT_2 not in remaining_snapshots |
225 | 226 | assert len(table_v2.metadata.snapshots) == 1 |
| 227 | + |
| 228 | + |
def test_retain_last_n_with_protection(table_v2: Table) -> None:
    """Test retain_last_n keeps most recent snapshots plus protected ones.

    Four fake snapshots are installed on the table, with the oldest one (S1)
    referenced by the ``main`` branch. ``retain_last_n(2)`` is then expected to
    submit a ``remove-snapshots`` update that lists only S2.
    """
    from types import SimpleNamespace

    # Clear shared state set on the class between tests
    ExpireSnapshots._snapshot_ids_to_expire.clear()

    S1 = 101  # oldest (also protected)
    S2 = 102
    S3 = 103
    S4 = 104  # newest

    # Protected S1 as branch head
    table_v2.metadata = table_v2.metadata.model_copy(
        update={
            "refs": {
                "main": MagicMock(snapshot_id=S1, snapshot_ref_type="branch"),
            },
            "snapshots": [
                SimpleNamespace(snapshot_id=S1, timestamp_ms=1, parent_snapshot_id=None),
                SimpleNamespace(snapshot_id=S2, timestamp_ms=2, parent_snapshot_id=None),
                SimpleNamespace(snapshot_id=S3, timestamp_ms=3, parent_snapshot_id=None),
                SimpleNamespace(snapshot_id=S4, timestamp_ms=4, parent_snapshot_id=None),
            ],
        }
    )

    table_v2.catalog = MagicMock()
    kept_ids = {S1, S3, S4}  # retain_last_n=2 keeps S4,S3 plus protected S1
    mock_response = CommitTableResponse(
        metadata=table_v2.metadata.model_copy(update={"snapshots": list(kept_ids)}),
        metadata_location="mock://metadata/location",
        uuid=uuid4(),
    )
    table_v2.catalog.commit_table.return_value = mock_response

    table_v2.maintenance.expire_snapshots().retain_last_n(2).commit()
    table_v2.metadata = mock_response.metadata

    # Fail with a readable message (instead of an unpacking TypeError) when nothing was committed.
    commit_call = table_v2.catalog.commit_table.call_args
    assert commit_call is not None, "commit_table was never called"
    # The updates are the third positional argument; also accept them passed by keyword.
    if len(commit_call.args) > 2:
        updates = commit_call.args[2]
    else:
        updates = commit_call.kwargs.get("updates", ())

    remove_update = next((u for u in updates if getattr(u, "action", None) == "remove-snapshots"), None)
    assert remove_update is not None
    # Only S2 should be expired
    assert set(remove_update.snapshot_ids) == {S2}
    # NOTE: table_v2.metadata is the mocked response here, so this only re-checks the fixture wiring.
    assert S2 not in table_v2.metadata.snapshots
| 275 | + |
| 276 | + |
def test_older_than_with_retention_combination(table_v2: Table) -> None:
    """Test older_than_with_retention combining timestamp, retain_last_n and min_snapshots_to_keep.

    Five unreferenced fake snapshots with timestamps 100..500 are installed and
    the operation is run with ``timestamp_ms=450``, ``retain_last_n=1`` and
    ``min_snapshots_to_keep=2``. The committed ``remove-snapshots`` update is
    expected to list S1, S2 and S3.
    """
    from types import SimpleNamespace

    ExpireSnapshots._snapshot_ids_to_expire.clear()

    # Create 5 snapshots with increasing timestamps
    S1, S2, S3, S4, S5 = 201, 202, 203, 204, 205
    snapshots = [
        SimpleNamespace(snapshot_id=S1, timestamp_ms=100, parent_snapshot_id=None),
        SimpleNamespace(snapshot_id=S2, timestamp_ms=200, parent_snapshot_id=None),
        SimpleNamespace(snapshot_id=S3, timestamp_ms=300, parent_snapshot_id=None),
        SimpleNamespace(snapshot_id=S4, timestamp_ms=400, parent_snapshot_id=None),
        SimpleNamespace(snapshot_id=S5, timestamp_ms=500, parent_snapshot_id=None),
    ]
    table_v2.metadata = table_v2.metadata.model_copy(update={"refs": {}, "snapshots": snapshots})
    table_v2.catalog = MagicMock()

    # Expect to expire S1,S2,S3 ; keep S4 (due to min snapshots) and S5 (retain_last_n=1)
    mock_response = CommitTableResponse(
        metadata=table_v2.metadata.model_copy(update={"snapshots": [S4, S5]}),
        metadata_location="mock://metadata/location",
        uuid=uuid4(),
    )
    table_v2.catalog.commit_table.return_value = mock_response

    table_v2.maintenance.expire_snapshots().older_than_with_retention(
        timestamp_ms=450, retain_last_n=1, min_snapshots_to_keep=2
    ).commit()
    table_v2.metadata = mock_response.metadata

    # Fail with a readable message (instead of an unpacking TypeError) when nothing was committed.
    commit_call = table_v2.catalog.commit_table.call_args
    assert commit_call is not None, "commit_table was never called"
    # The updates are the third positional argument; also accept them passed by keyword.
    if len(commit_call.args) > 2:
        updates = commit_call.args[2]
    else:
        updates = commit_call.kwargs.get("updates", ())

    remove_update = next((u for u in updates if getattr(u, "action", None) == "remove-snapshots"), None)
    assert remove_update is not None
    assert set(remove_update.snapshot_ids) == {S1, S2, S3}
    # NOTE: table_v2.metadata is the mocked response here, so this only re-checks the fixture wiring.
    assert set(table_v2.metadata.snapshots) == {S4, S5}
| 314 | + |
| 315 | + |
def test_with_retention_policy_defaults(table_v2: Table) -> None:
    """Test with_retention_policy uses table property defaults when arguments omitted.

    The ``history.expire.max-snapshot-age-ms`` and
    ``history.expire.min-snapshots-to-keep`` properties are set on the table
    metadata and ``with_retention_policy()`` is called without arguments. The
    committed ``remove-snapshots`` update is expected to list S1 and S2.
    """
    from types import SimpleNamespace

    ExpireSnapshots._snapshot_ids_to_expire.clear()

    # Properties: expire snapshots older than 350ms, keep at least 3 snapshots
    # NOTE(review): if the max age is applied relative to the current time, all five fixture
    # timestamps fall before the cutoff and only min-snapshots-to-keep shapes the result — confirm.
    properties = {
        "history.expire.max-snapshot-age-ms": "350",
        "history.expire.min-snapshots-to-keep": "3",
    }
    S1, S2, S3, S4, S5 = 301, 302, 303, 304, 305
    snapshots = [
        SimpleNamespace(snapshot_id=S1, timestamp_ms=100, parent_snapshot_id=None),
        SimpleNamespace(snapshot_id=S2, timestamp_ms=200, parent_snapshot_id=None),
        SimpleNamespace(snapshot_id=S3, timestamp_ms=300, parent_snapshot_id=None),
        SimpleNamespace(snapshot_id=S4, timestamp_ms=400, parent_snapshot_id=None),
        SimpleNamespace(snapshot_id=S5, timestamp_ms=500, parent_snapshot_id=None),
    ]
    table_v2.metadata = table_v2.metadata.model_copy(update={"refs": {}, "snapshots": snapshots, "properties": properties})
    table_v2.catalog = MagicMock()

    # Expect S1,S2 expired; S3 kept due to min_snapshots_to_keep
    mock_response = CommitTableResponse(
        metadata=table_v2.metadata.model_copy(update={"snapshots": [S3, S4, S5]}),
        metadata_location="mock://metadata/location",
        uuid=uuid4(),
    )
    table_v2.catalog.commit_table.return_value = mock_response

    table_v2.maintenance.expire_snapshots().with_retention_policy().commit()
    table_v2.metadata = mock_response.metadata

    # Fail with a readable message (instead of an unpacking TypeError) when nothing was committed.
    commit_call = table_v2.catalog.commit_table.call_args
    assert commit_call is not None, "commit_table was never called"
    # The updates are the third positional argument; also accept them passed by keyword.
    if len(commit_call.args) > 2:
        updates = commit_call.args[2]
    else:
        updates = commit_call.kwargs.get("updates", ())

    remove_update = next((u for u in updates if getattr(u, "action", None) == "remove-snapshots"), None)
    assert remove_update is not None
    assert set(remove_update.snapshot_ids) == {S1, S2}
    # NOTE: table_v2.metadata is the mocked response here, so this only re-checks the fixture wiring.
    assert set(table_v2.metadata.snapshots) == {S3, S4, S5}
| 355 | + |
| 356 | + |
def test_get_expiration_properties(table_v2: Table) -> None:
    """Check that the ``history.expire.*`` table properties are read back as integers.

    The three properties are written to the table metadata as strings, and
    ``_get_expiration_properties`` is expected to return them, in order, as
    max snapshot age, min snapshots to keep and max ref age.
    """
    # Reset the collection held on the ExpireSnapshots class so earlier tests cannot leak in.
    ExpireSnapshots._snapshot_ids_to_expire.clear()

    expiration_props = {
        "history.expire.max-snapshot-age-ms": "60000",
        "history.expire.min-snapshots-to-keep": "5",
        "history.expire.max-ref-age-ms": "120000",
    }
    table_v2.metadata = table_v2.metadata.model_copy(update={"properties": expiration_props})

    expire_op = table_v2.maintenance.expire_snapshots()
    max_age, min_snaps, max_ref_age = expire_op._get_expiration_properties()

    assert (max_age, min_snaps, max_ref_age) == (60000, 5, 120000)
| 371 | + |
| 372 | + |
def test_get_snapshots_to_expire_with_retention_respects_protection(table_v2: Table) -> None:
    """Verify the retention helper never selects a branch-head snapshot.

    Three fake snapshots are installed, the oldest of which is referenced by
    the ``main`` branch. With ``timestamp_ms=100`` and
    ``min_snapshots_to_keep=1`` the helper is expected to return exactly the
    two unreferenced snapshot ids.
    """
    from types import SimpleNamespace

    # Reset the collection held on the ExpireSnapshots class so earlier tests cannot leak in.
    ExpireSnapshots._snapshot_ids_to_expire.clear()

    protected_id, first_id, second_id = 401, 402, 403

    branch_refs = {"main": MagicMock(snapshot_id=protected_id, snapshot_ref_type="branch")}
    fake_snapshots = [
        SimpleNamespace(snapshot_id=snapshot_id, timestamp_ms=timestamp_ms, parent_snapshot_id=None)
        for snapshot_id, timestamp_ms in ((protected_id, 10), (first_id, 20), (second_id, 30))
    ]
    table_v2.metadata = table_v2.metadata.model_copy(update={"refs": branch_refs, "snapshots": fake_snapshots})

    expire_op = table_v2.maintenance.expire_snapshots()
    candidates = expire_op._get_snapshots_to_expire_with_retention(
        timestamp_ms=100,
        retain_last_n=None,
        min_snapshots_to_keep=1,
    )

    # The branch head must be left alone; both unreferenced snapshots are eligible.
    assert protected_id not in candidates
    assert set(candidates) == {first_id, second_id}
0 commit comments