Skip to content

Commit a1a126f

Browse files
committed
test: enhance snapshot expiration tests with retention policy validations
1 parent a8fe03d commit a1a126f

1 file changed

Lines changed: 111 additions & 89 deletions

File tree

tests/table/test_expire_snapshots.py

Lines changed: 111 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -233,28 +233,28 @@ def test_retain_last_n_with_protection(table_v2: Table) -> None:
233233
# Clear shared state set on the class between tests
234234
ExpireSnapshots._snapshot_ids_to_expire.clear()
235235

236-
S1 = 101 # oldest (also protected)
237-
S2 = 102
238-
S3 = 103
239-
S4 = 104 # newest
236+
PROTECTED_SNAPSHOT = 3051729675574597101 # oldest (also protected)
237+
EXPIRE_SNAPSHOT = 3051729675574597102
238+
KEEP_SNAPSHOT_1 = 3051729675574597103
239+
KEEP_SNAPSHOT_2 = 3051729675574597104 # newest
240240

241-
# Protected S1 as branch head
241+
# Protected PROTECTED_SNAPSHOT as branch head
242242
table_v2.metadata = table_v2.metadata.model_copy(
243243
update={
244244
"refs": {
245-
"main": MagicMock(snapshot_id=S1, snapshot_ref_type="branch"),
245+
"main": MagicMock(snapshot_id=PROTECTED_SNAPSHOT, snapshot_ref_type="branch"),
246246
},
247247
"snapshots": [
248-
SimpleNamespace(snapshot_id=S1, timestamp_ms=1, parent_snapshot_id=None),
249-
SimpleNamespace(snapshot_id=S2, timestamp_ms=2, parent_snapshot_id=None),
250-
SimpleNamespace(snapshot_id=S3, timestamp_ms=3, parent_snapshot_id=None),
251-
SimpleNamespace(snapshot_id=S4, timestamp_ms=4, parent_snapshot_id=None),
248+
SimpleNamespace(snapshot_id=PROTECTED_SNAPSHOT, timestamp_ms=1, parent_snapshot_id=None),
249+
SimpleNamespace(snapshot_id=EXPIRE_SNAPSHOT, timestamp_ms=2, parent_snapshot_id=None),
250+
SimpleNamespace(snapshot_id=KEEP_SNAPSHOT_1, timestamp_ms=3, parent_snapshot_id=None),
251+
SimpleNamespace(snapshot_id=KEEP_SNAPSHOT_2, timestamp_ms=4, parent_snapshot_id=None),
252252
],
253253
}
254254
)
255255

256256
table_v2.catalog = MagicMock()
257-
kept_ids = {S1, S3, S4} # retain_last_n=2 keeps S4,S3 plus protected S1
257+
kept_ids = {PROTECTED_SNAPSHOT, KEEP_SNAPSHOT_1, KEEP_SNAPSHOT_2} # retain_last_n=2 keeps newest plus protected
258258
mock_response = CommitTableResponse(
259259
metadata=table_v2.metadata.model_copy(update={"snapshots": list(kept_ids)}),
260260
metadata_location="mock://metadata/location",
@@ -269,128 +269,150 @@ def test_retain_last_n_with_protection(table_v2: Table) -> None:
269269
updates = args[2] if len(args) > 2 else ()
270270
remove_update = next((u for u in updates if getattr(u, "action", None) == "remove-snapshots"), None)
271271
assert remove_update is not None
272-
# Only S2 should be expired
273-
assert set(remove_update.snapshot_ids) == {S2}
274-
assert S2 not in table_v2.metadata.snapshots
272+
# Only EXPIRE_SNAPSHOT should be expired
273+
assert set(remove_update.snapshot_ids) == {EXPIRE_SNAPSHOT}
274+
assert EXPIRE_SNAPSHOT not in table_v2.metadata.snapshots
275275

276276

277-
def test_older_than_with_retention_combination(table_v2: Table) -> None:
278-
"""Test older_than_with_retention combining timestamp, retain_last_n and min_snapshots_to_keep."""
277+
def test_retain_last_n_validation(table_v2: Table) -> None:
278+
"""Test retain_last_n validates n >= 1."""
279+
ExpireSnapshots._snapshot_ids_to_expire.clear()
280+
281+
with pytest.raises(ValueError, match="Number of snapshots to retain must be at least 1"):
282+
table_v2.maintenance.expire_snapshots().retain_last_n(0)
283+
284+
with pytest.raises(ValueError, match="Number of snapshots to retain must be at least 1"):
285+
table_v2.maintenance.expire_snapshots().retain_last_n(-1)
286+
287+
288+
def test_with_retention_policy_validation(table_v2: Table) -> None:
289+
"""Test with_retention_policy validates parameter ranges."""
290+
ExpireSnapshots._snapshot_ids_to_expire.clear()
291+
292+
with pytest.raises(ValueError, match="retain_last_n must be at least 1"):
293+
table_v2.maintenance.expire_snapshots().with_retention_policy(retain_last_n=0).commit()
294+
295+
with pytest.raises(ValueError, match="min_snapshots_to_keep must be at least 1"):
296+
table_v2.maintenance.expire_snapshots().with_retention_policy(min_snapshots_to_keep=0).commit()
297+
298+
299+
def test_with_retention_policy_no_criteria_does_nothing(table_v2: Table) -> None:
300+
"""Test with_retention_policy does nothing when no criteria provided."""
301+
from types import SimpleNamespace
302+
303+
ExpireSnapshots._snapshot_ids_to_expire.clear()
304+
305+
SNAPSHOT_1, SNAPSHOT_2 = 3051729675574597501, 3051729675574597502
306+
snapshots = [
307+
SimpleNamespace(snapshot_id=SNAPSHOT_1, timestamp_ms=100, parent_snapshot_id=None),
308+
SimpleNamespace(snapshot_id=SNAPSHOT_2, timestamp_ms=200, parent_snapshot_id=None),
309+
]
310+
table_v2.metadata = table_v2.metadata.model_copy(update={"refs": {}, "snapshots": snapshots, "properties": {}})
311+
table_v2.catalog = MagicMock()
312+
313+
# Should be a no-op
314+
result = table_v2.maintenance.expire_snapshots().with_retention_policy()
315+
assert result._snapshot_ids_to_expire == set()
316+
317+
318+
def test_get_expiration_properties_missing_values(table_v2: Table) -> None:
319+
"""Test _get_expiration_properties handles missing properties gracefully."""
320+
ExpireSnapshots._snapshot_ids_to_expire.clear()
321+
322+
# No expiration properties set
323+
table_v2.metadata = table_v2.metadata.model_copy(update={"properties": {}})
324+
expire = table_v2.maintenance.expire_snapshots()
325+
max_age, min_snaps, max_ref_age = expire._get_expiration_properties()
326+
327+
assert max_age is None
328+
assert min_snaps is None
329+
assert max_ref_age is None
330+
331+
332+
def test_retain_last_n_fewer_snapshots_than_requested(table_v2: Table) -> None:
333+
"""Test retain_last_n when table has fewer snapshots than requested."""
279334
from types import SimpleNamespace
280335

281336
ExpireSnapshots._snapshot_ids_to_expire.clear()
282337

283-
# Create 5 snapshots with increasing timestamps
284-
S1, S2, S3, S4, S5 = 201, 202, 203, 204, 205
338+
SNAPSHOT_1, SNAPSHOT_2 = 3051729675574597601, 3051729675574597602
285339
snapshots = [
286-
SimpleNamespace(snapshot_id=S1, timestamp_ms=100, parent_snapshot_id=None),
287-
SimpleNamespace(snapshot_id=S2, timestamp_ms=200, parent_snapshot_id=None),
288-
SimpleNamespace(snapshot_id=S3, timestamp_ms=300, parent_snapshot_id=None),
289-
SimpleNamespace(snapshot_id=S4, timestamp_ms=400, parent_snapshot_id=None),
290-
SimpleNamespace(snapshot_id=S5, timestamp_ms=500, parent_snapshot_id=None),
340+
SimpleNamespace(snapshot_id=SNAPSHOT_1, timestamp_ms=100, parent_snapshot_id=None),
341+
SimpleNamespace(snapshot_id=SNAPSHOT_2, timestamp_ms=200, parent_snapshot_id=None),
342+
]
343+
table_v2.metadata = table_v2.metadata.model_copy(update={"refs": {}, "snapshots": snapshots})
344+
table_v2.catalog = MagicMock()
345+
346+
# Request to keep 5 snapshots, but only 2 exist
347+
result = table_v2.maintenance.expire_snapshots().retain_last_n(5)
348+
# Should not expire anything
349+
assert result._snapshot_ids_to_expire == set()
350+
351+
352+
def test_older_than_with_retention_edge_cases(table_v2: Table) -> None:
353+
"""Test edge cases for older_than_with_retention."""
354+
from types import SimpleNamespace
355+
356+
ExpireSnapshots._snapshot_ids_to_expire.clear()
357+
358+
# Test with retain_last_n and a valid timestamp
359+
EXPIRE_SNAPSHOT, KEEP_SNAPSHOT_1, KEEP_SNAPSHOT_2 = 3051729675574597701, 3051729675574597702, 3051729675574597703
360+
snapshots = [
361+
SimpleNamespace(snapshot_id=EXPIRE_SNAPSHOT, timestamp_ms=100, parent_snapshot_id=None),
362+
SimpleNamespace(snapshot_id=KEEP_SNAPSHOT_1, timestamp_ms=200, parent_snapshot_id=None),
363+
SimpleNamespace(snapshot_id=KEEP_SNAPSHOT_2, timestamp_ms=300, parent_snapshot_id=None),
291364
]
292365
table_v2.metadata = table_v2.metadata.model_copy(update={"refs": {}, "snapshots": snapshots})
293366
table_v2.catalog = MagicMock()
294367

295-
# Expect to expire S1,S2,S3 ; keep S4 (due to min snapshots) and S5 (retain_last_n=1)
296368
mock_response = CommitTableResponse(
297-
metadata=table_v2.metadata.model_copy(update={"snapshots": [S4, S5]}),
369+
metadata=table_v2.metadata.model_copy(update={"snapshots": [KEEP_SNAPSHOT_1, KEEP_SNAPSHOT_2]}),
298370
metadata_location="mock://metadata/location",
299371
uuid=uuid4(),
300372
)
301373
table_v2.catalog.commit_table.return_value = mock_response
302374

375+
# Use a timestamp that would include all snapshots, but retain_last_n=2 should limit to keeping KEEP_SNAPSHOT_1, KEEP_SNAPSHOT_2
303376
table_v2.maintenance.expire_snapshots().older_than_with_retention(
304-
timestamp_ms=450, retain_last_n=1, min_snapshots_to_keep=2
377+
timestamp_ms=400, retain_last_n=2, min_snapshots_to_keep=None
305378
).commit()
306-
table_v2.metadata = mock_response.metadata
307379

308380
args, kwargs = table_v2.catalog.commit_table.call_args
309381
updates = args[2] if len(args) > 2 else ()
310382
remove_update = next((u for u in updates if getattr(u, "action", None) == "remove-snapshots"), None)
311383
assert remove_update is not None
312-
assert set(remove_update.snapshot_ids) == {S1, S2, S3}
313-
assert set(table_v2.metadata.snapshots) == {S4, S5}
384+
assert set(remove_update.snapshot_ids) == {EXPIRE_SNAPSHOT}
314385

315386

316-
def test_with_retention_policy_defaults(table_v2: Table) -> None:
317-
"""Test with_retention_policy uses table property defaults when arguments omitted."""
387+
def test_with_retention_policy_partial_properties(table_v2: Table) -> None:
388+
"""Test with_retention_policy with only some properties set."""
318389
from types import SimpleNamespace
319390

320391
ExpireSnapshots._snapshot_ids_to_expire.clear()
321392

322-
# Properties: expire snapshots older than 350ms, keep at least 3 snapshots
323-
properties = {
324-
"history.expire.max-snapshot-age-ms": "350",
325-
"history.expire.min-snapshots-to-keep": "3",
326-
}
327-
S1, S2, S3, S4, S5 = 301, 302, 303, 304, 305
393+
# Only max-snapshot-age-ms set, no min-snapshots-to-keep
394+
properties = {"history.expire.max-snapshot-age-ms": "250"}
395+
EXPIRE_SNAPSHOT_1, EXPIRE_SNAPSHOT_2, KEEP_SNAPSHOT = 3051729675574597801, 3051729675574597802, 3051729675574597803
328396
snapshots = [
329-
SimpleNamespace(snapshot_id=S1, timestamp_ms=100, parent_snapshot_id=None),
330-
SimpleNamespace(snapshot_id=S2, timestamp_ms=200, parent_snapshot_id=None),
331-
SimpleNamespace(snapshot_id=S3, timestamp_ms=300, parent_snapshot_id=None),
332-
SimpleNamespace(snapshot_id=S4, timestamp_ms=400, parent_snapshot_id=None),
333-
SimpleNamespace(snapshot_id=S5, timestamp_ms=500, parent_snapshot_id=None),
397+
SimpleNamespace(snapshot_id=EXPIRE_SNAPSHOT_1, timestamp_ms=100, parent_snapshot_id=None),
398+
SimpleNamespace(snapshot_id=EXPIRE_SNAPSHOT_2, timestamp_ms=200, parent_snapshot_id=None),
399+
SimpleNamespace(snapshot_id=KEEP_SNAPSHOT, timestamp_ms=300, parent_snapshot_id=None),
334400
]
335401
table_v2.metadata = table_v2.metadata.model_copy(update={"refs": {}, "snapshots": snapshots, "properties": properties})
336402
table_v2.catalog = MagicMock()
337403

338-
# Expect S1,S2 expired; S3 kept due to min_snapshots_to_keep
339404
mock_response = CommitTableResponse(
340-
metadata=table_v2.metadata.model_copy(update={"snapshots": [S3, S4, S5]}),
405+
metadata=table_v2.metadata.model_copy(update={"snapshots": [KEEP_SNAPSHOT]}),
341406
metadata_location="mock://metadata/location",
342407
uuid=uuid4(),
343408
)
344409
table_v2.catalog.commit_table.return_value = mock_response
345410

411+
# Should expire EXPIRE_SNAPSHOT_1,EXPIRE_SNAPSHOT_2 (older than 250ms), keep KEEP_SNAPSHOT
346412
table_v2.maintenance.expire_snapshots().with_retention_policy().commit()
347-
table_v2.metadata = mock_response.metadata
348413

349414
args, kwargs = table_v2.catalog.commit_table.call_args
350415
updates = args[2] if len(args) > 2 else ()
351416
remove_update = next((u for u in updates if getattr(u, "action", None) == "remove-snapshots"), None)
352417
assert remove_update is not None
353-
assert set(remove_update.snapshot_ids) == {S1, S2}
354-
assert set(table_v2.metadata.snapshots) == {S3, S4, S5}
355-
356-
357-
def test_get_expiration_properties(table_v2: Table) -> None:
358-
"""Test retrieval of expiration properties from table metadata."""
359-
ExpireSnapshots._snapshot_ids_to_expire.clear()
360-
properties = {
361-
"history.expire.max-snapshot-age-ms": "60000",
362-
"history.expire.min-snapshots-to-keep": "5",
363-
"history.expire.max-ref-age-ms": "120000",
364-
}
365-
table_v2.metadata = table_v2.metadata.model_copy(update={"properties": properties})
366-
expire = table_v2.maintenance.expire_snapshots()
367-
max_age, min_snaps, max_ref_age = expire._get_expiration_properties()
368-
assert max_age == 60000
369-
assert min_snaps == 5
370-
assert max_ref_age == 120000
371-
372-
373-
def test_get_snapshots_to_expire_with_retention_respects_protection(table_v2: Table) -> None:
374-
"""Internal helper should not select protected snapshots for expiration."""
375-
from types import SimpleNamespace
376-
377-
ExpireSnapshots._snapshot_ids_to_expire.clear()
378-
379-
P = 401 # protected
380-
A = 402
381-
B = 403
382-
table_v2.metadata = table_v2.metadata.model_copy(
383-
update={
384-
"refs": {"main": MagicMock(snapshot_id=P, snapshot_ref_type="branch")},
385-
"snapshots": [
386-
SimpleNamespace(snapshot_id=P, timestamp_ms=10, parent_snapshot_id=None),
387-
SimpleNamespace(snapshot_id=A, timestamp_ms=20, parent_snapshot_id=None),
388-
SimpleNamespace(snapshot_id=B, timestamp_ms=30, parent_snapshot_id=None),
389-
],
390-
}
391-
)
392-
expire = table_v2.maintenance.expire_snapshots()
393-
to_expire = expire._get_snapshots_to_expire_with_retention(timestamp_ms=100, retain_last_n=None, min_snapshots_to_keep=1)
394-
# Protected snapshot P should not be in list; both A and B can expire respecting min keep
395-
assert P not in to_expire
396-
assert set(to_expire) == {A, B}
418+
assert set(remove_update.snapshot_ids) == {EXPIRE_SNAPSHOT_1, EXPIRE_SNAPSHOT_2}

0 commit comments

Comments
 (0)