55from typing import Any , Dict , Optional
66import pytest
77from pyiceberg .catalog .memory import InMemoryCatalog
8- from pyiceberg .catalog .noop import NoopCatalog
98from pyiceberg .io import load_file_io
109from pyiceberg .table import Table
1110from pyiceberg .table .sorting import NullOrder , SortDirection , SortField , SortOrder
1716from pyiceberg .schema import Schema
1817from pyiceberg .types import NestedField , LongType , StringType
1918from pyiceberg .table .snapshots import Snapshot
20- from pyiceberg .table .metadata import TableMetadata , TableMetadataV2 , new_table_metadata
19+ from pyiceberg .table .metadata import TableMetadata , TableMetadataUtil , TableMetadataV2 , new_table_metadata
2120
2221
@pytest.fixture
def mock_table():
    """Build a minimal format-v2 Table, backed by an in-memory catalog, with no snapshots."""
    # Single-column schema: one required long field named "x".
    schema = {
        "type": "struct",
        "schema-id": 1,
        "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}],
    }

    raw_metadata = {
        "format-version": 2,
        "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
        "location": "s3://bucket/test/location",
        "last-sequence-number": 0,
        "last-updated-ms": int(time.time() * 1000),
        "last-column-id": 3,
        "current-schema-id": 1,
        "schemas": [schema],
        "default-spec-id": 0,
        "partition-specs": [{"spec-id": 0, "fields": []}],
        "last-partition-id": 0,
        "default-sort-order-id": 0,
        "sort-orders": [{"order-id": 0, "fields": []}],
        "snapshots": [],
        "refs": {},
    }

    # Parse the raw dict into a typed TableMetadata before constructing the Table.
    return Table(
        identifier=("mock_database", "mock_table"),
        metadata=TableMetadataUtil.parse_obj(raw_metadata),
        metadata_location="mock_location",
        io=load_file_io(),
        catalog=InMemoryCatalog("InMemoryCatalog"),
    )
2359
2460
2561@pytest .fixture
@@ -43,19 +79,24 @@ def generate_snapshot(
4379 snapshot_log = []
4480 initial_snapshot_id = 3051729675574597004
4581
46- for i in range (2000 ):
82+ for i in range (5 ):
4783 snapshot_id = initial_snapshot_id + i
4884 parent_snapshot_id = snapshot_id - 1 if i > 0 else None
4985 timestamp_ms = int (time .time () * 1000 ) - randint (0 , 1000000 )
5086 snapshots .append (generate_snapshot (snapshot_id , parent_snapshot_id , timestamp_ms , i ))
5187 snapshot_log .append ({"snapshot-id" : snapshot_id , "timestamp-ms" : timestamp_ms })
5288
53- metadata = {
89+ metadata_dict = {
5490 "format-version" : 2 ,
5591 "table-uuid" : "9c12d441-03fe-4693-9a96-a0705ddf69c1" ,
5692 "location" : "s3://bucket/test/location" ,
5793 "last-sequence-number" : 34 ,
58- "last-updated-ms" : 1602638573590 ,
94+ "last-updated-ms" : snapshots [- 1 ]["timestamp-ms" ],
95+ "metadata-log" : [
96+ {"metadata-file" : "s3://bucket/test/location/metadata/v1.json" , "timestamp-ms" : 1700000000000 },
97+ {"metadata-file" : "s3://bucket/test/location/metadata/v2.json" , "timestamp-ms" : 1700003600000 },
98+ {"metadata-file" : "s3://bucket/test/location/metadata/v3.json" , "timestamp-ms" : snapshots [- 1 ]["timestamp-ms" ]},
99+ ],
59100 "last-column-id" : 3 ,
60101 "current-schema-id" : 1 ,
61102 "schemas" : [
@@ -72,46 +113,41 @@ def generate_snapshot(
72113 },
73114 ],
74115 "default-spec-id" : 0 ,
75- "partition-specs" : [{"spec-id" : 0 , "fields" : [{ "name" : "x" , "transform" : "identity" , "source-id" : 1 , "field-id" : 1000 } ]}],
116+ "partition-specs" : [{"spec-id" : 0 , "fields" : []}],
76117 "last-partition-id" : 1000 ,
77118 "default-sort-order-id" : 3 ,
78- "sort-orders" : [
79- {
80- "order-id" : 3 ,
81- "fields" : [
82- {"transform" : "identity" , "source-id" : 2 , "direction" : "asc" , "null-order" : "nulls-first" },
83- {"transform" : "bucket[4]" , "source-id" : 3 , "direction" : "desc" , "null-order" : "nulls-last" },
84- ],
85- }
86- ],
119+ "sort-orders" : [{"order-id" : 3 , "fields" : []}],
87120 "properties" : {"read.split.target.size" : "134217728" },
88- "current-snapshot-id" : initial_snapshot_id + 1999 ,
121+ "current-snapshot-id" : initial_snapshot_id + 4 ,
89122 "snapshots" : snapshots ,
90123 "snapshot-log" : snapshot_log ,
91- "metadata-log" : [{"metadata-file" : "s3://bucket/.../v1.json" , "timestamp-ms" : 1515100 }],
92124 "refs" : {"test" : {"snapshot-id" : initial_snapshot_id , "type" : "tag" , "max-ref-age-ms" : 10000000 }},
93125 }
94126
127+ metadata = TableMetadataUtil .parse_obj (metadata_dict )
128+
95129 return Table (
96130 identifier = ("database" , "table" ),
97131 metadata = metadata ,
98- metadata_location = f"{ metadata [ ' location' ] } /uuid.metadata.json" ,
132+ metadata_location = f"{ metadata . location } /uuid.metadata.json" ,
99133 io = load_file_io (),
100- catalog = NoopCatalog ( "NoopCatalog " ),
134+ catalog = InMemoryCatalog ( "InMemoryCatalog " ),
101135 )
102136
103137
104138
def test_expire_snapshots_removes_correct_snapshots(generate_test_table: Table):
    """Verify that expiring a snapshot by id removes it from the table's metadata."""
    expired_id = 3051729675574597004

    # Expire the target snapshot via the managed context; commit() applies the change.
    with generate_test_table.expire_snapshots() as transaction:
        transaction.expire_snapshot_id(expired_id).commit()

    # Collect the snapshot ids that survived expiration.
    surviving_ids = {snap.snapshot_id for snap in generate_test_table.metadata.snapshots}

    # The expired snapshot id must no longer be present.
    assert expired_id not in surviving_ids