"""
Tests for Iceberg table directory detection in DatalakeGcsClient and DatalakeS3Client.
"""
15+
1516import sys
1617import types
1718from unittest .mock import MagicMock , patch
2122# present, which prevents breaking other tests or masking integration issues.
# Register stand-in modules for the google.cloud.storage package chain.
# setdefault() only inserts the stub when no module is registered under that
# name yet, so an already-imported real package is returned untouched.
_google_mod = sys.modules.setdefault("google", types.ModuleType("google"))
_gcloud_mod = sys.modules.setdefault("google.cloud", types.ModuleType("google.cloud"))
_storage_mod = sys.modules.setdefault("google.cloud.storage", types.ModuleType("google.cloud.storage"))
# A freshly created stub module has no Client attribute; supply MagicMock so
# that looking up storage.Client succeeds. A module that already defines
# Client is left as it is.
if not hasattr(_storage_mod, "Client"):
    _storage_mod.Client = MagicMock
2928if not hasattr (_google_mod , "cloud" ):
3938)
4039
4140
42- def _make_blob (
43- name : str , size : int = 1024 , storage_class : str = "STANDARD"
44- ) -> MagicMock :
41+ def _make_blob (name : str , size : int = 1024 , storage_class : str = "STANDARD" ) -> MagicMock :
4542 blob = MagicMock ()
4643 blob .name = name
4744 blob .size = size
@@ -54,9 +51,7 @@ def _make_gcs_client(blobs: list) -> DatalakeGcsClient:
5451 mock_bucket = MagicMock ()
5552 mock_storage_client .get_bucket .return_value = mock_bucket
5653 mock_bucket .list_blobs .return_value = blobs
57- mock_bucket .get_blob .side_effect = lambda name : next (
58- (b for b in blobs if b .name == name ), None
59- )
54+ mock_bucket .get_blob .side_effect = lambda name : next ((b for b in blobs if b .name == name ), None )
6055 client = DatalakeGcsClient .__new__ (DatalakeGcsClient )
6156 client ._client = mock_storage_client
6257 client ._temp_credentials_file_path_list = []
@@ -166,17 +161,15 @@ def test_gcs_iceberg_version_comparison_v10(self):
166161
167162
168163class TestS3IcebergDiscovery :
def _make_s3_client(self, keys: list, sizes: dict | None = None) -> DatalakeS3Client:
    """Helper: create a DatalakeS3Client backed by a mocked boto3 client.

    Args:
        keys: Object keys to turn into ``{"Key": ..., "Size": ...}`` records.
        sizes: Optional mapping of key to size. Keys absent from the mapping
            (or every key, when the mapping is omitted or empty) get 1024.

    Returns:
        A DatalakeS3Client whose ``_client`` is a MagicMock and whose
        ``_session`` is None.

    Side effects:
        Stores the mock on ``self._mock_boto_client`` and the generated
        records on ``self._s3_objects`` for the calling test to use.
    """
    mock_boto_client = MagicMock()
    # __new__ creates the instance without running __init__; the attributes
    # the tests rely on are assigned by hand below.
    client = DatalakeS3Client.__new__(DatalakeS3Client)
    client._client = mock_boto_client
    client._session = None
    self._mock_boto_client = mock_boto_client
    # None default instead of a mutable default argument.
    sizes = sizes or {}
    self._s3_objects = [{"Key": k, "Size": sizes.get(k, 1024)} for k in keys]
    return client
181174
182175 def test_s3_iceberg_table_detected (self ):
@@ -275,39 +268,19 @@ def test_iceberg_table_name_extracted_correctly(self):
275268 get_iceberg_table_name_from_metadata_path ,
276269 )
277270
278- assert (
279- get_iceberg_table_name_from_metadata_path (
280- "warehouse/orders/metadata/v2.metadata.json"
281- )
282- == "orders"
283- )
284- assert (
285- get_iceberg_table_name_from_metadata_path (
286- "my_prefix/sales/metadata/v1.metadata.json"
287- )
288- == "sales"
289- )
290- assert (
291- get_iceberg_table_name_from_metadata_path (
292- "simple/metadata/v3.metadata.json"
293- )
294- == "simple"
295- )
271+ assert get_iceberg_table_name_from_metadata_path ("warehouse/orders/metadata/v2.metadata.json" ) == "orders"
272+ assert get_iceberg_table_name_from_metadata_path ("my_prefix/sales/metadata/v1.metadata.json" ) == "sales"
273+ assert get_iceberg_table_name_from_metadata_path ("simple/metadata/v3.metadata.json" ) == "simple"
296274
def test_non_iceberg_path_returns_none(self):
    """get_iceberg_table_name_from_metadata_path returns None for each non-Iceberg path below."""
    from metadata.utils.datalake.datalake_utils import (
        get_iceberg_table_name_from_metadata_path,
    )

    # Plain JSON files with no metadata/ directory in the path.
    assert get_iceberg_table_name_from_metadata_path("data/orders.json") is None
    assert get_iceberg_table_name_from_metadata_path("warehouse/orders.json") is None
    # metadata/ is the first path segment and the file is not *.metadata.json.
    assert get_iceberg_table_name_from_metadata_path("metadata/v1.json") is None
    # File sits under <table>/metadata/ but is an .avro file.
    assert get_iceberg_table_name_from_metadata_path("orders/metadata/snapshot.avro") is None
311284
312285 def test_table_type_iceberg_for_metadata_files (self ):
313286 from metadata .generated .schema .entity .data .table import TableType
@@ -317,9 +290,7 @@ def test_table_type_iceberg_for_metadata_files(self):
317290
318291 key_name = "warehouse/orders/metadata/v1.metadata.json"
319292 table_type = (
320- TableType .Iceberg
321- if get_iceberg_table_name_from_metadata_path (key_name ) is not None
322- else TableType .Regular
293+ TableType .Iceberg if get_iceberg_table_name_from_metadata_path (key_name ) is not None else TableType .Regular
323294 )
324295 assert table_type == TableType .Iceberg
325296
@@ -335,9 +306,7 @@ def test_table_type_regular_for_normal_files(self):
335306 if get_iceberg_table_name_from_metadata_path (key_name ) is not None
336307 else TableType .Regular
337308 )
338- assert (
339- table_type == TableType .Regular
340- ), f"Expected Regular for { key_name } , got { table_type } "
309+ assert table_type == TableType .Regular , f"Expected Regular for { key_name } , got { table_type } "
341310
342311
343312class TestSlice4FetchKeyCorrectness :
@@ -375,7 +344,7 @@ def test_yield_table_uses_metadata_path_not_display_name(self):
375344 file_size ,
376345 original_key ,
377346 )
378- table_name , table_type , table_extension , t_file_size , fetch_key = tuple_5
347+ table_name , _table_type , table_extension , t_file_size , fetch_key = tuple_5
379348
380349 wrapper = DatalakeTableSchemaWrapper (
381350 key = fetch_key ,
@@ -384,12 +353,8 @@ def test_yield_table_uses_metadata_path_not_display_name(self):
384353 file_size = t_file_size ,
385354 )
386355
387- assert (
388- wrapper .key == original_key
389- ), f"fetch key should be original blob path, got { wrapper .key !r} "
390- assert (
391- wrapper .key != display_name
392- ), f"fetch key must NOT be the display name '{ display_name } '"
356+ assert wrapper .key == original_key , f"fetch key should be original blob path, got { wrapper .key !r} "
357+ assert wrapper .key != display_name , f"fetch key must NOT be the display name '{ display_name } '"
393358 assert table_name == display_name
394359
395360 def test_non_iceberg_fetch_key_equals_table_name (self ):
@@ -406,9 +371,7 @@ def test_non_iceberg_fetch_key_equals_table_name(self):
406371 )
407372
408373 key_name = "data/orders.parquet"
409- table_name = (
410- key_name # standardize_table_name returns unchanged for non-Iceberg
411- )
374+ table_name = key_name # standardize_table_name returns unchanged for non-Iceberg
412375
413376 assert get_iceberg_table_name_from_metadata_path (key_name ) is None
414377
0 commit comments