Skip to content

Commit ef0c138

Browse files
committed
test: Spy on update_table_metadata instead of property descriptor
Avoids the mypy `attr-defined` error on `Transaction.table_metadata.fget` by instead counting calls to the underlying `update_table_metadata` function (the actual expensive operation) via `mock.patch` with `wraps=`.
1 parent 7839569 commit ef0c138

2 files changed

Lines changed: 31 additions & 45 deletions

File tree

pyiceberg/table/update/snapshot.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -228,8 +228,6 @@ def _summary(self, snapshot_properties: dict[str, str] = EMPTY_DICT) -> Summary:
228228

229229
# avoid copying metadata for each data file
230230
table_metadata = self._transaction.table_metadata
231-
schema = table_metadata.schema()
232-
default_spec = table_metadata.spec()
233231

234232
partition_summary_limit = int(
235233
table_metadata.properties.get(
@@ -241,8 +239,8 @@ def _summary(self, snapshot_properties: dict[str, str] = EMPTY_DICT) -> Summary:
241239
for data_file in self._added_data_files:
242240
ssc.add_file(
243241
data_file=data_file,
244-
partition_spec=default_spec,
245-
schema=schema,
242+
partition_spec=table_metadata.spec(),
243+
schema=table_metadata.schema(),
246244
)
247245

248246
if len(self._deleted_data_files) > 0:
@@ -251,7 +249,7 @@ def _summary(self, snapshot_properties: dict[str, str] = EMPTY_DICT) -> Summary:
251249
ssc.remove_file(
252250
data_file=data_file,
253251
partition_spec=specs[data_file.spec_id],
254-
schema=schema,
252+
schema=table_metadata.schema(),
255253
)
256254

257255
previous_snapshot = (
@@ -426,14 +424,12 @@ def _copy_with_new_status(entry: ManifestEntry, status: ManifestEntryStatus) ->
426424
data_file=entry.data_file,
427425
)
428426

429-
# avoid copying metadata for each evaluator
430-
table_metadata = self._transaction.table_metadata
431-
schema = table_metadata.schema()
432-
433427
manifest_evaluators: dict[int, Callable[[ManifestFile], bool]] = KeyDefaultDict(self._build_manifest_evaluator)
434-
strict_metrics_evaluator = _StrictMetricsEvaluator(schema, self._predicate, case_sensitive=self._case_sensitive).eval
428+
strict_metrics_evaluator = _StrictMetricsEvaluator(
429+
self.schema(), self._predicate, case_sensitive=self._case_sensitive
430+
).eval
435431
inclusive_metrics_evaluator = _InclusiveMetricsEvaluator(
436-
schema, self._predicate, case_sensitive=self._case_sensitive
432+
self.schema(), self._predicate, case_sensitive=self._case_sensitive
437433
).eval
438434

439435
existing_manifests = []
@@ -445,7 +441,7 @@ def _copy_with_new_status(entry: ManifestEntry, status: ManifestEntryStatus) ->
445441
# Should be the current tip of the _target_branch
446442
parent_snapshot_id_for_delete_source = self._parent_snapshot_id
447443
if parent_snapshot_id_for_delete_source is not None:
448-
snapshot = table_metadata.snapshot_by_id(parent_snapshot_id_for_delete_source)
444+
snapshot = self._transaction.table_metadata.snapshot_by_id(parent_snapshot_id_for_delete_source)
449445
if snapshot: # Ensure snapshot is found
450446
for manifest_file in snapshot.manifests(io=self._io):
451447
if manifest_file.content == ManifestContent.DATA:
@@ -546,19 +542,18 @@ def __init__(
546542
from pyiceberg.table import TableProperties
547543

548544
super().__init__(operation, transaction, io, commit_uuid, snapshot_properties, branch)
549-
table_properties = self._transaction.table_metadata.properties
550545
self._target_size_bytes = property_as_int(
551-
table_properties,
546+
self._transaction.table_metadata.properties,
552547
TableProperties.MANIFEST_TARGET_SIZE_BYTES,
553548
TableProperties.MANIFEST_TARGET_SIZE_BYTES_DEFAULT,
554549
) # type: ignore
555550
self._min_count_to_merge = property_as_int(
556-
table_properties,
551+
self._transaction.table_metadata.properties,
557552
TableProperties.MANIFEST_MIN_MERGE_COUNT,
558553
TableProperties.MANIFEST_MIN_MERGE_COUNT_DEFAULT,
559554
) # type: ignore
560555
self._merge_enabled = property_as_bool(
561-
table_properties,
556+
self._transaction.table_metadata.properties,
562557
TableProperties.MANIFEST_MERGE_ENABLED,
563558
TableProperties.MANIFEST_MERGE_ENABLED_DEFAULT,
564559
)

tests/table/test_snapshots.py

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -554,51 +554,42 @@ def test_latest_ancestor_before_timestamp() -> None:
554554

555555

556556
def test_snapshot_producer_bounded_metadata_access(table_v2: Table) -> None:
557-
"""Transaction.table_metadata recomputes via model_copy on every access, so the
558-
snapshot producer must not read it once per item. Guards the hoisting introduced
559-
in #2674 and extended here.
557+
"""Transaction.table_metadata replays staged updates via update_table_metadata on
558+
every access, so the snapshot producer must not read it once per item. Guards the
559+
hoisting introduced in #2674 and extended here.
560560
"""
561-
from unittest.mock import patch
561+
from unittest import mock
562562

563-
from pyiceberg.table import Transaction
564-
from pyiceberg.table.metadata import TableMetadata
563+
from pyiceberg.table.update import update_table_metadata
565564
from pyiceberg.table.update.snapshot import _FastAppendFiles, _MergeAppendFiles
566565

567-
original_fget = Transaction.table_metadata.fget
568-
call_count = 0
569-
570-
def counting(self: Transaction) -> TableMetadata:
571-
nonlocal call_count
572-
call_count += 1
573-
return original_fget(self)
574-
575566
def make_file() -> DataFile:
576567
return DataFile.from_args(content=DataFileContent.DATA, record_count=1, file_size_in_bytes=1, partition=Record())
577568

578569
txn = table_v2.transaction()
579570

580-
with patch.object(Transaction, "table_metadata", property(counting)):
581-
# _summary() access count must not scale with the number of data files
582-
def summary_accesses(n_files: int) -> int:
571+
with mock.patch("pyiceberg.table.update_table_metadata", wraps=update_table_metadata) as spy:
572+
# _summary() cost must not scale with the number of data files
573+
def summary_calls(n_files: int) -> int:
583574
append = _FastAppendFiles(operation=Operation.APPEND, transaction=txn, io=table_v2.io)
584575
for _ in range(n_files):
585576
append.append_data_file(make_file())
586-
before = call_count
577+
spy.reset_mock()
587578
append._summary()
588-
return call_count - before
579+
return spy.call_count
589580

590-
few, many = summary_accesses(10), summary_accesses(100)
591-
assert few == many, f"_summary() table_metadata accesses scale with file count ({few} vs {many})"
592-
assert many <= 2, f"_summary() accessed table_metadata {many} times; expected O(1)"
581+
few, many = summary_calls(10), summary_calls(100)
582+
assert few == many, f"_summary() update_table_metadata calls scale with file count ({few} vs {many})"
583+
assert many <= 2, f"_summary() triggered {many} update_table_metadata calls; expected O(1)"
593584

594-
# _MergeAppendFiles.__init__ should add exactly one access over _FastAppendFiles.__init__
595-
before = call_count
585+
# _MergeAppendFiles.__init__ should add exactly one call over _FastAppendFiles.__init__
586+
spy.reset_mock()
596587
_FastAppendFiles(operation=Operation.APPEND, transaction=txn, io=table_v2.io)
597-
fast_init = call_count - before
598-
before = call_count
588+
fast_init = spy.call_count
589+
spy.reset_mock()
599590
_MergeAppendFiles(operation=Operation.APPEND, transaction=txn, io=table_v2.io)
600-
merge_init = call_count - before
591+
merge_init = spy.call_count
601592
assert merge_init - fast_init == 1, (
602-
f"_MergeAppendFiles.__init__ made {merge_init - fast_init} extra table_metadata "
603-
"accesses over its superclass; expected 1 (hoisted)"
593+
f"_MergeAppendFiles.__init__ made {merge_init - fast_init} extra update_table_metadata "
594+
"calls over its superclass; expected 1 (hoisted)"
604595
)

0 commit comments

Comments (0)