Skip to content

Commit e9e1f95

Browse files
committed
Address comments from Fokko
1 parent 5f7d79d commit e9e1f95

2 files changed

Lines changed: 22 additions & 26 deletions

File tree

pyiceberg/table/delete_file_index.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from __future__ import annotations
1818

1919
from bisect import bisect_left
20-
from typing import Any
2120

2221
from pyiceberg.expressions import EqualTo
2322
from pyiceberg.expressions.visitors import _InclusiveMetricsEvaluator
@@ -93,17 +92,17 @@ def _referenced_data_file_path(delete_file: DataFile) -> str | None:
9392
return None
9493

9594

96-
def _partition_key(spec_id: int, partition: Record | None) -> tuple[int, tuple[Any, ...]]:
95+
def _partition_key(spec_id: int, partition: Record | None) -> tuple[int, Record]:
9796
if partition:
98-
return spec_id, tuple(partition._data)
99-
return spec_id, () # unpartitioned handling
97+
return spec_id, partition
98+
return spec_id, Record() # unpartitioned handling
10099

101100

102101
class DeleteFileIndex:
103102
"""Indexes position delete files by partition and by exact data file path."""
104103

105104
def __init__(self) -> None:
106-
self._by_partition: dict[tuple[int, tuple[Any, ...]], PositionDeletes] = {}
105+
self._by_partition: dict[tuple[int, Record], PositionDeletes] = {}
107106
self._by_path: dict[str, PositionDeletes] = {}
108107

109108
def is_empty(self) -> bool:

tests/table/test_delete_file_index.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -161,27 +161,6 @@ def test_dvs_treated_as_position_deletes() -> None:
161161
assert all(d.content == DataFileContent.POSITION_DELETES for d in result)
162162

163163

164-
def test_lazy_sorting() -> None:
165-
group = PositionDeletes()
166-
167-
pos_4 = _create_positional_delete(sequence_number=4).data_file
168-
pos_2 = _create_positional_delete(sequence_number=2).data_file
169-
pos_1 = _create_positional_delete(sequence_number=1).data_file
170-
pos_3 = _create_positional_delete(sequence_number=3).data_file
171-
172-
group.add(pos_4, 4)
173-
group.add(pos_2, 2)
174-
group.add(pos_1, 1)
175-
group.add(pos_3, 3)
176-
177-
assert len(group.filter_by_seq(0)) == 4
178-
assert len(group.filter_by_seq(1)) == 4
179-
assert len(group.filter_by_seq(2)) == 3
180-
assert len(group.filter_by_seq(3)) == 2
181-
assert len(group.filter_by_seq(4)) == 1
182-
assert len(group.filter_by_seq(5)) == 0
183-
184-
185164
def test_cannot_add_after_indexing() -> None:
186165
group = PositionDeletes()
187166
group.add(_create_positional_delete(sequence_number=1).data_file, 1)
@@ -190,3 +169,21 @@ def test_cannot_add_after_indexing() -> None:
190169

191170
with pytest.raises(ValueError, match="Cannot add files after indexing"):
192171
group.add(_create_positional_delete(sequence_number=2).data_file, 2)
172+
173+
174+
def test_record_equality_for_partition_lookup() -> None:
175+
index = DeleteFileIndex()
176+
177+
partition_a = Record(1, "foo")
178+
partition_b = Record(1, "foo")
179+
partition_c = Record(1, "bar")
180+
181+
assert partition_a == partition_b
182+
assert partition_a != partition_c
183+
184+
index.add_delete_file(_create_partition_delete(sequence_number=2, spec_id=0, partition=partition_a), partition_a)
185+
186+
data_file = _create_data_file()
187+
188+
assert len(index.for_data_file(1, data_file, partition_b)) == 1
189+
assert len(index.for_data_file(1, data_file, partition_c)) == 0

0 commit comments

Comments
 (0)