@@ -160,6 +160,18 @@ def _added_data_files(
160160 partition_set : Optional [dict [int , set [Record ]]],
161161 parent_snapshot : Optional [Snapshot ],
162162) -> Iterator [ManifestEntry ]:
163+ """Return manifest entries for data files added between the starting snapshot and parent snapshot.
164+
165+ Args:
166+ table: Table to get the history from
167+ starting_snapshot: Starting snapshot to get the history from
168+ data_filter: Optional filter to match data files
169+ partition_set: Optional set of partitions to match data files
170+ parent_snapshot: Parent snapshot to get the history from
171+
172+ Returns:
173+ Iterator of manifest entries for added data files matching the conditions
174+ """
163175 if parent_snapshot is None :
164176 return
165177
@@ -197,8 +209,16 @@ def _validate_added_data_files(
197209 data_filter : Optional [BooleanExpression ],
198210 parent_snapshot : Optional [Snapshot ],
199211) -> None :
200- conflicting_entries = _added_data_files ( table , starting_snapshot , data_filter , None , parent_snapshot )
212+ """Validate that no files matching a filter have been added to the table since a starting snapshot.
201213
214+ Args:
215+ table: Table to validate
216+ starting_snapshot: Snapshot current at the start of the operation
217+ data_filter: Expression used to find added data files
218+ parent_snapshot: Ending snapshot on the branch being validated
219+
220+ """
221+ conflicting_entries = _added_data_files (table , starting_snapshot , data_filter , None , parent_snapshot )
202222 if any (conflicting_entries ):
203223 conflicting_snapshots = {entry .snapshot_id for entry in conflicting_entries if entry .snapshot_id is not None }
204224 raise ValidationException (f"Added data files were found matching the filter for snapshots { conflicting_snapshots } !" )
0 commit comments