@@ -384,30 +384,15 @@ def _get_all_manifests_schema(self) -> "pa.Schema":
384384 all_manifests_schema = all_manifests_schema .append (pa .field ("reference_snapshot_id" , pa .int64 (), nullable = False ))
385385 return all_manifests_schema
386386
387- def _get_positional_file_schema (self ) -> "pa.Schema" :
388- import pyarrow as pa
389-
390- from pyiceberg .io .pyarrow import schema_to_pyarrow
391-
392- pa_row_struct = schema_to_pyarrow (self .tbl .schema ().as_struct ())
393- positinal_delete_schema = pa .schema (
394- [
395- pa .field ("file_path" , pa .string (), nullable = False ),
396- pa .field ("pos" , pa .int64 (), nullable = False ),
397- pa .field ("row" , pa_row_struct , nullable = True ),
398- ]
399- )
400- return positinal_delete_schema
401-
402387 def _get_positional_deletes_schema (self ) -> "pa.Schema" :
403388 import pyarrow as pa
404389
405390 from pyiceberg .io .pyarrow import schema_to_pyarrow
406391
407- partition_record = self .tbl .metadata .specs_struct ()
408- pa_partition_struct = schema_to_pyarrow (partition_record )
392+ partition_struct = self .tbl .metadata .spec_struct ()
393+ pa_partition_struct = schema_to_pyarrow (partition_struct )
409394 pa_row_struct = schema_to_pyarrow (self .tbl .schema ().as_struct ())
410- positinal_delete_schema = pa .schema (
395+ positional_delete_schema = pa .schema (
411396 [
412397 pa .field ("file_path" , pa .string (), nullable = False ),
413398 pa .field ("pos" , pa .int64 (), nullable = False ),
@@ -417,7 +402,7 @@ def _get_positional_deletes_schema(self) -> "pa.Schema":
417402 pa .field ("delete_file_path" , pa .string (), nullable = False ),
418403 ]
419404 )
420- return positinal_delete_schema
405+ return positional_delete_schema
421406
422407 def _generate_manifests_table (self , snapshot : Optional [Snapshot ], is_all_manifests_table : bool = False ) -> "pa.Table" :
423408 import pyarrow as pa
@@ -492,22 +477,28 @@ def _generate_positional_delete_table(self, manifest: ManifestFile, position_del
492477 import pyarrow as pa
493478
494479 positional_deletes : List ["pa.Table" ] = []
480+
495481 if manifest .content == ManifestContent .DELETES :
496482 for entry in manifest .fetch_manifest_entry (self .tbl .io ):
497483 if entry .data_file .content == DataFileContent .POSITION_DELETES :
498484 from pyiceberg .io .pyarrow import _fs_from_file_path , _read_delete_file
499485
500486 positional_delete_file = _read_delete_file (
501- _fs_from_file_path (self .tbl .io , entry .data_file .file_path ),
502- entry .data_file ,
503- self ._get_positional_file_schema (),
504- ).to_pylist ()
487+ _fs_from_file_path (self .tbl .io , entry .data_file .file_path ), entry .data_file
488+ )
489+ positional_deletes_records = []
505490 for record in positional_delete_file :
506- record ["partition" ] = entry .data_file .partition .__dict__
507- record ["spec_id" ] = manifest .partition_spec_id
508- record ["delete_file_path" ] = entry .data_file .file_path
509-
510- positional_deletes .append (pa .Table .from_pylist (positional_delete_file , position_deletes_schema ))
491+ row = {
492+ "file_path" : record .file_path ,
493+ "pos" : record .pos ,
494+ "row" : record .row ,
495+ "partition" : entry .data_file .partition .__dict__ ,
496+ "spec_id" : manifest .partition_spec_id ,
497+ "delete_file_path" : entry .data_file .file_path ,
498+ }
499+ positional_deletes_records .append (row )
500+
501+ positional_deletes .append (pa .Table .from_pylist (positional_deletes_records , position_deletes_schema ))
511502
512503 if not positional_deletes :
513504 return pa .Table .from_pylist ([], position_deletes_schema )
@@ -718,18 +709,18 @@ def all_manifests(self) -> "pa.Table":
718709 )
719710 return pa .concat_tables (manifests_by_snapshots )
720711
def position_deletes(self, snapshot_id: Optional[int] = None) -> "pa.Table":
    """Return the positional deletes recorded in a snapshot as a PyArrow table.

    One table is generated per manifest of the selected snapshot (via
    ``_generate_positional_delete_table``) on the shared executor, and the
    results are concatenated.

    Args:
        snapshot_id: ID of the snapshot to inspect. When ``None`` (the
            default) the table's current snapshot is used.

    Returns:
        A table using the schema from ``_get_positional_deletes_schema``.
        It is empty when no snapshot is available or when the snapshot
        lists no manifests.

    Note:
        An explicit ``snapshot_id`` is resolved through ``_get_snapshot``;
        how an unknown ID is reported depends on that helper, which is not
        visible from this method.
    """
    import pyarrow as pa

    # Compare against None instead of relying on truthiness, so that an
    # explicitly requested ID of 0 is looked up rather than silently
    # replaced by the current snapshot.
    if snapshot_id is not None:
        snapshot = self._get_snapshot(snapshot_id)
    else:
        snapshot = self.tbl.current_snapshot()
    position_deletes_schema = self._get_positional_deletes_schema()

    if not snapshot:
        return pa.Table.from_pylist([], schema=position_deletes_schema)

    executor = ExecutorFactory.get_or_create()
    # Materialize the per-manifest results so the zero-manifest case can be
    # detected before concatenation.
    positional_deletes: List["pa.Table"] = list(
        executor.map(
            lambda manifest: self._generate_positional_delete_table(manifest, position_deletes_schema),
            snapshot.manifests(self.tbl.io),
        )
    )
    if not positional_deletes:
        # Nothing to concatenate: return an empty table with the expected
        # schema instead of handing concat_tables an empty input.
        return pa.Table.from_pylist([], schema=position_deletes_schema)
    return pa.concat_tables(positional_deletes)
0 commit comments