|
22 | 22 | from pyiceberg.conversions import from_bytes |
23 | 23 | from pyiceberg.manifest import DataFile, DataFileContent, ManifestContent, ManifestFile, PartitionFieldSummary |
24 | 24 | from pyiceberg.partitioning import PartitionSpec |
| 25 | +from pyiceberg.schema import Schema |
25 | 26 | from pyiceberg.table.snapshots import Snapshot, ancestors_of |
26 | 27 | from pyiceberg.types import PrimitiveType |
27 | 28 | from pyiceberg.utils.concurrent import ExecutorFactory |
@@ -384,14 +385,16 @@ def _get_all_manifests_schema(self) -> "pa.Schema": |
384 | 385 | all_manifests_schema = all_manifests_schema.append(pa.field("reference_snapshot_id", pa.int64(), nullable=False)) |
385 | 386 | return all_manifests_schema |
386 | 387 |
|
387 | | - def _get_positional_deletes_schema(self) -> "pa.Schema": |
| 388 | + def _get_positional_deletes_schema(self, schema: Optional[Schema] = None, spec_id: Optional[int] = None) -> "pa.Schema": |
388 | 389 | import pyarrow as pa |
389 | 390 |
|
390 | 391 | from pyiceberg.io.pyarrow import schema_to_pyarrow |
391 | 392 |
|
392 | | - partition_struct = self.tbl.metadata.spec_struct() |
| 393 | + schema = schema or self.tbl.metadata.schema() |
| 394 | + |
| 395 | + partition_struct = self.tbl.metadata.spec_struct(spec_id=spec_id) |
393 | 396 | pa_partition_struct = schema_to_pyarrow(partition_struct) |
394 | | - pa_row_struct = schema_to_pyarrow(self.tbl.schema().as_struct()) |
| 397 | + pa_row_struct = schema_to_pyarrow(schema.as_struct()) |
395 | 398 | positional_delete_schema = pa.schema( |
396 | 399 | [ |
397 | 400 | pa.field("file_path", pa.string(), nullable=False), |
@@ -473,11 +476,13 @@ def _partition_summaries_to_rows( |
473 | 476 | schema=self._get_all_manifests_schema() if is_all_manifests_table else self._get_manifests_schema(), |
474 | 477 | ) |
475 | 478 |
|
476 | | - def _generate_positional_delete_table(self, manifest: ManifestFile, position_deletes_schema: "pa.Schema") -> "pa.Table": |
| 479 | + def _generate_positional_delete_table(self, manifest: ManifestFile, schema: Schema) -> "pa.Table": |
477 | 480 | import pyarrow as pa |
478 | 481 |
|
479 | 482 | positional_deletes: List["pa.Table"] = [] |
480 | 483 |
|
| 484 | + position_deletes_schema = self._get_positional_deletes_schema(schema=schema, spec_id=manifest.partition_spec_id) |
| 485 | + |
481 | 486 | if manifest.content == ManifestContent.DELETES: |
482 | 487 | for entry in manifest.fetch_manifest_entry(self.tbl.io): |
483 | 488 | if entry.data_file.content == DataFileContent.POSITION_DELETES: |
@@ -713,14 +718,14 @@ def position_deletes(self, snapshot_id: Optional[int] = None) -> "pa.Table": |
713 | 718 | import pyarrow as pa |
714 | 719 |
|
715 | 720 | snapshot = self._get_snapshot(snapshot_id) if snapshot_id else self.tbl.current_snapshot() |
716 | | - position_deletes_schema = self._get_positional_deletes_schema() |
717 | | - |
718 | 721 | if not snapshot: |
719 | | - return pa.Table.from_pylist([], schema=position_deletes_schema) |
| 722 | + schema = self._get_positional_deletes_schema() |
| 723 | + return pa.Table.from_pylist([], schema=schema) |
720 | 724 |
|
| 725 | + schemas = self.tbl.schemas() |
721 | 726 | executor = ExecutorFactory.get_or_create() |
722 | 727 | positional_deletes: Iterator["pa.Table"] = executor.map( |
723 | | - lambda manifest: self._generate_positional_delete_table(manifest, position_deletes_schema), |
| 728 | + lambda manifest: self._generate_positional_delete_table(manifest, schema=schemas[snapshot.schema_id]), |
724 | 729 | snapshot.manifests(self.tbl.io), |
725 | 730 | ) |
726 | 731 | return pa.concat_tables(positional_deletes) |
0 commit comments