Skip to content

Commit b536f2b

Browse files
committed
Address comments from fokko
1 parent 18c27f2 commit b536f2b

6 files changed

Lines changed: 49 additions & 35 deletions

File tree

pyiceberg/catalog/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,10 @@ def namespace_to_string(identifier: str | Identifier, err: type[ValueError] | ty
722722

723723
return ".".join(segment.strip() for segment in tuple_identifier)
724724

725+
def supports_server_side_planning(self) -> bool:
726+
"""Check if the catalog supports server-side scan planning."""
727+
return False
728+
725729
@staticmethod
726730
def identifier_to_database(
727731
identifier: str | Identifier, err: type[ValueError] | type[NoSuchNamespaceError] = ValueError

pyiceberg/catalog/rest/__init__.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -390,12 +390,8 @@ def _create_session(self) -> Session:
390390

391391
return session
392392

393-
def is_rest_scan_planning_enabled(self) -> bool:
394-
"""Check if rest server-side scan planning is enabled.
395-
396-
Returns:
397-
True if enabled, False otherwise.
398-
"""
393+
def supports_server_side_planning(self) -> bool:
394+
"""Check if the catalog supports server-side scan planning."""
399395
return Capability.V1_SUBMIT_TABLE_SCAN_PLAN in self._supported_endpoints and property_as_bool(
400396
self.properties, REST_SCAN_PLANNING_ENABLED, REST_SCAN_PLANNING_ENABLED_DEFAULT
401397
)
@@ -410,6 +406,7 @@ def _plan_table_scan(self, identifier: str | Identifier, request: PlanTableScanR
410406
411407
Returns:
412408
PlanningResponse the result of the scan plan request representing the status
409+
413410
Raises:
414411
NoSuchTableError: If a table with the given identifier does not exist.
415412
"""

pyiceberg/catalog/rest/scan_planning.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,9 @@
2525

2626
from pyiceberg.catalog.rest.response import ErrorResponseMessage
2727
from pyiceberg.expressions import BooleanExpression, SerializableBooleanExpression
28-
from pyiceberg.manifest import DataFileContent, FileFormat
28+
from pyiceberg.manifest import FileFormat
2929
from pyiceberg.typedef import IcebergBaseModel
3030

31-
# REST content-type to DataFileContent
32-
CONTENT_TYPE_MAP: dict[str, DataFileContent] = {
33-
"data": DataFileContent.DATA,
34-
"position-deletes": DataFileContent.POSITION_DELETES,
35-
"equality-deletes": DataFileContent.EQUALITY_DELETES,
36-
}
37-
3831
# Primitive types that can appear in partition values and bounds
3932
PrimitiveTypeValue: TypeAlias = bool | int | float | str | Decimal | UUID | date | time | datetime | bytes
4033

pyiceberg/manifest.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,28 @@ def __repr__(self) -> str:
6969
"""Return the string representation of the DataFileContent class."""
7070
return f"DataFileContent.{self.name}"
7171

72+
@staticmethod
73+
def from_rest_type(content_type: str) -> DataFileContent:
74+
"""Convert REST API content type string to DataFileContent.
75+
76+
Args:
77+
content_type: REST API content type.
78+
79+
Returns:
80+
The corresponding DataFileContent enum value.
81+
82+
Raises:
83+
ValueError: If the content type is unknown.
84+
"""
85+
mapping = {
86+
"data": DataFileContent.DATA,
87+
"position-deletes": DataFileContent.POSITION_DELETES,
88+
"equality-deletes": DataFileContent.EQUALITY_DELETES,
89+
}
90+
if content_type not in mapping:
91+
raise ValueError(f"Invalid file content value: {content_type}")
92+
return mapping[content_type]
93+
7294

7395
class ManifestContent(int, Enum):
7496
DATA = 0

pyiceberg/table/__init__.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1849,7 +1849,7 @@ def from_rest_response(
18491849

18501850
def _rest_file_to_data_file(rest_file: RESTContentFile) -> DataFile:
18511851
"""Convert a REST content file to a manifest DataFile."""
1852-
from pyiceberg.catalog.rest.scan_planning import CONTENT_TYPE_MAP, RESTDataFile
1852+
from pyiceberg.catalog.rest.scan_planning import RESTDataFile
18531853

18541854
if isinstance(rest_file, RESTDataFile):
18551855
column_sizes = rest_file.column_sizes.to_dict() if rest_file.column_sizes else None
@@ -1863,7 +1863,7 @@ def _rest_file_to_data_file(rest_file: RESTContentFile) -> DataFile:
18631863
nan_value_counts = None
18641864

18651865
data_file = DataFile.from_args(
1866-
content=CONTENT_TYPE_MAP[rest_file.content],
1866+
content=DataFileContent.from_rest_type(rest_file.content),
18671867
file_path=rest_file.file_path,
18681868
file_format=rest_file.file_format,
18691869
partition=Record(*rest_file.partition) if rest_file.partition else Record(),
@@ -2051,15 +2051,13 @@ def scan_plan_helper(self) -> Iterator[list[ManifestEntry]]:
20512051
],
20522052
)
20532053

2054-
def _should_use_rest_planning(self) -> bool:
2055-
"""Check if REST scan planning should be used for this scan."""
2056-
from pyiceberg.catalog.rest import RestCatalog
2057-
2058-
if not isinstance(self.catalog, RestCatalog):
2054+
def _should_use_server_side_planning(self) -> bool:
2055+
"""Check if server-side scan planning should be used for this scan."""
2056+
if not self.catalog:
20592057
return False
2060-
return self.catalog.is_rest_scan_planning_enabled()
2058+
return self.catalog.supports_server_side_planning()
20612059

2062-
def _plan_files_rest(self) -> Iterable[FileScanTask]:
2060+
def _plan_files_server_side(self) -> Iterable[FileScanTask]:
20632061
"""Plan files using REST server-side scan planning."""
20642062
from pyiceberg.catalog.rest import RestCatalog
20652063
from pyiceberg.catalog.rest.scan_planning import PlanTableScanRequest
@@ -2120,8 +2118,8 @@ def plan_files(self) -> Iterable[FileScanTask]:
21202118
Returns:
21212119
List of FileScanTasks that contain both data and delete files.
21222120
"""
2123-
if self._should_use_rest_planning():
2124-
return self._plan_files_rest()
2121+
if self._should_use_server_side_planning():
2122+
return self._plan_files_server_side()
21252123
return self._plan_files_local()
21262124

21272125
def to_arrow(self) -> pa.Table:

tests/catalog/test_rest.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2016,22 +2016,22 @@ def test_rest_catalog_context_manager_with_exception_sigv4(self, rest_mock: Mock
20162016
assert catalog is not None and hasattr(catalog, "_session")
20172017
assert len(catalog._session.adapters) == self.EXPECTED_ADAPTERS_SIGV4
20182018

2019-
def test_rest_scan_planning_disabled_by_default(self, rest_mock: Mocker) -> None:
2019+
def test_server_side_planning_disabled_by_default(self, rest_mock: Mocker) -> None:
20202020
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
20212021

2022-
assert catalog.is_rest_scan_planning_enabled() is False
2022+
assert catalog.supports_server_side_planning() is False
20232023

2024-
def test_rest_scan_planning_enabled_by_property(self, rest_mock: Mocker) -> None:
2024+
def test_server_side_planning_enabled_by_property(self, rest_mock: Mocker) -> None:
20252025
catalog = RestCatalog(
20262026
"rest",
20272027
uri=TEST_URI,
20282028
token=TEST_TOKEN,
20292029
**{"rest-scan-planning-enabled": "true"},
20302030
)
20312031

2032-
assert catalog.is_rest_scan_planning_enabled() is True
2032+
assert catalog.supports_server_side_planning() is True
20332033

2034-
def test_rest_scan_planning_disabled_when_endpoint_unsupported(self, requests_mock: Mocker) -> None:
2034+
def test_server_side_planning_disabled_when_endpoint_unsupported(self, requests_mock: Mocker) -> None:
20352035
# config endpoint does not populate endpoint falling back to default
20362036
requests_mock.get(
20372037
f"{TEST_URI}v1/config",
@@ -2045,19 +2045,19 @@ def test_rest_scan_planning_disabled_when_endpoint_unsupported(self, requests_mo
20452045
**{"rest-scan-planning-enabled": "true"},
20462046
)
20472047

2048-
assert catalog.is_rest_scan_planning_enabled() is False
2048+
assert catalog.supports_server_side_planning() is False
20492049

2050-
def test_rest_scan_planning_explicitly_disabled(self, rest_mock: Mocker) -> None:
2050+
def test_server_side_planning_explicitly_disabled(self, rest_mock: Mocker) -> None:
20512051
catalog = RestCatalog(
20522052
"rest",
20532053
uri=TEST_URI,
20542054
token=TEST_TOKEN,
20552055
**{"rest-scan-planning-enabled": "false"},
20562056
)
20572057

2058-
assert catalog.is_rest_scan_planning_enabled() is False
2058+
assert catalog.supports_server_side_planning() is False
20592059

2060-
def test_rest_scan_planning_enabled_from_server_config(self, rest_mock: Mocker) -> None:
2060+
def test_server_side_planning_enabled_from_server_config(self, rest_mock: Mocker) -> None:
20612061
rest_mock.get(
20622062
f"{TEST_URI}v1/config",
20632063
json={
@@ -2069,7 +2069,7 @@ def test_rest_scan_planning_enabled_from_server_config(self, rest_mock: Mocker)
20692069
)
20702070
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
20712071

2072-
assert catalog.is_rest_scan_planning_enabled() is True
2072+
assert catalog.supports_server_side_planning() is True
20732073

20742074
def test_supported_endpoint(self, requests_mock: Mocker) -> None:
20752075
requests_mock.get(

0 commit comments

Comments
 (0)