Skip to content

Commit cf1e446

Browse files
committed
Make REST catalog namespace separator configurable
The REST spec currently uses %1F as the UTF-8 encoded namespace separator for multi-part namespaces.
1 parent e3070d4 commit cf1e446

2 files changed

Lines changed: 70 additions & 10 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
Any,
2121
Union,
2222
)
23+
from urllib.parse import quote, unquote
2324

2425
from pydantic import Field, field_validator
2526
from requests import HTTPError, Session
@@ -131,7 +132,8 @@ class IdentifierKind(Enum):
131132
AUTH = "auth"
132133
CUSTOM = "custom"
133134

134-
NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
135+
NAMESPACE_SEPARATOR_PROPERTY = "namespace-separator"
136+
DEFAULT_NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
135137

136138

137139
def _retry_hook(retry_state: RetryCallState) -> None:
@@ -214,6 +216,8 @@ class ListViewsResponse(IcebergBaseModel):
214216
class RestCatalog(Catalog):
215217
uri: str
216218
_session: Session
219+
_supported_endpoints: set[Endpoint]
220+
_namespace_separator: str
217221

218222
def __init__(self, name: str, **properties: str):
219223
"""Rest Catalog.
@@ -228,6 +232,8 @@ def __init__(self, name: str, **properties: str):
228232
self.uri = properties[URI]
229233
self._fetch_config()
230234
self._session = self._create_session()
235+
separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR)
236+
self._namespace_separator = unquote(separator_from_properties)
231237

232238
def _create_session(self) -> Session:
233239
"""Create a request session with provided catalog configuration."""
@@ -351,6 +357,16 @@ def _extract_optional_oauth_params(self) -> dict[str, str]:
351357

352358
return optional_oauth_param
353359

360+
def _encode_namespace_path(self, namespace: Identifier) -> str:
361+
"""
362+
Encode a namespace for use as a path parameter in a URL.
363+
364+
Each part of the namespace is URL-encoded using `urllib.parse.quote`
365+
(ensuring characters like '/' are encoded) and then joined by the
366+
configured namespace separator.
367+
"""
368+
return self._namespace_separator.join(quote(part, safe="") for part in namespace)
369+
354370
def _fetch_config(self) -> None:
355371
params = {}
356372
if warehouse_location := self.properties.get(WAREHOUSE_LOCATION):
@@ -381,11 +397,19 @@ def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> Identi
381397
def _split_identifier_for_path(
382398
self, identifier: str | Identifier | TableIdentifier, kind: IdentifierKind = IdentifierKind.TABLE
383399
) -> Properties:
400+
from urllib.parse import quote
401+
384402
if isinstance(identifier, TableIdentifier):
385-
return {"namespace": NAMESPACE_SEPARATOR.join(identifier.namespace.root), kind.value: identifier.name}
403+
return {
404+
"namespace": self._encode_namespace_path(tuple(identifier.namespace.root)),
405+
kind.value: quote(identifier.name, safe=""),
406+
}
386407
identifier_tuple = self._identifier_to_validated_tuple(identifier)
387408

388-
return {"namespace": NAMESPACE_SEPARATOR.join(identifier_tuple[:-1]), kind.value: identifier_tuple[-1]}
409+
return {
410+
"namespace": self._encode_namespace_path(identifier_tuple[:-1]),
411+
kind.value: quote(identifier_tuple[-1], safe=""),
412+
}
389413

390414
def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str, Identifier | str]:
391415
identifier_tuple = self._identifier_to_validated_tuple(identifier)
@@ -600,7 +624,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) -
600624
@retry(**_RETRY_ARGS)
601625
def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
602626
namespace_tuple = self._check_valid_namespace_identifier(namespace)
603-
namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
627+
namespace_concat = self._encode_namespace_path(namespace_tuple)
604628
response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat))
605629
try:
606630
response.raise_for_status()
@@ -681,7 +705,7 @@ def _remove_catalog_name_from_table_request_identifier(self, table_request: Comm
681705
@retry(**_RETRY_ARGS)
682706
def list_views(self, namespace: str | Identifier) -> list[Identifier]:
683707
namespace_tuple = self._check_valid_namespace_identifier(namespace)
684-
namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
708+
namespace_concat = self._encode_namespace_path(namespace_tuple)
685709
response = self._session.get(self.url(Endpoints.list_views, namespace=namespace_concat))
686710
try:
687711
response.raise_for_status()
@@ -748,7 +772,7 @@ def create_namespace(self, namespace: str | Identifier, properties: Properties =
748772
@retry(**_RETRY_ARGS)
749773
def drop_namespace(self, namespace: str | Identifier) -> None:
750774
namespace_tuple = self._check_valid_namespace_identifier(namespace)
751-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
775+
namespace = self._encode_namespace_path(namespace_tuple)
752776
response = self._session.delete(self.url(Endpoints.drop_namespace, namespace=namespace))
753777
try:
754778
response.raise_for_status()
@@ -760,7 +784,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
760784
namespace_tuple = self.identifier_to_tuple(namespace)
761785
response = self._session.get(
762786
self.url(
763-
f"{Endpoints.list_namespaces}?parent={NAMESPACE_SEPARATOR.join(namespace_tuple)}"
787+
f"{Endpoints.list_namespaces}?parent={self._namespace_separator.join(namespace_tuple)}"
764788
if namespace_tuple
765789
else Endpoints.list_namespaces
766790
),
@@ -775,7 +799,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
775799
@retry(**_RETRY_ARGS)
776800
def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
777801
namespace_tuple = self._check_valid_namespace_identifier(namespace)
778-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
802+
namespace = self._encode_namespace_path(namespace_tuple)
779803
response = self._session.get(self.url(Endpoints.load_namespace_metadata, namespace=namespace))
780804
try:
781805
response.raise_for_status()
@@ -789,7 +813,7 @@ def update_namespace_properties(
789813
self, namespace: str | Identifier, removals: set[str] | None = None, updates: Properties = EMPTY_DICT
790814
) -> PropertiesUpdateSummary:
791815
namespace_tuple = self._check_valid_namespace_identifier(namespace)
792-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
816+
namespace = self._encode_namespace_path(namespace_tuple)
793817
payload = {"removals": list(removals or []), "updates": updates}
794818
response = self._session.post(self.url(Endpoints.update_namespace_properties, namespace=namespace), json=payload)
795819
try:
@@ -806,7 +830,16 @@ def update_namespace_properties(
806830
@retry(**_RETRY_ARGS)
807831
def namespace_exists(self, namespace: str | Identifier) -> bool:
808832
namespace_tuple = self._check_valid_namespace_identifier(namespace)
809-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
833+
namespace = self._encode_namespace_path(namespace_tuple)
834+
835+
# fallback in order to work with older rest catalog implementations
836+
if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints:
837+
try:
838+
self.load_namespace_properties(namespace_tuple)
839+
return True
840+
except NoSuchNamespaceError:
841+
return False
842+
810843
response = self._session.head(self.url(Endpoints.namespace_exists, namespace=namespace))
811844

812845
if response.status_code == 404:

tests/catalog/test_rest.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,6 +1899,33 @@ def test_rest_catalog_with_google_credentials_path(
18991899
actual_headers = history[0].headers
19001900
assert actual_headers["Authorization"] == expected_auth_header
19011901

1902+
assert actual_headers["Authorization"] == expected_auth_header
1903+
1904+
1905+
def test_custom_namespace_separator(rest_mock: Mocker) -> None:
1906+
custom_separator = "-"
1907+
namespace_part1 = "some"
1908+
namespace_part2 = "namespace"
1909+
# The expected URL path segment should use the literal custom_separator
1910+
expected_url_path_segment = f"{namespace_part1}{custom_separator}{namespace_part2}"
1911+
1912+
rest_mock.get(
1913+
f"{TEST_URI}v1/config",
1914+
json={"defaults": {}, "overrides": {}},
1915+
status_code=200,
1916+
)
1917+
rest_mock.get(
1918+
f"{TEST_URI}v1/namespaces/{expected_url_path_segment}",
1919+
json={"namespace": [namespace_part1, namespace_part2], "properties": {"prop": "yes"}},
1920+
status_code=204,
1921+
request_headers=TEST_HEADERS,
1922+
)
1923+
1924+
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{"namespace-separator": custom_separator})
1925+
catalog.load_namespace_properties((namespace_part1, namespace_part2))
1926+
1927+
assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces/{expected_url_path_segment}"
1928+
19021929

19031930
@pytest.mark.filterwarnings(
19041931
"ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"

0 commit comments

Comments
 (0)