Skip to content

Commit 48ee1ce

Browse files
committed
Make REST catalog namespace separator configurable
The REST spec currently uses %1F as the UTF-8 encoded namespace separator for multi-part namespaces.
1 parent dea8078 commit 48ee1ce

2 files changed

Lines changed: 61 additions & 10 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
Any,
2121
Union,
2222
)
23+
from urllib.parse import quote, unquote
2324

2425
from pydantic import ConfigDict, Field, field_validator
2526
from requests import HTTPError, Session
@@ -227,7 +228,8 @@ class IdentifierKind(Enum):
227228
VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported"
228229
VIEW_ENDPOINTS_SUPPORTED_DEFAULT = False
229230

230-
NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
231+
NAMESPACE_SEPARATOR_PROPERTY = "namespace-separator"
232+
DEFAULT_NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
231233

232234

233235
def _retry_hook(retry_state: RetryCallState) -> None:
@@ -319,6 +321,7 @@ class RestCatalog(Catalog):
319321
uri: str
320322
_session: Session
321323
_supported_endpoints: set[Endpoint]
324+
_namespace_separator: str
322325

323326
def __init__(self, name: str, **properties: str):
324327
"""Rest Catalog.
@@ -333,6 +336,8 @@ def __init__(self, name: str, **properties: str):
333336
self.uri = properties[URI]
334337
self._fetch_config()
335338
self._session = self._create_session()
339+
separator_from_properties = self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR)
340+
self._namespace_separator = unquote(separator_from_properties)
336341

337342
def _create_session(self) -> Session:
338343
"""Create a request session with provided catalog configuration."""
@@ -478,6 +483,16 @@ def _extract_optional_oauth_params(self) -> dict[str, str]:
478483

479484
return optional_oauth_param
480485

486+
def _encode_namespace_path(self, namespace: Identifier) -> str:
487+
"""
488+
Encode a namespace for use as a path parameter in a URL.
489+
490+
Each part of the namespace is URL-encoded using `urllib.parse.quote`
491+
(ensuring characters like '/' are encoded) and then joined by the
492+
configured namespace separator.
493+
"""
494+
return self._namespace_separator.join(quote(part, safe="") for part in namespace)
495+
481496
def _fetch_config(self) -> None:
482497
params = {}
483498
if warehouse_location := self.properties.get(WAREHOUSE_LOCATION):
@@ -519,11 +534,19 @@ def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> Identi
519534
def _split_identifier_for_path(
520535
self, identifier: str | Identifier | TableIdentifier, kind: IdentifierKind = IdentifierKind.TABLE
521536
) -> Properties:
537+
from urllib.parse import quote
538+
522539
if isinstance(identifier, TableIdentifier):
523-
return {"namespace": NAMESPACE_SEPARATOR.join(identifier.namespace.root), kind.value: identifier.name}
540+
return {
541+
"namespace": self._encode_namespace_path(tuple(identifier.namespace.root)),
542+
kind.value: quote(identifier.name, safe=""),
543+
}
524544
identifier_tuple = self._identifier_to_validated_tuple(identifier)
525545

526-
return {"namespace": NAMESPACE_SEPARATOR.join(identifier_tuple[:-1]), kind.value: identifier_tuple[-1]}
546+
return {
547+
"namespace": self._encode_namespace_path(identifier_tuple[:-1]),
548+
kind.value: quote(identifier_tuple[-1], safe=""),
549+
}
527550

528551
def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str, Identifier | str]:
529552
identifier_tuple = self._identifier_to_validated_tuple(identifier)
@@ -741,7 +764,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) -
741764
def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
742765
self._check_endpoint(Capability.V1_LIST_TABLES)
743766
namespace_tuple = self._check_valid_namespace_identifier(namespace)
744-
namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
767+
namespace_concat = self._encode_namespace_path(namespace_tuple)
745768
response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat))
746769
try:
747770
response.raise_for_status()
@@ -827,7 +850,7 @@ def list_views(self, namespace: str | Identifier) -> list[Identifier]:
827850
if Capability.V1_LIST_VIEWS not in self._supported_endpoints:
828851
return []
829852
namespace_tuple = self._check_valid_namespace_identifier(namespace)
830-
namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
853+
namespace_concat = self._encode_namespace_path(namespace_tuple)
831854
response = self._session.get(self.url(Endpoints.list_views, namespace=namespace_concat))
832855
try:
833856
response.raise_for_status()
@@ -897,7 +920,7 @@ def create_namespace(self, namespace: str | Identifier, properties: Properties =
897920
def drop_namespace(self, namespace: str | Identifier) -> None:
898921
self._check_endpoint(Capability.V1_DELETE_NAMESPACE)
899922
namespace_tuple = self._check_valid_namespace_identifier(namespace)
900-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
923+
namespace = self._encode_namespace_path(namespace_tuple)
901924
response = self._session.delete(self.url(Endpoints.drop_namespace, namespace=namespace))
902925
try:
903926
response.raise_for_status()
@@ -910,7 +933,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
910933
namespace_tuple = self.identifier_to_tuple(namespace)
911934
response = self._session.get(
912935
self.url(
913-
f"{Endpoints.list_namespaces}?parent={NAMESPACE_SEPARATOR.join(namespace_tuple)}"
936+
f"{Endpoints.list_namespaces}?parent={self._namespace_separator.join(namespace_tuple)}"
914937
if namespace_tuple
915938
else Endpoints.list_namespaces
916939
),
@@ -926,7 +949,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
926949
def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
927950
self._check_endpoint(Capability.V1_LOAD_NAMESPACE)
928951
namespace_tuple = self._check_valid_namespace_identifier(namespace)
929-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
952+
namespace = self._encode_namespace_path(namespace_tuple)
930953
response = self._session.get(self.url(Endpoints.load_namespace_metadata, namespace=namespace))
931954
try:
932955
response.raise_for_status()
@@ -941,7 +964,7 @@ def update_namespace_properties(
941964
) -> PropertiesUpdateSummary:
942965
self._check_endpoint(Capability.V1_UPDATE_NAMESPACE)
943966
namespace_tuple = self._check_valid_namespace_identifier(namespace)
944-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
967+
namespace = self._encode_namespace_path(namespace_tuple)
945968
payload = {"removals": list(removals or []), "updates": updates}
946969
response = self._session.post(self.url(Endpoints.update_namespace_properties, namespace=namespace), json=payload)
947970
try:
@@ -958,7 +981,8 @@ def update_namespace_properties(
958981
@retry(**_RETRY_ARGS)
959982
def namespace_exists(self, namespace: str | Identifier) -> bool:
960983
namespace_tuple = self._check_valid_namespace_identifier(namespace)
961-
namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
984+
namespace = self._encode_namespace_path(namespace_tuple)
985+
962986
# fallback in order to work with older rest catalog implementations
963987
if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints:
964988
try:

tests/catalog/test_rest.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1921,6 +1921,33 @@ def test_rest_catalog_with_google_credentials_path(
19211921
actual_headers = history[0].headers
19221922
assert actual_headers["Authorization"] == expected_auth_header
19231923

1924+
assert actual_headers["Authorization"] == expected_auth_header
1925+
1926+
1927+
def test_custom_namespace_separator(rest_mock: Mocker) -> None:
1928+
custom_separator = "-"
1929+
namespace_part1 = "some"
1930+
namespace_part2 = "namespace"
1931+
# The expected URL path segment should use the literal custom_separator
1932+
expected_url_path_segment = f"{namespace_part1}{custom_separator}{namespace_part2}"
1933+
1934+
rest_mock.get(
1935+
f"{TEST_URI}v1/config",
1936+
json={"defaults": {}, "overrides": {}},
1937+
status_code=200,
1938+
)
1939+
rest_mock.get(
1940+
f"{TEST_URI}v1/namespaces/{expected_url_path_segment}",
1941+
json={"namespace": [namespace_part1, namespace_part2], "properties": {"prop": "yes"}},
1942+
status_code=204,
1943+
request_headers=TEST_HEADERS,
1944+
)
1945+
1946+
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{"namespace-separator": custom_separator})
1947+
catalog.load_namespace_properties((namespace_part1, namespace_part2))
1948+
1949+
assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces/{expected_url_path_segment}"
1950+
19241951

19251952
@pytest.mark.filterwarnings(
19261953
"ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"

0 commit comments

Comments
 (0)