Skip to content

Commit 1d3ecfe

Browse files
committed
Add support for 'overwrite' option in register_table
1 parent b67b724 commit 1d3ecfe

10 files changed

Lines changed: 67 additions & 8 deletions

File tree

mkdocs/docs/api.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,16 @@ catalog.register_table(
196196
)
197197
```
198198

199+
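To overwrite an already-registered table with existing metadata, pass `overwrite=True`. Support depends on the catalog: the REST catalog forwards the flag to the server, while the Glue, Hive, SQL and BigQuery catalogs raise `NotImplementedError` when `overwrite=True`: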
200+
201+
```python
202+
catalog.register_table(
203+
identifier="docs_example.bids",
204+
metadata_location="s3://warehouse/path/to/metadata.json",
205+
overwrite=True
206+
)
207+
```
208+
199209
## Load a table
200210

201211
There are two ways of reading an Iceberg table; through a catalog, and by pointing at the Iceberg metadata directly. Reading through a catalog is preferred, and directly pointing at the metadata is read-only.

pyiceberg/catalog/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,12 +493,13 @@ def namespace_exists(self, namespace: str | Identifier) -> bool:
493493
"""
494494

495495
@abstractmethod
496-
def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table:
496+
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
497497
"""Register a new table using existing metadata.
498498
499499
Args:
500500
identifier (Union[str, Identifier]): Table identifier for the table
501501
metadata_location (str): The location to the metadata
502+
overwrite (bool): Whether to overwrite the existing table, default False
502503
503504
Returns:
504505
Table: The newly registered table

pyiceberg/catalog/bigquery_metastore.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,19 +270,23 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
270270
datasets_iterator = self.client.list_datasets()
271271
return [(dataset.dataset_id,) for dataset in datasets_iterator]
272272

273-
def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table:
273+
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
274274
"""Register a new table using existing metadata.
275275
276276
Args:
277277
identifier (str | Identifier): Table identifier for the table
278278
metadata_location (str): The location to the metadata
279+
overwrite (bool): Whether to overwrite the existing table, default False
279280
280281
Returns:
281282
Table: The newly registered table
282283
283284
Raises:
284285
TableAlreadyExistsError: If the table already exists
285286
"""
287+
if overwrite:
288+
raise NotImplementedError("`overwrite` isn't supported")
289+
286290
dataset_name, table_name = self.identifier_to_database_and_table(identifier)
287291

288292
dataset_ref = DatasetReference(project=self.project_id, dataset_id=dataset_name)

pyiceberg/catalog/dynamodb.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,12 +210,13 @@ def create_table(
210210

211211
return self.load_table(identifier=identifier)
212212

213-
def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table:
213+
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
214214
"""Register a new table using existing metadata.
215215
216216
Args:
217217
identifier (Union[str, Identifier]): Table identifier for the table
218218
metadata_location (str): The location to the metadata
219+
overwrite (bool): Whether to overwrite the existing table, default False
219220
220221
Returns:
221222
Table: The newly registered table

pyiceberg/catalog/glue.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -601,19 +601,23 @@ def create_table(
601601
catalog=self,
602602
)
603603

604-
def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table:
604+
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
605605
"""Register a new table using existing metadata.
606606
607607
Args:
608608
identifier (Union[str, Identifier]): Table identifier for the table
609609
metadata_location (str): The location to the metadata
610+
overwrite (bool): Whether to overwrite the existing table, default False
610611
611612
Returns:
612613
Table: The newly registered table
613614
614615
Raises:
615616
TableAlreadyExistsError: If the table already exists
616617
"""
618+
if overwrite:
619+
raise NotImplementedError("`overwrite` isn't supported")
620+
617621
database_name, table_name = self.identifier_to_database_and_table(identifier)
618622
properties = EMPTY_DICT
619623
io = self._load_file_io(location=metadata_location)

pyiceberg/catalog/hive.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,19 +446,23 @@ def create_view(
446446
) -> View:
447447
raise NotImplementedError
448448

449-
def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table:
449+
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
450450
"""Register a new table using existing metadata.
451451
452452
Args:
453453
identifier (Union[str, Identifier]): Table identifier for the table
454454
metadata_location (str): The location to the metadata
455+
overwrite (bool): Whether to overwrite the existing table, default False
455456
456457
Returns:
457458
Table: The newly registered table
458459
459460
Raises:
460461
TableAlreadyExistsError: If the table already exists
461462
"""
463+
if overwrite:
464+
raise NotImplementedError("`overwrite` isn't supported")
465+
462466
database_name, table_name = self.identifier_to_database_and_table(identifier)
463467
io = self._load_file_io(location=metadata_location)
464468
metadata_file = io.new_input(metadata_location)

pyiceberg/catalog/noop.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,13 @@ def load_table(self, identifier: str | Identifier) -> Table:
7070
def table_exists(self, identifier: str | Identifier) -> bool:
7171
raise NotImplementedError
7272

73-
def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table:
73+
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
7474
"""Register a new table using existing metadata.
7575
7676
Args:
7777
identifier (Union[str, Identifier]): Table identifier for the table
7878
metadata_location (str): The location to the metadata
79+
overwrite (bool): Whether to overwrite the existing table, default False
7980
8081
Returns:
8182
Table: The newly registered table

pyiceberg/catalog/rest/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ def transform_properties_dict_value_to_str(cls, properties: Properties) -> dict[
316316
class RegisterTableRequest(IcebergBaseModel):
    """Request body sent to the REST catalog when registering a table from existing metadata."""

    # Name of the table to register (the namespace is carried by the request URL).
    name: str
    # Location of the metadata file; serialized under the alias "metadata-location".
    metadata_location: str = Field(..., alias="metadata-location")
    # Whether an existing table of the same name may be overwritten.
    # Defaults to False so callers that build the request without the flag keep
    # working, mirroring the `overwrite: bool = False` default of `register_table`.
    overwrite: bool = False
319320

320321

321322
class ConfigResponse(IcebergBaseModel):
@@ -976,12 +977,13 @@ def create_view(
976977
return self._response_to_view(self.identifier_to_tuple(identifier), view_response)
977978

978979
@retry(**_RETRY_ARGS)
979-
def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table:
980+
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
980981
"""Register a new table using existing metadata.
981982
982983
Args:
983984
identifier (Union[str, Identifier]): Table identifier for the table
984985
metadata_location (str): The location to the metadata
986+
overwrite (bool): Whether to overwrite the existing table, default False
985987
986988
Returns:
987989
Table: The newly registered table
@@ -994,6 +996,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) -
994996
request = RegisterTableRequest(
995997
name=self._identifier_to_validated_tuple(identifier)[-1],
996998
metadata_location=metadata_location,
999+
overwrite=overwrite,
9971000
)
9981001
serialized_json = request.model_dump_json().encode(UTF8)
9991002
response = self._session.post(

pyiceberg/catalog/sql.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,12 +237,13 @@ def create_table(
237237

238238
return self.load_table(identifier=identifier)
239239

240-
def register_table(self, identifier: str | Identifier, metadata_location: str) -> Table:
240+
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
241241
"""Register a new table using existing metadata.
242242
243243
Args:
244244
identifier (Union[str, Identifier]): Table identifier for the table
245245
metadata_location (str): The location to the metadata
246+
overwrite (bool): Whether to overwrite the existing table, default False
246247
247248
Returns:
248249
Table: The newly registered table
@@ -251,6 +252,9 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) -
251252
TableAlreadyExistsError: If the table already exists
252253
NoSuchNamespaceError: If namespace does not exist
253254
"""
255+
if overwrite:
256+
raise NotImplementedError("`overwrite` isn't supported")
257+
254258
namespace_tuple = Catalog.namespace_from(identifier)
255259
namespace = Catalog.namespace_to_string(namespace_tuple)
256260
table_name = Catalog.table_name_from(identifier)

tests/catalog/test_rest.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,6 +1583,33 @@ def test_register_table_409(rest_mock: Mocker, table_schema_simple: Schema) -> N
15831583
assert "Table already exists" in str(e.value)
15841584

15851585

1586+
def test_register_table_overwrite(
    rest_mock: Mocker, table_schema_simple: Schema, example_table_metadata_no_snapshot_v1_rest_json: dict[str, Any]
) -> None:
    """Registering with ``overwrite=True`` sends the flag to the server and returns the registered table."""
    # Keep the matcher so the outgoing request body can be inspected afterwards.
    register_mock = rest_mock.post(
        f"{TEST_URI}v1/namespaces/default/register",
        json=example_table_metadata_no_snapshot_v1_rest_json,
        status_code=200,
        request_headers=TEST_HEADERS,
    )
    catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
    actual = catalog.register_table(
        identifier=("default", "registered_table"),
        metadata_location="s3://warehouse/database/table/metadata.json",
        overwrite=True,
    )
    expected = Table(
        identifier=("default", "registered_table"),
        metadata_location=example_table_metadata_no_snapshot_v1_rest_json["metadata-location"],
        metadata=TableMetadataV1(**example_table_metadata_no_snapshot_v1_rest_json["metadata"]),
        io=load_file_io(),
        catalog=catalog,
    )
    assert actual.metadata.model_dump() == expected.metadata.model_dump()
    assert actual.metadata_location == expected.metadata_location
    assert actual.name() == expected.name()

    # Without this check the test would pass even if `overwrite` were silently
    # dropped from the request payload.
    sent_request = register_mock.last_request
    assert sent_request is not None
    assert sent_request.json()["overwrite"] is True
1611+
1612+
15861613
def test_delete_namespace_204(rest_mock: Mocker) -> None:
15871614
namespace = "example"
15881615
rest_mock.delete(

0 commit comments

Comments
 (0)