Skip to content

Commit 640da58

Browse files
committed
Merge branch 'main' into feat/rest-catalog-pagination
2 parents 65fe2b8 + 41ff734 commit 640da58

21 files changed

Lines changed: 397 additions & 311 deletions

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@ repos:
2727
- id: check-yaml
2828
- id: check-ast
2929
- repo: https://github.com/astral-sh/ruff-pre-commit
30-
rev: v0.8.6
30+
rev: v0.11.13
3131
hooks:
3232
- id: ruff
3333
args: [ --fix, --exit-non-zero-on-fix ]
3434
- id: ruff-format
3535
- repo: https://github.com/pre-commit/mirrors-mypy
36-
rev: v1.14.1
36+
rev: v1.16.0
3737
hooks:
3838
- id: mypy
3939
args:

pyiceberg/catalog/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,10 @@ def load_catalog(name: Optional[str] = None, **properties: Optional[str]) -> Cat
262262
raise ValueError(f"Could not initialize catalog with the following properties: {properties}")
263263

264264

265+
def list_catalogs() -> List[str]:
266+
return _ENV_CONFIG.get_known_catalogs()
267+
268+
265269
def delete_files(io: FileIO, files_to_delete: Set[str], file_type: str) -> None:
266270
"""Delete files.
267271
@@ -923,6 +927,20 @@ def _resolve_table_location(self, location: Optional[str], database_name: str, t
923927
return location.rstrip("/")
924928

925929
def _get_default_warehouse_location(self, database_name: str, table_name: str) -> str:
930+
"""Return the default warehouse location using the convention of `warehousePath/databaseName/tableName`."""
931+
database_properties = self.load_namespace_properties(database_name)
932+
if database_location := database_properties.get(LOCATION):
933+
database_location = database_location.rstrip("/")
934+
return f"{database_location}/{table_name}"
935+
936+
if warehouse_path := self.properties.get(WAREHOUSE_LOCATION):
937+
warehouse_path = warehouse_path.rstrip("/")
938+
return f"{warehouse_path}/{database_name}/{table_name}"
939+
940+
raise ValueError("No default path is set, please specify a location when creating a table")
941+
942+
def _get_hive_style_warehouse_location(self, database_name: str, table_name: str) -> str:
943+
"""Return the default warehouse location following the Hive convention of `warehousePath/databaseName.db/tableName`."""
926944
database_properties = self.load_namespace_properties(database_name)
927945
if database_location := database_properties.get(LOCATION):
928946
database_location = database_location.rstrip("/")

pyiceberg/catalog/dynamodb.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,10 @@ def _convert_dynamo_table_item_to_iceberg_table(self, dynamo_table_item: Dict[st
664664
catalog=self,
665665
)
666666

667+
def _get_default_warehouse_location(self, database_name: str, table_name: str) -> str:
668+
"""Override the default warehouse location to follow Hive-style conventions."""
669+
return self._get_hive_style_warehouse_location(database_name, table_name)
670+
667671

668672
def _get_create_table_item(database_name: str, table_name: str, properties: Properties, metadata_location: str) -> Dict[str, Any]:
669673
current_timestamp_ms = str(round(time() * 1000))

pyiceberg/catalog/glue.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,3 +821,7 @@ def view_exists(self, identifier: Union[str, Identifier]) -> bool:
821821
@staticmethod
822822
def __is_iceberg_table(table: "TableTypeDef") -> bool:
823823
return table.get("Parameters", {}).get(TABLE_TYPE, "").lower() == ICEBERG
824+
825+
def _get_default_warehouse_location(self, database_name: str, table_name: str) -> str:
826+
"""Override the default warehouse location to follow Hive-style conventions."""
827+
return self._get_hive_style_warehouse_location(database_name, table_name)

pyiceberg/catalog/hive.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,11 +211,18 @@ def _construct_hive_storage_descriptor(
211211
DEFAULT_PROPERTIES = {TableProperties.PARQUET_COMPRESSION: TableProperties.PARQUET_COMPRESSION_DEFAULT}
212212

213213

214-
def _construct_parameters(metadata_location: str, previous_metadata_location: Optional[str] = None) -> Dict[str, Any]:
214+
def _construct_parameters(
215+
metadata_location: str, previous_metadata_location: Optional[str] = None, metadata_properties: Optional[Properties] = None
216+
) -> Dict[str, Any]:
215217
properties = {PROP_EXTERNAL: "TRUE", PROP_TABLE_TYPE: "ICEBERG", PROP_METADATA_LOCATION: metadata_location}
216218
if previous_metadata_location:
217219
properties[PROP_PREVIOUS_METADATA_LOCATION] = previous_metadata_location
218220

221+
if metadata_properties:
222+
for key, value in metadata_properties.items():
223+
if key not in properties:
224+
properties[key] = str(value)
225+
219226
return properties
220227

221228

@@ -360,7 +367,7 @@ def _convert_iceberg_into_hive(self, table: Table) -> HiveTable:
360367
property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
361368
),
362369
tableType=EXTERNAL_TABLE,
363-
parameters=_construct_parameters(table.metadata_location),
370+
parameters=_construct_parameters(metadata_location=table.metadata_location, metadata_properties=table.properties),
364371
)
365372

366373
def _create_hive_table(self, open_client: Client, hive_table: HiveTable) -> None:
@@ -541,6 +548,7 @@ def commit_table(
541548
hive_table.parameters = _construct_parameters(
542549
metadata_location=updated_staged_table.metadata_location,
543550
previous_metadata_location=current_table.metadata_location,
551+
metadata_properties=updated_staged_table.properties,
544552
)
545553
open_client.alter_table_with_environment_context(
546554
dbname=database_name,
@@ -790,3 +798,7 @@ def update_namespace_properties(
790798

791799
def drop_view(self, identifier: Union[str, Identifier]) -> None:
792800
raise NotImplementedError
801+
802+
def _get_default_warehouse_location(self, database_name: str, table_name: str) -> str:
803+
"""Override the default warehouse location to follow Hive-style conventions."""
804+
return self._get_hive_style_warehouse_location(database_name, table_name)

pyiceberg/expressions/literals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ def __init__(self, value: bool) -> None:
262262
super().__init__(value, bool)
263263

264264
@singledispatchmethod
265-
def to(self, type_var: IcebergType) -> Literal[bool]: # type: ignore
265+
def to(self, type_var: IcebergType) -> Literal[bool]:
266266
raise TypeError(f"Cannot convert BooleanLiteral into {type_var}")
267267

268268
@to.register(BooleanType)

pyiceberg/io/fsspec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191

9292

9393
def s3v4_rest_signer(properties: Properties, request: "AWSRequest", **_: Any) -> "AWSRequest":
94-
signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/")
94+
signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/") # type: ignore
9595
signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT)
9696

9797
signer_headers = {}

pyiceberg/manifest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,7 @@ def _inherit_from_manifest(entry: ManifestEntry, manifest: ManifestFile) -> Mani
912912
"""
913913
# Inherit sequence numbers.
914914
# The snapshot_id is required in V1, inherit with V2 when null
915-
if entry.snapshot_id is None:
915+
if entry.snapshot_id is None and manifest.added_snapshot_id is not None:
916916
entry.snapshot_id = manifest.added_snapshot_id
917917

918918
# in v1 tables, the sequence number is not persisted and can be safely defaulted to 0

pyiceberg/transforms.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,11 +1049,11 @@ def _truncate_number(
10491049
raise ValueError(f"Expected a numeric literal, got: {type(boundary)}")
10501050

10511051
if isinstance(pred, BoundLessThan):
1052-
return LessThanOrEqual(Reference(name), _transform_literal(transform, boundary.decrement())) # type: ignore
1052+
return LessThanOrEqual(Reference(name), _transform_literal(transform, boundary.decrement()))
10531053
elif isinstance(pred, BoundLessThanOrEqual):
10541054
return LessThanOrEqual(Reference(name), _transform_literal(transform, boundary))
10551055
elif isinstance(pred, BoundGreaterThan):
1056-
return GreaterThanOrEqual(Reference(name), _transform_literal(transform, boundary.increment())) # type: ignore
1056+
return GreaterThanOrEqual(Reference(name), _transform_literal(transform, boundary.increment()))
10571057
elif isinstance(pred, BoundGreaterThanOrEqual):
10581058
return GreaterThanOrEqual(Reference(name), _transform_literal(transform, boundary))
10591059
elif isinstance(pred, BoundEqualTo):
@@ -1073,11 +1073,11 @@ def _truncate_number_strict(
10731073
if isinstance(pred, BoundLessThan):
10741074
return LessThan(Reference(name), _transform_literal(transform, boundary))
10751075
elif isinstance(pred, BoundLessThanOrEqual):
1076-
return LessThan(Reference(name), _transform_literal(transform, boundary.increment())) # type: ignore
1076+
return LessThan(Reference(name), _transform_literal(transform, boundary.increment()))
10771077
elif isinstance(pred, BoundGreaterThan):
10781078
return GreaterThan(Reference(name), _transform_literal(transform, boundary))
10791079
elif isinstance(pred, BoundGreaterThanOrEqual):
1080-
return GreaterThan(Reference(name), _transform_literal(transform, boundary.decrement())) # type: ignore
1080+
return GreaterThan(Reference(name), _transform_literal(transform, boundary.decrement()))
10811081
elif isinstance(pred, BoundNotEqualTo):
10821082
return NotEqualTo(Reference(name), _transform_literal(transform, boundary))
10831083
elif isinstance(pred, BoundEqualTo):

pyiceberg/utils/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,12 @@ def get_catalog_config(self, catalog_name: str) -> Optional[RecursiveDict]:
159159
return catalog_conf
160160
return None
161161

162+
def get_known_catalogs(self) -> List[str]:
163+
catalogs = self.config.get(CATALOG, {})
164+
if not isinstance(catalogs, dict):
165+
raise ValueError("Catalog configurations needs to be an object")
166+
return list(catalogs.keys())
167+
162168
def get_int(self, key: str) -> Optional[int]:
163169
if (val := self.config.get(key)) is not None:
164170
try:

0 commit comments

Comments
 (0)