Skip to content

Commit 1e81c87

Browse files
committed
Merge branch 'main' of github.com:apache/iceberg-python into fd-update-docs
2 parents d5c0b72 + ae11ba4 commit 1e81c87

20 files changed

Lines changed: 985 additions & 412 deletions

File tree

dev/hive/core-site.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,9 @@
5050
<name>fs.s3a.path.style.access</name>
5151
<value>true</value>
5252
</property>
53+
<property>
54+
<name>hive.metastore.disallow.incompatible.col.type.changes</name>
55+
<value>false</value>
56+
</property>
57+
5358
</configuration>

dev/provision.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -273,13 +273,9 @@
273273
"""
274274
)
275275

276-
# There is an issue with CREATE OR REPLACE
277-
# https://github.com/apache/iceberg/issues/8756
278-
spark.sql(f"DROP TABLE IF EXISTS {catalog_name}.default.test_table_version")
279-
280276
spark.sql(
281277
f"""
282-
CREATE TABLE {catalog_name}.default.test_table_version (
278+
CREATE OR REPLACE TABLE {catalog_name}.default.test_table_version (
283279
dt date,
284280
number integer,
285281
letter string

mkdocs/docs/configuration.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ catalog:
351351
| rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request |
352352
| oauth2-server-uri | <https://auth-service/cc> | Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens') |
353353
| snapshot-loading-mode | refs | The snapshots to return in the body of the metadata. Setting the value to `all` would return the full set of snapshots currently valid for the table. Setting the value to `refs` would load all snapshots referenced by branches or tags. |
354+
| warehouse | myWarehouse | Warehouse location or identifier to request from the catalog service. May be used to determine server-side overrides, such as the warehouse location. |
354355

355356
<!-- markdown-link-check-enable-->
356357

@@ -438,10 +439,11 @@ catalog:
438439
s3.secret-access-key: password
439440
```
440441

441-
| Key | Example | Description |
442-
|------------------------------| ------- | --------------------------------- |
443-
| hive.hive2-compatible | true | Using Hive 2.x compatibility mode |
444-
| hive.kerberos-authentication | true | Using authentication via Kerberos |
442+
| Key | Example | Description |
443+
|------------------------------| ------- | ------------------------------------ |
444+
| hive.hive2-compatible | true | Using Hive 2.x compatibility mode |
445+
| hive.kerberos-authentication | true | Using authentication via Kerberos |
446+
| hive.kerberos-service-name | hive | Kerberos service name (default hive) |
445447

446448
When using Hive 2.x, make sure to set the compatibility flag:
447449

poetry.lock

Lines changed: 249 additions & 247 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/catalog/glue.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,20 @@
140140

141141

142142
def _construct_parameters(
143-
metadata_location: str, glue_table: Optional["TableTypeDef"] = None, prev_metadata_location: Optional[str] = None
143+
metadata_location: str,
144+
glue_table: Optional["TableTypeDef"] = None,
145+
prev_metadata_location: Optional[str] = None,
146+
metadata_properties: Optional[Properties] = None,
144147
) -> Properties:
145148
new_parameters = glue_table.get("Parameters", {}) if glue_table else {}
146149
new_parameters.update({TABLE_TYPE: ICEBERG.upper(), METADATA_LOCATION: metadata_location})
147150
if prev_metadata_location:
148151
new_parameters[PREVIOUS_METADATA_LOCATION] = prev_metadata_location
152+
153+
if metadata_properties:
154+
for key, value in metadata_properties.items():
155+
new_parameters[key] = str(value)
156+
149157
return new_parameters
150158

151159

@@ -236,7 +244,7 @@ def _construct_table_input(
236244
table_input: "TableInputTypeDef" = {
237245
"Name": table_name,
238246
"TableType": EXTERNAL_TABLE,
239-
"Parameters": _construct_parameters(metadata_location, glue_table, prev_metadata_location),
247+
"Parameters": _construct_parameters(metadata_location, glue_table, prev_metadata_location, properties),
240248
"StorageDescriptor": {
241249
"Columns": _to_columns(metadata),
242250
"Location": metadata.location,

pyiceberg/catalog/hive.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@
130130

131131
HIVE_KERBEROS_AUTH = "hive.kerberos-authentication"
132132
HIVE_KERBEROS_AUTH_DEFAULT = False
133+
HIVE_KERBEROS_SERVICE_NAME = "hive.kerberos-service-name"
134+
HIVE_KERBEROS_SERVICE_NAME_DEFAULT = "hive"
133135

134136
LOCK_CHECK_MIN_WAIT_TIME = "lock-check-min-wait-time"
135137
LOCK_CHECK_MAX_WAIT_TIME = "lock-check-max-wait-time"
@@ -149,9 +151,16 @@ class _HiveClient:
149151
_transport: TTransport
150152
_ugi: Optional[List[str]]
151153

152-
def __init__(self, uri: str, ugi: Optional[str] = None, kerberos_auth: Optional[bool] = HIVE_KERBEROS_AUTH_DEFAULT):
154+
def __init__(
155+
self,
156+
uri: str,
157+
ugi: Optional[str] = None,
158+
kerberos_auth: Optional[bool] = HIVE_KERBEROS_AUTH_DEFAULT,
159+
kerberos_service_name: Optional[str] = HIVE_KERBEROS_SERVICE_NAME_DEFAULT,
160+
):
153161
self._uri = uri
154162
self._kerberos_auth = kerberos_auth
163+
self._kerberos_service_name = kerberos_service_name
155164
self._ugi = ugi.split(":") if ugi else None
156165
self._transport = self._init_thrift_transport()
157166

@@ -161,7 +170,7 @@ def _init_thrift_transport(self) -> TTransport:
161170
if not self._kerberos_auth:
162171
return TTransport.TBufferedTransport(socket)
163172
else:
164-
return TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service="hive")
173+
return TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service=self._kerberos_service_name)
165174

166175
def _client(self) -> Client:
167176
protocol = TBinaryProtocol.TBinaryProtocol(self._transport)
@@ -314,6 +323,7 @@ def _create_hive_client(properties: Dict[str, str]) -> _HiveClient:
314323
uri,
315324
properties.get("ugi"),
316325
property_as_bool(properties, HIVE_KERBEROS_AUTH, HIVE_KERBEROS_AUTH_DEFAULT),
326+
properties.get(HIVE_KERBEROS_SERVICE_NAME, HIVE_KERBEROS_SERVICE_NAME_DEFAULT),
317327
)
318328
except BaseException as e:
319329
last_exception = e
@@ -551,6 +561,12 @@ def commit_table(
551561
previous_metadata_location=current_table.metadata_location,
552562
metadata_properties=updated_staged_table.properties,
553563
)
564+
# Update hive's schema and properties
565+
hive_table.sd = _construct_hive_storage_descriptor(
566+
updated_staged_table.schema(),
567+
updated_staged_table.location(),
568+
property_as_bool(updated_staged_table.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
569+
)
554570
open_client.alter_table_with_environment_context(
555571
dbname=database_name,
556572
tbl_name=table_name,

pyiceberg/catalog/rest/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,8 @@ class RegisterTableRequest(IcebergBaseModel):
178178

179179

180180
class ConfigResponse(IcebergBaseModel):
181-
defaults: Properties = Field()
182-
overrides: Properties = Field()
181+
defaults: Optional[Properties] = Field(default_factory=dict)
182+
overrides: Optional[Properties] = Field(default_factory=dict)
183183

184184

185185
class ListNamespaceResponse(IcebergBaseModel):

pyiceberg/cli/console.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
from pyiceberg.cli.output import ConsoleOutput, JsonOutput, Output
3434
from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchPropertyException, NoSuchTableError
3535
from pyiceberg.table import TableProperties
36-
from pyiceberg.table.refs import SnapshotRef
36+
from pyiceberg.table.refs import SnapshotRef, SnapshotRefType
3737
from pyiceberg.utils.properties import property_as_int
3838

3939

@@ -417,7 +417,7 @@ def list_refs(ctx: Context, identifier: str, type: str, verbose: bool) -> None:
417417
refs = table.refs()
418418
if type:
419419
type = type.lower()
420-
if type not in {"branch", "tag"}:
420+
if type not in {SnapshotRefType.BRANCH, SnapshotRefType.TAG}:
421421
raise ValueError(f"Type must be either branch or tag, got: {type}")
422422

423423
relevant_refs = [
@@ -431,7 +431,7 @@ def list_refs(ctx: Context, identifier: str, type: str, verbose: bool) -> None:
431431

432432
def _retention_properties(ref: SnapshotRef, table_properties: Dict[str, str]) -> Dict[str, str]:
433433
retention_properties = {}
434-
if ref.snapshot_ref_type == "branch":
434+
if ref.snapshot_ref_type == SnapshotRefType.BRANCH:
435435
default_min_snapshots_to_keep = property_as_int(
436436
table_properties,
437437
TableProperties.MIN_SNAPSHOTS_TO_KEEP,

0 commit comments

Comments
 (0)