Skip to content

Commit 96c6e73

Browse files
committed
encryption key
1 parent f71806e commit 96c6e73

6 files changed

Lines changed: 126 additions & 1 deletion

File tree

pyiceberg/table/encryption.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from typing import Optional
19+
from pydantic import Field
20+
from pyiceberg.typedef import IcebergBaseModel
21+
22+
23+
class EncryptedKey(IcebergBaseModel):
24+
key_id: str = Field(alias="key-id", description="ID of the encryption key")
25+
encrypted_key_metadata: bytes = Field(alias="encrypted-key-metadata", description="Encrypted key and metadata, base64 encoded")
26+
encrypted_by_id: Optional[str] = Field(alias="encrypted-by-id", description="Optional ID of the key used to encrypt or wrap `key-metadata`", default=None)
27+
properties: Optional[dict[str, str]] = Field(alias="properties", description="A string to string map of additional metadata used by the table's encryption scheme", default=None)

pyiceberg/table/metadata.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pyiceberg.exceptions import ValidationError
2828
from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec, assign_fresh_partition_spec_ids
2929
from pyiceberg.schema import Schema, assign_fresh_schema_ids
30+
from pyiceberg.table.encryption import EncryptedKey
3031
from pyiceberg.table.name_mapping import NameMapping, parse_mapping_from_json
3132
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
3233
from pyiceberg.table.snapshots import MetadataLogEntry, Snapshot, SnapshotLogEntry
@@ -516,6 +517,7 @@ class TableMetadataV3(TableMetadataCommonFields, IcebergBaseModel):
516517
- Multi-argument transforms for partitioning and sorting
517518
- Row Lineage tracking
518519
- Binary deletion vectors
520+
- Encryption Keys
519521
520522
For more information:
521523
https://iceberg.apache.org/spec/?column-projection#version-3-extended-types-and-capabilities
@@ -552,6 +554,9 @@ def construct_refs(cls, table_metadata: TableMetadata) -> TableMetadata:
552554
next_row_id: Optional[int] = Field(alias="next-row-id", default=None)
553555
"""A long higher than all assigned row IDs; the next snapshot's `first-row-id`."""
554556

557+
encryption_keys: List[EncryptedKey] = Field(alias="encryption-keys", default=[])
558+
"""The list of encryption keys for this table."""
559+
555560
def model_dump_json(
556561
self, exclude_none: bool = True, exclude: Optional[Any] = None, by_alias: bool = True, **kwargs: Any
557562
) -> str:

pyiceberg/table/snapshots.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ class Snapshot(IcebergBaseModel):
243243
manifest_list: str = Field(alias="manifest-list", description="Location of the snapshot's manifest list file")
244244
summary: Optional[Summary] = Field(default=None)
245245
schema_id: Optional[int] = Field(alias="schema-id", default=None)
246+
key_id: Optional[str] = Field(alias="key-id", default=None, description="The id of the encryption key")
246247

247248
def __str__(self) -> str:
248249
"""Return the string representation of the Snapshot class."""

pyiceberg/table/update/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from pyiceberg.exceptions import CommitFailedException
2929
from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec
3030
from pyiceberg.schema import Schema
31+
from pyiceberg.table.encryption import EncryptedKey
3132
from pyiceberg.table.metadata import SUPPORTED_TABLE_FORMAT_VERSION, TableMetadata, TableMetadataUtil
3233
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef
3334
from pyiceberg.table.snapshots import (
@@ -84,6 +85,13 @@ class UpgradeFormatVersionUpdate(IcebergBaseModel):
8485
action: Literal["upgrade-format-version"] = Field(default="upgrade-format-version")
8586
format_version: int = Field(alias="format-version")
8687

88+
class AddEncryptedKeyUpdate(IcebergBaseModel):
89+
action: Literal["add-encryption-key"] = Field(default="add-encryption-key")
90+
key: EncryptedKey = Field(alias="key")
91+
92+
class RemoveEncryptedKeyUpdate(IcebergBaseModel):
93+
action: Literal["remove-encryption-key"] = Field(default="remove-encryption-key")
94+
key_id: str = Field(alias="key-id")
8795

8896
class AddSchemaUpdate(IcebergBaseModel):
8997
action: Literal["add-schema"] = Field(default="add-schema")
@@ -217,6 +225,8 @@ class RemoveStatisticsUpdate(IcebergBaseModel):
217225
RemovePropertiesUpdate,
218226
SetStatisticsUpdate,
219227
RemoveStatisticsUpdate,
228+
AddEncryptedKeyUpdate,
229+
RemoveEncryptedKeyUpdate,
220230
],
221231
Field(discriminator="action"),
222232
]
@@ -581,6 +591,14 @@ def _(update: RemoveStatisticsUpdate, base_metadata: TableMetadata, context: _Ta
581591

582592
return base_metadata.model_copy(update={"statistics": statistics})
583593

594+
@_apply_table_update.register(AddEncryptedKeyUpdate)
595+
def _(update: AddEncryptedKeyUpdate, base_metadata: TableMetadata, context: _TableMetadataUpdateContext) -> TableMetadata:
596+
context.add_update(update)
597+
598+
if base_metadata.format_version <= 2:
599+
raise ValueError("Cannot add encryption keys to Iceberg v1 or v2 tables")
600+
601+
return base_metadata.model_copy(update={"encryption_keys": base_metadata.encryption_keys + [update.key]})
584602

585603
def update_table_metadata(
586604
base_metadata: TableMetadata,

tests/conftest.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
from pyiceberg.schema import Accessor, Schema
6565
from pyiceberg.serializers import ToOutputFile
6666
from pyiceberg.table import FileScanTask, Table
67-
from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2
67+
from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2, TableMetadataV3
6868
from pyiceberg.types import (
6969
BinaryType,
7070
BooleanType,
@@ -2341,6 +2341,17 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
23412341
catalog=NoopCatalog("NoopCatalog"),
23422342
)
23432343

2344+
@pytest.fixture
2345+
def table_v3(example_table_metadata_v3: Dict[str, Any]) -> Table:
2346+
table_metadata = TableMetadataV3(**example_table_metadata_v3)
2347+
return Table(
2348+
identifier=("database", "table"),
2349+
metadata=table_metadata,
2350+
metadata_location=f"{table_metadata.location}/uuid.metadata.json",
2351+
io=load_file_io(),
2352+
catalog=NoopCatalog("NoopCatalog"),
2353+
)
2354+
23442355

23452356
@pytest.fixture
23462357
def table_v2_with_fixed_and_decimal_types(

tests/table/test_init.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
# pylint:disable=redefined-outer-name
18+
import base64
1819
import json
1920
import uuid
2021
from copy import copy
@@ -49,6 +50,7 @@
4950
TableIdentifier,
5051
_match_deletes_to_data_file,
5152
)
53+
from pyiceberg.table.encryption import EncryptedKey
5254
from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER, TableMetadataUtil, TableMetadataV2, _generate_snapshot_id
5355
from pyiceberg.table.refs import SnapshotRef
5456
from pyiceberg.table.snapshots import (
@@ -66,6 +68,7 @@
6668
)
6769
from pyiceberg.table.statistics import BlobMetadata, StatisticsFile
6870
from pyiceberg.table.update import (
71+
AddEncryptedKeyUpdate,
6972
AddSnapshotUpdate,
7073
AddSortOrderUpdate,
7174
AssertCreate,
@@ -1345,3 +1348,63 @@ def test_remove_statistics_update(table_v2_with_statistics: Table) -> None:
13451348
table_v2_with_statistics.metadata,
13461349
(RemoveStatisticsUpdate(snapshot_id=123456789),),
13471350
)
1351+
1352+
def test_add_encryption_key(table_v3: Table) -> None:
1353+
update = AddEncryptedKeyUpdate(
1354+
key=EncryptedKey(
1355+
key_id="test",
1356+
encrypted_key_metadata=base64.b64encode("hello".encode('utf-8'))
1357+
)
1358+
)
1359+
1360+
expected = """
1361+
{
1362+
"key-id": "test",
1363+
"encrypted-key-metadata": "aGVsbG8="
1364+
}"""
1365+
1366+
assert table_v3.metadata.encryption_keys == []
1367+
add_metadata = update_table_metadata(table_v3.metadata, (update,))
1368+
assert len(add_metadata.encryption_keys) == 1
1369+
1370+
assert json.loads(add_metadata.encryption_keys[0].model_dump_json()) == json.loads(expected)
1371+
1372+
def test_remove_encryption_key(table_v3: Table) -> None:
1373+
update_add = AddEncryptedKeyUpdate(
1374+
key=EncryptedKey(
1375+
key_id="test",
1376+
encrypted_key_metadata=base64.b64encode("hello".encode('utf-8'))
1377+
)
1378+
)
1379+
add_metadata = update_table_metadata(table_v3.metadata, (update_add,))
1380+
assert len(add_metadata.encryption_keys) == 1
1381+
1382+
update_remove = RemoveEncryptedKeyUpdate(key_id="test")
1383+
remove_metadata = update_table_metadata(add_metadata, (update_remove,))
1384+
assert len(remove_metadata.encryption_keys) == 0
1385+
1386+
1387+
def test_remove_non_existent_encryption_key(table_v3: Table) -> None:
1388+
update_add = AddEncryptedKeyUpdate(
1389+
key=EncryptedKey(
1390+
key_id="test",
1391+
encrypted_key_metadata=base64.b64encode("hello".encode('utf-8'))
1392+
)
1393+
)
1394+
add_metadata = update_table_metadata(table_v3.metadata, (update_add,))
1395+
assert len(add_metadata.encryption_keys) == 1
1396+
1397+
update_remove = RemoveEncryptedKeyUpdate(key_id="non_existent_key")
1398+
remove_metadata = update_table_metadata(add_metadata, (update_remove,))
1399+
assert len(remove_metadata.encryption_keys) == 1 # Should be a no-op
1400+
1401+
1402+
def test_add_remove_encryption_key_v2_table(table_v2: Table) -> None:
1403+
update_add = AddEncryptedKeyUpdate(
1404+
key=EncryptedKey(
1405+
key_id="test_v2",
1406+
encrypted_key_metadata=base64.b64encode("hello_v2".encode('utf-8'))
1407+
)
1408+
)
1409+
with pytest.raises(ValueError, match=r"Cannot add encryption keys from Iceberg v1 or v2 table"):
1410+
update_table_metadata(table_v2.metadata, (update_add,))

0 commit comments

Comments
 (0)