Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions pyiceberg/table/encryption.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Encryption metadata plumbing only.

This module models encrypted table metadata for faithful JSON round-tripping.
It does not implement cryptography, KMS integration, or key wrapping.
"""

from __future__ import annotations

from pydantic import Field

from pyiceberg.typedef import IcebergBaseModel


class EncryptedKey(IcebergBaseModel):
key_id: str = Field(alias="key-id")
encrypted_key_metadata: str = Field(alias="encrypted-key-metadata")
encrypted_by_id: str | None = Field(alias="encrypted-by-id", default=None)
properties: dict[str, str] | None = Field(default=None)
4 changes: 4 additions & 0 deletions pyiceberg/table/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pyiceberg.exceptions import ValidationError
from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec, assign_fresh_partition_spec_ids
from pyiceberg.schema import Schema, assign_fresh_schema_ids
from pyiceberg.table.encryption import EncryptedKey
from pyiceberg.table.name_mapping import NameMapping, parse_mapping_from_json
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
from pyiceberg.table.snapshots import MetadataLogEntry, Snapshot, SnapshotLogEntry
Expand Down Expand Up @@ -574,6 +575,9 @@ def construct_refs(self) -> TableMetadata:
next_row_id: int | None = Field(alias="next-row-id", default=None)
"""A long higher than all assigned row IDs; the next snapshot's `first-row-id`."""

encryption_keys: list[EncryptedKey] | None = Field(alias="encryption-keys", default=None)
"""A list of encrypted keys used by this table."""

def model_dump_json(self, exclude_none: bool = True, exclude: Any | None = None, by_alias: bool = True, **kwargs: Any) -> str:
raise NotImplementedError("Writing V3 is not yet supported, see: https://github.com/apache/iceberg-python/issues/1551")

Expand Down
4 changes: 4 additions & 0 deletions pyiceberg/table/snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ class Snapshot(IcebergBaseModel):
added_rows: int | None = Field(
alias="added-rows", default=None, description="The upper bound of the number of rows with assigned row IDs"
)
key_id: str | None = Field(
alias="key-id", default=None, description="ID of the encryption key that encrypts the manifest list key metadata"
)

def __str__(self) -> str:
"""Return the string representation of the Snapshot class."""
Expand All @@ -273,6 +276,7 @@ def __repr__(self) -> str:
f"schema_id={self.schema_id}" if self.schema_id is not None else None,
f"first_row_id={self.first_row_id}" if self.first_row_id is not None else None,
f"added_rows={self.added_rows}" if self.added_rows is not None else None,
f"key_id='{self.key_id}'" if self.key_id is not None else None,
]
filtered_fields = [field for field in fields if field is not None]
return f"Snapshot({', '.join(filtered_fields)})"
Expand Down
21 changes: 21 additions & 0 deletions pyiceberg/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ def handle_primitive_type(cls, v: Any, handler: ValidatorFunctionWrapHandler) ->
return BinaryType()
if v == "unknown":
return UnknownType()
if v == "variant":
return VariantType()
if v.startswith("fixed"):
return FixedType(_parse_fixed_type(v))
if v.startswith("decimal"):
Expand Down Expand Up @@ -954,6 +956,25 @@ def minimum_format_version(self) -> TableVersion:
return 3


class VariantType(PrimitiveType):
"""A variant data type in Iceberg can be represented using an instance of this class.

Variants in Iceberg are semi-structured values encoded using the Parquet Variant binary format.

Example:
>>> column_foo = VariantType()
>>> isinstance(column_foo, VariantType)
True
>>> column_foo
VariantType()
"""

root: Literal["variant"] = Field(default="variant")

def minimum_format_version(self) -> TableVersion:
return 3


class GeometryType(PrimitiveType):
"""A geometry data type in Iceberg (v3+) for storing spatial geometries.

Expand Down
Loading