Skip to content

Commit e1eac1b

Browse files
authored
Merge branch 'main' into vended-credential-refresh
2 parents 5d44fb5 + 0bdff48 commit e1eac1b

17 files changed

Lines changed: 619 additions & 18 deletions

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# under the License.
1717
.PHONY: help install install-uv check-license lint \
1818
test test-integration test-integration-setup test-integration-exec test-integration-cleanup test-integration-rebuild \
19-
test-s3 test-adls test-gcs test-coverage coverage-report \
19+
test-s3 test-adls test-gcs test-coverage coverage-report test test-notebook\
2020
docs-serve docs-build notebook notebook-infra \
2121
clean
2222

@@ -150,6 +150,9 @@ coverage-report: ## Combine and report coverage
150150
uv run $(PYTHON_ARG) coverage html
151151
uv run $(PYTHON_ARG) coverage xml
152152

153+
test-notebook: ## Run notebook tests (pyiceberg_example and spark_integration_example) via papermill
154+
$(TEST_RUNNER) pytest tests/notebooks/test_pyiceberg_example.py tests/notebooks/test_spark_integration_example.py -m notebook $(PYTEST_ARGS)
155+
153156
# ================
154157
# Documentation
155158
# ================

pyiceberg/catalog/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
cast,
3232
)
3333

34+
from typing_extensions import override
35+
3436
from pyiceberg.exceptions import (
3537
NamespaceAlreadyExistsError,
3638
NoSuchNamespaceError,
@@ -904,9 +906,11 @@ class MetastoreCatalog(Catalog, ABC):
904906
def __init__(self, name: str, **properties: str):
905907
super().__init__(name, **properties)
906908

909+
@override
907910
def supports_server_side_planning(self) -> bool:
908911
return False
909912

913+
@override
910914
def create_table_transaction(
911915
self,
912916
identifier: str | Identifier,
@@ -920,13 +924,15 @@ def create_table_transaction(
920924
self._create_staged_table(identifier, schema, location, partition_spec, sort_order, properties)
921925
)
922926

927+
@override
923928
def table_exists(self, identifier: str | Identifier) -> bool:
924929
try:
925930
self.load_table(identifier)
926931
return True
927932
except NoSuchTableError:
928933
return False
929934

935+
@override
930936
def namespace_exists(self, namespace: str | Identifier) -> bool:
931937
"""Check if a namespace exists.
932938
@@ -942,6 +948,7 @@ def namespace_exists(self, namespace: str | Identifier) -> bool:
942948
except NoSuchNamespaceError:
943949
return False
944950

951+
@override
945952
def purge_table(self, identifier: str | Identifier) -> None:
946953
table = self.load_table(identifier)
947954
self.drop_table(identifier)
@@ -962,6 +969,7 @@ def purge_table(self, identifier: str | Identifier) -> None:
962969
delete_files(io, prev_metadata_files, PREVIOUS_METADATA)
963970
delete_files(io, {table.metadata_location}, METADATA)
964971

972+
@override
965973
def create_view(
966974
self,
967975
identifier: str | Identifier,

pyiceberg/catalog/bigquery_metastore.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor
2727
from google.cloud.exceptions import Conflict
2828
from google.oauth2 import service_account
29+
from typing_extensions import override
2930

3031
from pyiceberg.catalog import WAREHOUSE_LOCATION, MetastoreCatalog, PropertiesUpdateSummary
3132
from pyiceberg.exceptions import NamespaceAlreadyExistsError, NoSuchNamespaceError, NoSuchTableError, TableAlreadyExistsError
@@ -101,6 +102,7 @@ def __init__(self, name: str, **properties: str):
101102
self.location = location
102103
self.project_id = project_id
103104

105+
@override
104106
def create_table(
105107
self,
106108
identifier: str | Identifier,
@@ -156,6 +158,7 @@ def create_table(
156158

157159
return self.load_table(identifier=identifier)
158160

161+
@override
159162
def create_namespace(self, namespace: str | Identifier, properties: Properties = EMPTY_DICT) -> None:
160163
"""Create a namespace in the catalog.
161164
@@ -177,6 +180,7 @@ def create_namespace(self, namespace: str | Identifier, properties: Properties =
177180
except Conflict as e:
178181
raise NamespaceAlreadyExistsError("Namespace {database_name} already exists") from e
179182

183+
@override
180184
def load_table(self, identifier: str | Identifier) -> Table:
181185
"""
182186
Load the table's metadata and returns the table instance.
@@ -205,6 +209,7 @@ def load_table(self, identifier: str | Identifier) -> Table:
205209
except NotFound as e:
206210
raise NoSuchTableError(f"Table does not exist: {dataset_name}.{table_name}") from e
207211

212+
@override
208213
def drop_table(self, identifier: str | Identifier) -> None:
209214
"""Drop a table.
210215
@@ -225,14 +230,17 @@ def drop_table(self, identifier: str | Identifier) -> None:
225230
except NoSuchTableError as e:
226231
raise NoSuchTableError(f"Table does not exist: {dataset_name}.{table_name}") from e
227232

233+
@override
228234
def commit_table(
229235
self, table: Table, requirements: tuple[TableRequirement, ...], updates: tuple[TableUpdate, ...]
230236
) -> CommitTableResponse:
231237
raise NotImplementedError
232238

239+
@override
233240
def rename_table(self, from_identifier: str | Identifier, to_identifier: str | Identifier) -> Table:
234241
raise NotImplementedError
235242

243+
@override
236244
def drop_namespace(self, namespace: str | Identifier) -> None:
237245
database_name = self.identifier_to_database(namespace)
238246

@@ -243,6 +251,7 @@ def drop_namespace(self, namespace: str | Identifier) -> None:
243251
except NotFound as e:
244252
raise NoSuchNamespaceError(f"Namespace {namespace} does not exist.") from e
245253

254+
@override
246255
def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
247256
database_name = self.identifier_to_database(namespace)
248257
iceberg_tables: list[Identifier] = []
@@ -257,6 +266,7 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
257266
raise NoSuchNamespaceError(f"Namespace (dataset) '{database_name}' not found.") from None
258267
return iceberg_tables
259268

269+
@override
260270
def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
261271
# Since this catalog only supports one-level namespaces, it always returns an empty list unless
262272
# passed an empty namespace to list all namespaces within the catalog.
@@ -267,6 +277,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
267277
datasets_iterator = self.client.list_datasets()
268278
return [(dataset.dataset_id,) for dataset in datasets_iterator]
269279

280+
@override
270281
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
271282
"""Register a new table using existing metadata.
272283
@@ -302,21 +313,27 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o
302313

303314
return self.load_table(identifier=identifier)
304315

316+
@override
305317
def list_views(self, namespace: str | Identifier) -> list[Identifier]:
306318
raise NotImplementedError
307319

320+
@override
308321
def register_view(self, identifier: str | Identifier, metadata_location: str) -> View:
309322
raise NotImplementedError
310323

324+
@override
311325
def drop_view(self, identifier: str | Identifier) -> None:
312326
raise NotImplementedError
313327

328+
@override
314329
def view_exists(self, identifier: str | Identifier) -> bool:
315330
raise NotImplementedError
316331

332+
@override
317333
def load_view(self, identifier: str | Identifier) -> View:
318334
raise NotImplementedError
319335

336+
@override
320337
def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
321338
dataset_name = self.identifier_to_database(namespace)
322339

@@ -329,6 +346,7 @@ def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
329346
raise NoSuchNamespaceError(f"Namespace {namespace} not found") from e
330347
return {}
331348

349+
@override
332350
def update_namespace_properties(
333351
self, namespace: str | Identifier, removals: set[str] | None = None, updates: Properties = EMPTY_DICT
334352
) -> PropertiesUpdateSummary:

pyiceberg/catalog/dynamodb.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
)
2525

2626
import boto3
27+
from typing_extensions import override
2728

2829
from pyiceberg.catalog import (
2930
BOTOCORE_SESSION,
@@ -151,6 +152,7 @@ def _dynamodb_table_exists(self) -> bool:
151152
else:
152153
return True
153154

155+
@override
154156
def create_table(
155157
self,
156158
identifier: str | Identifier,
@@ -210,6 +212,7 @@ def create_table(
210212

211213
return self.load_table(identifier=identifier)
212214

215+
@override
213216
def register_table(self, identifier: str | Identifier, metadata_location: str, overwrite: bool = False) -> Table:
214217
"""Register a new table using existing metadata.
215218
@@ -226,6 +229,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o
226229
"""
227230
raise NotImplementedError
228231

232+
@override
229233
def commit_table(
230234
self, table: Table, requirements: tuple[TableRequirement, ...], updates: tuple[TableUpdate, ...]
231235
) -> CommitTableResponse:
@@ -245,6 +249,7 @@ def commit_table(
245249
"""
246250
raise NotImplementedError
247251

252+
@override
248253
def load_table(self, identifier: str | Identifier) -> Table:
249254
"""
250255
Load the table's metadata and returns the table instance.
@@ -265,6 +270,7 @@ def load_table(self, identifier: str | Identifier) -> Table:
265270
dynamo_table_item = self._get_iceberg_table_item(database_name=database_name, table_name=table_name)
266271
return self._convert_dynamo_table_item_to_iceberg_table(dynamo_table_item=dynamo_table_item)
267272

273+
@override
268274
def drop_table(self, identifier: str | Identifier) -> None:
269275
"""Drop a table.
270276
@@ -285,6 +291,7 @@ def drop_table(self, identifier: str | Identifier) -> None:
285291
except ConditionalCheckFailedException as e:
286292
raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") from e
287293

294+
@override
288295
def rename_table(self, from_identifier: str | Identifier, to_identifier: str | Identifier) -> Table:
289296
"""Rename a fully classified table name.
290297
@@ -351,6 +358,7 @@ def rename_table(self, from_identifier: str | Identifier, to_identifier: str | I
351358

352359
return self.load_table(to_identifier)
353360

361+
@override
354362
def create_namespace(self, namespace: str | Identifier, properties: Properties = EMPTY_DICT) -> None:
355363
"""Create a namespace in the catalog.
356364
@@ -372,6 +380,7 @@ def create_namespace(self, namespace: str | Identifier, properties: Properties =
372380
except ConditionalCheckFailedException as e:
373381
raise NamespaceAlreadyExistsError(f"Database {database_name} already exists") from e
374382

383+
@override
375384
def drop_namespace(self, namespace: str | Identifier) -> None:
376385
"""Drop a namespace.
377386
@@ -399,6 +408,7 @@ def drop_namespace(self, namespace: str | Identifier) -> None:
399408
except ConditionalCheckFailedException as e:
400409
raise NoSuchNamespaceError(f"Database does not exist: {database_name}") from e
401410

411+
@override
402412
def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
403413
"""List Iceberg tables under the given namespace in the catalog.
404414
@@ -443,6 +453,7 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
443453

444454
return table_identifiers
445455

456+
@override
446457
def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
447458
"""List top-level namespaces from the catalog.
448459
@@ -485,6 +496,7 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
485496

486497
return database_identifiers
487498

499+
@override
488500
def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
489501
"""
490502
Get properties for a namespace.
@@ -503,6 +515,7 @@ def load_namespace_properties(self, namespace: str | Identifier) -> Properties:
503515
namespace_dict = _convert_dynamo_item_to_regular_dict(namespace_item)
504516
return _get_namespace_properties(namespace_dict=namespace_dict)
505517

518+
@override
506519
def update_namespace_properties(
507520
self, namespace: str | Identifier, removals: set[str] | None = None, updates: Properties = EMPTY_DICT
508521
) -> PropertiesUpdateSummary:
@@ -540,6 +553,7 @@ def update_namespace_properties(
540553

541554
return properties_update_summary
542555

556+
@override
543557
def create_view(
544558
self,
545559
identifier: str | Identifier,
@@ -550,18 +564,23 @@ def create_view(
550564
) -> View:
551565
raise NotImplementedError
552566

567+
@override
553568
def list_views(self, namespace: str | Identifier) -> list[Identifier]:
554569
raise NotImplementedError
555570

571+
@override
556572
def register_view(self, identifier: str | Identifier, metadata_location: str) -> View:
557573
raise NotImplementedError
558574

575+
@override
559576
def drop_view(self, identifier: str | Identifier) -> None:
560577
raise NotImplementedError
561578

579+
@override
562580
def view_exists(self, identifier: str | Identifier) -> bool:
563581
raise NotImplementedError
564582

583+
@override
565584
def load_view(self, identifier: str | Identifier) -> View:
566585
raise NotImplementedError
567586

0 commit comments

Comments
 (0)