Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions pyiceberg/catalog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,20 @@ def delete_data_files(io: FileIO, manifests_to_delete: list[ManifestFile]) -> No
deleted_files[path] = True


def _raise_if_view_exists(catalog: Catalog, identifier: str | Identifier) -> None:
"""Raise `TableAlreadyExistsError` if a view exists at the given identifier.

Catalogs that don't support views raise `NotImplementedError` from `view_exists` —
treat that as "no view at this identifier".

Args:
    catalog: Catalog whose `view_exists` method is queried.
    identifier: Identifier to check for an existing view.

Raises:
    TableAlreadyExistsError: If `catalog.view_exists(identifier)` returns a truthy value.
"""
try:
view_collision = catalog.view_exists(identifier)
except NotImplementedError:
# This catalog has no view support, so no view can collide with the identifier.
view_collision = False
if view_collision:
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Helper mirrors the pattern from Java HiveCatalog and Java REST's RESTSessionCatalog.replaceTransaction. Re-uses TableAlreadyExistsError so existing create_table_if_not_exists callers keep working. Catalogs without view support raise NotImplementedError from view_exists — treat that as 'no view'.

raise TableAlreadyExistsError(f"View with same name already exists: {identifier}")


def _import_catalog(name: str, catalog_impl: str, properties: Properties) -> Catalog | None:
try:
path_parts = catalog_impl.split(".")
Expand Down Expand Up @@ -920,6 +934,7 @@ def create_table_transaction(
sort_order: SortOrder = UNSORTED_SORT_ORDER,
properties: Properties = EMPTY_DICT,
) -> CreateTableTransaction:
_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deliberate divergence from Java: Java HiveCatalog.ViewAwareTableBuilder overrides create() with the view check but does not override createTransaction(), leaving the eager-create path stricter than the txn path. Checking here fixes that asymmetry — a create_table_transaction that would collide with a view fails fast, before any metadata files are written.

return CreateTableTransaction(
self._create_staged_table(identifier, schema, location, partition_spec, sort_order, properties)
)
Expand Down
4 changes: 3 additions & 1 deletion pyiceberg/catalog/bigquery_metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from google.oauth2 import service_account
from typing_extensions import override

from pyiceberg.catalog import WAREHOUSE_LOCATION, MetastoreCatalog, PropertiesUpdateSummary
from pyiceberg.catalog import WAREHOUSE_LOCATION, MetastoreCatalog, PropertiesUpdateSummary, _raise_if_view_exists
from pyiceberg.exceptions import NamespaceAlreadyExistsError, NoSuchNamespaceError, NoSuchTableError, TableAlreadyExistsError
from pyiceberg.io import load_file_io
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
Expand Down Expand Up @@ -134,6 +134,7 @@ def create_table(
schema: Schema = self._convert_schema_if_needed(schema) # type: ignore

dataset_name, table_name = self.identifier_to_database_and_table(identifier)
_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No iceberg-java BigQuery analog. Hive reference for the semantics: HiveCatalog.ViewAwareTableBuilder.create(). Runtime no-op until BQ gains view support.


location = self._resolve_table_location(location, dataset_name, table_name)
provider = load_location_provider(table_location=location, table_properties=properties)
Expand Down Expand Up @@ -295,6 +296,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o
if overwrite:
raise NotImplementedError("`overwrite` isn't supported")

_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No iceberg-java BigQuery analog. Hive reference: HiveCatalog.registerTable. Runtime no-op.

dataset_name, table_name = self.identifier_to_database_and_table(identifier)

dataset_ref = DatasetReference(project=self.project_id, dataset_id=dataset_name)
Expand Down
3 changes: 3 additions & 0 deletions pyiceberg/catalog/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
TABLE_TYPE,
MetastoreCatalog,
PropertiesUpdateSummary,
_raise_if_view_exists,
)
from pyiceberg.exceptions import (
ConditionalCheckFailedException,
Expand Down Expand Up @@ -187,6 +188,7 @@ def create_table(
)

database_name, table_name = self.identifier_to_database_and_table(identifier)
_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Java DynamoDbCatalog has no view support. Closest reference is HiveCatalog.ViewAwareTableBuilder.create(). Runtime no-op.


location = self._resolve_table_location(location, database_name, table_name)
provider = load_location_provider(table_location=location, table_properties=properties)
Expand Down Expand Up @@ -313,6 +315,7 @@ def rename_table(self, from_identifier: str | Identifier, to_identifier: str | I
"""
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError)
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
_raise_if_view_exists(self, to_identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No Java DynamoDb analog. Hive reference: HiveCatalog.renameTableOrView. Runtime no-op.


from_table_item = self._get_iceberg_table_item(database_name=from_database_name, table_name=from_table_name)

Expand Down
4 changes: 4 additions & 0 deletions pyiceberg/catalog/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
TABLE_TYPE,
MetastoreCatalog,
PropertiesUpdateSummary,
_raise_if_view_exists,
)
from pyiceberg.exceptions import (
CommitFailedException,
Expand Down Expand Up @@ -571,6 +572,7 @@ def create_table(
"""
database_name, table_name = self.identifier_to_database_and_table(identifier)
_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Java GlueCatalog has no view support and no check here; closest reference is HiveCatalog.ViewAwareTableBuilder.create(). Runtime no-op until Glue gains view support — view_exists raises NotImplementedError.


if self._is_s3tables_database(database_name):
return self._create_table_s3tables(
Expand Down Expand Up @@ -621,6 +623,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o
if overwrite:
raise NotImplementedError("`overwrite` isn't supported")

_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same shape as the other Glue call sites — no Java analog (Glue Java has no view support), Hive reference HiveCatalog.registerTable. Runtime no-op.

database_name, table_name = self.identifier_to_database_and_table(identifier)
properties = EMPTY_DICT
io = self._load_file_io(location=metadata_location)
Expand Down Expand Up @@ -772,6 +775,7 @@ def rename_table(self, from_identifier: str | Identifier, to_identifier: str | I
"""
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError)
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
_raise_if_view_exists(self, to_identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No Java Glue analog. Hive reference: HiveCatalog.renameTableOrView. Runtime no-op.

try:
get_table_response = self.glue.get_table(DatabaseName=from_database_name, Name=from_table_name)
except self.glue.exceptions.EntityNotFoundException as e:
Expand Down
4 changes: 4 additions & 0 deletions pyiceberg/catalog/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
URI,
MetastoreCatalog,
PropertiesUpdateSummary,
_raise_if_view_exists,
)
from pyiceberg.exceptions import (
CommitFailedException,
Expand Down Expand Up @@ -413,6 +414,7 @@ def create_table(
ValueError: If the identifier is invalid.
"""
properties = {**DEFAULT_PROPERTIES, **properties}
_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1:1 with Java HiveCatalog.ViewAwareTableBuilder.create(). Hive is the only PyIceberg catalog that actually implements view_exists, so this is the call site where the check fires for real.

staged_table = self._create_staged_table(
identifier=identifier,
schema=schema,
Expand Down Expand Up @@ -461,6 +463,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o
if overwrite:
raise NotImplementedError("`overwrite` isn't supported")

_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1:1 with Java HiveCatalog.registerTable.

database_name, table_name = self.identifier_to_database_and_table(identifier)
io = self._load_file_io(location=metadata_location)
metadata_file = io.new_input(metadata_location)
Expand Down Expand Up @@ -700,6 +703,7 @@ def rename_table(self, from_identifier: str | Identifier, to_identifier: str | I

if self.table_exists(to_identifier):
raise TableAlreadyExistsError(f"Table already exists: {to_table_name}")
_raise_if_view_exists(self, to_identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1:1 with Java HiveCatalog.renameTableOrView. The check is on the destination, matching Java.


try:
with self._client as open_client:
Expand Down
4 changes: 4 additions & 0 deletions pyiceberg/catalog/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
Catalog,
MetastoreCatalog,
PropertiesUpdateSummary,
_raise_if_view_exists,
)
from pyiceberg.exceptions import (
CommitFailedException,
Expand Down Expand Up @@ -211,6 +212,7 @@ def create_table(
table_name = Catalog.table_name_from(identifier)
if not self.namespace_exists(namespace_identifier):
raise NoSuchNamespaceError(f"Namespace does not exist: {namespace_identifier}")
_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mirrors Java HiveCatalog.ViewAwareTableBuilder.create(). Java JdbcCatalog only checks views on replaceTransaction, not on create — runtime no-op here since SqlCatalog.view_exists raises NotImplementedError, but the helper keeps the call site uniform across catalogs.


namespace = Catalog.namespace_to_string(namespace_identifier)
location = self._resolve_table_location(location, namespace, table_name)
Expand Down Expand Up @@ -263,6 +265,7 @@ def register_table(self, identifier: str | Identifier, metadata_location: str, o
table_name = Catalog.table_name_from(identifier)
if not self.namespace_exists(namespace):
raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}")
_raise_if_view_exists(self, identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mirrors Java HiveCatalog.registerTable. Java JdbcCatalog inherits registerTable from BaseMetastoreCatalog without a view check — runtime no-op here for the same reason as create_table.


with Session(self.engine) as session:
try:
Expand Down Expand Up @@ -376,6 +379,7 @@ def rename_table(self, from_identifier: str | Identifier, to_identifier: str | I
to_table_name = Catalog.table_name_from(to_identifier)
if not self.namespace_exists(to_namespace):
raise NoSuchNamespaceError(f"Namespace does not exist: {to_namespace}")
_raise_if_view_exists(self, to_identifier)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mirrors Java HiveCatalog.renameTableOrView. Java JdbcCatalog.renameTable does the check too but gates it on schemaVersion == V1. PyIceberg's SqlCatalog has no equivalent V1/V2 split — view_exists raises NotImplementedError, so this is a no-op at runtime.

with Session(self.engine) as session:
try:
if self.engine.dialect.supports_sane_rowcount:
Expand Down
35 changes: 34 additions & 1 deletion tests/catalog/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@

import pytest

from pyiceberg.catalog import Catalog, load_catalog
from pyiceberg.catalog import Catalog, _raise_if_view_exists, load_catalog
from pyiceberg.catalog.memory import InMemoryCatalog
from pyiceberg.exceptions import TableAlreadyExistsError
from pyiceberg.io import WAREHOUSE
from pyiceberg.schema import Schema
from pyiceberg.types import NestedField, StringType
Expand Down Expand Up @@ -69,6 +70,38 @@ def test_catalog_repr(catalog: InMemoryCatalog) -> None:
assert s == "test.in_memory.catalog (<class 'pyiceberg.catalog.memory.InMemoryCatalog'>)"


class _StubCatalog:
def __init__(self, *, returns: bool | None = None, raises: type[Exception] | None = None) -> None:
self._returns = returns
self._raises = raises
self.calls: list[object] = []

def view_exists(self, identifier: object) -> bool:
self.calls.append(identifier)
if self._raises is not None:
raise self._raises
assert self._returns is not None
return self._returns


class TestRaiseIfViewExists:
    """Unit tests for `_raise_if_view_exists`, driven by `_StubCatalog`."""

    def test_raises_when_view_exists(self) -> None:
        """A truthy `view_exists` answer surfaces as `TableAlreadyExistsError`."""
        catalog_stub = _StubCatalog(returns=True)
        expected_message = "View with same name already exists: ns.t"
        with pytest.raises(TableAlreadyExistsError, match=expected_message):
            _raise_if_view_exists(catalog_stub, "ns.t")  # type: ignore[arg-type]
        assert catalog_stub.calls == ["ns.t"]

    def test_no_raise_when_view_absent(self) -> None:
        """A falsy `view_exists` answer lets the helper return without raising."""
        catalog_stub = _StubCatalog(returns=False)
        identifier = ("ns", "t")
        _raise_if_view_exists(catalog_stub, identifier)  # type: ignore[arg-type]
        assert catalog_stub.calls == [("ns", "t")]

    def test_not_implemented_treated_as_no_view(self) -> None:
        """`NotImplementedError` from `view_exists` is swallowed by the helper."""
        catalog_stub = _StubCatalog(raises=NotImplementedError)
        _raise_if_view_exists(catalog_stub, "ns.t")  # type: ignore[arg-type]
        assert catalog_stub.calls == ["ns.t"]


class TestCatalogClose:
"""Test catalog close functionality."""

Expand Down
95 changes: 95 additions & 0 deletions tests/catalog/test_catalog_behaviors.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,101 @@ def test_rename_table_to_missing_namespace(
catalog.rename_table(test_table_identifier, another_table_identifier)


@pytest.fixture(name="single_catalog")
def fixture_single_catalog(tmp_path: Path) -> Catalog:
    """Provide an `InMemoryCatalog` whose warehouse is the per-test temporary directory."""
    from pyiceberg.catalog.memory import InMemoryCatalog
    from pyiceberg.io import WAREHOUSE

    warehouse_root = tmp_path.absolute().as_posix()
    catalog_properties = {WAREHOUSE: warehouse_root}
    return InMemoryCatalog("view_collision_tests", **catalog_properties)


def _recording_view_exists(target: Identifier) -> tuple[list[Identifier], Any]:
    """Create a `view_exists` replacement that logs its calls.

    Args:
        target: The identifier for which the replacement reports a view.

    Returns:
        A pair of (list that receives each normalized identifier, the replacement callable).
        The callable returns True only when the normalized identifier equals `target`.
    """
    recorded: list[Identifier] = []

    def _view_exists(identifier: str | Identifier) -> bool:
        # Normalize first so string and tuple identifiers compare alike.
        normalized = Catalog.identifier_to_tuple(identifier)
        recorded.append(normalized)
        return normalized == target

    return recorded, _view_exists


def test_create_table_raises_when_view_exists_at_identifier(
    single_catalog: Catalog,
    table_schema_simple: Schema,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """`create_table` must fail, and leave no table behind, when a view holds the identifier."""
    identifier: Identifier = ("ns", "t")
    namespace = identifier[:-1]
    single_catalog.create_namespace(namespace)

    recorded_calls, view_exists_fake = _recording_view_exists(identifier)
    monkeypatch.setattr(single_catalog, "view_exists", view_exists_fake)

    with pytest.raises(TableAlreadyExistsError, match="View with same name already exists"):
        single_catalog.create_table(identifier, table_schema_simple)

    # The view check was consulted for this identifier and nothing was created.
    assert identifier in recorded_calls
    assert not single_catalog.table_exists(identifier)


def test_create_table_transaction_raises_when_view_exists_at_identifier(
    single_catalog: Catalog,
    table_schema_simple: Schema,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """`create_table_transaction` must fail when a view holds the identifier."""
    identifier: Identifier = ("ns", "t")
    namespace = identifier[:-1]
    single_catalog.create_namespace(namespace)

    recorded_calls, view_exists_fake = _recording_view_exists(identifier)
    monkeypatch.setattr(single_catalog, "view_exists", view_exists_fake)

    with pytest.raises(TableAlreadyExistsError, match="View with same name already exists"):
        single_catalog.create_table_transaction(identifier, table_schema_simple)

    # The view check was consulted for this identifier and no table exists afterwards.
    assert identifier in recorded_calls
    assert not single_catalog.table_exists(identifier)


def test_register_table_raises_when_view_exists_at_identifier(
    single_catalog: Catalog,
    metadata_location: str,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """`register_table` must fail, and register nothing, when a view holds the identifier."""
    identifier: Identifier = ("ns", "t")
    namespace = identifier[:-1]
    single_catalog.create_namespace(namespace)

    recorded_calls, view_exists_fake = _recording_view_exists(identifier)
    monkeypatch.setattr(single_catalog, "view_exists", view_exists_fake)

    with pytest.raises(TableAlreadyExistsError, match="View with same name already exists"):
        single_catalog.register_table(identifier, metadata_location)

    # The view check was consulted for this identifier and nothing was registered.
    assert identifier in recorded_calls
    assert not single_catalog.table_exists(identifier)


def test_rename_table_raises_when_view_exists_at_destination(
    single_catalog: Catalog,
    table_schema_simple: Schema,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """`rename_table` must be rejected when a view holds the destination identifier."""
    source: Identifier = ("ns", "src")
    destination: Identifier = ("ns", "dst")

    # Set up the source table before `view_exists` is patched, so setup calls are not recorded.
    single_catalog.create_namespace(("ns",))
    single_catalog.create_table(source, table_schema_simple)

    recorded_calls, view_exists_fake = _recording_view_exists(destination)
    monkeypatch.setattr(single_catalog, "view_exists", view_exists_fake)

    with pytest.raises(TableAlreadyExistsError, match="View with same name already exists"):
        single_catalog.rename_table(source, destination)

    # The check must be on the destination, not the source.
    assert destination in recorded_calls
    assert source not in recorded_calls
    # Source table must still exist — the rename was rejected before any mutation.
    assert single_catalog.table_exists(source)
    assert not single_catalog.table_exists(destination)


# Drop table tests


Expand Down
Loading