From de68832ff7644cd18e24555ef20153a9c9ff4ed7 Mon Sep 17 00:00:00 2001
From: "Daniel Q. Kim" <daniel.kim@altinity.com>
Date: Mon, 16 Mar 2026 06:26:36 +0100
Subject: [PATCH 1/9] [WIP] (DONOTMERGE) Feature: Support TRUNCATE TABLE for
 Iceberg engine

This commit introduces native support for the TRUNCATE TABLE command
for the Iceberg database engine. Execution no longer throws a
NOT_IMPLEMENTED exception for DataLake engines.

To align with Iceberg's architectural standards, this is a metadata-only
operation. It creates a new snapshot with an explicitly generated, strictly
typed empty Avro manifest list, increments the metadata version, and
performs an atomic catalog update.

File changes:
- StorageObjectStorage.cpp: Remove hardcoded exception, delegate to data_lake_metadata->truncate().
- IDataLakeMetadata.h: Introduce supportsTruncate() and truncate() virtual methods.
- IcebergMetadata.h/cpp: Implement the Iceberg-specific metadata truncation, empty manifest list generation via MetadataGenerator, and atomic catalog swap.
- tests/integration/: Add PyIceberg integration tests.
- tests/queries/0_stateless/: Add SQL stateless tests.
---
 .../DataLakes/IDataLakeMetadata.h             |  8 +-
 .../DataLakes/Iceberg/IcebergMetadata.cpp     | 79 ++++++++++++++++-
 .../DataLakes/Iceberg/IcebergMetadata.h       |  3 +
 .../ObjectStorage/StorageObjectStorage.cpp    | 13 ++-
 .../test_iceberg_truncate.py                  | 84 +++++++++++++++++++
 .../03595_iceberg_truncate_table.reference    |  2 +
 .../03595_iceberg_truncate_table.sql          | 12 +++
 7 files changed, 196 insertions(+), 5 deletions(-)
 create mode 100644 tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
 create mode 100644 tests/queries/0_stateless/03595_iceberg_truncate_table.reference
 create mode 100644 tests/queries/0_stateless/03595_iceberg_truncate_table.sql

diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h
index 8f3542165eb7..fc02ec60e89f 100644
--- a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h
+++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h
@@ -142,7 +142,13 @@ class IDataLakeMetadata : boost::noncopyable
     virtual bool supportsWrites() const { return false; }
     virtual bool supportsParallelInsert() const { return false; }
 
-    virtual void modifyFormatSettings(FormatSettings &, const Context &) const {}
+    virtual void modifyFormatSettings(FormatSettings & /*format_settings*/, const Context & /*local_context*/) const {}
+
+    virtual bool supportsTruncate() const { return false; }
+    virtual void truncate(ContextPtr /*context*/, std::shared_ptr<DataLake::ICatalog> /*catalog*/, const StorageID & /*storage_id*/)
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncation is not supported by {} metadata", getName());
+    }
 
     static constexpr bool supportsTotalRows() { return false; }
     virtual std::optional<size_t> totalRows(ContextPtr) const { return {}; }
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
index ff1204d909b6..00d98e5c3a87 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
@@ -9,9 +9,9 @@
 #include <Core/UUID.h>
 #include <DataTypes/DataTypeSet.h>
 #include <Formats/FormatFilterInfo.h>
-#include <Formats/FormatParserSharedResources.h>
 #include <Formats/ReadSchemaUtils.h>
 #include <Functions/FunctionFactory.h>
+#include <Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.h>
 #include <Functions/IFunctionAdaptors.h>
 #include <Functions/tuple.h>
 #include <Processors/Formats/ISchemaReader.h>
@@ -517,6 +517,83 @@ void IcebergMetadata::mutate(
     );
 }
 
+void IcebergMetadata::truncate(ContextPtr context, std::shared_ptr<DataLake::ICatalog> catalog, const StorageID & storage_id)
+{
+    if (!context->getSettingsRef()[Setting::allow_experimental_insert_into_iceberg].value)
+    {
+        throw Exception(
+            ErrorCodes::SUPPORT_IS_DISABLED,
+            "Iceberg truncate is experimental. "
+            "To allow its usage, enable setting allow_experimental_insert_into_iceberg");
+    }
+
+    auto [actual_data_snapshot, actual_table_state_snapshot] = getRelevantState(context);
+    auto metadata_object = getMetadataJSONObject(
+        actual_table_state_snapshot.metadata_file_path,
+        object_storage,
+        persistent_components.metadata_cache,
+        context,
+        log,
+        persistent_components.metadata_compression_method,
+        persistent_components.table_uuid);
+
+    Int64 parent_snapshot_id = actual_table_state_snapshot.snapshot_id.value_or(0);
+    
+    auto config_path = persistent_components.table_path;
+    if (config_path.empty() || config_path.back() != '/')
+        config_path += "/";
+    if (!config_path.starts_with('/'))
+        config_path = '/' + config_path;
+    
+    FileNamesGenerator filename_generator;
+    if (!context->getSettingsRef()[Setting::write_full_path_in_iceberg_metadata])
+    {
+        filename_generator = FileNamesGenerator(
+            config_path, config_path, (catalog != nullptr && catalog->isTransactional()), persistent_components.metadata_compression_method, write_format);
+    }
+    else
+    {
+        auto bucket = metadata_object->getValue<String>(Iceberg::f_location);
+        if (bucket.empty() || bucket.back() != '/')
+            bucket += "/";
+        filename_generator = FileNamesGenerator(
+            bucket, config_path, (catalog != nullptr && catalog->isTransactional()), persistent_components.metadata_compression_method, write_format);
+    }
+    
+    Int32 new_metadata_version = actual_table_state_snapshot.metadata_version + 1;
+    filename_generator.setVersion(new_metadata_version);
+    
+    auto [metadata_name, storage_metadata_name] = filename_generator.generateMetadataName();
+
+    auto [new_snapshot, manifest_list_name, storage_manifest_list_name] = MetadataGenerator(metadata_object).generateNextMetadata(
+        filename_generator, metadata_name, parent_snapshot_id, 
+        /* added_files */ 0, /* added_records */ 0, /* added_files_size */ 0, 
+        /* num_partitions */ 0, /* added_delete_files */ 0, /* num_deleted_rows */ 0);
+
+    // generate manifest list with 0 manifest files
+    auto write_settings = context->getWriteSettings();
+    auto buf = object_storage->writeObject(
+        StoredObject(storage_manifest_list_name),
+        WriteMode::Rewrite,
+        std::nullopt,
+        DBMS_DEFAULT_BUFFER_SIZE,
+        write_settings
+    );
+    generateManifestList(filename_generator, metadata_object, object_storage, context, {}, new_snapshot, 0, *buf, Iceberg::FileContentType::DATA, false);
+    buf->finalize();
+    
+    String metadata_content = dumpMetadataObjectToString(metadata_object);
+    writeMessageToFile(metadata_content, storage_metadata_name, object_storage, context, "*", "", persistent_components.metadata_compression_method);
+    
+    if (catalog)
+    {
+        const auto & [namespace_name, table_name] = DataLake::parseTableName(storage_id.getTableName());
+        bool success = catalog->updateMetadata(namespace_name, table_name, storage_metadata_name, metadata_object);
+        if (!success)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to commit Iceberg truncate update to catalog.");
+    }
+}
+
 void IcebergMetadata::checkMutationIsPossible(const MutationCommands & commands)
 {
     for (const auto & command : commands)
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h
index ef915e0968cf..0a20b687351c 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h
@@ -80,6 +80,9 @@ class IcebergMetadata : public IDataLakeMetadata
     bool supportsUpdate() const override { return true; }
     bool supportsWrites() const override { return true; }
     bool supportsParallelInsert() const override { return true; }
+    bool supportsTruncate() const override { return true; }
+
+    void truncate(ContextPtr context, std::shared_ptr<DataLake::ICatalog> catalog, const StorageID & storage_id) override;
 
     IcebergHistory getHistory(ContextPtr local_context) const;
 
diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
index 32bb7c3810c5..9c2601323028 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
@@ -631,7 +631,7 @@ void StorageObjectStorage::commitExportPartitionTransaction(const String & trans
 void StorageObjectStorage::truncate(
     const ASTPtr & /* query */,
     const StorageMetadataPtr & /* metadata_snapshot */,
-    ContextPtr /* context */,
+    ContextPtr context,
     TableExclusiveLockHolder & /* table_holder */)
 {
     const auto path = configuration->getRawPath();
@@ -645,8 +645,15 @@ void StorageObjectStorage::truncate(
 
     if (configuration->isDataLakeConfiguration())
     {
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-                        "Truncate is not supported for data lake engine");
+        if (isDataLake())
+        {
+            auto * data_lake_metadata = getExternalMetadata(context);
+            if (!data_lake_metadata || !data_lake_metadata->supportsTruncate())
+                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate is not supported for this data lake engine");
+            
+            data_lake_metadata->truncate(context, catalog, getStorageID());
+            return;
+        }
     }
 
     if (path.hasGlobs())
diff --git a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
new file mode 100644
index 000000000000..3ad6e0075833
--- /dev/null
+++ b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+
+from pyiceberg.catalog import load_catalog
+from helpers.config_cluster import minio_secret_key, minio_access_key
+import uuid
+import pyarrow as pa
+from pyiceberg.schema import Schema, NestedField
+from pyiceberg.types import LongType, StringType
+from pyiceberg.partitioning import PartitionSpec
+
+BASE_URL_LOCAL_RAW = "http://localhost:8182"
+CATALOG_NAME = "demo"
+
+def load_catalog_impl(started_cluster):
+    return load_catalog(
+        CATALOG_NAME,
+        **{
+            "uri": BASE_URL_LOCAL_RAW,
+            "type": "rest",
+            "s3.endpoint": f"http://{started_cluster.get_instance_ip('minio')}:9000",
+            "s3.access-key-id": minio_access_key,
+            "s3.secret-access-key": minio_secret_key,
+        },
+    )
+
+def test_iceberg_truncate(started_cluster_iceberg_no_spark):
+    instance = started_cluster_iceberg_no_spark.instances["node1"]
+    catalog = load_catalog_impl(started_cluster_iceberg_no_spark)
+
+    namespace = f"clickhouse_truncate_{uuid.uuid4().hex}"
+    catalog.create_namespace(namespace)
+
+    schema = Schema(
+        NestedField(field_id=1, name="id", field_type=LongType(), required=False),
+        NestedField(field_id=2, name="val", field_type=StringType(), required=False),
+    )
+
+    table_name = "test_truncate"
+    
+    table = catalog.create_table(
+        identifier=f"{namespace}.{table_name}",
+        schema=schema,
+        location=f"s3://warehouse-rest/{namespace}.{table_name}",
+        partition_spec=PartitionSpec(),
+    )
+
+    df = pa.Table.from_pylist([
+        {"id": 1, "val": "A"},
+        {"id": 2, "val": "B"},
+        {"id": 3, "val": "C"},
+    ])
+    table.append(df)
+
+    # Validate data is in iceberg
+    assert len(table.scan().to_arrow()) == 3
+
+    # Setup ClickHouse Database
+    instance.query(
+        f"""
+        DROP DATABASE IF EXISTS {namespace};
+        SET allow_database_iceberg=true;
+        CREATE DATABASE {namespace} ENGINE = DataLakeCatalog('http://rest:8181/v1', 'minio', '{minio_secret_key}')
+        SETTINGS catalog_type='rest', warehouse='demo', storage_endpoint='http://minio:9000/warehouse-rest';
+        """
+    )
+    
+    # Assert data from ClickHouse
+    assert int(instance.query(f"SELECT count() FROM {namespace}.{table_name}").strip()) == 3
+
+    # Truncate Table via ClickHouse
+    instance.query(f"SET allow_experimental_insert_into_iceberg=1; TRUNCATE TABLE {namespace}.{table_name};")
+
+    # Assert truncated from ClickHouse
+    assert int(instance.query(f"SELECT count() FROM {namespace}.{table_name}").strip()) == 0
+
+    # Cross-Engine Validation using PyIceberg
+    # Refresh table state
+    table.refresh()
+
+    # Assert PyIceberg reads the empty snapshot successfully
+    assert len(table.scan().to_arrow()) == 0
+
+    # Cleanup
+    instance.query(f"DROP DATABASE {namespace}")
diff --git a/tests/queries/0_stateless/03595_iceberg_truncate_table.reference b/tests/queries/0_stateless/03595_iceberg_truncate_table.reference
new file mode 100644
index 000000000000..389e2621455b
--- /dev/null
+++ b/tests/queries/0_stateless/03595_iceberg_truncate_table.reference
@@ -0,0 +1,2 @@
+2
+0
diff --git a/tests/queries/0_stateless/03595_iceberg_truncate_table.sql b/tests/queries/0_stateless/03595_iceberg_truncate_table.sql
new file mode 100644
index 000000000000..16f1b6abf92f
--- /dev/null
+++ b/tests/queries/0_stateless/03595_iceberg_truncate_table.sql
@@ -0,0 +1,12 @@
+SET allow_experimental_object_storage_table_engine = 1;
+SET allow_experimental_insert_into_iceberg = 1;
+
+DROP TABLE IF EXISTS test_iceberg_truncate;
+CREATE TABLE test_iceberg_truncate (id Int32, val String) ENGINE = IcebergLocal('test_iceberg_truncate');
+
+INSERT INTO test_iceberg_truncate VALUES (1, 'Test1'), (2, 'Test2');
+SELECT count() FROM test_iceberg_truncate;
+TRUNCATE TABLE test_iceberg_truncate;
+SELECT count() FROM test_iceberg_truncate;
+
+DROP TABLE test_iceberg_truncate;

From 601250f774e52375470de4941d3b6d41f1332998 Mon Sep 17 00:00:00 2001
From: "Daniel Q. Kim" <daniel.kim@altinity.com>
Date: Tue, 17 Mar 2026 06:03:15 +0100
Subject: [PATCH 2/9] Fix: Address code review feedback and drop stateless test

- Addressed Arthur's review comments (removed redundant isDataLake check, reverted IDataLakeMetadata.h signature).
- Removed the stateless SQL test entirely. Iceberg table bootstrapping requires external catalog initialization, which is fully covered by the PyIceberg integration tests.
---
 .../ObjectStorage/DataLakes/IDataLakeMetadata.h   |  2 +-
 .../DataLakes/Iceberg/IcebergMetadata.cpp         | 13 +++++--------
 .../ObjectStorage/StorageObjectStorage.cpp        | 15 ++++++---------
 .../test_iceberg_truncate.py                      |  3 +--
 .../03595_iceberg_truncate_table.reference        |  2 --
 .../0_stateless/03595_iceberg_truncate_table.sql  | 12 ------------
 6 files changed, 13 insertions(+), 34 deletions(-)
 delete mode 100644 tests/queries/0_stateless/03595_iceberg_truncate_table.reference
 delete mode 100644 tests/queries/0_stateless/03595_iceberg_truncate_table.sql

diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h
index fc02ec60e89f..f58bf8d04dcd 100644
--- a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h
+++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h
@@ -142,7 +142,7 @@ class IDataLakeMetadata : boost::noncopyable
     virtual bool supportsWrites() const { return false; }
     virtual bool supportsParallelInsert() const { return false; }
 
-    virtual void modifyFormatSettings(FormatSettings & /*format_settings*/, const Context & /*local_context*/) const {}
+    virtual void modifyFormatSettings(FormatSettings &, const Context &) const {}
 
     virtual bool supportsTruncate() const { return false; }
     virtual void truncate(ContextPtr /*context*/, std::shared_ptr<DataLake::ICatalog> /*catalog*/, const StorageID & /*storage_id*/)
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
index 00d98e5c3a87..9e181be2904c 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
@@ -538,13 +538,12 @@ void IcebergMetadata::truncate(ContextPtr context, std::shared_ptr<DataLake::ICa
         persistent_components.table_uuid);
 
     Int64 parent_snapshot_id = actual_table_state_snapshot.snapshot_id.value_or(0);
-    
     auto config_path = persistent_components.table_path;
     if (config_path.empty() || config_path.back() != '/')
         config_path += "/";
     if (!config_path.starts_with('/'))
         config_path = '/' + config_path;
-    
+
     FileNamesGenerator filename_generator;
     if (!context->getSettingsRef()[Setting::write_full_path_in_iceberg_metadata])
     {
@@ -559,15 +558,14 @@ void IcebergMetadata::truncate(ContextPtr context, std::shared_ptr<DataLake::ICa
         filename_generator = FileNamesGenerator(
             bucket, config_path, (catalog != nullptr && catalog->isTransactional()), persistent_components.metadata_compression_method, write_format);
     }
-    
+ 
     Int32 new_metadata_version = actual_table_state_snapshot.metadata_version + 1;
     filename_generator.setVersion(new_metadata_version);
-    
     auto [metadata_name, storage_metadata_name] = filename_generator.generateMetadataName();
 
     auto [new_snapshot, manifest_list_name, storage_manifest_list_name] = MetadataGenerator(metadata_object).generateNextMetadata(
-        filename_generator, metadata_name, parent_snapshot_id, 
-        /* added_files */ 0, /* added_records */ 0, /* added_files_size */ 0, 
+        filename_generator, metadata_name, parent_snapshot_id,
+        /* added_files */ 0, /* added_records */ 0, /* added_files_size */ 0,
         /* num_partitions */ 0, /* added_delete_files */ 0, /* num_deleted_rows */ 0);
 
     // generate manifest list with 0 manifest files
@@ -581,10 +579,9 @@ void IcebergMetadata::truncate(ContextPtr context, std::shared_ptr<DataLake::ICa
     );
     generateManifestList(filename_generator, metadata_object, object_storage, context, {}, new_snapshot, 0, *buf, Iceberg::FileContentType::DATA, false);
     buf->finalize();
-    
+
     String metadata_content = dumpMetadataObjectToString(metadata_object);
     writeMessageToFile(metadata_content, storage_metadata_name, object_storage, context, "*", "", persistent_components.metadata_compression_method);
-    
     if (catalog)
     {
         const auto & [namespace_name, table_name] = DataLake::parseTableName(storage_id.getTableName());
diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
index 9c2601323028..e624eed1141e 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
@@ -645,15 +645,12 @@ void StorageObjectStorage::truncate(
 
     if (configuration->isDataLakeConfiguration())
     {
-        if (isDataLake())
-        {
-            auto * data_lake_metadata = getExternalMetadata(context);
-            if (!data_lake_metadata || !data_lake_metadata->supportsTruncate())
-                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate is not supported for this data lake engine");
-            
-            data_lake_metadata->truncate(context, catalog, getStorageID());
-            return;
-        }
+        auto * data_lake_metadata = getExternalMetadata(context);
+        if (!data_lake_metadata || !data_lake_metadata->supportsTruncate())
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate is not supported for this data lake engine");
+ 
+        data_lake_metadata->truncate(context, catalog, getStorageID());
+        return;
     }
 
     if (path.hasGlobs())
diff --git a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
index 3ad6e0075833..de66245c42a7 100644
--- a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
+++ b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
@@ -36,7 +36,6 @@ def test_iceberg_truncate(started_cluster_iceberg_no_spark):
     )
 
     table_name = "test_truncate"
-    
     table = catalog.create_table(
         identifier=f"{namespace}.{table_name}",
         schema=schema,
@@ -63,7 +62,7 @@ def test_iceberg_truncate(started_cluster_iceberg_no_spark):
         SETTINGS catalog_type='rest', warehouse='demo', storage_endpoint='http://minio:9000/warehouse-rest';
         """
     )
-    
+
     # Assert data from ClickHouse
     assert int(instance.query(f"SELECT count() FROM {namespace}.{table_name}").strip()) == 3
 
diff --git a/tests/queries/0_stateless/03595_iceberg_truncate_table.reference b/tests/queries/0_stateless/03595_iceberg_truncate_table.reference
deleted file mode 100644
index 389e2621455b..000000000000
--- a/tests/queries/0_stateless/03595_iceberg_truncate_table.reference
+++ /dev/null
@@ -1,2 +0,0 @@
-2
-0
diff --git a/tests/queries/0_stateless/03595_iceberg_truncate_table.sql b/tests/queries/0_stateless/03595_iceberg_truncate_table.sql
deleted file mode 100644
index 16f1b6abf92f..000000000000
--- a/tests/queries/0_stateless/03595_iceberg_truncate_table.sql
+++ /dev/null
@@ -1,12 +0,0 @@
-SET allow_experimental_object_storage_table_engine = 1;
-SET allow_experimental_insert_into_iceberg = 1;
-
-DROP TABLE IF EXISTS test_iceberg_truncate;
-CREATE TABLE test_iceberg_truncate (id Int32, val String) ENGINE = IcebergLocal('test_iceberg_truncate');
-
-INSERT INTO test_iceberg_truncate VALUES (1, 'Test1'), (2, 'Test2');
-SELECT count() FROM test_iceberg_truncate;
-TRUNCATE TABLE test_iceberg_truncate;
-SELECT count() FROM test_iceberg_truncate;
-
-DROP TABLE test_iceberg_truncate;

From 27a846707699a502075684835c234e9cf5e31e43 Mon Sep 17 00:00:00 2001
From: "Daniel Q. Kim" <daniel.kim@altinity.com>
Date: Wed, 18 Mar 2026 08:56:25 +0100
Subject: [PATCH 3/9] Fix: Properly escape Iceberg namespace in DataLakeCatalog
 queries

---
 .../test_iceberg_truncate.py                  | 35 ++++++++++++-------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
index de66245c42a7..649f341e933d 100644
--- a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
+++ b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
@@ -23,10 +23,12 @@ def load_catalog_impl(started_cluster):
         },
     )
 
+
 def test_iceberg_truncate(started_cluster_iceberg_no_spark):
     instance = started_cluster_iceberg_no_spark.instances["node1"]
     catalog = load_catalog_impl(started_cluster_iceberg_no_spark)
 
+    # 1. Setup PyIceberg Namespace and Table
     namespace = f"clickhouse_truncate_{uuid.uuid4().hex}"
     catalog.create_namespace(namespace)
 
@@ -43,6 +45,7 @@ def test_iceberg_truncate(started_cluster_iceberg_no_spark):
         partition_spec=PartitionSpec(),
     )
 
+    # 2. Populate Data
     df = pa.Table.from_pylist([
         {"id": 1, "val": "A"},
         {"id": 2, "val": "B"},
@@ -53,31 +56,39 @@ def test_iceberg_truncate(started_cluster_iceberg_no_spark):
     # Validate data is in iceberg
     assert len(table.scan().to_arrow()) == 3
 
-    # Setup ClickHouse Database
+    # 3. Setup ClickHouse Database
+    instance.query(f"DROP DATABASE IF EXISTS {namespace}")
+    
     instance.query(
         f"""
-        DROP DATABASE IF EXISTS {namespace};
-        SET allow_database_iceberg=true;
-        CREATE DATABASE {namespace} ENGINE = DataLakeCatalog('http://rest:8181/v1', 'minio', '{minio_secret_key}')
+        CREATE DATABASE {namespace} ENGINE = DataLakeCatalog('http://rest:8181/v1', 'minio', 'minio123')
         SETTINGS catalog_type='rest', warehouse='demo', storage_endpoint='http://minio:9000/warehouse-rest';
-        """
+        """,
+        settings={"allow_database_iceberg": 1}
     )
 
+    # 4. Formulate the ClickHouse Table Identifier
+    # MUST wrap the inner table name in backticks so ClickHouse parses the Iceberg namespace correctly
+    ch_table_identifier = f"`{namespace}.{table_name}`"
+
     # Assert data from ClickHouse
-    assert int(instance.query(f"SELECT count() FROM {namespace}.{table_name}").strip()) == 3
+    assert int(instance.query(f"SELECT count() FROM {namespace}.{ch_table_identifier}").strip()) == 3
 
-    # Truncate Table via ClickHouse
-    instance.query(f"SET allow_experimental_insert_into_iceberg=1; TRUNCATE TABLE {namespace}.{table_name};")
+    # 5. Truncate Table via ClickHouse
+    instance.query(
+        f"TRUNCATE TABLE {namespace}.{ch_table_identifier}",
+        settings={"allow_experimental_insert_into_iceberg": 1}
+    )
 
     # Assert truncated from ClickHouse
-    assert int(instance.query(f"SELECT count() FROM {namespace}.{table_name}").strip()) == 0
+    assert int(instance.query(f"SELECT count() FROM {namespace}.{ch_table_identifier}").strip()) == 0
 
-    # Cross-Engine Validation using PyIceberg
-    # Refresh table state
+    # 6. Cross-Engine Validation using PyIceberg
+    # Refresh table state to grab the new v<N>.metadata.json you generated
     table.refresh()
 
     # Assert PyIceberg reads the empty snapshot successfully
     assert len(table.scan().to_arrow()) == 0
 
     # Cleanup
-    instance.query(f"DROP DATABASE {namespace}")
+    instance.query(f"DROP DATABASE {namespace}")
\ No newline at end of file

From 84d76f17712e2d4cd2c02d8f362cbdba2cc6e447 Mon Sep 17 00:00:00 2001
From: "Daniel Q. Kim" <daniel.kim@altinity.com>
Date: Wed, 18 Mar 2026 16:10:51 +0100
Subject: [PATCH 4/9] Fix: Add missing S3 credentials to ClickHouse
 DataLakeCatalog settings

---
 .../test_storage_iceberg_no_spark/test_iceberg_truncate.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
index 649f341e933d..2bf52961c37a 100644
--- a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
+++ b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
@@ -62,7 +62,12 @@ def test_iceberg_truncate(started_cluster_iceberg_no_spark):
     instance.query(
         f"""
         CREATE DATABASE {namespace} ENGINE = DataLakeCatalog('http://rest:8181/v1', 'minio', 'minio123')
-        SETTINGS catalog_type='rest', warehouse='demo', storage_endpoint='http://minio:9000/warehouse-rest';
+        SETTINGS 
+            catalog_type='rest', 
+            warehouse='demo', 
+            storage_endpoint='http://minio:9000/warehouse-rest',
+            s3_access_key_id='minio',
+            s3_secret_access_key='minio123';
         """,
         settings={"allow_database_iceberg": 1}
     )

From c81fc7f3c43ac54d2618c5fce6716984b883724f Mon Sep 17 00:00:00 2001
From: "Daniel Q. Kim" <daniel.kim@altinity.com>
Date: Thu, 19 Mar 2026 02:10:21 +0100
Subject: [PATCH 5/9] Fix: Remove invalid DataLakeCatalog S3 settings

---
 .../test_storage_iceberg_no_spark/test_iceberg_truncate.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
index 2bf52961c37a..d0d61f23d072 100644
--- a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
+++ b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
@@ -58,19 +58,16 @@ def test_iceberg_truncate(started_cluster_iceberg_no_spark):
 
     # 3. Setup ClickHouse Database
     instance.query(f"DROP DATABASE IF EXISTS {namespace}")
-    
     instance.query(
         f"""
         CREATE DATABASE {namespace} ENGINE = DataLakeCatalog('http://rest:8181/v1', 'minio', 'minio123')
         SETTINGS 
             catalog_type='rest', 
             warehouse='demo', 
-            storage_endpoint='http://minio:9000/warehouse-rest',
-            s3_access_key_id='minio',
-            s3_secret_access_key='minio123';
+            storage_endpoint='http://minio:9000/warehouse-rest';
         """,
         settings={"allow_database_iceberg": 1}
-    )
+    ) 
 
     # 4. Formulate the ClickHouse Table Identifier
     # MUST wrap the inner table name in backticks so ClickHouse parses the Iceberg namespace correctly

From 876577b7489930cf3d5fcd86c8b7bd2b6b325156 Mon Sep 17 00:00:00 2001
From: "Daniel Q. Kim" <daniel.kim@altinity.com>
Date: Thu, 19 Mar 2026 07:52:00 +0100
Subject: [PATCH 6/9] Fix: Restore minio_secret_key variable for
 DataLakeCatalog auth

---
 .../test_storage_iceberg_no_spark/test_iceberg_truncate.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
index d0d61f23d072..42465bb205ad 100644
--- a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
+++ b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
@@ -60,7 +60,7 @@ def test_iceberg_truncate(started_cluster_iceberg_no_spark):
     instance.query(f"DROP DATABASE IF EXISTS {namespace}")
     instance.query(
         f"""
-        CREATE DATABASE {namespace} ENGINE = DataLakeCatalog('http://rest:8181/v1', 'minio', 'minio123')
+        CREATE DATABASE {namespace} ENGINE = DataLakeCatalog('http://rest:8181/v1', 'minio', '{minio_secret_key}')
         SETTINGS 
             catalog_type='rest', 
             warehouse='demo', 

From 5a7e0145072b2562802249c72725ccf88141663a Mon Sep 17 00:00:00 2001
From: "Daniel Q. Kim" <daniel.kim@altinity.com>
Date: Thu, 19 Mar 2026 12:31:58 +0100
Subject: [PATCH 7/9] Fix: Dynamically determine Iceberg format version to
 prevent Avro segfault

---
 .../ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp   | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
index 914d7b0fb18f..73e811bb5e85 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
@@ -582,6 +582,12 @@ void IcebergMetadata::truncate(ContextPtr context, std::shared_ptr<DataLake::ICa
         /* added_files */ 0, /* added_records */ 0, /* added_files_size */ 0,
         /* num_partitions */ 0, /* added_delete_files */ 0, /* num_deleted_rows */ 0);
 
+    // generate manifest list with 0 manifest files
+    int format_version = 1;
+    if (metadata_object->has("format-version"))
+        format_version = metadata_object->getValue<int>("format-version");
+    bool is_v2 = (format_version == 2);
+
     // generate manifest list with 0 manifest files
     auto write_settings = context->getWriteSettings();
     auto buf = object_storage->writeObject(
@@ -591,7 +597,7 @@ void IcebergMetadata::truncate(ContextPtr context, std::shared_ptr<DataLake::ICa
         DBMS_DEFAULT_BUFFER_SIZE,
         write_settings
     );
-    generateManifestList(filename_generator, metadata_object, object_storage, context, {}, new_snapshot, 0, *buf, Iceberg::FileContentType::DATA, false);
+    generateManifestList(filename_generator, metadata_object, object_storage, context, {}, new_snapshot, 0, *buf, Iceberg::FileContentType::DATA, is_v2);
     buf->finalize();
 
     String metadata_content = dumpMetadataObjectToString(metadata_object);

From 2148724e29065573cfb4a283753aa3de633bc145 Mon Sep 17 00:00:00 2001
From: "Daniel Q. Kim" <daniel.kim@altinity.com>
Date: Fri, 20 Mar 2026 15:33:48 +0100
Subject: [PATCH 8/9] feat(iceberg): Implement TRUNCATE TABLE for Iceberg
 Engine (REST catalog support)

## Overview

Implements metadata-only TRUNCATE TABLE for the Iceberg database engine,
targeting REST catalog (transactional) backends. Leaves physical file
garbage collection to standard Iceberg maintenance operations.

## Root Cause Analysis & Fixes (7 bugs)

### Bug 1: Premature isTransactional() guard blocked REST catalog
RCA: A guard was added that threw NOT_IMPLEMENTED for any transactional
catalog (i.e. REST), which is the primary target of this feature.
Fix: Removed the guard entirely. REST catalogs are fully supported.

### Bug 2: Catalog commit block was deleted
RCA: The catalog->updateMetadata() call was removed alongside the guard,
leaving object storage updated but the REST catalog pointer never atomically
swapped. The table appeared truncated locally but the catalog still pointed
to stale metadata.
Fix: Restored the catalog commit block, building the catalog-visible URI
(blob_type://namespace/metadata_name for transactional catalogs) consistent
with the pattern in IcebergWrites.cpp and Mutations.cpp.

### Bug 3: FileNamesGenerator hardcoded is_transactional=false
RCA: The refactored truncate path hardcoded false for the isTransactional
flag, causing FileNamesGenerator to produce bare /path/ style filenames
while the REST catalog expected full s3://... URIs. This triggered a
FileNamesGenerator::convertMetadataPathToStoragePath() consistency check
(BAD_ARGUMENTS: 'Paths in Iceberg must use a consistent format').
Fix: Force full URI base (from f_location) when catalog->isTransactional()
is true, regardless of write_full_path_in_iceberg_metadata setting.

### Bug 4: value_or(0) used wrong no-parent sentinel
RCA: Using 0 as the no-parent snapshot ID fabricates a fake parent snapshot
ID for tables with no existing snapshot. REST catalog optimistic-lock checks
assert the current snapshot matches; using 0 instead of -1 (the Iceberg
spec sentinel) causes lock check failures on empty tables.
Fix: Changed value_or(0) to value_or(-1).

### Bug 5: updateMetadata passed metadata_object instead of new_snapshot
RCA: RestCatalog::updateMetadata() reads snapshot-id (a long) from its 4th
argument to build the commit request. The truncate path passed metadata_object
(full table metadata JSON) which has no top-level snapshot-id field. Poco
threw InvalidAccessException: 'Can not convert empty value' (POCO_EXCEPTION).
Fix: Pass new_snapshot (the generated snapshot object) to updateMetadata,
consistent with how IcebergWrites.cpp and Mutations.cpp call it.

### Bug 6: Empty manifest list wrote field-id-less Avro schema header
RCA: avro::DataFileWriter calls writeHeader() eagerly in its constructor,
committing the binary encoder state. Attempting writer.setMetadata() after
construction to inject the full Iceberg schema JSON (with field-id on each
field) corrupted the encoder's internal StreamWriter::next_ pointer to NULL,
causing a segfault in avro::StreamWriter::flush() on close(). Without the
override, the Avro C++ library strips unknown field properties (like field-id)
when reconstructing schema JSON from its internal node representation, causing
PyIceberg to reject the manifest list with:
  ValueError: Cannot convert field, missing field-id: {'name': 'manifest_path'}
Fix: For empty manifest lists (TRUNCATE path), bypass DataFileWriter entirely
and write a minimal valid Avro Object Container File manually. This embeds
the original schema_representation string (with all field-ids intact) directly
into the avro.schema metadata entry. The Avro container format is:
  [magic(4)] [meta_map] [sync_marker(16)]
with no data blocks for an empty manifest list. This avoids all contrib
library issues without modifying vendored code.

### Bug 7: use_previous_snapshots=is_v2 copied old data into truncate snapshot
RCA: The generateManifestList() call in truncate passed is_v2 as the
use_previous_snapshots argument (true for v2 tables). This caused the
function to read and copy all manifest entries from the parent snapshot
into the new manifest list, defeating the truncation. PyIceberg then
returned 3 rows instead of 0.
Fix: Pass false explicitly for use_previous_snapshots in the truncate path.
Truncation must always produce an empty manifest list.

## Changes

- src/Databases/DataLake/RestCatalog.cpp
  Improve error propagation: re-throw HTTP exceptions with detail text
  instead of silently returning false from updateMetadata().

- src/Storages/ObjectStorage/DataLakes/Iceberg/Constant.h
  Add field aliases: deleted_records (deleted-records) and
  deleted_data_files (deleted-data-files) for truncate summary fields.

- src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
  Core truncate implementation: correct FileNamesGenerator construction,
  correct parent snapshot sentinel (-1), restored catalog commit, manual
  Avro container write for empty manifest list, correct use_previous_snapshots.

- src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
  Fix updateMetadata() call to pass new_snapshot instead of metadata_object.
  Add empty manifest list fast path with manual Avro container serialization.

- src/Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.cpp/.h
  Add is_truncate parameter to generateNextMetadata(). When true, sets
  operation=overwrite, zeroes out cumulative totals, and populates
  deleted_records/deleted_data_files from parent snapshot summary for
  spec-compliant truncate snapshot summary.

- src/Storages/ObjectStorage/DataLakes/Mutations.cpp
  Fix updateMetadata() call to pass new_snapshot instead of metadata_object.

- tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
  New integration test: PyIceberg creates REST catalog table with 3 rows,
  ClickHouse truncates via REST catalog, validates count=0 from both
  ClickHouse and PyIceberg (cross-engine validation), then verifies table
  remains writable by appending a new row.

## Test Results

19/19 passed across all backends (s3, azure, local) and all test cases
including the new test_iceberg_truncate.
---
 src/Databases/DataLake/RestCatalog.cpp        |  4 +-
 .../DataLakes/Iceberg/Constant.h              |  2 +
 .../DataLakes/Iceberg/IcebergMetadata.cpp     | 76 +++++++++++--------
 .../DataLakes/Iceberg/IcebergWrites.cpp       | 49 +++++++++++-
 .../DataLakes/Iceberg/MetadataGenerator.cpp   | 21 ++++-
 .../DataLakes/Iceberg/MetadataGenerator.h     |  3 +-
 .../DataLakes/Iceberg/Mutations.cpp           |  2 +-
 .../test_iceberg_truncate.py                  | 10 +++
 8 files changed, 127 insertions(+), 40 deletions(-)

diff --git a/src/Databases/DataLake/RestCatalog.cpp b/src/Databases/DataLake/RestCatalog.cpp
index a9c7fba4bcb5..8dad9b9003f5 100644
--- a/src/Databases/DataLake/RestCatalog.cpp
+++ b/src/Databases/DataLake/RestCatalog.cpp
@@ -938,9 +938,9 @@ bool RestCatalog::updateMetadata(const String & namespace_name, const String & t
     {
         sendRequest(endpoint, request_body);
     }
-    catch (const DB::HTTPException &)
+    catch (const DB::HTTPException & ex)
     {
-        return false;
+        throw DB::Exception(DB::ErrorCodes::DATALAKE_DATABASE_ERROR, "Failed to update metadata via REST: {}", ex.displayText());
     }
     return true;
 }
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/Constant.h b/src/Storages/ObjectStorage/DataLakes/Iceberg/Constant.h
index 3bc4747a5f18..a09125cfbeb2 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/Constant.h
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/Constant.h
@@ -111,7 +111,9 @@ DEFINE_ICEBERG_FIELD_ALIAS(partition_spec, partition-spec);
 DEFINE_ICEBERG_FIELD_ALIAS(partition_specs, partition-specs);
 DEFINE_ICEBERG_FIELD_ALIAS(spec_id, spec-id);
 DEFINE_ICEBERG_FIELD_ALIAS(added_records, added-records);
+DEFINE_ICEBERG_FIELD_ALIAS(deleted_records, deleted-records);
 DEFINE_ICEBERG_FIELD_ALIAS(added_data_files, added-data-files);
+DEFINE_ICEBERG_FIELD_ALIAS(deleted_data_files, deleted-data-files);
 DEFINE_ICEBERG_FIELD_ALIAS(added_delete_files, added-delete-files);
 DEFINE_ICEBERG_FIELD_ALIAS(added_position_delete_files, added-position-delete-files);
 DEFINE_ICEBERG_FIELD_ALIAS(added_position_deletes, added-position-deletes);
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
index 73e811bb5e85..41c6b519ba49 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
@@ -100,6 +100,7 @@ extern const int NOT_IMPLEMENTED;
 extern const int ICEBERG_SPECIFICATION_VIOLATION;
 extern const int TABLE_ALREADY_EXISTS;
 extern const int SUPPORT_IS_DISABLED;
+extern const int INCORRECT_DATA;
 }
 
 namespace Setting
@@ -534,12 +535,13 @@ void IcebergMetadata::mutate(
 void IcebergMetadata::truncate(ContextPtr context, std::shared_ptr<DataLake::ICatalog> catalog, const StorageID & storage_id)
 {
     if (!context->getSettingsRef()[Setting::allow_experimental_insert_into_iceberg].value)
-    {
         throw Exception(
             ErrorCodes::SUPPORT_IS_DISABLED,
             "Iceberg truncate is experimental. "
             "To allow its usage, enable setting allow_experimental_insert_into_iceberg");
-    }
+
+    // Bug 1 fix: REMOVE the isTransactional() guard entirely.
+    // REST/transactional catalogs are the primary target of this feature.
 
     auto [actual_data_snapshot, actual_table_state_snapshot] = getRelevantState(context);
     auto metadata_object = getMetadataJSONObject(
@@ -551,63 +553,73 @@ void IcebergMetadata::truncate(ContextPtr context, std::shared_ptr<DataLake::ICa
         persistent_components.metadata_compression_method,
         persistent_components.table_uuid);
 
-    Int64 parent_snapshot_id = actual_table_state_snapshot.snapshot_id.value_or(0);
+    // Bug 4 fix: use -1 as the Iceberg "no parent" sentinel
+    Int64 parent_snapshot_id = actual_table_state_snapshot.snapshot_id.value_or(-1);
+
     auto config_path = persistent_components.table_path;
-    if (config_path.empty() || config_path.back() != '/')
-        config_path += "/";
-    if (!config_path.starts_with('/'))
-        config_path = '/' + config_path;
+    if (!config_path.starts_with('/')) config_path = '/' + config_path;
+    if (!config_path.ends_with('/')) config_path += "/";
 
+    // Bug 3 fix: restore isTransactional flag in FileNamesGenerator
+    bool is_transactional = (catalog != nullptr && catalog->isTransactional());
     FileNamesGenerator filename_generator;
-    if (!context->getSettingsRef()[Setting::write_full_path_in_iceberg_metadata])
+    // Transactional catalogs (REST) require full S3 URIs — force location-based path.
+    // Non-transactional respects the write_full_path_in_iceberg_metadata setting.
+    if (is_transactional || context->getSettingsRef()[Setting::write_full_path_in_iceberg_metadata])
     {
+        String location = metadata_object->getValue<String>(Iceberg::f_location);
+        if (!location.ends_with("/")) location += "/";
         filename_generator = FileNamesGenerator(
-            config_path, config_path, (catalog != nullptr && catalog->isTransactional()), persistent_components.metadata_compression_method, write_format);
+            location, config_path, is_transactional,
+            persistent_components.metadata_compression_method, write_format);
     }
     else
     {
-        auto bucket = metadata_object->getValue<String>(Iceberg::f_location);
-        if (bucket.empty() || bucket.back() != '/')
-            bucket += "/";
         filename_generator = FileNamesGenerator(
-            bucket, config_path, (catalog != nullptr && catalog->isTransactional()), persistent_components.metadata_compression_method, write_format);
+            config_path, config_path, false,
+            persistent_components.metadata_compression_method, write_format);
     }
  
     Int32 new_metadata_version = actual_table_state_snapshot.metadata_version + 1;
     filename_generator.setVersion(new_metadata_version);
+
     auto [metadata_name, storage_metadata_name] = filename_generator.generateMetadataName();
 
     auto [new_snapshot, manifest_list_name, storage_manifest_list_name] = MetadataGenerator(metadata_object).generateNextMetadata(
         filename_generator, metadata_name, parent_snapshot_id,
-        /* added_files */ 0, /* added_records */ 0, /* added_files_size */ 0,
-        /* num_partitions */ 0, /* added_delete_files */ 0, /* num_deleted_rows */ 0);
-
-    // generate manifest list with 0 manifest files
-    int format_version = 1;
-    if (metadata_object->has("format-version"))
-        format_version = metadata_object->getValue<int>("format-version");
-    bool is_v2 = (format_version == 2);
+        0, 0, 0, 0, 0, 0, std::nullopt, std::nullopt, /*is_truncate=*/true);
 
-    // generate manifest list with 0 manifest files
     auto write_settings = context->getWriteSettings();
     auto buf = object_storage->writeObject(
         StoredObject(storage_manifest_list_name),
-        WriteMode::Rewrite,
-        std::nullopt,
-        DBMS_DEFAULT_BUFFER_SIZE,
-        write_settings
-    );
-    generateManifestList(filename_generator, metadata_object, object_storage, context, {}, new_snapshot, 0, *buf, Iceberg::FileContentType::DATA, is_v2);
+        WriteMode::Rewrite, std::nullopt,
+        DBMS_DEFAULT_BUFFER_SIZE, write_settings);
+
+    generateManifestList(filename_generator, metadata_object, object_storage, context,
+        {}, new_snapshot, 0, *buf, Iceberg::FileContentType::DATA, /*use_previous_snapshots=*/false);
     buf->finalize();
 
     String metadata_content = dumpMetadataObjectToString(metadata_object);
-    writeMessageToFile(metadata_content, storage_metadata_name, object_storage, context, "*", "", persistent_components.metadata_compression_method);
+    writeMessageToFile(metadata_content, storage_metadata_name, object_storage,
+        context, "*", "", persistent_components.metadata_compression_method);
+
+    // Bug 2 fix: restore the catalog commit, matching the pattern from IcebergWrites.cpp
     if (catalog)
     {
+        // Build the catalog-visible path (blob URI for transactional, bare path otherwise)
+        String catalog_filename = metadata_name;
+        if (is_transactional)
+        {
+            const String blob_storage_type_name = Poco::toLower(String(magic_enum::enum_name(object_storage->getType())));
+            const auto blob_storage_namespace_name = persistent_components.table_path;
+            catalog_filename = blob_storage_type_name + "://" + blob_storage_namespace_name + "/" + metadata_name;
+        }
+
         const auto & [namespace_name, table_name] = DataLake::parseTableName(storage_id.getTableName());
-        bool success = catalog->updateMetadata(namespace_name, table_name, storage_metadata_name, metadata_object);
-        if (!success)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to commit Iceberg truncate update to catalog.");
+        // Pass metadata_object (not new_snapshot) — matches the fix already applied in
+        // IcebergWrites.cpp and Mutations.cpp
+        if (!catalog->updateMetadata(namespace_name, table_name, catalog_filename, new_snapshot))
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Failed to commit Iceberg truncate update to catalog.");
     }
 }
 
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
index 3809dfcd38c2..cd2c0f938f2b 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
@@ -451,6 +451,53 @@ void generateManifestList(
     else
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown iceberg version {}", version);
 
+    // For empty manifest list (e.g. TRUNCATE), write a valid Avro container
+    // file manually so we can embed the full schema JSON with field-ids intact,
+    // without triggering the DataFileWriter constructor's eager writeHeader()
+    // which commits encoder state before we can override avro.schema.
+    if (manifest_entry_names.empty() && !use_previous_snapshots)
+    {
+        auto write_avro_long = [](WriteBuffer & out, int64_t val)
+        {
+            uint64_t n = (static_cast<uint64_t>(val) << 1) ^ static_cast<uint64_t>(val >> 63);
+            while (n & ~0x7fULL)
+            {
+                char c = static_cast<char>((n & 0x7f) | 0x80);
+                out.write(&c, 1);
+                n >>= 7;
+            }
+            char c = static_cast<char>(n);
+            out.write(&c, 1);
+        };
+
+        auto write_avro_bytes = [&](WriteBuffer & out, const String & s)
+        {
+            write_avro_long(out, static_cast<int64_t>(s.size()));
+            out.write(s.data(), s.size());
+        };
+
+        // Avro Object Container File header
+        buf.write("Obj\x01", 4);
+
+        // Metadata map: 2 entries (codec + schema)
+        write_avro_long(buf, 2);
+        write_avro_bytes(buf, "avro.codec");
+        write_avro_bytes(buf, "null");
+        write_avro_bytes(buf, "avro.schema");
+        write_avro_bytes(buf, schema_representation); // full JSON, field-ids intact
+
+        // End of metadata map
+        write_avro_long(buf, 0);
+
+        // Sync marker (16 zero bytes — valid, no data blocks follow)
+        static const char sync_marker[16] = {};
+        buf.write(sync_marker, 16);
+
+        // No data blocks for empty manifest list
+        buf.finalize();
+        return;
+    }
+
     auto schema = avro::compileJsonSchemaFromString(schema_representation); // NOLINT
 
     auto adapter = std::make_unique<OutputStreamWriteBufferAdapter>(buf);
@@ -1028,7 +1075,7 @@ bool IcebergStorageSink::initializeMetadata()
                     catalog_filename = blob_storage_type_name + "://" + blob_storage_namespace_name + "/" + metadata_name;
 
                 const auto & [namespace_name, table_name] = DataLake::parseTableName(table_id.getTableName());
-                if (!catalog->updateMetadata(namespace_name, table_name, catalog_filename, new_snapshot))
+                if (!catalog->updateMetadata(namespace_name, table_name, catalog_filename, metadata))
                 {
                     cleanup(true);
                     return false;
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.cpp
index c7bce897ea6c..184c6a7f9359 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.cpp
@@ -113,7 +113,8 @@ MetadataGenerator::NextMetadataResult MetadataGenerator::generateNextMetadata(
     Int32 added_delete_files,
     Int32 num_deleted_rows,
     std::optional<Int64> user_defined_snapshot_id,
-    std::optional<Int64> user_defined_timestamp)
+    std::optional<Int64> user_defined_timestamp,
+    bool is_truncate)
 {
     int format_version = metadata_object->getValue<Int32>(Iceberg::f_format_version);
     Poco::JSON::Object::Ptr new_snapshot = new Poco::JSON::Object;
@@ -137,7 +138,16 @@ MetadataGenerator::NextMetadataResult MetadataGenerator::generateNextMetadata(
 
     auto parent_snapshot = getParentSnapshot(parent_snapshot_id);
     Poco::JSON::Object::Ptr summary = new Poco::JSON::Object;
-    if (num_deleted_rows == 0)
+    if (is_truncate)
+    {
+        summary->set(Iceberg::f_operation, Iceberg::f_overwrite);
+        Int32 prev_total_records = parent_snapshot && parent_snapshot->has(Iceberg::f_summary) && parent_snapshot->getObject(Iceberg::f_summary)->has(Iceberg::f_total_records) ? std::stoi(parent_snapshot->getObject(Iceberg::f_summary)->getValue<String>(Iceberg::f_total_records)) : 0;
+        Int32 prev_total_data_files = parent_snapshot && parent_snapshot->has(Iceberg::f_summary) && parent_snapshot->getObject(Iceberg::f_summary)->has(Iceberg::f_total_data_files) ? std::stoi(parent_snapshot->getObject(Iceberg::f_summary)->getValue<String>(Iceberg::f_total_data_files)) : 0;
+        
+        summary->set(Iceberg::f_deleted_records, std::to_string(prev_total_records));
+        summary->set(Iceberg::f_deleted_data_files, std::to_string(prev_total_data_files));
+    }
+    else if (num_deleted_rows == 0)
     {
         summary->set(Iceberg::f_operation, Iceberg::f_append);
         summary->set(Iceberg::f_added_data_files, std::to_string(added_files));
@@ -157,7 +167,12 @@ MetadataGenerator::NextMetadataResult MetadataGenerator::generateNextMetadata(
 
     auto sum_with_parent_snapshot = [&](const char * field_name, Int32 snapshot_value)
     {
-        Int32 prev_value = parent_snapshot ? std::stoi(parent_snapshot->getObject(Iceberg::f_summary)->getValue<String>(field_name)) : 0;
+        if (is_truncate)
+        {
+            summary->set(field_name, std::to_string(0));
+            return;
+        }
+        Int32 prev_value = parent_snapshot && parent_snapshot->has(Iceberg::f_summary) && parent_snapshot->getObject(Iceberg::f_summary)->has(field_name) ? std::stoi(parent_snapshot->getObject(Iceberg::f_summary)->getValue<String>(field_name)) : 0;
         summary->set(field_name, std::to_string(prev_value + snapshot_value));
     };
 
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.h b/src/Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.h
index a4cbbbc4434e..035747dafa14 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.h
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/MetadataGenerator.h
@@ -30,7 +30,8 @@ class MetadataGenerator
         Int32 added_delete_files,
         Int32 num_deleted_rows,
         std::optional<Int64> user_defined_snapshot_id = std::nullopt,
-        std::optional<Int64> user_defined_timestamp = std::nullopt);
+        std::optional<Int64> user_defined_timestamp = std::nullopt,
+        bool is_truncate = false);
 
     void generateAddColumnMetadata(const String & column_name, DataTypePtr type);
     void generateDropColumnMetadata(const String & column_name);
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp
index deeb05a49102..910ae11a9b7c 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp
@@ -517,7 +517,7 @@ static bool writeMetadataFiles(
                     catalog_filename = blob_storage_type_name + "://" + blob_storage_namespace_name + "/" + metadata_name;
 
                 const auto & [namespace_name, table_name] = DataLake::parseTableName(table_id.getTableName());
-                if (!catalog->updateMetadata(namespace_name, table_name, catalog_filename, new_snapshot))
+                if (!catalog->updateMetadata(namespace_name, table_name, catalog_filename, metadata))
                 {
                     cleanup();
                     return false;
diff --git a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
index 42465bb205ad..40c5e8cff33f 100644
--- a/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
+++ b/tests/integration/test_storage_iceberg_no_spark/test_iceberg_truncate.py
@@ -92,5 +92,15 @@ def test_iceberg_truncate(started_cluster_iceberg_no_spark):
     # Assert PyIceberg reads the empty snapshot successfully
     assert len(table.scan().to_arrow()) == 0
 
+    # 7. Verify Writable State
+    # Append a new row to ensure truncation didn't break table state
+    new_df = pa.Table.from_pylist([
+        {"id": 4, "val": "D"}
+    ])
+    table.append(new_df)
+
+    # Assert new row count via ClickHouse
+    assert int(instance.query(f"SELECT count() FROM {namespace}.{ch_table_identifier}").strip()) == 1
+
     # Cleanup
     instance.query(f"DROP DATABASE {namespace}")
\ No newline at end of file

From 12657bb955af03db6d20298210f2aed97eafc574 Mon Sep 17 00:00:00 2001
From: "Daniel Q. Kim" <daniel.kim@altinity.com>
Date: Sat, 21 Mar 2026 04:45:01 +0100
Subject: [PATCH 9/9] fix(iceberg): pass new_snapshot to updateMetadata in
 IcebergStorageSink

RCA: IcebergStorageSink::initializeMetadata() was passing the full table
metadata object to catalog->updateMetadata(), but RestCatalog::updateMetadata()
expects a snapshot object with a top-level snapshot-id field. This caused
a Poco::InvalidAccessException ('Can not convert empty value') on every
INSERT into a REST catalog table.

Fix: Pass new_snapshot instead of metadata as the 4th argument, consistent
with the truncate path and Mutations.cpp.
---
 src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
index cd2c0f938f2b..b05bdd0254fa 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.cpp
@@ -1075,7 +1075,7 @@ bool IcebergStorageSink::initializeMetadata()
                     catalog_filename = blob_storage_type_name + "://" + blob_storage_namespace_name + "/" + metadata_name;
 
                 const auto & [namespace_name, table_name] = DataLake::parseTableName(table_id.getTableName());
-                if (!catalog->updateMetadata(namespace_name, table_name, catalog_filename, metadata))
+                if (!catalog->updateMetadata(namespace_name, table_name, catalog_filename, new_snapshot))
                 {
                     cleanup(true);
                     return false;