From ed2bb31baaa96f0efd62846915e36a98e8db4046 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Mon, 19 Jan 2026 21:07:12 -0300
Subject: [PATCH 1/5] impl

---
 src/Storages/ColumnsDescription.cpp           |  1 -
 src/Storages/MergeTree/ExportPartTask.cpp     | 35 +++++++++++++++
 src/Storages/MergeTree/MergeTreeData.cpp      |  6 ++-
 ...erge_tree_part_to_object_storage.reference | 14 ++++++
 ...xport_merge_tree_part_to_object_storage.sh | 45 ++++++++++++++++++-
 5 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp
index 8c72d47f0865..ff0ba613e042 100644
--- a/src/Storages/ColumnsDescription.cpp
+++ b/src/Storages/ColumnsDescription.cpp
@@ -851,7 +851,6 @@ std::optional<ColumnDefault> ColumnsDescription::getDefault(const String & colum
     return {};
 }
 
-
 bool ColumnsDescription::hasCompressionCodec(const String & column_name) const
 {
     const auto it = columns.get<1>().find(column_name);
diff --git a/src/Storages/MergeTree/ExportPartTask.cpp b/src/Storages/MergeTree/ExportPartTask.cpp
index ad737fedcb21..1dc417b45e54 100644
--- a/src/Storages/MergeTree/ExportPartTask.cpp
+++ b/src/Storages/MergeTree/ExportPartTask.cpp
@@ -4,17 +4,20 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 
 namespace ProfileEvents
 {
@@ -58,7 +61,11 @@ bool ExportPartTask::executeStep()
 
     const auto & metadata_snapshot = manifest.metadata_snapshot;
 
+    // Read only physical columns from the part
     Names columns_to_read = metadata_snapshot->getColumns().getNamesOfPhysical();
+
+    // But we want all columns (including aliases) in the output
+    NamesAndTypesList all_columns = metadata_snapshot->getColumns().getAll();
 
     MergeTreeSequentialSourceType read_type = MergeTreeSequentialSourceType::Export;
 
@@ -146,6 +153,34 @@ bool ExportPartTask::executeStep()
         local_context,
         getLogger("ExportPartition"));
 
+    // Add expression step to compute alias and other default columns for export
+    // This materializes virtual columns (like ALIAS) so they can be written to output
+    const auto & current_header = plan_for_part.getCurrentHeader();
+
+    // Enable all experimental settings for default expressions
+    // (same pattern as in IMergeTreeReader::evaluateMissingDefaults)
+    auto context_for_defaults = Context::createCopy(local_context);
+    enableAllExperimentalSettings(context_for_defaults);
+
+    auto defaults_dag = evaluateMissingDefaults(
+        *current_header,
+        all_columns,
+        metadata_snapshot->getColumns(),
+        context_for_defaults);
+
+    if (defaults_dag)
+    {
+        // Ensure columns are in the correct order matching all_columns
+        defaults_dag->removeUnusedActions(all_columns.getNames(), false);
+        defaults_dag->addMaterializingOutputActions(/*materialize_sparse=*/ false);
+
+        auto expression_step = std::make_unique<ExpressionStep>(
+            current_header,
+            std::move(*defaults_dag));
+        expression_step->setStepDescription("Compute alias and default expressions for export");
+        plan_for_part.addStep(std::move(expression_step));
+    }
+
     ThreadGroupSwitcher switcher((*exports_list_entry)->thread_group, "");
 
     QueryPlanOptimizationSettings optimization_settings(local_context);
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index e90a6e3ffc0b..d2d573b0a86a 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -6242,7 +6242,11 @@ void MergeTreeData::exportPartToTable(
     auto source_metadata_ptr = getInMemoryMetadataPtr();
     auto
destination_metadata_ptr = dest_storage->getInMemoryMetadataPtr(); - if (destination_metadata_ptr->getColumns().getAllPhysical().sizeOfDifference(source_metadata_ptr->getColumns().getAllPhysical())) + const auto & source_columns = source_metadata_ptr->getColumns(); + + const auto & destination_columns = destination_metadata_ptr->getColumns(); + + if (destination_columns.getAll().sizeOfDifference(source_columns.getAll())) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure"); if (query_to_string(source_metadata_ptr->getPartitionKeyAST()) != query_to_string(destination_metadata_ptr->getPartitionKeyAST())) diff --git a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference index d11773c3c9cd..3d9ee8308464 100644 --- a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference +++ b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference @@ -42,3 +42,17 @@ ---- Count rows in big_table and big_destination_max_rows 4194304 4194304 +---- Test ALIAS columns export +---- Verify ALIAS column data in source table (arr_1 computed from arr[1]) +1 [1,2,3] 1 +1 [10,20,30] 10 +---- Verify ALIAS column data exported to S3 (should match source) +1 [1,2,3] 1 +1 [10,20,30] 10 +---- Test MATERIALIZED columns export +---- Verify MATERIALIZED column data in source table (arr_1 computed from arr[1]) +1 [1,2,3] 1 +1 [10,20,30] 10 +---- Verify MATERIALIZED column data exported to S3 (should match source) +1 [1,2,3] 1 +1 [10,20,30] 10 diff --git a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh index 669da7a9d163..3757acb3df71 100755 --- a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh +++ b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh @@ -18,12 +18,17 @@ mt_table_roundtrip="mt_table_roundtrip_${RANDOM}" big_table="big_table_${RANDOM}" big_destination_max_bytes="big_destination_max_bytes_${RANDOM}" big_destination_max_rows="big_destination_max_rows_${RANDOM}" +mt_table_tf="mt_table_tf_${RANDOM}" +mt_alias="mt_alias_${RANDOM}" +mt_materialized="mt_materialized_${RANDOM}" +s3_alias_export="s3_alias_export_${RANDOM}" +s3_materialized_export="s3_materialized_export_${RANDOM}" query() { $CLICKHOUSE_CLIENT --query "$1" } -query "DROP TABLE IF EXISTS $mt_table, $s3_table, $mt_table_roundtrip, $s3_table_wildcard, $s3_table_wildcard_partition_expression_with_function, $mt_table_partition_expression_with_function" +query "DROP TABLE IF EXISTS $mt_table, $s3_table, $mt_table_roundtrip, $s3_table_wildcard, $s3_table_wildcard_partition_expression_with_function, $mt_table_partition_expression_with_function, $mt_alias, $mt_materialized, $s3_alias_export, $s3_materialized_export" query "CREATE TABLE $mt_table (id UInt64, year UInt16) ENGINE = MergeTree() PARTITION BY year ORDER BY tuple()" query "CREATE TABLE $s3_table (id UInt64, year UInt16) ENGINE = S3(s3_conn, filename='$s3_table', format=Parquet, partition_strategy='hive') PARTITION BY year" @@ -114,4 +119,40 @@ echo "---- Count rows in big_table and big_destination_max_rows" query "SELECT COUNT() from $big_table" query "SELECT COUNT() from $big_destination_max_rows" -query "DROP TABLE IF EXISTS $mt_table, $s3_table, $mt_table_roundtrip, $s3_table_wildcard, $s3_table_wildcard_partition_expression_with_function, 
$mt_table_partition_expression_with_function, $big_table, $big_destination_max_bytes, $big_destination_max_rows" +echo "---- Test ALIAS columns export" +query "CREATE TABLE $mt_alias (a UInt32, arr Array(UInt64), arr_1 UInt64 ALIAS arr[1]) ENGINE = MergeTree() PARTITION BY a ORDER BY (a, arr[1]) SETTINGS index_granularity = 1" +query "CREATE TABLE $s3_alias_export (a UInt32, arr Array(UInt64), arr_1 UInt64) ENGINE = S3(s3_conn, filename='$s3_alias_export', format=Parquet, partition_strategy='hive') PARTITION BY a" + +query "INSERT INTO $mt_alias VALUES (1, [1, 2, 3]), (1, [10, 20, 30])" + +alias_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$mt_alias' AND partition_id = '1' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n') + +query "ALTER TABLE $mt_alias EXPORT PART '$alias_part' TO TABLE $s3_alias_export SETTINGS allow_experimental_export_merge_tree_part = 1" + +sleep 3 + +echo "---- Verify ALIAS column data in source table (arr_1 computed from arr[1])" +query "SELECT a, arr, arr_1 FROM $mt_alias ORDER BY arr" + +echo "---- Verify ALIAS column data exported to S3 (should match source)" +query "SELECT a, arr, arr_1 FROM $s3_alias_export ORDER BY arr" + +echo "---- Test MATERIALIZED columns export" +query "CREATE TABLE $mt_materialized (a UInt32, arr Array(UInt64), arr_1 UInt64 MATERIALIZED arr[1]) ENGINE = MergeTree() PARTITION BY a ORDER BY (a, arr_1) SETTINGS index_granularity = 1" +query "CREATE TABLE $s3_materialized_export (a UInt32, arr Array(UInt64), arr_1 UInt64) ENGINE = S3(s3_conn, filename='$s3_materialized_export', format=Parquet, partition_strategy='hive') PARTITION BY a" + +query "INSERT INTO $mt_materialized VALUES (1, [1, 2, 3]), (1, [10, 20, 30])" + +materialized_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$mt_materialized' AND partition_id = '1' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n') + +query "ALTER TABLE $mt_materialized EXPORT PART '$materialized_part' TO TABLE $s3_materialized_export SETTINGS allow_experimental_export_merge_tree_part = 1" + +sleep 3 + +echo "---- Verify MATERIALIZED column data in source table (arr_1 computed from arr[1])" +query "SELECT a, arr, arr_1 FROM $mt_materialized ORDER BY arr" + +echo "---- Verify MATERIALIZED column data exported to S3 (should match source)" +query "SELECT a, arr, arr_1 FROM $s3_materialized_export ORDER BY arr" + +query "DROP TABLE IF EXISTS $mt_table, $s3_table, $mt_table_roundtrip, $s3_table_wildcard, $s3_table_wildcard_partition_expression_with_function, $mt_table_partition_expression_with_function, $big_table, $big_destination_max_bytes, $big_destination_max_rows, $mt_alias, $mt_materialized, $s3_alias_export, $s3_materialized_export" From 2875fcc8b8e5d2fd0f1f428635a8d83861c0eb18 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 20 Jan 2026 10:27:36 -0300 Subject: [PATCH 2/5] move code to a separate function, add docs --- src/Storages/MergeTree/ExportPartTask.cpp | 66 +++++++++++++---------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/src/Storages/MergeTree/ExportPartTask.cpp b/src/Storages/MergeTree/ExportPartTask.cpp index 1dc417b45e54..6fcaefe908d5 100644 --- a/src/Storages/MergeTree/ExportPartTask.cpp +++ b/src/Storages/MergeTree/ExportPartTask.cpp @@ -45,6 +45,42 @@ namespace Setting extern const SettingsUInt64 export_merge_tree_part_max_rows_per_file; } +namespace +{ + void materializeSpecialColumns( + const SharedHeader & header, + const NamesAndTypesList & all_columns, + const 
ColumnsDescription & columns,
+        const ContextPtr & local_context,
+        QueryPlan & plan_for_part
+    )
+    {
+        // Enable all experimental settings for default expressions
+        // (same pattern as in IMergeTreeReader::evaluateMissingDefaults)
+        auto context_for_defaults = Context::createCopy(local_context);
+        enableAllExperimentalSettings(context_for_defaults);
+
+        auto defaults_dag = evaluateMissingDefaults(
+            *header,
+            all_columns,
+            columns,
+            context_for_defaults);
+
+        if (defaults_dag)
+        {
+            // Ensure columns are in the correct order matching all_columns
+            defaults_dag->removeUnusedActions(all_columns.getNames(), false);
+            defaults_dag->addMaterializingOutputActions(/*materialize_sparse=*/ false);
+
+            auto expression_step = std::make_unique<ExpressionStep>(
+                header,
+                std::move(*defaults_dag));
+            expression_step->setStepDescription("Compute alias and default expressions for export");
+            plan_for_part.addStep(std::move(expression_step));
+        }
+    }
+}
+
 ExportPartTask::ExportPartTask(MergeTreeData & storage_, const MergeTreePartExportManifest & manifest_)
     : storage(storage_), manifest(manifest_)
@@ -153,33 +189,9 @@ bool ExportPartTask::executeStep()
         local_context,
         getLogger("ExportPartition"));
 
-    // Add expression step to compute alias and other default columns for export
-    // This materializes virtual columns (like ALIAS) so they can be written to output
-    const auto & current_header = plan_for_part.getCurrentHeader();
-
-    // Enable all experimental settings for default expressions
-    // (same pattern as in IMergeTreeReader::evaluateMissingDefaults)
-    auto context_for_defaults = Context::createCopy(local_context);
-    enableAllExperimentalSettings(context_for_defaults);
-
-    auto defaults_dag = evaluateMissingDefaults(
-        *current_header,
-        all_columns,
-        metadata_snapshot->getColumns(),
-        context_for_defaults);
-
-    if (defaults_dag)
-    {
-        // Ensure columns are in the correct order matching all_columns
-        defaults_dag->removeUnusedActions(all_columns.getNames(), false);
-        defaults_dag->addMaterializingOutputActions(/*materialize_sparse=*/ false);
-
-        auto expression_step = std::make_unique<ExpressionStep>(
-            current_header,
-            std::move(*defaults_dag));
-        expression_step->setStepDescription("Compute alias and default expressions for export");
-        plan_for_part.addStep(std::move(expression_step));
-    }
+    /// We need to support exporting materialized and alias columns to object storage. For some reason, object storage engines don't support them.
+ /// This is a hack that materializes the columns before the export so they can be exported to tables that have matching columns + materializeSpecialColumns(plan_for_part.getCurrentHeader(), all_columns, metadata_snapshot->getColumns(), local_context, plan_for_part); ThreadGroupSwitcher switcher((*exports_list_entry)->thread_group, ""); From 0c0e26505dcdfb4eeb5d306bb7de666328d8627a Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 21 Jan 2026 11:07:28 -0300 Subject: [PATCH 3/5] ignore ephemeral during export, add tests --- src/Storages/ColumnsDescription.cpp | 9 ++ src/Storages/ColumnsDescription.h | 1 + src/Storages/MergeTree/ExportPartTask.cpp | 22 ++--- src/Storages/MergeTree/MergeTreeData.cpp | 4 +- ...erge_tree_part_to_object_storage.reference | 21 ++++ ...xport_merge_tree_part_to_object_storage.sh | 97 ++++++++++++++++++- ...rge_tree_part_to_object_storage_simple.sql | 13 ++- 7 files changed, 150 insertions(+), 17 deletions(-) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index ff0ba613e042..9100f5660629 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -469,6 +469,15 @@ NamesAndTypesList ColumnsDescription::getInsertable() const return ret; } +NamesAndTypesList ColumnsDescription::getReadable() const +{ + NamesAndTypesList ret; + for (const auto & col : columns) + if (col.default_desc.kind != ColumnDefaultKind::Ephemeral) + ret.emplace_back(col.name, col.type); + return ret; +} + NamesAndTypesList ColumnsDescription::getMaterialized() const { NamesAndTypesList ret; diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index af0d2be4c7f9..23a14fa92d3e 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -149,6 +149,7 @@ class ColumnsDescription : public IHints<> NamesAndTypesList getOrdinary() const; NamesAndTypesList getMaterialized() const; NamesAndTypesList getInsertable() const; /// ordinary + ephemeral + NamesAndTypesList getReadable() const; /// ordinary + materialized + aliases (no ephemeral) NamesAndTypesList getAliases() const; NamesAndTypesList getEphemeral() const; NamesAndTypesList getAllPhysical() const; /// ordinary + materialized. 
diff --git a/src/Storages/MergeTree/ExportPartTask.cpp b/src/Storages/MergeTree/ExportPartTask.cpp
index 6fcaefe908d5..af9217e34e40 100644
--- a/src/Storages/MergeTree/ExportPartTask.cpp
+++ b/src/Storages/MergeTree/ExportPartTask.cpp
@@ -49,12 +49,13 @@ namespace
 {
     void materializeSpecialColumns(
         const SharedHeader & header,
-        const NamesAndTypesList & all_columns,
-        const ColumnsDescription & columns,
+        const StorageMetadataPtr & storage_metadata,
         const ContextPtr & local_context,
         QueryPlan & plan_for_part
     )
     {
+        const auto readable_columns = storage_metadata->getColumns().getReadable();
+
         // Enable all experimental settings for default expressions
         // (same pattern as in IMergeTreeReader::evaluateMissingDefaults)
         auto context_for_defaults = Context::createCopy(local_context);
@@ -62,14 +63,14 @@ namespace
 
         auto defaults_dag = evaluateMissingDefaults(
             *header,
-            all_columns,
-            columns,
+            readable_columns,
+            storage_metadata->getColumns(),
             context_for_defaults);
 
         if (defaults_dag)
         {
-            // Ensure columns are in the correct order matching all_columns
-            defaults_dag->removeUnusedActions(all_columns.getNames(), false);
+            /// Ensure columns are in the correct order matching readable_columns
+            defaults_dag->removeUnusedActions(readable_columns.getNames(), false);
             defaults_dag->addMaterializingOutputActions(/*materialize_sparse=*/ false);
 
             auto expression_step = std::make_unique<ExpressionStep>(
@@ -97,11 +98,8 @@ bool ExportPartTask::executeStep()
 
     const auto & metadata_snapshot = manifest.metadata_snapshot;
 
-    // Read only physical columns from the part
-    Names columns_to_read = metadata_snapshot->getColumns().getNamesOfPhysical();
-
-    // But we want all columns (including aliases) in the output
-    NamesAndTypesList all_columns = metadata_snapshot->getColumns().getAll();
+    /// Read only physical columns from the part
+    const auto columns_to_read = metadata_snapshot->getColumns().getNamesOfPhysical();
 
     MergeTreeSequentialSourceType read_type = MergeTreeSequentialSourceType::Export;
 
@@ -191,7 +189,7 @@ bool ExportPartTask::executeStep()
     /// We need to support exporting materialized and alias columns to object storage. For some reason, object storage engines don't support them.
/// This is a hack that materializes the columns before the export so they can be exported to tables that have matching columns - materializeSpecialColumns(plan_for_part.getCurrentHeader(), all_columns, metadata_snapshot->getColumns(), local_context, plan_for_part); + materializeSpecialColumns(plan_for_part.getCurrentHeader(), metadata_snapshot, local_context, plan_for_part); ThreadGroupSwitcher switcher((*exports_list_entry)->thread_group, ""); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d2d573b0a86a..6e120bfc56a7 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6246,7 +6246,9 @@ void MergeTreeData::exportPartToTable( const auto & destination_columns = destination_metadata_ptr->getColumns(); - if (destination_columns.getAll().sizeOfDifference(source_columns.getAll())) + /// compare all source readable columns with all destination insertable columns + /// this allows us to skip ephemeral columns + if (source_columns.getReadable().sizeOfDifference(destination_columns.getInsertable())) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure"); if (query_to_string(source_metadata_ptr->getPartitionKeyAST()) != query_to_string(destination_metadata_ptr->getPartitionKeyAST())) diff --git a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference index 3d9ee8308464..98b2247bc1fb 100644 --- a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference +++ b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.reference @@ -56,3 +56,24 @@ ---- Verify MATERIALIZED column data exported to S3 (should match source) 1 [1,2,3] 1 1 [10,20,30] 10 +---- Test EPHEMERAL column (not stored, ignored during export) +---- Verify data in source +1 ALICE +1 BOB +---- Verify exported data +1 ALICE +1 BOB +---- Test Mixed ALIAS, MATERIALIZED, and EPHEMERAL in same table +---- Verify mixed columns in source table +1 5 10 15 TEST +1 10 20 30 PROD +---- Verify mixed columns exported to S3 (should match source) +1 5 10 15 TEST +1 10 20 30 PROD +---- Test Complex Expressions in computed columns +---- Verify complex expressions in source table +1 alice ALICE alice-1 +1 bob BOB bob-1 +---- Verify complex expressions exported to S3 (should match source) +1 alice ALICE alice-1 +1 bob BOB bob-1 diff --git a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh index 3757acb3df71..ea478d1b3df4 100755 --- a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh +++ b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage.sh @@ -23,12 +23,18 @@ mt_alias="mt_alias_${RANDOM}" mt_materialized="mt_materialized_${RANDOM}" s3_alias_export="s3_alias_export_${RANDOM}" s3_materialized_export="s3_materialized_export_${RANDOM}" +mt_mixed="mt_mixed_${RANDOM}" +s3_mixed_export="s3_mixed_export_${RANDOM}" +mt_complex_expr="mt_complex_expr_${RANDOM}" +s3_complex_expr_export="s3_complex_expr_export_${RANDOM}" +mt_ephemeral="mt_ephemeral_${RANDOM}" +s3_ephemeral_export="s3_ephemeral_export_${RANDOM}" query() { $CLICKHOUSE_CLIENT --query "$1" } -query "DROP TABLE IF EXISTS $mt_table, $s3_table, $mt_table_roundtrip, $s3_table_wildcard, $s3_table_wildcard_partition_expression_with_function, 
$mt_table_partition_expression_with_function, $mt_alias, $mt_materialized, $s3_alias_export, $s3_materialized_export" +query "DROP TABLE IF EXISTS $mt_table, $s3_table, $mt_table_roundtrip, $s3_table_wildcard, $s3_table_wildcard_partition_expression_with_function, $mt_table_partition_expression_with_function, $mt_alias, $mt_materialized, $s3_alias_export, $s3_materialized_export, $mt_mixed, $s3_mixed_export, $mt_complex_expr, $s3_complex_expr_export, $mt_ephemeral, $s3_ephemeral_export" query "CREATE TABLE $mt_table (id UInt64, year UInt16) ENGINE = MergeTree() PARTITION BY year ORDER BY tuple()" query "CREATE TABLE $s3_table (id UInt64, year UInt16) ENGINE = S3(s3_conn, filename='$s3_table', format=Parquet, partition_strategy='hive') PARTITION BY year" @@ -155,4 +161,91 @@ query "SELECT a, arr, arr_1 FROM $mt_materialized ORDER BY arr" echo "---- Verify MATERIALIZED column data exported to S3 (should match source)" query "SELECT a, arr, arr_1 FROM $s3_materialized_export ORDER BY arr" -query "DROP TABLE IF EXISTS $mt_table, $s3_table, $mt_table_roundtrip, $s3_table_wildcard, $s3_table_wildcard_partition_expression_with_function, $mt_table_partition_expression_with_function, $big_table, $big_destination_max_bytes, $big_destination_max_rows, $mt_alias, $mt_materialized, $s3_alias_export, $s3_materialized_export" +echo "---- Test EPHEMERAL column (not stored, ignored during export)" +query "CREATE TABLE $mt_ephemeral ( + id UInt32, + name_input String EPHEMERAL, + name_upper String DEFAULT upper(name_input) +) ENGINE = MergeTree() PARTITION BY id ORDER BY id SETTINGS index_granularity = 1" + +query "CREATE TABLE $s3_ephemeral_export ( + id UInt32, + name_upper String +) ENGINE = S3(s3_conn, filename='$s3_ephemeral_export', format=Parquet, partition_strategy='hive') PARTITION BY id" + +query "INSERT INTO $mt_ephemeral (id, name_input) VALUES (1, 'alice'), (1, 'bob')" + +ephemeral_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$mt_ephemeral' AND partition_id = '1' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n') + +query "ALTER TABLE $mt_ephemeral EXPORT PART '$ephemeral_part' TO TABLE $s3_ephemeral_export SETTINGS allow_experimental_export_merge_tree_part = 1" + +sleep 3 + +echo "---- Verify data in source" +query "SELECT id, name_upper FROM $mt_ephemeral ORDER BY name_upper" + +echo "---- Verify exported data" +query "SELECT id, name_upper FROM $s3_ephemeral_export ORDER BY name_upper" + +echo "---- Test Mixed ALIAS, MATERIALIZED, and EPHEMERAL in same table" +query "CREATE TABLE $mt_mixed ( + id UInt32, + value UInt32, + tag_input String EPHEMERAL, + doubled UInt64 ALIAS value * 2, + tripled UInt64 MATERIALIZED value * 3, + tag String DEFAULT upper(tag_input) +) ENGINE = MergeTree() PARTITION BY id ORDER BY id SETTINGS index_granularity = 1" + +query "CREATE TABLE $s3_mixed_export ( + id UInt32, + value UInt32, + doubled UInt64, + tripled UInt64, + tag String +) ENGINE = S3(s3_conn, filename='$s3_mixed_export', format=Parquet, partition_strategy='hive') PARTITION BY id" + +query "INSERT INTO $mt_mixed (id, value, tag_input) VALUES (1, 5, 'test'), (1, 10, 'prod')" + +mixed_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$mt_mixed' AND partition_id = '1' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n') + +query "ALTER TABLE $mt_mixed EXPORT PART '$mixed_part' TO TABLE $s3_mixed_export SETTINGS allow_experimental_export_merge_tree_part = 1" + +sleep 3 + +echo "---- Verify mixed columns in source 
table" +query "SELECT id, value, doubled, tripled, tag FROM $mt_mixed ORDER BY value" + +echo "---- Verify mixed columns exported to S3 (should match source)" +query "SELECT id, value, doubled, tripled, tag FROM $s3_mixed_export ORDER BY value" + +echo "---- Test Complex Expressions in computed columns" +query "CREATE TABLE $mt_complex_expr ( + id UInt32, + name String, + upper_name String ALIAS upper(name), + concat_result String MATERIALIZED concat(name, '-', toString(id)) +) ENGINE = MergeTree() PARTITION BY id ORDER BY id SETTINGS index_granularity = 1" + +query "CREATE TABLE $s3_complex_expr_export ( + id UInt32, + name String, + upper_name String, + concat_result String +) ENGINE = S3(s3_conn, filename='$s3_complex_expr_export', format=Parquet, partition_strategy='hive') PARTITION BY id" + +query "INSERT INTO $mt_complex_expr (id, name) VALUES (1, 'alice'), (1, 'bob')" + +complex_expr_part=$(query "SELECT name FROM system.parts WHERE database = currentDatabase() AND table = '$mt_complex_expr' AND partition_id = '1' AND active = 1 ORDER BY name LIMIT 1" | tr -d '\n') + +query "ALTER TABLE $mt_complex_expr EXPORT PART '$complex_expr_part' TO TABLE $s3_complex_expr_export SETTINGS allow_experimental_export_merge_tree_part = 1" + +sleep 3 + +echo "---- Verify complex expressions in source table" +query "SELECT id, name, upper_name, concat_result FROM $mt_complex_expr ORDER BY name" + +echo "---- Verify complex expressions exported to S3 (should match source)" +query "SELECT id, name, upper_name, concat_result FROM $s3_complex_expr_export ORDER BY name" + +query "DROP TABLE IF EXISTS $mt_table, $s3_table, $mt_table_roundtrip, $s3_table_wildcard, $s3_table_wildcard_partition_expression_with_function, $mt_table_partition_expression_with_function, $big_table, $big_destination_max_bytes, $big_destination_max_rows, $mt_alias, $mt_materialized, $s3_alias_export, $s3_materialized_export, $mt_ephemeral, $s3_ephemeral_export, $mt_mixed, $s3_mixed_export, $mt_complex_expr, $s3_complex_expr_export" diff --git a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage_simple.sql b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage_simple.sql index a61c066e8789..f92f6607646c 100644 --- a/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage_simple.sql +++ b/tests/queries/0_stateless/03572_export_merge_tree_part_to_object_storage_simple.sql @@ -1,6 +1,6 @@ -- Tags: no-parallel, no-fasttest -DROP TABLE IF EXISTS 03572_mt_table, 03572_invalid_schema_table; +DROP TABLE IF EXISTS 03572_mt_table, 03572_invalid_schema_table, 03572_ephemeral_mt_table, 03572_matching_ephemeral_s3_table; CREATE TABLE 03572_mt_table (id UInt64, year UInt16) ENGINE = MergeTree() PARTITION BY year ORDER BY tuple(); @@ -19,4 +19,13 @@ CREATE TABLE 03572_invalid_schema_table (id UInt64, year UInt16) ENGINE = S3(s3_ ALTER TABLE 03572_mt_table EXPORT PART '2020_1_1_0' TO TABLE 03572_invalid_schema_table SETTINGS allow_experimental_export_merge_tree_part = 1; -- {serverError NOT_IMPLEMENTED} -DROP TABLE IF EXISTS 03572_mt_table, 03572_invalid_schema_table; +-- Test that destination table can not have a column that matches the source ephemeral +CREATE TABLE 03572_ephemeral_mt_table (id UInt64, year UInt16, name String EPHEMERAL) ENGINE = MergeTree() PARTITION BY year ORDER BY tuple(); + +CREATE TABLE 03572_matching_ephemeral_s3_table (id UInt64, year UInt16, name String) ENGINE = S3(s3_conn, filename='03572_matching_ephemeral_s3_table', format='Parquet', partition_strategy='hive') 
PARTITION BY year;
+
+INSERT INTO 03572_ephemeral_mt_table (id, year, name) VALUES (1, 2020, 'alice');
+
+ALTER TABLE 03572_ephemeral_mt_table EXPORT PART '2020_1_1_0' TO TABLE 03572_matching_ephemeral_s3_table SETTINGS allow_experimental_export_merge_tree_part = 1; -- {serverError INCOMPATIBLE_COLUMNS}
+
+DROP TABLE IF EXISTS 03572_mt_table, 03572_invalid_schema_table, 03572_ephemeral_mt_table, 03572_matching_ephemeral_s3_table;

From 0fa51582d45a2ba265afd6b1395bcb735a1cfe21 Mon Sep 17 00:00:00 2001
From: Arthur Passos
Date: Wed, 21 Jan 2026 11:36:09 -0300
Subject: [PATCH 4/5] implement and test for export partition as well

---
 src/Storages/StorageReplicatedMergeTree.cpp   |  4 +-
 .../test.py                                   | 56 +++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 0e6b54f06f86..6eb70ca74ecb 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -8133,7 +8133,9 @@ void StorageReplicatedMergeTree::exportPartitionToTable(const PartitionCommand &
     auto src_snapshot = getInMemoryMetadataPtr();
     auto destination_snapshot = dest_storage->getInMemoryMetadataPtr();
 
-    if (destination_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical()))
+    /// compare all source readable columns with all destination insertable columns
+    /// this allows us to skip ephemeral columns
+    if (src_snapshot->getColumns().getReadable().sizeOfDifference(destination_snapshot->getColumns().getInsertable()))
         throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure");
 
     if (query_to_string(src_snapshot->getPartitionKeyAST()) != query_to_string(destination_snapshot->getPartitionKeyAST()))
diff --git a/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py b/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py
index a4cb0807d6ee..f5917632667d 100644
--- a/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py
+++ b/tests/integration/test_export_replicated_mt_partition_to_object_storage/test.py
@@ -747,3 +747,59 @@ def test_multiple_exports_within_a_single_query(cluster):
 #     # Wait for export to finish and then verify destination still reflects the original snapshot (3 rows)
 #     time.sleep(5)
 #     assert node.query(f"SELECT count() FROM {s3_table} WHERE year = 2020") == '3\n', "Export did not preserve snapshot at start time after source mutation"
+
+
+def test_export_partition_with_mixed_computed_columns(cluster):
+    """Test export partition with ALIAS, MATERIALIZED, and EPHEMERAL columns."""
+    node = cluster.instances["replica1"]
+
+    mt_table = "mixed_computed_mt_table"
+    s3_table = "mixed_computed_s3_table"
+
+    node.query(f"""
+        CREATE TABLE {mt_table} (
+            id UInt32,
+            value UInt32,
+            tag_input String EPHEMERAL,
+            doubled UInt64 ALIAS value * 2,
+            tripled UInt64 MATERIALIZED value * 3,
+            tag String DEFAULT upper(tag_input)
+        ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{mt_table}', 'replica1')
+        PARTITION BY id
+        ORDER BY id
+        SETTINGS index_granularity = 1
+    """)
+
+    # Create S3 destination table with regular columns (no EPHEMERAL)
+    node.query(f"""
+        CREATE TABLE {s3_table} (
+            id UInt32,
+            value UInt32,
+            doubled UInt64,
+            tripled UInt64,
+            tag String
+        ) ENGINE = S3(s3_conn, filename='{s3_table}', format=Parquet, partition_strategy='hive')
+        PARTITION BY id
+    """)
+
+    node.query(f"INSERT INTO {mt_table} (id, value, tag_input)
VALUES (1, 5, 'test'), (1, 10, 'prod')") + + node.query(f"ALTER TABLE {mt_table} EXPORT PARTITION ID '1' TO TABLE {s3_table}") + + wait_for_export_status(node, mt_table, s3_table, "1", "COMPLETED") + + # Verify source data (ALIAS computed, EPHEMERAL not stored) + source_result = node.query(f"SELECT id, value, doubled, tripled, tag FROM {mt_table} ORDER BY value") + expected = "1\t5\t10\t15\tTEST\n1\t10\t20\t30\tPROD\n" + assert source_result == expected, f"Source table data mismatch. Expected:\n{expected}\nGot:\n{source_result}" + + dest_result = node.query(f"SELECT id, value, doubled, tripled, tag FROM {s3_table} ORDER BY value") + assert dest_result == expected, f"Exported data mismatch. Expected:\n{expected}\nGot:\n{dest_result}" + + status = node.query(f""" + SELECT status FROM system.replicated_partition_exports + WHERE source_table = '{mt_table}' + AND destination_table = '{s3_table}' + AND partition_id = '1' + """) + assert status.strip() == "COMPLETED", f"Expected COMPLETED status, got: {status}" From 95562db3cb15711b970467324dfb02aeda1654d8 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 21 Jan 2026 12:03:52 -0300 Subject: [PATCH 5/5] sign check
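
Taken together, patches 1-3 make ALTER TABLE ... EXPORT PART carry computed columns over to object storage. A minimal end-to-end sketch distilled from the stateless tests above (it assumes the `s3_conn` named collection used by the test suite; table names are illustrative, and the real tests look the part name up in system.parts rather than hardcoding it):

CREATE TABLE mt_alias (a UInt32, arr Array(UInt64), arr_1 UInt64 ALIAS arr[1])
ENGINE = MergeTree() PARTITION BY a ORDER BY a;

-- The destination declares the ALIAS column as a plain physical column.
CREATE TABLE s3_alias_export (a UInt32, arr Array(UInt64), arr_1 UInt64)
ENGINE = S3(s3_conn, filename='s3_alias_export', format=Parquet, partition_strategy='hive') PARTITION BY a;

INSERT INTO mt_alias VALUES (1, [1, 2, 3]);

-- The export task reads only the physical columns from the part; the extra
-- ExpressionStep added above computes the remaining readable columns before writing.
ALTER TABLE mt_alias EXPORT PART '1_1_1_0' TO TABLE s3_alias_export
SETTINGS allow_experimental_export_merge_tree_part = 1;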
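The compatibility check introduced in patch 3 (and mirrored for replicated tables in patch 4) compares the source's readable columns (ordinary + MATERIALIZED + ALIAS, per the new getReadable()) against the destination's insertable columns (ordinary + EPHEMERAL). A sketch of the resulting rule, with hypothetical table names; the incompatible case mirrors the negative test in the .sql file above:

CREATE TABLE src (id UInt64, name String EPHEMERAL, name_upper String DEFAULT upper(name))
ENGINE = MergeTree() PARTITION BY id ORDER BY id;
-- readable(src) = {id, name_upper}: the EPHEMERAL column is excluded.

-- Compatible: insertable(dst) = {id, name_upper} matches readable(src).
CREATE TABLE dst (id UInt64, name_upper String)
ENGINE = S3(s3_conn, filename='dst', format=Parquet, partition_strategy='hive') PARTITION BY id;

-- Incompatible: a physical column named after the source EPHEMERAL makes the sets differ,
-- so EXPORT PART throws INCOMPATIBLE_COLUMNS.
CREATE TABLE dst_bad (id UInt64, name_upper String, name String)
ENGINE = S3(s3_conn, filename='dst_bad', format=Parquet, partition_strategy='hive') PARTITION BY id;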
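Patch 4 extends the same column handling to EXPORT PARTITION on ReplicatedMergeTree. A sketch of the statement and the status poll used by the integration test (names taken from test.py above):

ALTER TABLE mixed_computed_mt_table EXPORT PARTITION ID '1' TO TABLE mixed_computed_s3_table;

SELECT status FROM system.replicated_partition_exports
WHERE source_table = 'mixed_computed_mt_table'
  AND destination_table = 'mixed_computed_s3_table'
  AND partition_id = '1';
-- The test waits until this reports COMPLETED, then compares source and destination rows.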