From 02fa1908eb9e517e0d1c1e72e389c62c6660a7da Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 May 2026 22:51:32 +0900 Subject: [PATCH] Merge pull request #102689 from ClickHouse/fix-substreams-rename-bug Fix columns_substreams.txt corruption during column rename in some cases --- .../Serializations/ISerialization.cpp | 4 +- ...lumn_corrupts_columns_substreams.reference | 38 ++++++++++ ...name_column_corrupts_columns_substreams.sh | 72 +++++++++++++++++++ 3 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/04093_rename_column_corrupts_columns_substreams.reference create mode 100755 tests/queries/0_stateless/04093_rename_column_corrupts_columns_substreams.sh diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index d932033192d2..f693c7b64537 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -301,11 +301,11 @@ String ISerialization::getFileNameForRenamedColumnStream(const String & name_fro { auto name_from_escaped = escapeForFileName(name_from); if (file_name.starts_with(name_from_escaped)) - return escapeForFileName(name_to) + file_name.substr(0, name_from_escaped.size()); + return escapeForFileName(name_to) + file_name.substr(name_from_escaped.size()); auto nested_storage_name_escaped = escapeForFileName(Nested::extractTableName(name_from)); if (file_name.starts_with(nested_storage_name_escaped)) - return escapeForFileName(Nested::extractTableName(name_to)) + file_name.substr(0, nested_storage_name_escaped.size()); + return escapeForFileName(Nested::extractTableName(name_to)) + file_name.substr(nested_storage_name_escaped.size()); throw Exception(ErrorCodes::LOGICAL_ERROR, "File name {} doesn't correspond to column {}", file_name, name_from); } diff --git a/tests/queries/0_stateless/04093_rename_column_corrupts_columns_substreams.reference 
b/tests/queries/0_stateless/04093_rename_column_corrupts_columns_substreams.reference new file mode 100644 index 000000000000..2284b9e556b9 --- /dev/null +++ b/tests/queries/0_stateless/04093_rename_column_corrupts_columns_substreams.reference @@ -0,0 +1,38 @@ +Before rename: +columns substreams version: 1 +2 columns: +1 substreams for column `id`: + id +2 substreams for column `arr`: + arr.size0 + arr +After rename arr -> brr: +columns substreams version: 1 +2 columns: +1 substreams for column `id`: + id +2 substreams for column `brr`: + brr.size0 + brr +Nested before rename: +columns substreams version: 1 +3 columns: +1 substreams for column `id`: + id +2 substreams for column `nested.a`: + nested.size0 + nested%2Ea +2 substreams for column `nested.b`: + nested.size0 + nested%2Eb +Nested after rename nested.a -> nested.aa: +columns substreams version: 1 +3 columns: +1 substreams for column `id`: + id +2 substreams for column `nested.aa`: + nested.size0 + nested%2Eaa +2 substreams for column `nested.b`: + nested.size0 + nested%2Eb diff --git a/tests/queries/0_stateless/04093_rename_column_corrupts_columns_substreams.sh b/tests/queries/0_stateless/04093_rename_column_corrupts_columns_substreams.sh new file mode 100755 index 000000000000..08fed87fd1da --- /dev/null +++ b/tests/queries/0_stateless/04093_rename_column_corrupts_columns_substreams.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# Tags: no-shared-merge-tree, no-object-storage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_rename_substreams" + +${CLICKHOUSE_CLIENT} --query " + CREATE TABLE test_rename_substreams + ( + id UInt32, + arr Array(UInt32) + ) + ENGINE = MergeTree + ORDER BY id + SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1, + enable_block_number_column = 0, enable_block_offset_column = 0, + replace_long_file_name_to_hash = 0, ratio_of_defaults_for_sparse_serialization = 1; +" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO test_rename_substreams SELECT number, [number, number + 1] FROM numbers(10)" + +# Get data path before rename to verify initial state +DATA_PATH=$(${CLICKHOUSE_CLIENT} --query "SELECT path FROM system.parts WHERE database = currentDatabase() AND table = 'test_rename_substreams' AND active") + +echo "Before rename:" +cat "${DATA_PATH}columns_substreams.txt" + +# Rename arr -> brr +${CLICKHOUSE_CLIENT} --query "ALTER TABLE test_rename_substreams RENAME COLUMN arr TO brr" + +# Get the new part path (mutation creates a new part) +DATA_PATH_NEW=$(${CLICKHOUSE_CLIENT} --query "SELECT path FROM system.parts WHERE database = currentDatabase() AND table = 'test_rename_substreams' AND active") + +echo "After rename arr -> brr:" +cat "${DATA_PATH_NEW}columns_substreams.txt" + +# Also rename a Nested column to test the second code path (the Nested table-name prefix branch of getFileNameForRenamedColumnStream) +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_rename_nested_substreams" + +${CLICKHOUSE_CLIENT} --query " + CREATE TABLE test_rename_nested_substreams + ( + id UInt32, + nested Nested(a UInt32, b UInt32) + ) + ENGINE = MergeTree + ORDER BY id + SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1, + enable_block_number_column = 0, enable_block_offset_column = 0, + replace_long_file_name_to_hash = 0, ratio_of_defaults_for_sparse_serialization = 1; +" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO test_rename_nested_substreams SELECT number, [number], [number + 1] FROM numbers(10)" + 
+DATA_PATH_NESTED=$(${CLICKHOUSE_CLIENT} --query "SELECT path FROM system.parts WHERE database = currentDatabase() AND table = 'test_rename_nested_substreams' AND active") + +echo "Nested before rename:" +cat "${DATA_PATH_NESTED}columns_substreams.txt" + +# Rename nested.a -> nested.aa +${CLICKHOUSE_CLIENT} --query "ALTER TABLE test_rename_nested_substreams RENAME COLUMN nested.a TO nested.aa" + +DATA_PATH_NESTED_NEW=$(${CLICKHOUSE_CLIENT} --query "SELECT path FROM system.parts WHERE database = currentDatabase() AND table = 'test_rename_nested_substreams' AND active") + +echo "Nested after rename nested.a -> nested.aa:" +cat "${DATA_PATH_NESTED_NEW}columns_substreams.txt" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE test_rename_substreams" +${CLICKHOUSE_CLIENT} --query "DROP TABLE test_rename_nested_substreams"