From 7ba1a9d3837952bcd00b2fe76e2417616b9c2819 Mon Sep 17 00:00:00 2001 From: Mikhail Koviazin Date: Wed, 1 Jul 2026 16:46:33 +0200 Subject: [PATCH 1/4] Resolve currentDatabase() in Hybrid segment metadata A Hybrid table created in a non-default database with `currentDatabase()` in an additional segment persisted the unresolved `currentDatabase()` token in its metadata. On restart the startup ATTACH replays the definition with no session database, so `currentDatabase()` resolved to `default` and the segment pointed at a missing `default` table, failing to attach with `UNKNOWN_TABLE`. `registerStorageHybrid` resolved `currentDatabase()` only on the clone used for schema validation and in-memory segment execution, never on the original `engine_args[i]` that gets serialized to metadata. Resolve it on the original argument too, so the persisted value is a concrete database name. This mirrors the base segment, which is already resolved in place by `TableFunctionRemote::parseArguments`, and the identifier branch, which writes the qualified name back into `engine_args[i]`. Add an integration test that creates a Hybrid table in a non-default database using `currentDatabase()`, restarts the server, and verifies the table still attaches and is queryable. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/Storages/StorageDistributed.cpp | 7 ++ .../test_hybrid_table_restart/__init__.py | 0 .../test_hybrid_table_restart/test.py | 74 +++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 tests/integration/test_hybrid_table_restart/__init__.py create mode 100644 tests/integration/test_hybrid_table_restart/test.py diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index bff01204aa34..436128e3271b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -3008,6 +3008,13 @@ void registerStorageHybrid(StorageFactory & factory) ColumnsDescription segment_columns = additional_table_function->getActualTableStructure(local_context, true); replaceCurrentDatabaseFunction(normalized_table_function_ast, local_context); + // The clone above is only used for schema validation and in-memory segment + // execution. Resolve `currentDatabase()` in the original argument too, so the + // value written to the table metadata is a concrete database name. Otherwise + // startup ATTACH (which has no session database) resolves it to `default` and + // the segment fails to attach with UNKNOWN_TABLE. 
+ replaceCurrentDatabaseFunction(engine_args[i], local_context); + validate_segment_schema(segment_columns, normalized_table_function_ast->formatForLogging()); // It's a table function - store the AST and cached schema for later execution diff --git a/tests/integration/test_hybrid_table_restart/__init__.py b/tests/integration/test_hybrid_table_restart/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_hybrid_table_restart/test.py b/tests/integration/test_hybrid_table_restart/test.py new file mode 100644 index 000000000000..1c254375a045 --- /dev/null +++ b/tests/integration/test_hybrid_table_restart/test.py @@ -0,0 +1,74 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", stay_alive=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_current_database_survives_restart(start_cluster): + """A Hybrid table created in a non-default database with currentDatabase() in a + segment must survive a server restart. + + currentDatabase() has to be resolved to the actual database name and stored in + the table metadata at CREATE time. Otherwise the unresolved currentDatabase() + token is replayed during startup ATTACH, where there is no session database, so + it resolves to `default` and the segment points at the missing default.local_cold, + failing to attach with UNKNOWN_TABLE. 
+ """ + settings = {"allow_experimental_hybrid_table": 1} + + node.query("CREATE DATABASE IF NOT EXISTS test_db") + node.query( + "CREATE TABLE test_db.local_hot (ts DateTime, value UInt64) ENGINE = MergeTree ORDER BY ts" + ) + node.query( + "CREATE TABLE test_db.local_cold (ts DateTime, value UInt64) ENGINE = MergeTree ORDER BY ts" + ) + node.query( + "INSERT INTO test_db.local_hot VALUES ('2025-10-15', 1), ('2025-11-01', 2)" + ) + node.query( + "INSERT INTO test_db.local_cold VALUES ('2025-08-01', 3), ('2025-06-15', 4)" + ) + + # Run with test_db as the session database so currentDatabase() resolves to it. + node.query( + """ + CREATE TABLE test_db.hybrid_t (ts DateTime, value UInt64) + ENGINE = Hybrid( + remote('localhost:9000', 'test_db', 'local_hot'), + ts > hybridParam('hybrid_watermark_hot', 'DateTime'), + remote('localhost:9000', currentDatabase(), 'local_cold'), + ts <= hybridParam('hybrid_watermark_hot', 'DateTime') + ) + SETTINGS hybrid_watermark_hot = '2025-09-01' + """, + database="test_db", + settings=settings, + ) + + # Sanity: the table works before the restart, so any post-restart failure is + # attributable to metadata persistence, not to the table definition itself. + assert node.query("SELECT count() FROM test_db.hybrid_t", settings=settings).strip() == "4" + + node.restart_clickhouse(kill=True) + + # On the buggy build the stored metadata still contains currentDatabase(), which + # resolves to `default` during startup ATTACH, so this query fails with UNKNOWN_TABLE. + assert node.query("SELECT count() FROM test_db.hybrid_t", settings=settings).strip() == "4" + + # The persisted definition must store the resolved database name, not the + # unresolved currentDatabase() token. 
+ show = node.query("SHOW CREATE TABLE test_db.hybrid_t", settings=settings) + assert "currentDatabase()" not in show + assert "test_db" in show From 081738e866f50dac50f06d3f2b0a5020f3b634c3 Mon Sep 17 00:00:00 2001 From: Mikhail Koviazin Date: Wed, 1 Jul 2026 18:28:25 +0200 Subject: [PATCH 2/4] reduce code duplication, fix the original call instead --- src/Storages/StorageDistributed.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 436128e3271b..a7cae667adee 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -3003,17 +3003,10 @@ void registerStorageHybrid(StorageFactory & factory) // Normalize arguments (evaluate `currentDatabase()`, expand named collections, etc.). // TableFunctionFactory::get mutates the AST in-place inside TableFunctionRemote::parseArguments. - ASTPtr normalized_table_function_ast = table_function_ast->clone(); + replaceCurrentDatabaseFunction(engine_args[i], local_context); + ASTPtr normalized_table_function_ast = engine_args[i]->clone(); auto additional_table_function = TableFunctionFactory::instance().get(normalized_table_function_ast, local_context); ColumnsDescription segment_columns = additional_table_function->getActualTableStructure(local_context, true); - replaceCurrentDatabaseFunction(normalized_table_function_ast, local_context); - - // The clone above is only used for schema validation and in-memory segment - // execution. Resolve `currentDatabase()` in the original argument too, so the - // value written to the table metadata is a concrete database name. Otherwise - // startup ATTACH (which has no session database) resolves it to `default` and - // the segment fails to attach with UNKNOWN_TABLE. 
- replaceCurrentDatabaseFunction(engine_args[i], local_context); validate_segment_schema(segment_columns, normalized_table_function_ast->formatForLogging()); From 03fb9e97ea7228f73a7e83bde5dc6ff75b14b6bb Mon Sep 17 00:00:00 2001 From: Mikhail Koviazin Date: Wed, 1 Jul 2026 18:29:26 +0200 Subject: [PATCH 3/4] replace integration test with a stateless one --- .../test_hybrid_table_restart/__init__.py | 0 .../test_hybrid_table_restart/test.py | 74 ------------------- ...hybrid_current_database_reattach.reference | 2 + ...04302_hybrid_current_database_reattach.sql | 38 ++++++++++ 4 files changed, 40 insertions(+), 74 deletions(-) delete mode 100644 tests/integration/test_hybrid_table_restart/__init__.py delete mode 100644 tests/integration/test_hybrid_table_restart/test.py create mode 100644 tests/queries/0_stateless/04302_hybrid_current_database_reattach.reference create mode 100644 tests/queries/0_stateless/04302_hybrid_current_database_reattach.sql diff --git a/tests/integration/test_hybrid_table_restart/__init__.py b/tests/integration/test_hybrid_table_restart/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/integration/test_hybrid_table_restart/test.py b/tests/integration/test_hybrid_table_restart/test.py deleted file mode 100644 index 1c254375a045..000000000000 --- a/tests/integration/test_hybrid_table_restart/test.py +++ /dev/null @@ -1,74 +0,0 @@ -import pytest - -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", stay_alive=True) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_current_database_survives_restart(start_cluster): - """A Hybrid table created in a non-default database with currentDatabase() in a - segment must survive a server restart. 
- - currentDatabase() has to be resolved to the actual database name and stored in - the table metadata at CREATE time. Otherwise the unresolved currentDatabase() - token is replayed during startup ATTACH, where there is no session database, so - it resolves to `default` and the segment points at the missing default.local_cold, - failing to attach with UNKNOWN_TABLE. - """ - settings = {"allow_experimental_hybrid_table": 1} - - node.query("CREATE DATABASE IF NOT EXISTS test_db") - node.query( - "CREATE TABLE test_db.local_hot (ts DateTime, value UInt64) ENGINE = MergeTree ORDER BY ts" - ) - node.query( - "CREATE TABLE test_db.local_cold (ts DateTime, value UInt64) ENGINE = MergeTree ORDER BY ts" - ) - node.query( - "INSERT INTO test_db.local_hot VALUES ('2025-10-15', 1), ('2025-11-01', 2)" - ) - node.query( - "INSERT INTO test_db.local_cold VALUES ('2025-08-01', 3), ('2025-06-15', 4)" - ) - - # Run with test_db as the session database so currentDatabase() resolves to it. - node.query( - """ - CREATE TABLE test_db.hybrid_t (ts DateTime, value UInt64) - ENGINE = Hybrid( - remote('localhost:9000', 'test_db', 'local_hot'), - ts > hybridParam('hybrid_watermark_hot', 'DateTime'), - remote('localhost:9000', currentDatabase(), 'local_cold'), - ts <= hybridParam('hybrid_watermark_hot', 'DateTime') - ) - SETTINGS hybrid_watermark_hot = '2025-09-01' - """, - database="test_db", - settings=settings, - ) - - # Sanity: the table works before the restart, so any post-restart failure is - # attributable to metadata persistence, not to the table definition itself. - assert node.query("SELECT count() FROM test_db.hybrid_t", settings=settings).strip() == "4" - - node.restart_clickhouse(kill=True) - - # On the buggy build the stored metadata still contains currentDatabase(), which - # resolves to `default` during startup ATTACH, so this query fails with UNKNOWN_TABLE. 
- assert node.query("SELECT count() FROM test_db.hybrid_t", settings=settings).strip() == "4" - - # The persisted definition must store the resolved database name, not the - # unresolved currentDatabase() token. - show = node.query("SHOW CREATE TABLE test_db.hybrid_t", settings=settings) - assert "currentDatabase()" not in show - assert "test_db" in show diff --git a/tests/queries/0_stateless/04302_hybrid_current_database_reattach.reference b/tests/queries/0_stateless/04302_hybrid_current_database_reattach.reference new file mode 100644 index 000000000000..7290ba859f4a --- /dev/null +++ b/tests/queries/0_stateless/04302_hybrid_current_database_reattach.reference @@ -0,0 +1,2 @@ +4 +4 diff --git a/tests/queries/0_stateless/04302_hybrid_current_database_reattach.sql b/tests/queries/0_stateless/04302_hybrid_current_database_reattach.sql new file mode 100644 index 000000000000..8ed53ef63ebb --- /dev/null +++ b/tests/queries/0_stateless/04302_hybrid_current_database_reattach.sql @@ -0,0 +1,38 @@ +-- Reproduce the Hybrid currentDatabase() metadata bug without a server restart. +-- Detaching the table and re-attaching it while a different database is current +-- mimics the missing session-database context that startup ATTACH has. Before the +-- fix the stored metadata keeps currentDatabase(), which then resolves to the wrong +-- database and the segment fails to attach with UNKNOWN_TABLE. + +SET allow_experimental_hybrid_table = 1; + +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.local_hot (ts DateTime, value UInt64) ENGINE = MergeTree ORDER BY ts; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.local_cold (ts DateTime, value UInt64) ENGINE = MergeTree ORDER BY ts; +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.local_hot VALUES ('2025-10-15', 1), ('2025-11-01', 2); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.local_cold VALUES ('2025-08-01', 3), ('2025-06-15', 4); + +-- Create with the test database as current so currentDatabase() resolves to it. 
+USE {CLICKHOUSE_DATABASE:Identifier}; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.hybrid_t (ts DateTime, value UInt64) +ENGINE = Hybrid( + remote('localhost:9000', {CLICKHOUSE_DATABASE:String}, 'local_hot'), + ts > hybridParam('hybrid_watermark_hot', 'DateTime'), + remote('localhost:9000', currentDatabase(), 'local_cold'), + ts <= hybridParam('hybrid_watermark_hot', 'DateTime') +) +SETTINGS hybrid_watermark_hot = '2025-09-01'; + +SELECT count() FROM {CLICKHOUSE_DATABASE:Identifier}.hybrid_t; + +-- Detach, switch the current database, then re-attach. currentDatabase() in the +-- stored metadata now resolves against the other database, whose local_cold does +-- not exist. With the fix the metadata holds the resolved name, so this succeeds. +DETACH TABLE {CLICKHOUSE_DATABASE:Identifier}.hybrid_t; +CREATE DATABASE IF NOT EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; +USE {CLICKHOUSE_DATABASE_1:Identifier}; +ATTACH TABLE {CLICKHOUSE_DATABASE:Identifier}.hybrid_t; + +SELECT count() FROM {CLICKHOUSE_DATABASE:Identifier}.hybrid_t; + +USE {CLICKHOUSE_DATABASE:Identifier}; +DROP DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; From 1037c7d712b6da5667427e9a26618df602bd50d2 Mon Sep 17 00:00:00 2001 From: Mikhail Koviazin Date: Wed, 1 Jul 2026 18:29:36 +0200 Subject: [PATCH 4/4] fix 03645_hybrid_watermarks after the changes The reference file still expected the unresolved currentDatabase() call in the CREATE TABLE output; replace it with the resolved 'default' literal. --- .../0_stateless/03645_hybrid_watermarks.reference | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/03645_hybrid_watermarks.reference b/tests/queries/0_stateless/03645_hybrid_watermarks.reference index 144780c0d6a4..21cc7180ba34 100644 --- a/tests/queries/0_stateless/03645_hybrid_watermarks.reference +++ b/tests/queries/0_stateless/03645_hybrid_watermarks.reference @@ -1,6 +1,6 @@ --- Test 1: CREATE with watermarks --- Test 2: SHOW CREATE TABLE -CREATE TABLE default.t\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE
= Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', currentDatabase(), \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\'))\nSETTINGS hybrid_watermark_hot = \'2025-09-01\' +CREATE TABLE default.t\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', \'default\', \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\'))\nSETTINGS hybrid_watermark_hot = \'2025-09-01\' --- Test 3: First query after CREATE 1 1 @@ -8,25 +8,25 @@ CREATE TABLE default.t\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = H --- Test 5: Query with updated boundary 1 --- Test 6: SHOW CREATE after ALTER -CREATE TABLE default.t\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', currentDatabase(), \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\'))\nSETTINGS hybrid_watermark_hot = \'2025-10-01\' +CREATE TABLE default.t\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', \'default\', \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\'))\nSETTINGS hybrid_watermark_hot = \'2025-10-01\' --- Test 7: DETACH/ATTACH persistence -CREATE TABLE default.t\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', currentDatabase(), \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\'))\nSETTINGS hybrid_watermark_hot = \'2025-10-01\' +CREATE 
TABLE default.t\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', \'default\', \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\'))\nSETTINGS hybrid_watermark_hot = \'2025-10-01\' --- Test 8: Three segments, two watermarks -CREATE TABLE default.t3\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', currentDatabase(), \'local_warm\'), (ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\')) AND (ts > hybridParam(\'hybrid_watermark_cold\', \'DateTime\')), remote(\'localhost:9000\', currentDatabase(), \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_cold\', \'DateTime\'))\nSETTINGS hybrid_watermark_cold = \'2025-07-01\', hybrid_watermark_hot = \'2025-10-01\' +CREATE TABLE default.t3\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', \'default\', \'local_warm\'), (ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\')) AND (ts > hybridParam(\'hybrid_watermark_cold\', \'DateTime\')), remote(\'localhost:9000\', \'default\', \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_cold\', \'DateTime\'))\nSETTINGS hybrid_watermark_cold = \'2025-07-01\', hybrid_watermark_hot = \'2025-10-01\' --- Test 9: Reject non-watermark parameter --- Test 10: Missing watermark SETTINGS rejected at CREATE --- Test 11: Invalid typed value --- Test 12: Reject non-watermark MODIFY SETTING --- Test 13: Reject RESET SETTING on Hybrid --- Test 14: Alter one preserves the other -CREATE TABLE default.t3\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > 
hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', currentDatabase(), \'local_warm\'), (ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\')) AND (ts > hybridParam(\'hybrid_watermark_cold\', \'DateTime\')), remote(\'localhost:9000\', currentDatabase(), \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_cold\', \'DateTime\'))\nSETTINGS hybrid_watermark_cold = \'2025-08-01\', hybrid_watermark_hot = \'2025-12-01\' +CREATE TABLE default.t3\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', \'default\', \'local_warm\'), (ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\')) AND (ts > hybridParam(\'hybrid_watermark_cold\', \'DateTime\')), remote(\'localhost:9000\', \'default\', \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_cold\', \'DateTime\'))\nSETTINGS hybrid_watermark_cold = \'2025-08-01\', hybrid_watermark_hot = \'2025-12-01\' 1 --- Test 15: Reject DistributedSettings at CREATE --- Test 16: Plain Distributed unaffected --- Test 17: Value via SETTINGS 1 1 -CREATE TABLE default.t_settings_only\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', currentDatabase(), \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\'))\nSETTINGS hybrid_watermark_hot = \'2025-09-01\' +CREATE TABLE default.t_settings_only\n(\n `ts` DateTime,\n `value` UInt64\n)\nENGINE = Hybrid(remote(\'localhost:9000\', \'default\', \'local_hot\'), ts > hybridParam(\'hybrid_watermark_hot\', \'DateTime\'), remote(\'localhost:9000\', \'default\', \'local_cold\'), ts <= hybridParam(\'hybrid_watermark_hot\', \'DateTime\'))\nSETTINGS hybrid_watermark_hot = \'2025-09-01\' --- Test 18: Conflicting types rejected --- Test 19: Invalid SETTINGS value 
rejected at CREATE --- Test 20: Typo in CREATE SETTINGS rejected