From c5727d8e7c89ff02951c0b3ed8c58789c1b26834 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 7 May 2026 10:47:45 +0000 Subject: [PATCH] Merge pull request #94359 from simonmichal/issue-89940 Allow positional arguments in distributed queries --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 16 ++++++- src/Interpreters/Context.cpp | 2 + src/Interpreters/Context.h | 17 +++++++ .../QueryPlan/DistributedCreateLocalPlan.cpp | 9 ++-- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 9 ++-- src/Storages/StorageView.cpp | 6 +++ ...al_arg_in_view_distributed_query.reference | 11 +++++ ...sitional_arg_in_view_distributed_query.sql | 34 ++++++++++++++ ..._args_via_local_plan.oldanalyzer.reference | 4 ++ ...w_positional_args_via_local_plan.reference | 3 ++ ...105_view_positional_args_via_local_plan.sh | 46 +++++++++++++++++++ 11 files changed, 148 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/03802_positional_arg_in_view_distributed_query.reference create mode 100644 tests/queries/0_stateless/03802_positional_arg_in_view_distributed_query.sql create mode 100644 tests/queries/0_stateless/04105_view_positional_args_via_local_plan.oldanalyzer.reference create mode 100644 tests/queries/0_stateless/04105_view_positional_args_via_local_plan.reference create mode 100755 tests/queries/0_stateless/04105_view_positional_args_via_local_plan.sh diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 26d34be495f3..60e02d3e50eb 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -919,8 +919,22 @@ void QueryAnalyzer::mergeWindowWithParentWindow(const QueryTreeNodePtr & window_ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope) { const auto & settings = scope.context->getSettingsRef(); - if (!settings[Setting::enable_positional_arguments] || scope.context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY) + if (!settings[Setting::enable_positional_arguments]) return; + const bool is_view_inner = scope.context->isViewInnerQuery(); + if (!is_view_inner) + { + /// Skip resolution on distributed/parallel-replicas local plan nodes: the initiator + /// already resolved positional arguments in the outer query. View-inner contexts are + /// exempt because views are expanded on the local node and were never resolved by the + /// initiator. + if (scope.context->isPositionalArgumentsAlreadyResolved()) + return; + /// Skip on remote shard execution (SECONDARY_QUERY): same reasoning as above for + /// paths not covered by setPositionalArgumentsAlreadyResolved. + if (scope.context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY) + return; + } auto & node_list_typed = node_list->as(); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b554e36fe257..1afa57b68645 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1150,6 +1150,8 @@ ContextData::ContextData(const ContextData &o) : global_context(o.global_context), buffer_context(o.buffer_context), is_internal_query(o.is_internal_query), + is_view_inner_query(o.is_view_inner_query), + positional_arguments_already_resolved(o.positional_arguments_already_resolved), temp_data_on_disk(o.temp_data_on_disk), classifier(o.classifier), prepared_sets_cache(o.prepared_sets_cache), diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 626705099e22..8804713ea21d 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -513,6 +513,17 @@ class ContextData /// A flag, used to distinguish between user query and internal query to a database engine (MaterializedPostgreSQL). bool is_internal_query = false; + /// True when this context belongs to the inner query of an expanded view. + /// Positional arguments inside views must be resolved even on remote/secondary nodes where + /// enable_positional_arguments would otherwise be skipped (views are expanded on remote nodes, + /// not on the initiator). + bool is_view_inner_query = false; + /// True when positional arguments in the outer query have already been resolved by the + /// initiator node. Set by distributed/parallel-replicas local plan builders to prevent + /// double-resolution. Unlike disabling enable_positional_arguments, this flag is a context + /// field and does not propagate through settings copies (e.g. getSQLSecurityOverriddenContext), + /// so view-inner queries on the same node are unaffected. + bool positional_arguments_already_resolved = false; inline static ContextPtr global_context_instance; @@ -1437,6 +1448,12 @@ class Context: public ContextData, public std::enable_shared_from_this bool isInternalQuery() const { return is_internal_query; } void setInternalQuery(bool internal) { is_internal_query = internal; } + bool isViewInnerQuery() const { return is_view_inner_query; } + void setIsViewInnerQuery(bool value) { is_view_inner_query = value; } + + bool isPositionalArgumentsAlreadyResolved() const { return positional_arguments_already_resolved; } + void setPositionalArgumentsAlreadyResolved(bool value) { positional_arguments_already_resolved = value; } + ActionLocksManagerPtr getActionLocksManager() const; enum class ApplicationType : uint8_t diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index c16c270f8ab7..6f96cbb638c7 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -49,10 +49,11 @@ std::unique_ptr createLocalPlan( if (context->getSettingsRef()[Setting::allow_experimental_analyzer]) { - /// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to - /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace - /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). - new_context->setSetting("enable_positional_arguments", Field(false)); + /// Positional arguments in the outer query were already resolved by the initiator. + /// Use a context flag instead of disabling enable_positional_arguments so that + /// view-inner queries on this node (which were never resolved by the initiator) are + /// still processed correctly. See https://github.com/ClickHouse/ClickHouse/issues/62289. + new_context->setPositionalArgumentsAlreadyResolved(true); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); } diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 10cca9b5e718..06e4c215f82c 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -46,10 +46,11 @@ std::pair createLocalPlanForParallelReplicas( /// and we can produce query, inconsistent with remote plans. auto select_query_options = SelectQueryOptions(processed_stage).ignoreASTOptimizations(); - /// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to - /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace - /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). - new_context->setSetting("enable_positional_arguments", Field(false)); + /// Positional arguments in the outer query were already resolved by the initiator. + /// Use a context flag instead of disabling enable_positional_arguments so that + /// view-inner queries on this node are still processed correctly. + /// See https://github.com/ClickHouse/ClickHouse/issues/62289. + new_context->setPositionalArgumentsAlreadyResolved(true); new_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index da9ddc532e7b..ad5ee1556618 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -41,6 +41,7 @@ namespace Setting extern const SettingsBool extremes; extern const SettingsUInt64 max_result_rows; extern const SettingsUInt64 max_result_bytes; + extern const SettingsBool enable_positional_arguments; } namespace ErrorCodes @@ -102,6 +103,10 @@ bool hasJoin(const ASTSelectWithUnionQuery & ast) /** There are no limits on the maximum size of the result for the view. * Since the result of the view is not the result of the entire query. + * + * The context is also marked as a view inner context so that the query analyzer + * resolves positional arguments inside the view even on remote/secondary nodes + * (views are expanded on remote nodes, unlike the outer query). */ ContextPtr getViewContext(ContextPtr context, const StorageSnapshotPtr & storage_snapshot) { @@ -111,6 +116,7 @@ ContextPtr getViewContext(ContextPtr context, const StorageSnapshotPtr & storage view_settings[Setting::max_result_bytes] = 0; view_settings[Setting::extremes] = false; view_context->setSettings(view_settings); + view_context->setIsViewInnerQuery(true); return view_context; } diff --git a/tests/queries/0_stateless/03802_positional_arg_in_view_distributed_query.reference b/tests/queries/0_stateless/03802_positional_arg_in_view_distributed_query.reference new file mode 100644 index 000000000000..748b85e7c8b7 --- /dev/null +++ b/tests/queries/0_stateless/03802_positional_arg_in_view_distributed_query.reference @@ -0,0 +1,11 @@ +a +b +c +--- +a +b +c +--- +a +b +c diff --git a/tests/queries/0_stateless/03802_positional_arg_in_view_distributed_query.sql b/tests/queries/0_stateless/03802_positional_arg_in_view_distributed_query.sql new file mode 100644 index 000000000000..5e02704a2bfc --- /dev/null +++ b/tests/queries/0_stateless/03802_positional_arg_in_view_distributed_query.sql @@ -0,0 +1,34 @@ +-- Clean up if needed +drop table if exists test_table SYNC; +drop view if exists test_view SYNC; + +-- Create a simple base table +create table test_table ( str String ) ENGINE = MergeTree +order by str; + +-- Insert some sample data (optional, but helps verify locally) +insert into test_table values ('a'), ('b'), ('c'); + +-- Create a view using positional GROUP BY +create view test_view as +select str +from test_table +group by 1; + + +select str from test_table order by all; + +select '---'; +-- Simulate distributed query to "remote" nodes (points back to localhost or multiple addresses) +select str +from remote('127.0.0.{1|2}', currentDatabase(), test_view) +order by str settings prefer_localhost_replica=0; + +select '---'; +select str +from remote('127.0.0.{1|2}', currentDatabase(), test_view) +order by str settings prefer_localhost_replica=1; + +-- Clean up +drop table if exists test_table SYNC; +drop view if exists test_view SYNC; diff --git a/tests/queries/0_stateless/04105_view_positional_args_via_local_plan.oldanalyzer.reference b/tests/queries/0_stateless/04105_view_positional_args_via_local_plan.oldanalyzer.reference new file mode 100644 index 000000000000..1297d7d604c3 --- /dev/null +++ b/tests/queries/0_stateless/04105_view_positional_args_via_local_plan.oldanalyzer.reference @@ -0,0 +1,4 @@ +--- prefer_localhost_replica=1, positional args enabled: 3 --- +3 +--- prefer_localhost_replica=1, positional args disabled: error --- +1 diff --git a/tests/queries/0_stateless/04105_view_positional_args_via_local_plan.reference b/tests/queries/0_stateless/04105_view_positional_args_via_local_plan.reference new file mode 100644 index 000000000000..4e7df320c152 --- /dev/null +++ b/tests/queries/0_stateless/04105_view_positional_args_via_local_plan.reference @@ -0,0 +1,3 @@ +--- prefer_localhost_replica=1, positional args enabled: 3 --- +3 +--- prefer_localhost_replica=1, positional args disabled: error --- diff --git a/tests/queries/0_stateless/04105_view_positional_args_via_local_plan.sh b/tests/queries/0_stateless/04105_view_positional_args_via_local_plan.sh new file mode 100755 index 000000000000..8f434f8da41e --- /dev/null +++ b/tests/queries/0_stateless/04105_view_positional_args_via_local_plan.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Verify that positional arguments inside views are resolved correctly via the +# local-plan path (createLocalPlan) triggered when prefer_localhost_replica=1 +# routes a distributed query shard to the local node. +# +# createLocalPlan sets enable_positional_arguments=false on a copy of the +# context to prevent double-resolution of the outer query's positional args. +# This must not prevent the view's own positional args from being resolved, +# because the view is expanded on the local node (not on the initiator). +# +# When enable_positional_arguments=0 is set explicitly by the user the view's +# GROUP BY 1 stays as a literal constant. In the new analyzer this produces +# NOT_AN_AGGREGATE (215); in the old analyzer the constant GROUP BY is silently +# dropped and a global aggregate is returned instead. +# The two .reference files handle this behavioral difference. +# +# Refs: https://github.com/ClickHouse/ClickHouse/issues/89940 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_table SYNC" +${CLICKHOUSE_CLIENT} -q "DROP VIEW IF EXISTS test_view SYNC" + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE test_table (str String) ENGINE = MergeTree ORDER BY str" +${CLICKHOUSE_CLIENT} -q "INSERT INTO test_table VALUES ('a'), ('b'), ('c')" +# GROUP BY 1 resolves to GROUP BY str -> 3 distinct groups. +${CLICKHOUSE_CLIENT} -q "CREATE VIEW test_view AS SELECT str, count() AS cnt FROM test_table GROUP BY 1" + +# 127.0.0.1 is the local host; prefer_localhost_replica=1 guarantees that +# this single shard is served by createLocalPlan (no remote TCP connection). +echo '--- prefer_localhost_replica=1, positional args enabled: 3 ---' +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM remote('127.0.0.1', currentDatabase(), test_view) +SETTINGS prefer_localhost_replica = 1" + +# Sanity check: disabling positional arguments must also be respected on the +# local-plan path. New analyzer: GROUP BY 1 stays literal -> NOT_AN_AGGREGATE +# (215) -> no output here. Old analyzer: constant GROUP BY is dropped, +# global aggregate returns one row -> prints 1. See .oldanalyzer.reference. +echo '--- prefer_localhost_replica=1, positional args disabled: error ---' +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM remote('127.0.0.1', currentDatabase(), test_view) +SETTINGS prefer_localhost_replica = 1, enable_positional_arguments = 0" 2>/dev/null || true + +${CLICKHOUSE_CLIENT} -q "DROP TABLE test_table SYNC" +${CLICKHOUSE_CLIENT} -q "DROP VIEW test_view SYNC"