Skip to content

Commit c8bc98a

Browse files
committed
Analyzer: fix __aliasMarker, covering complex cases.
Signed-off-by: Mikhail Filimonov <mfilimonov@altinity.com>
1 parent 216182f commit c8bc98a

27 files changed

Lines changed: 1210 additions & 104 deletions

src/Analyzer/Utils.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,84 @@ void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const Stri
975975
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
976976
}
977977

978+
namespace
979+
{
980+
981+
/// Finalize __aliasMarker nodes right before distributed SQL boundaries.
982+
/// This pass preserves nested markers and materializes arg2 to String constant
983+
/// only when arg2 is ColumnNode.
984+
class FinalizeAliasMarkersForDistributedSerializationVisitor : public InDepthQueryTreeVisitor<FinalizeAliasMarkersForDistributedSerializationVisitor>
985+
{
986+
public:
987+
explicit FinalizeAliasMarkersForDistributedSerializationVisitor(ContextPtr context_)
988+
: context(std::move(context_))
989+
{}
990+
991+
bool shouldTraverseTopToBottom() const
992+
{
993+
return false;
994+
}
995+
996+
static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr &)
997+
{
998+
/// Keep traversing marker payload recursively so nested chains are preserved
999+
/// and each marker can materialize its own arg2 when needed.
1000+
return true;
1001+
}
1002+
1003+
void visitImpl(QueryTreeNodePtr & node)
1004+
{
1005+
auto * function_node = node->as<FunctionNode>();
1006+
if (!function_node || function_node->getFunctionName() != "__aliasMarker")
1007+
return;
1008+
1009+
auto & arguments = function_node->getArguments().getNodes();
1010+
if (arguments.size() != 2 || !arguments[0] || !arguments[1])
1011+
return;
1012+
1013+
String alias_id;
1014+
if (const auto * marker_column_node = arguments[1]->as<ColumnNode>())
1015+
{
1016+
if (const auto & marker_source = marker_column_node->getColumnSourceOrNull();
1017+
marker_source && marker_source->hasAlias())
1018+
{
1019+
alias_id = marker_source->getAlias() + "." + marker_column_node->getColumnName();
1020+
}
1021+
else
1022+
{
1023+
throw Exception(
1024+
ErrorCodes::LOGICAL_ERROR,
1025+
"__aliasMarker expects the second argument to resolve to a column with a source alias before distributed serialization. "
1026+
"Column '{}' has an unnamed or missing source",
1027+
marker_column_node->getColumnName());
1028+
}
1029+
}
1030+
else if (const auto * marker_id_node = arguments[1]->as<ConstantNode>();
1031+
marker_id_node && isString(marker_id_node->getResultType()))
1032+
{
1033+
/// Already materialized marker id from a previous hop. Keep as is.
1034+
return;
1035+
}
1036+
1037+
if (alias_id.empty())
1038+
return;
1039+
1040+
arguments[1] = std::make_shared<ConstantNode>(std::move(alias_id), std::make_shared<DataTypeString>());
1041+
resolveOrdinaryFunctionNodeByName(*function_node, "__aliasMarker", context);
1042+
}
1043+
1044+
private:
1045+
ContextPtr context;
1046+
};
1047+
1048+
}
1049+
1050+
void finalizeAliasMarkersForDistributedSerialization(QueryTreeNodePtr & node, const ContextPtr & context)
1051+
{
1052+
FinalizeAliasMarkersForDistributedSerializationVisitor visitor(context);
1053+
visitor.visit(node);
1054+
}
1055+
9781056
std::pair<QueryTreeNodePtr, bool> getExpressionSource(const QueryTreeNodePtr & node)
9791057
{
9801058
if (const auto * column = node->as<ColumnNode>())

src/Analyzer/Utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,10 @@ void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin
157157
/// Arguments and parameters are taken from the node.
158158
void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name);
159159

160+
/// Finalize __aliasMarker nodes before distributed SQL boundaries by materializing
161+
/// marker ids in arg2 from ColumnNode to String ConstantNode when needed.
162+
void finalizeAliasMarkersForDistributedSerialization(QueryTreeNodePtr & node, const ContextPtr & context);
163+
160164
/// Returns single source of expression node.
161165
/// First element of pair is source node, can be nullptr if there are no sources or multiple sources.
162166
/// Second element of pair is true if there is at most one source, false if there are multiple sources.

src/Analyzer/createUniqueAliasesIfNecessary.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ void createUniqueAliasesIfNecessary(QueryTreeNodePtr & node, const ContextPtr &
226226
* It's required to create a valid AST for distributed query.
227227
*/
228228
CreateUniqueArrayJoinAliasesVisitor(context).visit(node);
229+
229230
}
230231

231232
}

src/Functions/identity.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ REGISTER_FUNCTION(AliasMarker)
3838
{
3939
factory.registerFunction<FunctionAliasMarker>(FunctionDocumentation{
4040
.description = R"(
41-
Internal function that marks ALIAS column expressions for the analyzer. Not intended for direct use.
41+
Internal function. Not for direct use.
4242
)",
4343
.syntax = {"__aliasMarker(expr, alias_name)"},
4444
.arguments = {

src/Functions/identity.h

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,68 @@ struct AliasMarkerName
102102
static constexpr auto name = "__aliasMarker";
103103
};
104104

105+
/**
106+
* __aliasMarker is an internal function used to enforce an alias projection step in the plan exactly
107+
* where it appears in a query received from the initiator.
108+
*
109+
* It allows the initiator to take better control over the aliases returned by shards, including cases
110+
* where the final projection step is skipped due to the WithMergeableState stage. The main usage
111+
* scenario is when the initiator injects an expression that must behave like a real column from the
112+
* initiator's point of view. Namely, this happens after expanding an ALIAS column in a distributed
113+
* table to its underlying expression.
114+
*
115+
* For example, if the initiator executes:
116+
*
117+
* SELECT foo AS bar FROM distr
118+
*
119+
* and `foo` is an ALIAS column such as `1 + x`, the remote query becomes:
120+
*
121+
* SELECT __aliasMarker(1 + x, 'table1.foo') AS bar FROM local AS table1
122+
*
123+
* This must not be confused with normal SQL aliases that appear in the query text: those participate
124+
* in user-visible query semantics and may or may not be materialized depending on the execution stage.
125+
* The user-facing SQL alias (`bar` in the example above) is separate and must stay untouched.
126+
*
127+
* A normal SQL alias cannot be used instead of __aliasMarker here because it may interfere with user
128+
* query logic, clash with existing names, and in the mergeable-state path the final projection step
129+
* that normally assigns aliases is intentionally skipped (see the conditional
130+
* createComputeAliasColumnsStep(...) path in PlannerJoinTree::buildQueryPlanForTableExpression()).
131+
*
132+
* Preserving that identity is important because otherwise remote headers may diverge from initiator
133+
* expectations, leading to header mismatches, incorrect column associations, or column-count
134+
* mismatches.
135+
*
136+
* It slightly differs from the __actionName function (which is used for virtual column injection in
137+
* engine=Merge), which only supports a constant string and survives as a normal function node with a
138+
* forced result name, while __aliasMarker is completely removed from the query plan and supports any
139+
* SQL expression as its first argument.
140+
*
141+
* The marker also prevents distinct logical columns with identical expressions from being merged
142+
* into a single transport column. For example:
143+
*
144+
* SELECT 2 * x AS x, 2 * x AS y
145+
*
146+
* must still produce two columns; otherwise both expressions could collapse into a single
147+
* `multiply(2, x)` output and break distributed header reconciliation.
148+
*
149+
* Lifecycle / invariants:
150+
* 1) Injected around rewritten alias expressions that need stable transport identity, with a second
151+
* argument pointing to the column in the query tree.
152+
* 2) In later phases, some column manipulations and renames may happen (namely after
153+
* createUniqueAliasesIfNecessary) before the column gets its final name.
154+
* 3) After that, and before passing the query down to shards, the second argument of __aliasMarker
155+
* gets "materialized": the column reference id is converted to a String identifier.
156+
* 4) Consumed on the receiver by adding a projection step where it appears, so that identity is
157+
* enforced in actions without changing the user-facing aliasing logic.
158+
* 5) Preserved while forwarding to the next hop. Nested marker chains are allowed, and each marker
159+
* may contribute an alias step during actions construction.
160+
*
161+
* This is a temporary bridge while distributed plan transport still relies on SQL text in these
162+
* paths. As query plan serialization potentially fully replaces that boundary, this marker path may
163+
* become unnecessary. However, to support the same behavior with serialize_query_plan, query plan
164+
* modifications would still be required to control the names of those injected expressions.
165+
*/
166+
105167
class FunctionAliasMarker : public IFunction
106168
{
107169
public:
@@ -110,7 +172,7 @@ class FunctionAliasMarker : public IFunction
110172

111173
String getName() const override { return name; }
112174
size_t getNumberOfArguments() const override { return 2; }
113-
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
175+
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
114176
bool isSuitableForConstantFolding() const override { return false; }
115177
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
116178

@@ -119,14 +181,12 @@ class FunctionAliasMarker : public IFunction
119181
if (arguments.size() != 2)
120182
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments");
121183

122-
if (!WhichDataType(arguments[1]).isString())
123-
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly");
124-
125184
return arguments.front();
126185
}
127186

128187
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
129188
{
189+
// normally never executed, replaced with 1st arg during plan builing.
130190
return arguments.front().column;
131191
}
132192
};

src/Planner/PlannerActionsVisitor.cpp

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <Functions/indexHint.h>
3232

3333
#include <Interpreters/ExpressionActionsSettings.h>
34+
#include <Interpreters/ClientInfo.h>
3435
#include <Interpreters/Context.h>
3536
#include <Interpreters/Set.h>
3637

@@ -88,6 +89,17 @@ String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_nod
8889
return buffer.str();
8990
}
9091

92+
String tryExtractAliasMarkerIdFromSecondArgument(const QueryTreeNodePtr & argument)
93+
{
94+
if (const auto * second_argument_constant = argument->as<ConstantNode>();
95+
second_argument_constant && isString(second_argument_constant->getResultType()))
96+
{
97+
return second_argument_constant->getValue().safeGet<String>();
98+
}
99+
100+
return {};
101+
}
102+
91103
class ActionNodeNameHelper
92104
{
93105
public:
@@ -184,14 +196,12 @@ class ActionNodeNameHelper
184196
{
185197
/// Perform sanity check, because user may call this function with unexpected arguments
186198
const auto & function_argument_nodes = function_node.getArguments().getNodes();
187-
if (function_argument_nodes.size() == 2)
188-
{
189-
if (const auto * second_argument = function_argument_nodes.at(1)->as<ConstantNode>())
190-
{
191-
if (isString(second_argument->getResultType()))
192-
result = second_argument->getValue().safeGet<String>();
193-
}
194-
}
199+
if (function_argument_nodes.size() != 2)
200+
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments");
201+
202+
result = tryExtractAliasMarkerIdFromSecondArgument(function_argument_nodes.at(1));
203+
if (result.empty())
204+
result = calculateActionNodeName(function_argument_nodes.at(0));
195205

196206
/// Empty node name is not allowed and leads to logical errors
197207
if (result.empty())
@@ -1119,15 +1129,11 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
11191129
if (function_arguments.size() != 2)
11201130
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments");
11211131

1122-
const auto * alias_id_node = function_arguments.at(1)->as<ConstantNode>();
1123-
if (!alias_id_node || !isString(alias_id_node->getResultType()))
1124-
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly");
1125-
1126-
const auto & alias_id = alias_id_node->getValue().safeGet<String>();
1132+
auto [child_name, levels] = visitImpl(function_arguments.at(0));
1133+
auto alias_id = tryExtractAliasMarkerIdFromSecondArgument(function_arguments.at(1));
11271134
if (alias_id.empty())
1128-
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly");
1135+
alias_id = child_name;
11291136

1130-
auto [child_name, levels] = visitImpl(function_arguments.at(0));
11311137
if (alias_id == child_name)
11321138
return {child_name, levels};
11331139

src/Planner/Utils.cpp

Lines changed: 1 addition & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -198,62 +198,13 @@ ASTPtr queryNodeToSelectQuery(const QueryTreeNodePtr & query_node, bool set_subq
198198
return result_ast;
199199
}
200200

201-
namespace
202-
{
203-
class NormalizeAliasMarkerVisitor : public InDepthQueryTreeVisitor<NormalizeAliasMarkerVisitor>
204-
{
205-
public:
206-
void visitImpl(QueryTreeNodePtr & node)
207-
{
208-
auto * function_node = node->as<FunctionNode>();
209-
if (!function_node || function_node->getFunctionName() != "__aliasMarker")
210-
return;
211-
212-
auto & arguments = function_node->getArguments().getNodes();
213-
if (arguments.size() != 2)
214-
return;
215-
216-
while (true)
217-
{
218-
auto * inner_function = arguments.front()->as<FunctionNode>();
219-
if (!inner_function || inner_function->getFunctionName() != "__aliasMarker")
220-
break;
221-
222-
auto & inner_arguments = inner_function->getArguments().getNodes();
223-
if (inner_arguments.size() != 2)
224-
break;
225-
226-
arguments.front() = inner_arguments.front();
227-
}
228-
}
229-
230-
bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child)
231-
{
232-
auto * parent_function = parent->as<FunctionNode>();
233-
if (parent_function && parent_function->getFunctionName() == "__aliasMarker")
234-
return false;
235-
236-
auto child_node_type = child->getNodeType();
237-
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
238-
}
239-
};
240-
241-
void normalizeAliasMarkersInQueryTree(QueryTreeNodePtr & node)
242-
{
243-
NormalizeAliasMarkerVisitor visitor;
244-
visitor.visit(node);
245-
}
246-
}
247-
248201
ASTPtr queryNodeToDistributedSelectQuery(const QueryTreeNodePtr & query_node)
249202
{
250203
/// Remove CTEs information from distributed queries.
251204
/// Now, if cte_name is set for subquery node, AST -> String serialization will only print cte name.
252205
/// But CTE is defined only for top-level query part, so may not be sent.
253206
/// Removing cte_name forces subquery to be always printed.
254-
auto query_node_to_convert = query_node->clone();
255-
normalizeAliasMarkersInQueryTree(query_node_to_convert);
256-
auto ast = queryNodeToSelectQuery(query_node_to_convert, /*set_subquery_cte_name=*/false);
207+
auto ast = queryNodeToSelectQuery(query_node, /*set_subquery_cte_name=*/false);
257208
return ast;
258209
}
259210

0 commit comments

Comments
 (0)