@@ -102,6 +102,68 @@ struct AliasMarkerName
102102 static constexpr auto name = " __aliasMarker" ;
103103};
104104
105+ /**
106+ * __aliasMarker is an internal function used to enforce an alias projection step in the plan exactly
107+ * where it appears in a query received from the initiator.
108+ *
109+ * It allows the initiator to take better control over the aliases returned by shards, including cases
110+ * where the final projection step is skipped due to the WithMergeableState stage. The main usage
111+ * scenario is when the initiator injects an expression that must behave like a real column from the
112+ * initiator's point of view. Namely, this happens after expanding an ALIAS column in a distributed
113+ * table to its underlying expression.
114+ *
115+ * For example, if the initiator executes:
116+ *
117+ * SELECT foo AS bar FROM distr
118+ *
119+ * and `foo` is an ALIAS column such as `1 + x`, the remote query becomes:
120+ *
121+ * SELECT __aliasMarker(1 + x, 'table1.foo') AS bar FROM local AS table1
122+ *
123+ * This must not be confused with normal SQL aliases that appear in the query text: those participate
124+ * in user-visible query semantics and may or may not be materialized depending on the execution stage.
125+ * The user-facing SQL alias (`bar` in the example above) is separate and must stay untouched.
126+ *
127+ * A normal SQL alias cannot be used instead of __aliasMarker here because it may interfere with user
128+ * query logic, clash with existing names, and in the mergeable-state path the final projection step
129+ * that normally assigns aliases is intentionally skipped (see the conditional
130+ * createComputeAliasColumnsStep(...) path in PlannerJoinTree::buildQueryPlanForTableExpression()).
131+ *
132+ * Preserving the identity of such injected columns is important because otherwise remote headers
133+ * may diverge from initiator expectations, leading to header mismatches, incorrect column
134+ * associations, or column-count mismatches.
135+ *
136+ * It differs slightly from the __actionName function (used for virtual column injection in
137+ * engine=Merge): __actionName only supports a constant string and survives as a normal function
138+ * node with a forced result name, while __aliasMarker is completely removed from the query plan and
139+ * supports any SQL expression as its first argument.
140+ *
141+ * The marker also prevents distinct logical columns with identical expressions from being merged
142+ * into a single transport column. For example:
143+ *
144+ * SELECT 2 * x AS x, 2 * x AS y
145+ *
146+ * must still produce two columns; otherwise both expressions could collapse into a single
147+ * `multiply(2, x)` output and break distributed header reconciliation.
148+ *
149+ * Lifecycle / invariants:
150+ * 1) Injected around rewritten alias expressions that need stable transport identity, with a second
151+ * argument pointing to the column in the query tree.
152+ * 2) In later phases, some column manipulations and renames may happen (namely after
153+ * createUniqueAliasesIfNecessary) before the column gets its final name.
154+ * 3) After that, and before passing the query down to shards, the second argument of __aliasMarker
155+ * gets "materialized": the column reference id is converted to a String identifier.
156+ * 4) Consumed on the receiver by adding a projection step where it appears, so that identity is
157+ * enforced in actions without changing the user-facing aliasing logic.
158+ * 5) Preserved while forwarding to the next hop. Nested marker chains are allowed, and each marker
159+ * may contribute an alias step during actions construction.
160+ *
161+ * This is a temporary bridge while distributed plan transport still relies on SQL text in these
162+ * paths. If query plan serialization eventually replaces that boundary entirely, this marker path
163+ * may become unnecessary. However, to support the same behavior with serialize_query_plan, query
164+ * plan modifications would still be required to control the names of those injected expressions.
165+ */
166+
105167class FunctionAliasMarker : public IFunction
106168{
107169public:
@@ -110,7 +172,7 @@ class FunctionAliasMarker : public IFunction
110172
111173 String getName () const override { return name; }
112174 size_t getNumberOfArguments () const override { return 2 ; }
113- ColumnNumbers getArgumentsThatAreAlwaysConstant () const override { return {1 }; }
175+ ColumnNumbers getArgumentsThatAreAlwaysConstant () const override { return {}; }
114176 bool isSuitableForConstantFolding () const override { return false ; }
115177 bool isSuitableForShortCircuitArgumentsExecution (const DataTypesWithConstInfo & /* arguments*/ ) const override { return false ; }
116178
@@ -119,14 +181,12 @@ class FunctionAliasMarker : public IFunction
119181 if (arguments.size () != 2 )
120182 throw Exception (ErrorCodes::BAD_ARGUMENTS, " Function __aliasMarker expects 2 arguments" );
121183
122- if (!WhichDataType (arguments[1 ]).isString ())
123- throw Exception (ErrorCodes::BAD_ARGUMENTS, " Function __aliasMarker is internal and should not be used directly" );
124-
125184 return arguments.front ();
126185 }
127186
128187 ColumnPtr executeImpl (const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count*/ ) const override
129188 {
189+ // Normally never executed: the marker is replaced with its first argument during plan building.
130190 return arguments.front ().column ;
131191 }
132192};
0 commit comments