Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/Planner/PlannerJoinTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,13 +654,14 @@ UInt64 mainQueryNodeBlockSizeByLimit(const SelectQueryInfo & select_query_info)
}

std::unique_ptr<ExpressionStep> createComputeAliasColumnsStep(
std::unordered_map<std::string, ActionsDAG> & alias_column_expressions, const SharedHeader & current_header)
AliasColumnExpressions & alias_column_expressions, const SharedHeader & current_header)
{
ActionsDAG merged_alias_columns_actions_dag(current_header->getColumnsWithTypeAndName());
ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag.getInputs();

for (auto & [column_name, alias_column_actions_dag] : alias_column_expressions)
for (auto & alias_column_expression : alias_column_expressions)
{
auto & alias_column_actions_dag = alias_column_expression.second;
const auto & current_outputs = alias_column_actions_dag.getOutputs();
action_dag_outputs.insert(action_dag_outputs.end(), current_outputs.begin(), current_outputs.end());
merged_alias_columns_actions_dag.mergeNodes(std::move(alias_column_actions_dag));
Expand Down
22 changes: 19 additions & 3 deletions src/Planner/TableExpressionData.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ namespace ErrorCodes
using ColumnIdentifier = std::string;
using ColumnIdentifiers = std::vector<ColumnIdentifier>;
using ColumnIdentifierSet = std::unordered_set<ColumnIdentifier>;
using AliasColumnExpression = std::pair<std::string, ActionsDAG>;
using AliasColumnExpressions = std::vector<AliasColumnExpression>;

struct PrewhereInfo;
using PrewhereInfoPtr = std::shared_ptr<PrewhereInfo>;
Expand Down Expand Up @@ -77,7 +79,8 @@ class TableExpressionData
/// Add alias column
void addAliasColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, ActionsDAG actions_dag, bool is_selected_column = true)
{
alias_column_expressions.emplace(column.name, std::move(actions_dag));
alias_column_expressions.emplace_back(column.name, std::move(actions_dag));
alias_column_names_set.emplace(column.name);
addColumnImpl(column, column_identifier, is_selected_column);
}

Expand All @@ -96,11 +99,21 @@ class TableExpressionData
}

/// Get ALIAS columns names mapped to expressions
std::unordered_map<std::string, ActionsDAG> & getAliasColumnExpressions()
AliasColumnExpressions & getAliasColumnExpressions()
{
return alias_column_expressions;
}

const AliasColumnExpressions & getAliasColumnExpressions() const
{
return alias_column_expressions;
}

bool hasAliasColumn(const std::string & column_name) const
{
return alias_column_names_set.contains(column_name);
}

/// Get column name to column map
const ColumnNameToColumn & getColumnNameToColumn() const
{
Expand Down Expand Up @@ -279,7 +292,10 @@ class TableExpressionData
NameSet selected_column_names_set;

/// Expression to calculate ALIAS columns
std::unordered_map<std::string, ActionsDAG> alias_column_expressions;
/// Keep alias name (String) + expression (ActionsDAG) pairs; vector preserves insertion order.
AliasColumnExpressions alias_column_expressions;
/// Fast membership checks for alias column names.
NameSet alias_column_names_set;

/// Valid for table, table function, array join, query, union nodes
ColumnNameToColumn column_name_to_column;
Expand Down
3 changes: 1 addition & 2 deletions src/Storages/SelectQueryInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,11 @@ std::unordered_map<std::string, ColumnWithTypeAndName> SelectQueryInfo::buildNod
if (planner_context)
{
auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression);
const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions();
for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName())
{
/// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG,
/// so they should not be added to the input nodes.
if (alias_column_expressions.contains(column_name))
if (table_expression_data.hasAliasColumn(column_name))
continue;
const auto & column = table_expression_data.getColumnOrThrow(column_name);
node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name));
Expand Down
126 changes: 117 additions & 9 deletions src/Storages/StorageDistributed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,99 @@ class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitor<ReplaseAliasCo
}
};

using ColumnNameToColumnNodeMap = std::unordered_map<std::string, ColumnNodePtr>;

ColumnNameToColumnNodeMap buildColumnNodesForTableExpression(const QueryTreeNodePtr & table_expression_node, const ContextPtr & context)
{
const TableNode * table_node = table_expression_node->as<TableNode>();
const TableFunctionNode * table_function_node = table_expression_node->as<TableFunctionNode>();
if (!table_node && !table_function_node)
return {};

// Rebuild per-column nodes (including ALIAS expressions) for the replacement table expression.
const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot();
auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
if (storage_snapshot->storage.supportsSubcolumns())
get_column_options.withSubcolumns();

auto column_names_and_types = storage_snapshot->getColumns(get_column_options);
const auto & columns_description = storage_snapshot->metadata->getColumns();

ColumnNameToColumnNodeMap column_name_to_node;
column_name_to_node.reserve(column_names_and_types.size());

for (const auto & column_name_and_type : column_names_and_types)
{
const auto & column_default = columns_description.getDefault(column_name_and_type.name);
if (column_default && column_default->kind == ColumnDefaultKind::Alias)
{
auto alias_expression = buildQueryTree(column_default->expression, context);
QueryAnalysisPass(table_expression_node).run(alias_expression, context);
if (!alias_expression->getResultType()->equals(*column_name_and_type.type))
alias_expression = buildCastFunction(alias_expression, column_name_and_type.type, context, true);

auto column_node = std::make_shared<ColumnNode>(column_name_and_type, std::move(alias_expression), table_expression_node);
column_name_to_node.emplace(column_name_and_type.name, std::move(column_node));
}
else
{
auto column_node = std::make_shared<ColumnNode>(column_name_and_type, table_expression_node);
column_name_to_node.emplace(column_name_and_type.name, std::move(column_node));
}
}

return column_name_to_node;
}

class ReplaceColumnNodesForTableExpressionVisitor : public InDepthQueryTreeVisitor<ReplaceColumnNodesForTableExpressionVisitor>
{
public:
ReplaceColumnNodesForTableExpressionVisitor(
const QueryTreeNodePtr & from_,
const QueryTreeNodePtr & to_,
const ColumnNameToColumnNodeMap & column_name_to_node_)
: from(from_), to(to_), column_name_to_node(column_name_to_node_)
{}

void visitImpl(QueryTreeNodePtr & node)
{
auto * column_node = node->as<ColumnNode>();
if (!column_node)
return;

auto column_source = column_node->getColumnSourceOrNull();
if (!column_source)
return;

if (column_source.get() != from.get())
return;

auto it = column_name_to_node.find(column_node->getColumnName());
if (it != column_name_to_node.end())
{
auto replacement = it->second->clone();
replacement->setAlias(column_node->getAlias());
node = std::move(replacement);
}
else
{
// Preserve the column name but rebind its source to the replacement table expression.
column_node->setColumnSource(to);
}
}

static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
{
auto child_node_type = child_node->getNodeType();
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
}

private:
QueryTreeNodePtr from;
QueryTreeNodePtr to;
const ColumnNameToColumnNodeMap & column_name_to_node;
};

class RewriteInToGlobalInVisitor : public InDepthQueryTreeVisitorWithContext<RewriteInToGlobalInVisitor>
{
public:
Expand Down Expand Up @@ -1019,13 +1112,12 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info,
replacement_table_expression->setAlias(query_info.table_expression->getAlias());

QueryTreeNodePtr filter;

if (additional_filter)
{
const auto & context = query_info.planner_context->getQueryContext();

filter = buildQueryTree(additional_filter->clone(), query_context);

// Resolve now; alias expressions are normalized later for the merged query.
QueryAnalysisPass(replacement_table_expression).run(filter, context);
}

Expand All @@ -1040,6 +1132,17 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info,
: std::move(filter);
}

if (additional_filter)
{
auto replacement_columns = buildColumnNodesForTableExpression(replacement_table_expression, query_context);

ReplaceColumnNodesForTableExpressionVisitor replace_query_columns_visitor(
replacement_table_expression,
replacement_table_expression,
replacement_columns);
replace_query_columns_visitor.visit(query_tree_to_modify);
}

ReplaseAliasColumnsVisitor replase_alias_columns_visitor;
replase_alias_columns_visitor.visit(query_tree_to_modify);

Expand Down Expand Up @@ -2247,8 +2350,9 @@ void StorageDistributed::setCachedColumnsToCast(ColumnsDescription columns)
if (!cached_columns_to_cast.empty() && log)
{
Names columns_with_types;
columns_with_types.reserve(cached_columns_to_cast.getAllPhysical().size());
for (const auto & col : cached_columns_to_cast.getAllPhysical())
const auto cached_columns = cached_columns_to_cast.getAllPhysical();
columns_with_types.reserve(cached_columns.size());
for (const auto & col : cached_columns)
columns_with_types.emplace_back(col.name + " " + col.type->getName());
LOG_DEBUG(log, "Hybrid auto-cast will apply to: [{}]", fmt::join(columns_with_types, ", "));
}
Expand Down Expand Up @@ -2420,12 +2524,15 @@ void registerStorageHybrid(StorageFactory & factory)
if (columns_to_use.empty())
columns_to_use = first_segment_columns;

const auto physical_columns = columns_to_use.getAllPhysical();

NameSet columns_to_cast_names;
auto validate_segment_schema = [&](const ColumnsDescription & segment_columns, const String & segment_name)
{
for (const auto & column : columns_to_use.getAllPhysical())
for (const auto & column : physical_columns)
{
auto found = segment_columns.tryGetPhysical(column.name);
// all columns defined as physical in hybrid should exists in segments (but can be aliases there)
auto found = segment_columns.tryGetColumn(GetColumnsOptions(GetColumnsOptions::AllPhysicalAndAliases), column.name);
if (!found)
{
throw Exception(
Expand All @@ -2434,6 +2541,7 @@ void registerStorageHybrid(StorageFactory & factory)
segment_name, column.name);
}

// if the type of the column is the segment differs - we need to add it to the list of columns which require casts
if (!found->type->equals(*column.type))
columns_to_cast_names.emplace(column.name);
}
Expand Down Expand Up @@ -2467,8 +2575,6 @@ void registerStorageHybrid(StorageFactory & factory)
"TableFunctionRemote did not return a StorageDistributed or StorageProxy, got: {}", actual_type);
}

const auto physical_columns = columns_to_use.getAllPhysical();

auto validate_predicate = [&](ASTPtr & predicate, size_t argument_index)
{
try
Expand Down Expand Up @@ -2619,7 +2725,9 @@ void registerStorageHybrid(StorageFactory & factory)
if (!columns_to_cast_names.empty())
{
NamesAndTypesList cast_cols;
for (const auto & col : columns_to_use.getAllPhysical())

// 'physical' columns of Hybrid will be read from segments, and may need CASTS
for (const auto & col : physical_columns)
{
if (columns_to_cast_names.contains(col.name))
cast_cols.emplace_back(col.name, col.type);
Expand Down
11 changes: 11 additions & 0 deletions tests/queries/0_stateless/03645_hybrid_alias_columns.reference
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
test1
1 ['foo1','bar1_before'] foo1
2 ['foo2','bar2_after'] foo2
Hybrid segment predicates with alias columns
3 30 60 33
Insert into Hybrid with EPHEMERAL column
2 0A0B0C0D
Select from Hybrid with EPHEMERAL column
1 5A90B714
2 0A0B0C0D
10 01020304
Loading
Loading