Skip to content

Commit 48d57dd

Browse files
committed
fix(parse_tables): extract tables from expression subqueries
1 parent 0c74f1b commit 48d57dd

3 files changed

Lines changed: 259 additions & 3 deletions

File tree

src/parse_tables.cpp

Lines changed: 103 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,13 @@
77
#include "duckdb/parser/statement/select_statement.hpp"
88
#include "duckdb/parser/query_node/select_node.hpp"
99
#include "duckdb/parser/query_node/cte_node.hpp"
10+
#include "duckdb/parser/query_node/set_operation_node.hpp"
11+
#include "duckdb/parser/result_modifier.hpp"
1012
#include "duckdb/parser/tableref/basetableref.hpp"
1113
#include "duckdb/parser/tableref/joinref.hpp"
1214
#include "duckdb/parser/tableref/subqueryref.hpp"
15+
#include "duckdb/parser/expression/subquery_expression.hpp"
16+
#include "duckdb/parser/parsed_expression_iterator.hpp"
1317
#include "duckdb/function/scalar/nested_functions.hpp"
1418

1519
namespace duckdb {
@@ -73,6 +77,13 @@ static unique_ptr<GlobalTableFunctionState> ParseTablesInit(ClientContext &conte
7377
return make_uniq<ParseTablesState>();
7478
}
7579

80+
// Forward declaration for mutual recursion
81+
static void ExtractTablesFromExpression(
82+
const duckdb::ParsedExpression &expr,
83+
std::vector<TableRefResult> &results,
84+
const duckdb::CommonTableExpressionMap *cte_map
85+
);
86+
7687
static void ExtractTablesFromRef(
7788
const duckdb::TableRef &ref,
7889
std::vector<TableRefResult> &results,
@@ -104,6 +115,10 @@ static void ExtractTablesFromRef(
104115
auto &join = (JoinRef &)ref;
105116
ExtractTablesFromRef(*join.left, results, TableContext::JoinLeft, is_top_level, cte_map);
106117
ExtractTablesFromRef(*join.right, results, TableContext::JoinRight, false, cte_map);
118+
// Process JOIN condition for subqueries
119+
if (join.condition) {
120+
ExtractTablesFromExpression(*join.condition, results, cte_map);
121+
}
107122
break;
108123
}
109124
case TableReferenceType::SUBQUERY: {
@@ -118,6 +133,33 @@ static void ExtractTablesFromRef(
118133
}
119134
}
120135

136+
// Extract tables from expressions that may contain subqueries (WHERE, HAVING, SELECT list, etc.)
137+
static void ExtractTablesFromExpression(
138+
const duckdb::ParsedExpression &expr,
139+
std::vector<TableRefResult> &results,
140+
const duckdb::CommonTableExpressionMap *cte_map
141+
) {
142+
using namespace duckdb;
143+
144+
// Check if this is a subquery expression
145+
if (expr.GetExpressionClass() == ExpressionClass::SUBQUERY) {
146+
auto &subquery_expr = (const SubqueryExpression &)expr;
147+
if (subquery_expr.subquery && subquery_expr.subquery->node) {
148+
ExtractTablesFromQueryNode(*subquery_expr.subquery->node, results, TableContext::Subquery, cte_map);
149+
}
150+
// Also process the child expression (e.g., the left side of IN)
151+
if (subquery_expr.child) {
152+
ExtractTablesFromExpression(*subquery_expr.child, results, cte_map);
153+
}
154+
return;
155+
}
156+
157+
// Recursively process child expressions
158+
ParsedExpressionIterator::EnumerateChildren(expr,
159+
[&](const ParsedExpression &child) {
160+
ExtractTablesFromExpression(child, results, cte_map);
161+
});
162+
}
121163

122164
static void ExtractTablesFromQueryNode(
123165
const duckdb::QueryNode &node,
@@ -144,7 +186,36 @@ static void ExtractTablesFromQueryNode(
144186
if (select_node.from_table) {
145187
ExtractTablesFromRef(*select_node.from_table, results, context, true, &select_node.cte_map);
146188
}
147-
}
189+
190+
// Extract tables from WHERE clause subqueries
191+
if (select_node.where_clause) {
192+
ExtractTablesFromExpression(*select_node.where_clause, results, &select_node.cte_map);
193+
}
194+
195+
// Extract tables from SELECT list subqueries
196+
for (const auto &expr : select_node.select_list) {
197+
if (expr) {
198+
ExtractTablesFromExpression(*expr, results, &select_node.cte_map);
199+
}
200+
}
201+
202+
// Extract tables from HAVING clause subqueries
203+
if (select_node.having) {
204+
ExtractTablesFromExpression(*select_node.having, results, &select_node.cte_map);
205+
}
206+
207+
// Extract tables from QUALIFY clause subqueries
208+
if (select_node.qualify) {
209+
ExtractTablesFromExpression(*select_node.qualify, results, &select_node.cte_map);
210+
}
211+
212+
// Extract tables from GROUP BY expressions
213+
for (const auto &expr : select_node.groups.group_expressions) {
214+
if (expr) {
215+
ExtractTablesFromExpression(*expr, results, &select_node.cte_map);
216+
}
217+
}
218+
}
148219
// additional step necessary for duckdb v1.4.0: unwrap CTE node
149220
else if (node.type == QueryNodeType::CTE_NODE) {
150221
auto &cte_node = (CTENode &)node;
@@ -153,6 +224,37 @@ static void ExtractTablesFromQueryNode(
153224
ExtractTablesFromQueryNode(*cte_node.child, results, context, cte_map);
154225
}
155226
}
227+
// Handle UNION/INTERSECT/EXCEPT (set operations)
228+
else if (node.type == QueryNodeType::SET_OPERATION_NODE) {
229+
auto &set_node = (SetOperationNode &)node;
230+
231+
if (set_node.left) {
232+
ExtractTablesFromQueryNode(*set_node.left, results, context, cte_map);
233+
}
234+
if (set_node.right) {
235+
ExtractTablesFromQueryNode(*set_node.right, results, context, cte_map);
236+
}
237+
}
238+
239+
// Process result modifiers (ORDER BY, LIMIT) for all node types
240+
for (const auto &modifier : node.modifiers) {
241+
if (modifier->type == ResultModifierType::ORDER_MODIFIER) {
242+
auto &order_modifier = (OrderModifier &)*modifier;
243+
for (const auto &order : order_modifier.orders) {
244+
if (order.expression) {
245+
ExtractTablesFromExpression(*order.expression, results, cte_map);
246+
}
247+
}
248+
} else if (modifier->type == ResultModifierType::LIMIT_MODIFIER) {
249+
auto &limit_modifier = (LimitModifier &)*modifier;
250+
if (limit_modifier.limit) {
251+
ExtractTablesFromExpression(*limit_modifier.limit, results, cte_map);
252+
}
253+
if (limit_modifier.offset) {
254+
ExtractTablesFromExpression(*limit_modifier.offset, results, cte_map);
255+
}
256+
}
257+
}
156258
}
157259

158260
static void ExtractTablesFromSQL(const std::string &sql, std::vector<TableRefResult> &results) {

test/sql/parse_tools/scalar_functions/parse_tables.test

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,41 @@ SELECT list_filter(parse_tables('select * from MyTable t inner join Other o on o
2929
----
3030
[{'schema': main, 'table': MyTable, 'context': from}]
3131

32+
# subquery in WHERE clause (IN)
33+
query I
34+
SELECT parse_tables('SELECT * FROM schema1.users WHERE id IN (SELECT user_id FROM schema2.orders)');
35+
----
36+
[{'schema': schema1, 'table': users, 'context': from}, {'schema': schema2, 'table': orders, 'context': from}]
37+
38+
# subquery in WHERE clause (EXISTS)
39+
query I
40+
SELECT parse_tables('SELECT * FROM users WHERE EXISTS (SELECT 1 FROM orders WHERE orders.user_id = users.id)');
41+
----
42+
[{'schema': main, 'table': users, 'context': from}, {'schema': main, 'table': orders, 'context': from}]
43+
44+
# deeply nested expression subqueries
45+
query I
46+
SELECT parse_tables('SELECT * FROM t1 WHERE a IN (SELECT x FROM t2 WHERE b IN (SELECT y FROM t3))');
47+
----
48+
[{'schema': main, 'table': t1, 'context': from}, {'schema': main, 'table': t2, 'context': from}, {'schema': main, 'table': t3, 'context': from}]
49+
50+
# NOT IN subquery
51+
query I
52+
SELECT parse_tables('SELECT * FROM t1 WHERE id NOT IN (SELECT id FROM t2)');
53+
----
54+
[{'schema': main, 'table': t1, 'context': from}, {'schema': main, 'table': t2, 'context': from}]
55+
56+
# subquery in HAVING clause
57+
query I
58+
SELECT parse_tables('SELECT user_id, COUNT(*) FROM orders GROUP BY user_id HAVING COUNT(*) > (SELECT AVG(order_count) FROM stats)');
59+
----
60+
[{'schema': main, 'table': orders, 'context': from}, {'schema': main, 'table': stats, 'context': from}]
61+
62+
# subquery in QUALIFY clause
63+
query I
64+
SELECT parse_tables('SELECT * FROM t1 QUALIFY row_number() OVER() > (SELECT COUNT(*) FROM t2)');
65+
----
66+
[{'schema': main, 'table': t1, 'context': from}, {'schema': main, 'table': t2, 'context': from}]
3267

3368
# Unsupported
3469
# -----------

test/sql/parse_tools/table_functions/parse_tables.test

Lines changed: 121 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -80,10 +80,129 @@ $$);
8080
main k from
8181
main l from
8282

83-
# INSERT INTO ... SELECT
83+
# subquery in WHERE clause (IN)
84+
query III
85+
SELECT * FROM parse_tables('SELECT * FROM schema1.users WHERE id IN (SELECT user_id FROM schema2.orders);');
86+
----
87+
schema1 users from
88+
schema2 orders from
89+
90+
# subquery in WHERE clause (EXISTS)
91+
query III
92+
SELECT * FROM parse_tables('SELECT * FROM users WHERE EXISTS (SELECT 1 FROM orders WHERE orders.user_id = users.id);');
93+
----
94+
main users from
95+
main orders from
96+
97+
# subquery in SELECT list (scalar subquery)
98+
query III
99+
SELECT * FROM parse_tables('SELECT id, (SELECT COUNT(*) FROM orders WHERE orders.user_id = users.id) AS order_count FROM users;');
100+
----
101+
main users from
102+
main orders from
103+
104+
# multiple subqueries in WHERE
105+
query III
106+
SELECT * FROM parse_tables('SELECT * FROM t1 WHERE a IN (SELECT x FROM t2) AND b IN (SELECT y FROM t3);');
107+
----
108+
main t1 from
109+
main t2 from
110+
main t3 from
111+
112+
# deeply nested expression subqueries
84113
query III
85-
SELECT * FROM parse_tables('INSERT INTO m SELECT * FROM n;');
114+
SELECT * FROM parse_tables('SELECT * FROM t1 WHERE a IN (SELECT x FROM t2 WHERE b IN (SELECT y FROM t3));');
86115
----
116+
main t1 from
117+
main t2 from
118+
main t3 from
119+
120+
# NOT IN subquery
121+
query III
122+
SELECT * FROM parse_tables('SELECT * FROM t1 WHERE id NOT IN (SELECT id FROM t2);');
123+
----
124+
main t1 from
125+
main t2 from
126+
127+
# NOT EXISTS subquery
128+
query III
129+
SELECT * FROM parse_tables('SELECT * FROM users WHERE NOT EXISTS (SELECT 1 FROM banned WHERE banned.user_id = users.id);');
130+
----
131+
main users from
132+
main banned from
133+
134+
# subquery in HAVING clause
135+
query III
136+
SELECT * FROM parse_tables('SELECT user_id, COUNT(*) FROM orders GROUP BY user_id HAVING COUNT(*) > (SELECT AVG(order_count) FROM stats);');
137+
----
138+
main orders from
139+
main stats from
140+
141+
# subquery in QUALIFY clause
142+
query III
143+
SELECT * FROM parse_tables('SELECT * FROM t1 QUALIFY row_number() OVER() > (SELECT COUNT(*) FROM t2);');
144+
----
145+
main t1 from
146+
main t2 from
147+
148+
# CASE WHEN with subquery
149+
query III
150+
SELECT * FROM parse_tables('SELECT CASE WHEN (SELECT COUNT(*) FROM t2) > 0 THEN 1 ELSE 0 END FROM t1;');
151+
----
152+
main t1 from
153+
main t2 from
154+
155+
# UNION query
156+
query III
157+
SELECT * FROM parse_tables('SELECT * FROM t1 UNION SELECT * FROM t2;');
158+
----
159+
main t1 from
160+
main t2 from
161+
162+
# UNION ALL query
163+
query III
164+
SELECT * FROM parse_tables('SELECT * FROM t1 UNION ALL SELECT * FROM t2;');
165+
----
166+
main t1 from
167+
main t2 from
168+
169+
# INTERSECT query
170+
query III
171+
SELECT * FROM parse_tables('SELECT * FROM t1 INTERSECT SELECT * FROM t2;');
172+
----
173+
main t1 from
174+
main t2 from
175+
176+
# EXCEPT query
177+
query III
178+
SELECT * FROM parse_tables('SELECT * FROM t1 EXCEPT SELECT * FROM t2;');
179+
----
180+
main t1 from
181+
main t2 from
182+
183+
# subquery in JOIN condition
184+
query III
185+
SELECT * FROM parse_tables('SELECT * FROM t1 JOIN t2 ON t1.id = (SELECT MAX(id) FROM t3);');
186+
----
187+
main t1 from
188+
main t2 join_right
189+
main t3 from
190+
191+
# subquery in GROUP BY (rare but valid)
192+
query III
193+
SELECT * FROM parse_tables('SELECT COUNT(*) FROM t1 GROUP BY (SELECT 1 FROM t2 LIMIT 1);');
194+
----
195+
main t1 from
196+
main t2 from
197+
198+
# subquery in ORDER BY
199+
query III
200+
SELECT * FROM parse_tables('SELECT * FROM t1 ORDER BY (SELECT COUNT(*) FROM t2);');
201+
----
202+
main t1 from
203+
main t2 from
204+
205+
# INSERT INTO ... SELECT
query III
SELECT * FROM parse_tables('INSERT INTO m SELECT * FROM n;');
----
87206

88207
# UPDATE with FROM
89208
query III

0 commit comments

Comments
 (0)