From 1b7dcce06b397b81c3253fe760172aafdd1f5b7f Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 06:41:47 -0700 Subject: [PATCH 1/3] Add parser-agnostic regression test for DuckDB parser swap DuckDB is replacing its legacy PostgreSQL parser with a PEG parser (opt-in in 1.5, the sole parser on main, default in 2.0). Yardstick integrates via allow_parser_override_extension="fallback", so when the override declines a query, DuckDB's parser handles it -- including surfacing syntax errors, whose shape differs between the legacy and PEG parsers. Add test/sql/peg_parser.test guarding the two behaviors most exposed to the swap: the parser_override rewrite path still works, and the fallback path still raises a parser-agnostic "Parser Error". The test runs under whichever parser the build uses, so it becomes PEG coverage automatically once the suite runs against a PEG-default DuckDB. --- test/sql/peg_parser.test | 70 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 test/sql/peg_parser.test diff --git a/test/sql/peg_parser.test b/test/sql/peg_parser.test new file mode 100644 index 0000000..9122093 --- /dev/null +++ b/test/sql/peg_parser.test @@ -0,0 +1,70 @@ +# name: test/sql/peg_parser.test +# description: Guard Yardstick's parser integration across DuckDB's parser swap. +# DuckDB is replacing its legacy PostgreSQL parser with a PEG parser (opt-in in +# 1.5, the sole parser on main, default in 2.0). This file is parser-agnostic so +# it runs under whichever parser the build uses -- the legacy parser on the +# stable line today, and the PEG parser automatically once the suite runs +# against a PEG-default DuckDB (the duckdb-next-build job, once Yardstick +# compiles against current main again). It pins down the two behaviors most +# exposed to the swap: +# 1. Yardstick's parser_override path still rewrites custom syntax correctly. +# 2. 
When the override declines, the active parser still surfaces syntax errors +# (the "fallback" path). The assertion matches only "Parser Error", which +# both the legacy and PEG parsers emit, so it stays parser-agnostic. +# group: [yardstick] + +require yardstick + +statement ok +CREATE TABLE peg_sales (year INT, region TEXT, amount DOUBLE); + +statement ok +INSERT INTO peg_sales VALUES + (2022, 'US', 100), (2022, 'EU', 50), + (2023, 'US', 150), (2023, 'EU', 75); + +statement ok +CREATE VIEW peg_sales_v AS +SELECT year, region, SUM(amount) AS MEASURE revenue +FROM peg_sales; + +# Override path, no SEMANTIC prefix (the parser_override="fallback" route). +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) FROM peg_sales_v; +---- +2022 EU 50.0 +2022 US 100.0 +2023 EU 75.0 +2023 US 150.0 + +# Override path with explicit SEMANTIC prefix. +query IIR rowsort +SEMANTIC SELECT year, region, AGGREGATE(revenue) FROM peg_sales_v; +---- +2022 EU 50.0 +2022 US 100.0 +2023 EU 75.0 +2023 US 150.0 + +# AT modifier expansion still parses and rewrites under the active parser. +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +FROM peg_sales_v; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + +# A plain query Yardstick does not touch must still execute normally. +query I +SELECT 1 + 1; +---- +2 + +# Fallback path: Yardstick declines, so the active parser must still raise a +# syntax error. Match only the parser-agnostic "Parser Error" prefix. +statement error +SELECT * FROM; +---- +Parser Error From ef1898017b34a838ae39594c5d045ab8515bcded Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 07:21:29 -0700 Subject: [PATCH 2/3] Port parser FFI to DuckDB main's expression API (dual-API) DuckDB main refactored the parsed-expression API: subclass fields are now private (accessed via FunctionName()/GetArgumentsMutable()/LeftMutable()/...) and names use a dedicated Identifier type instead of std::string. 
This broke the duckdb-next-build CI lane (compile failure in yardstick_parser_ffi.cpp), which has been red for a week. Add a small compatibility shim, selected at compile time via __has_include("duckdb/common/identifier.hpp") (matching the project's existing __has_include pattern), and route all expression field access through it. The old field-based API (DuckDB 1.5 and the pinned submodule) and the new accessor-based API (main) are both supported, so the stable and next CI lanes both build. Behavior is unchanged: the shim is a pure access adaptation over the same tree walks. Verified with -fsyntax-only against both DuckDB main and v1.5-variegata headers (0 errors each). --- src/yardstick_parser_ffi.cpp | 440 ++++++++++++++++++++++++++--------- 1 file changed, 325 insertions(+), 115 deletions(-) diff --git a/src/yardstick_parser_ffi.cpp b/src/yardstick_parser_ffi.cpp index 80f5d3e..951a5d8 100644 --- a/src/yardstick_parser_ffi.cpp +++ b/src/yardstick_parser_ffi.cpp @@ -43,6 +43,204 @@ using namespace duckdb; +//============================================================================= +// Compatibility shims for DuckDB's expression-API refactor. +// +// DuckDB main made the ParsedExpression subclass fields private (exposing +// accessors instead) and introduced a dedicated Identifier type in place of +// std::string for names. DuckDB 1.5 and earlier expose public fields and use +// std::string. We detect the new API via the header it introduced and route +// field access through these helpers, so the extension builds against both the +// stable line and main. Remove this shim once the minimum supported DuckDB has +// the new API. +//============================================================================= + +#if __has_include("duckdb/common/identifier.hpp") +#define YARDSTICK_NEW_EXPR_API 1 +#else +#define YARDSTICK_NEW_EXPR_API 0 +#endif + +namespace { + +// Identifier/string -> raw string (case preserved). 
The Identifier overload only +// exists on the new API; the std::string overload covers the old API and any +// already-raw string. +#if YARDSTICK_NEW_EXPR_API +inline const std::string &YsName(const Identifier &id) { return id.GetIdentifierName(); } +#endif +inline const std::string &YsName(const std::string &s) { return s; } + +inline const std::string &YsFuncName(const FunctionExpression &f) { +#if YARDSTICK_NEW_EXPR_API + return f.FunctionName().GetIdentifierName(); +#else + return f.function_name; +#endif +} + +// Direct child expressions (arguments) of a function/window call. +inline std::vector YsArgs(FunctionExpression &f) { + std::vector out; +#if YARDSTICK_NEW_EXPR_API + for (auto &arg : f.GetArgumentsMutable()) out.push_back(arg.GetExpressionMutable().get()); +#else + for (auto &child : f.children) out.push_back(child.get()); +#endif + return out; +} +inline std::vector YsArgs(WindowExpression &w) { + std::vector out; +#if YARDSTICK_NEW_EXPR_API + for (auto &arg : w.GetArgumentsMutable()) out.push_back(arg.GetExpressionMutable().get()); +#else + for (auto &child : w.children) out.push_back(child.get()); +#endif + return out; +} + +inline ParsedExpression *YsFilter(FunctionExpression &f) { +#if YARDSTICK_NEW_EXPR_API + return f.FilterMutable().get(); +#else + return f.filter.get(); +#endif +} +inline ParsedExpression *YsFilter(WindowExpression &w) { +#if YARDSTICK_NEW_EXPR_API + return w.FilterMutable().get(); +#else + return w.filter_expr.get(); +#endif +} + +inline ParsedExpression *YsLeft(ComparisonExpression &c) { +#if YARDSTICK_NEW_EXPR_API + return c.LeftMutable().get(); +#else + return c.left.get(); +#endif +} +inline ParsedExpression *YsRight(ComparisonExpression &c) { +#if YARDSTICK_NEW_EXPR_API + return c.RightMutable().get(); +#else + return c.right.get(); +#endif +} + +inline std::vector YsChildren(ConjunctionExpression &c) { + std::vector out; +#if YARDSTICK_NEW_EXPR_API + for (auto &child : c.GetChildrenMutable()) out.push_back(child.get()); 
+#else + for (auto &child : c.children) out.push_back(child.get()); +#endif + return out; +} +inline std::vector YsChildren(OperatorExpression &o) { + std::vector out; +#if YARDSTICK_NEW_EXPR_API + for (auto &child : o.GetChildrenMutable()) out.push_back(child.get()); +#else + for (auto &child : o.children) out.push_back(child.get()); +#endif + return out; +} +inline std::vector YsPartitions(WindowExpression &w) { + std::vector out; +#if YARDSTICK_NEW_EXPR_API + for (auto &part : w.PartitionsMutable()) out.push_back(part.get()); +#else + for (auto &part : w.partitions) out.push_back(part.get()); +#endif + return out; +} + +inline std::vector &YsCaseChecks(CaseExpression &c) { +#if YARDSTICK_NEW_EXPR_API + return c.CaseChecksMutable(); +#else + return c.case_checks; +#endif +} +inline ParsedExpression *YsElse(CaseExpression &c) { +#if YARDSTICK_NEW_EXPR_API + return c.ElseMutable().get(); +#else + return c.else_expr.get(); +#endif +} + +inline ParsedExpression *YsChild(CastExpression &c) { +#if YARDSTICK_NEW_EXPR_API + return c.ChildMutable().get(); +#else + return c.child.get(); +#endif +} +inline ParsedExpression *YsChild(SubqueryExpression &s) { +#if YARDSTICK_NEW_EXPR_API + return s.GetChildMutable().get(); +#else + return s.child.get(); +#endif +} +inline const ParsedExpression *YsChild(const SubqueryExpression &s) { +#if YARDSTICK_NEW_EXPR_API + return s.GetChild().get(); +#else + return s.child.get(); +#endif +} +// Mutable child slot of a subquery, for in-place rewrites. 
+inline unique_ptr &YsChildRef(SubqueryExpression &s) { +#if YARDSTICK_NEW_EXPR_API + return s.GetChildMutable(); +#else + return s.child; +#endif +} + +inline ParsedExpression *YsInput(BetweenExpression &b) { +#if YARDSTICK_NEW_EXPR_API + return b.InputMutable().get(); +#else + return b.input.get(); +#endif +} +inline ParsedExpression *YsLower(BetweenExpression &b) { +#if YARDSTICK_NEW_EXPR_API + return b.LowerBoundMutable().get(); +#else + return b.lower.get(); +#endif +} +inline ParsedExpression *YsUpper(BetweenExpression &b) { +#if YARDSTICK_NEW_EXPR_API + return b.UpperBoundMutable().get(); +#else + return b.upper.get(); +#endif +} + +inline size_t YsColumnNameCount(const ColumnRefExpression &c) { +#if YARDSTICK_NEW_EXPR_API + return c.ColumnNames().size(); +#else + return c.column_names.size(); +#endif +} +inline void YsPrependQualifier(ColumnRefExpression &c, const std::string &qualifier) { +#if YARDSTICK_NEW_EXPR_API + c.ColumnNamesMutable().insert(c.ColumnNamesMutable().begin(), Identifier(qualifier)); +#else + c.column_names.insert(c.column_names.begin(), qualifier); +#endif +} + +} // namespace + //============================================================================= // Helper: Safe strdup that handles nullptr //============================================================================= @@ -72,14 +270,16 @@ static bool IsPotentialOrderAliasRef( std::string &alias_name ) { if (!colref.IsQualified()) { - alias_name = colref.GetColumnName(); + alias_name = YsName(colref.GetColumnName()); return true; } - if (colref.column_names.size() == 2 && StringUtil::CIEquals(colref.GetTableName(), "alias")) { - if (from_qualifiers.find(NormalizeAliasName(colref.GetTableName())) != from_qualifiers.end()) { + if (YsColumnNameCount(colref) == 2 && + StringUtil::CIEquals(YsName(colref.GetTableName()), "alias")) { + if (from_qualifiers.find(NormalizeAliasName(YsName(colref.GetTableName()))) != + from_qualifiers.end()) { return false; } - alias_name = 
colref.GetColumnName(); + alias_name = YsName(colref.GetColumnName()); return true; } return false; @@ -529,18 +729,19 @@ static void FindAggregateCalls(ParsedExpression* expr, std::vectorGetExpressionClass()) { case ExpressionClass::FUNCTION: { auto* func = static_cast(expr); - std::string lower_name = StringUtil::Lower(func->function_name); + std::string lower_name = StringUtil::Lower(YsFuncName(*func)); + auto args = YsArgs(*func); if (lower_name == "aggregate") { AggregateCallInfo info; // Get measure name from first argument - if (!func->children.empty()) { + if (!args.empty()) { // First argument should be measure name (column ref or string) - auto* first_arg = func->children[0].get(); + auto* first_arg = args[0]; if (first_arg->GetExpressionClass() == ExpressionClass::COLUMN_REF) { auto* col = static_cast(first_arg); - info.measure_name = col->GetColumnName(); + info.measure_name = YsName(col->GetColumnName()); } else { info.measure_name = first_arg->ToString(); } @@ -559,21 +760,22 @@ static void FindAggregateCalls(ParsedExpression* expr, std::vectorchildren.size(); i++) { - auto* arg = func->children[i].get(); + for (size_t i = 1; i < args.size(); i++) { + auto* arg = args[i]; // Check if this is an AT modifier call if (arg->GetExpressionClass() == ExpressionClass::FUNCTION) { auto* at_func = static_cast(arg); - std::string at_name = StringUtil::Lower(at_func->function_name); + std::string at_name = StringUtil::Lower(YsFuncName(*at_func)); if (at_name == "at") { YardstickAtModifier mod; mod.dimension = nullptr; mod.value = nullptr; - if (!at_func->children.empty()) { - auto* at_arg = at_func->children[0].get(); + auto at_args = YsArgs(*at_func); + if (!at_args.empty()) { + auto* at_arg = at_args[0]; std::string at_arg_str = at_arg->ToString(); std::string at_arg_lower = StringUtil::Lower(at_arg_str); @@ -594,9 +796,9 @@ static void FindAggregateCalls(ParsedExpression* expr, std::vectorchildren.size() >= 2) { + } else if (at_args.size() >= 2) { // WHERE 
modifier or ALL dimension - auto* second = at_func->children[1].get(); + auto* second = at_args[1]; if (at_arg_lower == "all") { mod.type = YARDSTICK_AT_ALL_DIM; mod.dimension = safe_strdup(second->ToString()); @@ -622,60 +824,60 @@ static void FindAggregateCalls(ParsedExpression* expr, std::vectorchildren) { - FindAggregateCalls(child.get(), results, sql); + for (auto* child : args) { + FindAggregateCalls(child, results, sql); } - if (func->filter) { - FindAggregateCalls(func->filter.get(), results, sql); + if (auto* filter = YsFilter(*func)) { + FindAggregateCalls(filter, results, sql); } break; } case ExpressionClass::COMPARISON: { auto* comp = static_cast(expr); - FindAggregateCalls(comp->left.get(), results, sql); - FindAggregateCalls(comp->right.get(), results, sql); + FindAggregateCalls(YsLeft(*comp), results, sql); + FindAggregateCalls(YsRight(*comp), results, sql); break; } case ExpressionClass::CONJUNCTION: { auto* conj = static_cast(expr); - for (auto& child : conj->children) { - FindAggregateCalls(child.get(), results, sql); + for (auto* child : YsChildren(*conj)) { + FindAggregateCalls(child, results, sql); } break; } case ExpressionClass::OPERATOR: { auto* op = static_cast(expr); - for (auto& child : op->children) { - FindAggregateCalls(child.get(), results, sql); + for (auto* child : YsChildren(*op)) { + FindAggregateCalls(child, results, sql); } break; } case ExpressionClass::CASE: { auto* case_expr = static_cast(expr); - for (auto& check : case_expr->case_checks) { + for (auto& check : YsCaseChecks(*case_expr)) { FindAggregateCalls(check.when_expr.get(), results, sql); FindAggregateCalls(check.then_expr.get(), results, sql); } - if (case_expr->else_expr) { - FindAggregateCalls(case_expr->else_expr.get(), results, sql); + if (auto* else_expr = YsElse(*case_expr)) { + FindAggregateCalls(else_expr, results, sql); } break; } case ExpressionClass::CAST: { auto* cast = static_cast(expr); - FindAggregateCalls(cast->child.get(), results, sql); + 
FindAggregateCalls(YsChild(*cast), results, sql); break; } case ExpressionClass::SUBQUERY: { auto* subq = static_cast(expr); - if (subq->child) { - FindAggregateCalls(subq->child.get(), results, sql); + if (auto* child = YsChild(*subq)) { + FindAggregateCalls(child, results, sql); } // Note: We don't recurse into the subquery itself break; @@ -683,23 +885,23 @@ static void FindAggregateCalls(ParsedExpression* expr, std::vector(expr); - for (auto& child : window->children) { - FindAggregateCalls(child.get(), results, sql); + for (auto* child : YsArgs(*window)) { + FindAggregateCalls(child, results, sql); } - for (auto& part : window->partitions) { - FindAggregateCalls(part.get(), results, sql); + for (auto* part : YsPartitions(*window)) { + FindAggregateCalls(part, results, sql); } - if (window->filter_expr) { - FindAggregateCalls(window->filter_expr.get(), results, sql); + if (auto* filter = YsFilter(*window)) { + FindAggregateCalls(filter, results, sql); } break; } case ExpressionClass::BETWEEN: { auto* between = static_cast(expr); - FindAggregateCalls(between->input.get(), results, sql); - FindAggregateCalls(between->lower.get(), results, sql); - FindAggregateCalls(between->upper.get(), results, sql); + FindAggregateCalls(YsInput(*between), results, sql); + FindAggregateCalls(YsLower(*between), results, sql); + FindAggregateCalls(YsUpper(*between), results, sql); break; } @@ -720,8 +922,8 @@ static void CollectTablesFromTableRef(TableRef* ref, std::vector(ref); YardstickTableRef t; - t.table_name = safe_strdup(base->table_name); - t.alias = base->alias.empty() ? nullptr : safe_strdup(base->alias); + t.table_name = safe_strdup(YsName(base->table_name)); + t.alias = base->alias.empty() ? nullptr : safe_strdup(YsName(base->alias)); t.is_subquery = false; tables.push_back(t); break; @@ -737,8 +939,8 @@ static void CollectTablesFromTableRef(TableRef* ref, std::vector(ref); YardstickTableRef t; - t.table_name = subq->alias.empty() ? 
safe_strdup("(subquery)") : safe_strdup(subq->alias); - t.alias = subq->alias.empty() ? nullptr : safe_strdup(subq->alias); + t.table_name = subq->alias.empty() ? safe_strdup("(subquery)") : safe_strdup(YsName(subq->alias)); + t.alias = subq->alias.empty() ? nullptr : safe_strdup(YsName(subq->alias)); t.is_subquery = true; tables.push_back(t); break; @@ -760,50 +962,53 @@ static bool ExpressionContainsAggregate(ParsedExpression* expr) { switch (expr->GetExpressionClass()) { case ExpressionClass::FUNCTION: { auto* func = static_cast(expr); - if (IsStandardAggregate(func->function_name)) { + if (IsStandardAggregate(YsFuncName(*func))) { return true; } - for (auto& child : func->children) { - if (ExpressionContainsAggregate(child.get())) return true; + for (auto* child : YsArgs(*func)) { + if (ExpressionContainsAggregate(child)) return true; + } + if (auto* filter = YsFilter(*func)) { + if (ExpressionContainsAggregate(filter)) return true; } - if (func->filter && ExpressionContainsAggregate(func->filter.get())) return true; return false; } case ExpressionClass::COMPARISON: { auto* comp = static_cast(expr); - return ExpressionContainsAggregate(comp->left.get()) || - ExpressionContainsAggregate(comp->right.get()); + return ExpressionContainsAggregate(YsLeft(*comp)) || + ExpressionContainsAggregate(YsRight(*comp)); } case ExpressionClass::CONJUNCTION: { auto* conj = static_cast(expr); - for (auto& child : conj->children) { - if (ExpressionContainsAggregate(child.get())) return true; + for (auto* child : YsChildren(*conj)) { + if (ExpressionContainsAggregate(child)) return true; } return false; } case ExpressionClass::OPERATOR: { auto* op = static_cast(expr); - for (auto& child : op->children) { - if (ExpressionContainsAggregate(child.get())) return true; + for (auto* child : YsChildren(*op)) { + if (ExpressionContainsAggregate(child)) return true; } return false; } case ExpressionClass::CASE: { auto* case_expr = static_cast(expr); - for (auto& check : 
case_expr->case_checks) { + for (auto& check : YsCaseChecks(*case_expr)) { if (ExpressionContainsAggregate(check.when_expr.get())) return true; if (ExpressionContainsAggregate(check.then_expr.get())) return true; } - return case_expr->else_expr && ExpressionContainsAggregate(case_expr->else_expr.get()); + auto* else_expr = YsElse(*case_expr); + return else_expr && ExpressionContainsAggregate(else_expr); } case ExpressionClass::CAST: { auto* cast = static_cast(expr); - return ExpressionContainsAggregate(cast->child.get()); + return ExpressionContainsAggregate(YsChild(*cast)); } case ExpressionClass::WINDOW: @@ -812,9 +1017,9 @@ static bool ExpressionContainsAggregate(ParsedExpression* expr) { case ExpressionClass::BETWEEN: { auto* between = static_cast(expr); - return ExpressionContainsAggregate(between->input.get()) || - ExpressionContainsAggregate(between->lower.get()) || - ExpressionContainsAggregate(between->upper.get()); + return ExpressionContainsAggregate(YsInput(*between)) || + ExpressionContainsAggregate(YsLower(*between)) || + ExpressionContainsAggregate(YsUpper(*between)); } default: @@ -832,57 +1037,60 @@ static bool ExpressionContainsMeasureRef(ParsedExpression* expr) { switch (expr->GetExpressionClass()) { case ExpressionClass::FUNCTION: { auto* func = static_cast(expr); - if (StringUtil::Lower(func->function_name) == "aggregate") { + if (StringUtil::Lower(YsFuncName(*func)) == "aggregate") { return true; } - for (auto& child : func->children) { - if (ExpressionContainsMeasureRef(child.get())) return true; + for (auto* child : YsArgs(*func)) { + if (ExpressionContainsMeasureRef(child)) return true; + } + if (auto* filter = YsFilter(*func)) { + if (ExpressionContainsMeasureRef(filter)) return true; } - if (func->filter && ExpressionContainsMeasureRef(func->filter.get())) return true; return false; } case ExpressionClass::COMPARISON: { auto* comp = static_cast(expr); - return ExpressionContainsMeasureRef(comp->left.get()) || - 
ExpressionContainsMeasureRef(comp->right.get()); + return ExpressionContainsMeasureRef(YsLeft(*comp)) || + ExpressionContainsMeasureRef(YsRight(*comp)); } case ExpressionClass::CONJUNCTION: { auto* conj = static_cast(expr); - for (auto& child : conj->children) { - if (ExpressionContainsMeasureRef(child.get())) return true; + for (auto* child : YsChildren(*conj)) { + if (ExpressionContainsMeasureRef(child)) return true; } return false; } case ExpressionClass::OPERATOR: { auto* op = static_cast(expr); - for (auto& child : op->children) { - if (ExpressionContainsMeasureRef(child.get())) return true; + for (auto* child : YsChildren(*op)) { + if (ExpressionContainsMeasureRef(child)) return true; } return false; } case ExpressionClass::CASE: { auto* case_expr = static_cast(expr); - for (auto& check : case_expr->case_checks) { + for (auto& check : YsCaseChecks(*case_expr)) { if (ExpressionContainsMeasureRef(check.when_expr.get())) return true; if (ExpressionContainsMeasureRef(check.then_expr.get())) return true; } - return case_expr->else_expr && ExpressionContainsMeasureRef(case_expr->else_expr.get()); + auto* else_expr = YsElse(*case_expr); + return else_expr && ExpressionContainsMeasureRef(else_expr); } case ExpressionClass::CAST: { auto* cast = static_cast(expr); - return ExpressionContainsMeasureRef(cast->child.get()); + return ExpressionContainsMeasureRef(YsChild(*cast)); } case ExpressionClass::BETWEEN: { auto* between = static_cast(expr); - return ExpressionContainsMeasureRef(between->input.get()) || - ExpressionContainsMeasureRef(between->lower.get()) || - ExpressionContainsMeasureRef(between->upper.get()); + return ExpressionContainsMeasureRef(YsInput(*between)) || + ExpressionContainsMeasureRef(YsLower(*between)) || + ExpressionContainsMeasureRef(YsUpper(*between)); } default: @@ -896,82 +1104,82 @@ static void QualifyColumnRefs(ParsedExpression* expr, const std::string& qualifi switch (expr->GetExpressionClass()) { case ExpressionClass::COLUMN_REF: { auto* col 
= static_cast(expr); - if (col->column_names.size() == 1) { - col->column_names.insert(col->column_names.begin(), qualifier); + if (YsColumnNameCount(*col) == 1) { + YsPrependQualifier(*col, qualifier); } break; } case ExpressionClass::FUNCTION: { auto* func = static_cast(expr); - for (auto& child : func->children) { - QualifyColumnRefs(child.get(), qualifier); + for (auto* child : YsArgs(*func)) { + QualifyColumnRefs(child, qualifier); } - if (func->filter) { - QualifyColumnRefs(func->filter.get(), qualifier); + if (auto* filter = YsFilter(*func)) { + QualifyColumnRefs(filter, qualifier); } break; } case ExpressionClass::COMPARISON: { auto* comp = static_cast(expr); - QualifyColumnRefs(comp->left.get(), qualifier); - QualifyColumnRefs(comp->right.get(), qualifier); + QualifyColumnRefs(YsLeft(*comp), qualifier); + QualifyColumnRefs(YsRight(*comp), qualifier); break; } case ExpressionClass::CONJUNCTION: { auto* conj = static_cast(expr); - for (auto& child : conj->children) { - QualifyColumnRefs(child.get(), qualifier); + for (auto* child : YsChildren(*conj)) { + QualifyColumnRefs(child, qualifier); } break; } case ExpressionClass::OPERATOR: { auto* op = static_cast(expr); - for (auto& child : op->children) { - QualifyColumnRefs(child.get(), qualifier); + for (auto* child : YsChildren(*op)) { + QualifyColumnRefs(child, qualifier); } break; } case ExpressionClass::CASE: { auto* case_expr = static_cast(expr); - for (auto& check : case_expr->case_checks) { + for (auto& check : YsCaseChecks(*case_expr)) { QualifyColumnRefs(check.when_expr.get(), qualifier); QualifyColumnRefs(check.then_expr.get(), qualifier); } - if (case_expr->else_expr) { - QualifyColumnRefs(case_expr->else_expr.get(), qualifier); + if (auto* else_expr = YsElse(*case_expr)) { + QualifyColumnRefs(else_expr, qualifier); } break; } case ExpressionClass::CAST: { auto* cast = static_cast(expr); - QualifyColumnRefs(cast->child.get(), qualifier); + QualifyColumnRefs(YsChild(*cast), qualifier); break; } case 
ExpressionClass::SUBQUERY: { auto* subq = static_cast(expr); - if (subq->child) { - QualifyColumnRefs(subq->child.get(), qualifier); + if (auto* child = YsChild(*subq)) { + QualifyColumnRefs(child, qualifier); } break; } case ExpressionClass::WINDOW: { auto* window = static_cast(expr); - for (auto& child : window->children) { - QualifyColumnRefs(child.get(), qualifier); + for (auto* child : YsArgs(*window)) { + QualifyColumnRefs(child, qualifier); } - for (auto& part : window->partitions) { - QualifyColumnRefs(part.get(), qualifier); + for (auto* part : YsPartitions(*window)) { + QualifyColumnRefs(part, qualifier); } - if (window->filter_expr) { - QualifyColumnRefs(window->filter_expr.get(), qualifier); + if (auto* filter = YsFilter(*window)) { + QualifyColumnRefs(filter, qualifier); } break; } case ExpressionClass::BETWEEN: { auto* between = static_cast(expr); - QualifyColumnRefs(between->input.get(), qualifier); - QualifyColumnRefs(between->lower.get(), qualifier); - QualifyColumnRefs(between->upper.get(), qualifier); + QualifyColumnRefs(YsInput(*between), qualifier); + QualifyColumnRefs(YsLower(*between), qualifier); + QualifyColumnRefs(YsUpper(*between), qualifier); break; } default: @@ -1146,7 +1354,7 @@ extern "C" YardstickSelectInfo* yardstick_parse_select(const char* sql) { for (auto& expr : select_node->select_list) { YardstickSelectItem item; item.expression_sql = safe_strdup(expr->ToString()); - item.alias = expr->HasAlias() ? safe_strdup(expr->GetAlias()) : nullptr; + item.alias = expr->HasAlias() ? 
safe_strdup(YsName(expr->GetAlias())) : nullptr; auto query_location = expr->GetQueryLocation(); if (query_location.IsValid()) { @@ -1267,13 +1475,13 @@ static void CollectTableQualifiers(const TableRef *ref, TableQualifierSet &quali return; } - AddTableQualifier(qualifiers, ref->alias); + AddTableQualifier(qualifiers, YsName(ref->alias)); switch (ref->type) { case TableReferenceType::BASE_TABLE: { auto *base = static_cast(ref); if (ref->alias.empty()) { - AddTableQualifier(qualifiers, base->table_name); + AddTableQualifier(qualifiers, YsName(base->table_name)); } break; } @@ -1322,8 +1530,8 @@ static void EnumerateOrderAliasScopeChildren( ) { if (expr.GetExpressionClass() == ExpressionClass::SUBQUERY) { auto &subquery_expr = expr.Cast(); - if (subquery_expr.child) { - callback(*subquery_expr.child); + if (auto* child = YsChild(subquery_expr)) { + callback(*child); } return; } @@ -1337,8 +1545,9 @@ static void EnumerateOrderAliasScopeChildren( ) { if (expr.GetExpressionClass() == ExpressionClass::SUBQUERY) { auto &subquery_expr = expr.Cast(); - if (subquery_expr.child) { - callback(subquery_expr.child); + auto &child = YsChildRef(subquery_expr); + if (child) { + callback(child); } return; } @@ -1431,7 +1640,7 @@ extern "C" char* yardstick_inline_order_by_subquery_aliases(const char* sql) { continue; } bool has_subquery = expr->HasSubquery(); - aliases[NormalizeAliasName(expr->GetAlias())] = SelectAliasEntry { expr.get(), has_subquery }; + aliases[NormalizeAliasName(YsName(expr->GetAlias()))] = SelectAliasEntry { expr.get(), has_subquery }; has_subquery_alias = has_subquery_alias || has_subquery; } @@ -1504,10 +1713,11 @@ extern "C" YardstickExpressionInfo* yardstick_parse_expression(const char* expr_ // If it's a simple aggregate function, extract the function name and inner expr if (expr->GetExpressionClass() == ExpressionClass::FUNCTION) { auto* func = static_cast(expr.get()); - if (IsStandardAggregate(func->function_name)) { - result->aggregate_func = 
safe_strdup(StringUtil::Upper(func->function_name)); - if (!func->children.empty()) { - result->inner_expr = safe_strdup(func->children[0]->ToString()); + if (IsStandardAggregate(YsFuncName(*func))) { + result->aggregate_func = safe_strdup(StringUtil::Upper(YsFuncName(*func))); + auto args = YsArgs(*func); + if (!args.empty()) { + result->inner_expr = safe_strdup(args[0]->ToString()); } } } From fae2eec61cd99f22c69fa13b96f363f4af0e1828 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 07:34:30 -0700 Subject: [PATCH 3/3] Port ParserExtension parse_function to DuckDB main's token API Fixing the expression FFI let the next-build compile advance to the next DuckDB-main API change: parse_function_t now receives the post-PEG-failure token tail (const vector&) instead of the raw query string (yardstick_extension.hpp/.cpp). Yardstick does all of its rewriting in yardstick_parser_override, which still receives the full query string on both APIs, so on the new signature parse_function is a no-op fallback. The old string-based fallback is retained for DuckDB 1.5 via the same __has_include detection. Matching yardstick_parse to parse_function_t also fixes the function-pointer comparison in yardstick_bind. Verified with -fsyntax-only against both DuckDB main and v1.5-variegata (0 errors); all three extension translation units now compile against main. 
--- src/include/yardstick_extension.hpp | 17 +++++++++++++++++ src/yardstick_extension.cpp | 11 +++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/include/yardstick_extension.hpp b/src/include/yardstick_extension.hpp index 067b8fc..b886e22 100644 --- a/src/include/yardstick_extension.hpp +++ b/src/include/yardstick_extension.hpp @@ -22,8 +22,25 @@ class YardstickExtension : public Extension { BoundStatement yardstick_bind(ClientContext &context, Binder &binder, OperatorExtensionInfo *info, SQLStatement &statement); +// DuckDB main changed parse_function_t to receive the post-PEG-failure token +// tail (vector) instead of the raw query string; DuckDB 1.5 and +// earlier pass the query string. Detected by proxy via identifier.hpp, which the Identifier/expression-API refactor introduced; this assumes both changes ship together. +// Yardstick performs all of its rewriting in yardstick_parser_override (which +// still receives the full query string on both APIs), so on the new signature +// parse_function is a no-op fallback. +#if __has_include("duckdb/common/identifier.hpp") +#define YARDSTICK_TOKEN_PARSE_FN 1 +#else +#define YARDSTICK_TOKEN_PARSE_FN 0 +#endif + +#if YARDSTICK_TOKEN_PARSE_FN +ParserExtensionParseResult yardstick_parse(ParserExtensionInfo *, + const vector &tokens); +#else ParserExtensionParseResult yardstick_parse(ParserExtensionInfo *, const std::string &query); +#endif ParserExtensionPlanResult yardstick_plan(ParserExtensionInfo *, ClientContext &, unique_ptr); diff --git a/src/yardstick_extension.cpp b/src/yardstick_extension.cpp index fc879a0..4891911 100644 --- a/src/yardstick_extension.cpp +++ b/src/yardstick_extension.cpp @@ -330,6 +330,16 @@ static bool StartsWithSemantic(const std::string &query, std::string &stripped_q return true; } +#if YARDSTICK_TOKEN_PARSE_FN +// DuckDB main: parse_function receives the post-PEG-failure token tail rather +// than the query string.
Yardstick rewrites queries earlier, in +// yardstick_parser_override (which sees the full query string), so there is +// nothing to do here -- decline and let DuckDB proceed. +ParserExtensionParseResult yardstick_parse(ParserExtensionInfo *, + const vector &) { + return ParserExtensionParseResult(); +} +#else ParserExtensionParseResult yardstick_parse(ParserExtensionInfo *, const std::string &query) { @@ -422,6 +432,7 @@ ParserExtensionParseResult yardstick_parse(ParserExtensionInfo *, // Not a yardstick query, let DuckDB handle it return ParserExtensionParseResult(); } +#endif // YARDSTICK_TOKEN_PARSE_FN //============================================================================= // PARSER OVERRIDE: intercepts ALL queries before DuckDB's native parser