From 50b4253333a88d01ae2d2bb71343a916cd2bec42 Mon Sep 17 00:00:00 2001 From: crprashant <5108573+crprashant@users.noreply.github.com> Date: Thu, 30 Apr 2026 23:23:35 +0000 Subject: [PATCH] Fix UNWIND [null] causing count() to miscount null rows (#2383) Cypher's count() aggregate follows Neo4j/openCypher semantics and must ignore nulls. Previously, UNWIND [null] AS x RETURN count(x) returned 1 instead of 0 because age_unnest() materialized an agtype JSON null as a non-NULL datum wrapping AGTV_NULL, so the count() strictness check never skipped the row. The reference paths RETURN count(null) WITH null AS x RETURN count(x) already returned 0. Rather than change age_unnest() -- which is also reached by list comprehensions and the predicate functions all/any/none/single via the construct_age_function_name('unnest') -> 'age_unnest' rewrite and whose callers deliberately rely on agtype-null flowing through -- introduce a dedicated SRF age_unwind() with UNWIND-specific null semantics: * top-level AGTV_NULL elements are emitted as SQL NULL rows so null-strict operators (count, IS NULL, WHERE x IS NOT NULL, ...) behave like they do for WITH null AS x, * non-null elements are returned unchanged, and * nested nulls inside arrays/objects are preserved as agtype-null so container semantics are unchanged. transform_cypher_unwind() now calls age_unwind(); list comprehensions and predicate functions continue to go through age_unnest(), so their existing AGE-specific null behavior is untouched. Adds regression tests covering: * UNWIND [null] RETURN count(x) = 0 * UNWIND [1, null, 2, null, 3] RETURN count(x) = 3 * UNWIND [null] RETURN x IS NULL = true * UNWIND [null] RETURN x produces an SQL NULL row value * WHERE x IS NOT NULL filtering after UNWIND * count(*) still sees the row produced by UNWIND [null] * nested nulls inside containers survive unchanged Fixes #2383. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- age--1.7.0--y.y.y.sql | 12 ++++ regress/expected/cypher_unwind.out | 77 +++++++++++++++++++++ regress/sql/cypher_unwind.sql | 51 ++++++++++++++ sql/agtype_typecast.sql | 7 ++ src/backend/parser/cypher_clause.c | 6 +- src/backend/utils/adt/agtype.c | 103 +++++++++++++++++++++++++++++ 6 files changed, 253 insertions(+), 3 deletions(-) diff --git a/age--1.7.0--y.y.y.sql b/age--1.7.0--y.y.y.sql index a3cdea279..1a000a97e 100644 --- a/age--1.7.0--y.y.y.sql +++ b/age--1.7.0--y.y.y.sql @@ -459,3 +459,15 @@ BEGIN END LOOP; END; $$; + +-- +-- Issue #2383: add age_unwind() for Cypher UNWIND clause. Behaves like +-- age_unnest() but emits SQL NULL for top-level agtype JSON null elements +-- so null-strict operators (count, IS NULL) match Neo4j/openCypher. +-- +CREATE FUNCTION ag_catalog.age_unwind(agtype) + RETURNS SETOF agtype +LANGUAGE c +IMMUTABLE +PARALLEL SAFE +AS 'MODULE_PATHNAME'; diff --git a/regress/expected/cypher_unwind.out b/regress/expected/cypher_unwind.out index fae1a26ff..5ae04861b 100644 --- a/regress/expected/cypher_unwind.out +++ b/regress/expected/cypher_unwind.out @@ -259,6 +259,83 @@ $$) as (i agtype); (1 row) +-- +-- Issue 2383: UNWIND over a list containing JSON null must produce an +-- SQL NULL row so that null-strict operators (count, IS NULL, ...) behave +-- like they do for `WITH null AS x`. +-- +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [null] AS x + RETURN count(x) AS c +$$) as (c agtype); + c +--- + 0 +(1 row) + +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [1, null, 2, null, 3] AS x + RETURN count(x) AS c +$$) as (c agtype); + c +--- + 3 +(1 row) + +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [null] AS x + RETURN x IS NULL AS is_null +$$) as (is_null agtype); + is_null +--------- + true +(1 row) + +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [1, null, 2] AS x + WITH x WHERE x IS NOT NULL + RETURN count(x) AS c +$$) as (c agtype); + c +--- + 2 +(1 row) + +-- count(*) must still see the row produced by UNWIND [null] +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [null] AS x + RETURN count(*) AS c +$$) as (c agtype); + c +--- + 1 +(1 row) + +-- only the unwound top-level element becomes SQL NULL; nested nulls +-- stay as agtype-null inside the returned container, so the outer rows +-- themselves are non-NULL and counted. +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [[null], {k: null}] AS x + RETURN count(x) AS c +$$) as (c agtype); + c +--- + 2 +(1 row) + +-- The unwound row value itself must be SQL NULL (not a datum wrapping +-- agtype-null). Without this, count(x)=0 could be achieved by tricks +-- in count() rather than by UNWIND emitting a real SQL NULL. +SELECT x IS NULL +FROM cypher('cypher_unwind', $$ + UNWIND [null] AS x + RETURN x +$$) as (x agtype); + ?column? +---------- + t +(1 row) + -- -- Clean up -- diff --git a/regress/sql/cypher_unwind.sql b/regress/sql/cypher_unwind.sql index 0e8440afd..7e3e8bf8d 100644 --- a/regress/sql/cypher_unwind.sql +++ b/regress/sql/cypher_unwind.sql @@ -148,6 +148,57 @@ SELECT * FROM cypher('cypher_unwind', $$ RETURN i $$) as (i agtype); +-- +-- Issue 2383: UNWIND over a list containing JSON null must produce an +-- SQL NULL row so that null-strict operators (count, IS NULL, ...) behave +-- like they do for `WITH null AS x`. +-- + +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [null] AS x + RETURN count(x) AS c +$$) as (c agtype); + +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [1, null, 2, null, 3] AS x + RETURN count(x) AS c +$$) as (c agtype); + +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [null] AS x + RETURN x IS NULL AS is_null +$$) as (is_null agtype); + +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [1, null, 2] AS x + WITH x WHERE x IS NOT NULL + RETURN count(x) AS c +$$) as (c agtype); + +-- count(*) must still see the row produced by UNWIND [null] +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [null] AS x + RETURN count(*) AS c +$$) as (c agtype); + +-- only the unwound top-level element becomes SQL NULL; nested nulls +-- stay as agtype-null inside the returned container, so the outer rows +-- themselves are non-NULL and counted. +SELECT * FROM cypher('cypher_unwind', $$ + UNWIND [[null], {k: null}] AS x + RETURN count(x) AS c +$$) as (c agtype); + + +-- The unwound row value itself must be SQL NULL (not a datum wrapping +-- agtype-null). Without this, count(x)=0 could be achieved by tricks +-- in count() rather than by UNWIND emitting a real SQL NULL. +SELECT x IS NULL +FROM cypher('cypher_unwind', $$ + UNWIND [null] AS x + RETURN x +$$) as (x agtype); + -- -- Clean up -- diff --git a/sql/agtype_typecast.sql b/sql/agtype_typecast.sql index c29c0a657..a1b398c7e 100644 --- a/sql/agtype_typecast.sql +++ b/sql/agtype_typecast.sql @@ -188,6 +188,13 @@ IMMUTABLE PARALLEL SAFE AS 'MODULE_PATHNAME'; +CREATE FUNCTION ag_catalog.age_unwind(agtype) + RETURNS SETOF agtype +LANGUAGE c +IMMUTABLE +PARALLEL SAFE +AS 'MODULE_PATHNAME'; + CREATE FUNCTION ag_catalog.age_vertex_stats(agtype, agtype) RETURNS agtype LANGUAGE c diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 3083c52e1..58e956417 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -1443,8 +1443,8 @@ static Query *transform_cypher_delete(cypher_parsestate *cpstate, /* * transform_cypher_unwind * It contains logic to convert the form of an array into a row. Here, we - * are simply calling `age_unnest` function, and the actual transformation - * is handled by `age_unnest` function. + * are simply calling `age_unwind` function, and the actual transformation + * is handled by `age_unwind` function. */ static Query *transform_cypher_unwind(cypher_parsestate *cpstate, cypher_clause *clause) @@ -1501,7 +1501,7 @@ static Query *transform_cypher_unwind(cypher_parsestate *cpstate, parser_errposition(pstate, self->target->location)); } - unwind = makeFuncCall(list_make1(makeString("age_unnest")), NIL, + unwind = makeFuncCall(list_make1(makeString("age_unwind")), NIL, COERCE_SQL_SYNTAX, -1); old_expr_kind = pstate->p_expr_kind; diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 6700be3f3..c4fec06e8 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -12450,6 +12450,109 @@ Datum age_unnest(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } +PG_FUNCTION_INFO_V1(age_unwind); +/* + * Function to convert an agtype array into a set of rows for the Cypher + * `UNWIND` clause. Behaves like age_unnest() but, per Neo4j/openCypher + * semantics, emits an SQL NULL row for each top-level element that is an + * agtype JSON null (AGTV_NULL). This lets null-strict operators such as + * count(x) and IS NULL treat `UNWIND [null] AS x` the same as + * `WITH null AS x`. Nested nulls inside arrays/objects are preserved as + * agtype-null so container semantics are unchanged. See issue #2383. + */ +Datum age_unwind(PG_FUNCTION_ARGS) +{ + agtype *agtype_arg = NULL; + ReturnSetInfo *rsi; + Tuplestorestate *tuple_store; + TupleDesc tupdesc; + TupleDesc ret_tdesc; + MemoryContext old_cxt, tmp_cxt; + bool skipNested = false; + agtype_iterator *it; + agtype_value v; + agtype_iterator_token r; + + /* check for a NULL expr */ + if (PG_ARGISNULL(0)) + { + PG_RETURN_NULL(); + } + + agtype_arg = AG_GET_ARG_AGTYPE_P(0); + if (!AGT_ROOT_IS_ARRAY(agtype_arg)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot extract elements from a non-array"))); + } + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + + rsi->returnMode = SFRM_Materialize; + + /* it's a simple type, so don't use get_call_result_type() */ + tupdesc = rsi->expectedDesc; + + old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); + + ret_tdesc = CreateTupleDescCopy(tupdesc); + BlessTupleDesc(ret_tdesc); + tuple_store = + tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, + false, work_mem); + + MemoryContextSwitchTo(old_cxt); + + tmp_cxt = AllocSetContextCreate(CurrentMemoryContext, + "age_unwind temporary cxt", + ALLOCSET_DEFAULT_SIZES); + + it = agtype_iterator_init(&agtype_arg->root); + + while ((r = agtype_iterator_next(&it, &v, skipNested)) != WAGT_DONE) + { + skipNested = true; + + if (r == WAGT_ELEM) + { + HeapTuple tuple; + Datum values[1] = {(Datum) 0}; + bool nulls[1] = {false}; + + /* use the tmp context so we can clean up after each tuple is done */ + old_cxt = MemoryContextSwitchTo(tmp_cxt); + + if (v.type == AGTV_NULL) + { + /* emit SQL NULL for agtype JSON null (issue #2383) */ + nulls[0] = true; + } + else + { + agtype *val = agtype_value_to_agtype(&v); + + values[0] = PointerGetDatum(val); + } + + tuple = heap_form_tuple(ret_tdesc, values, nulls); + + tuplestore_puttuple(tuple_store, tuple); + + /* clean up and switch back (mirror age_unnest's pattern) */ + MemoryContextSwitchTo(old_cxt); + MemoryContextReset(tmp_cxt); + } + } + + MemoryContextDelete(tmp_cxt); + + rsi->setResult = tuple_store; + rsi->setDesc = ret_tdesc; + + PG_RETURN_NULL(); +} + /* * Volatile wrapper replacement. The previous version was PL/SQL * and could only handle AGTYPE input and returned AGTYPE output.