From cefdd1a8d54cab1eb50e399ca0ef90b30f8ad89e Mon Sep 17 00:00:00 2001 From: crprashant <5108573+crprashant@users.noreply.github.com> Date: Fri, 1 May 2026 00:24:33 +0000 Subject: [PATCH] Fix VLE [*0..N] zero-hop self-binding when edge label is missing (#2382) A variable-length relationship pattern with a zero lower bound, e.g. `(p)-[:LABEL*0..N]-(f)`, must produce the zero-hop self-binding row (`f` = `p`) regardless of whether any edge of `LABEL` exists in the graph. This matches Neo4j/openCypher semantics. Previously, when the edge label did not exist in the label cache, AGE short-circuited the entire MATCH to zero rows (or NULL-extended rows for OPTIONAL MATCH). The fix has three parts: 1. parser/cypher_clause.c: A new helper `is_zero_lower_bound_vle()` inspects the FuncCall produced by `build_VLE_relation()` and reports whether the relationship is a zero-bound VLE. It is intentionally defensive about the FuncCall shape so that any future parser changes fall back to the existing short-circuit safely. `match_check_valid_label()` and `path_check_valid_label()` now treat a missing edge label as fatal only when the relationship requires at least one edge of that label. Patterns mixing a zero-bound segment with another impossible segment (e.g. `(a)-[:NOEXIST*0..1]-(b)-[:STILL_MISSING]-(c)`) still correctly resolve to zero rows because the second segment independently fails the label check. 2. utils/adt/age_vle.c: `is_an_edge_match()` now returns false early when the user requested a specific label that does not exist (`edge_label_name != NULL && edge_label_name_oid == InvalidOid`). This prevents a zero-bound traversal of `[:NOEXIST*0..N]` from incorrectly walking arbitrary other-label edges via the existing "no constraints -> match all" fast path. The zero-hop case itself is unaffected because it is generated by `build_VLE_zero_container()` without ever consulting `is_an_edge_match()`. 3. regress/sql/cypher_vle.sql: Adds seven regression cases that lock in the new behaviour, including the rubber-duck scenarios where another label exists in the graph (must NOT be matched by the missing-label VLE), where another segment is unsatisfiable (must still produce zero rows), and where the label exists (sanity check, unchanged behaviour). --- regress/expected/cypher_vle.out | 115 +++++++++++++++++++++++++++++ regress/sql/cypher_vle.sql | 69 +++++++++++++++++ src/backend/parser/cypher_clause.c | 73 +++++++++++++++++- src/backend/utils/adt/age_vle.c | 14 ++++ 4 files changed, 269 insertions(+), 2 deletions(-) diff --git a/regress/expected/cypher_vle.out b/regress/expected/cypher_vle.out index 6574e0608..2e959371b 100644 --- a/regress/expected/cypher_vle.out +++ b/regress/expected/cypher_vle.out @@ -1219,6 +1219,121 @@ NOTICE: graph "cypher_vle" has been dropped (1 row) +-- +-- Issue #2382: variable-length relationships with a zero lower bound must +-- still produce the zero-hop self-binding even when the edge label does not +-- exist in the graph (Neo4j/openCypher semantics). +-- +SELECT create_graph('issue_2382'); +NOTICE: graph "issue_2382" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2382', $$ + CREATE (:Person {name: 'Alice'})-[:KNOWS]->(:Person {name: 'Bob'}) +$$) AS (v agtype); + v +--- +(0 rows) + +-- Plain MATCH on a non-existent edge label with [*0..N] must return the +-- zero-hop self-binding row (Alice -> Alice). It must NOT match arbitrary +-- edges of other labels (e.g. KNOWS). +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + MATCH (p)-[:NOEXIST*0..1]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + person | friend +---------+--------- + "Alice" | "Alice" +(1 row) + +-- OPTIONAL MATCH form (the exact shape from the issue report). +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + OPTIONAL MATCH (p)-[:NOEXIST*0..1]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + person | friend +---------+--------- + "Alice" | "Alice" +(1 row) + +-- [*0..0] still emits exactly the zero-hop self-binding. +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + MATCH (p)-[:NOEXIST*0..0]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + person | friend +---------+--------- + "Alice" | "Alice" +(1 row) + +-- Fixed-length (lower bound > 0) on a missing label must still return zero +-- rows: there is no edge of that label, so the pattern is unsatisfiable. +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + MATCH (p)-[:NOEXIST*1..1]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + person | friend +--------+-------- +(0 rows) + +-- OPTIONAL MATCH on the unsatisfiable fixed-length pattern still preserves +-- the outer row with NULL bindings. +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + OPTIONAL MATCH (p)-[:NOEXIST*1..1]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + person | friend +---------+-------- + "Alice" | +(1 row) + +-- Mixed pattern: a zero-bound VLE on a missing label combined with another +-- fixed-length missing label segment must still yield zero rows. The other +-- segment is impossible regardless of the zero-hop case. +SELECT * FROM cypher('issue_2382', $$ + MATCH (a:Person {name: 'Alice'}) + MATCH (a)-[:NOEXIST*0..1]-(b:Person)-[:STILL_MISSING]-(c:Person) + RETURN a.name, b.name, c.name +$$) AS (a agtype, b agtype, c agtype); + a | b | c +---+---+--- +(0 rows) + +-- Sanity: zero-bound VLE on an EXISTING label still works the way it did +-- before (Alice via zero-hop, Bob via 1-hop KNOWS). +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + MATCH (p)-[:KNOWS*0..1]-(f:Person) + RETURN p.name AS person, f.name AS friend + ORDER BY f.name +$$) AS (person agtype, friend agtype); + person | friend +---------+--------- + "Alice" | "Alice" + "Alice" | "Bob" +(2 rows) + +SELECT drop_graph('issue_2382', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table issue_2382._ag_label_vertex +drop cascades to table issue_2382._ag_label_edge +drop cascades to table issue_2382."Person" +drop cascades to table issue_2382."KNOWS" +NOTICE: graph "issue_2382" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/sql/cypher_vle.sql b/regress/sql/cypher_vle.sql index c960aa7a4..35f38f376 100644 --- a/regress/sql/cypher_vle.sql +++ b/regress/sql/cypher_vle.sql @@ -417,6 +417,75 @@ SELECT drop_graph('issue_2092', true); DROP TABLE start_and_end_points; SELECT drop_graph('cypher_vle', true); +-- +-- Issue #2382: variable-length relationships with a zero lower bound must +-- still produce the zero-hop self-binding even when the edge label does not +-- exist in the graph (Neo4j/openCypher semantics). +-- +SELECT create_graph('issue_2382'); + +SELECT * FROM cypher('issue_2382', $$ + CREATE (:Person {name: 'Alice'})-[:KNOWS]->(:Person {name: 'Bob'}) +$$) AS (v agtype); + +-- Plain MATCH on a non-existent edge label with [*0..N] must return the +-- zero-hop self-binding row (Alice -> Alice). It must NOT match arbitrary +-- edges of other labels (e.g. KNOWS). +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + MATCH (p)-[:NOEXIST*0..1]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + +-- OPTIONAL MATCH form (the exact shape from the issue report). +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + OPTIONAL MATCH (p)-[:NOEXIST*0..1]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + +-- [*0..0] still emits exactly the zero-hop self-binding. +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + MATCH (p)-[:NOEXIST*0..0]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + +-- Fixed-length (lower bound > 0) on a missing label must still return zero +-- rows: there is no edge of that label, so the pattern is unsatisfiable. +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + MATCH (p)-[:NOEXIST*1..1]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + +-- OPTIONAL MATCH on the unsatisfiable fixed-length pattern still preserves +-- the outer row with NULL bindings. +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + OPTIONAL MATCH (p)-[:NOEXIST*1..1]-(f:Person) + RETURN p.name AS person, f.name AS friend +$$) AS (person agtype, friend agtype); + +-- Mixed pattern: a zero-bound VLE on a missing label combined with another +-- fixed-length missing label segment must still yield zero rows. The other +-- segment is impossible regardless of the zero-hop case. +SELECT * FROM cypher('issue_2382', $$ + MATCH (a:Person {name: 'Alice'}) + MATCH (a)-[:NOEXIST*0..1]-(b:Person)-[:STILL_MISSING]-(c:Person) + RETURN a.name, b.name, c.name +$$) AS (a agtype, b agtype, c agtype); + +-- Sanity: zero-bound VLE on an EXISTING label still works the way it did +-- before (Alice via zero-hop, Bob via 1-hop KNOWS). +SELECT * FROM cypher('issue_2382', $$ + MATCH (p:Person {name: 'Alice'}) + MATCH (p)-[:KNOWS*0..1]-(f:Person) + RETURN p.name AS person, f.name AS friend + ORDER BY f.name +$$) AS (person agtype, friend agtype); + +SELECT drop_graph('issue_2382', true); -- -- End diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 3083c52e1..98d437db3 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -130,6 +130,60 @@ static Expr *transform_cypher_edge(cypher_parsestate *cpstate, static Expr *transform_cypher_node(cypher_parsestate *cpstate, cypher_node *node, List **target_list, bool output_node, bool valid_label); +/* + * Issue #2382: For variable-length relationships with a lower bound of 0 + * (e.g., [:LABEL*0..N]), the zero-hop self-binding case must succeed even + * when LABEL is missing from the cache, because Neo4j/openCypher semantics + * say a zero-hop pattern matches the same node regardless of any edges. + * + * By the time match_check_valid_label() runs, build_VLE_relation() (in + * cypher_gram.y) has rewritten cypher_relationship.varlen from A_Indices + * into a FuncCall named "vle" whose argument list is: + * (start_id, end_id, edge_match_proto, lidx, uidx, dir, unique_id) + * so the lower-bound is the 4th argument (1-based). + * + * This helper is intentionally defensive: every assumption about the shape + * of the FuncCall is guarded so any parser refactor that changes it will + * fall back to "not zero-bound", which is the safe behaviour (the existing + * false-where short-circuit will still kick in for impossible patterns). + */ +static bool is_zero_lower_bound_vle(Node *varlen) +{ + FuncCall *fc; + String *fname; + Node *lidx_node; + A_Const *lidx; + + if (varlen == NULL || !IsA(varlen, FuncCall)) + return false; + + fc = (FuncCall *) varlen; + + if (list_length(fc->funcname) != 1) + return false; + fname = (String *) linitial(fc->funcname); + if (fname == NULL || !IsA(fname, String)) + return false; + if (strcmp(strVal(fname), "vle") != 0) + return false; + + /* args = {start, end, edge_match, lidx, uidx, dir, uniq} */ + if (list_length(fc->args) < 5) + return false; + + lidx_node = (Node *) list_nth(fc->args, 3); + if (lidx_node == NULL || !IsA(lidx_node, A_Const)) + return false; + + lidx = (A_Const *) lidx_node; + if (lidx->isnull) + return false; + if (lidx->val.ival.type != T_Integer) + return false; + + return lidx->val.ival.ival == 0; +} + static bool match_check_valid_label(cypher_match *match, cypher_parsestate *cpstate); static Node *make_vertex_expr(cypher_parsestate *cpstate, @@ -2900,7 +2954,14 @@ static bool match_check_valid_label(cypher_match *match, if (lcd == NULL || lcd->kind != LABEL_KIND_EDGE) { - return false; + /* + * Issue #2382: a missing edge label is fatal only if + * the pattern actually requires an edge of that label. + * For VLE with lower bound 0, the zero-hop self-bind + * case must still produce rows. + */ + if (!is_zero_lower_bound_vle(rel->varlen)) + return false; } } } @@ -4967,7 +5028,15 @@ static bool path_check_valid_label(cypher_path *path, if (lcd == NULL || lcd->kind != LABEL_KIND_EDGE) { - return false; + /* + * Issue #2382: Don't invalidate the whole path just + * because a VLE edge with lower bound 0 references a + * missing label. The zero-hop self-binding semantics + * still allow the surrounding nodes to bind, so the + * other vertex labels in this path must be honoured. + */ + if (!is_zero_lower_bound_vle(rel->varlen)) + return false; } } } diff --git a/src/backend/utils/adt/age_vle.c b/src/backend/utils/adt/age_vle.c index 22c268cdf..f8a1f7855 100644 --- a/src/backend/utils/adt/age_vle.c +++ b/src/backend/utils/adt/age_vle.c @@ -384,6 +384,20 @@ static bool is_an_edge_match(VLE_local_context *vlelctx, edge_entry *ee) /* get the number of conditions from the prototype edge */ num_edge_property_constraints = AGT_ROOT_COUNT(vlelctx->edge_property_constraint); + /* + * Issue #2382: If the user asked for a specific edge label but that label + * does not exist in the graph (edge_label_name_oid == InvalidOid while + * edge_label_name is non-NULL), no real edge can match. Returning false + * here ensures that for VLE patterns like [:NOEXIST*0..N] we do not + * traverse arbitrary other-label edges. Zero-hop self-binding is handled + * separately via build_VLE_zero_container() so this does not break it. + */ + if (vlelctx->edge_label_name != NULL && + vlelctx->edge_label_name_oid == InvalidOid) + { + return false; + } + /* * We only care about verifying that we have all of the property conditions. * We don't care about extra unmatched properties. If there aren't any edge