diff --git a/Makefile b/Makefile index 81dd8363..cc85ca83 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,7 @@ QUALITY_INPUTS += -mutation-report $(MUTATION_REPORT) endif QUALITY_INPUTS += -benchmark-regression $(BENCHMARK_REGRESSION) -.PHONY: default all build deps tidy lint format test test_all test_integration test_neo4j test_pg test_update complexity complexity_check crap crap_check quality quality_check quality_backend quality_bench metrics metrics_check generate clean help +.PHONY: default all build deps tidy lint format test test_all test_integration test_neo4j test_pg test_update plan_corpus complexity complexity_check crap crap_check quality quality_check quality_backend quality_bench metrics metrics_check generate clean help # Default target default: help @@ -109,6 +109,10 @@ test_update: @cp -fv cypher/models/pgsql/test/updated_cases/* cypher/models/pgsql/test/translation_cases @rm -rf cypher/models/pgsql/test/updated_cases +plan_corpus: $(METRICS_DIR) + @echo "Capturing Cypher plan corpus..." + @$(GO_CMD) run ./cmd/plancorpus + # Metric targets $(METRICS_DIR): @mkdir -p $(METRICS_DIR) @@ -218,6 +222,7 @@ help: @echo " test_bench - Run benchmark test" @echo " test_neo4j - Run Neo4j integration tests" @echo " test_pg - Run PostgreSQL integration tests" + @echo " plan_corpus - Capture shared corpus query plans for configured backends" @echo " test_update - Update test cases" @echo " complexity - Report cyclomatic complexity" @echo " crap - Report CRAP scores from unit test coverage" diff --git a/README.md b/README.md index e1aad7bb..bd082303 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ export CONNECTION_STRING="postgresql://dawgs:weneedbetterpasswords@localhost:654 export CONNECTION_STRING="neo4j://neo4j:weneedbetterpasswords@localhost:7687" ``` +Neo4j connection strings may use `neo4j://`, `neo4j+s://`, or `neo4j+ssc://`; a single path segment selects the Neo4j database name. 
+ Use `make test` for unit tests only and `make test_integration` for integration tests only. ### Test Metrics @@ -95,6 +97,24 @@ make quality FUZZ_REPORT=.coverage/fuzz.json MUTATION_REPORT=.coverage/mutation. `PG_CONNECTION_STRING` and `NEO4J_CONNECTION_STRING`. `make quality_bench` writes benchmark markdown and JSON captures for later baseline comparison. +`make plan_corpus` captures plan diagnostics for the shared Cypher integration corpus. It accepts either +`CONNECTION_STRING` for one backend or `PG_CONNECTION_STRING` and `NEO4J_CONNECTION_STRING` for both backends, then +writes JSONL captures and markdown/JSON summaries under `.coverage/`. + +`go run ./cmd/graphbench` captures runtime diagnostics for the scale corpus under `benchmark/testdata/scale`. The +current modes are `postgres_sql`, `local_traversal`, and `neo4j`; AGE is reference-design input only and is not a direct +comparison mode yet. The command can emit JSONL records plus Markdown and JSON summaries, and can compare current timings +against a previous JSONL baseline. + +PostgreSQL translates exact string property equality with a JSON string type guard and `properties ->>` extraction, so +indexes created on expressions such as `properties ->> 'objectid'` and `properties ->> 'name'` can be used for selective +anchors without matching JSON booleans or numbers. Simple relationship count fast paths depend on the schema's +`kind_id`-first edge index for efficient typed counts. + +Substring and suffix predicates are intentionally not promoted to blanket schema indexes. PostgreSQL deployments can +request explicit `TextSearchIndex`/trigram property indexes for fields that need `CONTAINS`, `STARTS WITH`, or +`ENDS WITH`, but default schema assertion should wait until all suffix forms share one semantics-preserving lowering. + Thresholds are report-only by default. 
To enforce the configured thresholds, run: ```bash diff --git a/batch_operation_plan.md b/batch_operation_plan.md new file mode 100644 index 00000000..be00c3d5 --- /dev/null +++ b/batch_operation_plan.md @@ -0,0 +1,129 @@ +# BatchOperation COPY Streaming Plan + +## Objective + +Move PostgreSQL `BatchOperation` toward chunked streaming writes backed by `COPY` and staging tables, while documenting that `BatchOperation` is intentionally non-transactional across the whole delegate. + +## Ground Rules + +- `BatchOperation` is a buffered, non-atomic write API. +- Successful flushes may persist even if the delegate later returns an error. +- PostgreSQL flushes may use short chunk-local transactions. +- Avoid one giant transaction for large batches. +- Use PostgreSQL `COPY` into staging tables for high-volume batch paths. +- Keep backend-neutral integration cases backend-equivalent; PG-specific behavior belongs in PG-scoped tests. + +## Steps + +### 1. Clarify Public Semantics + +Update `graph.BatchOperation` documentation to state that the API is non-transactional across the whole operation. Mention that flushes may commit before the delegate returns and that delegate errors do not roll back successful flushes. + +Status: Complete. + +### 2. Introduce PG COPY Staging Helpers + +Add internal PostgreSQL helpers for chunk flushes: + +- begin a chunk-local transaction +- create a temporary staging table +- stream rows with `COPY` +- merge/upsert/delete into final graph tables +- commit or roll back the chunk transaction + +Status: Complete. + +### 3. Add Streaming `CopyFromSource` Types + +Implement row-source types that satisfy `pgx.CopyFromSource` without materializing full `[][]any` batches. Each source should expose only current-row state plus encoder state. + +Status: Complete. + +### 4. Convert Relationship Create/Upsert + +Replace relationship create array batching with staging-table `COPY`. 
+ +The flush should: + +- stream `graph_id`, `start_id`, `end_id`, `kind_id`, and `properties` into a temporary staging table +- coalesce duplicate `(graph_id, start_id, end_id, kind_id)` rows in SQL +- insert into the edge partition with `ON CONFLICT ... DO UPDATE` + +Status: Complete. + +### 5. Convert Node Create + +Replace node create array batching with staging-table `COPY`. + +Preserve the existing behavior that a single flush may not mix preset node IDs with nodes that require generated IDs. + +Status: Complete. + +### 6. Convert Node Update + +Replace the normal parameter-array node update and the special large-update path with one staging-based implementation. The existing large-update flow is a useful starting point but should become streaming rather than pre-materialized. + +Status: Complete. + +### 7. Convert Upsert Batches + +Convert `UpdateNodeBy` and `UpdateRelationshipBy` after the simpler paths are stable. Preserve current identity-property semantics while moving the data transfer to staging-table `COPY`. + +Status: Complete. + +### 8. Add PG-Scoped Tests + +Add PostgreSQL driver-scoped tests for: + +- flushed data persists after the delegate returns an error +- relationship create duplicate coalescing +- node create with and without IDs +- node update via staging +- `UpdateNodeBy` and `UpdateRelationshipBy` +- streaming source behavior + +Status: Complete. + +### 9. Validate + +Run formatting and targeted tests: + +```bash +make format +go test ./drivers/pg/... +``` + +Run PostgreSQL integration tests only when `CONNECTION_STRING` points at PostgreSQL. + +Status: Complete. + +## Evaluation Notes + +This plan should be updated after each step is completed. If a step exposes a simpler or safer implementation order, update this file before moving on. + +- Step 1 confirmed the intended contract: `BatchOperation` remains a buffered, non-atomic API. The implementation work should optimize chunk flushes without introducing whole-operation atomicity. 
+- Step 2 added the transaction and staging execution boundary as PG-internal helpers. The next step should focus on row sources so batch paths can stream rows into those helpers. +- Step 3 added a generic slice-backed `CopyFromSource`. It streams encoded rows from existing buffers without creating a second materialized row matrix; later steps can still replace the outer buffers if needed. +- Step 4 moved relationship create/upsert to staging-table `COPY` and SQL duplicate coalescing. This removed the old in-memory relationship de-duplication path, including its ambiguous key and incorrect index lookup behavior. +- Step 5 moved node creation to staging-table `COPY` while preserving the existing split between preset-ID and generated-ID batches. Kind assertion remains outside the COPY stream, and row streaming uses kind mapping only. +- Step 6 unified normal and large node updates on the same staging-table `COPY` flush path. Large node update inputs now use normal batch chunking instead of a separate all-at-once row materialization path. +- Step 7 moved `UpdateNodeBy` and `UpdateRelationshipBy` to staging-table `COPY`. Node upserts still scan returned IDs into futures in staged row order so relationship upserts can reuse the resolved endpoint IDs. +- Step 8 added manual PostgreSQL integration coverage for non-transactional flushed chunks, node create with and without IDs, relationship duplicate coalescing, node update staging, and `UpdateNodeBy`/`UpdateRelationshipBy` staging. Existing PG unit tests cover the streaming `CopyFromSource` behavior. +- Step 9 validation passed for `go test ./drivers/pg/...` and manual integration compilation via `go test -tags manual_integration ./integration -run '^$'`. `make format` could not run because `goimports` is not available as an executable on this PATH, so the touched Go test file was formatted with `go run golang.org/x/tools/cmd/goimports@v0.44.0 -w`. 
Live PostgreSQL integration tests were not run because `CONNECTION_STRING` is unset. + +## Findings Follow-up + +The review findings were addressed in this order: + +- Refreshed the PostgreSQL schema graph cache during schema assertion and fixed the PG batch integration helper so default graph constraints are actually asserted. +- Made node upsert ID resolution map returned IDs back to futures by staging row ordinal instead of result position. +- Split relationship update staging from relationship create staging so identity updates no longer inherit physical-key coalescing. +- Coalesced duplicate node ID updates before staging to avoid matching the same target row more than once in a PostgreSQL `MERGE`. +- Converted node and relationship delete buffers to chunk-local `COPY` staging. + +Latest validation: + +- `go test ./drivers/pg/...` passed. +- Full tagged PostgreSQL run passed with a PostgreSQL `CONNECTION_STRING`. +- Full tagged Neo4j run passed with a Neo4j `CONNECTION_STRING`. +- `make format` still fails in this environment because `goimports` is not executable on `PATH`; touched Go files were formatted with `gofmt` and `go run golang.org/x/tools/cmd/goimports@v0.44.0 -w`. diff --git a/benchmark/testdata/scale/README.md b/benchmark/testdata/scale/README.md new file mode 100644 index 00000000..85c2f788 --- /dev/null +++ b/benchmark/testdata/scale/README.md @@ -0,0 +1,28 @@ +# GraphBench Scale Corpus + +This corpus measures graph workload shapes, not general Cypher correctness. +The shared integration corpus remains the source of backend-equivalent semantic +coverage. + +Cases declare the values a query observes so benchmark reports can separate +ID-only work from node, relationship, property, and path materialization. +Current execution modes are `postgres_sql`, `local_traversal`, and `neo4j`. +Apache AGE is intentionally not a benchmark mode here; it may appear only in +`reference_design` notes as input for DAWGS design choices. 
+ +Each JSON file contains a `cases` list whose entries have: + +- `source`: the source corpus or workload family. +- `dataset`: the fixture dataset to load from `integration/testdata`. +- `name` and `category`: stable identifiers used in reports. +- `cypher`: the Cypher query under test. +- `params` and `node_params`: named parameter values; `node_params` values name fixture nodes such as `n1`. +- `expected`: the expected result cardinality (`row_count`) and `result_kind`. +- `observes`: whether the query observes paths, nodes, relationships, + properties, or only IDs internally. +- `candidate_modes`: the execution modes that should attempt the case. +- `reference_design`: optional design notes, including AGE observations when + useful. + +Use `cmd/graphbench` to run this corpus and produce JSONL, Markdown, and JSON +summaries. diff --git a/benchmark/testdata/scale/cases/counts.json b/benchmark/testdata/scale/cases/counts.json new file mode 100644 index 00000000..37f93714 --- /dev/null +++ b/benchmark/testdata/scale/cases/counts.json @@ -0,0 +1,70 @@ +{ + "cases": [ + { + "name": "all_node_count", + "dataset": "base", + "category": "counts", + "cypher": "MATCH (n) RETURN count(n)", + "expected": { + "row_count": 1, + "result_kind": "scalar" + }, + "observes": { + "paths": false, + "nodes": false, + "relationships": false, + "properties": false + }, + "shape": { + "path_materialization_required": false + }, + "candidate_modes": ["postgres_sql", "neo4j"], + "tags": ["count", "count-store"] + }, + { + "name": "typed_node_count", + "dataset": "base", + "category": "counts", + "cypher": "MATCH (n:NodeKind1) RETURN count(n)", + "expected": { + "row_count": 1, + "result_kind": "scalar" + }, + "observes": { + "paths": false, + "nodes": false, + "relationships": false, + "properties": false + }, + "shape": { + "terminal_predicate": "node_kind", + "path_materialization_required": false + }, + "candidate_modes": ["postgres_sql", "neo4j"], + "tags": ["count", "typed-count", "graph-stats"] + }, + { + "name": "typed_edge_count", + "dataset": "base", + "category": "counts", +
"cypher": "MATCH ()-[r:EdgeKind1]->() RETURN count(r)", + "expected": { + "row_count": 1, + "result_kind": "scalar" + }, + "observes": { + "paths": false, + "nodes": false, + "relationships": false, + "properties": false + }, + "shape": { + "edge_kinds": ["EdgeKind1"], + "path_materialization_required": false + }, + "candidate_modes": ["postgres_sql", "neo4j"], + "tags": ["count", "typed-count", "graph-stats"] + } + ] +} + diff --git a/benchmark/testdata/scale/cases/lookups.json b/benchmark/testdata/scale/cases/lookups.json new file mode 100644 index 00000000..724c1af8 --- /dev/null +++ b/benchmark/testdata/scale/cases/lookups.json @@ -0,0 +1,54 @@ +{ + "cases": [ + { + "name": "objectid_exact_string_anchor", + "dataset": "base", + "category": "lookups", + "cypher": "MATCH (n:NodeKind1) WHERE n.objectid = $objectid RETURN id(n)", + "params": { + "objectid": "S-1-5-21-1" + }, + "expected": { + "row_count": 1, + "result_kind": "id_set" + }, + "observes": { + "paths": false, + "nodes": false, + "relationships": false, + "properties": false + }, + "shape": { + "root_predicate": "selective_property", + "terminal_predicate": "node_kind", + "path_materialization_required": false + }, + "candidate_modes": ["postgres_sql", "neo4j"], + "tags": ["property-anchor", "expression-index"] + }, + { + "name": "boolean_property_filter", + "dataset": "base", + "category": "lookups", + "cypher": "MATCH (n:NodeKind1) WHERE n.enabled = true RETURN id(n)", + "expected": { + "row_count": 1, + "result_kind": "id_set" + }, + "observes": { + "paths": false, + "nodes": false, + "relationships": false, + "properties": false + }, + "shape": { + "root_predicate": "boolean_property", + "terminal_predicate": "node_kind", + "path_materialization_required": false + }, + "candidate_modes": ["postgres_sql", "neo4j"], + "tags": ["property-filter"] + } + ] +} + diff --git a/benchmark/testdata/scale/cases/shortest_paths.json b/benchmark/testdata/scale/cases/shortest_paths.json new file mode 100644 index 
00000000..b9539b36 --- /dev/null +++ b/benchmark/testdata/scale/cases/shortest_paths.json @@ -0,0 +1,61 @@ +{ + "cases": [ + { + "name": "shortest_distance_bound_pair", + "dataset": "base", + "category": "shortest_path", + "cypher": "MATCH p = shortestPath((s)-[*1..]->(e)) WHERE id(s) = $start_id AND id(e) = $end_id RETURN length(p)", + "node_params": { + "start_id": "n1", + "end_id": "n3" + }, + "expected": { + "row_count": 1, + "result_kind": "scalar" + }, + "observes": { + "paths": false, + "nodes": false, + "relationships": false, + "properties": false + }, + "shape": { + "root_predicate": "bound_id", + "terminal_predicate": "bound_id", + "min_depth": 1, + "path_materialization_required": false + }, + "candidate_modes": ["postgres_sql", "local_traversal", "neo4j"], + "tags": ["shortest-distance", "local-traversal-candidate"] + }, + { + "name": "one_shortest_path_bound_pair", + "dataset": "base", + "category": "shortest_path", + "cypher": "MATCH p = shortestPath((s)-[*1..]->(e)) WHERE id(s) = $start_id AND id(e) = $end_id RETURN p LIMIT 1", + "node_params": { + "start_id": "n1", + "end_id": "n3" + }, + "expected": { + "row_count": 1, + "result_kind": "path_set" + }, + "observes": { + "paths": true, + "nodes": true, + "relationships": true, + "properties": true + }, + "shape": { + "root_predicate": "bound_id", + "terminal_predicate": "bound_id", + "min_depth": 1, + "path_materialization_required": true + }, + "candidate_modes": ["postgres_sql", "local_traversal", "neo4j"], + "tags": ["one-shortest-path", "local-traversal-candidate"] + } + ] +} + diff --git a/benchmark/testdata/scale/cases/traversal.json b/benchmark/testdata/scale/cases/traversal.json new file mode 100644 index 00000000..2bab928d --- /dev/null +++ b/benchmark/testdata/scale/cases/traversal.json @@ -0,0 +1,143 @@ +{ + "cases": [ + { + "name": "one_hop_typed_from_bound_id", + "dataset": "base", + "category": "one_hop", + "cypher": "MATCH (s)-[:EdgeKind1]->(e) WHERE id(s) = $start_id RETURN id(e)", + 
"node_params": { + "start_id": "n1" + }, + "expected": { + "row_count": 1, + "result_kind": "id_set" + }, + "observes": { + "paths": false, + "nodes": false, + "relationships": false, + "properties": false + }, + "shape": { + "root_predicate": "bound_id", + "edge_kinds": ["EdgeKind1"], + "path_materialization_required": false + }, + "candidate_modes": ["postgres_sql", "neo4j"], + "tags": ["typed-expansion", "id-only"] + }, + { + "name": "variable_length_id_only_from_bound_id", + "dataset": "base", + "category": "variable_length_reachability", + "cypher": "MATCH (s)-[*1..]->(e) WHERE id(s) = $start_id RETURN id(e)", + "node_params": { + "start_id": "n1" + }, + "expected": { + "row_count": 2, + "result_kind": "id_set" + }, + "observes": { + "paths": false, + "nodes": false, + "relationships": false, + "properties": false + }, + "shape": { + "root_predicate": "bound_id", + "min_depth": 1, + "path_materialization_required": false + }, + "candidate_modes": ["postgres_sql", "local_traversal", "neo4j"], + "tags": ["reachability", "id-only", "local-traversal-candidate"], + "reference_design": { + "age_relevance": ["vle_cost_model"], + "notes": "AGE VLE behavior is useful design context for cycle and duplicate handling, but this case is not run against AGE." 
+ } + }, + { + "name": "variable_length_path_observed_from_bound_id", + "dataset": "base", + "category": "path_observed_variable_length", + "cypher": "MATCH p = (s)-[*1..]->(e) WHERE id(s) = $start_id RETURN p", + "node_params": { + "start_id": "n1" + }, + "expected": { + "row_count": 2, + "result_kind": "path_set" + }, + "observes": { + "paths": true, + "nodes": true, + "relationships": true, + "properties": true + }, + "shape": { + "root_predicate": "bound_id", + "min_depth": 1, + "path_materialization_required": true + }, + "candidate_modes": ["postgres_sql", "neo4j"], + "tags": ["path-materialization"] + }, + { + "name": "adcs_p1_endpoint_ids", + "dataset": "adcs_fanout", + "category": "bloodhound_search", + "cypher": "MATCH (n:Group) WHERE n.objectid = $objectid MATCH (n)-[:MemberOf*0..]->()-[:Enroll]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(:NTAuthStore)-[:NTAuthStoreFor]->(d:Domain) RETURN id(ca), id(d)", + "params": { + "objectid": "S-1-5-21-2643190041-1319121918-239771340-513" + }, + "expected": { + "row_count": 4, + "result_kind": "id_rows" + }, + "observes": { + "paths": false, + "nodes": false, + "relationships": false, + "properties": false + }, + "shape": { + "root_predicate": "selective_property", + "terminal_predicate": "fixed_suffix", + "edge_kinds": ["MemberOf", "Enroll", "TrustedForNTAuth", "NTAuthStoreFor"], + "min_depth": 0, + "path_materialization_required": false + }, + "candidate_modes": ["postgres_sql", "local_traversal", "neo4j"], + "tags": ["bloodhound", "adcs", "id-only", "local-traversal-candidate"] + }, + { + "name": "adcs_p1_path_observed", + "dataset": "adcs_fanout", + "category": "bloodhound_search", + "cypher": "MATCH (n:Group) WHERE n.objectid = $objectid MATCH p = (n)-[:MemberOf*0..]->()-[:Enroll]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(:NTAuthStore)-[:NTAuthStoreFor]->(d:Domain) RETURN p", + "params": { + "objectid": "S-1-5-21-2643190041-1319121918-239771340-513" + }, + "expected": { + "row_count": 4, + "result_kind": 
"path_set" + }, + "observes": { + "paths": true, + "nodes": true, + "relationships": true, + "properties": true + }, + "shape": { + "root_predicate": "selective_property", + "terminal_predicate": "fixed_suffix", + "edge_kinds": ["MemberOf", "Enroll", "TrustedForNTAuth", "NTAuthStoreFor"], + "min_depth": 0, + "path_materialization_required": true + }, + "candidate_modes": ["postgres_sql", "neo4j"], + "tags": ["bloodhound", "adcs", "path-materialization"] + } + ] +} + diff --git a/cmd/benchmark/README.md b/cmd/benchmark/README.md index 741118f5..1ac95798 100644 --- a/cmd/benchmark/README.md +++ b/cmd/benchmark/README.md @@ -1,11 +1,11 @@ # Benchmark -Runs query scenarios against a real database and outputs a markdown timing table. +Runs query scenarios against a real database and outputs a markdown timing table with warm-up row counts. Path-heavy scenarios can also report distinct returned path rows and duplicate returned path rows. PostgreSQL explain capture includes translated SQL, plan text, and optimizer rule/lowering metadata in JSON output. ## Usage ```bash -# Default dataset (base) +# Default datasets (base and adcs_fanout) go run ./cmd/benchmark -connection "postgresql://dawgs:dawgs@localhost:5432/dawgs" # Local dataset (not committed to repo) @@ -22,6 +22,9 @@ go run ./cmd/benchmark -connection "..." -output report.md # Save markdown and JSON for quality baseline comparison go run ./cmd/benchmark -connection "..." -output report.md -json-output report.json + +# Capture PostgreSQL EXPLAIN (ANALYZE, BUFFERS), translated SQL, and optimizer metadata in JSON output +go run ./cmd/benchmark -connection "..." -dataset adcs_fanout -json-output report.json -explain ``` ## Flags @@ -31,6 +34,7 @@ go run ./cmd/benchmark -connection "..." 
-output report.md -json-output report.j | `-driver` | `pg` | Database driver (`pg`, `neo4j`) | | `-connection` | | Connection string (or `CONNECTION_STRING` env) | | `-iterations` | `10` | Timed iterations per scenario | +| `-explain` | `false` | Capture PostgreSQL `EXPLAIN (ANALYZE, BUFFERS)` and translated SQL for Cypher scenarios in JSON output | | `-dataset` | | Run only this dataset | | `-local-dataset` | | Add a local dataset to the default set | | `-dataset-dir` | `integration/testdata` | Path to testdata directory | @@ -43,20 +47,10 @@ go run ./cmd/benchmark -connection "..." -output report.md -json-output report.j $ go run ./cmd/benchmark -driver neo4j -connection "neo4j://neo4j:testpassword@localhost:7687" -dataset local/phantom ``` -| Query | Dataset | Median | P95 | Max | -|-------|---------|-------:|----:|----:| -| Match Nodes | local/phantom | 1.4ms | 2.3ms | 2.3ms | -| Match Edges | local/phantom | 1.6ms | 1.9ms | 1.9ms | -| Filter By Kind / User | local/phantom | 2.0ms | 2.6ms | 2.6ms | -| Filter By Kind / Group | local/phantom | 2.1ms | 2.3ms | 2.3ms | -| Filter By Kind / Computer | local/phantom | 1.6ms | 2.0ms | 2.0ms | -| Traversal Depth / depth 1 | local/phantom | 1.4ms | 2.1ms | 2.1ms | -| Traversal Depth / depth 2 | local/phantom | 1.6ms | 1.9ms | 1.9ms | -| Traversal Depth / depth 3 | local/phantom | 2.5ms | 3.3ms | 3.3ms | -| Edge Kind Traversal / MemberOf | local/phantom | 1.2ms | 1.4ms | 1.4ms | -| Edge Kind Traversal / GenericAll | local/phantom | 1.1ms | 1.5ms | 1.5ms | -| Edge Kind Traversal / HasSession | local/phantom | 1.1ms | 1.4ms | 1.4ms | -| Shortest Paths / 41 -> 587 | local/phantom | 1.5ms | 1.9ms | 1.9ms | +| Query | Dataset | Rows | Distinct Rows | Duplicate Rows | Median | P95 | Max | Explain | +|-------|---------|-----:|--------------:|---------------:|-------:|----:|----:|:--------| +| Match Nodes | local/phantom | 1000 | - | - | 1.4ms | 2.3ms | 2.3ms | - | +| Match Edges | local/phantom | 2000 | - | - | 1.6ms | 1.9ms | 
1.9ms | - | ## Example: PG on local/phantom @@ -65,17 +59,7 @@ $ export CONNECTION_STRING="postgresql://dawgs:dawgs@localhost:5432/dawgs" $ go run ./cmd/benchmark -dataset local/phantom ``` -| Query | Dataset | Median | P95 | Max | -|-------|---------|-------:|----:|----:| -| Match Nodes | local/phantom | 2.0ms | 6.5ms | 6.5ms | -| Match Edges | local/phantom | 464ms | 604ms | 604ms | -| Filter By Kind / User | local/phantom | 4.5ms | 18.3ms | 18.3ms | -| Filter By Kind / Group | local/phantom | 6.2ms | 28.8ms | 28.8ms | -| Filter By Kind / Computer | local/phantom | 1.1ms | 5.5ms | 5.5ms | -| Traversal Depth / depth 1 | local/phantom | 596ms | 636ms | 636ms | -| Traversal Depth / depth 2 | local/phantom | 639ms | 660ms | 660ms | -| Traversal Depth / depth 3 | local/phantom | 726ms | 745ms | 745ms | -| Edge Kind Traversal / MemberOf | local/phantom | 602ms | 627ms | 627ms | -| Edge Kind Traversal / GenericAll | local/phantom | 676ms | 791ms | 791ms | -| Edge Kind Traversal / HasSession | local/phantom | 682ms | 778ms | 778ms | -| Shortest Paths / 41 -> 587 | local/phantom | 708ms | 731ms | 731ms | +| Query | Dataset | Rows | Distinct Rows | Duplicate Rows | Median | P95 | Max | Explain | +|-------|---------|-----:|--------------:|---------------:|-------:|----:|----:|:--------| +| Match Nodes | local/phantom | 1000 | - | - | 2.0ms | 6.5ms | 6.5ms | - | +| Match Edges | local/phantom | 2000 | - | - | 464ms | 604ms | 604ms | - | diff --git a/cmd/benchmark/explain.go b/cmd/benchmark/explain.go new file mode 100644 index 00000000..ca1334f7 --- /dev/null +++ b/cmd/benchmark/explain.go @@ -0,0 +1,76 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "fmt" + + "github.com/specterops/dawgs/cypher/frontend" + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/translate" + "github.com/specterops/dawgs/graph" +) + +// ExplainResult captures PostgreSQL-specific plan diagnostics for a scenario. +type ExplainResult struct { + SQL string `json:"sql"` + Plan []string `json:"plan"` + Optimization translate.OptimizationSummary `json:"optimization"` +} + +func newPostgresExplainer(kindMapper pgsql.KindMapper, graphID int32) ExplainFunc { + return func(ctx context.Context, tx graph.Transaction, cypherQuery string) (*ExplainResult, error) { + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), cypherQuery) + if err != nil { + return nil, err + } + + translation, err := translate.Translate(ctx, regularQuery, kindMapper, nil, graphID) + if err != nil { + return nil, err + } + + sqlQuery, err := translate.Translated(translation) + if err != nil { + return nil, err + } + + result := tx.Raw("EXPLAIN (ANALYZE, BUFFERS) "+sqlQuery, translation.Parameters) + defer result.Close() + + var plan []string + for result.Next() { + values := result.Values() + if len(values) == 0 { + continue + } + + plan = append(plan, fmt.Sprint(values[0])) + } + + if err := result.Error(); err != nil { + return nil, err + } + + return &ExplainResult{ + SQL: sqlQuery, + Plan: plan, + Optimization: translation.Optimization, + }, nil + } +} diff --git a/cmd/benchmark/main.go 
b/cmd/benchmark/main.go index 34c7a8b6..0045977a 100644 --- a/cmd/benchmark/main.go +++ b/cmd/benchmark/main.go @@ -43,6 +43,7 @@ func main() { iterations = flag.Int("iterations", 10, "timed iterations per scenario") output = flag.String("output", "", "markdown output file (default: stdout)") jsonOutput = flag.String("json-output", "", "JSON output file for baseline comparison") + explain = flag.Bool("explain", false, "capture PostgreSQL EXPLAIN (ANALYZE, BUFFERS) for Cypher scenarios") datasetDir = flag.String("dataset-dir", "integration/testdata", "path to testdata directory") localDataset = flag.String("local-dataset", "", "additional local dataset (e.g. local/phantom)") onlyDataset = flag.String("dataset", "", "run only this dataset (e.g. diamond, local/phantom)") @@ -50,6 +51,10 @@ func main() { flag.Parse() + if err := validateIterations(*iterations); err != nil { + fatal("%v", err) + } + conn := *connStr if conn == "" { conn = os.Getenv("CONNECTION_STRING") @@ -110,6 +115,19 @@ func main() { fatal("failed to assert schema: %v", err) } + var runOptions RunOptions + if *explain { + if *driver != pg.DriverName { + fmt.Fprintf(os.Stderr, " explain capture is only supported for pg; continuing without plans\n") + } else if pgDB, ok := db.(*pg.Driver); !ok { + fmt.Fprintf(os.Stderr, " explain capture unavailable for %T; continuing without plans\n", db) + } else if defaultGraph, hasDefaultGraph := pgDB.DefaultGraph(); !hasDefaultGraph { + fatal("failed to resolve default graph for explain capture") + } else { + runOptions.Explain = newPostgresExplainer(pgDB.KindMapper(), defaultGraph.ID) + } + } + report := Report{ Driver: *driver, GitRef: gitRef(), @@ -140,18 +158,22 @@ func main() { // Run scenarios for _, s := range scenariosForDataset(ds, idMap) { - result, err := runScenario(ctx, db, s, *iterations) + result, err := runScenario(ctx, db, s, *iterations, runOptions) if err != nil { fmt.Fprintf(os.Stderr, " %s/%s failed: %v\n", s.Section, s.Label, err) continue } 
report.Results = append(report.Results, result) - fmt.Fprintf(os.Stderr, " %s/%s: median=%s p95=%s max=%s\n", + fmt.Fprintf(os.Stderr, " %s/%s: rows=%d distinct=%s duplicates=%s median=%s p95=%s max=%s explain=%s\n", s.Section, s.Label, + result.RowCount, + fmtOptionalInt64(result.DistinctRowCount), + fmtOptionalInt64(result.DuplicateRowCount), fmtDuration(result.Stats.Median), fmtDuration(result.Stats.P95), fmtDuration(result.Stats.Max), + fmtExplainStatus(result.Explain), ) } } diff --git a/cmd/benchmark/report.go b/cmd/benchmark/report.go index dacab7dd..5d08bfd8 100644 --- a/cmd/benchmark/report.go +++ b/cmd/benchmark/report.go @@ -40,8 +40,8 @@ func writeJSON(w io.Writer, r Report) error { func writeMarkdown(w io.Writer, r Report) error { fmt.Fprintf(w, "# Benchmarks — %s @ %s (%s, %d iterations)\n\n", r.Driver, r.GitRef, r.Date, r.Iterations) - fmt.Fprintf(w, "| Query | Dataset | Median | P95 | Max |\n") - fmt.Fprintf(w, "|-------|---------|-------:|----:|----:|\n") + fmt.Fprintf(w, "| Query | Dataset | Rows | Distinct Rows | Duplicate Rows | Median | P95 | Max | Explain |\n") + fmt.Fprintf(w, "|-------|---------|-----:|--------------:|---------------:|-------:|----:|----:|:--------|\n") for _, res := range r.Results { label := res.Section @@ -49,12 +49,16 @@ func writeMarkdown(w io.Writer, r Report) error { label = res.Section + " / " + res.Label } - fmt.Fprintf(w, "| %s | %s | %s | %s | %s |\n", + fmt.Fprintf(w, "| %s | %s | %d | %s | %s | %s | %s | %s | %s |\n", label, res.Dataset, + res.RowCount, + fmtOptionalInt64(res.DistinctRowCount), + fmtOptionalInt64(res.DuplicateRowCount), fmtDuration(res.Stats.Median), fmtDuration(res.Stats.P95), fmtDuration(res.Stats.Max), + fmtExplainStatus(res.Explain), ) } @@ -62,6 +66,22 @@ func writeMarkdown(w io.Writer, r Report) error { return nil } +func fmtOptionalInt64(value *int64) string { + if value == nil { + return "-" + } + + return fmt.Sprintf("%d", *value) +} + +func fmtExplainStatus(explain *ExplainResult) 
string { + if explain == nil { + return "-" + } + + return "captured" +} + func fmtDuration(d time.Duration) string { ms := float64(d.Microseconds()) / 1000.0 if ms < 1 { diff --git a/cmd/benchmark/report_test.go b/cmd/benchmark/report_test.go index 2d72ed4d..1d51b5dc 100644 --- a/cmd/benchmark/report_test.go +++ b/cmd/benchmark/report_test.go @@ -21,18 +21,55 @@ import ( "strings" "testing" "time" + + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" + "github.com/specterops/dawgs/cypher/models/pgsql/translate" ) func TestWriteJSONEmitsBaselineFriendlyReport(t *testing.T) { + var ( + distinctRows = int64(2) + duplicateRows = int64(0) + ) + loweringPlan := optimize.LoweringPlan{ + ProjectionPruning: []optimize.ProjectionPruningDecision{{ + Target: optimize.TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + ReferencedSymbols: []string{"m"}, + }}, + } + report := Report{ Driver: "pg", GitRef: "abc123", Date: "2026-05-14", Iterations: 3, Results: []Result{{ - Section: "Traversal", - Dataset: "base", - Label: "depth 1", + Section: "Traversal", + Dataset: "base", + Label: "depth 1", + RowCount: 2, + DistinctRowCount: &distinctRows, + DuplicateRowCount: &duplicateRows, + Explain: &ExplainResult{ + SQL: "select 1;", + Plan: []string{"Result (actual rows=1 loops=1)"}, + Optimization: translate.OptimizationSummary{ + Rules: []optimize.RuleResult{{ + Name: "ExpansionSuffixPushdown", + Applied: true, + }}, + PlannedLowerings: loweringPlan.Decisions(), + Lowerings: []optimize.LoweringDecision{{ + Name: "ProjectionPruning", + }}, + LoweringPlan: &loweringPlan, + }, + }, Stats: Stats{ Median: 10 * time.Millisecond, P95: 20 * time.Millisecond, @@ -51,6 +88,19 @@ func TestWriteJSONEmitsBaselineFriendlyReport(t *testing.T) { `"driver": "pg"`, `"git_ref": "abc123"`, `"median": 10000000`, + `"row_count": 2`, + `"distinct_row_count": 2`, + `"duplicate_row_count": 0`, + `"sql": "select 1;"`, + `"optimization": {`, + `"name": 
"ExpansionSuffixPushdown"`, + `"applied": true`, + `"planned_lowerings": [`, + `"lowerings": [`, + `"name": "ProjectionPruning"`, + `"lowering_plan": {`, + `"projection_pruning": [`, + `"referenced_symbols": [`, `"section": "Traversal"`, } { if !strings.Contains(text, expected) { @@ -58,3 +108,57 @@ func TestWriteJSONEmitsBaselineFriendlyReport(t *testing.T) { } } } + +func TestWriteMarkdownIncludesDiagnosticColumns(t *testing.T) { + var ( + distinctRows = int64(2) + duplicateRows = int64(0) + ) + + report := Report{ + Driver: "pg", + GitRef: "abc123", + Date: "2026-05-14", + Iterations: 3, + Results: []Result{{ + Section: "ADCS Fanout", + Dataset: "adcs_fanout", + Label: "combined", + RowCount: 2, + DistinctRowCount: &distinctRows, + DuplicateRowCount: &duplicateRows, + Explain: &ExplainResult{Plan: []string{"Result"}}, + Stats: Stats{ + Median: 10 * time.Millisecond, + P95: 20 * time.Millisecond, + Max: 30 * time.Millisecond, + }, + }}, + } + + var output bytes.Buffer + if err := writeMarkdown(&output, report); err != nil { + t.Fatalf("write markdown: %v", err) + } + + text := output.String() + for _, expected := range []string{ + "Distinct Rows", + "Duplicate Rows", + "| ADCS Fanout / combined | adcs_fanout | 2 | 2 | 0 | 10.0ms | 20.0ms | 30.0ms | captured |", + } { + if !strings.Contains(text, expected) { + t.Fatalf("markdown report missing %q:\n%s", expected, text) + } + } +} + +func TestValidateIterationsRejectsZero(t *testing.T) { + if err := validateIterations(0); err == nil { + t.Fatal("expected zero iterations to be rejected") + } + + if err := validateIterations(1); err != nil { + t.Fatalf("expected one iteration to be valid: %v", err) + } +} diff --git a/cmd/benchmark/runner.go b/cmd/benchmark/runner.go index b146f11d..929593b4 100644 --- a/cmd/benchmark/runner.go +++ b/cmd/benchmark/runner.go @@ -18,12 +18,19 @@ package main import ( "context" + "fmt" "sort" "time" "github.com/specterops/dawgs/graph" ) +type ExplainFunc func(ctx context.Context, tx 
graph.Transaction, cypher string) (*ExplainResult, error) + +type RunOptions struct { + Explain ExplainFunc +} + // Stats holds computed timing statistics for a scenario. type Stats struct { Median time.Duration `json:"median"` @@ -33,16 +40,29 @@ type Stats struct { // Result is one row in the report. type Result struct { - Section string `json:"section"` - Dataset string `json:"dataset"` - Label string `json:"label"` - Stats Stats `json:"stats"` + Section string `json:"section"` + Dataset string `json:"dataset"` + Label string `json:"label"` + RowCount int64 `json:"row_count"` + DistinctRowCount *int64 `json:"distinct_row_count,omitempty"` + DuplicateRowCount *int64 `json:"duplicate_row_count,omitempty"` + Explain *ExplainResult `json:"explain,omitempty"` + Stats Stats `json:"stats"` } // runScenario executes a scenario N times and returns timing stats. -func runScenario(ctx context.Context, db graph.Database, s Scenario, iterations int) (Result, error) { +func runScenario(ctx context.Context, db graph.Database, s Scenario, iterations int, options RunOptions) (Result, error) { + if err := validateIterations(iterations); err != nil { + return Result{}, err + } + // Warm-up: one untimed run. 
- if err := db.ReadTransaction(ctx, s.Query); err != nil { + var measurement Measurement + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + nextMeasurement, err := s.Query(tx) + measurement = nextMeasurement + return err + }); err != nil { return Result{}, err } @@ -50,18 +70,44 @@ func runScenario(ctx context.Context, db graph.Database, s Scenario, iterations for i := range iterations { start := time.Now() - if err := db.ReadTransaction(ctx, s.Query); err != nil { + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + _, err := s.Query(tx) + return err + }); err != nil { return Result{}, err } durations[i] = time.Since(start) } - return Result{ - Section: s.Section, - Dataset: s.Dataset, - Label: s.Label, - Stats: computeStats(durations), - }, nil + result := Result{ + Section: s.Section, + Dataset: s.Dataset, + Label: s.Label, + RowCount: measurement.RowCount, + DistinctRowCount: measurement.DistinctRowCount, + DuplicateRowCount: measurement.DuplicateRowCount, + Stats: computeStats(durations), + } + + if options.Explain != nil && s.Cypher != "" { + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + explain, err := options.Explain(ctx, tx, s.Cypher) + result.Explain = explain + return err + }); err != nil { + return Result{}, err + } + } + + return result, nil +} + +func validateIterations(iterations int) error { + if iterations < 1 { + return fmt.Errorf("iterations must be at least 1") + } + + return nil } func computeStats(durations []time.Duration) Stats { diff --git a/cmd/benchmark/scenarios.go b/cmd/benchmark/scenarios.go index 217ae63d..4aad262b 100644 --- a/cmd/benchmark/scenarios.go +++ b/cmd/benchmark/scenarios.go @@ -18,27 +18,39 @@ package main import ( "fmt" + "strconv" + "strings" "github.com/specterops/dawgs/graph" "github.com/specterops/dawgs/opengraph" ) +// Measurement captures the warm-up result shape for a benchmark scenario. 
+type Measurement struct { + RowCount int64 + DistinctRowCount *int64 + DuplicateRowCount *int64 +} + // Scenario defines a single benchmark query to run against a loaded dataset. type Scenario struct { Section string // grouping key in the report (e.g. "Match Nodes") Dataset string Label string // human-readable row label - Query func(tx graph.Transaction) error + Cypher string + Query func(tx graph.Transaction) (Measurement, error) } // defaultDatasets is the set of datasets committed to the repo. -var defaultDatasets = []string{"base"} +var defaultDatasets = []string{"base", "adcs_fanout"} // scenariosForDataset returns all benchmark scenarios for a given dataset and its loaded ID map. func scenariosForDataset(dataset string, idMap opengraph.IDMap) []Scenario { switch dataset { case "base": return baseScenarios(idMap) + case "adcs_fanout": + return adcsFanoutScenarios() case "local/phantom": return phantomScenarios(idMap) default: @@ -46,24 +58,142 @@ func scenariosForDataset(dataset string, idMap opengraph.IDMap) []Scenario { } } -func countNodes(tx graph.Transaction) error { - _, err := tx.Nodes().Count() - return err +func countNodes(tx graph.Transaction) (int64, error) { + return tx.Nodes().Count() +} + +func countEdges(tx graph.Transaction) (int64, error) { + return tx.Relationships().Count() +} + +func cypherQuery(cypher string) func(tx graph.Transaction) (Measurement, error) { + return func(tx graph.Transaction) (Measurement, error) { + result := tx.Query(cypher, nil) + defer result.Close() + + var rowCount int64 + for result.Next() { + rowCount++ + } + + return Measurement{RowCount: rowCount}, result.Error() + } +} + +func countQuery(query func(tx graph.Transaction) (int64, error)) func(tx graph.Transaction) (Measurement, error) { + return func(tx graph.Transaction) (Measurement, error) { + rowCount, err := query(tx) + if err != nil { + return Measurement{}, err + } + + return Measurement{RowCount: rowCount}, nil + } } -func countEdges(tx 
graph.Transaction) error { - _, err := tx.Relationships().Count() - return err +func cypherScenario(section, dataset, label, cypher string) Scenario { + return Scenario{ + Section: section, + Dataset: dataset, + Label: label, + Cypher: cypher, + Query: cypherQuery(cypher), + } } -func cypherQuery(cypher string) func(tx graph.Transaction) error { - return func(tx graph.Transaction) error { +func cypherPathScenario(section, dataset, label, cypher string, pathColumns int) Scenario { + return Scenario{ + Section: section, + Dataset: dataset, + Label: label, + Cypher: cypher, + Query: cypherPathQuery(cypher, pathColumns), + } +} + +func cypherPathQuery(cypher string, pathColumns int) func(tx graph.Transaction) (Measurement, error) { + return func(tx graph.Transaction) (Measurement, error) { result := tx.Query(cypher, nil) defer result.Close() + + var ( + rowCount int64 + seen = map[string]struct{}{} + ) + for result.Next() { + rowCount++ + + var ( + values = make([]graph.Path, pathColumns) + targets = make([]any, pathColumns) + ) + for idx := range values { + targets[idx] = &values[idx] + } + + if err := result.Scan(targets...); err != nil { + return Measurement{}, err + } + + seen[pathRowKey(values)] = struct{}{} + } + + if err := result.Error(); err != nil { + return Measurement{}, err + } + + var ( + distinctRowCount = int64(len(seen)) + duplicateRowCount = rowCount - distinctRowCount + ) + + return Measurement{ + RowCount: rowCount, + DistinctRowCount: &distinctRowCount, + DuplicateRowCount: &duplicateRowCount, + }, nil + } +} + +func pathRowKey(paths []graph.Path) string { + var builder strings.Builder + + for pathIdx, path := range paths { + if pathIdx > 0 { + builder.WriteByte('|') + } + + builder.WriteByte('n') + for _, node := range path.Nodes { + builder.WriteByte(':') + if node == nil { + builder.WriteString("nil") + continue + } + + builder.WriteString(strconv.FormatUint(node.ID.Uint64(), 10)) + } + + builder.WriteString(";e") + for _, edge := range 
path.Edges { + builder.WriteByte(':') + if edge == nil { + builder.WriteString("nil") + continue + } + + builder.WriteString(strconv.FormatUint(edge.ID.Uint64(), 10)) + builder.WriteByte(',') + builder.WriteString(strconv.FormatUint(edge.StartID.Uint64(), 10)) + builder.WriteByte(',') + builder.WriteString(strconv.FormatUint(edge.EndID.Uint64(), 10)) + builder.WriteByte(',') + builder.WriteString(edge.Kind.String()) } - return result.Error() } + + return builder.String() } // --- Base dataset scenarios (n1 -> n2 -> n3) --- @@ -71,22 +201,61 @@ func cypherQuery(cypher string) func(tx graph.Transaction) error { func baseScenarios(idMap opengraph.IDMap) []Scenario { ds := "base" return []Scenario{ - {Section: "Match Nodes", Dataset: ds, Label: ds, Query: countNodes}, - {Section: "Match Edges", Dataset: ds, Label: ds, Query: countEdges}, - {Section: "Shortest Paths", Dataset: ds, Label: "n1 -> n3", Query: cypherQuery(fmt.Sprintf( + {Section: "Match Nodes", Dataset: ds, Label: ds, Query: countQuery(countNodes)}, + {Section: "Match Edges", Dataset: ds, Label: ds, Query: countQuery(countEdges)}, + cypherScenario("Shortest Paths", ds, "n1 -> n3", fmt.Sprintf( "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE id(s) = %d AND id(e) = %d RETURN p", idMap["n1"], idMap["n3"], - ))}, - {Section: "Traversal", Dataset: ds, Label: "n1", Query: cypherQuery(fmt.Sprintf( + )), + cypherScenario("Traversal", ds, "n1", fmt.Sprintf( "MATCH (s)-[*1..]->(e) WHERE id(s) = %d RETURN e", idMap["n1"], - ))}, - {Section: "Match Return", Dataset: ds, Label: "n1", Query: cypherQuery(fmt.Sprintf( + )), + cypherScenario("Match Return", ds, "n1", fmt.Sprintf( "MATCH (s)-[]->(e) WHERE id(s) = %d RETURN e", idMap["n1"], - ))}, - {Section: "Filter By Kind", Dataset: ds, Label: "NodeKind1", Query: cypherQuery("MATCH (n:NodeKind1) RETURN n")}, - {Section: "Filter By Kind", Dataset: ds, Label: "NodeKind2", Query: cypherQuery("MATCH (n:NodeKind2) RETURN n")}, + )), + cypherScenario("Filter By Kind", ds, 
"NodeKind1", "MATCH (n:NodeKind1) RETURN n"), + cypherScenario("Filter By Kind", ds, "NodeKind2", "MATCH (n:NodeKind2) RETURN n"), + } +} + +const adcsFanoutObjectID = "S-1-5-21-2643190041-1319121918-239771340-513" + +func adcsFanoutScenarios() []Scenario { + ds := "adcs_fanout" + + p1 := fmt.Sprintf(` +MATCH (n:Group) WHERE n.objectid = '%s' +MATCH p1 = (n)-[:MemberOf*0..]->()-[:Enroll]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(:NTAuthStore)-[:NTAuthStoreFor]->(d:Domain) +RETURN p1 +`, adcsFanoutObjectID) + + p2 := fmt.Sprintf(` +MATCH (n:Group) WHERE n.objectid = '%s' +MATCH p2 = (n)-[:MemberOf*0..]->()-[:GenericAll|Enroll|AllExtendedRights]->(ct:CertTemplate)-[:PublishedTo]->(ca:EnterpriseCA)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(:RootCA)-[:RootCAFor]->(d:Domain) +WHERE ct.authenticationenabled = true +AND ct.requiresmanagerapproval = false +AND ct.enrolleesuppliessubject = true +AND (ct.schemaversion = 1 OR ct.authorizedsignatures = 0) +RETURN p2 +`, adcsFanoutObjectID) + + combinedMatch := fmt.Sprintf(` +MATCH (n:Group) WHERE n.objectid = '%s' +MATCH p1 = (n)-[:MemberOf*0..]->()-[:Enroll]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(:NTAuthStore)-[:NTAuthStoreFor]->(d:Domain) +MATCH p2 = (n)-[:MemberOf*0..]->()-[:GenericAll|Enroll|AllExtendedRights]->(ct:CertTemplate)-[:PublishedTo]->(ca)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(:RootCA)-[:RootCAFor]->(d) +WHERE ct.authenticationenabled = true +AND ct.requiresmanagerapproval = false +AND ct.enrolleesuppliessubject = true +AND (ct.schemaversion = 1 OR ct.authorizedsignatures = 0) +`, adcsFanoutObjectID) + + return []Scenario{ + cypherPathScenario("ADCS Fanout", ds, "p1 only", p1, 1), + cypherPathScenario("ADCS Fanout", ds, "p2 only", p2, 1), + cypherPathScenario("ADCS Fanout", ds, "combined", combinedMatch+"RETURN p1,p2", 2), + cypherScenario("ADCS Fanout", ds, "combined endpoints", combinedMatch+"RETURN id(ca), id(d), id(ct)"), } } @@ -96,59 +265,54 @@ func phantomScenarios(idMap opengraph.IDMap) []Scenario { ds 
:= "local/phantom" scenarios := []Scenario{ - {Section: "Match Nodes", Dataset: ds, Label: ds, Query: countNodes}, - {Section: "Match Edges", Dataset: ds, Label: ds, Query: countEdges}, + {Section: "Match Nodes", Dataset: ds, Label: ds, Query: countQuery(countNodes)}, + {Section: "Match Edges", Dataset: ds, Label: ds, Query: countQuery(countEdges)}, } for _, kind := range []string{"User", "Group", "Computer"} { k := kind - scenarios = append(scenarios, Scenario{ - Section: "Filter By Kind", - Dataset: ds, - Label: k, - Query: cypherQuery(fmt.Sprintf("MATCH (n:%s) RETURN n", k)), - }) + scenarios = append(scenarios, cypherScenario("Filter By Kind", ds, k, fmt.Sprintf("MATCH (n:%s) RETURN n", k))) } if _, ok := idMap["41"]; ok { for _, depth := range []int{1, 2, 3} { d := depth - scenarios = append(scenarios, Scenario{ - Section: "Traversal Depth", - Dataset: ds, - Label: fmt.Sprintf("depth %d", d), - Query: cypherQuery(fmt.Sprintf( + scenarios = append(scenarios, cypherScenario( + "Traversal Depth", + ds, + fmt.Sprintf("depth %d", d), + fmt.Sprintf( "MATCH (s)-[*1..%d]->(e) WHERE id(s) = %d RETURN e", d, idMap["41"], - )), - }) + ), + )) } for _, ek := range []string{"MemberOf", "GenericAll", "HasSession"} { edgeKind := ek - scenarios = append(scenarios, Scenario{ - Section: "Edge Kind Traversal", - Dataset: ds, - Label: edgeKind, - Query: cypherQuery(fmt.Sprintf( + scenarios = append(scenarios, cypherScenario( + "Edge Kind Traversal", + ds, + edgeKind, + fmt.Sprintf( "MATCH (s)-[:%s*1..]->(e) WHERE id(s) = %d RETURN e", edgeKind, idMap["41"], - )), - }) + ), + )) } } if _, ok := idMap["41"]; ok { if _, ok := idMap["587"]; ok { - scenarios = append(scenarios, Scenario{ - Section: "Shortest Paths", - Dataset: ds, - Label: "41 -> 587", - Query: cypherQuery(fmt.Sprintf( + scenarios = append(scenarios, cypherScenario( + "Shortest Paths", + ds, + "41 -> 587", + fmt.Sprintf( "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE id(s) = %d AND id(e) = %d RETURN p", 
idMap["41"], idMap["587"], - )), - }) + ), + )) } } diff --git a/cmd/graphbench/README.md b/cmd/graphbench/README.md new file mode 100644 index 00000000..ac530326 --- /dev/null +++ b/cmd/graphbench/README.md @@ -0,0 +1,74 @@ +# GraphBench + +`graphbench` runs the scale benchmark corpus under `benchmark/testdata/scale`. +It is meant for runtime gap accounting: query duration, returned row counts, +PostgreSQL plan details, Neo4j plan operators, fallback reasons, and comparison +summaries. + +The current execution modes are: + +- `postgres_sql`: runs DAWGS' PostgreSQL SQL translation against a PostgreSQL database. +- `local_traversal`: records explicit `not_implemented` placeholders until the local traversal executor lands. +- `neo4j`: runs the same corpus against Neo4j through the DAWGS Neo4j backend. + +Apache AGE is not an execution mode in this harness yet. AGE behavior can be +captured in corpus `reference_design` notes so DAWGS can use it as design input +without treating it as a direct benchmark comparison. + +## Inputs + +The command loads cases from `benchmark/testdata/scale` by default and imports +the fixture datasets from `integration/testdata`. + +Connection strings can be supplied as flags or environment variables: + +- PostgreSQL: `-pg-connection`, `PG_CONNECTION_STRING`, `-connection`, or `CONNECTION_STRING`. +- Neo4j: `-neo4j-connection`, `NEO4J_CONNECTION_STRING`, `-connection`, or `CONNECTION_STRING`. 
+ +## Examples + +Run only PostgreSQL SQL translation: + +```bash +go run ./cmd/graphbench \ + -modes postgres_sql \ + -pg-connection "$PG_CONNECTION_STRING" \ + -jsonl-output .coverage/graphbench-postgres.jsonl \ + -summary .coverage/graphbench-postgres.md \ + -summary-json .coverage/graphbench-postgres.json +``` + +Capture PostgreSQL, local traversal placeholders, and Neo4j in one report: + +```bash +go run ./cmd/graphbench \ + -modes postgres_sql,local_traversal,neo4j \ + -pg-connection "$PG_CONNECTION_STRING" \ + -neo4j-connection "$NEO4J_CONNECTION_STRING" \ + -jsonl-output .coverage/graphbench.jsonl \ + -summary .coverage/graphbench.md \ + -summary-json .coverage/graphbench.json +``` + +Compare a run against a previous JSONL capture: + +```bash +go run ./cmd/graphbench \ + -modes postgres_sql,neo4j \ + -pg-connection "$PG_CONNECTION_STRING" \ + -neo4j-connection "$NEO4J_CONNECTION_STRING" \ + -baseline .coverage/graphbench-baseline.jsonl \ + -jsonl-output .coverage/graphbench.jsonl \ + -summary .coverage/graphbench.md +``` + +## Outputs + +JSONL output contains one `CaseResult` record per case and execution mode. +Markdown and JSON summaries aggregate mode status counts, per-case timings, row +counts, fallback reasons, and baseline regressions or improvements when a +baseline capture is supplied. + +PostgreSQL records include translated SQL and `EXPLAIN (ANALYZE, BUFFERS, +TIMING OFF, FORMAT JSON)` metrics. Neo4j records include plan operator names +when an `EXPLAIN` plan can be captured. diff --git a/cmd/graphbench/corpus.go b/cmd/graphbench/corpus.go new file mode 100644 index 00000000..7d1c9075 --- /dev/null +++ b/cmd/graphbench/corpus.go @@ -0,0 +1,123 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" +) + +func loadScaleCorpus(root string) (ScaleCorpus, error) { + casePaths, err := filepath.Glob(filepath.Join(root, "cases", "*.json")) + if err != nil { + return ScaleCorpus{}, fmt.Errorf("glob scale cases: %w", err) + } + if len(casePaths) == 0 { + return ScaleCorpus{}, fmt.Errorf("no scale case files found under %s", filepath.Join(root, "cases")) + } + + sort.Strings(casePaths) + + var corpus ScaleCorpus + for _, path := range casePaths { + var file ScaleCaseFile + if err := decodeJSONFile(path, &file); err != nil { + return ScaleCorpus{}, err + } + + source := filepath.ToSlash(path) + for idx, testCase := range file.Cases { + testCase.Source = source + if err := validateScaleCase(testCase); err != nil { + return ScaleCorpus{}, fmt.Errorf("%s case %d: %w", source, idx, err) + } + + corpus.Cases = append(corpus.Cases, testCase) + } + } + + return corpus, nil +} + +func validateScaleCase(testCase ScaleCase) error { + if testCase.Name == "" { + return fmt.Errorf("name is required") + } + if testCase.Dataset == "" { + return fmt.Errorf("dataset is required") + } + if testCase.Category == "" { + return fmt.Errorf("category is required") + } + if testCase.Cypher == "" { + return fmt.Errorf("cypher is required") + } + if len(testCase.CandidateModes) == 0 { + return fmt.Errorf("candidate_modes is required") + } + + for _, mode := range testCase.CandidateModes { + if !mode.Valid() { + return fmt.Errorf("unsupported 
candidate mode %q", mode) + } + } + + return nil +} + +func decodeJSONFile(path string, target any) error { + raw, err := os.ReadFile(path) + if err != nil { + return fmt.Errorf("read %s: %w", path, err) + } + if err := json.Unmarshal(raw, target); err != nil { + return fmt.Errorf("decode %s: %w", path, err) + } + + return nil +} + +func scaleCorpusDatasets(corpus ScaleCorpus) []string { + var ( + seen = map[string]struct{}{} + datasets = make([]string, 0) + ) + + for _, testCase := range corpus.Cases { + if _, duplicate := seen[testCase.Dataset]; duplicate { + continue + } + + seen[testCase.Dataset] = struct{}{} + datasets = append(datasets, testCase.Dataset) + } + + sort.Strings(datasets) + return datasets +} + +func scaleCasesByDataset(corpus ScaleCorpus) map[string][]ScaleCase { + grouped := map[string][]ScaleCase{} + for _, testCase := range corpus.Cases { + grouped[testCase.Dataset] = append(grouped[testCase.Dataset], testCase) + } + + return grouped +} diff --git a/cmd/graphbench/corpus_test.go b/cmd/graphbench/corpus_test.go new file mode 100644 index 00000000..211b2084 --- /dev/null +++ b/cmd/graphbench/corpus_test.go @@ -0,0 +1,45 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestLoadScaleCorpus(t *testing.T) { + corpus, err := loadScaleCorpus("../../benchmark/testdata/scale") + require.NoError(t, err) + require.NotEmpty(t, corpus.Cases) + + for _, testCase := range corpus.Cases { + require.NotEqual(t, "", testCase.Source) + require.True(t, testCase.Supports(ModePostgresSQL), "postgres_sql should be part of the initial corpus for %s", testCase.Name) + require.False(t, testCase.Supports(ExecutionMode("age")), "AGE is a reference design only for %s", testCase.Name) + } +} + +func TestScaleCorpusDatasets(t *testing.T) { + corpus := ScaleCorpus{Cases: []ScaleCase{ + {Name: "a", Dataset: "base", Category: "counts", Cypher: "return 1", CandidateModes: []ExecutionMode{ModePostgresSQL}}, + {Name: "b", Dataset: "adcs_fanout", Category: "counts", Cypher: "return 1", CandidateModes: []ExecutionMode{ModePostgresSQL}}, + {Name: "c", Dataset: "base", Category: "counts", Cypher: "return 1", CandidateModes: []ExecutionMode{ModePostgresSQL}}, + }} + + require.Equal(t, []string{"adcs_fanout", "base"}, scaleCorpusDatasets(corpus)) +} diff --git a/cmd/graphbench/datasets.go b/cmd/graphbench/datasets.go new file mode 100644 index 00000000..af400ca8 --- /dev/null +++ b/cmd/graphbench/datasets.go @@ -0,0 +1,117 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" +) + +const defaultGraphName = "integration_test" + +func scanDatasetKinds(datasetDir string, datasetNames []string) (graph.Kinds, graph.Kinds, error) { + var nodeKinds, edgeKinds graph.Kinds + + for _, datasetName := range datasetNames { + doc, err := parseDataset(datasetDir, datasetName) + if err != nil { + return nil, nil, err + } + + nextNodeKinds, nextEdgeKinds := doc.Graph.Kinds() + nodeKinds = nodeKinds.Add(nextNodeKinds...) + edgeKinds = edgeKinds.Add(nextEdgeKinds...) + } + + return nodeKinds, edgeKinds, nil +} + +func parseDataset(datasetDir, name string) (opengraph.Document, error) { + path := filepath.Join(datasetDir, name+".json") + f, err := os.Open(path) + if err != nil { + return opengraph.Document{}, fmt.Errorf("open dataset %s: %w", name, err) + } + defer f.Close() + + doc, err := opengraph.ParseDocument(f) + if err != nil { + return opengraph.Document{}, fmt.Errorf("parse dataset %s: %w", name, err) + } + + return doc, nil +} + +func loadDataset(ctx context.Context, db graph.Database, datasetDir, name string) (opengraph.IDMap, error) { + path := filepath.Join(datasetDir, name+".json") + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("open dataset %s: %w", name, err) + } + defer f.Close() + + idMap, err := opengraph.Load(ctx, db, f) + if err != nil { + return nil, fmt.Errorf("load dataset %s: %w", name, err) + } + + return idMap, nil +} + +func clearGraph(ctx context.Context, db graph.Database) error { + return db.WriteTransaction(ctx, func(tx graph.Transaction) error { + return tx.Nodes().Delete() + }) +} + +func benchmarkSchema(nodeKinds, edgeKinds graph.Kinds) graph.Schema { + return graph.Schema{ + Graphs: []graph.Graph{{ + Name: defaultGraphName, + Nodes: nodeKinds, + Edges: edgeKinds, + }}, + DefaultGraph: graph.Graph{Name: 
defaultGraphName}, + } +} + +func resolveCaseParams(testCase ScaleCase, idMap opengraph.IDMap) (map[string]any, error) { + params := make(map[string]any, len(testCase.Params)+len(testCase.NodeParams)) + for key, value := range testCase.Params { + params[key] = value + } + + for paramName, nodeName := range testCase.NodeParams { + id, found := idMap[nodeName] + if !found { + return nil, fmt.Errorf("case %s references unknown dataset node %q", testCase.Name, nodeName) + } + + params[paramName] = id.Int64() + } + + if len(params) == 0 { + return nil, nil + } + + return params, nil +} diff --git a/cmd/graphbench/local_traversal.go b/cmd/graphbench/local_traversal.go new file mode 100644 index 00000000..6fff61bd --- /dev/null +++ b/cmd/graphbench/local_traversal.go @@ -0,0 +1,35 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +const localTraversalUnavailableReason = "local traversal executor unavailable" + +func runLocalTraversalPlaceholders(corpus ScaleCorpus) []CaseResult { + records := make([]CaseResult, 0) + for _, testCase := range corpus.Cases { + if !testCase.Supports(ModeLocalTraversal) { + continue + } + + record := newCaseResult(testCase, ModeLocalTraversal, testCase.Params) + record.Status = StatusNotImplemented + record.FallbackReason = localTraversalUnavailableReason + records = append(records, record) + } + + return records +} diff --git a/cmd/graphbench/local_traversal_test.go b/cmd/graphbench/local_traversal_test.go new file mode 100644 index 00000000..39a7e728 --- /dev/null +++ b/cmd/graphbench/local_traversal_test.go @@ -0,0 +1,49 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestRunLocalTraversalPlaceholders(t *testing.T) { + records := runLocalTraversalPlaceholders(ScaleCorpus{Cases: []ScaleCase{ + { + Name: "supported", + Dataset: "base", + Category: "reachability", + Cypher: "MATCH (n) RETURN n", + NodeParams: map[string]string{"start_id": "n1"}, + CandidateModes: []ExecutionMode{ModePostgresSQL, ModeLocalTraversal}, + }, + { + Name: "unsupported", + Dataset: "base", + Category: "count", + Cypher: "MATCH (n) RETURN count(n)", + CandidateModes: []ExecutionMode{ModePostgresSQL}, + }, + }}) + + require.Len(t, records, 1) + require.Equal(t, ModeLocalTraversal, records[0].ExecutionMode) + require.Equal(t, StatusNotImplemented, records[0].Status) + require.Equal(t, localTraversalUnavailableReason, records[0].FallbackReason) + require.Equal(t, map[string]string{"start_id": "n1"}, records[0].NodeParams) +} diff --git a/cmd/graphbench/main.go b/cmd/graphbench/main.go new file mode 100644 index 00000000..b4ac4102 --- /dev/null +++ b/cmd/graphbench/main.go @@ -0,0 +1,202 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "flag" + "fmt" + "io" + "os" + "strings" +) + +type config struct { + CorpusRoot string + DatasetDir string + Connection string + PGConnection string + Neo4jConnection string + Modes []ExecutionMode + Iterations int + OutputJSONL string + Summary string + SummaryJSON string + Baseline string +} + +func parseConfig(args []string, env func(string) string) (config, error) { + flags := flag.NewFlagSet("graphbench", flag.ContinueOnError) + flags.SetOutput(io.Discard) + + var ( + cfg config + rawModes string + ) + + flags.StringVar(&cfg.CorpusRoot, "corpus-root", "benchmark/testdata/scale", "scale corpus root") + flags.StringVar(&cfg.DatasetDir, "dataset-dir", "integration/testdata", "dataset root") + flags.StringVar(&cfg.Connection, "connection", env("CONNECTION_STRING"), "single backend connection string") + flags.StringVar(&cfg.PGConnection, "pg-connection", env("PG_CONNECTION_STRING"), "PostgreSQL connection string") + flags.StringVar(&cfg.Neo4jConnection, "neo4j-connection", env("NEO4J_CONNECTION_STRING"), "Neo4j connection string") + flags.StringVar(&rawModes, "modes", string(ModePostgresSQL), "comma-separated execution modes") + flags.IntVar(&cfg.Iterations, "iterations", 3, "timed iterations per case") + flags.StringVar(&cfg.OutputJSONL, "jsonl-output", "", "JSONL output path (default: stdout)") + flags.StringVar(&cfg.Summary, "summary", "", "markdown summary output path") + flags.StringVar(&cfg.SummaryJSON, "summary-json", "", "JSON summary output path") + flags.StringVar(&cfg.Baseline, "baseline", "", "previous JSONL output for baseline comparison") + + if err := flags.Parse(args); err != nil { + return config{}, err + } + if cfg.Iterations < 1 { + return config{}, fmt.Errorf("iterations must be at least 1") + } + + modes, err := parseExecutionModes(rawModes) + if err != nil { + return config{}, err + } + cfg.Modes = modes + + return cfg, nil +} + +func parseExecutionModes(raw 
string) ([]ExecutionMode, error) { + var modes []ExecutionMode + seen := map[ExecutionMode]struct{}{} + + for _, part := range strings.Split(raw, ",") { + mode, err := parseExecutionMode(part) + if err != nil { + return nil, err + } + if _, duplicate := seen[mode]; duplicate { + continue + } + + seen[mode] = struct{}{} + modes = append(modes, mode) + } + if len(modes) == 0 { + return nil, fmt.Errorf("at least one execution mode is required") + } + + return modes, nil +} + +func fatal(format string, args ...any) { + fmt.Fprintf(os.Stderr, format+"\n", args...) + os.Exit(1) +} + +func main() { + cfg, err := parseConfig(os.Args[1:], os.Getenv) + if err != nil { + fatal("%v", err) + } + + corpus, err := loadScaleCorpus(cfg.CorpusRoot) + if err != nil { + fatal("load corpus: %v", err) + } + + ctx := context.Background() + var records []CaseResult + + for _, mode := range cfg.Modes { + switch mode { + case ModePostgresSQL: + pgConnection := cfg.PGConnection + if pgConnection == "" { + pgConnection = cfg.Connection + } + if pgConnection == "" { + fatal("postgres_sql mode requires -pg-connection, -connection, PG_CONNECTION_STRING, or CONNECTION_STRING") + } + + runner, err := newPostgresSQLRunner(ctx, cfg.DatasetDir, pgConnection, corpus) + if err != nil { + fatal("open postgres_sql runner: %v", err) + } + + nextRecords, err := runner.Run(ctx, cfg.Iterations, corpus) + closeErr := runner.Close(ctx) + if err != nil { + fatal("run postgres_sql: %v", err) + } + if closeErr != nil { + fatal("close postgres_sql: %v", closeErr) + } + + records = append(records, nextRecords...) 
+ + case ModeNeo4j: + neo4jConnection := cfg.Neo4jConnection + if neo4jConnection == "" { + neo4jConnection = cfg.Connection + } + if neo4jConnection == "" { + fatal("neo4j mode requires -neo4j-connection, -connection, NEO4J_CONNECTION_STRING, or CONNECTION_STRING") + } + + runner, err := newNeo4jRunner(ctx, cfg.DatasetDir, neo4jConnection, corpus) + if err != nil { + fatal("open neo4j runner: %v", err) + } + + nextRecords, err := runner.Run(ctx, cfg.Iterations, corpus) + closeErr := runner.Close(ctx) + if err != nil { + fatal("run neo4j: %v", err) + } + if closeErr != nil { + fatal("close neo4j: %v", closeErr) + } + + records = append(records, nextRecords...) + + case ModeLocalTraversal: + records = append(records, runLocalTraversalPlaceholders(corpus)...) + + default: + fatal("execution mode %s is not implemented yet", mode) + } + } + + if cfg.Baseline != "" { + if err := applyBaseline(cfg.Baseline, records); err != nil { + fatal("compare baseline: %v", err) + } + } + + if err := writeJSONLFile(cfg.OutputJSONL, records); err != nil { + fatal("write JSONL: %v", err) + } + + summary := buildSummary(records) + if cfg.Summary != "" { + if err := writeMarkdownSummaryFile(cfg.Summary, summary); err != nil { + fatal("write markdown summary: %v", err) + } + } + if cfg.SummaryJSON != "" { + if err := writeJSONSummaryFile(cfg.SummaryJSON, summary); err != nil { + fatal("write JSON summary: %v", err) + } + } +} diff --git a/cmd/graphbench/measure.go b/cmd/graphbench/measure.go new file mode 100644 index 00000000..7aaa7a93 --- /dev/null +++ b/cmd/graphbench/measure.go @@ -0,0 +1,71 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "fmt" + "time" + + "github.com/specterops/dawgs/graph" +) + +func countCypherRows(tx graph.Transaction, cypher string, params map[string]any) (int64, error) { + result := tx.Query(cypher, params) + defer result.Close() + + var rowCount int64 + for result.Next() { + rowCount++ + } + + return rowCount, result.Error() +} + +func measureCypher(ctx context.Context, db graph.Database, cypher string, params map[string]any, iterations int) (int64, DurationStats, error) { + if iterations < 1 { + return 0, DurationStats{}, fmt.Errorf("iterations must be at least 1") + } + + var warmupRows int64 + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + var err error + warmupRows, err = countCypherRows(tx, cypher, params) + return err + }); err != nil { + return 0, DurationStats{}, err + } + + durations := make([]time.Duration, iterations) + for idx := range iterations { + start := time.Now() + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + _, err := countCypherRows(tx, cypher, params) + return err + }); err != nil { + return 0, DurationStats{}, err + } + durations[idx] = time.Since(start) + } + + stats, err := computeDurationStats(durations) + if err != nil { + return 0, DurationStats{}, err + } + + return warmupRows, stats, nil +} diff --git a/cmd/graphbench/neo4j.go b/cmd/graphbench/neo4j.go new file mode 100644 index 00000000..6b1d2bef --- /dev/null +++ b/cmd/graphbench/neo4j.go @@ -0,0 +1,303 @@ +// Copyright 2026 Specter Ops, Inc. 
+// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "fmt" + "net/url" + "strings" + + neo4jcore "github.com/neo4j/neo4j-go-driver/v5/neo4j" + "github.com/specterops/dawgs" + dawgsneo4j "github.com/specterops/dawgs/drivers/neo4j" + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" + "github.com/specterops/dawgs/util/size" +) + +type neo4jRunner struct { + datasetDir string + db graph.Database + planDriver neo4jcore.DriverWithContext + databaseName string +} + +func newNeo4jRunner(ctx context.Context, datasetDir, connection string, corpus ScaleCorpus) (*neo4jRunner, error) { + db, err := dawgs.Open(ctx, dawgsneo4j.DriverName, dawgs.Config{ + GraphQueryMemoryLimit: size.Gibibyte, + ConnectionString: connection, + }) + if err != nil { + return nil, fmt.Errorf("open Neo4j database: %w", err) + } + + nodeKinds, edgeKinds, err := scanDatasetKinds(datasetDir, scaleCorpusDatasets(corpus)) + if err != nil { + _ = db.Close(ctx) + return nil, err + } + + if err := db.AssertSchema(ctx, benchmarkSchema(nodeKinds, edgeKinds)); err != nil { + _ = db.Close(ctx) + return nil, fmt.Errorf("assert Neo4j schema: %w", err) + } + + planDriver, databaseName, err := openNeo4jPlanDriver(connection) + if err != nil { + _ = db.Close(ctx) + return nil, err + } + + return &neo4jRunner{ + datasetDir: datasetDir, + db: db, + planDriver: planDriver, + databaseName: databaseName, + }, nil +} + +func 
(s *neo4jRunner) Close(ctx context.Context) error { + var closeErr error + if s.planDriver != nil { + closeErr = s.planDriver.Close(ctx) + } + if s.db != nil { + if err := s.db.Close(ctx); err != nil && closeErr == nil { + closeErr = err + } + } + + return closeErr +} + +func (s *neo4jRunner) Run(ctx context.Context, iterations int, corpus ScaleCorpus) ([]CaseResult, error) { + var records []CaseResult + casesByDataset := scaleCasesByDataset(corpus) + + for _, datasetName := range scaleCorpusDatasets(corpus) { + if err := clearGraph(ctx, s.db); err != nil { + return nil, fmt.Errorf("clear graph for %s: %w", datasetName, err) + } + + idMap, err := loadDataset(ctx, s.db, s.datasetDir, datasetName) + if err != nil { + return nil, err + } + + for _, testCase := range casesByDataset[datasetName] { + if !testCase.Supports(ModeNeo4j) { + continue + } + + record := s.runCase(ctx, iterations, testCase, idMap) + records = append(records, record) + } + } + + return records, nil +} + +func (s *neo4jRunner) runCase(ctx context.Context, iterations int, testCase ScaleCase, idMap opengraph.IDMap) CaseResult { + params, err := resolveCaseParams(testCase, idMap) + record := newCaseResult(testCase, ModeNeo4j, params) + if err != nil { + record.Status = StatusError + record.Error = err.Error() + return record + } + + rowCount, stats, err := measureCypher(ctx, s.db, testCase.Cypher, params, iterations) + if err != nil { + record.Status = StatusError + record.Error = err.Error() + return record + } + + record.RowCount = rowCount + record.Stats = stats + applyRowExpectation(&record) + + plan, operators, err := s.explain(ctx, testCase.Cypher, params) + if err != nil { + if record.Status == StatusOK { + record.Status = StatusError + record.Error = err.Error() + } + return record + } + + record.Neo4jPlan = plan + record.Neo4jOperators = operators + return record +} + +func (s *neo4jRunner) explain(ctx context.Context, cypherQuery string, params map[string]any) (plan *Neo4jPlanNode, 
operators []string, err error) { + session := s.planDriver.NewSession(ctx, neo4jcore.SessionConfig{ + AccessMode: neo4jcore.AccessModeRead, + DatabaseName: s.databaseName, + }) + defer func() { + if closeErr := session.Close(ctx); err == nil && closeErr != nil { + err = closeErr + } + }() + + result, err := session.Run(ctx, "EXPLAIN "+cypherWithoutTerminator(cypherQuery), params) + if err != nil { + return nil, nil, err + } + + summary, err := result.Consume(ctx) + if err != nil { + return nil, nil, err + } + if summary.Plan() == nil { + return nil, nil, nil + } + + planNode := convertNeo4jPlan(summary.Plan()) + return &planNode, neo4jOperators(planNode), nil +} + +type neo4jPlanDriverConfig struct { + Target string + Username string + Password string + DatabaseName string +} + +func parseNeo4jPlanDriverConfig(connStr string) (neo4jPlanDriverConfig, error) { + connectionURL, err := url.Parse(connStr) + if err != nil { + return neo4jPlanDriverConfig{}, fmt.Errorf("parse Neo4j connection string: %w", err) + } + + if connectionURL.Scheme != dawgsneo4j.DriverName && connectionURL.Scheme != "neo4j+s" && connectionURL.Scheme != "neo4j+ssc" { + return neo4jPlanDriverConfig{}, fmt.Errorf("expected Neo4j connection string scheme, got %q", connectionURL.Scheme) + } + + password, ok := connectionURL.User.Password() + if !ok { + return neo4jPlanDriverConfig{}, fmt.Errorf("no password provided in Neo4j connection string") + } + if connectionURL.Host == "" { + return neo4jPlanDriverConfig{}, fmt.Errorf("Neo4j connection string host is required") + } + + databaseName, err := neo4jDatabaseName(connectionURL) + if err != nil { + return neo4jPlanDriverConfig{}, err + } + + return neo4jPlanDriverConfig{ + Target: (&url.URL{ + Scheme: connectionURL.Scheme, + Host: connectionURL.Host, + RawQuery: connectionURL.RawQuery, + }).String(), + Username: connectionURL.User.Username(), + Password: password, + DatabaseName: databaseName, + }, nil +} + +func neo4jDatabaseName(connectionURL 
*url.URL) (string, error) { + databasePath := strings.Trim(connectionURL.EscapedPath(), "/") + if databasePath == "" { + return "", nil + } + if strings.Contains(databasePath, "/") { + return "", fmt.Errorf("Neo4j database path must contain a single database name") + } + + databaseName, err := url.PathUnescape(databasePath) + if err != nil { + return "", fmt.Errorf("parse Neo4j database name: %w", err) + } + if strings.Contains(databaseName, "/") { + return "", fmt.Errorf("Neo4j database path must contain a single database name") + } + + return databaseName, nil +} + +func openNeo4jPlanDriver(connStr string) (neo4jcore.DriverWithContext, string, error) { + cfg, err := parseNeo4jPlanDriverConfig(connStr) + if err != nil { + return nil, "", err + } + + driver, err := neo4jcore.NewDriverWithContext(cfg.Target, neo4jcore.BasicAuth(cfg.Username, cfg.Password, "")) + if err != nil { + return nil, "", err + } + + return driver, cfg.DatabaseName, nil +} + +type Neo4jPlanNode struct { + Operator string `json:"operator"` + Arguments map[string]string `json:"arguments,omitempty"` + Identifiers []string `json:"identifiers,omitempty"` + Children []Neo4jPlanNode `json:"children,omitempty"` +} + +func convertNeo4jPlan(plan neo4jcore.Plan) Neo4jPlanNode { + node := Neo4jPlanNode{ + Operator: plan.Operator(), + Arguments: stringifyArguments(plan.Arguments()), + Identifiers: append([]string(nil), plan.Identifiers()...), + } + + for _, child := range plan.Children() { + node.Children = append(node.Children, convertNeo4jPlan(child)) + } + + return node +} + +func stringifyArguments(arguments map[string]any) map[string]string { + if len(arguments) == 0 { + return nil + } + + values := make(map[string]string, len(arguments)) + for key, value := range arguments { + values[key] = fmt.Sprint(value) + } + + return values +} + +func neo4jOperators(root Neo4jPlanNode) []string { + var operators []string + var walk func(Neo4jPlanNode) + walk = func(node Neo4jPlanNode) { + operators = 
append(operators, node.Operator+"@neo4j") + for _, child := range node.Children { + walk(child) + } + } + walk(root) + + return operators +} + +func cypherWithoutTerminator(cypherQuery string) string { + return strings.TrimSuffix(strings.TrimSpace(cypherQuery), ";") +} diff --git a/cmd/graphbench/neo4j_test.go b/cmd/graphbench/neo4j_test.go new file mode 100644 index 00000000..a01058c9 --- /dev/null +++ b/cmd/graphbench/neo4j_test.go @@ -0,0 +1,58 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "net/url" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseNeo4jPlanDriverConfig(t *testing.T) { + cfg, err := parseNeo4jPlanDriverConfig("neo4j://neo4j:secret@example.com:7687/neo4jdb?x=1") + + require.NoError(t, err) + require.Equal(t, "neo4j://example.com:7687?x=1", cfg.Target) + require.Equal(t, "neo4j", cfg.Username) + require.Equal(t, "secret", cfg.Password) + require.Equal(t, "neo4jdb", cfg.DatabaseName) +} + +func TestNeo4jDatabaseNameRejectsNestedPath(t *testing.T) { + for _, connStr := range []string{ + "neo4j://neo4j:secret@example.com:7687/a/b", + "neo4j://neo4j:secret@example.com:7687/a%2Fb", + } { + parsed, err := url.Parse(connStr) + require.NoError(t, err) + + _, err = neo4jDatabaseName(parsed) + require.ErrorContains(t, err, "single database name") + } +} + +func TestNeo4jOperatorsAnnotatesOperators(t *testing.T) { + operators := neo4jOperators(Neo4jPlanNode{ + Operator: "ProduceResults", + Children: []Neo4jPlanNode{{ + Operator: "AllNodesScan", + }}, + }) + + require.Equal(t, []string{"ProduceResults@neo4j", "AllNodesScan@neo4j"}, operators) +} diff --git a/cmd/graphbench/postgres.go b/cmd/graphbench/postgres.go new file mode 100644 index 00000000..2ea47784 --- /dev/null +++ b/cmd/graphbench/postgres.go @@ -0,0 +1,279 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "fmt" + "regexp" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/pgxpool" + "github.com/specterops/dawgs" + "github.com/specterops/dawgs/cypher/frontend" + "github.com/specterops/dawgs/cypher/models/pgsql/translate" + "github.com/specterops/dawgs/drivers/pg" + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" + "github.com/specterops/dawgs/util/size" +) + +type postgresSQLRunner struct { + datasetDir string + db graph.Database + pgDriver *pg.Driver + graphID int32 +} + +func newPostgresSQLRunner(ctx context.Context, datasetDir, connection string, corpus ScaleCorpus) (*postgresSQLRunner, error) { + poolCfg, err := pgxpool.ParseConfig(connection) + if err != nil { + return nil, fmt.Errorf("parse PostgreSQL pool configuration: %w", err) + } + pool, err := pg.NewPool(poolCfg) + if err != nil { + return nil, fmt.Errorf("create PostgreSQL pool: %w", err) + } + + db, err := dawgs.Open(ctx, pg.DriverName, dawgs.Config{ + GraphQueryMemoryLimit: size.Gibibyte, + ConnectionString: connection, + Pool: pool, + }) + if err != nil { + pool.Close() + return nil, fmt.Errorf("open PostgreSQL database: %w", err) + } + + nodeKinds, edgeKinds, err := scanDatasetKinds(datasetDir, scaleCorpusDatasets(corpus)) + if err != nil { + _ = db.Close(ctx) + return nil, err + } + + if err := db.AssertSchema(ctx, benchmarkSchema(nodeKinds, edgeKinds)); err != nil { + _ = db.Close(ctx) + return nil, fmt.Errorf("assert PostgreSQL schema: %w", err) + } + + pgDriver, ok := db.(*pg.Driver) + if !ok { + _ = db.Close(ctx) + return nil, fmt.Errorf("expected *pg.Driver, got %T", db) + } + + defaultGraph, ok := pgDriver.DefaultGraph() + if !ok { + _ = db.Close(ctx) + return nil, fmt.Errorf("PostgreSQL default graph is not set") + } + + return &postgresSQLRunner{ + datasetDir: datasetDir, + db: db, + pgDriver: pgDriver, + graphID: defaultGraph.ID, + }, nil +} + +func (s *postgresSQLRunner) 
Close(ctx context.Context) error { + if s.db == nil { + return nil + } + + return s.db.Close(ctx) +} + +func (s *postgresSQLRunner) Run(ctx context.Context, iterations int, corpus ScaleCorpus) ([]CaseResult, error) { + var records []CaseResult + casesByDataset := scaleCasesByDataset(corpus) + + for _, datasetName := range scaleCorpusDatasets(corpus) { + if err := clearGraph(ctx, s.db); err != nil { + return nil, fmt.Errorf("clear graph for %s: %w", datasetName, err) + } + + idMap, err := loadDataset(ctx, s.db, s.datasetDir, datasetName) + if err != nil { + return nil, err + } + + for _, testCase := range casesByDataset[datasetName] { + if !testCase.Supports(ModePostgresSQL) { + continue + } + + record := s.runCase(ctx, iterations, testCase, idMap) + records = append(records, record) + } + } + + return records, nil +} + +func (s *postgresSQLRunner) runCase(ctx context.Context, iterations int, testCase ScaleCase, idMap opengraph.IDMap) CaseResult { + params, err := resolveCaseParams(testCase, idMap) + record := newCaseResult(testCase, ModePostgresSQL, params) + if err != nil { + record.Status = StatusError + record.Error = err.Error() + return record + } + + rowCount, stats, err := measureCypher(ctx, s.db, testCase.Cypher, params, iterations) + if err != nil { + record.Status = StatusError + record.Error = err.Error() + return record + } + + record.RowCount = rowCount + record.Stats = stats + applyRowExpectation(&record) + + explain, err := s.explain(ctx, testCase.Cypher, params) + if err != nil { + if record.Status == StatusOK { + record.Status = StatusError + record.Error = err.Error() + } + return record + } + + record.SQL = explain.SQL + record.PostgresPlan = explain.Plan + record.PostgresMetrics = &explain.Metrics + record.Optimization = &explain.Optimization + return record +} + +type postgresExplain struct { + SQL string + Plan []string + Metrics PostgresPlanMetrics + Optimization translate.OptimizationSummary +} + +func (s *postgresSQLRunner) explain(ctx 
context.Context, cypherQuery string, params map[string]any) (postgresExplain, error) { + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), cypherQuery) + if err != nil { + return postgresExplain{}, err + } + + translation, err := translate.Translate(ctx, regularQuery, s.pgDriver.KindMapper(), params, s.graphID) + if err != nil { + return postgresExplain{}, err + } + + sqlQuery, err := translate.Translated(translation) + if err != nil { + return postgresExplain{}, err + } + + var plan []string + if err := s.db.ReadTransaction(ctx, func(tx graph.Transaction) error { + result := tx.Raw("EXPLAIN (ANALYZE, BUFFERS, TIMING OFF) "+sqlQuery, translation.Parameters) + defer result.Close() + + for result.Next() { + values := result.Values() + if len(values) == 0 { + continue + } + + plan = append(plan, fmt.Sprint(values[0])) + } + + return result.Error() + }); err != nil { + return postgresExplain{}, err + } + + return postgresExplain{ + SQL: sqlQuery, + Plan: plan, + Metrics: parsePostgresPlanMetrics(plan), + Optimization: translation.Optimization, + }, nil +} + +var ( + postgresPlanningPattern = regexp.MustCompile(`Planning Time: ([0-9.]+) ms`) + postgresExecutionPattern = regexp.MustCompile(`Execution Time: ([0-9.]+) ms`) + postgresBufferPattern = regexp.MustCompile(`(?:(shared|temp) )?(hit|read|dirtied|written)=([0-9]+)`) +) + +func parsePostgresPlanMetrics(plan []string) PostgresPlanMetrics { + var metrics PostgresPlanMetrics + for _, line := range plan { + if metrics.PlanningMS == nil { + if match := postgresPlanningPattern.FindStringSubmatch(line); match != nil { + if parsed, err := strconv.ParseFloat(match[1], 64); err == nil { + metrics.PlanningMS = &parsed + } + } + } + + if metrics.ExecutionMS == nil { + if match := postgresExecutionPattern.FindStringSubmatch(line); match != nil { + if parsed, err := strconv.ParseFloat(match[1], 64); err == nil { + metrics.ExecutionMS = &parsed + } + } + } + + if strings.Contains(line, "Buffers:") && metrics.Buffers 
== (Buffers{}) { + metrics.Buffers = parsePostgresBuffers(line) + } + } + + return metrics +} + +func parsePostgresBuffers(line string) Buffers { + var ( + buffers Buffers + bufferScope string + ) + + for _, match := range postgresBufferPattern.FindAllStringSubmatch(line, -1) { + value, err := strconv.ParseInt(match[3], 10, 64) + if err != nil { + continue + } + + if match[1] != "" { + bufferScope = match[1] + } + + switch bufferScope + "_" + match[2] { + case "shared_hit": + buffers.SharedHit = value + case "shared_read": + buffers.SharedRead = value + case "shared_dirtied": + buffers.SharedDirtied = value + case "temp_read": + buffers.TempRead = value + case "temp_written": + buffers.TempWritten = value + } + } + + return buffers +} diff --git a/cmd/graphbench/postgres_test.go b/cmd/graphbench/postgres_test.go new file mode 100644 index 00000000..54470e60 --- /dev/null +++ b/cmd/graphbench/postgres_test.go @@ -0,0 +1,63 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "testing" + + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" + "github.com/stretchr/testify/require" +) + +func TestResolveCaseParams(t *testing.T) { + params, err := resolveCaseParams(ScaleCase{ + Params: map[string]any{ + "name": "value", + }, + NodeParams: map[string]string{ + "start_id": "n1", + }, + }, opengraph.IDMap{"n1": graph.ID(42)}) + + require.NoError(t, err) + require.Equal(t, map[string]any{ + "name": "value", + "start_id": int64(42), + }, params) +} + +func TestParsePostgresPlanMetrics(t *testing.T) { + metrics := parsePostgresPlanMetrics([]string{ + "Nested Loop (actual rows=1 loops=1)", + " Buffers: shared hit=12 read=3 dirtied=2, temp read=4 written=5", + "Planning Time: 1.250 ms", + "Execution Time: 9.750 ms", + }) + + require.NotNil(t, metrics.PlanningMS) + require.Equal(t, 1.25, *metrics.PlanningMS) + require.NotNil(t, metrics.ExecutionMS) + require.Equal(t, 9.75, *metrics.ExecutionMS) + require.Equal(t, Buffers{ + SharedHit: 12, + SharedRead: 3, + SharedDirtied: 2, + TempRead: 4, + TempWritten: 5, + }, metrics.Buffers) +} diff --git a/cmd/graphbench/results.go b/cmd/graphbench/results.go new file mode 100644 index 00000000..27d16093 --- /dev/null +++ b/cmd/graphbench/results.go @@ -0,0 +1,226 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "time" + + "github.com/specterops/dawgs/cypher/models/pgsql/translate" +) + +const ( + StatusOK = "ok" + StatusRowMismatch = "row_mismatch" + StatusError = "error" + StatusNotImplemented = "not_implemented" +) + +type DurationStats struct { + Iterations int `json:"iterations"` + Median time.Duration `json:"median"` + P95 time.Duration `json:"p95"` + Max time.Duration `json:"max"` +} + +type PostgresPlanMetrics struct { + PlanningMS *float64 `json:"planning_ms,omitempty"` + ExecutionMS *float64 `json:"execution_ms,omitempty"` + Buffers Buffers `json:"buffers,omitempty"` +} + +type Buffers struct { + SharedHit int64 `json:"shared_hit,omitempty"` + SharedRead int64 `json:"shared_read,omitempty"` + SharedDirtied int64 `json:"shared_dirtied,omitempty"` + TempRead int64 `json:"temp_read,omitempty"` + TempWritten int64 `json:"temp_written,omitempty"` +} + +type CaseResult struct { + Source string `json:"source"` + Dataset string `json:"dataset"` + Name string `json:"name"` + Category string `json:"category"` + ExecutionMode ExecutionMode `json:"execution_mode"` + Status string `json:"status"` + Cypher string `json:"cypher"` + Params map[string]any `json:"params,omitempty"` + NodeParams map[string]string `json:"node_params,omitempty"` + ExpectedRowCount *int64 `json:"expected_row_count,omitempty"` + RowCount int64 `json:"row_count,omitempty"` + Stats DurationStats `json:"stats,omitempty"` + SQL string `json:"sql,omitempty"` + PostgresPlan []string `json:"postgres_plan,omitempty"` + PostgresMetrics *PostgresPlanMetrics `json:"postgres_metrics,omitempty"` + Neo4jPlan *Neo4jPlanNode `json:"neo4j_plan,omitempty"` + Neo4jOperators []string `json:"neo4j_operators,omitempty"` + Optimization *translate.OptimizationSummary `json:"optimization,omitempty"` + Baseline *BaselineComparison `json:"baseline,omitempty"` + FallbackReason 
string `json:"fallback_reason,omitempty"` + Error string `json:"error,omitempty"` +} + +type BaselineComparison struct { + BaselineMedian time.Duration `json:"baseline_median"` + CurrentMedian time.Duration `json:"current_median"` + Change time.Duration `json:"change"` + Ratio float64 `json:"ratio"` +} + +func newCaseResult(testCase ScaleCase, mode ExecutionMode, params map[string]any) CaseResult { + return CaseResult{ + Source: testCase.Source, + Dataset: testCase.Dataset, + Name: testCase.Name, + Category: testCase.Category, + ExecutionMode: mode, + Status: StatusOK, + Cypher: testCase.Cypher, + Params: params, + NodeParams: testCase.NodeParams, + ExpectedRowCount: testCase.Expected.RowCount, + } +} + +func computeDurationStats(durations []time.Duration) (DurationStats, error) { + if len(durations) == 0 { + return DurationStats{}, fmt.Errorf("duration stats require at least one duration") + } + + sortedDurations := append([]time.Duration(nil), durations...) + sort.Slice(sortedDurations, func(i, j int) bool { + return sortedDurations[i] < sortedDurations[j] + }) + + n := len(sortedDurations) + return DurationStats{ + Iterations: n, + Median: sortedDurations[n/2], + P95: sortedDurations[min(n*95/100, n-1)], + Max: sortedDurations[n-1], + }, nil +} + +func applyRowExpectation(result *CaseResult) { + if result.ExpectedRowCount != nil && result.RowCount != *result.ExpectedRowCount { + result.Status = StatusRowMismatch + result.Error = fmt.Sprintf("expected %d rows, got %d", *result.ExpectedRowCount, result.RowCount) + } +} + +func writeJSONLFile(path string, records []CaseResult) error { + if path == "" { + return writeJSONL(os.Stdout, records) + } + + if err := ensureOutputDir(path); err != nil { + return err + } + + output, err := os.Create(path) + if err != nil { + return err + } + defer output.Close() + + return writeJSONL(output, records) +} + +func writeJSONL(w io.Writer, records []CaseResult) error { + encoder := json.NewEncoder(w) + for _, record := range 
records { + if err := encoder.Encode(record); err != nil { + return err + } + } + + return nil +} + +func readJSONLFile(path string) ([]CaseResult, error) { + input, err := os.Open(path) + if err != nil { + return nil, err + } + defer input.Close() + + decoder := json.NewDecoder(input) + var records []CaseResult + for { + var record CaseResult + if err := decoder.Decode(&record); err != nil { + if errors.Is(err, io.EOF) { + break + } + + return nil, err + } + + records = append(records, record) + } + + return records, nil +} + +func ensureOutputDir(path string) error { + dir := filepath.Dir(path) + if dir == "." || dir == "" { + return nil + } + + return os.MkdirAll(dir, 0o755) +} + +func applyBaseline(path string, records []CaseResult) error { + baseline, err := readJSONLFile(path) + if err != nil { + return err + } + + byKey := make(map[string]CaseResult, len(baseline)) + for _, record := range baseline { + byKey[resultKey(record.Dataset, record.Name, record.ExecutionMode)] = record + } + + for idx := range records { + record := &records[idx] + previous, found := byKey[resultKey(record.Dataset, record.Name, record.ExecutionMode)] + if !found || previous.Stats.Iterations == 0 || record.Stats.Iterations == 0 || previous.Stats.Median == 0 { + continue + } + + record.Baseline = &BaselineComparison{ + BaselineMedian: previous.Stats.Median, + CurrentMedian: record.Stats.Median, + Change: record.Stats.Median - previous.Stats.Median, + Ratio: float64(record.Stats.Median) / float64(previous.Stats.Median), + } + } + + return nil +} + +func resultKey(dataset, name string, mode ExecutionMode) string { + return dataset + "\x00" + name + "\x00" + string(mode) +} diff --git a/cmd/graphbench/results_test.go b/cmd/graphbench/results_test.go new file mode 100644 index 00000000..11671641 --- /dev/null +++ b/cmd/graphbench/results_test.go @@ -0,0 +1,49 @@ +// Copyright 2026 Specter Ops, Inc. 
+// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestComputeDurationStatsRejectsEmptyDurations(t *testing.T) { + _, err := computeDurationStats(nil) + + require.ErrorContains(t, err, "at least one duration") +} + +func TestComputeDurationStatsCopiesAndSortsDurations(t *testing.T) { + durations := []time.Duration{ + 30 * time.Millisecond, + 10 * time.Millisecond, + 20 * time.Millisecond, + } + + stats, err := computeDurationStats(durations) + + require.NoError(t, err) + require.Equal(t, 3, stats.Iterations) + require.Equal(t, 20*time.Millisecond, stats.Median) + require.Equal(t, 30*time.Millisecond, stats.P95) + require.Equal(t, 30*time.Millisecond, stats.Max) + require.Equal(t, 30*time.Millisecond, durations[0]) + require.Equal(t, 10*time.Millisecond, durations[1]) + require.Equal(t, 20*time.Millisecond, durations[2]) +} diff --git a/cmd/graphbench/summary.go b/cmd/graphbench/summary.go new file mode 100644 index 00000000..437ebeae --- /dev/null +++ b/cmd/graphbench/summary.go @@ -0,0 +1,309 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "encoding/json" + "fmt" + "io" + "os" + "sort" + "strings" + "time" +) + +type Summary struct { + GeneratedAt time.Time `json:"generated_at"` + Modes []ModeSummary `json:"modes"` + Cases []CaseSummary `json:"cases"` + Regressions []BaselineEntry `json:"regressions,omitempty"` + Improvements []BaselineEntry `json:"improvements,omitempty"` +} + +type ModeSummary struct { + Mode ExecutionMode `json:"mode"` + Total int `json:"total"` + OK int `json:"ok"` + RowMismatch int `json:"row_mismatch"` + Error int `json:"error"` + NotImplemented int `json:"not_implemented"` +} + +type CaseSummary struct { + Source string `json:"source"` + Dataset string `json:"dataset"` + Name string `json:"name"` + Category string `json:"category"` + Modes map[ExecutionMode]ModeCaseCell `json:"modes"` +} + +type ModeCaseCell struct { + Status string `json:"status"` + Rows int64 `json:"rows,omitempty"` + Median time.Duration `json:"median,omitempty"` + Baseline *BaselineComparison `json:"baseline,omitempty"` + FallbackReason string `json:"fallback_reason,omitempty"` + Error string `json:"error,omitempty"` +} + +type BaselineEntry struct { + Dataset string `json:"dataset"` + Name string `json:"name"` + Mode ExecutionMode `json:"mode"` + BaselineMedian time.Duration `json:"baseline_median"` + CurrentMedian time.Duration `json:"current_median"` + Ratio float64 `json:"ratio"` +} + +func buildSummary(records []CaseResult) Summary { + summary := Summary{ + GeneratedAt: time.Now().UTC(), + } + + var ( + 
modeSummaries = map[ExecutionMode]*ModeSummary{} + caseSummaries = map[string]*CaseSummary{} + ) + + for _, record := range records { + modeSummary := modeSummaries[record.ExecutionMode] + if modeSummary == nil { + modeSummary = &ModeSummary{Mode: record.ExecutionMode} + modeSummaries[record.ExecutionMode] = modeSummary + } + modeSummary.Total++ + + switch record.Status { + case StatusOK: + modeSummary.OK++ + case StatusRowMismatch: + modeSummary.RowMismatch++ + case StatusError: + modeSummary.Error++ + case StatusNotImplemented: + modeSummary.NotImplemented++ + } + + var ( + caseKey = record.Source + "\x00" + record.Dataset + "\x00" + record.Name + caseSummary = caseSummaries[caseKey] + ) + if caseSummary == nil { + caseSummary = &CaseSummary{ + Source: record.Source, + Dataset: record.Dataset, + Name: record.Name, + Category: record.Category, + Modes: map[ExecutionMode]ModeCaseCell{}, + } + caseSummaries[caseKey] = caseSummary + } + + caseSummary.Modes[record.ExecutionMode] = ModeCaseCell{ + Status: record.Status, + Rows: record.RowCount, + Median: record.Stats.Median, + Baseline: record.Baseline, + FallbackReason: record.FallbackReason, + Error: record.Error, + } + + if record.Baseline != nil { + entry := BaselineEntry{ + Dataset: record.Dataset, + Name: record.Name, + Mode: record.ExecutionMode, + BaselineMedian: record.Baseline.BaselineMedian, + CurrentMedian: record.Baseline.CurrentMedian, + Ratio: record.Baseline.Ratio, + } + if record.Baseline.Ratio > 1 { + summary.Regressions = append(summary.Regressions, entry) + } else if record.Baseline.Ratio < 1 { + summary.Improvements = append(summary.Improvements, entry) + } + } + } + + for _, modeSummary := range modeSummaries { + summary.Modes = append(summary.Modes, *modeSummary) + } + sort.Slice(summary.Modes, func(i, j int) bool { + return summary.Modes[i].Mode < summary.Modes[j].Mode + }) + + for _, caseSummary := range caseSummaries { + summary.Cases = append(summary.Cases, *caseSummary) + } + 
sort.Slice(summary.Cases, func(i, j int) bool { + if summary.Cases[i].Dataset != summary.Cases[j].Dataset { + return summary.Cases[i].Dataset < summary.Cases[j].Dataset + } + + return summary.Cases[i].Name < summary.Cases[j].Name + }) + + sortBaselineEntries(summary.Regressions, true) + sortBaselineEntries(summary.Improvements, false) + return summary +} + +func sortBaselineEntries(entries []BaselineEntry, descending bool) { + sort.Slice(entries, func(i, j int) bool { + if descending { + return entries[i].Ratio > entries[j].Ratio + } + + return entries[i].Ratio < entries[j].Ratio + }) +} + +func writeMarkdownSummaryFile(path string, summary Summary) error { + if err := ensureOutputDir(path); err != nil { + return err + } + + output, err := os.Create(path) + if err != nil { + return err + } + defer output.Close() + + return writeMarkdownSummary(output, summary) +} + +func writeJSONSummaryFile(path string, summary Summary) error { + if err := ensureOutputDir(path); err != nil { + return err + } + + output, err := os.Create(path) + if err != nil { + return err + } + defer output.Close() + + encoder := json.NewEncoder(output) + encoder.SetIndent("", " ") + return encoder.Encode(summary) +} + +func writeMarkdownSummary(w io.Writer, summary Summary) error { + fmt.Fprintf(w, "# GraphBench Summary\n\n") + fmt.Fprintf(w, "Generated: %s\n\n", summary.GeneratedAt.Format(time.RFC3339)) + + fmt.Fprintf(w, "## Modes\n\n") + fmt.Fprintf(w, "| Mode | Total | OK | Row Mismatch | Error | Not Implemented |\n") + fmt.Fprintf(w, "| --- | ---: | ---: | ---: | ---: | ---: |\n") + for _, mode := range summary.Modes { + fmt.Fprintf(w, "| %s | %d | %d | %d | %d | %d |\n", + mode.Mode, + mode.Total, + mode.OK, + mode.RowMismatch, + mode.Error, + mode.NotImplemented, + ) + } + + fmt.Fprintf(w, "\n## Cases\n\n") + fmt.Fprintf(w, "| Case | Dataset | Category | postgres_sql | local_traversal | neo4j |\n") + fmt.Fprintf(w, "| --- | --- | --- | --- | --- | --- |\n") + for _, testCase := range 
summary.Cases { + fmt.Fprintf(w, "| %s | %s | %s | %s | %s | %s |\n", + escapeMarkdown(testCase.Name), + escapeMarkdown(testCase.Dataset), + escapeMarkdown(testCase.Category), + formatModeCell(testCase.Modes[ModePostgresSQL]), + formatModeCell(testCase.Modes[ModeLocalTraversal]), + formatModeCell(testCase.Modes[ModeNeo4j]), + ) + } + + if len(summary.Regressions) > 0 { + fmt.Fprintf(w, "\n## Baseline Regressions\n\n") + writeBaselineTable(w, summary.Regressions) + } + if len(summary.Improvements) > 0 { + fmt.Fprintf(w, "\n## Baseline Improvements\n\n") + writeBaselineTable(w, summary.Improvements) + } + + return nil +} + +func writeBaselineTable(w io.Writer, entries []BaselineEntry) { + fmt.Fprintf(w, "| Case | Dataset | Mode | Baseline | Current | Ratio |\n") + fmt.Fprintf(w, "| --- | --- | --- | ---: | ---: | ---: |\n") + for _, entry := range entries { + fmt.Fprintf(w, "| %s | %s | %s | %s | %s | %.2fx |\n", + escapeMarkdown(entry.Name), + escapeMarkdown(entry.Dataset), + entry.Mode, + formatDuration(entry.BaselineMedian), + formatDuration(entry.CurrentMedian), + entry.Ratio, + ) + } +} + +func formatModeCell(cell ModeCaseCell) string { + if cell.Status == "" { + return "-" + } + + var parts []string + if cell.Median > 0 { + parts = append(parts, formatDuration(cell.Median)) + if cell.Rows > 0 { + parts = append(parts, fmt.Sprintf("rows=%d", cell.Rows)) + } + } else { + parts = append(parts, cell.Status) + } + + if cell.Status != StatusOK && cell.Median > 0 { + parts = append(parts, cell.Status) + } + if cell.Baseline != nil { + parts = append(parts, fmt.Sprintf("%.2fx", cell.Baseline.Ratio)) + } + if cell.FallbackReason != "" { + parts = append(parts, cell.FallbackReason) + } + if cell.Error != "" { + parts = append(parts, cell.Error) + } + + return escapeMarkdown(strings.Join(parts, "; ")) +} + +func formatDuration(duration time.Duration) string { + ms := float64(duration.Microseconds()) / 1000.0 + if ms < 1 { + return fmt.Sprintf("%.2fms", ms) + } + if ms < 
// markdownCellReplacer rewrites the characters that would break a Markdown
// table cell. "\r\n" is listed before "\r" so a Windows line ending collapses
// to a single space.
var markdownCellReplacer = strings.NewReplacer(
	"\r\n", " ",
	"\n", " ",
	"\r", " ",
	"|", "\\|",
)

// escapeMarkdown makes value safe to embed in a single Markdown table cell.
// Pipes are backslash-escaped so they are not read as column separators, and
// line breaks are replaced with spaces so multi-line text (for example an
// error message) cannot split the table row.
func escapeMarkdown(value string) string {
	return markdownCellReplacer.Replace(value)
}
"counts", + ExecutionMode: ModePostgresSQL, + Status: StatusOK, + RowCount: 1, + Stats: DurationStats{ + Iterations: 1, + Median: 2 * time.Millisecond, + }, + }, + { + Dataset: "base", + Name: "case", + Category: "counts", + ExecutionMode: ModeLocalTraversal, + Status: StatusNotImplemented, + FallbackReason: localTraversalUnavailableReason, + }, + }) + + var output bytes.Buffer + require.NoError(t, writeMarkdownSummary(&output, summary)) + require.Contains(t, output.String(), "| case | base | counts | 2.0ms; rows=1 | not_implemented; local traversal executor unavailable | - |") +} diff --git a/cmd/graphbench/types.go b/cmd/graphbench/types.go new file mode 100644 index 00000000..c941a01a --- /dev/null +++ b/cmd/graphbench/types.go @@ -0,0 +1,104 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// ExecutionMode is the string identifier of a graphbench execution mode.
type ExecutionMode string

// Execution modes recognized by graphbench.
const (
	ModePostgresSQL    ExecutionMode = "postgres_sql"
	ModeLocalTraversal ExecutionMode = "local_traversal"
	ModeNeo4j          ExecutionMode = "neo4j"
)

// validExecutionModes lists every mode accepted by Valid and
// parseExecutionMode.
var validExecutionModes = []ExecutionMode{
	ModePostgresSQL,
	ModeLocalTraversal,
	ModeNeo4j,
}

// Valid reports whether s is one of the recognized execution modes.
func (s ExecutionMode) Valid() bool {
	return slices.Contains(validExecutionModes, s)
}

// parseExecutionMode converts raw into an ExecutionMode, ignoring surrounding
// whitespace. An unrecognized value yields the empty mode and an error that
// quotes the raw input exactly as given.
func parseExecutionMode(raw string) (ExecutionMode, error) {
	candidate := ExecutionMode(strings.TrimSpace(raw))
	if !candidate.Valid() {
		return "", fmt.Errorf("unsupported execution mode %q", raw)
	}

	return candidate, nil
}
`json:"max_depth,omitempty"` + PathMaterializationRequired bool `json:"path_materialization_required"` +} + +type ReferenceDesign struct { + AGERelevance []string `json:"age_relevance,omitempty"` + Notes string `json:"notes,omitempty"` +} + +func (s ScaleCase) Supports(mode ExecutionMode) bool { + return slices.Contains(s.CandidateModes, mode) +} diff --git a/cmd/plancorpus/README.md b/cmd/plancorpus/README.md new file mode 100644 index 00000000..3e49de85 --- /dev/null +++ b/cmd/plancorpus/README.md @@ -0,0 +1,26 @@ +# Plan Corpus Capture + +`plancorpus` captures query-plan diagnostics for the shared integration corpus. + +It reads `integration/testdata/cases` and `integration/testdata/templates`, loads the same datasets and inline fixtures used by the integration tests, and writes backend-specific JSONL plan records plus markdown and JSON summaries. + +## Usage + +```bash +PG_CONNECTION_STRING="postgres://postgres:password@localhost/db" \ +NEO4J_CONNECTION_STRING="neo4j://neo4j:password@localhost:7687" \ +go run ./cmd/plancorpus +``` + +Useful flags: + +| Flag | Default | Description | +| --- | --- | --- | +| `-dataset-dir` | `integration/testdata` | Integration corpus root | +| `-output-dir` | `.coverage` | Output directory | +| `-connection` | `CONNECTION_STRING` | Capture one backend selected by URL scheme | +| `-pg-connection` | `PG_CONNECTION_STRING` | PostgreSQL backend | +| `-neo4j-connection` | `NEO4J_CONNECTION_STRING` | Neo4j backend | +| `-summary` | `.coverage/plan-corpus-summary.md` | Markdown summary | +| `-summary-json` | `.coverage/plan-corpus-summary.json` | JSON summary | +| `-top` | `25` | Number of expensive PostgreSQL plans to include in summaries | diff --git a/cmd/plancorpus/capture.go b/cmd/plancorpus/capture.go new file mode 100644 index 00000000..dc4a7d81 --- /dev/null +++ b/cmd/plancorpus/capture.go @@ -0,0 +1,530 @@ +package main + +import ( + "context" + "fmt" + "net/url" + "os" + "path/filepath" + "sort" + "strings" + + 
"github.com/jackc/pgx/v5/pgxpool" + neo4jcore "github.com/neo4j/neo4j-go-driver/v5/neo4j" + "github.com/specterops/dawgs" + "github.com/specterops/dawgs/cypher/frontend" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" + "github.com/specterops/dawgs/cypher/models/pgsql/translate" + "github.com/specterops/dawgs/drivers/neo4j" + "github.com/specterops/dawgs/drivers/pg" + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" + "github.com/specterops/dawgs/util/size" +) + +const defaultGraphName = "integration_test" + +type captureSpec struct { + DriverName string + Connection string +} + +type backendCapture struct { + spec captureSpec + db graph.Database + pgDriver *pg.Driver + pgGraphID int32 + neo4jDriver neo4jcore.Driver + neo4jDBName string +} + +func driverFromConnectionString(connStr string) (string, error) { + u, err := url.Parse(connStr) + if err != nil { + return "", fmt.Errorf("parse connection string: %w", err) + } + + switch u.Scheme { + case "postgres", "postgresql": + return pg.DriverName, nil + case neo4j.DriverName, "neo4j+s", "neo4j+ssc": + return neo4j.DriverName, nil + default: + return "", fmt.Errorf("unknown connection string scheme %q", u.Scheme) + } +} + +func captureCorpus(ctx context.Context, datasetDir string, suite corpus, spec captureSpec) ([]PlanRecord, error) { + backend, err := openBackend(ctx, suite, spec) + if err != nil { + return nil, err + } + defer backend.close(ctx) + + var records []PlanRecord + for _, datasetName := range suite.datasetNames { + group := suite.caseGroups[datasetName] + if group == nil { + continue + } + + var ( + datasetLoaded = false + ensureDatasetLoaded = func() error { + if datasetLoaded { + return nil + } + if err := clearGraph(ctx, backend.db); err != nil { + return err + } + if err := loadDataset(ctx, backend.db, datasetDir, datasetName); err != nil { + return err + } + datasetLoaded = true + return nil + } + ) + + for _, file := range group.files { + for _, testCase := 
range file.Cases { + if testCase.Fixture == nil { + if err := ensureDatasetLoaded(); err != nil { + return nil, err + } + } else { + if err := loadCommittedFixture(ctx, backend.db, testCase.Fixture); err != nil { + return nil, err + } + datasetLoaded = false + } + + record := backend.capture(ctx, CorpusQuery{ + Source: file.path, + Dataset: datasetName, + Name: testCase.Name, + Cypher: testCase.Cypher, + Params: testCase.Params, + }) + records = append(records, record) + } + } + } + + for _, file := range suite.templateFiles { + fileName := strings.TrimSuffix(filepath.Base(file.path), filepath.Ext(file.path)) + + for _, family := range file.Families { + if family.Fixture == nil { + return nil, fmt.Errorf("%s/%s has no fixture", file.path, family.Name) + } + + for _, variant := range family.Variants { + rendered, err := renderTemplate(family.Template, variant.Vars) + if err != nil { + return nil, fmt.Errorf("%s/%s/%s: %w", file.path, family.Name, variant.Name, err) + } + if err := loadCommittedFixture(ctx, backend.db, family.Fixture); err != nil { + return nil, err + } + + record := backend.capture(ctx, CorpusQuery{ + Source: file.path, + Name: fileName + "/" + family.Name + "/" + variant.Name, + Cypher: rendered, + Params: mergeParams(family.Params, variant.Params), + }) + records = append(records, record) + } + } + + for _, family := range file.Metamorphic { + if family.Fixture == nil { + return nil, fmt.Errorf("%s/%s has no fixture", file.path, family.Name) + } + if err := loadCommittedFixture(ctx, backend.db, family.Fixture); err != nil { + return nil, err + } + + for _, query := range family.Queries { + record := backend.capture(ctx, CorpusQuery{ + Source: file.path, + Name: fileName + "/" + family.Name + "/" + query.Name, + Cypher: query.Cypher, + Params: query.Params, + }) + records = append(records, record) + } + } + } + + return records, nil +} + +func openBackend(ctx context.Context, suite corpus, spec captureSpec) (*backendCapture, error) { + cfg := 
dawgs.Config{ + GraphQueryMemoryLimit: size.Gibibyte, + ConnectionString: spec.Connection, + } + + if spec.DriverName == pg.DriverName { + poolCfg, err := pgxpool.ParseConfig(spec.Connection) + if err != nil { + return nil, fmt.Errorf("parse PostgreSQL pool configuration: %w", err) + } + pool, err := pg.NewPool(poolCfg) + if err != nil { + return nil, fmt.Errorf("create PostgreSQL pool: %w", err) + } + cfg.Pool = pool + } + + db, err := dawgs.Open(ctx, spec.DriverName, cfg) + if err != nil { + if cfg.Pool != nil { + cfg.Pool.Close() + } + return nil, fmt.Errorf("open %s database: %w", spec.DriverName, err) + } + + schema := graph.Schema{ + Graphs: []graph.Graph{{ + Name: defaultGraphName, + Nodes: suite.nodeKinds, + Edges: suite.edgeKinds, + }}, + DefaultGraph: graph.Graph{Name: defaultGraphName}, + } + if err := db.AssertSchema(ctx, schema); err != nil { + _ = db.Close(ctx) + return nil, fmt.Errorf("assert schema: %w", err) + } + + backend := &backendCapture{ + spec: spec, + db: db, + } + + switch spec.DriverName { + case pg.DriverName: + pgDriver, ok := db.(*pg.Driver) + if !ok { + _ = db.Close(ctx) + return nil, fmt.Errorf("expected *pg.Driver, got %T", db) + } + defaultGraph, ok := pgDriver.DefaultGraph() + if !ok { + _ = db.Close(ctx) + return nil, fmt.Errorf("PostgreSQL default graph is not set") + } + backend.pgDriver = pgDriver + backend.pgGraphID = defaultGraph.ID + + case neo4j.DriverName: + neo4jDriver, databaseName, err := openNeo4jPlanDriver(spec.Connection) + if err != nil { + _ = db.Close(ctx) + return nil, err + } + backend.neo4jDriver = neo4jDriver + backend.neo4jDBName = databaseName + } + + return backend, nil +} + +func (s *backendCapture) close(ctx context.Context) { + if s.neo4jDriver != nil { + _ = s.neo4jDriver.Close() + } + if s.db != nil { + _ = s.db.Close(ctx) + } +} + +func (s *backendCapture) capture(ctx context.Context, query CorpusQuery) PlanRecord { + record := PlanRecord{ + Driver: s.spec.DriverName, + Source: query.Source, + 
Dataset: query.Dataset, + Name: query.Name, + Cypher: query.Cypher, + Params: query.Params, + } + + switch s.spec.DriverName { + case pg.DriverName: + s.capturePostgres(ctx, query.Cypher, query.Params, &record) + case neo4j.DriverName: + s.captureNeo4j(query.Cypher, query.Params, &record) + } + + return record +} + +func (s *backendCapture) capturePostgres(ctx context.Context, cypherQuery string, params map[string]any, record *PlanRecord) { + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), cypherQuery) + if err != nil { + record.Error = err.Error() + return + } + + translation, err := translate.Translate(ctx, regularQuery, s.pgDriver.KindMapper(), params, s.pgGraphID) + if err != nil { + record.Error = err.Error() + return + } + + sqlQuery, err := translate.Translated(translation) + if err != nil { + record.Error = err.Error() + return + } + + var plan []string + if err := s.db.ReadTransaction(ctx, func(tx graph.Transaction) error { + result := tx.Raw("EXPLAIN "+sqlQuery, translation.Parameters) + defer result.Close() + + for result.Next() { + values := result.Values() + if len(values) == 0 { + continue + } + plan = append(plan, fmt.Sprint(values[0])) + } + + return result.Error() + }); err != nil { + record.Error = err.Error() + } + + record.SQL = sqlQuery + record.PGPlan = plan + record.PGOperators = postgresOperators(plan) + record.PlannedLowerings = loweringNames(translation.Optimization.PlannedLowerings) + record.AppliedLowerings = loweringNames(translation.Optimization.Lowerings) + record.SkippedLowerings = append([]translate.SkippedLowering(nil), translation.Optimization.SkippedLowerings...) 
+ record.Optimization = &translation.Optimization +} + +func (s *backendCapture) captureNeo4j(cypherQuery string, params map[string]any, record *PlanRecord) { + session := s.neo4jDriver.NewSession(neo4jcore.SessionConfig{ + AccessMode: neo4jcore.AccessModeWrite, + DatabaseName: s.neo4jDBName, + }) + defer session.Close() + + result, err := session.Run("EXPLAIN "+cypherWithoutTerminator(cypherQuery), params) + if err != nil { + record.Error = err.Error() + return + } + + summary, err := result.Consume() + if err != nil { + record.Error = err.Error() + return + } + + if plan := summary.Plan(); plan != nil { + planNode := convertNeo4jPlan(plan) + record.Neo4jPlan = &planNode + record.Neo4jOperators = neo4jOperators(planNode) + } +} + +type neo4jPlanDriverConfig struct { + Target string + Username string + Password string + DatabaseName string +} + +func parseNeo4jPlanDriverConfig(connStr string) (neo4jPlanDriverConfig, error) { + connectionURL, err := url.Parse(connStr) + if err != nil { + return neo4jPlanDriverConfig{}, fmt.Errorf("parse Neo4j connection string: %w", err) + } + + if connectionURL.Scheme != neo4j.DriverName && connectionURL.Scheme != "neo4j+s" && connectionURL.Scheme != "neo4j+ssc" { + return neo4jPlanDriverConfig{}, fmt.Errorf("expected Neo4j connection string scheme, got %q", connectionURL.Scheme) + } + + password, ok := connectionURL.User.Password() + if !ok { + return neo4jPlanDriverConfig{}, fmt.Errorf("no password provided in Neo4j connection string") + } + + if connectionURL.Host == "" { + return neo4jPlanDriverConfig{}, fmt.Errorf("Neo4j connection string host is required") + } + + databaseName, err := neo4jDatabaseName(connectionURL) + if err != nil { + return neo4jPlanDriverConfig{}, err + } + + return neo4jPlanDriverConfig{ + Target: (&url.URL{ + Scheme: connectionURL.Scheme, + Host: connectionURL.Host, + RawQuery: connectionURL.RawQuery, + }).String(), + Username: connectionURL.User.Username(), + Password: password, + DatabaseName: 
// neo4jDatabaseName extracts the database name from the path of a Neo4j
// connection URL. An empty path (or one made only of slashes) yields an empty
// name and no error. The path must consist of exactly one segment; the check
// is made both before and after percent-decoding, so an encoded slash
// ("%2F") is rejected as well.
func neo4jDatabaseName(connectionURL *url.URL) (string, error) {
	escaped := strings.Trim(connectionURL.EscapedPath(), "/")

	switch {
	case escaped == "":
		return "", nil
	case strings.Contains(escaped, "/"):
		return "", fmt.Errorf("Neo4j database path must contain a single database name")
	}

	decoded, err := url.PathUnescape(escaped)
	switch {
	case err != nil:
		return "", fmt.Errorf("parse Neo4j database name: %w", err)
	case strings.Contains(decoded, "/"):
		// A percent-encoded slash only becomes visible after decoding.
		return "", fmt.Errorf("Neo4j database path must contain a single database name")
	}

	return decoded, nil
}
// postgresOperators reduces the text lines of a PostgreSQL EXPLAIN plan to
// one label per line. For each line it strips surrounding whitespace and a
// leading "->" child marker, skips blank lines and lines starting with
// "Planning ", and drops everything from the first " (" onward (for example
// the "(cost=... rows=... width=...)" suffix).
//
// The returned slice is never nil and keeps the order of plan. Detail lines
// such as "Index Cond: (...)" are retained in shortened form.
func postgresOperators(plan []string) []string {
	operators := make([]string, 0, len(plan))
	for _, line := range plan {
		trimmed := strings.TrimSpace(line)
		trimmed = strings.TrimSpace(strings.TrimPrefix(trimmed, "->"))
		if trimmed == "" || strings.HasPrefix(trimmed, "Planning ") {
			continue
		}

		if label, _, found := strings.Cut(trimmed, " ("); found {
			// EXPLAIN separates the node label from its estimates with two
			// spaces, so cutting at " (" leaves a trailing space behind;
			// trim it so labels compare and group cleanly.
			trimmed = strings.TrimSpace(label)
		}
		operators = append(operators, trimmed)
	}
	return operators
}
index 00000000..46fdd4e0 --- /dev/null +++ b/cmd/plancorpus/corpus.go @@ -0,0 +1,222 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" +) + +type corpus struct { + caseGroups map[string]*caseGroup + datasetNames []string + templateFiles []templateFile + nodeKinds graph.Kinds + edgeKinds graph.Kinds +} + +type caseGroup struct { + dataset string + files []caseFile +} + +type caseFile struct { + path string + Dataset string `json:"dataset"` + Cases []caseEntry `json:"cases"` +} + +type caseEntry struct { + Name string `json:"name"` + Cypher string `json:"cypher"` + Params map[string]any `json:"params,omitempty"` + Fixture *opengraph.Graph `json:"fixture,omitempty"` +} + +type templateFile struct { + path string + Families []templateFamily `json:"families,omitempty"` + Metamorphic []metamorphicFamily `json:"metamorphic,omitempty"` +} + +type templateFamily struct { + Name string `json:"name"` + Template string `json:"template"` + Params map[string]any `json:"params,omitempty"` + Fixture *opengraph.Graph `json:"fixture,omitempty"` + Variants []templateVariant `json:"variants"` +} + +type templateVariant struct { + Name string `json:"name"` + Vars map[string]string `json:"vars"` + Params map[string]any `json:"params,omitempty"` +} + +type metamorphicFamily struct { + Name string `json:"name"` + Fixture *opengraph.Graph `json:"fixture,omitempty"` + Queries []metamorphicQuery `json:"queries"` +} + +type metamorphicQuery struct { + Name string `json:"name"` + Cypher string `json:"cypher"` + Params map[string]any `json:"params,omitempty"` +} + +func loadCorpus(datasetDir string) (corpus, error) { + var loaded corpus + loaded.caseGroups = map[string]*caseGroup{} + + if err := loaded.loadCaseFiles(datasetDir); err != nil { + return corpus{}, err + } + if err := loaded.loadTemplateFiles(datasetDir); err != nil { + return corpus{}, err + } + if 
err := loaded.loadDatasetKinds(datasetDir); err != nil { + return corpus{}, err + } + + sort.Strings(loaded.datasetNames) + return loaded, nil +} + +func (s *corpus) loadCaseFiles(datasetDir string) error { + paths, err := filepath.Glob(filepath.Join(datasetDir, "cases", "*.json")) + if err != nil { + return fmt.Errorf("glob case files: %w", err) + } + if len(paths) == 0 { + return fmt.Errorf("no case files found under %s", filepath.Join(datasetDir, "cases")) + } + sort.Strings(paths) + + for _, path := range paths { + var file caseFile + if err := decodeJSONFile(path, &file); err != nil { + return err + } + file.path = filepath.ToSlash(path) + + dataset := file.Dataset + if dataset == "" { + dataset = "base" + } + if s.caseGroups[dataset] == nil { + s.caseGroups[dataset] = &caseGroup{dataset: dataset} + s.datasetNames = append(s.datasetNames, dataset) + } + s.caseGroups[dataset].files = append(s.caseGroups[dataset].files, file) + + for _, testCase := range file.Cases { + s.addFixtureKinds(testCase.Fixture) + } + } + + return nil +} + +func (s *corpus) loadTemplateFiles(datasetDir string) error { + paths, err := filepath.Glob(filepath.Join(datasetDir, "templates", "*.json")) + if err != nil { + return fmt.Errorf("glob template files: %w", err) + } + sort.Strings(paths) + + for _, path := range paths { + var file templateFile + if err := decodeJSONFile(path, &file); err != nil { + return err + } + file.path = filepath.ToSlash(path) + s.templateFiles = append(s.templateFiles, file) + + for _, family := range file.Families { + s.addFixtureKinds(family.Fixture) + } + for _, family := range file.Metamorphic { + s.addFixtureKinds(family.Fixture) + } + } + + return nil +} + +func (s *corpus) loadDatasetKinds(datasetDir string) error { + for _, datasetName := range s.datasetNames { + path := filepath.Join(datasetDir, datasetName+".json") + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("open dataset %s: %w", datasetName, err) + } + + doc, parseErr := 
opengraph.ParseDocument(f) + closeErr := f.Close() + if parseErr != nil { + return fmt.Errorf("parse dataset %s: %w", datasetName, parseErr) + } + if closeErr != nil { + return fmt.Errorf("close dataset %s: %w", datasetName, closeErr) + } + + nodeKinds, edgeKinds := doc.Graph.Kinds() + s.nodeKinds = s.nodeKinds.Add(nodeKinds...) + s.edgeKinds = s.edgeKinds.Add(edgeKinds...) + } + + return nil +} + +func (s *corpus) addFixtureKinds(fixture *opengraph.Graph) { + if fixture == nil { + return + } + + nodeKinds, edgeKinds := fixture.Kinds() + s.nodeKinds = s.nodeKinds.Add(nodeKinds...) + s.edgeKinds = s.edgeKinds.Add(edgeKinds...) +} + +func decodeJSONFile(path string, target any) error { + raw, err := os.ReadFile(path) + if err != nil { + return fmt.Errorf("read %s: %w", path, err) + } + if err := json.Unmarshal(raw, target); err != nil { + return fmt.Errorf("decode %s: %w", path, err) + } + return nil +} + +func renderTemplate(template string, vars map[string]string) (string, error) { + rendered := template + for name, value := range vars { + rendered = strings.ReplaceAll(rendered, "{{"+name+"}}", value) + } + if strings.Contains(rendered, "{{") || strings.Contains(rendered, "}}") { + return "", fmt.Errorf("template has unresolved placeholders: %s", rendered) + } + return rendered, nil +} + +func mergeParams(base, overrides map[string]any) map[string]any { + if len(base) == 0 && len(overrides) == 0 { + return nil + } + + merged := make(map[string]any, len(base)+len(overrides)) + for key, value := range base { + merged[key] = value + } + for key, value := range overrides { + merged[key] = value + } + return merged +} diff --git a/cmd/plancorpus/corpus_test.go b/cmd/plancorpus/corpus_test.go new file mode 100644 index 00000000..141fa515 --- /dev/null +++ b/cmd/plancorpus/corpus_test.go @@ -0,0 +1,34 @@ +package main + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestLoadCorpus(t *testing.T) { + suite, err := 
loadCorpus(filepath.Join("..", "..", "integration", "testdata")) + require.NoError(t, err) + + require.Contains(t, suite.caseGroups, "base") + require.Contains(t, suite.datasetNames, "base") + require.NotEmpty(t, suite.templateFiles) + require.NotEmpty(t, suite.nodeKinds) + require.NotEmpty(t, suite.edgeKinds) +} + +func TestRenderTemplateRequiresAllPlaceholders(t *testing.T) { + rendered, err := renderTemplate("match ({{name}}) return {{name}}", map[string]string{"name": "n"}) + require.NoError(t, err) + require.Equal(t, "match (n) return n", rendered) + + _, err = renderTemplate("match ({{name}}) return n", nil) + require.ErrorContains(t, err, "unresolved placeholders") +} + +func TestMergeParams(t *testing.T) { + merged := mergeParams(map[string]any{"a": 1, "b": 2}, map[string]any{"b": 3}) + require.Equal(t, map[string]any{"a": 1, "b": 3}, merged) + require.Nil(t, mergeParams(nil, nil)) +} diff --git a/cmd/plancorpus/main.go b/cmd/plancorpus/main.go new file mode 100644 index 00000000..3afdc069 --- /dev/null +++ b/cmd/plancorpus/main.go @@ -0,0 +1,184 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "os" + "path/filepath" +) + +type commandConfig struct { + DatasetDir string + OutputDir string + SummaryMarkdown string + SummaryJSON string + Connection string + PGConnection string + Neo4jConnection string + TopPlans int +} + +func main() { + cfg := commandConfig{} + flag.StringVar(&cfg.DatasetDir, "dataset-dir", "integration/testdata", "integration testdata directory") + flag.StringVar(&cfg.OutputDir, "output-dir", ".coverage", "directory for JSONL plan captures") + flag.StringVar(&cfg.SummaryMarkdown, "summary", "", "markdown summary path (default: output-dir/plan-corpus-summary.md)") + flag.StringVar(&cfg.SummaryJSON, "summary-json", "", "JSON summary path (default: output-dir/plan-corpus-summary.json)") + flag.StringVar(&cfg.Connection, "connection", os.Getenv("CONNECTION_STRING"), "single backend connection string") + 
flag.StringVar(&cfg.PGConnection, "pg-connection", os.Getenv("PG_CONNECTION_STRING"), "PostgreSQL connection string") + flag.StringVar(&cfg.Neo4jConnection, "neo4j-connection", os.Getenv("NEO4J_CONNECTION_STRING"), "Neo4j connection string") + flag.IntVar(&cfg.TopPlans, "top", defaultTopPlans, "number of expensive PostgreSQL plans to include in summaries") + flag.Parse() + + if err := run(context.Background(), cfg); err != nil { + fmt.Fprintf(os.Stderr, "plancorpus: %v\n", err) + os.Exit(1) + } +} + +func run(ctx context.Context, cfg commandConfig) error { + specs, err := captureSpecs(cfg) + if err != nil { + return err + } + + suite, err := loadCorpus(cfg.DatasetDir) + if err != nil { + return err + } + + if err := os.MkdirAll(cfg.OutputDir, 0755); err != nil { + return fmt.Errorf("create output directory: %w", err) + } + + var allRecords []PlanRecord + for _, spec := range specs { + records, err := captureCorpus(ctx, cfg.DatasetDir, suite, spec) + if err != nil { + return err + } + + outputPath := filepath.Join(cfg.OutputDir, "plan-corpus-"+spec.DriverName+".jsonl") + if err := writePlanRecords(outputPath, records); err != nil { + return err + } + + fmt.Fprintf(os.Stderr, "captured %d %s records in %s\n", len(records), spec.DriverName, outputPath) + allRecords = append(allRecords, records...) 
+ } + + summary := buildSummary(allRecords, cfg.TopPlans) + if cfg.SummaryMarkdown == "" { + cfg.SummaryMarkdown = filepath.Join(cfg.OutputDir, "plan-corpus-summary.md") + } + if cfg.SummaryJSON == "" { + cfg.SummaryJSON = filepath.Join(cfg.OutputDir, "plan-corpus-summary.json") + } + + if err := writeSummaryFiles(cfg.SummaryMarkdown, cfg.SummaryJSON, summary); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "wrote summaries to %s and %s\n", cfg.SummaryMarkdown, cfg.SummaryJSON) + return nil +} + +func captureSpecs(cfg commandConfig) ([]captureSpec, error) { + specsByDriver := map[string]captureSpec{} + + if cfg.Connection != "" { + driverName, err := driverFromConnectionString(cfg.Connection) + if err != nil { + return nil, err + } + specsByDriver[driverName] = captureSpec{ + DriverName: driverName, + Connection: cfg.Connection, + } + } + + if cfg.PGConnection != "" { + specsByDriver[pgDriverName()] = captureSpec{ + DriverName: pgDriverName(), + Connection: cfg.PGConnection, + } + } + if cfg.Neo4jConnection != "" { + specsByDriver[neo4jDriverName()] = captureSpec{ + DriverName: neo4jDriverName(), + Connection: cfg.Neo4jConnection, + } + } + + if len(specsByDriver) == 0 { + return nil, fmt.Errorf("no connection string supplied; set CONNECTION_STRING or PG_CONNECTION_STRING/NEO4J_CONNECTION_STRING") + } + + var ( + orderedDrivers = []string{pgDriverName(), neo4jDriverName()} + specs = make([]captureSpec, 0, len(specsByDriver)) + ) + for _, driverName := range orderedDrivers { + if spec, found := specsByDriver[driverName]; found { + specs = append(specs, spec) + } + } + return specs, nil +} + +func pgDriverName() string { + return "pg" +} + +func neo4jDriverName() string { + return "neo4j" +} + +func writePlanRecords(path string, records []PlanRecord) error { + out, err := os.Create(path) + if err != nil { + return fmt.Errorf("create %s: %w", path, err) + } + + encoder := json.NewEncoder(out) + for _, record := range records { + if err := 
encoder.Encode(record); err != nil { + return fmt.Errorf("write %s: %w", path, err) + } + } + + return out.Close() +} + +func writeSummaryFiles(markdownPath, jsonPath string, summary PlanSummary) error { + if markdownPath != "" { + out, err := os.Create(markdownPath) + if err != nil { + return fmt.Errorf("create %s: %w", markdownPath, err) + } + if err := writeMarkdownSummary(out, summary); err != nil { + _ = out.Close() + return fmt.Errorf("write %s: %w", markdownPath, err) + } + if err := out.Close(); err != nil { + return fmt.Errorf("close %s: %w", markdownPath, err) + } + } + + if jsonPath != "" { + out, err := os.Create(jsonPath) + if err != nil { + return fmt.Errorf("create %s: %w", jsonPath, err) + } + if err := writeJSONSummary(out, summary); err != nil { + _ = out.Close() + return fmt.Errorf("write %s: %w", jsonPath, err) + } + if err := out.Close(); err != nil { + return fmt.Errorf("close %s: %w", jsonPath, err) + } + } + + return nil +} diff --git a/cmd/plancorpus/main_test.go b/cmd/plancorpus/main_test.go new file mode 100644 index 00000000..deff5ec1 --- /dev/null +++ b/cmd/plancorpus/main_test.go @@ -0,0 +1,92 @@ +package main + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCaptureSpecs(t *testing.T) { + specs, err := captureSpecs(commandConfig{ + Connection: "neo4j://neo4j:password@localhost:7687", + PGConnection: "postgres://postgres:password@localhost/db", + Neo4jConnection: "neo4j://neo4j:override@localhost:7687", + }) + require.NoError(t, err) + require.Equal(t, []captureSpec{{ + DriverName: "pg", + Connection: "postgres://postgres:password@localhost/db", + }, { + DriverName: "neo4j", + Connection: "neo4j://neo4j:override@localhost:7687", + }}, specs) +} + +func TestCaptureSpecsRequiresConnection(t *testing.T) { + _, err := captureSpecs(commandConfig{}) + require.ErrorContains(t, err, "no connection string supplied") +} + +func TestDriverFromConnectionString(t *testing.T) { + driverName, err := 
driverFromConnectionString("postgresql://postgres:password@localhost/db") + require.NoError(t, err) + require.Equal(t, "pg", driverName) + + for _, connStr := range []string{ + "neo4j://neo4j:password@localhost:7687", + "neo4j+s://neo4j:password@localhost:7687", + "neo4j+ssc://neo4j:password@localhost:7687", + } { + driverName, err = driverFromConnectionString(connStr) + require.NoError(t, err) + require.Equal(t, "neo4j", driverName) + } + + _, err = driverFromConnectionString("mysql://localhost") + require.ErrorContains(t, err, "unknown connection string scheme") +} + +func TestParseNeo4jPlanDriverConfigPreservesURI(t *testing.T) { + testCases := []struct { + name string + connStr string + expectedTarget string + expectedDatabase string + }{{ + name: "plain routing", + connStr: "neo4j://neo4j:password@localhost:7687", + expectedTarget: "neo4j://localhost:7687", + expectedDatabase: "", + }, { + name: "secure routing", + connStr: "neo4j+s://neo4j:password@cluster.example:7687", + expectedTarget: "neo4j+s://cluster.example:7687", + expectedDatabase: "", + }, { + name: "self signed routing with database and query", + connStr: "neo4j+ssc://neo4j:password@cluster.example:7687/analytics?policy=fast", + expectedTarget: "neo4j+ssc://cluster.example:7687?policy=fast", + expectedDatabase: "analytics", + }} + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + cfg, err := parseNeo4jPlanDriverConfig(testCase.connStr) + require.NoError(t, err) + require.Equal(t, testCase.expectedTarget, cfg.Target) + require.Equal(t, "neo4j", cfg.Username) + require.Equal(t, "password", cfg.Password) + require.Equal(t, testCase.expectedDatabase, cfg.DatabaseName) + }) + } +} + +func TestParseNeo4jPlanDriverConfigRejectsNestedDatabasePath(t *testing.T) { + for _, connStr := range []string{ + "neo4j://neo4j:password@localhost:7687/db/extra", + "neo4j://neo4j:password@localhost:7687/db%2Fextra", + } { + _, err := parseNeo4jPlanDriverConfig(connStr) + 
require.ErrorContains(t, err, "single database name") + } +} diff --git a/cmd/plancorpus/report.go b/cmd/plancorpus/report.go new file mode 100644 index 00000000..5e62f1a6 --- /dev/null +++ b/cmd/plancorpus/report.go @@ -0,0 +1,307 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "regexp" + "sort" + "strconv" + "strings" + + "github.com/specterops/dawgs/cypher/models/pgsql/translate" +) + +const defaultTopPlans = 25 + +var postgresCostPattern = regexp.MustCompile(`cost=[0-9.]+\.\.([0-9.]+)`) + +type PlanSummary struct { + Drivers []DriverSummary `json:"drivers"` + TopPostgresPlans []CostedPlan `json:"top_postgres_plans,omitempty"` + PostgresOperators []Count `json:"postgres_operators,omitempty"` + Neo4jOperators []Count `json:"neo4j_operators,omitempty"` + PlannedLowerings []Count `json:"planned_lowerings,omitempty"` + AppliedLowerings []Count `json:"applied_lowerings,omitempty"` + SkippedLowerings []Count `json:"skipped_lowerings,omitempty"` + SkippedReasons []Count `json:"skipped_reasons,omitempty"` + FeatureCounts []Count `json:"feature_counts,omitempty"` + Errors []PlanError `json:"errors,omitempty"` +} + +type DriverSummary struct { + Driver string `json:"driver"` + Records int `json:"records"` + Errors int `json:"errors"` +} + +type Count struct { + Name string `json:"name"` + Count int `json:"count"` +} + +type CostedPlan struct { + Cost float64 `json:"cost"` + Driver string `json:"driver"` + Source string `json:"source"` + Dataset string `json:"dataset,omitempty"` + Name string `json:"name"` + Cypher string `json:"cypher"` + PlanRoot string `json:"plan_root"` + PlannedLowerings []string `json:"planned_lowerings,omitempty"` + AppliedLowerings []string `json:"applied_lowerings,omitempty"` + SkippedLowerings []string `json:"skipped_lowerings,omitempty"` +} + +type PlanError struct { + Driver string `json:"driver"` + Source string `json:"source"` + Name string `json:"name"` + Error string `json:"error"` +} + +func buildSummary(records 
[]PlanRecord, topN int) PlanSummary { + if topN <= 0 { + topN = defaultTopPlans + } + + var ( + driverCounts = map[string]*DriverSummary{} + postgresOperatorCounts = map[string]int{} + neo4jOperatorCounts = map[string]int{} + plannedLoweringCounts = map[string]int{} + appliedLoweringCounts = map[string]int{} + skippedLoweringCounts = map[string]int{} + skippedReasonCounts = map[string]int{} + featureCounts = map[string]int{} + errors []PlanError + topPG []CostedPlan + ) + + for _, record := range records { + driver := driverCounts[record.Driver] + if driver == nil { + driver = &DriverSummary{Driver: record.Driver} + driverCounts[record.Driver] = driver + } + driver.Records++ + + if record.Error != "" { + driver.Errors++ + errors = append(errors, PlanError{ + Driver: record.Driver, + Source: record.Source, + Name: record.Name, + Error: record.Error, + }) + } + + for _, operator := range record.PGOperators { + postgresOperatorCounts[normalizePostgresOperator(operator)]++ + } + for _, operator := range record.Neo4jOperators { + neo4jOperatorCounts[operator]++ + } + for _, lowering := range record.PlannedLowerings { + plannedLoweringCounts[lowering]++ + } + for _, lowering := range record.AppliedLowerings { + appliedLoweringCounts[lowering]++ + } + for _, lowering := range record.SkippedLowerings { + skippedLoweringCounts[lowering.Name]++ + skippedReasonCounts[lowering.Name+": "+lowering.Reason]++ + } + + for _, line := range record.PGPlan { + switch { + case strings.Contains(line, "Recursive Union"): + featureCounts["PostgreSQL Recursive Union"]++ + case strings.Contains(line, "Function Scan on unnest"): + featureCounts["PostgreSQL Function Scan on unnest"]++ + case strings.Contains(line, "SubPlan "): + featureCounts["PostgreSQL SubPlan"]++ + case strings.Contains(line, "Filter: satisfied"): + featureCounts["PostgreSQL traversal satisfied filter"]++ + } + } + + if len(record.PGPlan) > 0 && record.Error == "" { + topPG = append(topPG, CostedPlan{ + Cost: 
postgresEstimatedCost(record.PGPlan[0]), + Driver: record.Driver, + Source: record.Source, + Dataset: record.Dataset, + Name: record.Name, + Cypher: record.Cypher, + PlanRoot: record.PGPlan[0], + PlannedLowerings: append([]string(nil), record.PlannedLowerings...), + AppliedLowerings: append([]string(nil), record.AppliedLowerings...), + SkippedLowerings: skippedLoweringLabels(record.SkippedLowerings), + }) + } + } + + sort.Slice(topPG, func(i, j int) bool { + return topPG[i].Cost > topPG[j].Cost + }) + if len(topPG) > topN { + topPG = topPG[:topN] + } + + return PlanSummary{ + Drivers: sortedDriverSummaries(driverCounts), + TopPostgresPlans: topPG, + PostgresOperators: sortedCounts(postgresOperatorCounts), + Neo4jOperators: sortedCounts(neo4jOperatorCounts), + PlannedLowerings: sortedCounts(plannedLoweringCounts), + AppliedLowerings: sortedCounts(appliedLoweringCounts), + SkippedLowerings: sortedCounts(skippedLoweringCounts), + SkippedReasons: sortedCounts(skippedReasonCounts), + FeatureCounts: sortedCounts(featureCounts), + Errors: errors, + } +} + +func skippedLoweringLabels(lowerings []translate.SkippedLowering) []string { + if len(lowerings) == 0 { + return nil + } + + labels := make([]string, len(lowerings)) + for idx, lowering := range lowerings { + labels[idx] = lowering.Name + ": " + lowering.Reason + } + + return labels +} + +func postgresEstimatedCost(planRoot string) float64 { + match := postgresCostPattern.FindStringSubmatch(planRoot) + if len(match) != 2 { + return 0 + } + + cost, err := strconv.ParseFloat(match[1], 64) + if err != nil { + return 0 + } + return cost +} + +func normalizePostgresOperator(operator string) string { + operator = strings.TrimSpace(operator) + if operator == "" { + return "" + } + if idx := strings.Index(operator, ":"); idx >= 0 { + return operator[:idx] + } + if idx := strings.Index(operator, " on "); idx >= 0 { + return operator[:idx] + } + if idx := strings.Index(operator, " using "); idx >= 0 { + return operator[:idx] + } 
+ return operator +} + +func sortedDriverSummaries(drivers map[string]*DriverSummary) []DriverSummary { + sorted := make([]DriverSummary, 0, len(drivers)) + for _, summary := range drivers { + sorted = append(sorted, *summary) + } + sort.Slice(sorted, func(i, j int) bool { + return sorted[i].Driver < sorted[j].Driver + }) + return sorted +} + +func sortedCounts(counts map[string]int) []Count { + sorted := make([]Count, 0, len(counts)) + for name, count := range counts { + if name == "" || count == 0 { + continue + } + sorted = append(sorted, Count{Name: name, Count: count}) + } + sort.Slice(sorted, func(i, j int) bool { + if sorted[i].Count == sorted[j].Count { + return sorted[i].Name < sorted[j].Name + } + return sorted[i].Count > sorted[j].Count + }) + return sorted +} + +func writeJSONSummary(w io.Writer, summary PlanSummary) error { + encoder := json.NewEncoder(w) + encoder.SetIndent("", " ") + return encoder.Encode(summary) +} + +func writeMarkdownSummary(w io.Writer, summary PlanSummary) error { + writeCounts := func(title string, counts []Count, limit int) { + if len(counts) == 0 { + return + } + fmt.Fprintf(w, "\n## %s\n\n| Name | Count |\n| --- | ---: |\n", title) + for idx, count := range counts { + if limit > 0 && idx >= limit { + break + } + fmt.Fprintf(w, "| %s | %d |\n", markdownCell(count.Name), count.Count) + } + } + + fmt.Fprintln(w, "# Cypher Plan Corpus Summary") + fmt.Fprintln(w, "\n## Drivers\n\n| Driver | Records | Errors |\n| --- | ---: | ---: |") + for _, driver := range summary.Drivers { + fmt.Fprintf(w, "| %s | %d | %d |\n", markdownCell(driver.Driver), driver.Records, driver.Errors) + } + + if len(summary.TopPostgresPlans) > 0 { + fmt.Fprintln(w, "\n## Top PostgreSQL Plans\n\n| Cost | Source | Name | Root | Lowerings |\n| ---: | --- | --- | --- | --- |") + for _, plan := range summary.TopPostgresPlans { + fmt.Fprintf( + w, + "| %.2f | %s | %s | %s | %s |\n", + plan.Cost, + markdownCell(plan.Source), + markdownCell(plan.Name), + 
markdownCell(plan.PlanRoot), + markdownCell(strings.Join(plan.PlannedLowerings, ", ")), + ) + } + } + + writeCounts("Feature Counts", summary.FeatureCounts, 0) + writeCounts("Planned Lowerings", summary.PlannedLowerings, 0) + writeCounts("Applied Lowerings", summary.AppliedLowerings, 0) + writeCounts("Skipped Lowerings", summary.SkippedLowerings, 0) + writeCounts("Skipped Lowering Reasons", summary.SkippedReasons, 0) + writeCounts("PostgreSQL Operators", summary.PostgresOperators, 25) + writeCounts("Neo4j Operators", summary.Neo4jOperators, 25) + + if len(summary.Errors) > 0 { + fmt.Fprintln(w, "\n## Capture Errors\n\n| Driver | Source | Name | Error |\n| --- | --- | --- | --- |") + for _, captureError := range summary.Errors { + fmt.Fprintf( + w, + "| %s | %s | %s | %s |\n", + markdownCell(captureError.Driver), + markdownCell(captureError.Source), + markdownCell(captureError.Name), + markdownCell(captureError.Error), + ) + } + } + + return nil +} + +func markdownCell(value string) string { + value = strings.ReplaceAll(value, "\n", " ") + value = strings.ReplaceAll(value, "|", "\\|") + return value +} diff --git a/cmd/plancorpus/report_test.go b/cmd/plancorpus/report_test.go new file mode 100644 index 00000000..f3616952 --- /dev/null +++ b/cmd/plancorpus/report_test.go @@ -0,0 +1,96 @@ +package main + +import ( + "bytes" + "testing" + + "github.com/specterops/dawgs/cypher/models/pgsql/translate" + "github.com/stretchr/testify/require" +) + +func TestBuildSummaryRanksPostgresPlansAndCountsSignals(t *testing.T) { + records := []PlanRecord{{ + Driver: "pg", + Source: "cases/a.json", + Name: "low", + Cypher: "match (n) return n", + PGPlan: []string{"Seq Scan on node_1 (cost=0.00..10.50 rows=1 width=8)", "Filter: satisfied"}, + PGOperators: []string{"Seq Scan on node_1", "Filter: satisfied"}, + PlannedLowerings: []string{"ProjectionPruning"}, + AppliedLowerings: []string{"ProjectionPruning"}, + }, { + Driver: "pg", + Source: "cases/b.json", + Name: "high", + Cypher: 
"match p=()-[*]->() return p", + PGPlan: []string{"Recursive Union (cost=0.00..99.25 rows=1 width=8)", "SubPlan 1", "Function Scan on unnest _path"}, + PGOperators: []string{"Recursive Union", "Function Scan on unnest _path"}, + PlannedLowerings: []string{"LatePathMaterialization"}, + AppliedLowerings: []string{"LatePathMaterialization"}, + SkippedLowerings: []translate.SkippedLowering{{ + Name: "PredicatePlacement", + Reason: "planned predicate placements were not consumed by this translation shape", + Count: 2, + }}, + }, { + Driver: "neo4j", + Source: "cases/a.json", + Name: "neo", + Cypher: "match (n) return n", + Neo4jOperators: []string{"ProduceResults@neo4j", "AllNodesScan@neo4j"}, + }, { + Driver: "pg", + Source: "cases/error.json", + Name: "error", + Error: "expected error", + }} + + summary := buildSummary(records, 1) + + require.Equal(t, []DriverSummary{{ + Driver: "neo4j", + Records: 1, + }, { + Driver: "pg", + Records: 3, + Errors: 1, + }}, summary.Drivers) + require.Len(t, summary.TopPostgresPlans, 1) + require.Equal(t, "high", summary.TopPostgresPlans[0].Name) + require.Contains(t, summary.FeatureCounts, Count{Name: "PostgreSQL Recursive Union", Count: 1}) + require.Contains(t, summary.FeatureCounts, Count{Name: "PostgreSQL SubPlan", Count: 1}) + require.Contains(t, summary.FeatureCounts, Count{Name: "PostgreSQL Function Scan on unnest", Count: 1}) + require.Contains(t, summary.FeatureCounts, Count{Name: "PostgreSQL traversal satisfied filter", Count: 1}) + require.Contains(t, summary.PostgresOperators, Count{Name: "Seq Scan", Count: 1}) + require.Contains(t, summary.Neo4jOperators, Count{Name: "ProduceResults@neo4j", Count: 1}) + require.Contains(t, summary.PlannedLowerings, Count{Name: "LatePathMaterialization", Count: 1}) + require.Contains(t, summary.SkippedLowerings, Count{Name: "PredicatePlacement", Count: 1}) + require.Contains(t, summary.SkippedReasons, Count{Name: "PredicatePlacement: planned predicate placements were not consumed by this 
translation shape", Count: 1}) + require.Contains(t, summary.Errors, PlanError{ + Driver: "pg", + Source: "cases/error.json", + Name: "error", + Error: "expected error", + }) +} + +func TestWriteMarkdownSummaryEscapesPipes(t *testing.T) { + summary := PlanSummary{ + Drivers: []DriverSummary{{Driver: "pg", Records: 1}}, + TopPostgresPlans: []CostedPlan{{ + Cost: 1.25, + Source: "cases/a.json", + Name: "pipe | name", + PlanRoot: "Seq Scan on node_1", + }}, + } + + var out bytes.Buffer + require.NoError(t, writeMarkdownSummary(&out, summary)) + require.Contains(t, out.String(), "pipe \\| name") +} + +func TestPostgresEstimatedCost(t *testing.T) { + require.Equal(t, 1180526.82, postgresEstimatedCost("Hash Join (cost=4136.05..1180526.82 rows=32097 width=68)")) + require.Zero(t, postgresEstimatedCost("not a plan")) +} diff --git a/cmd/plancorpus/types.go b/cmd/plancorpus/types.go new file mode 100644 index 00000000..9c4fa662 --- /dev/null +++ b/cmd/plancorpus/types.go @@ -0,0 +1,37 @@ +package main + +import "github.com/specterops/dawgs/cypher/models/pgsql/translate" + +type PlanRecord struct { + Driver string `json:"driver"` + Source string `json:"source"` + Dataset string `json:"dataset,omitempty"` + Name string `json:"name"` + Cypher string `json:"cypher"` + Params map[string]any `json:"params,omitempty"` + SQL string `json:"sql,omitempty"` + PGPlan []string `json:"pg_plan,omitempty"` + PGOperators []string `json:"pg_operators,omitempty"` + Neo4jPlan *Neo4jPlanNode `json:"neo4j_plan,omitempty"` + Neo4jOperators []string `json:"neo4j_operators,omitempty"` + PlannedLowerings []string `json:"planned_lowerings,omitempty"` + AppliedLowerings []string `json:"applied_lowerings,omitempty"` + SkippedLowerings []translate.SkippedLowering `json:"skipped_lowerings,omitempty"` + Optimization *translate.OptimizationSummary `json:"optimization,omitempty"` + Error string `json:"error,omitempty"` +} + +type Neo4jPlanNode struct { + Operator string `json:"operator"` + Arguments 
map[string]string `json:"arguments,omitempty"` + Identifiers []string `json:"identifiers,omitempty"` + Children []Neo4jPlanNode `json:"children,omitempty"` +} + +type CorpusQuery struct { + Source string + Dataset string + Name string + Cypher string + Params map[string]any +} diff --git a/cypher/Cypher Syntax Support.md b/cypher/Cypher Syntax Support.md index e65618da..35c07814 100644 --- a/cypher/Cypher Syntax Support.md +++ b/cypher/Cypher Syntax Support.md @@ -428,15 +428,16 @@ This indicates that there is a node with a value for `n.name` that is not parsab In the future, CySQL translation will cover most of the strict typing requirements automatically for users. -Property equality against the string literal or string parameter `'true'` or `'false'` is translated through PostgreSQL -JSON text extraction for backwards compatibility. This means a JSON boolean property value of `true` compares equal to -the string literal `'true'`. Other string equality operands use strict JSON scalar equality; use boolean or numeric -literals, such as `n.enabled = true` or `n.count = 1`, when typed JSON scalar equality is required. +Property equality against a string literal or text parameter is translated through PostgreSQL JSON text extraction with +a JSON string type guard. This keeps strings distinct from JSON booleans and numbers while allowing PostgreSQL +expression indexes such as `properties ->> 'objectid'` or `properties ->> 'name'` to accelerate exact string anchors. +Boolean and numeric literals continue to use strict JSON scalar equality; use boolean or numeric literals, such as +`n.enabled = true` or `n.count = 1`, when typed JSON scalar equality is required. ### Index Utilization Indexing in CySQL does not require a label specifier to be utilized. If the node property `name` is indexed in CySQL, -both: +exact string equality is emitted in a form compatible with PostgreSQL text expression indexes. 
Both: ``` match (n:User) where n.name = '1234' return n @@ -450,6 +451,10 @@ match (n) where n.name = '1234' return n will use the `name` index regardless of node label. +For substring and suffix searches, PostgreSQL can use explicit `TextSearchIndex`/trigram expression indexes requested +by schema, but CySQL does not add blanket suffix indexes during default schema assertion. Suffix forms are still being +kept conservative so `ENDS WITH`, reversed operands, null handling, and string type semantics remain backend-equivalent. + ### null Behavior Behavior around `null` in SQL differs from how Neo4j executes Cypher. Certain expression operators in Neo4j's diff --git a/cypher/models/cypher/copy.go b/cypher/models/cypher/copy.go index 77802df1..47cbb6d8 100644 --- a/cypher/models/cypher/copy.go +++ b/cypher/models/cypher/copy.go @@ -53,6 +53,9 @@ func Copy[T any](value T, extensions ...CopyExtension[T]) T { case *Quantifier: return any(typedValue.copy()).(T) + case *RangeQuantifier: + return any(typedValue.copy()).(T) + case *Where: return any(typedValue.copy()).(T) @@ -110,6 +113,18 @@ func Copy[T any](value T, extensions ...CopyExtension[T]) T { case *Literal: return any(typedValue.copy()).(T) + case *Properties: + return any(typedValue.copy()).(T) + + case MapLiteral: + return any(typedValue.copy()).(T) + + case *ListLiteral: + return any(typedValue.copy()).(T) + + case *MapItem: + return any(typedValue.copy()).(T) + case *ReadingClause: return any(typedValue.copy()).(T) diff --git a/cypher/models/cypher/copy_test.go b/cypher/models/cypher/copy_test.go index 390c6949..ee65ff2a 100644 --- a/cypher/models/cypher/copy_test.go +++ b/cypher/models/cypher/copy_test.go @@ -63,6 +63,7 @@ func TestCopy(t *testing.T) { validateCopy(t, &model2.IDInCollection{}) validateCopy(t, &model2.FilterExpression{}) validateCopy(t, &model2.Quantifier{}) + validateCopy(t, &model2.RangeQuantifier{Value: "*"}) validateCopy(t, &model2.MultiPartQueryPart{}) validateCopy(t, &model2.Remove{}) @@ 
-101,10 +102,27 @@ func TestCopy(t *testing.T) { validateCopy(t, &model2.Literal{ Null: true, }) + validateCopy(t, &model2.Properties{ + Map: model2.MapLiteral{ + "name": model2.NewStringLiteral("value"), + }, + }) + validateCopy(t, model2.MapLiteral{ + "name": model2.NewStringLiteral("value"), + }) + validateCopy(t, &model2.ListLiteral{ + model2.NewLiteral(1, false), + model2.NewLiteral(2, false), + }) + validateCopy(t, &model2.MapItem{ + Key: "name", + Value: model2.NewStringLiteral("value"), + }) validateCopy(t, &model2.Projection{ Distinct: true, All: true, }) + validateCopy(t, &model2.Return{}) validateCopy(t, &model2.ProjectionItem{}) validateCopy(t, &model2.PropertyLookup{ Symbol: "a", @@ -150,3 +168,47 @@ func TestCopy(t *testing.T) { validateCopy(t, []string{}) validateCopy(t, graph.Kinds{}) } + +func TestCopyPatternVariablesAreIndependent(t *testing.T) { + original := &model2.PatternPart{ + Variable: model2.NewVariableWithSymbol("p"), + PatternElements: []*model2.PatternElement{ + { + Element: &model2.NodePattern{ + Variable: model2.NewVariableWithSymbol("n"), + }, + }, + { + Element: &model2.RelationshipPattern{ + Variable: model2.NewVariableWithSymbol("r"), + }, + }, + }, + } + + copied := model2.Copy(original) + copied.Variable.Symbol = "copied_path" + copiedNode, _ := copied.PatternElements[0].AsNodePattern() + copiedNode.Variable.Symbol = "copied_node" + copiedRelationship, _ := copied.PatternElements[1].AsRelationshipPattern() + copiedRelationship.Variable.Symbol = "copied_relationship" + + originalNode, _ := original.PatternElements[0].AsNodePattern() + originalRelationship, _ := original.PatternElements[1].AsRelationshipPattern() + + require.Equal(t, "p", original.Variable.Symbol) + require.Equal(t, "n", originalNode.Variable.Symbol) + require.Equal(t, "r", originalRelationship.Variable.Symbol) +} + +func TestNilPatternElementHelpers(t *testing.T) { + var element *model2.PatternElement + + nodePattern, isNodePattern := element.AsNodePattern() + 
require.Nil(t, nodePattern) + require.False(t, isNodePattern) + + relationshipPattern, isRelationshipPattern := element.AsRelationshipPattern() + require.Nil(t, relationshipPattern) + require.False(t, isRelationshipPattern) +} diff --git a/cypher/models/cypher/model.go b/cypher/models/cypher/model.go index 27cdd549..b3a59b77 100644 --- a/cypher/models/cypher/model.go +++ b/cypher/models/cypher/model.go @@ -743,6 +743,16 @@ func NewRangeQuantifier(value string) *RangeQuantifier { } } +func (s *RangeQuantifier) copy() *RangeQuantifier { + if s == nil { + return s + } + + return &RangeQuantifier{ + Value: s.Value, + } +} + type KindMatcher struct { Reference Expression Kinds graph.Kinds @@ -881,12 +891,36 @@ type MapItem struct { Value Expression } +func (s *MapItem) copy() *MapItem { + if s == nil { + return nil + } + + return &MapItem{ + Key: s.Key, + Value: Copy(s.Value), + } +} + type MapLiteral map[string]Expression func NewMapLiteral() MapLiteral { return MapLiteral{} } +func (s MapLiteral) copy() MapLiteral { + if s == nil { + return nil + } + + mapCopy := NewMapLiteral() + for key, value := range s { + mapCopy[key] = Copy(value) + } + + return mapCopy +} + func (s MapLiteral) Items() []*MapItem { items := make([]*MapItem, 0, len(s)) @@ -924,6 +958,17 @@ func NewListLiteral() *ListLiteral { return &ListLiteral{} } +func (s *ListLiteral) copy() *ListLiteral { + if s == nil { + return nil + } + + listCopy := NewListLiteral() + *listCopy = Copy([]Expression(*s)) + + return listCopy +} + func NewStringListLiteral(values []string) *ListLiteral { literal := NewListLiteral() @@ -1287,6 +1332,10 @@ func (s *PatternElement) IsNodePattern() bool { } func (s *PatternElement) AsNodePattern() (*NodePattern, bool) { + if s == nil { + return nil, false + } + nodePattern, isNodePattern := s.Element.(*NodePattern) return nodePattern, isNodePattern } @@ -1297,6 +1346,10 @@ func (s *PatternElement) IsRelationshipPattern() bool { } func (s *PatternElement) AsRelationshipPattern() 
(*RelationshipPattern, bool) { + if s == nil { + return nil, false + } + relationshipPattern, isRelationshipPattern := s.Element.(*RelationshipPattern) return relationshipPattern, isRelationshipPattern } @@ -1310,6 +1363,17 @@ func NewProperties() *Properties { return &Properties{} } +func (s *Properties) copy() *Properties { + if s == nil { + return nil + } + + return &Properties{ + Map: Copy(s.Map), + Parameter: Copy(s.Parameter), + } +} + // NodePattern Type // // Kinds is a conjunction of types for the given node. @@ -1328,7 +1392,7 @@ func (s *NodePattern) copy() *NodePattern { } return &NodePattern{ - Variable: s.Variable, + Variable: Copy(s.Variable), Kinds: Copy(s.Kinds), Properties: Copy(s.Properties), } @@ -1358,7 +1422,7 @@ func (s *RelationshipPattern) copy() *RelationshipPattern { } return &RelationshipPattern{ - Variable: s.Variable, + Variable: Copy(s.Variable), Kinds: Copy(s.Kinds), Direction: s.Direction, Range: Copy(s.Range), @@ -1471,7 +1535,7 @@ func (s *Return) copy() *Return { } return &Return{ - Projection: s.Projection.copy(), + Projection: Copy(s.Projection), } } @@ -1492,7 +1556,7 @@ func (s *PatternPart) copy() *PatternPart { } return &PatternPart{ - Variable: s.Variable, + Variable: Copy(s.Variable), ShortestPathPattern: s.ShortestPathPattern, AllShortestPathsPattern: s.AllShortestPathsPattern, PatternElements: Copy(s.PatternElements), diff --git a/cypher/models/pgsql/format/format.go b/cypher/models/pgsql/format/format.go index e44e399c..9cbed49c 100644 --- a/cypher/models/pgsql/format/format.go +++ b/cypher/models/pgsql/format/format.go @@ -533,6 +533,18 @@ func formatNode(builder *OutputBuilder, rootExpr pgsql.SyntaxNode) error { exprStack = append(exprStack, typedNextExpr.Expression) exprStack = append(exprStack, pgsql.FormattingLiteral("(")) + case *pgsql.EdgeArrayFromPathIDs: + if typedNextExpr.PathIDs == nil { + return fmt.Errorf("edge array from path IDs has no path expression") + } + + exprStack = append( + exprStack, + 
pgsql.FormattingLiteral(") with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id)"), + typedNextExpr.PathIDs, + pgsql.FormattingLiteral("(select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest("), + ) + case pgsql.Parameter: if builder.MaterializeParameters { if parameterValue, hasParameter := builder.parameters[typedNextExpr.Identifier.String()]; !hasParameter { diff --git a/cypher/models/pgsql/functions.go b/cypher/models/pgsql/functions.go index d9e787f4..3d0b5f4e 100644 --- a/cypher/models/pgsql/functions.go +++ b/cypher/models/pgsql/functions.go @@ -12,6 +12,7 @@ const ( FunctionJSONBArrayElementsText Identifier = "jsonb_array_elements_text" FunctionJSONBBuildObject Identifier = "jsonb_build_object" FunctionJSONBArrayLength Identifier = "jsonb_array_length" + FunctionJSONBTypeof Identifier = "jsonb_typeof" FunctionToJSONB Identifier = "to_jsonb" FunctionCypherContains Identifier = "cypher_contains" FunctionCypherStartsWith Identifier = "cypher_starts_with" diff --git a/cypher/models/pgsql/model.go b/cypher/models/pgsql/model.go index ae3c750f..54854c50 100644 --- a/cypher/models/pgsql/model.go +++ b/cypher/models/pgsql/model.go @@ -406,6 +406,18 @@ func (s *Parenthetical) AsExpression() Expression { return s } +type EdgeArrayFromPathIDs struct { + PathIDs Expression +} + +func (s *EdgeArrayFromPathIDs) NodeType() string { + return "edge_array_from_path_ids" +} + +func (s *EdgeArrayFromPathIDs) AsExpression() Expression { + return s +} + type JoinType int const ( diff --git a/cypher/models/pgsql/optimize/OPTIMIZATION_PLAN.md b/cypher/models/pgsql/optimize/OPTIMIZATION_PLAN.md new file mode 100644 index 00000000..55bb5eb1 --- /dev/null +++ b/cypher/models/pgsql/optimize/OPTIMIZATION_PLAN.md @@ -0,0 +1,132 @@ +# Cypher to PostgreSQL Optimization Plan + +This plan tracks optimization and rewrite work identified by 
running the shared integration corpus against Neo4j and PostgreSQL and comparing plan shapes. + +## Phase 1: Baseline And Tooling + +Status: completed + +- Keep a reproducible plan-capture workflow. + - Capture PostgreSQL translated SQL, PostgreSQL `EXPLAIN`, Neo4j logical plan operator trees, and optimizer planned/applied lowerings. + - Read `integration/testdata/cases` and `integration/testdata/templates`. + - Write comparable JSONL output without changing product behavior. +- Add plan-summary reporting. + - Rank cases by PostgreSQL estimated cost. + - Count plan operators, recursive CTEs, subplans, path materialization indicators, and optimizer lowerings. + - Produce markdown and JSON summaries. + +## Phase 2: Quick Wins + +Status: completed + +- Add count-store fast paths for simple count queries: + - `MATCH (n) RETURN count(n)` + - `MATCH ()-[r]->() RETURN count(r)` + - `MATCH (...) RETURN count(*)` for the same exact node and directed-edge shapes. + - Typed variants where kind filters map cleanly. + - Implemented as `CountStoreFastPath` lowering for exact node and directed-edge count shapes. +- Audit the planned/applied `PredicatePlacement` gap. + - Distinguish missing translator consumption from intentional skipped placements. + - Add explicit skipped-placement reasons when a planned lowering is not applied. + - Plan-corpus summaries now report skipped lowerings and skipped-lowering reasons. + +## Phase 3: Path Materialization + +Status: completed + +- Share path materialization for repeated path functions. + - Target `nodes(p)`, `relationships(p)`, `size(relationships(p))`, `startNode`, `endNode`, and `type`. + - Avoid repeated `SubPlan` and `Function Scan on unnest` work per path binding. + - Materialize unprojected paths once through a lateral stage when final projections return a path and its components, or repeat node-bearing component expressions. +- Expand late path materialization coverage. 
+ - Ensure paths are built only when needed for projection, filtering, or mutation semantics. + +## Phase 4: Traversal And Recursive CTEs + +Status: completed + +- Push predicates into recursive traversal anchors and steps where semantics allow. + - Endpoint kind/property predicates. + - Relationship type predicates. + - Bound-node filters. +- Improve traversal direction selection using endpoint selectivity. + - Bound IDs. + - Labels/kinds. + - Equality predicates. + - Finite relationship type sets. + - Plan direction flips for right-endpoint binding predicates from `WHERE`, not only inline node constraints. +- Broaden limit pushdown for variable-length path queries when ordering and distinct semantics permit early termination. + +## Phase 5: Suffix And Shared Endpoint Rewrites + +Status: completed + +- Improve expansion suffix pushdown for fixed suffixes after variable-length traversals. + - Include fixed suffix steps that terminate at already-bound endpoints with inline node constraints. + - Preserve bound-endpoint constraints in the pushed terminal satisfaction check when present. +- Improve `ExpandInto` and shared endpoint rewrites for ADCS-style fanout patterns. + - Constrain earlier using bound endpoint semi-joins or correlated expansion lowering where valid. + +## Phase 6: Validation + +Status: completed + +- Add focused regression tests per optimization. + - Optimizer/lowering selection tests. + - SQL shape translation tests. + - Backend-equivalent integration tests. + - Template corpus setup now clears stale graph data before rollback-only fixture cases, keeping repeated PostgreSQL and Neo4j validation runs deterministic. +- Benchmark after each workstream. + - Run unit tests. + - Run backend-specific integration tests. + - Run plan capture and compare summary deltas. + - `quality_backend` passes against `postgres://postgres:bhe4eva@localhost/bhe` and `neo4j://neo4j:neo4jj@localhost:7687`. 
+ - Plan corpus capture records 396 PostgreSQL plans and 396 Neo4j plans; remaining capture errors are expected invalid-query cases surfaced by both systems or Neo4j-specific parameter-map syntax rejection. + +## Phase 7: Predicate Placement Accounting + +Status: completed + +- Record planned binding-scope predicate placements when traversal constraint consumption actually pushes the matching predicate into a fixed traversal step, expansion seed, expansion edge, or expansion terminal constraint. +- Keep skipped-lowering reports focused on predicates that were not consumed by the emitted translation shape, instead of marking already-pushed traversal predicates as skipped. +- Add SQL-shape regression tests for fixed traversal and expansion-root predicate consumption. +- Refreshed plan-corpus capture applies `PredicatePlacement` in 56 of 71 planned PostgreSQL cases, reducing skipped predicate placements from 65 to 15. + +## Phase 8: Cross-Clause Predicate Placement Planning + +Status: completed + +- Stop planning traversal predicate placements for binding predicates owned by a different `MATCH` clause. +- Preserve same-clause binding predicate placement for traversal and suffix pushdown decisions. +- Refreshed plan-corpus capture now plans and applies `PredicatePlacement` in the same 56 PostgreSQL cases, removing all skipped predicate-placement reports. + +## Phase 9: Live Dataset Assumption Checks + +Status: completed + +- Re-vet optimizer assumptions against a large live PostgreSQL graph with `EXPLAIN ANALYZE`. +- Exact string property anchors now lower to `jsonb_typeof(properties -> key) = 'string'` plus `properties ->> key = value`, + allowing existing `->>` expression indexes on selective fields such as `objectid` and `name` to be used without + matching JSON booleans or numbers. 
+- Relationship count fast paths remain endpoint-preserving for correctness, but the PostgreSQL schema now includes a + `kind_id`-first covering edge index so typed relationship counts have a direct access path instead of relying on + endpoint-oriented traversal indexes. +- Added PG-scoped manual integration coverage for strict string equality and a read-only live-plan check that asserts + indexed `objectid` lookups use a PostgreSQL index when the connected database exposes the expected expression index. + +## Phase 10: Common Search Follow-Up + +Status: completed + +- Lower typed pattern predicates into correlated relationship `EXISTS` checks when relationship type constraints and + both endpoint correlations are sufficient, avoiding fallback CTEs for common typed existence predicates. +- Lower membership-only `collect(entity)` projections to ID arrays and rewrite membership predicates to `id = any(...)`, + keeping full entity arrays only when the collected value is otherwise observed. +- Flip single-step bound-left variable expansions toward constrained terminal kinds when there is no path binding or + continuation step, and preserve the previous-frame endpoint correlation after the flip. +- Plan shortest-path terminal-filter materialization for kind-only terminal endpoints while keeping endpoint-pair + filters limited to property/search predicates that define the pair universe. +- Defer adding blanket suffix/reverse expression indexes to schema assertion. Live common searches use `objectid` + suffix predicates, but the translator still has multiple suffix-preserving forms (`LIKE`, `cypher_ends_with`, and + null-coalesced variants). Explicit `TextSearchIndex`/trigram indexes remain available for deployments that need + substring acceleration before those semantics are unified. 
diff --git a/cypher/models/pgsql/optimize/analysis.go b/cypher/models/pgsql/optimize/analysis.go new file mode 100644 index 00000000..82292034 --- /dev/null +++ b/cypher/models/pgsql/optimize/analysis.go @@ -0,0 +1,612 @@ +package optimize + +import ( + "fmt" + "sort" + "strings" + + "github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/cypher/models/walk" +) + +type QueryPartKind string + +const ( + QueryPartKindSingle QueryPartKind = "single" + QueryPartKindMulti QueryPartKind = "multi" +) + +type BarrierKind string + +const ( + BarrierKindReturn BarrierKind = "return" + BarrierKindWith BarrierKind = "with" + BarrierKindUnwind BarrierKind = "unwind" + BarrierKindOptionalMatch BarrierKind = "optional_match" + BarrierKindUpdate BarrierKind = "update" +) + +type BindingKind string + +const ( + BindingKindNode BindingKind = "node" + BindingKindRelationship BindingKind = "relationship" + BindingKindPath BindingKind = "path" +) + +type Analysis struct { + QueryParts []QueryPart +} + +type QueryPart struct { + Index int + Kind QueryPartKind + Regions []Region + Barriers []Barrier + ProjectionDependencies []string +} + +type Region struct { + QueryPartIndex int + StartClause int + EndClause int + Clauses []MatchClause + Bindings []Binding + BindingOccurrences []Binding + PathVariables []PathVariable + Predicates []Predicate +} + +type MatchClause struct { + Index int + PatternCount int + WherePredicates int +} + +type Barrier struct { + QueryPartIndex int + ClauseIndex int + Kind BarrierKind + Dependencies []string +} + +type Binding struct { + Symbol string + Kind BindingKind + ClauseIndex int + PatternIndex int +} + +type PathVariable struct { + Symbol string + ClauseIndex int + PatternIndex int + NodeCount int + RelationshipCount int + VariableLength bool + Dependencies []string +} + +type Predicate struct { + ClauseIndex int + ExpressionIndex int + Dependencies []string +} + +func Analyze(query *cypher.RegularQuery) Analysis { + if query 
== nil || query.SingleQuery == nil { + return Analysis{} + } + + if query.SingleQuery.MultiPartQuery != nil { + return analyzeMultiPartQuery(query.SingleQuery.MultiPartQuery) + } + + if query.SingleQuery.SinglePartQuery != nil { + return Analysis{ + QueryParts: []QueryPart{ + analyzeSinglePartQuery(0, QueryPartKindSingle, query.SingleQuery.SinglePartQuery), + }, + } + } + + return Analysis{} +} + +func (s Analysis) Diagnostics() []string { + var lines []string + + for _, queryPart := range s.QueryParts { + lines = append(lines, fmt.Sprintf( + "query_part[%d] kind=%s projection_deps=%s", + queryPart.Index, + queryPart.Kind, + strings.Join(queryPart.ProjectionDependencies, ","), + )) + + for regionIndex, region := range queryPart.Regions { + lines = append(lines, fmt.Sprintf( + "region[%d] part=%d clauses=%d..%d matches=%d bindings=%s paths=%s predicates=%s", + regionIndex, + region.QueryPartIndex, + region.StartClause, + region.EndClause, + len(region.Clauses), + formatBindings(region.Bindings), + formatPathVariables(region.PathVariables), + formatPredicates(region.Predicates), + )) + } + + for barrierIndex, barrier := range queryPart.Barriers { + lines = append(lines, fmt.Sprintf( + "barrier[%d] part=%d clause=%d kind=%s deps=%s", + barrierIndex, + barrier.QueryPartIndex, + barrier.ClauseIndex, + barrier.Kind, + strings.Join(barrier.Dependencies, ","), + )) + } + } + + return lines +} + +func (s Analysis) String() string { + return strings.Join(s.Diagnostics(), "\n") +} + +func analyzeMultiPartQuery(query *cypher.MultiPartQuery) Analysis { + var analysis Analysis + + for idx, part := range query.Parts { + analysis.QueryParts = append(analysis.QueryParts, analyzeMultiPartQueryPart(idx, part)) + } + + if query.SinglePartQuery != nil { + analysis.QueryParts = append(analysis.QueryParts, analyzeSinglePartQuery(len(query.Parts), QueryPartKindSingle, query.SinglePartQuery)) + } + + return analysis +} + +func analyzeMultiPartQueryPart(index int, part 
*cypher.MultiPartQueryPart) QueryPart { + queryPart := QueryPart{ + Index: index, + Kind: QueryPartKindMulti, + } + + if part == nil { + return queryPart + } + + queryPart.Regions, queryPart.Barriers = analyzeReadingClauses(index, part.ReadingClauses) + + if len(part.UpdatingClauses) > 0 { + queryPart.Barriers = append(queryPart.Barriers, Barrier{ + QueryPartIndex: index, + ClauseIndex: len(part.ReadingClauses), + Kind: BarrierKindUpdate, + }) + } + + if part.With != nil { + queryPart.ProjectionDependencies = projectionDependencies(part.With.Projection) + queryPart.Barriers = append(queryPart.Barriers, Barrier{ + QueryPartIndex: index, + ClauseIndex: len(part.ReadingClauses) + len(part.UpdatingClauses), + Kind: BarrierKindWith, + Dependencies: queryPart.ProjectionDependencies, + }) + } + + return queryPart +} + +func analyzeSinglePartQuery(index int, kind QueryPartKind, part *cypher.SinglePartQuery) QueryPart { + queryPart := QueryPart{ + Index: index, + Kind: kind, + } + + if part == nil { + return queryPart + } + + queryPart.Regions, queryPart.Barriers = analyzeReadingClauses(index, part.ReadingClauses) + + if len(part.UpdatingClauses) > 0 { + queryPart.Barriers = append(queryPart.Barriers, Barrier{ + QueryPartIndex: index, + ClauseIndex: len(part.ReadingClauses), + Kind: BarrierKindUpdate, + }) + } + + if part.Return != nil { + queryPart.ProjectionDependencies = projectionDependencies(part.Return.Projection) + queryPart.Barriers = append(queryPart.Barriers, Barrier{ + QueryPartIndex: index, + ClauseIndex: len(part.ReadingClauses) + len(part.UpdatingClauses), + Kind: BarrierKindReturn, + Dependencies: queryPart.ProjectionDependencies, + }) + } + + return queryPart +} + +func analyzeReadingClauses(queryPartIndex int, readingClauses []*cypher.ReadingClause) ([]Region, []Barrier) { + var ( + regions []Region + barriers []Barrier + currentRegion *Region + ) + + closeRegion := func() { + if currentRegion != nil { + regions = append(regions, *currentRegion) + 
currentRegion = nil + } + } + + for clauseIndex, readingClause := range readingClauses { + if readingClause == nil || readingClause.Unwind != nil { + closeRegion() + barriers = append(barriers, Barrier{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + Kind: BarrierKindUnwind, + Dependencies: dependenciesForReadingClause(readingClause), + }) + continue + } + + match := readingClause.Match + if match == nil { + continue + } + + if match.Optional { + closeRegion() + barriers = append(barriers, Barrier{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + Kind: BarrierKindOptionalMatch, + Dependencies: dependenciesForMatch(match), + }) + continue + } + + if currentRegion == nil { + currentRegion = &Region{ + QueryPartIndex: queryPartIndex, + StartClause: clauseIndex, + EndClause: clauseIndex, + } + } + + currentRegion.EndClause = clauseIndex + currentRegion.Clauses = append(currentRegion.Clauses, MatchClause{ + Index: clauseIndex, + PatternCount: len(match.Pattern), + WherePredicates: wherePredicateCount(match.Where), + }) + + nextBindings := bindingsForMatch(clauseIndex, match) + currentRegion.BindingOccurrences = append(currentRegion.BindingOccurrences, nextBindings...) + currentRegion.Bindings = mergeBindings(currentRegion.Bindings, nextBindings) + currentRegion.PathVariables = mergePathVariables(currentRegion.PathVariables, pathVariablesForMatch(clauseIndex, match)) + currentRegion.Predicates = append(currentRegion.Predicates, predicatesForWhere(clauseIndex, match.Where)...) 
+ } + + closeRegion() + + return regions, barriers +} + +func dependenciesForReadingClause(readingClause *cypher.ReadingClause) []string { + if readingClause == nil { + return nil + } + + if readingClause.Match != nil { + return dependenciesForMatch(readingClause.Match) + } + + if readingClause.Unwind != nil { + return sortedDependencies(readingClause.Unwind.Expression) + } + + return nil +} + +func dependenciesForMatch(match *cypher.Match) []string { + var deps []string + + if match == nil { + return nil + } + + for _, predicate := range predicatesForWhere(0, match.Where) { + deps = append(deps, predicate.Dependencies...) + } + + return sortedUniqueStrings(deps) +} + +func bindingsForMatch(clauseIndex int, match *cypher.Match) []Binding { + var bindings []Binding + + for patternIndex, pattern := range match.Pattern { + if pattern == nil { + continue + } + + if pattern.Variable != nil && pattern.Variable.Symbol != "" { + bindings = append(bindings, Binding{ + Symbol: pattern.Variable.Symbol, + Kind: BindingKindPath, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + }) + } + + for _, element := range pattern.PatternElements { + if element == nil { + continue + } + + if nodePattern, isNodePattern := element.AsNodePattern(); isNodePattern { + if nodePattern.Variable != nil && nodePattern.Variable.Symbol != "" { + bindings = append(bindings, Binding{ + Symbol: nodePattern.Variable.Symbol, + Kind: BindingKindNode, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + }) + } + } else if relationshipPattern, isRelationshipPattern := element.AsRelationshipPattern(); isRelationshipPattern { + if relationshipPattern.Variable != nil && relationshipPattern.Variable.Symbol != "" { + bindings = append(bindings, Binding{ + Symbol: relationshipPattern.Variable.Symbol, + Kind: BindingKindRelationship, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + }) + } + } + } + } + + return bindings +} + +func pathVariablesForMatch(clauseIndex int, match 
*cypher.Match) []PathVariable { + var pathVariables []PathVariable + + for patternIndex, pattern := range match.Pattern { + if pattern == nil || pattern.Variable == nil || pattern.Variable.Symbol == "" { + continue + } + + pathVariable := PathVariable{ + Symbol: pattern.Variable.Symbol, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + Dependencies: patternDependencies(pattern), + } + + for _, element := range pattern.PatternElements { + if element == nil { + continue + } + + if element.IsNodePattern() { + pathVariable.NodeCount++ + } else if relationshipPattern, isRelationshipPattern := element.AsRelationshipPattern(); isRelationshipPattern { + pathVariable.RelationshipCount++ + + if relationshipPattern.Range != nil { + pathVariable.VariableLength = true + } + } + } + + pathVariables = append(pathVariables, pathVariable) + } + + return pathVariables +} + +func patternDependencies(pattern *cypher.PatternPart) []string { + var dependencies []string + + for _, element := range pattern.PatternElements { + if element == nil { + continue + } + + if nodePattern, isNodePattern := element.AsNodePattern(); isNodePattern { + if nodePattern.Variable != nil && nodePattern.Variable.Symbol != "" { + dependencies = append(dependencies, nodePattern.Variable.Symbol) + } + } else if relationshipPattern, isRelationshipPattern := element.AsRelationshipPattern(); isRelationshipPattern { + if relationshipPattern.Variable != nil && relationshipPattern.Variable.Symbol != "" { + dependencies = append(dependencies, relationshipPattern.Variable.Symbol) + } + } + } + + return sortedUniqueStrings(dependencies) +} + +func predicatesForWhere(clauseIndex int, where *cypher.Where) []Predicate { + if where == nil { + return nil + } + + var predicates []Predicate + for expressionIndex, expression := range where.GetAll() { + predicates = append(predicates, Predicate{ + ClauseIndex: clauseIndex, + ExpressionIndex: expressionIndex, + Dependencies: sortedDependencies(expression), + }) + } + + 
return predicates +} + +func projectionDependencies(projection *cypher.Projection) []string { + if projection == nil { + return nil + } + + var dependencies []string + for _, item := range projection.Items { + dependencies = append(dependencies, sortedDependencies(item)...) + } + + if projection.Order != nil { + dependencies = append(dependencies, sortedDependencies(projection.Order)...) + } + + if projection.Skip != nil { + dependencies = append(dependencies, sortedDependencies(projection.Skip)...) + } + + if projection.Limit != nil { + dependencies = append(dependencies, sortedDependencies(projection.Limit)...) + } + + return sortedUniqueStrings(dependencies) +} + +func sortedDependencies(node cypher.SyntaxNode) []string { + dependencies := map[string]struct{}{} + + if node == nil { + return nil + } + + _ = walk.Cypher(node, walk.NewSimpleVisitor[cypher.SyntaxNode](func(node cypher.SyntaxNode, _ walk.VisitorHandler) { + if variable, isVariable := node.(*cypher.Variable); isVariable && variable.Symbol != "" && variable.Symbol != cypher.TokenLiteralAsterisk { + dependencies[variable.Symbol] = struct{}{} + } + })) + + return sortedMapKeys(dependencies) +} + +func wherePredicateCount(where *cypher.Where) int { + if where == nil { + return 0 + } + + return where.Len() +} + +func mergeBindings(existing []Binding, next []Binding) []Binding { + seen := map[string]struct{}{} + for _, binding := range existing { + seen[bindingKey(binding)] = struct{}{} + } + + for _, binding := range next { + key := bindingKey(binding) + if _, hasBinding := seen[key]; hasBinding { + continue + } + + existing = append(existing, binding) + seen[key] = struct{}{} + } + + return existing +} + +func mergePathVariables(existing []PathVariable, next []PathVariable) []PathVariable { + seen := map[string]struct{}{} + for _, pathVariable := range existing { + seen[pathVariable.Symbol] = struct{}{} + } + + for _, pathVariable := range next { + if _, hasPathVariable := seen[pathVariable.Symbol]; 
hasPathVariable { + continue + } + + existing = append(existing, pathVariable) + seen[pathVariable.Symbol] = struct{}{} + } + + return existing +} + +func bindingKey(binding Binding) string { + return string(binding.Kind) + ":" + binding.Symbol +} + +func sortedMapKeys(values map[string]struct{}) []string { + keys := make([]string, 0, len(values)) + for key := range values { + keys = append(keys, key) + } + + sort.Strings(keys) + + return keys +} + +func sortedUniqueStrings(values []string) []string { + seen := map[string]struct{}{} + + for _, value := range values { + if value != "" { + seen[value] = struct{}{} + } + } + + return sortedMapKeys(seen) +} + +func formatBindings(bindings []Binding) string { + if len(bindings) == 0 { + return "" + } + + items := make([]string, 0, len(bindings)) + for _, binding := range bindings { + items = append(items, fmt.Sprintf("%s:%s", binding.Symbol, binding.Kind)) + } + + return strings.Join(items, ",") +} + +func formatPathVariables(pathVariables []PathVariable) string { + if len(pathVariables) == 0 { + return "" + } + + items := make([]string, 0, len(pathVariables)) + for _, pathVariable := range pathVariables { + items = append(items, pathVariable.Symbol) + } + + return strings.Join(items, ",") +} + +func formatPredicates(predicates []Predicate) string { + if len(predicates) == 0 { + return "" + } + + items := make([]string, 0, len(predicates)) + for _, predicate := range predicates { + items = append(items, strings.Join(predicate.Dependencies, "|")) + } + + return strings.Join(items, ",") +} diff --git a/cypher/models/pgsql/optimize/analysis_test.go b/cypher/models/pgsql/optimize/analysis_test.go new file mode 100644 index 00000000..0ea35be7 --- /dev/null +++ b/cypher/models/pgsql/optimize/analysis_test.go @@ -0,0 +1,139 @@ +package optimize + +import ( + "strings" + "testing" + + "github.com/specterops/dawgs/cypher/frontend" + "github.com/stretchr/testify/require" +) + +const adcsQuery = ` +MATCH (n:Group) +WHERE 
n.objectid = 'S-1-5-21-2643190041-1319121918-239771340-513' +MATCH p1 = (n)-[:MemberOf*0..]->()-[:Enroll]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(:NTAuthStore)-[:NTAuthStoreFor]->(d:Domain) +MATCH p2 = (n)-[:MemberOf*0..]->()-[:GenericAll|Enroll|AllExtendedRights]->(ct:CertTemplate)-[:PublishedTo]->(ca)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(:RootCA)-[:RootCAFor]->(d) +WHERE ct.authenticationenabled = true +AND ct.requiresmanagerapproval = false +AND ct.enrolleesuppliessubject = true +AND (ct.schemaversion = 1 OR ct.authorizedsignatures = 0) +RETURN p1, p2 +` + +func analyzeCypher(t *testing.T, query string) Analysis { + t.Helper() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), query) + require.NoError(t, err) + + return Analyze(regularQuery) +} + +func requireBinding(t *testing.T, bindings []Binding, symbol string, kind BindingKind) { + t.Helper() + + for _, binding := range bindings { + if binding.Symbol == symbol && binding.Kind == kind { + return + } + } + + t.Fatalf("expected binding %s:%s in %#v", symbol, kind, bindings) +} + +func requirePathVariable(t *testing.T, pathVariables []PathVariable, symbol string, relationshipCount int) { + t.Helper() + + for _, pathVariable := range pathVariables { + if pathVariable.Symbol == symbol { + require.Equal(t, relationshipCount, pathVariable.RelationshipCount) + require.True(t, pathVariable.VariableLength) + return + } + } + + t.Fatalf("expected path variable %s in %#v", symbol, pathVariables) +} + +func TestAnalyzeIdentifiesEligibleADCSRegion(t *testing.T) { + t.Parallel() + + analysis := analyzeCypher(t, adcsQuery) + + require.Len(t, analysis.QueryParts, 1) + + queryPart := analysis.QueryParts[0] + require.Equal(t, QueryPartKindSingle, queryPart.Kind) + require.Equal(t, []string{"p1", "p2"}, queryPart.ProjectionDependencies) + require.Len(t, queryPart.Regions, 1) + require.Len(t, queryPart.Barriers, 1) + require.Equal(t, BarrierKindReturn, queryPart.Barriers[0].Kind) + require.Equal(t, 
[]string{"p1", "p2"}, queryPart.Barriers[0].Dependencies) + + region := queryPart.Regions[0] + require.Equal(t, 0, region.StartClause) + require.Equal(t, 2, region.EndClause) + require.Len(t, region.Clauses, 3) + require.Len(t, region.BindingOccurrences, 10) + require.Len(t, region.Predicates, 2) + require.Equal(t, []string{"n"}, region.Predicates[0].Dependencies) + require.Equal(t, []string{"ct"}, region.Predicates[1].Dependencies) + + requireBinding(t, region.Bindings, "n", BindingKindNode) + requireBinding(t, region.Bindings, "ca", BindingKindNode) + requireBinding(t, region.Bindings, "ct", BindingKindNode) + requireBinding(t, region.Bindings, "d", BindingKindNode) + requireBinding(t, region.Bindings, "p1", BindingKindPath) + requireBinding(t, region.Bindings, "p2", BindingKindPath) + + requirePathVariable(t, region.PathVariables, "p1", 4) + requirePathVariable(t, region.PathVariables, "p2", 5) +} + +func TestAnalyzeSegmentsRegionsAtSemanticBarriers(t *testing.T) { + t.Parallel() + + analysis := analyzeCypher(t, ` + MATCH (n:Group) + WITH n + MATCH (n)-[:MemberOf]->(m) + OPTIONAL MATCH (m)-[:MemberOf]->(x) + RETURN m + `) + + require.Len(t, analysis.QueryParts, 2) + + firstPart := analysis.QueryParts[0] + require.Equal(t, QueryPartKindMulti, firstPart.Kind) + require.Len(t, firstPart.Regions, 1) + require.Equal(t, []string{"n"}, firstPart.ProjectionDependencies) + require.Len(t, firstPart.Barriers, 1) + require.Equal(t, BarrierKindWith, firstPart.Barriers[0].Kind) + require.Equal(t, []string{"n"}, firstPart.Barriers[0].Dependencies) + + secondPart := analysis.QueryParts[1] + require.Equal(t, QueryPartKindSingle, secondPart.Kind) + require.Len(t, secondPart.Regions, 1) + require.Equal(t, 0, secondPart.Regions[0].StartClause) + require.Equal(t, 0, secondPart.Regions[0].EndClause) + require.Len(t, secondPart.Barriers, 2) + require.Equal(t, BarrierKindOptionalMatch, secondPart.Barriers[0].Kind) + require.Equal(t, BarrierKindReturn, secondPart.Barriers[1].Kind) + 
require.Equal(t, []string{"m"}, secondPart.ProjectionDependencies) +} + +func TestAnalysisDiagnosticsAreStable(t *testing.T) { + t.Parallel() + + var ( + analysis = analyzeCypher(t, adcsQuery) + diagnostics = strings.Join(analysis.Diagnostics(), "\n") + ) + + require.Contains(t, diagnostics, "query_part[0] kind=single projection_deps=p1,p2") + require.Contains(t, diagnostics, "region[0] part=0 clauses=0..2 matches=3") + require.Contains(t, diagnostics, "bindings=n:node,p1:path,ca:node,d:node,p2:path,ct:node") + require.Contains(t, diagnostics, "paths=p1,p2") + require.Contains(t, diagnostics, "predicates=n,ct") + require.Contains(t, diagnostics, "barrier[0] part=0 clause=3 kind=return deps=p1,p2") +} diff --git a/cypher/models/pgsql/optimize/locality.go b/cypher/models/pgsql/optimize/locality.go new file mode 100644 index 00000000..05ceccef --- /dev/null +++ b/cypher/models/pgsql/optimize/locality.go @@ -0,0 +1,196 @@ +package optimize + +import ( + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/walk" +) + +// FlattenConjunction collects the leaf operands of a left-recursive AND chain. +func FlattenConjunction(expr pgsql.Expression) []pgsql.Expression { + if bin, typeOK := expr.(*pgsql.BinaryExpression); !typeOK || bin.Operator != pgsql.OperatorAnd { + return []pgsql.Expression{expr} + } else { + return append(FlattenConjunction(bin.LOperand), FlattenConjunction(bin.ROperand)...) + } +} + +// ExpressionReferencesOnlyLocalIdentifiers returns true only when every binding +// reference found in the expression is a member of localScope. 
+func ExpressionReferencesOnlyLocalIdentifiers(expression pgsql.Expression, localScope *pgsql.IdentifierSet) bool { + isLocal := true + + walk.PgSQL(expression, walk.NewSimpleVisitor[pgsql.SyntaxNode]( + func(node pgsql.SyntaxNode, handler walk.VisitorHandler) { + switch typedNode := node.(type) { + case pgsql.ExistsExpression: + if !SubqueryReferencesOnlyLocalIdentifiers(typedNode.Subquery, localScope) { + isLocal = false + handler.SetDone() + } else { + handler.Consume() + } + + case pgsql.CompoundIdentifier: + if len(typedNode) > 0 && !localScope.Contains(typedNode[0]) { + isLocal = false + handler.SetDone() + } + + case pgsql.Identifier: + if !localScope.Contains(typedNode) { + isLocal = false + handler.SetDone() + } + + case pgsql.RowColumnReference: + if !ExpressionReferencesOnlyLocalIdentifiers(typedNode.Identifier, localScope) { + isLocal = false + handler.SetDone() + } else { + handler.Consume() + } + } + }, + )) + + return isLocal +} + +func SubqueryReferencesOnlyLocalIdentifiers(subquery pgsql.Subquery, localScope *pgsql.IdentifierSet) bool { + return QueryReferencesOnlyLocalIdentifiers(subquery.Query, localScope) +} + +func QueryReferencesOnlyLocalIdentifiers(query pgsql.Query, localScope *pgsql.IdentifierSet) bool { + if query.CommonTableExpressions != nil { + return false + } + + selectBody, isSelect := query.Body.(pgsql.Select) + if !isSelect { + return false + } + + if !SelectReferencesOnlyLocalIdentifiers(selectBody, localScope) { + return false + } + + for _, orderBy := range query.OrderBy { + if orderBy != nil && !ExpressionReferencesOnlyLocalIdentifiers(orderBy.Expression, localScope) { + return false + } + } + + return (query.Offset == nil || ExpressionReferencesOnlyLocalIdentifiers(query.Offset, localScope)) && + (query.Limit == nil || ExpressionReferencesOnlyLocalIdentifiers(query.Limit, localScope)) +} + +func AddFromClauseBindings(localScope *pgsql.IdentifierSet, fromClauses []pgsql.FromClause) { + for _, fromClause := range fromClauses { + 
AddFromExpressionBinding(localScope, fromClause.Source) + + for _, join := range fromClause.Joins { + AddFromExpressionBinding(localScope, join.Table) + } + } +} + +func AddFromExpressionBinding(localScope *pgsql.IdentifierSet, expression pgsql.Expression) { + switch typedExpression := expression.(type) { + case pgsql.TableReference: + if typedExpression.Binding.Set { + localScope.Add(typedExpression.Binding.Value) + } + + case pgsql.LateralSubquery: + if typedExpression.Binding.Set { + localScope.Add(typedExpression.Binding.Value) + } + } +} + +func addFromClauseSourceBinding(localScope *pgsql.IdentifierSet, fromClause pgsql.FromClause) { + AddFromExpressionBinding(localScope, fromClause.Source) +} + +func SelectReferencesOnlyLocalIdentifiers(selectBody pgsql.Select, localScope *pgsql.IdentifierSet) bool { + scopedIdentifiers := localScope.Copy() + + for _, fromClause := range selectBody.From { + if !FromExpressionReferencesOnlyLocalIdentifiers(fromClause.Source) { + return false + } + + addFromClauseSourceBinding(scopedIdentifiers, fromClause) + + for _, join := range fromClause.Joins { + if !FromExpressionReferencesOnlyLocalIdentifiers(join.Table) { + return false + } + + if join.JoinOperator.Constraint != nil && + !ExpressionReferencesOnlyLocalIdentifiers(join.JoinOperator.Constraint, scopedIdentifiers) { + return false + } + + AddFromExpressionBinding(scopedIdentifiers, join.Table) + } + } + + for _, projection := range selectBody.Projection { + if !ExpressionReferencesOnlyLocalIdentifiers(projection, scopedIdentifiers) { + return false + } + } + + for _, groupByExpression := range selectBody.GroupBy { + if !ExpressionReferencesOnlyLocalIdentifiers(groupByExpression, scopedIdentifiers) { + return false + } + } + + return (selectBody.Where == nil || ExpressionReferencesOnlyLocalIdentifiers(selectBody.Where, scopedIdentifiers)) && + (selectBody.Having == nil || ExpressionReferencesOnlyLocalIdentifiers(selectBody.Having, scopedIdentifiers)) +} + +func 
FromExpressionReferencesOnlyLocalIdentifiers(expression pgsql.Expression) bool { + switch expression.(type) { + case pgsql.TableReference: + return true + + default: + return false + } +} + +func IsLocalToScope(expression pgsql.Expression, localScope *pgsql.IdentifierSet) bool { + if expression == nil { + return true + } + + return ExpressionReferencesOnlyLocalIdentifiers(expression, localScope) +} + +// PartitionConstraintByLocality splits a conjunction (A AND B AND ...) into +// two expressions: one whose every binding reference is contained in +// localScope (safe for JOIN ON), and one that references outside identifiers +// (must stay in WHERE). +// +// Only top-level AND operands are split. If an expression is not a +// BinaryExpression with OperatorAnd, the whole expression is tested as a unit. +func PartitionConstraintByLocality(expression pgsql.Expression, localScope *pgsql.IdentifierSet) (pgsql.Expression, pgsql.Expression) { + var ( + joinConstraints pgsql.Expression + whereConstraints pgsql.Expression + terms = FlattenConjunction(expression) + ) + + for _, term := range terms { + if IsLocalToScope(term, localScope) { + joinConstraints = pgsql.OptionalAnd(joinConstraints, term) + } else { + whereConstraints = pgsql.OptionalAnd(whereConstraints, term) + } + } + + return joinConstraints, whereConstraints +} diff --git a/cypher/models/pgsql/optimize/lowering.go b/cypher/models/pgsql/optimize/lowering.go new file mode 100644 index 00000000..c4873145 --- /dev/null +++ b/cypher/models/pgsql/optimize/lowering.go @@ -0,0 +1,326 @@ +package optimize + +import ( + "github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/graph" +) + +const ( + LoweringProjectionPruning = "ProjectionPruning" + LoweringLatePathMaterialization = "LatePathMaterialization" + LoweringExpandIntoDetection = "ExpandIntoDetection" + LoweringTraversalDirection = "TraversalDirectionSelection" + LoweringShortestPathStrategy = "ShortestPathStrategySelection" + 
// PatternTarget addresses a pattern by query part, clause and pattern index.
// Predicate and PredicateIndex are populated for targets that refer to a
// pattern predicate rather than a pattern in a reading clause.
type PatternTarget struct {
	QueryPartIndex int  `json:"query_part_index"`
	ClauseIndex    int  `json:"clause_index"`
	PatternIndex   int  `json:"pattern_index"`
	Predicate      bool `json:"predicate,omitempty"`
	PredicateIndex int  `json:"predicate_index,omitempty"`
}

// TraversalStep returns the target for the step at stepIndex within this
// pattern. Every field of the pattern target is copied over unchanged.
func (s PatternTarget) TraversalStep(stepIndex int) TraversalStepTarget {
	var stepTarget TraversalStepTarget

	stepTarget.QueryPartIndex = s.QueryPartIndex
	stepTarget.ClauseIndex = s.ClauseIndex
	stepTarget.PatternIndex = s.PatternIndex
	stepTarget.Predicate = s.Predicate
	stepTarget.PredicateIndex = s.PredicateIndex
	stepTarget.StepIndex = stepIndex

	return stepTarget
}

// TraversalStepTarget is a PatternTarget extended with the index of a single
// traversal step inside the addressed pattern.
type TraversalStepTarget struct {
	QueryPartIndex int  `json:"query_part_index"`
	ClauseIndex    int  `json:"clause_index"`
	PatternIndex   int  `json:"pattern_index"`
	Predicate      bool `json:"predicate,omitempty"`
	PredicateIndex int  `json:"predicate_index,omitempty"`
	StepIndex      int  `json:"step_index"`
}
"expansion_path" + LatePathMaterializationEdgeComposite LatePathMaterializationMode = "edge_composite" +) + +type LatePathMaterializationDecision struct { + Target TraversalStepTarget `json:"target"` + Mode LatePathMaterializationMode `json:"mode"` +} + +type ExpandIntoDecision struct { + Target TraversalStepTarget `json:"target"` +} + +type TraversalDirectionDecision struct { + Target TraversalStepTarget `json:"target"` + Flip bool `json:"flip,omitempty"` + Reason string `json:"reason,omitempty"` +} + +type ShortestPathStrategy string + +const ( + ShortestPathStrategyBidirectional ShortestPathStrategy = "bidirectional" + ShortestPathStrategyUnidirectional ShortestPathStrategy = "unidirectional" +) + +type ShortestPathStrategyDecision struct { + Target TraversalStepTarget `json:"target"` + Strategy ShortestPathStrategy `json:"strategy"` + Reason string `json:"reason,omitempty"` +} + +type ShortestPathFilterMode string + +const ( + ShortestPathFilterTerminal ShortestPathFilterMode = "terminal" + ShortestPathFilterEndpointPair ShortestPathFilterMode = "endpoint_pair" +) + +type ShortestPathFilterDecision struct { + Target TraversalStepTarget `json:"target"` + Mode ShortestPathFilterMode `json:"mode"` + Reason string `json:"reason,omitempty"` +} + +type LimitPushdownMode string + +const ( + LimitPushdownTraversalCTE LimitPushdownMode = "traversal_cte" + LimitPushdownShortestPathHarness LimitPushdownMode = "shortest_path_harness" +) + +type LimitPushdownDecision struct { + Target TraversalStepTarget `json:"target"` + Mode LimitPushdownMode `json:"mode"` +} + +type ExpansionSuffixPushdownDecision struct { + Target TraversalStepTarget `json:"target"` + SuffixLength int `json:"suffix_length"` + SuffixStartStep int `json:"suffix_start_step"` + SuffixEndStep int `json:"suffix_end_step"` + PredicateAttachments []PredicateAttachment `json:"predicate_attachments,omitempty"` +} + +type PredicatePlacementDecision struct { + Target TraversalStepTarget `json:"target"` + Attachment 
PredicateAttachment `json:"attachment"` + Placement PredicateAttachmentScope `json:"placement"` +} + +type PatternPredicatePlacementMode string + +const ( + PatternPredicatePlacementExistence PatternPredicatePlacementMode = "existence" +) + +type PatternPredicatePlacementDecision struct { + Target TraversalStepTarget `json:"target"` + Mode PatternPredicatePlacementMode `json:"mode"` +} + +type CountStoreFastPathTarget string + +const ( + CountStoreFastPathNode CountStoreFastPathTarget = "node" + CountStoreFastPathEdge CountStoreFastPathTarget = "edge" +) + +type CountStoreFastPathDecision struct { + QueryPartIndex int `json:"query_part_index"` + ClauseIndex int `json:"clause_index"` + PatternIndex int `json:"pattern_index"` + BindingSymbol string `json:"binding_symbol,omitempty"` + Target CountStoreFastPathTarget `json:"target"` + KindSymbols []string `json:"kind_symbols,omitempty"` +} + +type AggregateTraversalCountDecision struct { + QueryPartIndex int `json:"query_part_index"` + SourceSymbol string `json:"source_symbol"` + TerminalSymbol string `json:"terminal_symbol"` + CountAlias string `json:"count_alias"` + Limit int64 `json:"limit,omitempty"` + Target TraversalStepTarget `json:"target"` +} + +type AggregateTraversalCountShape struct { + QueryPartIndex int + SourceSymbol string + TerminalSymbol string + CountAlias string + ReturnSourceAlias string + ReturnCountAlias string + ReturnCount bool + Limit int64 + SourceMatch *cypher.Match + TerminalMatch *cypher.Match + SourceKinds graph.Kinds + TerminalKinds graph.Kinds + RelationshipKinds graph.Kinds + Direction graph.Direction + MinDepth int64 + MaxDepth int64 + Target TraversalStepTarget +} + +type LoweringPlan struct { + ProjectionPruning []ProjectionPruningDecision `json:"projection_pruning,omitempty"` + LatePathMaterialization []LatePathMaterializationDecision `json:"late_path_materialization,omitempty"` + ExpandInto []ExpandIntoDecision `json:"expand_into,omitempty"` + TraversalDirection 
[]TraversalDirectionDecision `json:"traversal_direction,omitempty"` + ShortestPathStrategy []ShortestPathStrategyDecision `json:"shortest_path_strategy,omitempty"` + ShortestPathFilter []ShortestPathFilterDecision `json:"shortest_path_filter,omitempty"` + LimitPushdown []LimitPushdownDecision `json:"limit_pushdown,omitempty"` + ExpansionSuffixPushdown []ExpansionSuffixPushdownDecision `json:"expansion_suffix_pushdown,omitempty"` + PredicatePlacement []PredicatePlacementDecision `json:"predicate_placement,omitempty"` + PatternPredicate []PatternPredicatePlacementDecision `json:"pattern_predicate_placement,omitempty"` + CountStoreFastPath []CountStoreFastPathDecision `json:"count_store_fast_path,omitempty"` + AggregateTraversalCount []AggregateTraversalCountDecision `json:"aggregate_traversal_count,omitempty"` +} + +func (s LoweringPlan) Empty() bool { + return len(s.ProjectionPruning) == 0 && + len(s.LatePathMaterialization) == 0 && + len(s.ExpandInto) == 0 && + len(s.TraversalDirection) == 0 && + len(s.ShortestPathStrategy) == 0 && + len(s.ShortestPathFilter) == 0 && + len(s.LimitPushdown) == 0 && + len(s.ExpansionSuffixPushdown) == 0 && + len(s.PredicatePlacement) == 0 && + len(s.PatternPredicate) == 0 && + len(s.CountStoreFastPath) == 0 && + len(s.AggregateTraversalCount) == 0 +} + +func (s LoweringPlan) Decisions() []LoweringDecision { + var decisions []LoweringDecision + add := func(name string, applied bool) { + if applied { + decisions = append(decisions, LoweringDecision{Name: name}) + } + } + + add(LoweringProjectionPruning, len(s.ProjectionPruning) > 0) + add(LoweringLatePathMaterialization, len(s.LatePathMaterialization) > 0) + add(LoweringExpandIntoDetection, len(s.ExpandInto) > 0) + add(LoweringTraversalDirection, len(s.TraversalDirection) > 0) + add(LoweringShortestPathStrategy, len(s.ShortestPathStrategy) > 0) + add(LoweringShortestPathFilter, len(s.ShortestPathFilter) > 0) + add(LoweringLimitPushdown, len(s.LimitPushdown) > 0) + 
add(LoweringExpansionSuffixPushdown, len(s.ExpansionSuffixPushdown) > 0) + add(LoweringPredicatePlacement, len(s.PredicatePlacement) > 0 || len(s.PatternPredicate) > 0) + add(LoweringCountStoreFastPath, len(s.CountStoreFastPath) > 0) + add(LoweringAggregateTraversalCount, len(s.AggregateTraversalCount) > 0) + + return decisions +} + +func IndexPatternTargets(query *cypher.RegularQuery) map[*cypher.PatternPart]PatternTarget { + targets := map[*cypher.PatternPart]PatternTarget{} + + if query == nil || query.SingleQuery == nil { + return targets + } + + if query.SingleQuery.MultiPartQuery != nil { + for queryPartIndex, part := range query.SingleQuery.MultiPartQuery.Parts { + if part == nil { + continue + } + + indexReadingClauseTargets(targets, queryPartIndex, part.ReadingClauses) + } + + if finalPart := query.SingleQuery.MultiPartQuery.SinglePartQuery; finalPart != nil { + indexReadingClauseTargets(targets, len(query.SingleQuery.MultiPartQuery.Parts), finalPart.ReadingClauses) + } + } else if query.SingleQuery.SinglePartQuery != nil { + indexReadingClauseTargets(targets, 0, query.SingleQuery.SinglePartQuery.ReadingClauses) + } + + return targets +} + +func IndexPatternPredicateTargets(query *cypher.RegularQuery) map[*cypher.PatternPredicate]PatternTarget { + targets := map[*cypher.PatternPredicate]PatternTarget{} + + if query == nil || query.SingleQuery == nil { + return targets + } + + if query.SingleQuery.MultiPartQuery != nil { + for queryPartIndex, part := range query.SingleQuery.MultiPartQuery.Parts { + if part == nil { + continue + } + + indexQueryPartPatternPredicateTargets(targets, queryPartIndex, part) + } + + if finalPart := query.SingleQuery.MultiPartQuery.SinglePartQuery; finalPart != nil { + indexQueryPartPatternPredicateTargets(targets, len(query.SingleQuery.MultiPartQuery.Parts), finalPart) + } + } else if query.SingleQuery.SinglePartQuery != nil { + indexQueryPartPatternPredicateTargets(targets, 0, query.SingleQuery.SinglePartQuery) + } + + return 
targets +} + +func indexReadingClauseTargets(targets map[*cypher.PatternPart]PatternTarget, queryPartIndex int, readingClauses []*cypher.ReadingClause) { + for clauseIndex, readingClause := range readingClauses { + if readingClause == nil || readingClause.Match == nil { + continue + } + + for patternIndex, patternPart := range readingClause.Match.Pattern { + targets[patternPart] = PatternTarget{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + } + } + } +} + +func indexQueryPartPatternPredicateTargets(targets map[*cypher.PatternPredicate]PatternTarget, queryPartIndex int, queryPart cypher.SyntaxNode) { + for predicateIndex, predicate := range patternPredicatesInQueryPart(queryPart) { + targets[predicate] = PatternTarget{ + QueryPartIndex: queryPartIndex, + PatternIndex: predicateIndex, + Predicate: true, + PredicateIndex: predicateIndex, + } + } +} diff --git a/cypher/models/pgsql/optimize/lowering_plan.go b/cypher/models/pgsql/optimize/lowering_plan.go new file mode 100644 index 00000000..94977198 --- /dev/null +++ b/cypher/models/pgsql/optimize/lowering_plan.go @@ -0,0 +1,2038 @@ +package optimize + +import ( + "strings" + + "github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/graph" +) + +type sourceTraversalStep struct { + LeftNode *cypher.NodePattern + Relationship *cypher.RelationshipPattern + RightNode *cypher.NodePattern +} + +type boundSourceSelectivity int + +const ( + traversalDirectionReasonRightBound = "right_bound" + traversalDirectionReasonRightConstrained = "right_constrained" + traversalDirectionReasonRightPredicate = "right_predicate" + traversalDirectionReasonTerminalKindOnlyEstimateWide = "terminal kind-only estimate too broad" + traversalDirectionReasonBoundSourceSelective = "bound source estimate selective" + + shortestPathStrategyReasonBoundEndpointPairs = "bound_endpoint_pairs" + shortestPathStrategyReasonEndpointPredicates = "endpoint_predicates" + + 
shortestPathFilterReasonTerminalPredicate = "terminal_predicate" + shortestPathFilterReasonEndpointPairPredicates = "endpoint_pair_predicates" +) + +const ( + boundSourceSelectivityNone boundSourceSelectivity = iota + boundSourceSelectivityKindOnly + boundSourceSelectivityPredicate + boundSourceSelectivityUnique + boundSourceSelectivityLimited + boundSourceSelectivityTopN +) + +func BuildLoweringPlan(query *cypher.RegularQuery, predicateAttachments []PredicateAttachment) (LoweringPlan, error) { + if query == nil || query.SingleQuery == nil { + return LoweringPlan{}, nil + } + + var plan LoweringPlan + + if query.SingleQuery.MultiPartQuery != nil { + var ( + carriedSymbols = map[string]struct{}{} + carriedSelectivity = map[string]boundSourceSelectivity{} + ) + + for queryPartIndex, part := range query.SingleQuery.MultiPartQuery.Parts { + if part == nil { + continue + } + + if err := appendQueryPartLowerings(&plan, queryPartIndex, part, part.ReadingClauses, predicateAttachments, carriedSymbols, carriedSelectivity); err != nil { + return LoweringPlan{}, err + } + + var ( + currentSymbols = copyStringSet(carriedSymbols) + currentSelectivity = copyBoundSourceSelectivity(carriedSelectivity) + ) + declareReadingClauseSymbols(currentSymbols, part.ReadingClauses) + declareReadingClauseSelectivity(currentSelectivity, part.ReadingClauses) + + carriedSymbols, carriedSelectivity = carryProjectionSelectivity(part.With.Projection, currentSymbols, currentSelectivity) + } + + if finalPart := query.SingleQuery.MultiPartQuery.SinglePartQuery; finalPart != nil { + if err := appendQueryPartLowerings(&plan, len(query.SingleQuery.MultiPartQuery.Parts), finalPart, finalPart.ReadingClauses, predicateAttachments, carriedSymbols, carriedSelectivity); err != nil { + return LoweringPlan{}, err + } + } + } else if singlePart := query.SingleQuery.SinglePartQuery; singlePart != nil { + if err := appendQueryPartLowerings(&plan, 0, singlePart, singlePart.ReadingClauses, predicateAttachments, nil, 
nil); err != nil { + return LoweringPlan{}, err + } + } + + appendPredicatePlacementDecisions(&plan, query, predicateAttachments) + attachPredicatePlacementsToSuffixPushdowns(&plan) + appendCountStoreFastPathDecisions(&plan, query) + appendAggregateTraversalCountDecisions(&plan, query) + return plan, nil +} + +func appendQueryPartLowerings( + plan *LoweringPlan, + queryPartIndex int, + queryPart cypher.SyntaxNode, + readingClauses []*cypher.ReadingClause, + predicateAttachments []PredicateAttachment, + initialDeclaredSymbols map[string]struct{}, + initialSelectivity map[string]boundSourceSelectivity, +) error { + sourceReferences, err := collectReferencedSourceIdentifiers(queryPart) + if err != nil { + return err + } + + appendProjectionPruningDecisions(plan, queryPartIndex, readingClauses, sourceReferences) + appendLatePathMaterializationDecisions(plan, queryPartIndex, readingClauses, sourceReferences) + appendPatternPredicateProjectionLowerings(plan, queryPartIndex, queryPart, sourceReferences) + appendPatternPredicatePlacementDecisions(plan, queryPartIndex, queryPart) + appendExpandIntoDecisions(plan, queryPartIndex, readingClauses) + appendTraversalDirectionDecisions(plan, queryPartIndex, readingClauses, bindingPredicateSymbols(predicateAttachments, queryPartIndex), initialDeclaredSymbols, initialSelectivity) + shortestPathSearchSymbols := shortestPathSearchPredicateSymbols(readingClauses) + appendShortestPathStrategyDecisions(plan, queryPartIndex, readingClauses, shortestPathSearchSymbols) + appendShortestPathFilterDecisions(plan, queryPartIndex, readingClauses, shortestPathSearchSymbols) + appendLimitPushdownDecisions(plan, queryPartIndex, queryPart, readingClauses) + appendExpansionSuffixPushdownDecisions(plan, queryPartIndex, readingClauses) + return nil +} + +func appendProjectionPruningDecisions(plan *LoweringPlan, queryPartIndex int, readingClauses []*cypher.ReadingClause, sourceReferences map[string]struct{}) { + for clauseIndex, readingClause := range 
readingClauses { + if readingClause == nil || readingClause.Match == nil || readingClause.Match.Optional { + continue + } + + for patternIndex, patternPart := range readingClause.Match.Pattern { + steps := traversalStepsForPattern(patternPart) + if len(steps) == 0 { + continue + } + + appendPatternProjectionPruningDecisions(plan, PatternTarget{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + }, patternPart, steps, sourceReferences) + } + } +} + +func appendPatternProjectionPruningDecisions(plan *LoweringPlan, target PatternTarget, patternPart *cypher.PatternPart, steps []sourceTraversalStep, sourceReferences map[string]struct{}) { + pathReferenced := referencesSourceIdentifier(sourceReferences, variableSymbol(patternPart.Variable)) + + for stepIndex, step := range steps { + decision := ProjectionPruningDecision{ + Target: target.TraversalStep(stepIndex), + ReferencedSymbols: sortedMapKeys(sourceReferences), + PatternBindingReferenced: pathReferenced, + } + + edgeReferenced := referencesSourceIdentifier(sourceReferences, variableSymbol(step.Relationship.Variable)) + var hasPruning bool + if step.Relationship.Range != nil { + decision.OmitRelationship = !edgeReferenced + decision.OmitPathBinding = !pathReferenced + hasPruning = decision.OmitRelationship || decision.OmitPathBinding + } else { + var ( + leftReferenced = referencesSourceIdentifier(sourceReferences, variableSymbol(step.LeftNode.Variable)) + rightReferenced = referencesSourceIdentifier(sourceReferences, variableSymbol(step.RightNode.Variable)) + ) + + decision.OmitLeftNode = !(leftReferenced || pathReferenced) + decision.OmitRelationship = !(edgeReferenced || pathReferenced) + decision.OmitRightNode = !(rightReferenced || pathReferenced || stepIndex+1 < len(steps)) + hasPruning = decision.OmitLeftNode || decision.OmitRelationship || decision.OmitRightNode + } + + if hasPruning { + plan.ProjectionPruning = append(plan.ProjectionPruning, decision) + } + } +} + 
+func appendPatternPredicateProjectionLowerings(plan *LoweringPlan, queryPartIndex int, queryPart cypher.SyntaxNode, sourceReferences map[string]struct{}) { + for predicateIndex, predicate := range patternPredicatesInQueryPart(queryPart) { + var ( + patternPart = patternPartForPredicate(predicate) + steps = traversalStepsForPattern(patternPart) + ) + if len(steps) == 0 { + continue + } + + target := PatternTarget{ + QueryPartIndex: queryPartIndex, + PatternIndex: predicateIndex, + Predicate: true, + PredicateIndex: predicateIndex, + } + + appendPatternProjectionPruningDecisions(plan, target, patternPart, steps, sourceReferences) + appendPatternLatePathMaterializationDecisions(plan, target, patternPart, steps, sourceReferences) + } +} + +func appendPatternPredicatePlacementDecisions(plan *LoweringPlan, queryPartIndex int, queryPart cypher.SyntaxNode) { + for predicateIndex, predicate := range patternPredicatesInQueryPart(queryPart) { + var ( + patternPart = patternPartForPredicate(predicate) + steps = traversalStepsForPattern(patternPart) + ) + if len(steps) != 1 { + continue + } + + step := steps[0] + if step.Relationship == nil || + step.Relationship.Direction != graph.DirectionBoth || + relationshipPatternHasProperties(step.Relationship) || + nodePatternHasConstraints(step.LeftNode) || + nodePatternHasConstraints(step.RightNode) { + continue + } + + if variableSymbol(step.Relationship.Variable) != "" { + continue + } + + target := PatternTarget{ + QueryPartIndex: queryPartIndex, + PatternIndex: predicateIndex, + Predicate: true, + PredicateIndex: predicateIndex, + }.TraversalStep(0) + + plan.PatternPredicate = append(plan.PatternPredicate, PatternPredicatePlacementDecision{ + Target: target, + Mode: PatternPredicatePlacementExistence, + }) + } +} + +func appendLatePathMaterializationDecisions(plan *LoweringPlan, queryPartIndex int, readingClauses []*cypher.ReadingClause, sourceReferences map[string]struct{}) { + for clauseIndex, readingClause := range 
readingClauses { + if readingClause == nil || readingClause.Match == nil || readingClause.Match.Optional { + continue + } + + for patternIndex, patternPart := range readingClause.Match.Pattern { + steps := traversalStepsForPattern(patternPart) + appendPatternLatePathMaterializationDecisions(plan, PatternTarget{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + }, patternPart, steps, sourceReferences) + } + } +} + +func appendPatternLatePathMaterializationDecisions(plan *LoweringPlan, target PatternTarget, patternPart *cypher.PatternPart, steps []sourceTraversalStep, sourceReferences map[string]struct{}) { + pathReferenced := referencesSourceIdentifier(sourceReferences, variableSymbol(patternPart.Variable)) + + for stepIndex, step := range steps { + stepTarget := target.TraversalStep(stepIndex) + + if step.Relationship.Range != nil { + if !pathReferenced { + continue + } + + plan.LatePathMaterialization = append(plan.LatePathMaterialization, LatePathMaterializationDecision{ + Target: stepTarget, + Mode: LatePathMaterializationExpansionPath, + }) + continue + } + + edgeReferenced := referencesSourceIdentifier(sourceReferences, variableSymbol(step.Relationship.Variable)) + if pathReferenced { + mode := LatePathMaterializationPathEdgeID + if edgeReferenced { + mode = LatePathMaterializationEdgeComposite + } + + plan.LatePathMaterialization = append(plan.LatePathMaterialization, LatePathMaterializationDecision{ + Target: stepTarget, + Mode: mode, + }) + continue + } + + if !edgeReferenced && stepIndex+1 < len(steps) { + plan.LatePathMaterialization = append(plan.LatePathMaterialization, LatePathMaterializationDecision{ + Target: stepTarget, + Mode: LatePathMaterializationPathEdgeID, + }) + } + } +} + +func appendExpandIntoDecisions(plan *LoweringPlan, queryPartIndex int, readingClauses []*cypher.ReadingClause) { + declaredSymbols := map[string]struct{}{} + + for clauseIndex, readingClause := range readingClauses { + if 
readingClause == nil || readingClause.Match == nil {
			// NOTE(review): everything down to the first top-level closing brace is
			// the tail of a definition that begins before this chunk. It is carried
			// over with formatting restored only; no statement was changed.
			continue
		}

		match := readingClause.Match
		if match.Optional {
			declareMatchSymbols(declaredSymbols, match)
			continue
		}

		for patternIndex, patternPart := range match.Pattern {
			var (
				steps             = traversalStepsForPattern(patternPart)
				declaredEndpoints = declaredSymbolsBeforeStepEndpoints(declaredSymbols, steps)
			)

			for stepIndex, step := range steps {
				if step.Relationship.Range != nil {
					continue
				}

				var (
					leftSymbol  = variableSymbol(step.LeftNode.Variable)
					rightSymbol = variableSymbol(step.RightNode.Variable)
				)
				_, leftBound := declaredEndpoints[stepIndex].BeforeLeftNode[leftSymbol]
				_, rightBound := declaredEndpoints[stepIndex].BeforeRightNode[rightSymbol]

				if leftSymbol == "" {
					leftBound = stepIndex > 0
				}

				if rightSymbol == "" || !leftBound || !rightBound {
					continue
				}

				plan.ExpandInto = append(plan.ExpandInto, ExpandIntoDecision{
					Target: PatternTarget{
						QueryPartIndex: queryPartIndex,
						ClauseIndex:    clauseIndex,
						PatternIndex:   patternIndex,
					}.TraversalStep(stepIndex),
				})
			}

			declarePatternSymbols(declaredSymbols, patternPart)
		}

		declareWhereSymbols(declaredSymbols, match)
	}
}

// declaredStepEndpoints holds, for one traversal step, snapshots of the set of
// declared symbols taken just before the step's left node and just before its
// right node are added to that set.
type declaredStepEndpoints struct {
	BeforeLeftNode  map[string]struct{}
	BeforeRightNode map[string]struct{}
}

// declaredSymbolsBeforeStepEndpoints walks steps in order and returns one
// declaredStepEndpoints per step. The running set starts as a copy of initial;
// for every step the left node symbol and the relationship symbol are added
// between the two snapshots, and the right node symbol is added afterwards so
// that it is visible to the following step. initial itself is not modified.
func declaredSymbolsBeforeStepEndpoints(initial map[string]struct{}, steps []sourceTraversalStep) []declaredStepEndpoints {
	var (
		declared  = copyStringSet(initial)
		endpoints = make([]declaredStepEndpoints, len(steps))
	)

	for idx, step := range steps {
		endpoints[idx].BeforeLeftNode = copyStringSet(declared)
		addSymbol(declared, variableSymbol(step.LeftNode.Variable))
		addSymbol(declared, variableSymbol(step.Relationship.Variable))
		endpoints[idx].BeforeRightNode = copyStringSet(declared)
		addSymbol(declared, variableSymbol(step.RightNode.Variable))
	}

	return endpoints
}

// appendTraversalDirectionDecisions appends traversal direction decisions to
// plan.TraversalDirection for every traversal step of every non-optional MATCH
// in readingClauses.
//
// For each step traversalDirectionDecisionForStep is consulted first; only when
// it yields no decision is boundLeftExpansionDirectionDecisionForStep tried.
// Optional matches contribute their symbols to the declared set but produce no
// decisions. Both initial maps are copied before use, so the caller's maps are
// left untouched.
func appendTraversalDirectionDecisions(
	plan *LoweringPlan,
	queryPartIndex int,
	readingClauses []*cypher.ReadingClause,
	predicateConstrainedSymbols map[string]struct{},
	initialDeclaredSymbols map[string]struct{},
	initialSelectivity map[string]boundSourceSelectivity,
) {
	var (
		declaredSymbols           = copyStringSet(initialDeclaredSymbols)
		declaredSourceSelectivity = copyBoundSourceSelectivity(initialSelectivity)
	)

	for clauseIndex, readingClause := range readingClauses {
		if readingClause == nil || readingClause.Match == nil {
			continue
		}

		match := readingClause.Match
		if match.Optional {
			declareMatchSymbols(declaredSymbols, match)
			continue
		}

		for patternIndex, patternPart := range match.Pattern {
			var (
				steps             = traversalStepsForPattern(patternPart)
				declaredEndpoints = declaredSymbolsBeforeStepEndpoints(declaredSymbols, steps)
				patternTarget     = PatternTarget{
					QueryPartIndex: queryPartIndex,
					ClauseIndex:    clauseIndex,
					PatternIndex:   patternIndex,
				}
			)

			for stepIndex, step := range steps {
				// Resolve the endpoint symbols and their predicate attachments once;
				// both decision helpers below need the same values.
				var (
					target            = patternTarget.TraversalStep(stepIndex)
					leftSymbol        = variableSymbol(step.LeftNode.Variable)
					rightSymbol       = variableSymbol(step.RightNode.Variable)
					leftHasPredicate  = referencesSourceIdentifier(predicateConstrainedSymbols, leftSymbol)
					rightHasPredicate = referencesSourceIdentifier(predicateConstrainedSymbols, rightSymbol)
				)

				if decision, shouldFlip := traversalDirectionDecisionForStep(
					target,
					stepIndex,
					step,
					declaredEndpoints[stepIndex],
					leftHasPredicate,
					rightHasPredicate,
				); shouldFlip {
					plan.TraversalDirection = append(plan.TraversalDirection, decision)
					continue
				}

				// The fallback may return a decision whose Flip is false; it is still
				// recorded because it carries the reason the direction was kept.
				if decision, shouldRecord := boundLeftExpansionDirectionDecisionForStep(
					target,
					patternPart,
					steps,
					stepIndex,
					step,
					declaredEndpoints[stepIndex],
					rightHasPredicate,
					nodePatternSelectivity(step.RightNode, rightHasPredicate),
					declaredSourceSelectivity[leftSymbol],
				); shouldRecord {
					plan.TraversalDirection = append(plan.TraversalDirection, decision)
				}
			}

			declarePatternSymbols(declaredSymbols, patternPart)
		}

		declareSelectiveMatchSymbols(declaredSourceSelectivity, match)
		declareWhereSymbols(declaredSymbols, match)
	}
}

// bindingPredicateSymbols returns the set of binding symbols named by the
// predicate attachments that belong to queryPartIndex.
func bindingPredicateSymbols(predicateAttachments []PredicateAttachment, queryPartIndex int) map[string]struct{} {
	symbols := map[string]struct{}{}

	for _, attachment := range predicateAttachments {
		if attachment.QueryPartIndex != queryPartIndex {
			continue
		}

		for _, symbol := range attachment.BindingSymbols {
			addSymbol(symbols, symbol)
		}
	}

	return symbols
}

// copyBoundSourceSelectivity returns a new, always non-nil map holding the same
// entries as values, so callers can write to the result even when values is nil.
func copyBoundSourceSelectivity(values map[string]boundSourceSelectivity) map[string]boundSourceSelectivity {
	copied := make(map[string]boundSourceSelectivity, len(values))
	for key, value := range values {
		copied[key] = value
	}

	return copied
}

// carryProjectionSelectivity computes which symbols, and what selectivity for
// each, survive projection.
//
// A nil projection carries nothing. When the projection carries all symbols
// (see projectionCarriesAllSymbols) every incoming symbol is kept under its own
// name. Every projection item that resolves to a variable other than the
// asterisk token is carried under its alias. In both cases the carried
// selectivity is the higher of the incoming selectivity and the selectivity
// implied by the projection's own LIMIT/ORDER (projectionCardinalitySelectivity).
func carryProjectionSelectivity(
	projection *cypher.Projection,
	incomingSymbols map[string]struct{},
	incomingSelectivity map[string]boundSourceSelectivity,
) (map[string]struct{}, map[string]boundSourceSelectivity) {
	var (
		carriedSymbols     = map[string]struct{}{}
		carriedSelectivity = map[string]boundSourceSelectivity{}
	)

	if projection == nil {
		return carriedSymbols, carriedSelectivity
	}

	projectionSelectivity := projectionCardinalitySelectivity(projection)
	if projectionCarriesAllSymbols(projection) {
		for symbol := range incomingSymbols {
			addSymbol(carriedSymbols, symbol)
			mergeBoundSourceSelectivity(carriedSelectivity, symbol, incomingSelectivity[symbol])
			mergeBoundSourceSelectivity(carriedSelectivity, symbol, projectionSelectivity)
		}
	}

	for _, item := range projection.Items {
		symbol, alias, ok := projectionItemVariableSymbolAndAlias(item)
		if !ok {
			continue
		}
		if symbol == cypher.TokenLiteralAsterisk {
			continue
		}

		addSymbol(carriedSymbols, alias)
		mergeBoundSourceSelectivity(carriedSelectivity, alias, incomingSelectivity[symbol])
		mergeBoundSourceSelectivity(carriedSelectivity, alias, projectionSelectivity)
	}

	return carriedSymbols, carriedSelectivity
}

// projectionCarriesAllSymbols reports whether projection forwards every
// incoming symbol: its All flag is set, it has no items, or one of its items
// resolves to the asterisk token. A nil projection carries nothing.
func projectionCarriesAllSymbols(projection *cypher.Projection) bool {
	if projection == nil {
		return false
	}
	if projection.All || len(projection.Items) == 0 {
		return true
	}

	for _, item := range projection.Items {
		if symbol, _, ok := projectionItemVariableSymbolAndAlias(item); ok && symbol == cypher.TokenLiteralAsterisk {
			return true
		}
		if symbol, ok := expressionVariableSymbol(item); ok && symbol == cypher.TokenLiteralAsterisk {
			return true
		}
	}

	return false
}

// projectionCardinalitySelectivity classifies how strongly projection bounds
// the number of rows: none without a LIMIT, top-N when the LIMIT is combined
// with an ORDER or an aggregate item, and limited otherwise.
func projectionCardinalitySelectivity(projection *cypher.Projection) boundSourceSelectivity {
	if projection == nil || projection.Limit == nil {
		return boundSourceSelectivityNone
	}

	if projection.Order != nil || projectionHasAggregate(projection) {
		return boundSourceSelectivityTopN
	}

	return boundSourceSelectivityLimited
}

// projectionHasAggregate reports whether any *cypher.ProjectionItem of
// projection has an expression accepted by expressionHasAggregate.
func projectionHasAggregate(projection *cypher.Projection) bool {
	if projection == nil {
		return false
	}

	for _, item := range projection.Items {
		projectionItem, ok := item.(*cypher.ProjectionItem)
		if !ok || projectionItem == nil {
			continue
		}

		if expressionHasAggregate(projectionItem.Expression) {
			return true
		}
	}

	return false
}

// expressionHasAggregate reports whether expression is a function invocation
// whose name equals cypher.CountFunction, ignoring case.
//
// NOTE(review): only a top-level count invocation is recognised; nested calls
// and other aggregate functions report false — confirm this is intended.
func expressionHasAggregate(expression cypher.Expression) bool {
	switch typedExpression := expression.(type) {
	case *cypher.FunctionInvocation:
		return typedExpression != nil && strings.EqualFold(typedExpression.Name, cypher.CountFunction)
	default:
		return false
	}
}

// declareSelectiveMatchSymbols raises the selectivity recorded in symbols for
// every named node in match, based on the node's inline property map and on
// equality predicates found among the conjunction terms of the WHERE clause.
// Existing entries are only ever increased, never lowered.
func declareSelectiveMatchSymbols(symbols map[string]boundSourceSelectivity, match *cypher.Match) {
	if match == nil {
		return
	}

	for _, patternPart := range match.Pattern {
		for _, nodePattern := range nodePatternsForPattern(patternPart) {
			if nodePattern == nil {
				continue
			}

			symbol := variableSymbol(nodePattern.Variable)
			if symbol == "" {
				continue
			}

			mergeBoundSourceSelectivity(symbols, symbol, propertyConstraintSelectivity(nodePattern.Properties))
		}
	}

	if match.Where == nil {
		return
	}

	for _, expression := range match.Where.Expressions {
		for _, term := range cypherConjunctionTerms(expression) {
			if symbol, selectivity, ok := propertyPredicateSelectivity(term); ok {
				mergeBoundSourceSelectivity(symbols, symbol, selectivity)
			}
		}
	}
}

// declareReadingClauseSymbols passes the Match of every non-nil reading clause
// to declareMatchSymbols, accumulating into symbols.
func declareReadingClauseSymbols(symbols map[string]struct{}, readingClauses []*cypher.ReadingClause) {
	for _, readingClause := range readingClauses {
		if readingClause != nil {
			declareMatchSymbols(symbols, readingClause.Match)
		}
	}
}

// declareReadingClauseSelectivity passes the Match of every non-nil reading
// clause to declareSelectiveMatchSymbols, accumulating into symbols.
func declareReadingClauseSelectivity(symbols map[string]boundSourceSelectivity, readingClauses []*cypher.ReadingClause) {
	for _, readingClause := range readingClauses {
		if readingClause != nil {
			declareSelectiveMatchSymbols(symbols, readingClause.Match)
		}
	}
}

// nodePatternsForPattern returns the node patterns among the elements of
// patternPart, in element order. A nil patternPart yields nil.
func nodePatternsForPattern(patternPart *cypher.PatternPart) []*cypher.NodePattern {
	if patternPart == nil {
		return nil
	}

	nodePatterns := make([]*cypher.NodePattern, 0, len(patternPart.PatternElements))
	for _, element := range patternPart.PatternElements {
		if nodePattern, ok := element.AsNodePattern(); ok {
			nodePatterns = append(nodePatterns, nodePattern)
		}
	}

	return nodePatterns
}

// mergeBoundSourceSelectivity stores selectivity for symbol only when it is
// greater than the value already recorded (a missing entry reads as the zero
// value).
func mergeBoundSourceSelectivity(symbols map[string]boundSourceSelectivity, symbol string, selectivity boundSourceSelectivity) {
	if selectivity > symbols[symbol] {
		symbols[symbol] = selectivity
	}
}

// propertyPredicateSelectivity inspects a single-partial equality comparison of
// the form `symbol.property = value` (either operand order) where the value
// side references no source identifier. It returns the symbol, the selectivity
// derived from the property and value, and true on a match; any other
// expression returns ("", boundSourceSelectivityNone, false).
func propertyPredicateSelectivity(expression cypher.Expression) (string, boundSourceSelectivity, bool) {
	comparison, isComparison := expression.(*cypher.Comparison)
	// A typed-nil *cypher.Comparison satisfies the assertion, so it is rejected
	// explicitly before any field is read.
	if !isComparison || comparison == nil || len(comparison.Partials) != 1 {
		return "", boundSourceSelectivityNone, false
	}

	partial := comparison.Partials[0]
	if partial.Operator != cypher.OperatorEquals {
		return "", boundSourceSelectivityNone, false
	}

	if symbol, property, ok := propertyLookupSymbol(comparison.Left); ok && !expressionReferencesAnySource(partial.Right) {
		return symbol, propertySelectivity(property, partial.Right), true
	}

	if symbol, property, ok := propertyLookupSymbol(partial.Right); ok && !expressionReferencesAnySource(comparison.Left) {
		return symbol, propertySelectivity(property, comparison.Left), true
	}

	return "", boundSourceSelectivityNone, false
}

// propertyConstraintSelectivity returns the highest selectivity among the
// entries of an inline property map. Anything that is not a non-nil
// *cypher.Properties, or that is supplied as a parameter, yields none.
func propertyConstraintSelectivity(expression cypher.Expression) boundSourceSelectivity {
	properties, ok := expression.(*cypher.Properties)
	if !ok || properties == nil || properties.Parameter != nil {
		return boundSourceSelectivityNone
	}

	highest := boundSourceSelectivityNone
	for property, value := range properties.Map {
		if selectivity := propertySelectivity(property, value); selectivity > highest {
			highest = selectivity
		}
	}

	return highest
}

// propertySelectivity rates an equality constraint on property: none when the
// value is not constant, unique when the property name is "objectid" (compared
// case-insensitively), and predicate for any other constant value.
func propertySelectivity(property string, value cypher.Expression) boundSourceSelectivity {
	if !expressionIsConstant(value) {
		return boundSourceSelectivityNone
	}

	if strings.EqualFold(property, "objectid") {
		return boundSourceSelectivityUnique
	}

	return boundSourceSelectivityPredicate
}

// expressionIsConstant reports whether expression is a non-null literal or a
// parameter. Typed-nil pointers report false.
func expressionIsConstant(expression cypher.Expression) bool {
	switch typedExpression := expression.(type) {
	case *cypher.Literal:
		return typedExpression != nil && !typedExpression.Null
	case *cypher.Parameter:
		return typedExpression != nil
	default:
		return false
	}
}

// propertyLookupSymbol extracts the variable symbol and the property name from
// a property lookup on a plain variable. It returns false unless both names are
// non-empty.
func propertyLookupSymbol(expression cypher.Expression) (string, string, bool) {
	propertyLookup, isPropertyLookup := expression.(*cypher.PropertyLookup)
	if !isPropertyLookup || propertyLookup == nil {
		return "", "", false
	}

	variable, isVariable := propertyLookup.Atom.(*cypher.Variable)
	if !isVariable || variable == nil || variable.Symbol == "" || propertyLookup.Symbol == "" {
		return "", "", false
	}

	return variable.Symbol, propertyLookup.Symbol, true
}

+func nodePatternHasUniquePropertyConstraint(nodePattern *cypher.NodePattern) bool { + return nodePattern != nil && propertyConstraintSelectivity(nodePattern.Properties) == boundSourceSelectivityUnique +} + +func nodePatternSelectivity(nodePattern *cypher.NodePattern, hasAttachedPredicate bool) boundSourceSelectivity { + if nodePattern == nil { + return boundSourceSelectivityNone + } + + selectivity := boundSourceSelectivityNone + if len(nodePattern.Kinds) > 0 { + selectivity = boundSourceSelectivityKindOnly + } + + mergeSelectivityValue(&selectivity, propertyConstraintSelectivity(nodePattern.Properties)) + if hasAttachedPredicate { + mergeSelectivityValue(&selectivity, boundSourceSelectivityPredicate) + } + + return selectivity +} + +func mergeSelectivityValue(current *boundSourceSelectivity, next boundSourceSelectivity) { + if next > *current { + *current = next + } +} + +func shortestPathSearchPredicateSymbols(readingClauses []*cypher.ReadingClause) map[string]struct{} { + symbols := map[string]struct{}{} + + for _, readingClause := range readingClauses { + if readingClause == nil || readingClause.Match == nil || readingClause.Match.Where == nil { + continue + } + + for _, expression := range readingClause.Match.Where.Expressions { + addShortestPathSearchPredicateSymbols(symbols, expression) + } + } + + return symbols +} + +func addShortestPathSearchPredicateSymbols(symbols map[string]struct{}, expression cypher.Expression) { + for _, term := range cypherConjunctionTerms(expression) { + if symbol, ok := shortestPathSearchPredicateSymbol(term); ok { + addSymbol(symbols, symbol) + } + } +} + +func cypherConjunctionTerms(expression cypher.Expression) []cypher.Expression { + if conjunction, isConjunction := expression.(*cypher.Conjunction); isConjunction { + var terms []cypher.Expression + for _, subexpression := range conjunction.Expressions { + terms = append(terms, cypherConjunctionTerms(subexpression)...) 
+ } + + return terms + } + + return []cypher.Expression{expression} +} + +func shortestPathSearchPredicateSymbol(expression cypher.Expression) (string, bool) { + comparison, isComparison := expression.(*cypher.Comparison) + if !isComparison || len(comparison.Partials) != 1 { + return "", false + } + + partial := comparison.Partials[0] + if !isEndpointSearchOperator(partial.Operator) { + return "", false + } + + if symbol, ok := propertyLookupVariableSymbol(comparison.Left); ok && !expressionReferencesAnySource(partial.Right) { + return symbol, true + } + + if symbol, ok := propertyLookupVariableSymbol(partial.Right); ok && !expressionReferencesAnySource(comparison.Left) { + return symbol, true + } + + return "", false +} + +func isEndpointSearchOperator(operator cypher.Operator) bool { + switch operator { + case cypher.OperatorEquals, + cypher.OperatorRegexMatch, + cypher.OperatorGreaterThan, + cypher.OperatorGreaterThanOrEqualTo, + cypher.OperatorLessThan, + cypher.OperatorLessThanOrEqualTo, + cypher.OperatorStartsWith, + cypher.OperatorEndsWith, + cypher.OperatorContains, + cypher.OperatorIn: + return true + default: + return false + } +} + +func propertyLookupVariableSymbol(expression cypher.Expression) (string, bool) { + propertyLookup, isPropertyLookup := expression.(*cypher.PropertyLookup) + if !isPropertyLookup || propertyLookup == nil { + return "", false + } + + variable, isVariable := propertyLookup.Atom.(*cypher.Variable) + if !isVariable || variable == nil || variable.Symbol == "" { + return "", false + } + + return variable.Symbol, true +} + +func expressionReferencesAnySource(expression cypher.Expression) bool { + references, err := collectReferencedSourceIdentifiers(expression) + return err != nil || len(references) > 0 +} + +func traversalDirectionDecisionForStep( + target TraversalStepTarget, + stepIndex int, + step sourceTraversalStep, + declaredEndpoints declaredStepEndpoints, + leftHasAttachedPredicate bool, + rightHasAttachedPredicate bool, +) 
(TraversalDirectionDecision, bool) { + if leftEndpointBoundForStep(stepIndex, step, declaredEndpoints) { + return TraversalDirectionDecision{}, false + } + + var ( + rightSymbol = variableSymbol(step.RightNode.Variable) + leftSymbol = variableSymbol(step.LeftNode.Variable) + ) + if rightSymbol != "" { + if _, rightBound := declaredEndpoints.BeforeRightNode[rightSymbol]; rightBound { + if rightSymbol == leftSymbol { + return TraversalDirectionDecision{}, false + } + + return TraversalDirectionDecision{ + Target: target, + Flip: true, + Reason: traversalDirectionReasonRightBound, + }, true + } + } + + var ( + leftConstrained = nodePatternHasConstraints(step.LeftNode) || leftHasAttachedPredicate + rightConstrained = nodePatternHasConstraints(step.RightNode) || rightHasAttachedPredicate + ) + + if rightConstrained && !leftConstrained { + reason := traversalDirectionReasonRightConstrained + if !nodePatternHasConstraints(step.RightNode) && rightHasAttachedPredicate { + reason = traversalDirectionReasonRightPredicate + } + + return TraversalDirectionDecision{ + Target: target, + Flip: true, + Reason: reason, + }, true + } + + return TraversalDirectionDecision{}, false +} + +func boundLeftExpansionDirectionDecisionForStep( + target TraversalStepTarget, + patternPart *cypher.PatternPart, + steps []sourceTraversalStep, + stepIndex int, + step sourceTraversalStep, + declaredEndpoints declaredStepEndpoints, + rightHasAttachedPredicate bool, + rightSelectivity boundSourceSelectivity, + leftSourceSelectivity boundSourceSelectivity, +) (TraversalDirectionDecision, bool) { + if patternPart == nil || + patternPart.Variable != nil || + patternPart.ShortestPathPattern || + patternPart.AllShortestPathsPattern || + len(steps) != 1 || + stepIndex != 0 || + step.Relationship == nil || + step.Relationship.Range == nil || + step.Relationship.Direction == graph.DirectionBoth || + step.Relationship.Variable != nil || + nodePatternHasConstraints(step.LeftNode) || + 
!nodePatternHasConstraints(step.RightNode) { + return TraversalDirectionDecision{}, false + } + + var ( + leftSymbol = variableSymbol(step.LeftNode.Variable) + rightSymbol = variableSymbol(step.RightNode.Variable) + ) + if leftSymbol == "" || leftSymbol == rightSymbol { + return TraversalDirectionDecision{}, false + } + + if _, leftBound := declaredEndpoints.BeforeLeftNode[leftSymbol]; !leftBound { + return TraversalDirectionDecision{}, false + } + + if rightSymbol != "" { + if _, rightBound := declaredEndpoints.BeforeRightNode[rightSymbol]; rightBound { + return TraversalDirectionDecision{}, false + } + } + + if leftSourceSelectivity >= boundSourceSelectivityUnique && rightSelectivity < boundSourceSelectivityUnique { + return TraversalDirectionDecision{ + Target: target, + Reason: traversalDirectionReasonBoundSourceSelective, + }, true + } + + if step.RightNode.Properties == nil && !rightHasAttachedPredicate { + return TraversalDirectionDecision{ + Target: target, + Reason: traversalDirectionReasonTerminalKindOnlyEstimateWide, + }, true + } + + return TraversalDirectionDecision{ + Target: target, + Flip: true, + Reason: traversalDirectionReasonRightConstrained, + }, true +} + +func appendShortestPathStrategyDecisions(plan *LoweringPlan, queryPartIndex int, readingClauses []*cypher.ReadingClause, predicateConstrainedSymbols map[string]struct{}) { + declaredSymbols := map[string]struct{}{} + + for clauseIndex, readingClause := range readingClauses { + if readingClause == nil || readingClause.Match == nil { + continue + } + + match := readingClause.Match + if match.Optional { + declareMatchSymbols(declaredSymbols, match) + continue + } + + for patternIndex, patternPart := range match.Pattern { + if patternPart == nil || (!patternPart.ShortestPathPattern && !patternPart.AllShortestPathsPattern) { + declarePatternSymbols(declaredSymbols, patternPart) + continue + } + + var ( + steps = traversalStepsForPattern(patternPart) + declaredEndpoints = 
declaredSymbolsBeforeStepEndpoints(declaredSymbols, steps) + ) + patternTarget := PatternTarget{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + } + + for stepIndex, step := range steps { + if step.Relationship.Range == nil { + continue + } + + if decision, shouldPlan := shortestPathStrategyDecisionForStep( + patternTarget.TraversalStep(stepIndex), + step, + declaredEndpoints[stepIndex], + predicateConstrainedSymbols, + ); shouldPlan { + plan.ShortestPathStrategy = append(plan.ShortestPathStrategy, decision) + } + } + + declarePatternSymbols(declaredSymbols, patternPart) + } + + declareWhereSymbols(declaredSymbols, match) + } +} + +func shortestPathStrategyDecisionForStep( + target TraversalStepTarget, + step sourceTraversalStep, + declaredEndpoints declaredStepEndpoints, + predicateConstrainedSymbols map[string]struct{}, +) (ShortestPathStrategyDecision, bool) { + var ( + leftSymbol = variableSymbol(step.LeftNode.Variable) + rightSymbol = variableSymbol(step.RightNode.Variable) + ) + + _, rightBound := declaredEndpoints.BeforeRightNode[rightSymbol] + if leftEndpointBoundForStep(target.StepIndex, step, declaredEndpoints) && rightSymbol != "" && rightBound { + return ShortestPathStrategyDecision{ + Target: target, + Strategy: ShortestPathStrategyBidirectional, + Reason: shortestPathStrategyReasonBoundEndpointPairs, + }, true + } + + if endpointHasSearchConstraint(step.LeftNode, leftSymbol, predicateConstrainedSymbols) && + endpointHasSearchConstraint(step.RightNode, rightSymbol, predicateConstrainedSymbols) { + return ShortestPathStrategyDecision{ + Target: target, + Strategy: ShortestPathStrategyBidirectional, + Reason: shortestPathStrategyReasonEndpointPredicates, + }, true + } + + return ShortestPathStrategyDecision{}, false +} + +func endpointHasSearchConstraint(nodePattern *cypher.NodePattern, symbol string, predicateConstrainedSymbols map[string]struct{}) bool { + if nodePattern == nil { + return false + } + + 
return nodePattern.Properties != nil || referencesSourceIdentifier(predicateConstrainedSymbols, symbol) +} + +func endpointHasTerminalFilterConstraint(nodePattern *cypher.NodePattern, symbol string, predicateConstrainedSymbols map[string]struct{}) bool { + if nodePattern == nil { + return false + } + + return nodePatternHasConstraints(nodePattern) || referencesSourceIdentifier(predicateConstrainedSymbols, symbol) +} + +func appendShortestPathFilterDecisions(plan *LoweringPlan, queryPartIndex int, readingClauses []*cypher.ReadingClause, predicateConstrainedSymbols map[string]struct{}) { + declaredSymbols := map[string]struct{}{} + + for clauseIndex, readingClause := range readingClauses { + if readingClause == nil || readingClause.Match == nil { + continue + } + + match := readingClause.Match + if match.Optional { + declareMatchSymbols(declaredSymbols, match) + continue + } + + for patternIndex, patternPart := range match.Pattern { + if patternPart == nil || (!patternPart.ShortestPathPattern && !patternPart.AllShortestPathsPattern) { + declarePatternSymbols(declaredSymbols, patternPart) + continue + } + + var ( + steps = traversalStepsForPattern(patternPart) + declaredEndpoints = declaredSymbolsBeforeStepEndpoints(declaredSymbols, steps) + ) + patternTarget := PatternTarget{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + } + + for stepIndex, step := range steps { + if step.Relationship.Range == nil { + continue + } + + if decision, shouldPlan := shortestPathFilterDecisionForStep( + plan, + patternTarget.TraversalStep(stepIndex), + step, + declaredEndpoints[stepIndex], + predicateConstrainedSymbols, + ); shouldPlan { + plan.ShortestPathFilter = append(plan.ShortestPathFilter, decision) + } + } + + declarePatternSymbols(declaredSymbols, patternPart) + } + + declareWhereSymbols(declaredSymbols, match) + } +} + +func shortestPathFilterDecisionForStep( + plan *LoweringPlan, + target TraversalStepTarget, + step 
sourceTraversalStep, + declaredEndpoints declaredStepEndpoints, + predicateConstrainedSymbols map[string]struct{}, +) (ShortestPathFilterDecision, bool) { + var ( + leftSymbol = variableSymbol(step.LeftNode.Variable) + rightSymbol = variableSymbol(step.RightNode.Variable) + ) + if rightSymbol != "" { + if _, rightBound := declaredEndpoints.BeforeRightNode[rightSymbol]; rightBound { + return ShortestPathFilterDecision{}, false + } + } + + var ( + leftSearchConstrained = endpointHasSearchConstraint(step.LeftNode, leftSymbol, predicateConstrainedSymbols) + rightSearchConstrained = endpointHasSearchConstraint(step.RightNode, rightSymbol, predicateConstrainedSymbols) + ) + if !endpointHasTerminalFilterConstraint(step.RightNode, rightSymbol, predicateConstrainedSymbols) { + return ShortestPathFilterDecision{}, false + } + + if hasShortestPathBidirectionalStrategy(plan, target) && leftSearchConstrained && rightSearchConstrained { + return ShortestPathFilterDecision{ + Target: target, + Mode: ShortestPathFilterEndpointPair, + Reason: shortestPathFilterReasonEndpointPairPredicates, + }, true + } + + return ShortestPathFilterDecision{ + Target: target, + Mode: ShortestPathFilterTerminal, + Reason: shortestPathFilterReasonTerminalPredicate, + }, true +} + +func hasShortestPathBidirectionalStrategy(plan *LoweringPlan, target TraversalStepTarget) bool { + if plan == nil { + return false + } + + for _, decision := range plan.ShortestPathStrategy { + if decision.Target == target && decision.Strategy == ShortestPathStrategyBidirectional { + return true + } + } + + return false +} + +func appendLimitPushdownDecisions(plan *LoweringPlan, queryPartIndex int, queryPart cypher.SyntaxNode, readingClauses []*cypher.ReadingClause) { + if !queryPartAllowsLimitPushdown(queryPart, readingClauses) { + return + } + + for clauseIndex, readingClause := range readingClauses { + if readingClause == nil || readingClause.Match == nil || readingClause.Match.Optional { + continue + } + + for 
patternIndex, patternPart := range readingClause.Match.Pattern { + if patternPart == nil { + continue + } + if patternPart.AllShortestPathsPattern { + continue + } + + patternTarget := PatternTarget{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + } + + for stepIndex, step := range traversalStepsForPattern(patternPart) { + mode := LimitPushdownTraversalCTE + if patternPart.ShortestPathPattern && step.Relationship.Range != nil { + mode = LimitPushdownShortestPathHarness + } + + plan.LimitPushdown = append(plan.LimitPushdown, LimitPushdownDecision{ + Target: patternTarget.TraversalStep(stepIndex), + Mode: mode, + }) + } + } + } +} + +func queryPartAllowsLimitPushdown(queryPart cypher.SyntaxNode, readingClauses []*cypher.ReadingClause) bool { + projection, updatingClauseCount := queryPartProjection(queryPart) + if projection == nil || + projection.Limit == nil || + projection.Skip != nil || + projection.Order != nil || + projection.Distinct || + len(readingClauses) != 1 || + updatingClauseCount > 0 { + return false + } + + return true +} + +func queryPartProjection(queryPart cypher.SyntaxNode) (*cypher.Projection, int) { + switch typedQueryPart := queryPart.(type) { + case *cypher.SinglePartQuery: + if typedQueryPart.Return == nil { + return nil, len(typedQueryPart.UpdatingClauses) + } + + return typedQueryPart.Return.Projection, len(typedQueryPart.UpdatingClauses) + + case *cypher.MultiPartQueryPart: + if typedQueryPart.With == nil { + return nil, len(typedQueryPart.UpdatingClauses) + } + + return typedQueryPart.With.Projection, len(typedQueryPart.UpdatingClauses) + + default: + return nil, 0 + } +} + +func appendExpansionSuffixPushdownDecisions(plan *LoweringPlan, queryPartIndex int, readingClauses []*cypher.ReadingClause) { + declaredSymbols := map[string]struct{}{} + + for clauseIndex, readingClause := range readingClauses { + if readingClause == nil || readingClause.Match == nil { + continue + } + + match := 
readingClause.Match + if match.Optional { + declareMatchSymbols(declaredSymbols, match) + continue + } + + for patternIndex, patternPart := range match.Pattern { + var ( + steps = traversalStepsForPattern(patternPart) + declaredEndpoints = declaredSymbolsBeforeStepEndpoints(declaredSymbols, steps) + ) + + for stepIndex, step := range steps { + if step.Relationship.Range == nil || stepIndex+1 >= len(steps) { + continue + } + + target := PatternTarget{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + }.TraversalStep(stepIndex) + if hasTraversalDirectionFlip(plan, target) || expansionStepMayFlipForConstraintBalance(stepIndex, step, declaredEndpoints[stepIndex]) { + continue + } + + if suffixLength := expansionSuffixPushdownLength(steps[stepIndex+1:]); suffixLength > 0 { + plan.ExpansionSuffixPushdown = append(plan.ExpansionSuffixPushdown, ExpansionSuffixPushdownDecision{ + Target: target, + SuffixLength: suffixLength, + SuffixStartStep: stepIndex + 1, + SuffixEndStep: stepIndex + suffixLength, + }) + } + } + + declarePatternSymbols(declaredSymbols, patternPart) + } + + declareWhereSymbols(declaredSymbols, match) + } +} + +func expansionStepMayFlipForConstraintBalance(stepIndex int, step sourceTraversalStep, declaredEndpoints declaredStepEndpoints) bool { + _, mayFlip := traversalDirectionDecisionForStep(TraversalStepTarget{}, stepIndex, step, declaredEndpoints, false, false) + return mayFlip +} + +func leftEndpointBoundForStep(stepIndex int, step sourceTraversalStep, declaredEndpoints declaredStepEndpoints) bool { + leftSymbol := variableSymbol(step.LeftNode.Variable) + if leftSymbol == "" { + return stepIndex > 0 + } + + _, leftBound := declaredEndpoints.BeforeLeftNode[leftSymbol] + return leftBound +} + +func hasTraversalDirectionFlip(plan *LoweringPlan, target TraversalStepTarget) bool { + if plan == nil { + return false + } + + for _, decision := range plan.TraversalDirection { + if decision.Target == target && 
decision.Flip { + return true + } + } + + return false +} + +type bindingTargetKey struct { + QueryPartIndex int + Symbol string +} + +func appendPredicatePlacementDecisions(plan *LoweringPlan, query *cypher.RegularQuery, predicateAttachments []PredicateAttachment) { + if len(predicateAttachments) == 0 { + return + } + + bindingTargets := indexBindingTargets(query) + for _, attachment := range predicateAttachments { + if attachment.Scope != PredicateAttachmentScopeBinding || len(attachment.BindingSymbols) != 1 { + continue + } + + target, hasTarget := bindingTargets[bindingTargetKey{ + QueryPartIndex: attachment.QueryPartIndex, + Symbol: attachment.BindingSymbols[0], + }] + if !hasTarget { + continue + } + if target.ClauseIndex != attachment.ClauseIndex { + continue + } + + plan.PredicatePlacement = append(plan.PredicatePlacement, PredicatePlacementDecision{ + Target: target, + Attachment: attachment, + Placement: attachment.Scope, + }) + } +} + +func attachPredicatePlacementsToSuffixPushdowns(plan *LoweringPlan) { + for suffixIdx := range plan.ExpansionSuffixPushdown { + suffix := &plan.ExpansionSuffixPushdown[suffixIdx] + for _, placement := range plan.PredicatePlacement { + if placement.Target.QueryPartIndex != suffix.Target.QueryPartIndex || + placement.Target.ClauseIndex != suffix.Target.ClauseIndex || + placement.Target.PatternIndex != suffix.Target.PatternIndex { + continue + } + + if placement.Target.StepIndex > suffix.Target.StepIndex && + placement.Target.StepIndex <= suffix.Target.StepIndex+suffix.SuffixLength { + suffix.PredicateAttachments = append(suffix.PredicateAttachments, placement.Attachment) + } + } + } +} + +func appendCountStoreFastPathDecisions(plan *LoweringPlan, query *cypher.RegularQuery) { + if decision, ok := countStoreFastPathDecision(query); ok { + plan.CountStoreFastPath = append(plan.CountStoreFastPath, decision) + } +} + +func appendAggregateTraversalCountDecisions(plan *LoweringPlan, query *cypher.RegularQuery) { + if shape, ok := 
AggregateTraversalCountShapeForQuery(query); ok { + plan.AggregateTraversalCount = append(plan.AggregateTraversalCount, AggregateTraversalCountDecision{ + QueryPartIndex: shape.QueryPartIndex, + SourceSymbol: shape.SourceSymbol, + TerminalSymbol: shape.TerminalSymbol, + CountAlias: shape.CountAlias, + Limit: shape.Limit, + Target: shape.Target, + }) + } +} + +func AggregateTraversalCountShapeForQuery(query *cypher.RegularQuery) (AggregateTraversalCountShape, bool) { + if query == nil || query.SingleQuery == nil || query.SingleQuery.MultiPartQuery == nil { + return AggregateTraversalCountShape{}, false + } + + multiPartQuery := query.SingleQuery.MultiPartQuery + if len(multiPartQuery.Parts) != 1 || multiPartQuery.Parts[0] == nil || multiPartQuery.SinglePartQuery == nil { + return AggregateTraversalCountShape{}, false + } + + part := multiPartQuery.Parts[0] + if len(part.UpdatingClauses) > 0 || len(part.ReadingClauses) != 2 || part.With == nil || part.With.Where != nil { + return AggregateTraversalCountShape{}, false + } + + sourceMatch, sourceNode, sourceSymbol, ok := aggregateTraversalSourceMatch(part.ReadingClauses[0]) + if !ok { + return AggregateTraversalCountShape{}, false + } + + terminalMatch, relationship, terminalNode, terminalSymbol, ok := aggregateTraversalMatch(part.ReadingClauses[1], sourceSymbol) + if !ok { + return AggregateTraversalCountShape{}, false + } + + countAlias, ok := aggregateTraversalWithProjection(part.With.Projection, sourceSymbol, terminalSymbol) + if !ok { + return AggregateTraversalCountShape{}, false + } + + finalProjection, ok := aggregateTraversalFinalProjection(multiPartQuery.SinglePartQuery, sourceSymbol, countAlias) + if !ok { + return AggregateTraversalCountShape{}, false + } + + minDepth, maxDepth, ok := aggregateTraversalDepthBounds(relationship.Range) + if !ok { + return AggregateTraversalCountShape{}, false + } + + return AggregateTraversalCountShape{ + QueryPartIndex: 0, + SourceSymbol: sourceSymbol, + TerminalSymbol: 
terminalSymbol, + CountAlias: countAlias, + ReturnSourceAlias: finalProjection.SourceAlias, + ReturnCountAlias: finalProjection.CountAlias, + ReturnCount: finalProjection.ReturnCount, + Limit: finalProjection.Limit, + SourceMatch: sourceMatch, + TerminalMatch: terminalMatch, + SourceKinds: sourceNode.Kinds, + TerminalKinds: terminalNode.Kinds, + RelationshipKinds: relationship.Kinds, + Direction: relationship.Direction, + MinDepth: minDepth, + MaxDepth: maxDepth, + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 0, + }, + }, true +} + +func aggregateTraversalSourceMatch(readingClause *cypher.ReadingClause) (*cypher.Match, *cypher.NodePattern, string, bool) { + if readingClause == nil || readingClause.Match == nil { + return nil, nil, "", false + } + + match := readingClause.Match + if match.Optional || len(match.Pattern) != 1 { + return nil, nil, "", false + } + + patternPart := match.Pattern[0] + nodePattern, ok := singleNodePattern(patternPart) + if !ok || nodePattern == nil || nodePattern.Variable == nil || nodePattern.Variable.Symbol == "" || nodePattern.Properties != nil { + return nil, nil, "", false + } + + for _, dependency := range sortedDependencies(match.Where) { + if dependency != nodePattern.Variable.Symbol { + return nil, nil, "", false + } + } + + return match, nodePattern, nodePattern.Variable.Symbol, true +} + +func aggregateTraversalMatch(readingClause *cypher.ReadingClause, sourceSymbol string) (*cypher.Match, *cypher.RelationshipPattern, *cypher.NodePattern, string, bool) { + if readingClause == nil || readingClause.Match == nil { + return nil, nil, nil, "", false + } + + match := readingClause.Match + if match.Optional || len(match.Pattern) != 1 { + return nil, nil, nil, "", false + } + + patternPart := match.Pattern[0] + if patternPart == nil || patternPart.Variable != nil || patternPart.ShortestPathPattern || patternPart.AllShortestPathsPattern || len(patternPart.PatternElements) != 3 { + 
return nil, nil, nil, "", false + } + + leftNode, leftOK := patternPart.PatternElements[0].AsNodePattern() + relationship, relationshipOK := patternPart.PatternElements[1].AsRelationshipPattern() + rightNode, rightOK := patternPart.PatternElements[2].AsNodePattern() + if !leftOK || !relationshipOK || !rightOK || + leftNode == nil || relationship == nil || rightNode == nil || + variableSymbol(leftNode.Variable) != sourceSymbol || + leftNode.Properties != nil || + relationship.Variable != nil || + relationship.Range == nil || + relationship.Properties != nil || + relationship.Direction == graph.DirectionBoth || + rightNode.Properties != nil || + rightNode.Variable == nil || + rightNode.Variable.Symbol == "" { + return nil, nil, nil, "", false + } + + if match.Where != nil { + for _, dependency := range sortedDependencies(match.Where) { + if dependency != rightNode.Variable.Symbol { + return nil, nil, nil, "", false + } + } + } + + return match, relationship, rightNode, rightNode.Variable.Symbol, true +} + +func aggregateTraversalWithProjection(projection *cypher.Projection, sourceSymbol, terminalSymbol string) (string, bool) { + if projection == nil || projection.All || projection.Order != nil || projection.Skip != nil || projection.Limit != nil || len(projection.Items) != 2 { + return "", false + } + + if symbol, ok := projectionItemVariableSymbol(projection.Items[0]); !ok || symbol != sourceSymbol { + return "", false + } + + countAlias, ok := projectionItemCountAlias(projection.Items[1], terminalSymbol) + if !ok { + return "", false + } + + return countAlias, true +} + +type aggregateTraversalFinalProjectionShape struct { + SourceAlias string + CountAlias string + ReturnCount bool + Limit int64 +} + +func aggregateTraversalFinalProjection(queryPart *cypher.SinglePartQuery, sourceSymbol, countAlias string) (aggregateTraversalFinalProjectionShape, bool) { + if queryPart == nil || len(queryPart.ReadingClauses) > 0 || len(queryPart.UpdatingClauses) > 0 || 
queryPart.Return == nil || queryPart.Return.Projection == nil { + return aggregateTraversalFinalProjectionShape{}, false + } + + projection := queryPart.Return.Projection + if projection.Distinct || projection.All || projection.Skip != nil || projection.Order == nil || projection.Limit == nil || len(projection.Items) < 1 || len(projection.Items) > 2 { + return aggregateTraversalFinalProjectionShape{}, false + } + + finalProjection := aggregateTraversalFinalProjectionShape{ + SourceAlias: sourceSymbol, + CountAlias: countAlias, + } + + var ( + sourceSeen = false + countSeen = false + ) + for _, item := range projection.Items { + symbol, alias, ok := projectionItemVariableSymbolAndAlias(item) + if !ok { + return aggregateTraversalFinalProjectionShape{}, false + } + + switch symbol { + case sourceSymbol: + if sourceSeen { + return aggregateTraversalFinalProjectionShape{}, false + } + sourceSeen = true + finalProjection.SourceAlias = alias + case countAlias: + if countSeen { + return aggregateTraversalFinalProjectionShape{}, false + } + countSeen = true + finalProjection.ReturnCount = true + finalProjection.CountAlias = alias + default: + return aggregateTraversalFinalProjectionShape{}, false + } + } + if !sourceSeen { + return aggregateTraversalFinalProjectionShape{}, false + } + + if len(projection.Order.Items) != 1 || projection.Order.Items[0] == nil || projection.Order.Items[0].Ascending { + return aggregateTraversalFinalProjectionShape{}, false + } + + if orderSymbol, ok := expressionVariableSymbol(projection.Order.Items[0].Expression); !ok || (orderSymbol != countAlias && orderSymbol != finalProjection.CountAlias) { + return aggregateTraversalFinalProjectionShape{}, false + } + + limit, ok := literalInt64(projection.Limit.Value) + if !ok { + return aggregateTraversalFinalProjectionShape{}, false + } + finalProjection.Limit = limit + return finalProjection, true +} + +func aggregateTraversalDepthBounds(patternRange *cypher.PatternRange) (int64, int64, bool) { + if 
patternRange == nil { + return 0, 0, false + } + + minDepth := int64(1) + if patternRange.StartIndex != nil { + minDepth = *patternRange.StartIndex + } + if minDepth < 1 { + return 0, 0, false + } + + maxDepth := int64(15) + if patternRange.EndIndex != nil { + maxDepth = *patternRange.EndIndex + } + if maxDepth < minDepth { + return 0, 0, false + } + + return minDepth, maxDepth, true +} + +func projectionItemVariableSymbol(expression cypher.Expression) (string, bool) { + projectionItem, ok := expression.(*cypher.ProjectionItem) + if !ok || projectionItem == nil || projectionItem.Alias != nil { + return "", false + } + + return expressionVariableSymbol(projectionItem.Expression) +} + +func projectionItemVariableSymbolAndAlias(expression cypher.Expression) (string, string, bool) { + projectionItem, ok := expression.(*cypher.ProjectionItem) + if !ok || projectionItem == nil { + return "", "", false + } + + symbol, ok := expressionVariableSymbol(projectionItem.Expression) + if !ok { + return "", "", false + } + + alias := symbol + if projectionItem.Alias != nil { + if projectionItem.Alias.Symbol == "" { + return "", "", false + } + + alias = projectionItem.Alias.Symbol + } + + return symbol, alias, true +} + +func expressionVariableSymbol(expression cypher.Expression) (string, bool) { + variable, ok := expression.(*cypher.Variable) + if !ok || variable == nil || variable.Symbol == "" { + return "", false + } + + return variable.Symbol, true +} + +func projectionItemCountAlias(expression cypher.Expression, terminalSymbol string) (string, bool) { + projectionItem, ok := expression.(*cypher.ProjectionItem) + if !ok || projectionItem == nil || projectionItem.Alias == nil || projectionItem.Alias.Symbol == "" { + return "", false + } + + function, ok := projectionItem.Expression.(*cypher.FunctionInvocation) + if !ok || function == nil || !strings.EqualFold(function.Name, cypher.CountFunction) || + function.Distinct || len(function.Namespace) > 0 || len(function.Arguments) != 
1 { + return "", false + } + + if !aggregateTraversalCountArgumentMatches(function.Arguments[0], terminalSymbol) { + return "", false + } + + return projectionItem.Alias.Symbol, true +} + +func aggregateTraversalCountArgumentMatches(expression cypher.Expression, terminalSymbol string) bool { + if symbol, ok := expressionVariableSymbol(expression); ok { + return symbol == terminalSymbol + } + + rangeQuantifier, ok := expression.(*cypher.RangeQuantifier) + return ok && rangeQuantifier != nil && rangeQuantifier.Value == cypher.TokenLiteralAsterisk +} + +func literalInt64(expression cypher.Expression) (int64, bool) { + literal, ok := expression.(*cypher.Literal) + if !ok || literal == nil || literal.Null { + return 0, false + } + + switch value := literal.Value.(type) { + case int: + return int64(value), value >= 0 + case int8: + return int64(value), value >= 0 + case int16: + return int64(value), value >= 0 + case int32: + return int64(value), value >= 0 + case int64: + return value, value >= 0 + default: + return 0, false + } +} + +func countStoreFastPathDecision(query *cypher.RegularQuery) (CountStoreFastPathDecision, bool) { + if query == nil || query.SingleQuery == nil || query.SingleQuery.SinglePartQuery == nil { + return CountStoreFastPathDecision{}, false + } + + queryPart := query.SingleQuery.SinglePartQuery + if len(queryPart.UpdatingClauses) > 0 || len(queryPart.ReadingClauses) != 1 { + return CountStoreFastPathDecision{}, false + } + + countArgument, ok := simpleCountProjectionArgument(queryPart.Return) + if !ok { + return CountStoreFastPathDecision{}, false + } + + readingClause := queryPart.ReadingClauses[0] + if readingClause == nil || readingClause.Match == nil { + return CountStoreFastPathDecision{}, false + } + + match := readingClause.Match + if match.Optional || match.Where != nil || len(match.Pattern) != 1 { + return CountStoreFastPathDecision{}, false + } + + patternPart := match.Pattern[0] + if patternPart == nil || patternPart.Variable != nil || 
patternPart.ShortestPathPattern || patternPart.AllShortestPathsPattern { + return CountStoreFastPathDecision{}, false + } + + if len(patternPart.PatternElements) == 1 { + nodePattern, ok := patternPart.PatternElements[0].AsNodePattern() + if !ok || nodePattern == nil || nodePattern.Properties != nil { + return CountStoreFastPathDecision{}, false + } + + bindingSymbol := variableSymbol(nodePattern.Variable) + if countArgument != cypher.TokenLiteralAsterisk && countArgument != bindingSymbol { + return CountStoreFastPathDecision{}, false + } + + return CountStoreFastPathDecision{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + BindingSymbol: bindingSymbol, + Target: CountStoreFastPathNode, + KindSymbols: kindSymbols(nodePattern.Kinds), + }, true + } + + if len(patternPart.PatternElements) != 3 { + return CountStoreFastPathDecision{}, false + } + + leftNode, leftOK := patternPart.PatternElements[0].AsNodePattern() + relationship, relationshipOK := patternPart.PatternElements[1].AsRelationshipPattern() + rightNode, rightOK := patternPart.PatternElements[2].AsNodePattern() + if !leftOK || !relationshipOK || !rightOK { + return CountStoreFastPathDecision{}, false + } + + if constrainedCountFastPathEndpoint(leftNode) || constrainedCountFastPathEndpoint(rightNode) || + relationship == nil || relationship.Range != nil || relationship.Properties != nil || + relationship.Direction == graph.DirectionBoth { + return CountStoreFastPathDecision{}, false + } + + bindingSymbol := variableSymbol(relationship.Variable) + if countArgument != cypher.TokenLiteralAsterisk && countArgument != bindingSymbol { + return CountStoreFastPathDecision{}, false + } + + return CountStoreFastPathDecision{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + BindingSymbol: bindingSymbol, + Target: CountStoreFastPathEdge, + KindSymbols: kindSymbols(relationship.Kinds), + }, true +} + +func simpleCountProjectionArgument(returnClause *cypher.Return) (string, bool) { + if returnClause 
== nil || returnClause.Projection == nil { + return "", false + } + + projection := returnClause.Projection + if projection.Distinct || projection.All || projection.Order != nil || projection.Skip != nil || projection.Limit != nil || len(projection.Items) != 1 { + return "", false + } + + projectionItem, ok := projection.Items[0].(*cypher.ProjectionItem) + if !ok || projectionItem == nil { + return "", false + } + + function, ok := projectionItem.Expression.(*cypher.FunctionInvocation) + if !ok || function == nil || !strings.EqualFold(function.Name, cypher.CountFunction) || + function.Distinct || len(function.Namespace) > 0 || len(function.Arguments) != 1 { + return "", false + } + + switch argument := function.Arguments[0].(type) { + case *cypher.Variable: + if argument == nil { + return "", false + } + + return argument.Symbol, true + case *cypher.RangeQuantifier: + if argument != nil && argument.Value == cypher.TokenLiteralAsterisk { + return cypher.TokenLiteralAsterisk, true + } + } + + return "", false +} + +func constrainedCountFastPathEndpoint(nodePattern *cypher.NodePattern) bool { + return nodePattern == nil || nodePattern.Variable != nil || len(nodePattern.Kinds) > 0 || nodePattern.Properties != nil +} + +func kindSymbols(kinds graph.Kinds) []string { + if len(kinds) == 0 { + return nil + } + + symbols := make([]string, len(kinds)) + for idx, kind := range kinds { + symbols[idx] = kind.String() + } + + return symbols +} + +func indexBindingTargets(query *cypher.RegularQuery) map[bindingTargetKey]TraversalStepTarget { + targets := map[bindingTargetKey]TraversalStepTarget{} + + if query == nil || query.SingleQuery == nil { + return targets + } + + if query.SingleQuery.MultiPartQuery != nil { + for queryPartIndex, part := range query.SingleQuery.MultiPartQuery.Parts { + if part == nil { + continue + } + + indexReadingClauseBindingTargets(targets, queryPartIndex, part.ReadingClauses) + } + + if finalPart := query.SingleQuery.MultiPartQuery.SinglePartQuery; 
finalPart != nil { + indexReadingClauseBindingTargets(targets, len(query.SingleQuery.MultiPartQuery.Parts), finalPart.ReadingClauses) + } + } else if query.SingleQuery.SinglePartQuery != nil { + indexReadingClauseBindingTargets(targets, 0, query.SingleQuery.SinglePartQuery.ReadingClauses) + } + + return targets +} + +func indexReadingClauseBindingTargets(targets map[bindingTargetKey]TraversalStepTarget, queryPartIndex int, readingClauses []*cypher.ReadingClause) { + for clauseIndex, readingClause := range readingClauses { + if readingClause == nil || readingClause.Match == nil { + continue + } + + for patternIndex, patternPart := range readingClause.Match.Pattern { + patternTarget := PatternTarget{ + QueryPartIndex: queryPartIndex, + ClauseIndex: clauseIndex, + PatternIndex: patternIndex, + } + + for stepIndex, step := range traversalStepsForPattern(patternPart) { + stepTarget := patternTarget.TraversalStep(stepIndex) + setBindingTarget(targets, queryPartIndex, variableSymbol(step.LeftNode.Variable), stepTarget) + setBindingTarget(targets, queryPartIndex, variableSymbol(step.Relationship.Variable), stepTarget) + setBindingTarget(targets, queryPartIndex, variableSymbol(step.RightNode.Variable), stepTarget) + } + } + } +} + +func setBindingTarget(targets map[bindingTargetKey]TraversalStepTarget, queryPartIndex int, symbol string, target TraversalStepTarget) { + if symbol == "" { + return + } + + key := bindingTargetKey{ + QueryPartIndex: queryPartIndex, + Symbol: symbol, + } + if _, exists := targets[key]; !exists { + targets[key] = target + } +} + +func expansionSuffixPushdownLength(suffixSteps []sourceTraversalStep) int { + var suffixLength int + + for _, step := range suffixSteps { + if step.Relationship.Range != nil || step.Relationship.Direction == graph.DirectionBoth { + break + } + + suffixLength++ + } + + return suffixLength +} + +func declareMatchSymbols(declared map[string]struct{}, match *cypher.Match) { + if match == nil { + return + } + + for _, 
patternPart := range match.Pattern { + declarePatternSymbols(declared, patternPart) + } + + declareWhereSymbols(declared, match) +} + +func declarePatternSymbols(declared map[string]struct{}, patternPart *cypher.PatternPart) { + if patternPart == nil { + return + } + + addSymbol(declared, variableSymbol(patternPart.Variable)) + for _, element := range patternPart.PatternElements { + if element == nil { + continue + } + + if nodePattern, isNodePattern := element.AsNodePattern(); isNodePattern { + addSymbol(declared, variableSymbol(nodePattern.Variable)) + } else if relationshipPattern, isRelationshipPattern := element.AsRelationshipPattern(); isRelationshipPattern { + addSymbol(declared, variableSymbol(relationshipPattern.Variable)) + } + } +} + +func declareWhereSymbols(declared map[string]struct{}, match *cypher.Match) { + for _, dependency := range dependenciesForMatch(match) { + addSymbol(declared, dependency) + } +} + +func nodePatternHasConstraints(nodePattern *cypher.NodePattern) bool { + return nodePattern != nil && (len(nodePattern.Kinds) > 0 || nodePattern.Properties != nil) +} + +func relationshipPatternHasProperties(relationshipPattern *cypher.RelationshipPattern) bool { + return relationshipPattern != nil && relationshipPattern.Properties != nil +} + +func addSymbol(symbols map[string]struct{}, symbol string) { + if symbol != "" { + symbols[symbol] = struct{}{} + } +} + +func copyStringSet(values map[string]struct{}) map[string]struct{} { + copied := make(map[string]struct{}, len(values)) + for value := range values { + copied[value] = struct{}{} + } + + return copied +} + +func traversalStepsForPattern(patternPart *cypher.PatternPart) []sourceTraversalStep { + if patternPart == nil { + return nil + } + + var ( + steps []sourceTraversalStep + leftNode *cypher.NodePattern + relationship *cypher.RelationshipPattern + ) + + for _, element := range patternPart.PatternElements { + if element == nil { + continue + } + + if nodePattern, isNodePattern := 
element.AsNodePattern(); isNodePattern { + if leftNode == nil { + leftNode = nodePattern + continue + } + + if relationship != nil { + steps = append(steps, sourceTraversalStep{ + LeftNode: leftNode, + Relationship: relationship, + RightNode: nodePattern, + }) + } + + leftNode = nodePattern + relationship = nil + } else if relationshipPattern, isRelationshipPattern := element.AsRelationshipPattern(); isRelationshipPattern { + relationship = relationshipPattern + } + } + + return steps +} + +func variableSymbol(variable *cypher.Variable) string { + if variable == nil { + return "" + } + + return variable.Symbol +} diff --git a/cypher/models/pgsql/optimize/optimizer.go b/cypher/models/pgsql/optimize/optimizer.go new file mode 100644 index 00000000..d115167d --- /dev/null +++ b/cypher/models/pgsql/optimize/optimizer.go @@ -0,0 +1,165 @@ +package optimize + +import "github.com/specterops/dawgs/cypher/models/cypher" + +type Rule interface { + Name() string + Apply(*Plan) (bool, error) +} + +type RuleResult struct { + Name string `json:"name"` + Applied bool `json:"applied"` +} + +type PredicateAttachmentScope string + +const ( + PredicateAttachmentScopeBinding PredicateAttachmentScope = "binding" + PredicateAttachmentScopeRegion PredicateAttachmentScope = "region" +) + +type PredicateAttachment struct { + QueryPartIndex int `json:"query_part_index"` + RegionIndex int `json:"region_index"` + ClauseIndex int `json:"clause_index"` + ExpressionIndex int `json:"expression_index"` + Scope PredicateAttachmentScope `json:"scope"` + BindingSymbols []string `json:"binding_symbols"` + Dependencies []string `json:"dependencies"` +} + +type Plan struct { + Query *cypher.RegularQuery + Analysis Analysis + LoweringPlan LoweringPlan + Rules []RuleResult + PredicateAttachments []PredicateAttachment +} + +type Optimizer struct { + rules []Rule +} + +func NewOptimizer(rules ...Rule) Optimizer { + return Optimizer{ + rules: rules, + } +} + +func DefaultRules() []Rule { + return []Rule{ + 
ConservativePatternReorderingRule{}, + PredicateAttachmentRule{}, + } +} + +func Optimize(query *cypher.RegularQuery) (Plan, error) { + return NewOptimizer(DefaultRules()...).Optimize(query) +} + +func (s Optimizer) Optimize(query *cypher.RegularQuery) (Plan, error) { + if query == nil { + return Plan{}, nil + } + + plan := Plan{ + Query: cypher.Copy(query), + } + plan.Analysis = Analyze(plan.Query) + + for _, rule := range s.rules { + applied, err := rule.Apply(&plan) + if err != nil { + return Plan{}, err + } + + plan.Rules = append(plan.Rules, RuleResult{ + Name: rule.Name(), + Applied: applied, + }) + plan.Analysis = Analyze(plan.Query) + } + + if loweringPlan, err := BuildLoweringPlan(plan.Query, plan.PredicateAttachments); err != nil { + return Plan{}, err + } else { + plan.LoweringPlan = loweringPlan + } + + return plan, nil +} + +type PredicateAttachmentRule struct{} + +func (s PredicateAttachmentRule) Name() string { + return "PredicateAttachment" +} + +func (s PredicateAttachmentRule) Apply(plan *Plan) (bool, error) { + plan.PredicateAttachments = AttachPredicates(plan.Analysis) + return len(plan.PredicateAttachments) > 0, nil +} + +func AttachPredicates(analysis Analysis) []PredicateAttachment { + var attachments []PredicateAttachment + + for _, queryPart := range analysis.QueryParts { + for regionIndex, region := range queryPart.Regions { + regionBindings := regionBindingSymbols(region) + + for _, predicate := range region.Predicates { + var ( + bindingSymbols = predicateBindingSymbols(predicate, regionBindings) + scope = PredicateAttachmentScopeRegion + ) + + if len(bindingSymbols) == 1 && len(predicate.Dependencies) == 1 { + scope = PredicateAttachmentScopeBinding + } + + attachments = append(attachments, PredicateAttachment{ + QueryPartIndex: region.QueryPartIndex, + RegionIndex: regionIndex, + ClauseIndex: predicate.ClauseIndex, + ExpressionIndex: predicate.ExpressionIndex, + Scope: scope, + BindingSymbols: copyStrings(bindingSymbols), + 
Dependencies: copyStrings(predicate.Dependencies), + }) + } + } + } + + return attachments +} + +func regionBindingSymbols(region Region) map[string]struct{} { + bindings := map[string]struct{}{} + + for _, binding := range region.Bindings { + bindings[binding.Symbol] = struct{}{} + } + + return bindings +} + +func predicateBindingSymbols(predicate Predicate, regionBindings map[string]struct{}) []string { + var bindingSymbols []string + + for _, dependency := range predicate.Dependencies { + if _, isRegionBinding := regionBindings[dependency]; isRegionBinding { + bindingSymbols = append(bindingSymbols, dependency) + } + } + + return bindingSymbols +} + +func copyStrings(values []string) []string { + if values == nil { + return nil + } + + return append([]string(nil), values...) +} diff --git a/cypher/models/pgsql/optimize/optimizer_test.go b/cypher/models/pgsql/optimize/optimizer_test.go new file mode 100644 index 00000000..b1f577ac --- /dev/null +++ b/cypher/models/pgsql/optimize/optimizer_test.go @@ -0,0 +1,1384 @@ +package optimize + +import ( + "testing" + + "github.com/specterops/dawgs/cypher/frontend" + "github.com/specterops/dawgs/cypher/models" + "github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/stretchr/testify/require" +) + +type testRule struct { + name string +} + +func (s testRule) Name() string { + return s.name +} + +func (s testRule) Apply(plan *Plan) (bool, error) { + return false, nil +} + +type testBindingLookup map[pgsql.Identifier]pgsql.DataType + +func (s testBindingLookup) LookupDataType(identifier pgsql.Identifier) (pgsql.DataType, bool) { + dataType, found := s[identifier] + return dataType, found +} + +func TestOptimizeCopiesAndAnalyzesQuery(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), adcsQuery) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.NotSame(t, 
regularQuery, plan.Query) + require.Len(t, plan.Analysis.QueryParts, 1) + require.Len(t, plan.Analysis.QueryParts[0].Regions, 1) + require.Equal(t, []string{"p1", "p2"}, plan.Analysis.QueryParts[0].ProjectionDependencies) + require.Equal(t, []RuleResult{ + {Name: "ConservativePatternReordering", Applied: false}, + {Name: "PredicateAttachment", Applied: true}, + }, plan.Rules) + require.Len(t, plan.PredicateAttachments, 2) +} + +func TestOptimizePlansADCSFanoutRewrite(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), adcsQuery) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + + ctPredicate := PredicateAttachment{ + QueryPartIndex: 0, + RegionIndex: 0, + ClauseIndex: 2, + ExpressionIndex: 0, + Scope: PredicateAttachmentScopeBinding, + BindingSymbols: []string{"ct"}, + Dependencies: []string{"ct"}, + } + + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringExpansionSuffixPushdown}) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringPredicatePlacement}) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringExpandIntoDetection}) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringLatePathMaterialization}) + + require.Contains(t, plan.LoweringPlan.ExpansionSuffixPushdown, ExpansionSuffixPushdownDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 0, + }, + SuffixLength: 3, + SuffixStartStep: 1, + SuffixEndStep: 3, + }) + require.Contains(t, plan.LoweringPlan.ExpansionSuffixPushdown, ExpansionSuffixPushdownDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 2, + PatternIndex: 0, + StepIndex: 0, + }, + SuffixLength: 2, + SuffixStartStep: 1, + SuffixEndStep: 2, + PredicateAttachments: []PredicateAttachment{ctPredicate}, + }) + require.Contains(t, 
plan.LoweringPlan.ExpansionSuffixPushdown, ExpansionSuffixPushdownDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 2, + PatternIndex: 0, + StepIndex: 3, + }, + SuffixLength: 1, + SuffixStartStep: 4, + SuffixEndStep: 4, + }) + + require.Contains(t, plan.LoweringPlan.ExpandInto, ExpandIntoDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 2, + PatternIndex: 0, + StepIndex: 2, + }, + }) + require.Contains(t, plan.LoweringPlan.ExpandInto, ExpandIntoDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 2, + PatternIndex: 0, + StepIndex: 4, + }, + }) + require.Contains(t, plan.LoweringPlan.PredicatePlacement, PredicatePlacementDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 2, + PatternIndex: 0, + StepIndex: 1, + }, + Attachment: ctPredicate, + Placement: PredicateAttachmentScopeBinding, + }) +} + +func TestOptimizerRunsRulesAndRefreshesAnalysis(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), `MATCH (n) RETURN n`) + require.NoError(t, err) + + plan, err := NewOptimizer(testRule{name: "test"}).Optimize(regularQuery) + require.NoError(t, err) + require.Equal(t, []RuleResult{{Name: "test", Applied: false}}, plan.Rules) + require.Len(t, plan.Analysis.QueryParts, 1) + require.Len(t, plan.Analysis.QueryParts[0].Regions, 1) +} + +func TestDefaultPredicateAttachmentRuleReportsSkippedWhenNoPredicatesExist(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), `MATCH (n) RETURN n`) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Equal(t, []RuleResult{ + {Name: "ConservativePatternReordering", Applied: false}, + {Name: "PredicateAttachment", Applied: false}, + }, plan.Rules) + require.Empty(t, plan.PredicateAttachments) +} + +func TestLoweringPlanReportsProjectionPruning(t *testing.T) { + t.Parallel() + + regularQuery, err := 
frontend.ParseCypher(frontend.NewContext(), ` + MATCH (n)-[r:MemberOf]->(m) + RETURN m + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Equal(t, []LoweringDecision{{Name: LoweringProjectionPruning}}, plan.LoweringPlan.Decisions()) + require.Equal(t, []ProjectionPruningDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + ReferencedSymbols: []string{"m"}, + OmitLeftNode: true, + OmitRelationship: true, + }}, plan.LoweringPlan.ProjectionPruning) +} + +func TestLoweringPlanProjectionPruningKeepsUpdateTargets(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (a)-[r:MemberOf]->(m) + SET a.name = 'updated', r.seen = true + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Equal(t, []ProjectionPruningDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + ReferencedSymbols: []string{"a", "r"}, + OmitRightNode: true, + }}, plan.LoweringPlan.ProjectionPruning) +} + +func TestLoweringPlanReportsPatternPredicateProjectionPruning(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (s) + WHERE (s)-[]->() + RETURN s + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.ProjectionPruning, ProjectionPruningDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + Predicate: true, + StepIndex: 0, + }, + ReferencedSymbols: []string{"s"}, + OmitRelationship: true, + OmitRightNode: true, + }) +} + +func TestLoweringPlanReportsPatternPredicateExistencePlacement(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (s) + WHERE NOT (s)-[]-() + RETURN s + `) + require.NoError(t, err) + 
+ plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringPredicatePlacement}) + require.Equal(t, []PatternPredicatePlacementDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + Predicate: true, + StepIndex: 0, + }, + Mode: PatternPredicatePlacementExistence, + }}, plan.LoweringPlan.PatternPredicate) +} + +func TestLoweringPlanReportsTypedPatternPredicateExistencePlacement(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (n:Domain), (m:Domain) + WHERE (n)-[:SpoofSIDHistory|AbuseTGTDelegation]-(m) + RETURN n + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringPredicatePlacement}) + require.Equal(t, []PatternPredicatePlacementDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + Predicate: true, + StepIndex: 0, + }, + Mode: PatternPredicatePlacementExistence, + }}, plan.LoweringPlan.PatternPredicate) +} + +func TestSelectivityModelPlansTraversalDirection(t *testing.T) { + t.Parallel() + + var ( + model = NewSelectivityModel(testBindingLookup{}) + rightIDLookup = pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{pgsql.Identifier("n1"), pgsql.ColumnID}, + pgsql.OperatorEquals, + pgsql.NewLiteral(1, pgsql.Int), + ) + ) + + shouldFlip, err := model.ShouldFlipTraversalDirection(false, false, nil, rightIDLookup) + require.NoError(t, err) + require.True(t, shouldFlip) + + shouldFlip, err = model.ShouldFlipTraversalDirection(true, false, nil, rightIDLookup) + require.NoError(t, err) + require.False(t, shouldFlip) + + shouldFlip, err = model.ShouldFlipTraversalDirection(false, true, nil, nil) + require.NoError(t, err) + require.True(t, shouldFlip) +} + +func TestLoweringPlanReportsLatePathMaterialization(t *testing.T) { + t.Parallel() + + t.Run("path edge id", func(t 
*testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n)-[r:MemberOf]->(m) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Equal(t, []LatePathMaterializationDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Mode: LatePathMaterializationPathEdgeID, + }}, plan.LoweringPlan.LatePathMaterialization) + }) + + t.Run("relationship composite", func(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n)-[r:MemberOf]->(m) + RETURN p, r + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Equal(t, LatePathMaterializationEdgeComposite, plan.LoweringPlan.LatePathMaterialization[0].Mode) + }) + + t.Run("continuation relationship id", func(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (n)-[:MemberOf]->(m)-[:Enroll]->(ca) + RETURN ca + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.LatePathMaterialization, LatePathMaterializationDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Mode: LatePathMaterializationPathEdgeID, + }) + }) + + t.Run("pattern predicate continuation relationship id", func(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (s) + WHERE (s)-[]->()-[]->() + RETURN s + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.LatePathMaterialization, LatePathMaterializationDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + Predicate: true, + StepIndex: 0, + }, + Mode: 
LatePathMaterializationPathEdgeID, + }) + }) +} + +func TestLoweringPlanReportsExpansionSuffixPushdown(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n:Group)-[:MemberOf*0..]->(m)-[:Enroll]->(ca:EnterpriseCA) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringExpansionSuffixPushdown}) + require.Equal(t, []ExpansionSuffixPushdownDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + SuffixLength: 1, + SuffixStartStep: 1, + SuffixEndStep: 1, + }}, plan.LoweringPlan.ExpansionSuffixPushdown) +} + +func TestLoweringPlanIncludesConstrainedBoundEndpointInExpansionSuffix(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (ca) + MATCH p = (n:Group)-[:MemberOf*0..]->(m)-[:Enroll]->(ct:CertTemplate)-[:PublishedTo]->(ca:EnterpriseCA) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringExpansionSuffixPushdown}) + require.Contains(t, plan.LoweringPlan.ExpansionSuffixPushdown, ExpansionSuffixPushdownDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 0, + }, + SuffixLength: 2, + SuffixStartStep: 1, + SuffixEndStep: 2, + }) +} + +func TestLoweringPlanReportsCountStoreFastPath(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + query string + expected CountStoreFastPathDecision + }{ + { + name: "node count", + query: "MATCH (n:Group) RETURN count(n)", + expected: CountStoreFastPathDecision{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + BindingSymbol: "n", + Target: CountStoreFastPathNode, + KindSymbols: 
[]string{"Group"}, + }, + }, + { + name: "node count star", + query: "MATCH (:Group) RETURN count(*)", + expected: CountStoreFastPathDecision{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + Target: CountStoreFastPathNode, + KindSymbols: []string{"Group"}, + }, + }, + { + name: "edge count", + query: "MATCH ()-[r:MemberOf]->() RETURN count(r)", + expected: CountStoreFastPathDecision{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + BindingSymbol: "r", + Target: CountStoreFastPathEdge, + KindSymbols: []string{"MemberOf"}, + }, + }, + { + name: "edge count star", + query: "MATCH ()-[:MemberOf]->() RETURN count(*)", + expected: CountStoreFastPathDecision{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + Target: CountStoreFastPathEdge, + KindSymbols: []string{"MemberOf"}, + }, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), testCase.query) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringCountStoreFastPath}) + require.Equal(t, []CountStoreFastPathDecision{testCase.expected}, plan.LoweringPlan.CountStoreFastPath) + }) + } +} + +func TestLoweringPlanPlacesBindingPredicates(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n:Group)-[:MemberOf*0..]->(m)-[:Enroll]->(ca:EnterpriseCA) + WHERE ca.name = 'target' + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringPredicatePlacement}) + require.Len(t, plan.LoweringPlan.PredicatePlacement, 1) + require.Equal(t, TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 1, + }, 
plan.LoweringPlan.PredicatePlacement[0].Target) + require.Equal(t, []string{"ca"}, plan.LoweringPlan.PredicatePlacement[0].Attachment.BindingSymbols) + require.Equal(t, []PredicateAttachment{plan.LoweringPlan.PredicatePlacement[0].Attachment}, plan.LoweringPlan.ExpansionSuffixPushdown[0].PredicateAttachments) +} + +func TestLoweringPlanDoesNotPlaceCrossClauseBindingPredicates(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (n:Group) + WHERE n.objectid = 'S-1-5-21-1' + MATCH p = (n)-[:MemberOf*1..]->(ca:EnterpriseCA) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.NotEmpty(t, plan.PredicateAttachments) + require.Empty(t, plan.LoweringPlan.PredicatePlacement) + require.NotContains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringPredicatePlacement}) +} + +func TestLoweringPlanReportsExpandInto(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (a:Group) + MATCH (b:Group) + MATCH p = (a)-[:MemberOf]->(b) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringExpandIntoDetection}) + require.Equal(t, []ExpandIntoDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 2, + PatternIndex: 0, + StepIndex: 0, + }, + }}, plan.LoweringPlan.ExpandInto) +} + +func TestLoweringPlanReportsExpandIntoForAnonymousContinuationEndpoint(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (d:Domain) + MATCH p = (ca:EnterpriseCA)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(:RootCA)-[:RootCAFor]->(d) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.ExpandInto, ExpandIntoDecision{ 
+ Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 1, + }, + }) +} + +func TestLoweringPlanReportsTraversalDirectionForConstrainedRightEndpoint(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n)-[:MemberOf*1..]->(ca:EnterpriseCA) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringTraversalDirection}) + require.Equal(t, []TraversalDirectionDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Flip: true, + Reason: traversalDirectionReasonRightConstrained, + }}, plan.LoweringPlan.TraversalDirection) +} + +func TestLoweringPlanReportsTraversalDirectionForBoundRightEndpoint(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (ca:EnterpriseCA) + MATCH p = (n)-[:MemberOf*1..]->(ca) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringTraversalDirection}) + require.Equal(t, []TraversalDirectionDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 0, + }, + Flip: true, + Reason: traversalDirectionReasonRightBound, + }}, plan.LoweringPlan.TraversalDirection) +} + +func TestLoweringPlanSkipsTraversalDirectionWhenLeftEndpointHasBindingPredicate(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n)-[:MemberOf*1..]->(ca:EnterpriseCA) + WHERE n.name = 'target' + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Empty(t, plan.LoweringPlan.TraversalDirection) +} + +func 
TestLoweringPlanSkipsTraversalDirectionWhenLeftEndpointHasRegionPredicate(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + WITH 'target' AS name + MATCH p = (n)-[:MemberOf]->(ca:EnterpriseCA) + WHERE n.name STARTS WITH name + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Empty(t, plan.LoweringPlan.TraversalDirection) +} + +func TestLoweringPlanReportsTraversalDirectionForRightEndpointPredicate(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n)-[:MemberOf*1..]->(ca) + WHERE ca.name = 'target' + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringTraversalDirection}) + require.Equal(t, []TraversalDirectionDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Flip: true, + Reason: traversalDirectionReasonRightPredicate, + }}, plan.LoweringPlan.TraversalDirection) +} + +func TestLoweringPlanReportsTraversalDirectionForBoundLeftExpansionToConstrainedRightEndpoint(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.hasspn = true AND u.enabled = true + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer {name: 'target'}) + RETURN c + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringTraversalDirection}) + require.Equal(t, []TraversalDirectionDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 0, + }, + Flip: true, + Reason: traversalDirectionReasonRightConstrained, + }}, plan.LoweringPlan.TraversalDirection) +} + +func 
TestLoweringPlanSkipsBoundLeftDirectionForSelectiveSource(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.objectid = 'S-1-5-21-1-1100' + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer {name: 'target'}) + RETURN c + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringTraversalDirection}) + require.Equal(t, []TraversalDirectionDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 0, + }, + Reason: traversalDirectionReasonBoundSourceSelective, + }}, plan.LoweringPlan.TraversalDirection) +} + +func TestLoweringPlanSkipsBoundLeftDirectionAfterPriorLimit(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.hasspn = true + WITH u + LIMIT 10 + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer {name: 'target'}) + RETURN c + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringTraversalDirection}) + require.Equal(t, []TraversalDirectionDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 1, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Reason: traversalDirectionReasonBoundSourceSelective, + }}, plan.LoweringPlan.TraversalDirection) +} + +func TestLoweringPlanSkipsBoundLeftDirectionAfterGreedyProjectionLimit(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.hasspn = true + WITH * + LIMIT 10 + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer {name: 'target'}) + RETURN c + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), 
LoweringDecision{Name: LoweringTraversalDirection}) + require.Equal(t, []TraversalDirectionDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 1, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Reason: traversalDirectionReasonBoundSourceSelective, + }}, plan.LoweringPlan.TraversalDirection) +} + +func TestLoweringPlanAllowsUniqueRightEndpointAfterPriorLimit(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.hasspn = true + WITH u + LIMIT 10 + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer {objectid: 'S-1-5-21-1-2000'}) + RETURN c + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringTraversalDirection}) + require.Equal(t, []TraversalDirectionDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 1, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Flip: true, + Reason: traversalDirectionReasonRightConstrained, + }}, plan.LoweringPlan.TraversalDirection) +} + +func TestLoweringPlanReportsAggregateTraversalCountForBoundExpansionCount(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.hasspn = true AND u.enabled = true + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) + WITH DISTINCT u, COUNT(c) AS adminCount + RETURN u + ORDER BY adminCount DESC + LIMIT 100 + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringTraversalDirection}) + require.Equal(t, []TraversalDirectionDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 0, + }, + Reason: traversalDirectionReasonTerminalKindOnlyEstimateWide, + }}, plan.LoweringPlan.TraversalDirection) + require.Contains(t, 
plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringAggregateTraversalCount}) + require.Equal(t, []AggregateTraversalCountDecision{{ + QueryPartIndex: 0, + SourceSymbol: "u", + TerminalSymbol: "c", + CountAlias: "adminCount", + Limit: 100, + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 0, + }, + }}, plan.LoweringPlan.AggregateTraversalCount) +} + +func TestLoweringPlanReportsAggregateTraversalCountForRowCount(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.hasspn = true AND u.enabled = true + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) + WITH DISTINCT u, COUNT(*) AS adminCount + RETURN u + ORDER BY adminCount DESC + LIMIT 100 + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringAggregateTraversalCount}) + require.Equal(t, "adminCount", plan.LoweringPlan.AggregateTraversalCount[0].CountAlias) +} + +func TestLoweringPlanReportsAggregateTraversalCountWhenReturningCountAlias(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.hasspn = true + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) + WITH DISTINCT u, COUNT(c) AS adminCount + RETURN u AS user, adminCount AS privileges + ORDER BY privileges DESC + LIMIT 100 + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringAggregateTraversalCount}) + + shape, ok := AggregateTraversalCountShapeForQuery(plan.Query) + require.True(t, ok) + require.Equal(t, "user", shape.ReturnSourceAlias) + require.True(t, shape.ReturnCount) + require.Equal(t, "privileges", shape.ReturnCountAlias) +} + +func TestLoweringPlanReportsAggregateTraversalCountWithTerminalFilter(t 
*testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.hasspn = true + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) + WHERE c.enabled = true + WITH DISTINCT u, COUNT(c) AS adminCount + RETURN u + ORDER BY adminCount DESC + LIMIT 100 + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringAggregateTraversalCount}) +} + +func TestLoweringPlanSkipsAggregateTraversalCountWithCorrelatedTerminalFilter(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (u:User) + WHERE u.hasspn = true + MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) + WHERE c.name = u.name + WITH DISTINCT u, COUNT(c) AS adminCount + RETURN u + ORDER BY adminCount DESC + LIMIT 100 + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.NotContains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringAggregateTraversalCount}) +} + +func TestLoweringPlanSkipsSuffixPushdownAfterRightEndpointPredicateDirectionFlip(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n)-[:MemberOf*1..]->(ca)-[:TrustedForNTAuth]->(d:Domain) + WHERE ca.name = 'target' + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringTraversalDirection}) + require.Empty(t, plan.LoweringPlan.ExpansionSuffixPushdown) +} + +func TestLoweringPlanReportsShortestPathStrategyForEndpointPredicates(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = allShortestPaths((s)-[:MemberOf*1..]->(e)) + WHERE s.name = 'source' AND e.name = 'target' + RETURN p + `) + require.NoError(t, 
err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringShortestPathStrategy}) + require.Equal(t, []ShortestPathStrategyDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Strategy: ShortestPathStrategyBidirectional, + Reason: shortestPathStrategyReasonEndpointPredicates, + }}, plan.LoweringPlan.ShortestPathStrategy) + require.Equal(t, []ShortestPathFilterDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Mode: ShortestPathFilterEndpointPair, + Reason: shortestPathFilterReasonEndpointPairPredicates, + }}, plan.LoweringPlan.ShortestPathFilter) +} + +func TestLoweringPlanReportsShortestPathStrategyForBoundEndpointPairs(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (a:Group) + MATCH (b:EnterpriseCA) + MATCH p = shortestPath((a)-[:MemberOf*1..]->(b)) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringShortestPathStrategy}) + require.Equal(t, []ShortestPathStrategyDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 2, + PatternIndex: 0, + StepIndex: 0, + }, + Strategy: ShortestPathStrategyBidirectional, + Reason: shortestPathStrategyReasonBoundEndpointPairs, + }}, plan.LoweringPlan.ShortestPathStrategy) +} + +func TestLoweringPlanSkipsShortestPathStrategyForLabelOnlyEndpoints(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = allShortestPaths((s:Group)-[:MemberOf*1..]->(e:EnterpriseCA)) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Empty(t, 
plan.LoweringPlan.ShortestPathStrategy) +} + +func TestLoweringPlanReportsShortestPathTerminalFilter(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (s:Group {name: 'source'}) + MATCH p = shortestPath((s)-[:MemberOf*1..]->(e)) + WHERE e.name = 'target' + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringShortestPathFilter}) + require.Equal(t, []ShortestPathFilterDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 1, + PatternIndex: 0, + StepIndex: 0, + }, + Mode: ShortestPathFilterTerminal, + Reason: shortestPathFilterReasonTerminalPredicate, + }}, plan.LoweringPlan.ShortestPathFilter) +} + +func TestLoweringPlanReportsShortestPathTerminalFilterForKindOnlyTerminal(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = shortestPath((s:Group)-[:MemberOf|GenericAll|AdminTo*1..]->(t:Tag_Tier_Zero)) + WHERE s.objectid ENDS WITH '-513' AND s <> t + RETURN p + LIMIT 1000 + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.NotContains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringShortestPathStrategy}) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringShortestPathFilter}) + require.Equal(t, []ShortestPathFilterDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Mode: ShortestPathFilterTerminal, + Reason: shortestPathFilterReasonTerminalPredicate, + }}, plan.LoweringPlan.ShortestPathFilter) +} + +func TestLoweringPlanReportsTraversalLimitPushdown(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n:Group)-[:MemberOf]->(m:Group) + RETURN p + LIMIT 1 + `) 
+ require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringLimitPushdown}) + require.Equal(t, []LimitPushdownDecision{{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Mode: LimitPushdownTraversalCTE, + }}, plan.LoweringPlan.LimitPushdown) +} + +func TestLoweringPlanReportsShortestPathLimitPushdown(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = shortestPath((s)-[:MemberOf*1..]->(e)) + WHERE s.name = 'source' AND e.name = 'target' + RETURN p + LIMIT 1 + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Contains(t, plan.LoweringPlan.Decisions(), LoweringDecision{Name: LoweringLimitPushdown}) + require.Contains(t, plan.LoweringPlan.LimitPushdown, LimitPushdownDecision{ + Target: TraversalStepTarget{ + QueryPartIndex: 0, + ClauseIndex: 0, + PatternIndex: 0, + StepIndex: 0, + }, + Mode: LimitPushdownShortestPathHarness, + }) +} + +func TestLoweringPlanSkipsAllShortestPathLimitPushdown(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = allShortestPaths((s)-[:MemberOf*1..]->(e)) + WHERE s.name = 'source' AND e.name = 'target' + RETURN p + LIMIT 1 + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Empty(t, plan.LoweringPlan.LimitPushdown) +} + +func TestLoweringPlanSkipsOptionalMatchLimitPushdown(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n)-[:MemberOf]->(m:Group) + RETURN p + LIMIT 1 + `) + require.NoError(t, err) + require.Len(t, regularQuery.SingleQuery.SinglePartQuery.ReadingClauses, 1) + regularQuery.SingleQuery.SinglePartQuery.ReadingClauses[0].Match.Optional = true + + plan, err := 
Optimize(regularQuery) + require.NoError(t, err) + require.Empty(t, plan.LoweringPlan.LimitPushdown) +} + +func TestSelectReferencesOnlyLocalIdentifiersValidatesJoinConstraintsIncrementally(t *testing.T) { + t.Parallel() + + tableRef := func(alias pgsql.Identifier) pgsql.TableReference { + return pgsql.TableReference{ + Name: pgsql.CompoundIdentifier{pgsql.TableNode}, + Binding: models.OptionalValue(alias), + } + } + + selectBody := pgsql.Select{ + Projection: []pgsql.SelectItem{ + pgsql.CompoundIdentifier{pgsql.Identifier("a"), pgsql.ColumnID}, + }, + From: []pgsql.FromClause{{ + Source: tableRef(pgsql.Identifier("a")), + Joins: []pgsql.Join{{ + Table: tableRef(pgsql.Identifier("b")), + JoinOperator: pgsql.JoinOperator{ + Constraint: pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{pgsql.Identifier("b"), pgsql.ColumnID}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{pgsql.Identifier("c"), pgsql.ColumnID}, + ), + }, + }, { + Table: tableRef(pgsql.Identifier("c")), + }}, + }}, + } + + require.False(t, SelectReferencesOnlyLocalIdentifiers(selectBody, pgsql.NewIdentifierSet())) +} + +func TestMeasureSelectivityPopReturnsTopFrame(t *testing.T) { + t.Parallel() + + visitor := newMeasureSelectivityVisitor(NewSelectivityModel(nil)) + visitor.addSelectivity(7) + visitor.pushSelectivity(11) + visitor.addSelectivity(13) + + require.Equal(t, 24, visitor.popSelectivity()) + require.Equal(t, 7, visitor.Selectivity()) +} + +func TestCollectReferencedSourceIdentifiersIgnoresMatchDeclarations(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (n)-[r:MemberOf]->(m) + RETURN m + `) + require.NoError(t, err) + + references, err := collectReferencedSourceIdentifiers(regularQuery) + require.NoError(t, err) + require.NotContains(t, references, "n") + require.NotContains(t, references, "r") + require.Contains(t, references, "m") +} + +func TestLoweringPlanSkipsDirectionlessExpansionSuffixPushdown(t *testing.T) { + 
t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH p = (n:Group)-[:MemberOf*0..]->(m)-[:Enroll]-(ca:EnterpriseCA) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Empty(t, plan.LoweringPlan.ExpansionSuffixPushdown) +} + +func TestPredicateAttachmentRuleAssignsSingleBindingPredicates(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), adcsQuery) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Len(t, plan.PredicateAttachments, 2) + + require.Equal(t, PredicateAttachment{ + QueryPartIndex: 0, + RegionIndex: 0, + ClauseIndex: 0, + ExpressionIndex: 0, + Scope: PredicateAttachmentScopeBinding, + BindingSymbols: []string{"n"}, + Dependencies: []string{"n"}, + }, plan.PredicateAttachments[0]) + + require.Equal(t, PredicateAttachment{ + QueryPartIndex: 0, + RegionIndex: 0, + ClauseIndex: 2, + ExpressionIndex: 0, + Scope: PredicateAttachmentScopeBinding, + BindingSymbols: []string{"ct"}, + Dependencies: []string{"ct"}, + }, plan.PredicateAttachments[1]) +} + +func TestPredicateAttachmentRuleKeepsMultiBindingPredicatesAtRegionScope(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (a)-[:MemberOf]->(b) + WHERE a.objectid = b.objectid + RETURN a + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Len(t, plan.PredicateAttachments, 1) + + require.Equal(t, PredicateAttachment{ + QueryPartIndex: 0, + RegionIndex: 0, + ClauseIndex: 0, + ExpressionIndex: 0, + Scope: PredicateAttachmentScopeRegion, + BindingSymbols: []string{"a", "b"}, + Dependencies: []string{"a", "b"}, + }, plan.PredicateAttachments[0]) +} + +func firstNodeSymbol(readingClause *cypher.ReadingClause) string { + if readingClause == nil || readingClause.Match == nil || 
len(readingClause.Match.Pattern) == 0 { + return "" + } + + nodePattern, ok := singleNodePattern(readingClause.Match.Pattern[0]) + if !ok || nodePattern.Variable == nil { + return "" + } + + return nodePattern.Variable.Symbol +} + +func TestConservativePatternReorderingMovesIndependentNodeAnchorsEarlier(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (a) + MATCH (b:Group {objectid: 'target'}) + MATCH p = (a)-[:MemberOf]->(b) + RETURN p + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Equal(t, []RuleResult{ + {Name: "ConservativePatternReordering", Applied: true}, + {Name: "PredicateAttachment", Applied: false}, + }, plan.Rules) + + readingClauses := plan.Query.SingleQuery.SinglePartQuery.ReadingClauses + require.Equal(t, "b", firstNodeSymbol(readingClauses[0])) + require.Equal(t, "a", firstNodeSymbol(readingClauses[1])) + require.Len(t, readingClauses[2].Match.Pattern[0].PatternElements, 3) +} + +func TestConservativePatternReorderingKeepsDependentAnchorsInPlace(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (a) + MATCH (b:Group) + WHERE b.name = a.name + RETURN b + `) + require.NoError(t, err) + + plan, err := Optimize(regularQuery) + require.NoError(t, err) + require.Equal(t, []RuleResult{ + {Name: "ConservativePatternReordering", Applied: false}, + {Name: "PredicateAttachment", Applied: true}, + }, plan.Rules) + + readingClauses := plan.Query.SingleQuery.SinglePartQuery.ReadingClauses + require.Equal(t, "a", firstNodeSymbol(readingClauses[0])) + require.Equal(t, "b", firstNodeSymbol(readingClauses[1])) +} diff --git a/cypher/models/pgsql/optimize/pattern_predicates.go b/cypher/models/pgsql/optimize/pattern_predicates.go new file mode 100644 index 00000000..16e9349a --- /dev/null +++ b/cypher/models/pgsql/optimize/pattern_predicates.go @@ -0,0 +1,45 @@ +package optimize + +import ( + 
"github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/cypher/models/walk" +) + +type patternPredicateCollector struct { + walk.VisitorHandler + predicates []*cypher.PatternPredicate +} + +func (s *patternPredicateCollector) Enter(node cypher.SyntaxNode) { + if predicate, isPatternPredicate := node.(*cypher.PatternPredicate); isPatternPredicate { + s.predicates = append(s.predicates, predicate) + } +} + +func (s *patternPredicateCollector) Visit(cypher.SyntaxNode) {} +func (s *patternPredicateCollector) Exit(cypher.SyntaxNode) {} + +func patternPredicatesInQueryPart(queryPart cypher.SyntaxNode) []*cypher.PatternPredicate { + if queryPart == nil { + return nil + } + + collector := &patternPredicateCollector{ + VisitorHandler: walk.NewCancelableErrorHandler(), + } + if err := walk.Cypher(queryPart, collector); err != nil { + return nil + } + + return collector.predicates +} + +func patternPartForPredicate(predicate *cypher.PatternPredicate) *cypher.PatternPart { + if predicate == nil { + return nil + } + + return &cypher.PatternPart{ + PatternElements: predicate.PatternElements, + } +} diff --git a/cypher/models/pgsql/optimize/reordering.go b/cypher/models/pgsql/optimize/reordering.go new file mode 100644 index 00000000..4a380108 --- /dev/null +++ b/cypher/models/pgsql/optimize/reordering.go @@ -0,0 +1,220 @@ +package optimize + +import ( + "sort" + + "github.com/specterops/dawgs/cypher/models/cypher" +) + +type ConservativePatternReorderingRule struct{} + +func (s ConservativePatternReorderingRule) Name() string { + return "ConservativePatternReordering" +} + +func (s ConservativePatternReorderingRule) Apply(plan *Plan) (bool, error) { + if plan == nil || plan.Query == nil || plan.Query.SingleQuery == nil { + return false, nil + } + + if plan.Query.SingleQuery.MultiPartQuery != nil { + return reorderMultiPartQuery(plan.Query.SingleQuery.MultiPartQuery, plan.Analysis), nil + } + + if plan.Query.SingleQuery.SinglePartQuery != nil { + return 
reorderSinglePartQuery(plan.Query.SingleQuery.SinglePartQuery, plan.Analysis), nil + } + + return false, nil +} + +type reorderCandidate struct { + clause *cypher.ReadingClause + rank int + index int +} + +func reorderMultiPartQuery(query *cypher.MultiPartQuery, analysis Analysis) bool { + var applied bool + + for partIndex, part := range query.Parts { + if part == nil { + continue + } + + if queryPart, ok := analysisQueryPart(analysis, partIndex); ok { + applied = reorderReadingClauses(part.ReadingClauses, queryPart.Regions) || applied + } + } + + if query.SinglePartQuery != nil { + if queryPart, ok := analysisQueryPart(analysis, len(query.Parts)); ok { + applied = reorderReadingClauses(query.SinglePartQuery.ReadingClauses, queryPart.Regions) || applied + } + } + + return applied +} + +func reorderSinglePartQuery(query *cypher.SinglePartQuery, analysis Analysis) bool { + if queryPart, ok := analysisQueryPart(analysis, 0); ok { + return reorderReadingClauses(query.ReadingClauses, queryPart.Regions) + } + + return false +} + +func analysisQueryPart(analysis Analysis, index int) (QueryPart, bool) { + for _, queryPart := range analysis.QueryParts { + if queryPart.Index == index { + return queryPart, true + } + } + + return QueryPart{}, false +} + +func reorderReadingClauses(readingClauses []*cypher.ReadingClause, regions []Region) bool { + var applied bool + + for _, region := range regions { + if region.StartClause < 0 || region.EndClause >= len(readingClauses) || region.StartClause >= region.EndClause { + continue + } + + applied = reorderRegion(readingClauses[region.StartClause:region.EndClause+1]) || applied + } + + return applied +} + +func reorderRegion(regionClauses []*cypher.ReadingClause) bool { + var ( + candidates = make([]reorderCandidate, len(regionClauses)) + declaredBefore = map[string]struct{}{} + ) + + for idx, clause := range regionClauses { + candidates[idx] = reorderCandidate{ + clause: clause, + rank: matchClauseRank(clause, declaredBefore), + 
index: idx, + } + + for _, binding := range bindingsForReadingClause(idx, clause) { + declaredBefore[binding.Symbol] = struct{}{} + } + } + + sort.SliceStable(candidates, func(i, j int) bool { + return candidates[i].rank < candidates[j].rank + }) + + var applied bool + for idx, candidate := range candidates { + if regionClauses[idx] != candidate.clause { + applied = true + regionClauses[idx] = candidate.clause + } + } + + return applied +} + +func matchClauseRank(readingClause *cypher.ReadingClause, declaredBefore map[string]struct{}) int { + if isIndependentNodeAnchor(readingClause, declaredBefore) { + return 0 + } + + return 1 +} + +func isIndependentNodeAnchor(readingClause *cypher.ReadingClause, declaredBefore map[string]struct{}) bool { + if readingClause == nil || readingClause.Match == nil { + return false + } + + match := readingClause.Match + if match.Optional || len(match.Pattern) != 1 { + return false + } + + nodePattern, ok := singleNodePattern(match.Pattern[0]) + if !ok || nodePattern.Variable == nil || nodePattern.Variable.Symbol == "" { + return false + } + + if _, alreadyDeclared := declaredBefore[nodePattern.Variable.Symbol]; alreadyDeclared { + return false + } + + if !isSelectiveNodeAnchor(nodePattern, match.Where) { + return false + } + + declared := bindingSymbolSet(bindingsForMatch(0, match)) + for _, dependency := range localMatchDependencies(match) { + if _, isLocal := declared[dependency]; !isLocal { + return false + } + } + + return true +} + +func singleNodePattern(pattern *cypher.PatternPart) (*cypher.NodePattern, bool) { + if pattern == nil || pattern.Variable != nil || len(pattern.PatternElements) != 1 { + return nil, false + } + + return pattern.PatternElements[0].AsNodePattern() +} + +func isSelectiveNodeAnchor(nodePattern *cypher.NodePattern, where *cypher.Where) bool { + return len(nodePattern.Kinds) > 0 || nodePattern.Properties != nil || wherePredicateCount(where) > 0 +} + +func localMatchDependencies(match *cypher.Match) 
[]string { + if match == nil { + return nil + } + + var dependencies []string + for _, pattern := range match.Pattern { + if pattern == nil { + continue + } + + for _, element := range pattern.PatternElements { + if element == nil { + continue + } + + if nodePattern, ok := element.AsNodePattern(); ok { + dependencies = append(dependencies, sortedDependencies(nodePattern.Properties)...) + } else if relationshipPattern, ok := element.AsRelationshipPattern(); ok { + dependencies = append(dependencies, sortedDependencies(relationshipPattern.Properties)...) + } + } + } + + dependencies = append(dependencies, dependenciesForMatch(match)...) + return sortedUniqueStrings(dependencies) +} + +func bindingSymbolSet(bindings []Binding) map[string]struct{} { + symbols := make(map[string]struct{}, len(bindings)) + for _, binding := range bindings { + symbols[binding.Symbol] = struct{}{} + } + + return symbols +} + +func bindingsForReadingClause(clauseIndex int, readingClause *cypher.ReadingClause) []Binding { + if readingClause == nil || readingClause.Match == nil { + return nil + } + + return bindingsForMatch(clauseIndex, readingClause.Match) +} diff --git a/cypher/models/pgsql/optimize/selectivity.go b/cypher/models/pgsql/optimize/selectivity.go new file mode 100644 index 00000000..1d33aba2 --- /dev/null +++ b/cypher/models/pgsql/optimize/selectivity.go @@ -0,0 +1,299 @@ +package optimize + +import ( + "fmt" + + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/walk" +) + +const ( + // Below are a select set of constants to represent different weights to represent, roughly, the selectivity + // of a given PGSQL expression. These weights are meant to be inexact and are only useful in comparison to other + // summed weights. + // + // The goal of these weights are to enable reordering of queries such that the more selective side of a traversal + // step is expanded first. 
Eventually, these weights may also enable reordering of multipart queries. + + // Entity ID references are a safe selectivity bet. A direct reference will typically take the form of: + // `n0.id = 1` or some other direct comparison against the entity's ID. All entity IDs are covered by a unique + // b-tree index, making them both highly selective and lucrative to weight higher. + selectivityWeightEntityIDReference = 125 + + // Unique node properties are both covered by a compatible index and unique, making them highly selective. + selectivityWeightUniqueNodeProperty = 100 + + // Bound identifiers are heavily weighted for preserving join order integrity. + selectivityWeightBoundIdentifier = 700 + + // Operators that narrow the search space are given a higher selectivity. + selectivityWeightNarrowSearch = 30 + + // Operators that perform string searches are given a higher selectivity. + selectivityWeightStringSearch = 20 + + // Operators that perform range comparisons are reasonably selective. + selectivityWeightRangeComparison = 10 + + // Conjunctions can narrow search space, especially when compounded, but may be order dependent and unreliable as + // a good selectivity heuristic. + selectivityWeightConjunction = 5 + + // Exclusions can narrow the search space but often only slightly. + selectivityWeightNotEquals = 1 + + // Disjunctions expand search space by adding a secondary, conditional operation. + selectivityWeightDisjunction = -100 + + // selectivityFlipThreshold is the minimum score advantage the right-hand node must hold + // over the left-hand node before constraint balancing commits to a traversal direction flip. + // It is set to selectivityWeightNarrowSearch so that structural AST noise, in particular the + // per-AND-node conjunction bonus, cannot trigger a flip on its own. 
A single meaningful + // narrowing predicate (=, IN, kind filter) on the right side is sufficient to clear this + // bar; a bare AND connector (weight 5) or a range comparison on an unindexed property + // (weight 10) is not. + selectivityFlipThreshold = selectivityWeightNarrowSearch + + // selectivityBidirectionalAnchorThreshold is the minimum score each endpoint must carry + // before shortest-path translation starts a bidirectional search from both sides. This + // keeps broad label-only endpoints out of bidirectional BFS; a single kind predicate + // scores below this threshold, while a materially narrower property predicate can clear it. + selectivityBidirectionalAnchorThreshold = selectivityWeightNarrowSearch * 2 +) + +// knownNodePropertySelectivity is a hack to enable the selectivity measurement to take advantage of known property +// indexes or uniqueness constraints. +// +// Eventually, this should be replaced by a tool that can introspect a graph schema and derive this map. +var knownNodePropertySelectivity = map[string]int{ + "objectid": selectivityWeightUniqueNodeProperty, // Object ID contains a unique constraint giving this a high degree of selectivity. + "name": selectivityWeightUniqueNodeProperty, // Name contains a unique constraint giving this a high degree of selectivity. + "system_tags": selectivityWeightNarrowSearch, // Searches that use the system_tags property are likely to have a higher degree of selectivity. 
+} + +type BindingLookup interface { + LookupDataType(identifier pgsql.Identifier) (pgsql.DataType, bool) +} + +type SelectivityModel struct { + bindings BindingLookup +} + +func NewSelectivityModel(bindings BindingLookup) SelectivityModel { + return SelectivityModel{ + bindings: bindings, + } +} + +type propertyLookup struct { + reference pgsql.CompoundIdentifier + field string +} + +type measureSelectivityVisitor struct { + walk.Visitor[pgsql.SyntaxNode] + + model SelectivityModel + selectivityStack []int +} + +func newMeasureSelectivityVisitor(model SelectivityModel) *measureSelectivityVisitor { + return &measureSelectivityVisitor{ + Visitor: walk.NewVisitor[pgsql.SyntaxNode](), + model: model, + selectivityStack: []int{0}, + } +} + +func (s *measureSelectivityVisitor) Selectivity() int { + return s.selectivityStack[0] +} + +func (s *measureSelectivityVisitor) popSelectivity() int { + value := s.selectivityStack[len(s.selectivityStack)-1] + s.selectivityStack = s.selectivityStack[:len(s.selectivityStack)-1] + + return value +} + +func (s *measureSelectivityVisitor) pushSelectivity(value int) { + s.selectivityStack = append(s.selectivityStack, value) +} + +func (s *measureSelectivityVisitor) addSelectivity(value int) { + if len(s.selectivityStack) == 0 { + s.pushSelectivity(value) + } else { + s.selectivityStack[len(s.selectivityStack)-1] += value + } +} + +func isColumnIDRef(expression pgsql.Expression) bool { + switch typedExpression := expression.(type) { + case pgsql.CompoundIdentifier: + if typedExpression.HasField() { + switch typedExpression.Field() { + case pgsql.ColumnID: + return true + } + } + } + + return false +} + +func binaryExpressionToPropertyLookup(expression *pgsql.BinaryExpression) (propertyLookup, error) { + if reference, typeOK := expression.LOperand.(pgsql.CompoundIdentifier); !typeOK { + return propertyLookup{}, fmt.Errorf("expected left operand for property lookup to be a compound identifier but found type: %T", expression.LOperand) + } 
else if field, typeOK := expression.ROperand.(pgsql.Literal); !typeOK { + return propertyLookup{}, fmt.Errorf("expected right operand for property lookup to be a literal but found type: %T", expression.ROperand) + } else if field.CastType != pgsql.Text { + return propertyLookup{}, fmt.Errorf("expected property lookup field a string literal but found data type: %s", field.CastType) + } else if stringField, typeOK := field.Value.(string); !typeOK { + return propertyLookup{}, fmt.Errorf("expected property lookup field a string literal but found data type: %T", field) + } else { + return propertyLookup{ + reference: reference, + field: stringField, + }, nil + } +} + +func (s *measureSelectivityVisitor) Enter(node pgsql.SyntaxNode) { + switch typedNode := node.(type) { + case *pgsql.UnaryExpression: + switch typedNode.Operator { + case pgsql.OperatorNot: + s.pushSelectivity(0) + } + + case *pgsql.BinaryExpression: + var ( + lOperandIsID = isColumnIDRef(typedNode.LOperand) + rOperandIsID = isColumnIDRef(typedNode.ROperand) + ) + + if lOperandIsID && !rOperandIsID { + // Point lookup: n0.id = <value>; highly selective. + s.addSelectivity(selectivityWeightEntityIDReference) + } else if rOperandIsID && !lOperandIsID { + // Canonically unusual, but handle it the same. + s.addSelectivity(selectivityWeightEntityIDReference) + } + + // If both sides are ID refs, this is a join condition; do not score as a point lookup. 
+ switch typedNode.Operator { + case pgsql.OperatorOr: + s.addSelectivity(selectivityWeightDisjunction) + + case pgsql.OperatorNotEquals: + s.addSelectivity(selectivityWeightNotEquals) + + case pgsql.OperatorAnd: + s.addSelectivity(selectivityWeightConjunction) + + case pgsql.OperatorLessThan, pgsql.OperatorGreaterThan, pgsql.OperatorLessThanOrEqualTo, pgsql.OperatorGreaterThanOrEqualTo: + s.addSelectivity(selectivityWeightRangeComparison) + + case pgsql.OperatorLike, pgsql.OperatorILike, pgsql.OperatorRegexMatch, pgsql.OperatorSimilarTo: + s.addSelectivity(selectivityWeightStringSearch) + + case pgsql.OperatorIn, pgsql.OperatorEquals, pgsql.OperatorIs: + s.addSelectivity(selectivityWeightNarrowSearch) + + case pgsql.OperatorPGArrayOverlap, pgsql.OperatorArrayOverlap: + s.addSelectivity(selectivityWeightNarrowSearch) + + case pgsql.OperatorPGArrayLHSContainsRHS: + // @> is strictly more selective than &&: all kind_ids must be present. + s.addSelectivity(selectivityWeightNarrowSearch + selectivityWeightConjunction) + + case pgsql.OperatorJSONField, pgsql.OperatorJSONTextField, pgsql.OperatorPropertyLookup: + if propertyLookup, err := binaryExpressionToPropertyLookup(typedNode); err != nil { + s.SetError(err) + } else { + leftIdentifier := propertyLookup.reference.Root() + if s.model.bindings == nil { + return + } + + if dataType, bound := s.model.bindings.LookupDataType(leftIdentifier); !bound { + s.SetErrorf("unable to lookup identifier %s", leftIdentifier) + } else { + switch dataType { + case pgsql.ExpansionRootNode, pgsql.ExpansionTerminalNode, pgsql.NodeComposite: + if selectivity, hasKnownSelectivity := knownNodePropertySelectivity[propertyLookup.field]; hasKnownSelectivity { + s.addSelectivity(selectivity) + } + } + } + } + } + } +} + +func (s *measureSelectivityVisitor) Exit(node pgsql.SyntaxNode) { + switch typedNode := node.(type) { + case *pgsql.UnaryExpression: + switch typedNode.Operator { + case pgsql.OperatorNot: + selectivity := s.popSelectivity() + 
s.addSelectivity(-selectivity) + } + } +} + +func (s SelectivityModel) Measure(expression pgsql.Expression) (int, error) { + visitor := newMeasureSelectivityVisitor(s) + + if expression != nil { + if err := walk.PgSQL(expression, visitor); err != nil { + return 0, err + } + } + + return visitor.Selectivity(), nil +} + +func (s SelectivityModel) ShouldFlipTraversalDirection(leftBound, rightBound bool, leftExpression, rightExpression pgsql.Expression) (bool, error) { + if leftBound { + return false, nil + } + + if rightBound { + return true, nil + } + + leftSelectivity, err := s.Measure(leftExpression) + if err != nil { + return false, err + } + + rightSelectivity, err := s.Measure(rightExpression) + if err != nil { + return false, err + } + + return rightSelectivity-leftSelectivity >= selectivityFlipThreshold, nil +} + +func (s SelectivityModel) EndpointSelectivity(expression pgsql.Expression, bound, hasPreviousFrameBinding bool) (int, error) { + selectivity, err := s.Measure(expression) + if err != nil { + return 0, err + } + + if bound && hasPreviousFrameBinding { + selectivity += selectivityWeightBoundIdentifier + } + + return selectivity, nil +} + +func MeasureSelectivity(bindings BindingLookup, expression pgsql.Expression) (int, error) { + return NewSelectivityModel(bindings).Measure(expression) +} + +func IsBidirectionalSearchAnchor(selectivity int) bool { + return selectivity >= selectivityBidirectionalAnchorThreshold +} diff --git a/cypher/models/pgsql/optimize/source_references.go b/cypher/models/pgsql/optimize/source_references.go new file mode 100644 index 00000000..01dde537 --- /dev/null +++ b/cypher/models/pgsql/optimize/source_references.go @@ -0,0 +1,125 @@ +package optimize + +import ( + "github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/cypher/models/walk" +) + +type sourceReferenceCollector struct { + walk.VisitorHandler + + referencedIdentifiers map[string]struct{} + matchPatternDeclarationRefs map[string]int + 
matchPatternDeclarations map[*cypher.PatternPart]struct{} + matchPatternDeclarationDepth int +} + +func newSourceReferenceCollector() *sourceReferenceCollector { + return &sourceReferenceCollector{ + VisitorHandler: walk.NewCancelableErrorHandler(), + referencedIdentifiers: map[string]struct{}{}, + matchPatternDeclarationRefs: map[string]int{}, + matchPatternDeclarations: map[*cypher.PatternPart]struct{}{}, + } +} + +func (s *sourceReferenceCollector) addVariable(variable *cypher.Variable) { + if variable != nil && variable.Symbol != "" { + s.referencedIdentifiers[variable.Symbol] = struct{}{} + } +} + +func (s *sourceReferenceCollector) addMatchPatternDeclaration(variable *cypher.Variable) { + if variable != nil && variable.Symbol != "" { + s.matchPatternDeclarationRefs[variable.Symbol] += 1 + } +} + +func (s *sourceReferenceCollector) collectRepeatedMatchPatternDeclarations() { + for identifier, numDeclarations := range s.matchPatternDeclarationRefs { + if numDeclarations > 1 { + s.referencedIdentifiers[identifier] = struct{}{} + } + } +} + +func (s *sourceReferenceCollector) isMatchPatternDeclaration(patternPart *cypher.PatternPart) bool { + _, isDeclaration := s.matchPatternDeclarations[patternPart] + return isDeclaration +} + +func (s *sourceReferenceCollector) Enter(node cypher.SyntaxNode) { + switch typedNode := node.(type) { + case *cypher.Match: + for _, patternPart := range typedNode.Pattern { + s.matchPatternDeclarations[patternPart] = struct{}{} + } + + case *cypher.PatternPart: + if s.isMatchPatternDeclaration(typedNode) { + s.addMatchPatternDeclaration(typedNode.Variable) + s.matchPatternDeclarationDepth += 1 + } else { + s.addVariable(typedNode.Variable) + } + + case *cypher.NodePattern: + if s.matchPatternDeclarationDepth == 0 { + s.addVariable(typedNode.Variable) + } else { + s.addMatchPatternDeclaration(typedNode.Variable) + } + + case *cypher.RelationshipPattern: + if s.matchPatternDeclarationDepth == 0 { + s.addVariable(typedNode.Variable) + } 
else { + s.addMatchPatternDeclaration(typedNode.Variable) + } + + case *cypher.PropertyLookup: + if variable, isVariable := typedNode.Atom.(*cypher.Variable); isVariable { + s.addVariable(variable) + } + + case *cypher.Variable: + if s.matchPatternDeclarationDepth == 0 { + s.addVariable(typedNode) + } + } +} + +func (s *sourceReferenceCollector) Visit(cypher.SyntaxNode) {} + +func (s *sourceReferenceCollector) Exit(node cypher.SyntaxNode) { + if patternPart, isPatternPart := node.(*cypher.PatternPart); isPatternPart && s.isMatchPatternDeclaration(patternPart) { + s.matchPatternDeclarationDepth -= 1 + } +} + +func collectReferencedSourceIdentifiers(root cypher.SyntaxNode) (map[string]struct{}, error) { + if root == nil { + return map[string]struct{}{}, nil + } + + collector := newSourceReferenceCollector() + if err := walk.Cypher(root, collector); err != nil { + return collector.referencedIdentifiers, err + } + + collector.collectRepeatedMatchPatternDeclarations() + return collector.referencedIdentifiers, nil +} + +func referencesSourceIdentifier(references map[string]struct{}, symbol string) bool { + if _, referencesAll := references[cypher.TokenLiteralAsterisk]; referencesAll { + return true + } + + if symbol == "" { + return false + } + + _, referenced := references[symbol] + return referenced +} diff --git a/cypher/models/pgsql/pgtypes.go b/cypher/models/pgsql/pgtypes.go index 2ff9b7db..8e68dd6a 100644 --- a/cypher/models/pgsql/pgtypes.go +++ b/cypher/models/pgsql/pgtypes.go @@ -106,6 +106,7 @@ const ( ExpansionRootNode DataType = "expansion_root_node" ExpansionEdge DataType = "expansion_edge" ExpansionTerminalNode DataType = "expansion_terminal_node" + PathEdge DataType = "path_edge" ) func (s DataType) IsKnown() bool { diff --git a/cypher/models/pgsql/test/translation_cases/create.sql b/cypher/models/pgsql/test/translation_cases/create.sql index b3fc65ce..55c74074 100644 --- a/cypher/models/pgsql/test/translation_cases/create.sql +++ 
b/cypher/models/pgsql/test/translation_cases/create.sql @@ -69,7 +69,8 @@ with s0 as (select nextval(pg_get_serial_sequence('node', 'id'))::int8 as n0_id) with s0 as (select nextval(pg_get_serial_sequence('node', 'id'))::int8 as n0_id), s1 as (insert into node (graph_id, id, kind_ids, properties) select 0, s0.n0_id, array [1]::int2[], jsonb_build_object('name', 'abc')::jsonb from s0 returning id as n0_id, (id, kind_ids, properties)::nodecomposite as n0), s2 as (select s1.n0 as n0 from s0, s1 where s1.n0_id = s0.n0_id), s3 as (select s2.n0 as n0, nextval(pg_get_serial_sequence('node', 'id'))::int8 as n1_id from s2), s4 as (insert into node (graph_id, id, kind_ids, properties) select 0, s3.n1_id, array [2]::int2[], jsonb_build_object('name', 'test')::jsonb from s3 returning id as n1_id, (id, kind_ids, properties)::nodecomposite as n1), s5 as (select s3.n0 as n0, s4.n1 as n1 from s3, s4 where s4.n1_id = s3.n1_id), s6 as (select s5.n0 as n0, s5.n1 as n1, nextval(pg_get_serial_sequence('node', 'id'))::int8 as n2_id from s5), s7 as (insert into node (graph_id, id, kind_ids, properties) select 0, s6.n2_id, array [1]::int2[], jsonb_build_object('name', 'other')::jsonb from s6 returning id as n2_id, (id, kind_ids, properties)::nodecomposite as n2), s8 as (select s6.n0 as n0, s6.n1 as n1, s7.n2 as n2 from s6, s7 where s7.n2_id = s6.n2_id), s9 as (select s8.n0 as n0, s8.n1 as n1, s8.n2 as n2, nextval(pg_get_serial_sequence('edge', 'id'))::int8 as e0_id from s8), s10 as (insert into edge (graph_id, id, start_id, end_id, kind_id, properties) select 0, s9.e0_id, (s9.n0).id, (s9.n1).id, 3, jsonb_build_object('prop', 123)::jsonb from s9 returning id as e0_id, (id, start_id, end_id, kind_id, properties)::edgecomposite as e0), s11 as (select s9.n0 as n0, s9.n1 as n1, s9.n2 as n2, s10.e0 as e0 from s9, s10 where s10.e0_id = s9.e0_id), s12 as (select s11.e0 as e0, s11.n0 as n0, s11.n1 as n1, s11.n2 as n2, nextval(pg_get_serial_sequence('edge', 'id'))::int8 as e1_id from s11), s13 as 
(insert into edge (graph_id, id, start_id, end_id, kind_id, properties) select 0, s12.e1_id, (s12.n2).id, (s12.n1).id, 4, jsonb_build_object()::jsonb from s12 returning id as e1_id, (id, start_id, end_id, kind_id, properties)::edgecomposite as e1), s14 as (select s12.e0 as e0, s12.n0 as n0, s12.n1 as n1, s12.n2 as n2, s13.e1 as e1 from s12, s13 where s13.e1_id = s12.e1_id) select s14.n1 as c from s14; -- case: create p = (:NodeKind1 {name: 'abc'})-[:EdgeKind1 {prop: 123}]->(:NodeKind2 {name: 'test'}) return p -with s0 as (select nextval(pg_get_serial_sequence('node', 'id'))::int8 as n0_id), s1 as (insert into node (graph_id, id, kind_ids, properties) select 0, s0.n0_id, array [1]::int2[], jsonb_build_object('name', 'abc')::jsonb from s0 returning id as n0_id, (id, kind_ids, properties)::nodecomposite as n0), s2 as (select s1.n0 as n0 from s0, s1 where s1.n0_id = s0.n0_id), s3 as (select s2.n0 as n0, nextval(pg_get_serial_sequence('node', 'id'))::int8 as n1_id from s2), s4 as (insert into node (graph_id, id, kind_ids, properties) select 0, s3.n1_id, array [2]::int2[], jsonb_build_object('name', 'test')::jsonb from s3 returning id as n1_id, (id, kind_ids, properties)::nodecomposite as n1), s5 as (select s3.n0 as n0, s4.n1 as n1 from s3, s4 where s4.n1_id = s3.n1_id), s6 as (select s5.n0 as n0, s5.n1 as n1, nextval(pg_get_serial_sequence('edge', 'id'))::int8 as e0_id from s5), s7 as (insert into edge (graph_id, id, start_id, end_id, kind_id, properties) select 0, s6.e0_id, (s6.n0).id, (s6.n1).id, 3, jsonb_build_object('prop', 123)::jsonb from s6 returning id as e0_id, (id, start_id, end_id, kind_id, properties)::edgecomposite as e0), s8 as (select s6.n0 as n0, s6.n1 as n1, s7.e0 as e0 from s6, s7 where s7.e0_id = s6.e0_id) select (array [s8.n0, s8.n1]::nodecomposite[], array [s8.e0]::edgecomposite[])::pathcomposite as p from s8; +with s0 as (select nextval(pg_get_serial_sequence('node', 'id'))::int8 as n0_id), s1 as (insert into node (graph_id, id, kind_ids, 
properties) select 0, s0.n0_id, array [1]::int2[], jsonb_build_object('name', 'abc')::jsonb from s0 returning id as n0_id, (id, kind_ids, properties)::nodecomposite as n0), s2 as (select s1.n0 as n0 from s0, s1 where s1.n0_id = s0.n0_id), s3 as (select s2.n0 as n0, nextval(pg_get_serial_sequence('node', 'id'))::int8 as n1_id from s2), s4 as (insert into node (graph_id, id, kind_ids, properties) select 0, s3.n1_id, array [2]::int2[], jsonb_build_object('name', 'test')::jsonb from s3 returning id as n1_id, (id, kind_ids, properties)::nodecomposite as n1), s5 as (select s3.n0 as n0, s4.n1 as n1 from s3, s4 where s4.n1_id = s3.n1_id), s6 as (select s5.n0 as n0, s5.n1 as n1, nextval(pg_get_serial_sequence('edge', 'id'))::int8 as e0_id from s5), s7 as (insert into edge (graph_id, id, start_id, end_id, kind_id, properties) select 0, s6.e0_id, (s6.n0).id, (s6.n1).id, 3, jsonb_build_object('prop', 123)::jsonb from s6 returning id as e0_id, (id, start_id, end_id, kind_id, properties)::edgecomposite as e0), s8 as (select s6.n0 as n0, s6.n1 as n1, s7.e0 as e0 from s6, s7 where s7.e0_id = s6.e0_id) select case when (s8.n0).id is null or (s8.e0).id is null or (s8.n1).id is null then null else (array [s8.n0, s8.n1]::nodecomposite[], array [s8.e0]::edgecomposite[])::pathcomposite end as p from s8; -- case: match (a:NodeKind1) with a create (b:NodeKind2 {source: a.name}) return a, b with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as n0 from s1), s2 as (select s0.n0 as n0, nextval(pg_get_serial_sequence('node', 'id'))::int8 as n1_id from s0), s3 as (insert into node (graph_id, id, kind_ids, properties) select 0, s2.n1_id, array [2]::int2[], jsonb_build_object('source', ((s2.n0).properties ->> 'name'))::jsonb from s2 returning id as n1_id, (id, kind_ids, properties)::nodecomposite as n1), s4 as (select s2.n0 as n0, s3.n1 as n1 from s2, s3 where s3.n1_id = 
s2.n1_id) select s4.n0 as a, s4.n1 as b from s4; + diff --git a/cypher/models/pgsql/test/translation_cases/delete.sql b/cypher/models/pgsql/test/translation_cases/delete.sql index 540765f8..c6695b5d 100644 --- a/cypher/models/pgsql/test/translation_cases/delete.sql +++ b/cypher/models/pgsql/test/translation_cases/delete.sql @@ -21,5 +21,5 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])), s1 as (delete from edge e1 using s0 where (s0.e0).id = e1.id) select 1; -- case: match ()-[]->()-[r:EdgeKind1]->() delete r -with s0 as (select (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [3]::int2[])), s2 as (delete from edge e2 using s1 where (s1.e1).id = e2.id) select 1; +with s0 as (select e0.id as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [3]::int2[]) and e1.id != s0.e0), s2 as (delete from edge e2 using s1 where (s1.e1).id = e2.id) select 1; diff --git a/cypher/models/pgsql/test/translation_cases/multipart.sql b/cypher/models/pgsql/test/translation_cases/multipart.sql index a30ecd12..48741b90 100644 --- a/cypher/models/pgsql/test/translation_cases/multipart.sql +++ 
b/cypher/models/pgsql/test/translation_cases/multipart.sql @@ -21,40 +21,40 @@ with s0 as (select '1' as i0), s1 as (select s0.i0 as i0, (n0.id, n0.kind_ids, n with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'value'))::jsonb = to_jsonb((1)::int8)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as n0 from s1), s2 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where (n1.id = (s0.n0).id)) select s2.n1 as b from s2; -- case: match (n:NodeKind1) where n.value = 1 with n match (f) where f.name = 'me' with f match (b) where id(b) = id(f) return b -with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'value'))::jsonb = to_jsonb((1)::int8)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as n0 from s1), s2 as (with s3 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where (((n1.properties -> 'name'))::jsonb = to_jsonb(('me')::text)::jsonb)) select s3.n1 as n1 from s3), s4 as (select s2.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s2, node n2 where (n2.id = (s2.n1).id)) select s4.n2 as b from s4; +with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'value'))::jsonb = to_jsonb((1)::int8)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as n0 from s1), s2 as (with s3 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'me'))) select s3.n1 as n1 from s3), s4 as (select s2.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s2, node n2 where (n2.id = (s2.n1).id)) select s4.n2 as b 
from s4; -- case: match (n:NodeKind1)-[:EdgeKind1*1..]->(:NodeKind2)-[:EdgeKind2]->(m:NodeKind1) where (n:NodeKind1 or n:NodeKind2) and n.enabled = true with m, collect(distinct(n)) as p where size(p) >= 10 return m -with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]) and ((n0.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied), s3 
as (select s1.e0 as e0, s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[])) select s3.n2 as n2, array_remove(coalesce(array_agg(distinct (s3.n0))::nodecomposite[], array []::nodecomposite[])::nodecomposite[], null)::nodecomposite[] as i0 from s3 group by n2) select s0.n2 as m from s0 where (cardinality(s0.i0)::int >= 10); +with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]) and ((n0.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties 
from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and exists (select 1 from edge e1 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where n1.id = e1.start_id and e1.kind_id = any (array [4]::int2[]))), s3 as (select s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[]) and e1.id != all (s1.ep0)) select s3.n2 as n2, array_remove(coalesce(array_agg(distinct (s3.n0))::nodecomposite[], array []::nodecomposite[])::nodecomposite[], null)::nodecomposite[] as i0 from s3 group by n2) select s0.n2 as m from s0 where (cardinality(s0.i0)::int >= 10); -- case: match (n:NodeKind1)-[:EdgeKind1*1..]->(:NodeKind2)-[:EdgeKind2]->(m:NodeKind1) where (n:NodeKind1 or n:NodeKind2) and n.enabled = true with m, count(distinct(n)) as p where p >= 10 return m -with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]) and ((n0.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = 
s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied), s3 as (select s1.e0 as e0, s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[])) select s3.n2 as n2, count(distinct (s3.n0))::int8 as i0 from s3 group by n2) select s0.n2 as m from s0 where (s0.i0 >= 10); +with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]) and ((n0.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, 
e0.end_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and exists (select 1 from edge e1 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where n1.id = e1.start_id and e1.kind_id = any (array [4]::int2[]))), s3 as (select s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[]) and e1.id != all (s1.ep0)) select s3.n2 as n2, count(distinct (s3.n0))::int8 as i0 from s3 group by n2) select s0.n2 as m from s0 where (s0.i0 >= 10); -- case: match (n:NodeKind1)-[:EdgeKind1*1..]->(:NodeKind2)-[:EdgeKind2]->(m:NodeKind1) where (n:NodeKind1 or n:NodeKind2) and n.enabled = true with m, count(distinct(n)) as p where p >= 10 return m -with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]) and ((n0.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array 
[1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied), s3 as (select s1.e0 as e0, s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[])) select s3.n2 as n2, count(distinct (s3.n0))::int8 as i0 from s3 group by n2) select s0.n2 as m from s0 where (s0.i0 >= 10); +with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.kind_ids operator 
(pg_catalog.@>) array [1]::int2[] or n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]) and ((n0.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and exists (select 1 from edge e1 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where n1.id = e1.start_id and e1.kind_id = any (array [4]::int2[]))), s3 as (select s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[]) and e1.id != all (s1.ep0)) select s3.n2 as n2, count(distinct (s3.n0))::int8 as i0 from s3 group by n2) select s0.n2 as m from s0 
where (s0.i0 >= 10); -- case: with 365 as max_days match (n:NodeKind1) where n.pwdlastset < (datetime().epochseconds - (max_days * 86400)) and not n.pwdlastset IN [-1.0, 0.0] return n limit 100 with s0 as (select 365 as i0), s1 as (select s0.i0 as i0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from s0, node n0 where (not ((n0.properties ->> 'pwdlastset'))::float8 = any (array [- 1, 0]::float8[]) and ((n0.properties ->> 'pwdlastset'))::numeric < (extract(epoch from now()::timestamp with time zone)::numeric - (s0.i0 * 86400))) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as n from s1 limit 100; -- case: match (n:NodeKind1) where n.hasspn = true and n.enabled = true and not n.objectid ends with '-502' and not coalesce(n.gmsa, false) = true and not coalesce(n.msa, false) = true match (n)-[:EdgeKind1|EdgeKind2*1..]->(c:NodeKind2) with distinct n, count(c) as adminCount return n order by adminCount desc limit 100 -with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'hasspn'))::jsonb = to_jsonb((true)::bool)::jsonb and ((n0.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb and not coalesce((n0.properties ->> 'objectid'), '')::text like '%-502' and not coalesce(((n0.properties ->> 'gmsa'))::bool, false)::bool = true and not coalesce(((n0.properties ->> 'msa'))::bool, false)::bool = true) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2 as (with recursive s3_seed(root_id) as not materialized (select distinct (s1.n0).id as root_id from s1), s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s3_seed join edge e0 on e0.start_id = s3_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s3.root_id, e0.end_id, s3.depth + 1, n1.kind_ids 
operator (pg_catalog.@>) array [2]::int2[], false, s3.path || e0.id from s3 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s3.next_id and e0.id != all (s3.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s3.depth < 15 and not s3.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s3.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s3.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1, s3 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s3.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s3.next_id offset 0) n1 on true where s3.satisfied and (s1.n0).id = s3.root_id) select distinct s2.n0 as n0, count(s2.n1)::int8 as i0 from s2 group by n0) select s0.n0 as n from s0 order by s0.i0 desc limit 100; +with recursive candidate_sources(root_id) as (select source_node.id as root_id from node source_node where (((source_node.properties -> 'hasspn'))::jsonb = to_jsonb((true)::bool)::jsonb and ((source_node.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb and not coalesce((source_node.properties ->> 'objectid'), '')::text like '%-502' and not coalesce(((source_node.properties ->> 'gmsa'))::bool, false)::bool = true and not coalesce(((source_node.properties ->> 'msa'))::bool, false)::bool = true) and source_node.kind_ids operator (pg_catalog.@>) array [1]::int2[]), traversal(root_id, next_id, depth, path) as (select candidate_sources.root_id, e.end_id, 1, array [e.id]::int8[] from candidate_sources join edge e on e.start_id = candidate_sources.root_id where e.kind_id = any (array [3, 
4]::int2[]) union all select traversal.root_id, e.end_id, traversal.depth + 1, traversal.path || e.id from traversal join lateral (select e.id, e.start_id, e.end_id from edge e where e.start_id = traversal.next_id and e.id != all (traversal.path) and e.kind_id = any (array [3, 4]::int2[]) offset 0) e on true where traversal.depth < 15), terminal_nodes(id) as materialized (select terminal_node.id from node terminal_node where terminal_node.kind_ids operator (pg_catalog.@>) array [2]::int2[]), terminal_hits(root_id) as (select traversal.root_id from traversal join terminal_nodes on terminal_nodes.id = traversal.next_id), ranked(root_id, adminCount) as (select terminal_hits.root_id, count(*)::int8 as adminCount from terminal_hits group by terminal_hits.root_id order by adminCount desc limit 100) select (source_node.id, source_node.kind_ids, source_node.properties)::nodecomposite as n from ranked join node source_node on source_node.id = ranked.root_id order by ranked.adminCount desc; -- case: match (n:NodeKind1) where n.objectid = 'S-1-5-21-1260426776-3623580948-1897206385-23225' match p = (n)-[:EdgeKind1|EdgeKind2*1..]->(c:NodeKind2) return p -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'objectid'))::jsonb = to_jsonb(('S-1-5-21-1260426776-3623580948-1897206385-23225')::text)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n0).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || 
e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and (s0.n0).id = s2.root_id) select ordered_edges_to_path(s1.n0, s1.e0, array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite as p from s1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'objectid')) = 'string' and (n0.properties ->> 'objectid') = 'S-1-5-21-1260426776-3623580948-1897206385-23225')) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n0).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select 
e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and (s0.n0).id = s2.root_id) select case when (s1.n0).id is null or s1.ep0 is null or (s1.n1).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite end as p from s1; -- case: match (g1:NodeKind1) where g1.name starts with 'test' with collect (g1.domain) as excludes match (d:NodeKind2) where d.name starts with 'other' and not d.name in excludes return d with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'name') like 'test%') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select array_remove(coalesce(array_agg(((s1.n0).properties ->> 'domain'))::anyarray, array []::text[])::anyarray, null)::anyarray as i0 from s1), s2 as (select s0.i0 as i0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where (not (n1.properties ->> 'name') = any (s0.i0) and (n1.properties ->> 'name') like 'other%') and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]) select s2.n1 as d from s2; -- case: 
with 'a' as uname match (o:NodeKind1) where o.name starts with uname and o.domain = ' ' return o -with s0 as (select 'a' as i0), s1 as (select s0.i0 as i0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from s0, node n0 where (((n0.properties -> 'domain'))::jsonb = to_jsonb((' ')::text)::jsonb and cypher_starts_with((n0.properties ->> 'name'), (i0)::text)::bool) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as o from s1; +with s0 as (select 'a' as i0), s1 as (select s0.i0 as i0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from s0, node n0 where ((jsonb_typeof((n0.properties -> 'domain')) = 'string' and (n0.properties ->> 'domain') = ' ') and cypher_starts_with((n0.properties ->> 'name'), (i0)::text)::bool) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as o from s1; -- case: match (dc)-[r:EdgeKind1*0..]->(g:NodeKind1) where g.objectid ends with '-516' with collect(dc) as exclude match p = (c:NodeKind2)-[n:EdgeKind2]->(u:NodeKind2)-[:EdgeKind2*1..]->(g:NodeKind1) where g.objectid ends with '-512' and not c in exclude return p limit 100 -with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((n1.properties ->> 'objectid') like '%-516') and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select s2_seed.root_id, s2_seed.root_id, 0, false, false, array []::int8[] from s2_seed union all select e0.end_id, e0.start_id, 1, false, e0.end_id = e0.start_id, array [e0.id] from s2_seed join edge e0 on e0.end_id = s2_seed.root_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, e0.start_id, s2.depth + 1, false, false, e0.id || s2.path from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true 
where s2.depth < 15 and not s2.is_cycle and s2.depth > 0) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.next_id offset 0) n0 on true) select array_remove(coalesce(array_agg(s1.n0)::nodecomposite[], array []::nodecomposite[])::nodecomposite[], null)::nodecomposite[] as i0 from s1), s3 as (select (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.i0 as i0, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3 from s0, edge e1 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e1.start_id join node n3 on n3.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n3.id = e1.end_id where e1.kind_id = any (array [4]::int2[])), s4 as (with recursive s5_seed(root_id) as not materialized (select distinct (s3.n3).id as root_id from s3), s5(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.start_id, e2.end_id, 1, ((n4.properties ->> 'objectid') like '%-512') and n4.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.start_id = e2.end_id, array [e2.id] from s5_seed join edge e2 on e2.start_id = s5_seed.root_id join node n4 on n4.id = e2.end_id where e2.kind_id = any (array [4]::int2[]) union all select s5.root_id, e2.end_id, s5.depth + 1, ((n4.properties ->> 'objectid') like '%-512') and n4.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s5.path || e2.id from s5 join lateral 
(select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.start_id = s5.next_id and e2.id != all (s5.path) and e2.kind_id = any (array [4]::int2[]) offset 0) e2 on true join node n4 on n4.id = e2.end_id where s5.depth < 15 and not s5.is_cycle) select s3.e1 as e1, (select coalesce(array_agg((e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s5.path) with ordinality as _path(id, ordinality) join edge e2 on e2.id = _path.id) as e2, s5.path as ep1, s3.i0 as i0, s3.n2 as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s3, s5 join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s5.root_id offset 0) n3 on true join lateral (select n4.id, n4.kind_ids, n4.properties from node n4 where n4.id = s5.next_id offset 0) n4 on true where s5.satisfied and (s3.n3).id = s5.root_id) select ordered_edges_to_path(s4.n2, array [s4.e1]::edgecomposite[] || s4.e2, array [s4.n2, s4.n3, s4.n4]::nodecomposite[])::pathcomposite as p from s4 where (not (s4.n2).id = any ((select (_unnest_elem).id from unnest(s4.i0) as _unnest_elem))) limit 100; +with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((n1.properties ->> 'objectid') like '%-516') and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select s2_seed.root_id, s2_seed.root_id, 0, false, false, array []::int8[] from s2_seed union all select e0.end_id, e0.start_id, 1, false, e0.end_id = e0.start_id, array [e0.id] from s2_seed join edge e0 on e0.end_id = s2_seed.root_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, e0.start_id, s2.depth + 1, false, false, e0.id || s2.path from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge 
e0 where e0.end_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true where s2.depth < 15 and not s2.is_cycle and s2.depth > 0) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.next_id offset 0) n0 on true) select array_remove(coalesce(array_agg((n0).id)::int8[], array []::int8[])::int8[], null)::int8[] as i0 from s1), s3 as (select e1.id as e1, s0.i0 as i0, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3 from s0, edge e1 join node n3 on n3.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n3.id = e1.end_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e1.start_id where (not n2.id = any (s0.i0)) and e1.kind_id = any (array [4]::int2[])), s4 as (with recursive s5_seed(root_id) as not materialized (select distinct (s3.n3).id as root_id from s3), s5(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.start_id, e2.end_id, 1, ((n4.properties ->> 'objectid') like '%-512') and n4.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.start_id = e2.end_id, array [e2.id] from s5_seed join edge e2 on e2.start_id = s5_seed.root_id join node n4 on n4.id = e2.end_id where e2.kind_id = any (array [4]::int2[]) union all select s5.root_id, e2.end_id, s5.depth + 1, ((n4.properties ->> 'objectid') like '%-512') and n4.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s5.path || e2.id from s5 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.start_id = s5.next_id and e2.id != all (s5.path) and e2.kind_id = any (array [4]::int2[]) offset 0) e2 on true join node n4 on n4.id = 
e2.end_id where s5.depth < 15 and not s5.is_cycle) select s3.e1 as e1, s5.path as ep1, s3.i0 as i0, s3.n2 as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s3, s5 join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s5.root_id offset 0) n3 on true join lateral (select n4.id, n4.kind_ids, n4.properties from node n4 where n4.id = s5.next_id offset 0) n4 on true where s5.satisfied and (s3.n3).id = s5.root_id limit 100) select case when (s4.n2).id is null or s4.e1 is null or (s4.n3).id is null or s4.ep1 is null or (s4.n4).id is null then null else ordered_edges_to_path(s4.n2, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s4.e1]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s4.ep1) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s4.n2, s4.n3, s4.n4]::nodecomposite[])::pathcomposite end as p from s4 limit 100; -- case: match (n:NodeKind1)<-[:EdgeKind1]-(:NodeKind2) where n.objectid ends with '-516' with n, count(n) as dc_count where dc_count = 1 return n with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on ((n0.properties ->> 'objectid') like '%-516') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.end_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[])) select s1.n0 as n0, count(s1.n0)::int8 as i0 from s1 group by n0) select s0.n0 as n from s0 where (s0.i0 = 1); -- case: match 
(n:NodeKind1)-[:EdgeKind1]->(m:NodeKind2) where n.enabled = true with n, collect(distinct(n)) as p where size(p) >= 100 match p = (n)-[:EdgeKind1]->(m) return p limit 10 -with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on (((n0.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select s1.n0 as n0, array_remove(coalesce(array_agg(distinct (s1.n0))::nodecomposite[], array []::nodecomposite[])::nodecomposite[], null)::nodecomposite[] as i0 from s1 group by n0), s2 as (select (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.i0 as i0, s0.n0 as n0, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (cardinality(s0.i0)::int >= 100) and (s0.n0).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [3]::int2[]) limit 10) select (array [s2.n0, s2.n2]::nodecomposite[], array [s2.e1]::edgecomposite[])::pathcomposite as p from s2 limit 10; +with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on (((n0.properties -> 'enabled'))::jsonb = to_jsonb((true)::bool)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select s1.n0 as n0, array_remove(coalesce(array_agg(distinct (s1.n0))::nodecomposite[], array []::nodecomposite[])::nodecomposite[], null)::nodecomposite[] as i0 from s1 group by n0), s2 as (select e1.id as e1, s0.i0 as i0, s0.n0 as n0, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (cardinality(s0.i0)::int >= 100) and 
(s0.n0).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [3]::int2[]) limit 10) select case when (s2.n0).id is null or s2.e1 is null or (s2.n2).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s2.e1]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, s2.n2]::nodecomposite[])::pathcomposite end as p from s2 limit 10; -- case: with "a" as check, "b" as ref match p = (u)-[:EdgeKind1]->(g:NodeKind1) where u.name starts with check and u.domain = ref with collect(tolower(g.samaccountname)) as refmembership, tolower(u.samaccountname) as samname return refmembership, samname with s0 as (select 'a' as i0, 'b' as i1), s1 as (with s2 as (select s0.i0 as i0, s0.i1 as i1, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) and ((n0.properties ->> 'domain') = s0.i1 and cypher_starts_with((n0.properties ->> 'name'), (i0)::text)::bool)) select array_remove(coalesce(array_agg(lower(((s2.n1).properties ->> 'samaccountname'))::text)::text[], array []::text[])::text[], null)::text[] as i2, lower(((s2.n0).properties ->> 'samaccountname'))::text as i3 from s2 group by lower(((s2.n0).properties ->> 'samaccountname'))::text) select s1.i2 as refmembership, s1.i3 as samname from s1; @@ -66,29 +66,29 @@ with s0 as (select 'a' as i0, 'b' as i1), s1 as (with s2 as (select s0.i0 as i0, with s0 as (select 'a' as i0, 'b' as i1), s1 as (with s2 as (select s0.i0 as i0, s0.i1 as i1, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, 
n1.properties)::nodecomposite as n1 from s0, edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) and ((n0.properties ->> 'domain') = s0.i1 and cypher_starts_with((n0.properties ->> 'name'), (i0)::text)::bool)) select array_remove(coalesce(array_agg(lower(((s2.n1).properties ->> 'samaccountname'))::text)::text[], array []::text[])::text[], null)::text[] as i2, lower(((s2.n0).properties ->> 'samaccountname'))::text as i3 from s2 group by lower(((s2.n0).properties ->> 'samaccountname'))::text), s3 as (select s1.i2 as i2, s1.i3 as i3, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3 from s1, edge e1 join node n2 on n2.id = e1.start_id join node n3 on n3.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n3.id = e1.end_id where (not lower((n3.properties ->> 'samaccountname'))::text = any (s1.i2)) and e1.kind_id = any (array [4]::int2[]) and (lower((n2.properties ->> 'samaccountname'))::text = s1.i3)) select s3.n3 as g from s3; -- case: match p =(n:NodeKind1)<-[r:EdgeKind1|EdgeKind2*..3]-(u:NodeKind1) where n.domain = 'test' with n, count(r) as incomingCount where incomingCount > 90 with collect(n) as lotsOfAdmins match p =(n:NodeKind1)<-[:EdgeKind1]-() where n in lotsOfAdmins return p -with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (((n0.properties -> 'domain'))::jsonb = to_jsonb(('test')::text)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.end_id = e0.start_id, array [e0.id] from s2_seed join edge e0 on e0.end_id = s2_seed.root_id join node n1 on n1.id = e0.start_id where e0.kind_id = any (array [3, 4]::int2[]) union all select 
s2.root_id, e0.start_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.start_id where s2.depth < 3 and not s2.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied) select s1.n0 as n0, count(s1.e0)::int8 as i0 from s1 group by n0), s3 as (select array_remove(coalesce(array_agg(s0.n0)::nodecomposite[], array []::nodecomposite[])::nodecomposite[], null)::nodecomposite[] as i1 from s0 where (s0.i0 > 90)), s4 as (select (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s3.i1 as i1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3 from s3, edge e1 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id join node n3 on n3.id = e1.start_id where e1.kind_id = any (array [3]::int2[])) select (array [s4.n2, s4.n3]::nodecomposite[], array [s4.e1]::edgecomposite[])::pathcomposite as p from s4 where ((s4.n2).id = any ((select (_unnest_elem).id from unnest(s4.i1) as _unnest_elem))); +with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized 
(select n0.id as root_id from node n0 where ((jsonb_typeof((n0.properties -> 'domain')) = 'string' and (n0.properties ->> 'domain') = 'test')) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.end_id = e0.start_id, array [e0.id] from s2_seed join edge e0 on e0.end_id = s2_seed.root_id join node n1 on n1.id = e0.start_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s2.root_id, e0.start_id, s2.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.start_id where s2.depth < 3 and not s2.is_cycle) select (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied) select s1.n0 as n0, count(s1.e0)::int8 as i0 from s1 group by n0), s3 as (select array_remove(coalesce(array_agg((n0).id)::int8[], array []::int8[])::int8[], null)::int8[] as i1 from s0 where (s0.i0 > 90)), s4 as (select e1.id as e1, s3.i1 as i1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3 from s3, edge e1 
join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id join node n3 on n3.id = e1.start_id where e1.kind_id = any (array [3]::int2[]) and (n2.id = any (s3.i1))) select case when (s4.n2).id is null or s4.e1 is null or (s4.n3).id is null then null else ordered_edges_to_path(s4.n2, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s4.e1]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s4.n2, s4.n3]::nodecomposite[])::pathcomposite end as p from s4; -- case: match (u:NodeKind1)-[:EdgeKind1]->(g:NodeKind2) with g match (g)<-[:EdgeKind1]-(u:NodeKind1) return g with s0 as (with s1 as (select (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select s1.n1 as n1 from s1), s2 as (select s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.end_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.start_id where e1.kind_id = any (array [3]::int2[])) select s2.n1 as g from s2; -- case: match (cg:NodeKind1) where cg.name =~ ".*TT" and cg.domain = "MY DOMAIN" with collect (cg.email) as emails match (o:NodeKind1)-[:EdgeKind1]->(g:NodeKind2) where g.name starts with "blah" and not g.email in emails return o -with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'name') ~ '.*TT' and ((n0.properties -> 'domain'))::jsonb = to_jsonb(('MY DOMAIN')::text)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select array_remove(coalesce(array_agg(((s1.n0).properties ->> 'email'))::anyarray, array 
[]::text[])::anyarray, null)::anyarray as i0 from s1), s2 as (select s0.i0 as i0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, edge e0 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e0.end_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[]) and (not (n2.properties ->> 'email') = any (s0.i0) and (n2.properties ->> 'name') like 'blah%')) select s2.n1 as o from s2; +with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'name') ~ '.*TT' and (jsonb_typeof((n0.properties -> 'domain')) = 'string' and (n0.properties ->> 'domain') = 'MY DOMAIN')) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select array_remove(coalesce(array_agg(((s1.n0).properties ->> 'email'))::anyarray, array []::text[])::anyarray, null)::anyarray as i0 from s1), s2 as (select s0.i0 as i0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, edge e0 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e0.end_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[]) and (not (n2.properties ->> 'email') = any (s0.i0) and (n2.properties ->> 'name') like 'blah%')) select s2.n1 as o from s2; -- case: match (e) match p = ()-[]->(e) return p limit 1 -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0), s1 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0 join edge e0 on (s0.n0).id = e0.end_id join node n1 on n1.id = e0.start_id) select (array [s1.n1, s1.n0]::nodecomposite[], array 
[s1.e0]::edgecomposite[])::pathcomposite as p from s1 limit 1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0), s1 as (select e0.id as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0 join edge e0 on (s0.n0).id = e0.end_id join node n1 on n1.id = e0.start_id) select case when (s1.n1).id is null or s1.e0 is null or (s1.n0).id is null then null else ordered_edges_to_path(s1.n1, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n1, s1.n0]::nodecomposite[])::pathcomposite end as p from s1 limit 1; -- case: match p = (a)-[]->() match q = ()-[]->(a) return p, q -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n0).id = e1.end_id join node n2 on n2.id = e1.start_id) select (array [s1.n0, s1.n1]::nodecomposite[], array [s1.e0]::edgecomposite[])::pathcomposite as p, (array [s1.n2, s1.n0]::nodecomposite[], array [s1.e1]::edgecomposite[])::pathcomposite as q from s1; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, e1.id as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, 
n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n0).id = e1.end_id join node n2 on n2.id = e1.start_id) select case when (s1.n0).id is null or s1.e0 is null or (s1.n1).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite end as p, case when (s1.n2).id is null or s1.e1 is null or (s1.n0).id is null then null else ordered_edges_to_path(s1.n2, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e1]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n2, s1.n0]::nodecomposite[])::pathcomposite end as q from s1; -- case: match (m:NodeKind1)-[*1..]->(g:NodeKind2)-[]->(c3:NodeKind1) where not g.name in ["foo"] with collect(g.name) as bar match p=(m:NodeKind1)-[*1..]->(g:NodeKind2) where g.name in bar return p -with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, (not (n1.properties ->> 'name') = any (array ['foo']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id union all select s2.root_id, e0.end_id, s2.depth + 1, (not (n1.properties ->> 'name') = any (array ['foo']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 
join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied), s3 as (select s1.e0 as e0, s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id) select array_remove(coalesce(array_agg(((s3.n1).properties ->> 'name'))::anyarray, array []::text[])::anyarray, null)::anyarray as i0 from s3), s4 as (with recursive s5_seed(root_id) as not materialized (select n4.id as root_id from s0, node n4 where n4.kind_ids operator (pg_catalog.@>) array [2]::int2[] and ((n4.properties ->> 'name') = any (s0.i0))), s5(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.end_id, e2.start_id, 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.end_id = e2.start_id, array [e2.id] from s5_seed join edge e2 on e2.end_id = s5_seed.root_id join node n3 on n3.id = e2.start_id union select s5.root_id, e2.start_id, s5.depth + 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e2.id || s5.path from s5 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.end_id = 
s5.next_id and e2.id != all (s5.path) offset 0) e2 on true join node n3 on n3.id = e2.start_id where s5.depth < 15 and not s5.is_cycle) select (select coalesce(array_agg((e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s5.path) with ordinality as _path(id, ordinality) join edge e2 on e2.id = _path.id) as e2, s5.path as ep1, s0.i0 as i0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s0, s5 join lateral (select n4.id, n4.kind_ids, n4.properties from node n4 where n4.id = s5.root_id offset 0) n4 on true join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s5.next_id offset 0) n3 on true where s5.satisfied) select ordered_edges_to_path(s4.n3, s4.e2, array [s4.n3, s4.n4]::nodecomposite[])::pathcomposite as p from s4; +with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, (not (n1.properties ->> 'name') = any (array ['foo']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id union all select s2.root_id, e0.end_id, s2.depth + 1, (not (n1.properties ->> 'name') = any (array ['foo']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, 
n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and exists (select 1 from edge e1 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where n1.id = e1.start_id)), s3 as (select s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.id != all (s1.ep0)) select array_remove(coalesce(array_agg(((s3.n1).properties ->> 'name'))::anyarray, array []::text[])::anyarray, null)::anyarray as i0 from s3), s4 as (with recursive s5_seed(root_id) as not materialized (select n4.id as root_id from s0, node n4 where n4.kind_ids operator (pg_catalog.@>) array [2]::int2[] and ((n4.properties ->> 'name') = any (s0.i0))), s5(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.end_id, e2.start_id, 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.end_id = e2.start_id, array [e2.id] from s5_seed join edge e2 on e2.end_id = s5_seed.root_id join node n3 on n3.id = e2.start_id union select s5.root_id, e2.start_id, s5.depth + 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e2.id || s5.path from s5 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.end_id = s5.next_id and e2.id != all (s5.path) offset 0) e2 on true join node n3 on n3.id = e2.start_id where s5.depth < 15 and not s5.is_cycle) select s5.path as ep1, s0.i0 as i0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s0, s5 join lateral (select n4.id, n4.kind_ids, n4.properties from node n4 where n4.id = s5.root_id offset 0) n4 on true join lateral (select n3.id, 
n3.kind_ids, n3.properties from node n3 where n3.id = s5.next_id offset 0) n3 on true where s5.satisfied) select case when (s4.n3).id is null or s4.ep1 is null or (s4.n4).id is null then null else ordered_edges_to_path(s4.n3, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s4.ep1) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s4.n3, s4.n4]::nodecomposite[])::pathcomposite end as p from s4; -- case: match (m:NodeKind1)-[:EdgeKind1*1..]->(g:NodeKind2)-[:EdgeKind2]->(c3:NodeKind1) where m.samaccountname =~ '^[A-Z]{1,3}[0-9]{1,3}$' and not m.samaccountname contains "DEX" and not g.name IN ["D"] and not m.samaccountname =~ "^.*$" with collect(g.name) as admingroups match p=(m:NodeKind1)-[:EdgeKind1*1..]->(g:NodeKind2) where m.samaccountname =~ '^[A-Z]{1,3}[0-9]{1,3}$' and g.name in admingroups and not m.samaccountname =~ "^.*$" return p -with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.properties ->> 'samaccountname') ~ '^[A-Z]{1,3}[0-9]{1,3}$' and not coalesce((n0.properties ->> 'samaccountname'), '')::text like '%DEX%' and not (n0.properties ->> 'samaccountname') ~ '^.*$') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, (not (n1.properties ->> 'name') = any (array ['D']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, (not (n1.properties ->> 'name') = any (array ['D']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 
join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied), s3 as (select s1.e0 as e0, s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[])) select array_remove(coalesce(array_agg(((s3.n1).properties ->> 'name'))::anyarray, array []::text[])::anyarray, null)::anyarray as i0 from s3), s4 as (with recursive s5_seed(root_id) as not materialized (select n4.id as root_id from s0, node n4 where n4.kind_ids operator (pg_catalog.@>) array [2]::int2[] and ((n4.properties ->> 'name') = any (s0.i0))), s5(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.end_id, e2.start_id, 1, ((n3.properties ->> 'samaccountname') ~ '^[A-Z]{1,3}[0-9]{1,3}$' and not (n3.properties ->> 'samaccountname') ~ '^.*$') and n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.end_id = e2.start_id, array [e2.id] from s5_seed join edge e2 on e2.end_id = s5_seed.root_id join node n3 on n3.id = e2.start_id where e2.kind_id = any (array [3]::int2[]) 
union select s5.root_id, e2.start_id, s5.depth + 1, ((n3.properties ->> 'samaccountname') ~ '^[A-Z]{1,3}[0-9]{1,3}$' and not (n3.properties ->> 'samaccountname') ~ '^.*$') and n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e2.id || s5.path from s5 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.end_id = s5.next_id and e2.id != all (s5.path) and e2.kind_id = any (array [3]::int2[]) offset 0) e2 on true join node n3 on n3.id = e2.start_id where s5.depth < 15 and not s5.is_cycle) select (select coalesce(array_agg((e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s5.path) with ordinality as _path(id, ordinality) join edge e2 on e2.id = _path.id) as e2, s5.path as ep1, s0.i0 as i0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s0, s5 join lateral (select n4.id, n4.kind_ids, n4.properties from node n4 where n4.id = s5.root_id offset 0) n4 on true join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s5.next_id offset 0) n3 on true where s5.satisfied) select ordered_edges_to_path(s4.n3, s4.e2, array [s4.n3, s4.n4]::nodecomposite[])::pathcomposite as p from s4; +with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.properties ->> 'samaccountname') ~ '^[A-Z]{1,3}[0-9]{1,3}$' and not coalesce((n0.properties ->> 'samaccountname'), '')::text like '%DEX%' and not (n0.properties ->> 'samaccountname') ~ '^.*$') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, (not (n1.properties ->> 'name') = any (array ['D']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on 
e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, (not (n1.properties ->> 'name') = any (array ['D']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and exists (select 1 from edge e1 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where n1.id = e1.start_id and e1.kind_id = any (array [4]::int2[]))), s3 as (select s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[]) and e1.id != all (s1.ep0)) select array_remove(coalesce(array_agg(((s3.n1).properties ->> 'name'))::anyarray, array []::text[])::anyarray, null)::anyarray as i0 from s3), s4 as (with recursive s5_seed(root_id) as not materialized (select n4.id as root_id from s0, node n4 where n4.kind_ids operator (pg_catalog.@>) array [2]::int2[] and ((n4.properties ->> 'name') = any (s0.i0))), s5(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.end_id, e2.start_id, 1, ((n3.properties ->> 'samaccountname') ~ '^[A-Z]{1,3}[0-9]{1,3}$' and not (n3.properties ->> 
'samaccountname') ~ '^.*$') and n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.end_id = e2.start_id, array [e2.id] from s5_seed join edge e2 on e2.end_id = s5_seed.root_id join node n3 on n3.id = e2.start_id where e2.kind_id = any (array [3]::int2[]) union select s5.root_id, e2.start_id, s5.depth + 1, ((n3.properties ->> 'samaccountname') ~ '^[A-Z]{1,3}[0-9]{1,3}$' and not (n3.properties ->> 'samaccountname') ~ '^.*$') and n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e2.id || s5.path from s5 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.end_id = s5.next_id and e2.id != all (s5.path) and e2.kind_id = any (array [3]::int2[]) offset 0) e2 on true join node n3 on n3.id = e2.start_id where s5.depth < 15 and not s5.is_cycle) select s5.path as ep1, s0.i0 as i0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s0, s5 join lateral (select n4.id, n4.kind_ids, n4.properties from node n4 where n4.id = s5.root_id offset 0) n4 on true join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s5.next_id offset 0) n3 on true where s5.satisfied) select case when (s4.n3).id is null or s4.ep1 is null or (s4.n4).id is null then null else ordered_edges_to_path(s4.n3, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s4.ep1) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s4.n3, s4.n4]::nodecomposite[])::pathcomposite end as p from s4; -- case: match (a:NodeKind2)-[:EdgeKind1]->(g:NodeKind1)-[:EdgeKind2]->(s:NodeKind2) with count(a) as uc where uc > 5 match p = (a)-[:EdgeKind1]->(g)-[:EdgeKind2]->(s) return p -with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, 
n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])), s2 as (select s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[])) select count(s2.n0)::int8 as i0 from s2), s3 as (select (e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties)::edgecomposite as e2, s0.i0 as i0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s0, edge e2 join node n3 on n3.id = e2.start_id join node n4 on n4.id = e2.end_id where e2.kind_id = any (array [3]::int2[]) and (s0.i0 > 5)), s4 as (select s3.e2 as e2, (e3.id, e3.start_id, e3.end_id, e3.kind_id, e3.properties)::edgecomposite as e3, s3.i0 as i0, s3.n3 as n3, s3.n4 as n4, (n5.id, n5.kind_ids, n5.properties)::nodecomposite as n5 from s3 join edge e3 on (s3.n4).id = e3.start_id join node n5 on n5.id = e3.end_id where e3.kind_id = any (array [4]::int2[])) select (array [s4.n3, s4.n4, s4.n5]::nodecomposite[], array [s4.e2, s4.e3]::edgecomposite[])::pathcomposite as p from s4; +with s0 as (with s1 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])), s2 as (select s1.e0 as e0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e1.end_id where e1.kind_id = any 
(array [4]::int2[]) and e1.id != s1.e0) select count(s2.n0)::int8 as i0 from s2), s3 as (select e2.id as e2, s0.i0 as i0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s0, edge e2 join node n3 on n3.id = e2.start_id join node n4 on n4.id = e2.end_id where e2.kind_id = any (array [3]::int2[]) and (s0.i0 > 5)), s4 as (select s3.e2 as e2, e3.id as e3, s3.i0 as i0, s3.n3 as n3, s3.n4 as n4, (n5.id, n5.kind_ids, n5.properties)::nodecomposite as n5 from s3 join edge e3 on (s3.n4).id = e3.start_id join node n5 on n5.id = e3.end_id where e3.kind_id = any (array [4]::int2[]) and e3.id != s3.e2) select case when (s4.n3).id is null or s4.e2 is null or (s4.n4).id is null or s4.e3 is null or (s4.n5).id is null then null else ordered_edges_to_path(s4.n3, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s4.e2]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s4.e3]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s4.n3, s4.n4, s4.n5]::nodecomposite[])::pathcomposite end as p from s4; -- case: match (g:NodeKind1) optional match (g)<-[r:EdgeKind1]-(m:NodeKind2) with g, count(r) as memberCount where memberCount = 0 return g -with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, s1.n0 as n0 from s1 join edge e0 on (s1.n0).id = e0.end_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array 
[2]::int2[] and n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[])), s3 as (select s1.n0 as n0, s2.e0 as e0 from s1 left outer join s2 on (s1.n0 = s2.n0)) select s3.n0 as n0, count(s3.e0)::int8 as i0 from s3 group by n0) select s0.n0 as g from s0 where (s0.i0 = 0); +with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, s1.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join edge e0 on (s1.n0).id = e0.end_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[])), s3 as (select s1.n0 as n0, s2.e0 as e0, s2.n1 as n1 from s1 left outer join s2 on (s1.n0 = s2.n0)) select s3.n0 as n0, count(s3.e0)::int8 as i0 from s3 group by n0) select s0.n0 as g from s0 where (s0.i0 = 0); diff --git a/cypher/models/pgsql/test/translation_cases/nodes.sql b/cypher/models/pgsql/test/translation_cases/nodes.sql index 8a2884eb..8565f7b4 100644 --- a/cypher/models/pgsql/test/translation_cases/nodes.sql +++ b/cypher/models/pgsql/test/translation_cases/nodes.sql @@ -24,7 +24,7 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as n from s0 where ((array(select _kind.name from generate_subscripts((s0.n0).kind_ids, 1) as _kind_idx, kind _kind where _kind.id = ((s0.n0).kind_ids)[_kind_idx] order by _kind_idx))::text[] = array ['NodeKind1', 'NodeKind2']::text[]); -- case: match (n) where n.name = 'n3' with labels(n) as labels return labels, size(labels) -with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('n3')::text)::jsonb)) select 
(array(select _kind.name from generate_subscripts((s1.n0).kind_ids, 1) as _kind_idx, kind _kind where _kind.id = ((s1.n0).kind_ids)[_kind_idx] order by _kind_idx))::text[] as i0 from s1) select s0.i0 as labels, cardinality(s0.i0)::int from s0; +with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n3'))) select (array(select _kind.name from generate_subscripts((s1.n0).kind_ids, 1) as _kind_idx, kind _kind where _kind.id = ((s1.n0).kind_ids)[_kind_idx] order by _kind_idx))::text[] as i0 from s1) select s0.i0 as labels, cardinality(s0.i0)::int from s0; -- case: match (n) with 1 as _kind_idx, n return labels(n), _kind_idx with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select 1 as i0, s1.n0 as n0 from s1) select (array(select _kind.name from generate_subscripts((s0.n0).kind_ids, 1) as _kind_idx, kind _kind where _kind.id = ((s0.n0).kind_ids)[_kind_idx] order by _kind_idx))::text[], s0.i0 as _kind_idx from s0; @@ -45,10 +45,10 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (coalesce((n0.properties ->> 'name'), '')::text = '1234')) select s0.n0 as n from s0; -- case: match (n) where n.name = '1234' return n -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('1234')::text)::jsonb)) select s0.n0 as n from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '1234'))) select s0.n0 as n from s0; -- case: match (n:NodeKind1 {name: "SOME NAME"}) return n -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from 
node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and ((n0.properties -> 'name'))::jsonb = to_jsonb(('SOME NAME')::text)::jsonb) select s0.n0 as n from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'SOME NAME')) select s0.n0 as n from s0; -- case: match (n) where n.objectid in $p return n -- cypher_params: {"p":["1","2","3"]} @@ -58,7 +58,7 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from -- case: match (s) where s.name = $myParam return s -- cypher_params: {"myParam":"123"} -- pgsql_params:{"pi0":"123"} -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb((@pi0::text)::text)::jsonb)) select s0.n0 as s from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = @pi0::text))) select s0.n0 as s from s0; -- case: match (s) return s with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0; @@ -79,7 +79,7 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.kind_ids operator (pg_catalog.@>) array [2]::int2[])) select s0.n0 as s from s0; -- case: match (s) where s.name = '1234' return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('1234')::text)::jsonb)) select s0.n0 as s from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node 
n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '1234'))) select s0.n0 as s from s0; -- case: match (s:NodeKind1), (e:NodeKind2) where s.selected or s.tid = e.tid and e.enabled return s, e with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((((s0.n0).properties ->> 'selected'))::bool or ((s0.n0).properties -> 'tid') = (n1.properties -> 'tid') and ((n1.properties ->> 'enabled'))::bool) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]) select s1.n0 as s, s1.n1 as e from s1; @@ -88,7 +88,7 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties ->> 'value'))::int8 + 2 / 3 > 10)) select s0.n0 as s from s0; -- case: match (s), (e) where s.name = 'n1' return s, e.name as othername -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('n1')::text)::jsonb)), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1) select s1.n0 as s, ((s1.n1).properties -> 'name') as othername from s1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n1'))), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1) select s1.n0 as s, ((s1.n1).properties -> 'name') as othername from s1; -- case: match (s) where s.name in ['option 1', 'option 2'] return s with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'name') = any (array 
['option 1', 'option 2']::text[]))) select s0.n0 as s from s0; @@ -103,13 +103,13 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ? 'system_tags' and not (n0.properties -> 'system_tags') = ('null')::jsonb) and not (n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]))) select (s0.n0).id from s0; -- case: match (s), (e) where s.name = '1234' and e.other = 1234 return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('1234')::text)::jsonb)), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where (((n1.properties -> 'other'))::jsonb = to_jsonb((1234)::int8)::jsonb)) select s1.n0 as s from s1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '1234'))), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where (((n1.properties -> 'other'))::jsonb = to_jsonb((1234)::int8)::jsonb)) select s1.n0 as s from s1; -- case: match (s), (e) where s.name = '1234' or e.other = 1234 return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((((s0.n0).properties -> 'name'))::jsonb = to_jsonb(('1234')::text)::jsonb or ((n1.properties -> 'other'))::jsonb = to_jsonb((1234)::int8)::jsonb)) select s1.n0 as s from s1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where 
((jsonb_typeof(((s0.n0).properties -> 'name')) = 'string' and ((s0.n0).properties ->> 'name') = '1234') or ((n1.properties -> 'other'))::jsonb = to_jsonb((1234)::int8)::jsonb)) select s1.n0 as s from s1; -- case: match (n), (k) where n.name = '1234' and k.name = '1234' match (e) where e.name = n.name return k, e -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('1234')::text)::jsonb)), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where (((n1.properties -> 'name'))::jsonb = to_jsonb(('1234')::text)::jsonb)), s2 as (select s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s1, node n2 where ((n2.properties -> 'name') = ((s1.n0).properties -> 'name'))) select s2.n1 as k, s2.n2 as e from s2; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '1234'))), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = '1234'))), s2 as (select s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s1, node n2 where ((n2.properties -> 'name') = ((s1.n0).properties -> 'name'))) select s2.n1 as k, s2.n2 as e from s2; -- case: match (n) return n skip 5 limit 10 with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as n from s0 offset 5 limit 10; @@ -151,10 +151,10 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties ->> 'created_at'))::timestamp without time zone = ('2019-06-01T18:40:32.142')::timestamp without time 
zone)) select s0.n0 as s from s0; -- case: match (s) where not (s.name = '123') return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (not (((n0.properties -> 'name'))::jsonb = to_jsonb(('123')::text)::jsonb))) select s0.n0 as s from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (not ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '123')))) select s0.n0 as s from s0; -- case: match (s) where s.isassignabletorole = 'true' return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'isassignabletorole') = 'true')) select s0.n0 as s from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'isassignabletorole')) = 'string' and (n0.properties ->> 'isassignabletorole') = 'true'))) select s0.n0 as s from s0; -- case: match (s) where s.isassignabletorole = true return s with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'isassignabletorole'))::jsonb = to_jsonb((true)::bool)::jsonb)) select s0.n0 as s from s0; @@ -208,19 +208,19 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and lower((n0.properties ->> 'tenantid'))::text like '%myid%' and (n0.properties ->> 'system_tags') like '%tag%')) select s0.n0 as n from s0; -- case: match (s) where not (s)-[]-() return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not exists (select 1 from edge e0 where e0.start_id = (s0.n0).id or e0.end_id = (s0.n0).id)); +with s0 as (select (n0.id, n0.kind_ids, 
n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not exists (select 1 from edge e0 where (e0.start_id = (s0.n0).id or e0.end_id = (s0.n0).id))); -- case: match (s) where not (s)-[]->()-[]->() return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not (with s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n1 on n1.id = e0.end_id where (s0.n0).id = e0.start_id), s2 as (select s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.id = e1.end_id) select count(*) > 0 from s2)); +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not (with s1 as (select e0.id as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n1 on n1.id = e0.end_id where (s0.n0).id = e0.start_id), s2 as (select s1.e0 as e0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.id != s1.e0) select count(*) > 0 from s2)); -- case: match (s) where not (s)-[{prop: 'a'}]-({name: 'n3'}) return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not (with s1 as (select s0.n0 as n0 from s0 join edge e0 on ((s0.n0).id = e0.end_id or (s0.n0).id = e0.start_id) join node n1 on ((n1.properties -> 'name'))::jsonb = to_jsonb(('n3')::text)::jsonb and (n1.id = e0.end_id or n1.id = e0.start_id) where ((s0.n0).id <> n1.id) and ((e0.properties -> 'prop'))::jsonb = to_jsonb(('a')::text)::jsonb) select count(*) > 0 from s1)); +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not (with s1 as (select s0.n0 as n0 from s0 join edge e0 on ((s0.n0).id = e0.end_id or (s0.n0).id = e0.start_id) join 
node n1 on (jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'n3') and (n1.id = e0.end_id or n1.id = e0.start_id) where ((s0.n0).id <> n1.id) and (jsonb_typeof((e0.properties -> 'prop')) = 'string' and (e0.properties ->> 'prop') = 'a')) select count(*) > 0 from s1)); -- case: match (s) where not (s)<-[{prop: 'a'}]-({name: 'n3'}) return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not (with s1 as (select s0.n0 as n0 from edge e0 join node n1 on ((n1.properties -> 'name'))::jsonb = to_jsonb(('n3')::text)::jsonb and n1.id = e0.start_id where ((e0.properties -> 'prop'))::jsonb = to_jsonb(('a')::text)::jsonb and (s0.n0).id = e0.end_id) select count(*) > 0 from s1)); +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not (with s1 as (select s0.n0 as n0 from edge e0 join node n1 on (jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'n3') and n1.id = e0.start_id where (jsonb_typeof((e0.properties -> 'prop')) = 'string' and (e0.properties ->> 'prop') = 'a') and (s0.n0).id = e0.end_id) select count(*) > 0 from s1)); -- case: match (n:NodeKind1) where n.distinguishedname = toUpper('admin') return n -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'distinguishedname'))::jsonb = to_jsonb((upper('admin')::text)::text)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s0.n0 as n from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'distinguishedname')) = 'string' and (n0.properties ->> 'distinguishedname') = upper('admin')::text)) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s0.n0 as n from s0; -- case: match (n:NodeKind1) where n.distinguishedname starts with 
toUpper('admin') return n with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (cypher_starts_with((n0.properties ->> 'distinguishedname'), (upper('admin')::text)::text)::bool) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s0.n0 as n from s0; @@ -232,10 +232,10 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (cypher_ends_with((n0.properties ->> 'distinguishedname'), (upper('admin')::text)::text)::bool) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s0.n0 as n from s0; -- case: match (s) where not (s)-[{prop: 'a'}]->({name: 'n3'}) return s -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not (with s1 as (select s0.n0 as n0 from edge e0 join node n1 on ((n1.properties -> 'name'))::jsonb = to_jsonb(('n3')::text)::jsonb and n1.id = e0.end_id where ((e0.properties -> 'prop'))::jsonb = to_jsonb(('a')::text)::jsonb and (s0.n0).id = e0.start_id) select count(*) > 0 from s1)); +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select s0.n0 as s from s0 where (not (with s1 as (select s0.n0 as n0 from edge e0 join node n1 on (jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'n3') and n1.id = e0.end_id where (jsonb_typeof((e0.properties -> 'prop')) = 'string' and (e0.properties ->> 'prop') = 'a') and (s0.n0).id = e0.start_id) select count(*) > 0 from s1)); -- case: match (s) where not (s)-[]-() return id(s) -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select (s0.n0).id from s0 where (not exists (select 1 from edge e0 where e0.start_id = (s0.n0).id or e0.end_id = (s0.n0).id)); +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select 
(s0.n0).id from s0 where (not exists (select 1 from edge e0 where (e0.start_id = (s0.n0).id or e0.end_id = (s0.n0).id))); -- case: match (n) where n.system_tags contains ($param) return n -- pgsql_params:{"pi0":null} @@ -344,7 +344,8 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((n1.properties ->> 'distinguishedname') = ((s0.n0).properties ->> 'unknown') || (n1.properties ->> 'unknown')) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s2 as (select s0.n0 as n0, s1.n1 as n1 from s0 left outer join s1 on (s0.n0 = s1.n0)), s3 as (select s2.n0 as n0, s2.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s2, node n2 where ((n2.properties -> 'distinguishedname') <> ((s2.n0).properties -> 'otherunknown')) and n2.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s4 as (select s2.n0 as n0, s2.n1 as n1, s3.n2 as n2 from s2 left outer join s3 on (s2.n1 = s3.n1) and (s2.n0 = s3.n0)) select s4.n0 as n, s4.n1 as m, s4.n2 as o from s4; -- case: match (n) where n.name = "alpha' || (SELECT inet_server_addr()::text::int) || '" return n -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('alpha'' || (SELECT inet_server_addr()::text::int) || ''')::text)::jsonb)) select s0.n0 as n from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'alpha'' || (SELECT inet_server_addr()::text::int) || '''))) select s0.n0 as n from s0; -- case: match (g:NodeKind2) where not ((g)<-[:EdgeKind1]-(:NodeKind1)) return g with s0 as (select (n0.id, 
n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]) select s0.n0 as g from s0 where (not ((with s1 as (select s0.n0 as n0 from edge e0 join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[]) and (s0.n0).id = e0.end_id) select count(*) > 0 from s1))); + diff --git a/cypher/models/pgsql/test/translation_cases/parameters.sql b/cypher/models/pgsql/test/translation_cases/parameters.sql index e1212eb8..26a6a5f8 100644 --- a/cypher/models/pgsql/test/translation_cases/parameters.sql +++ b/cypher/models/pgsql/test/translation_cases/parameters.sql @@ -29,9 +29,10 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from -- case: match (n) where n.isassignabletorole = $p0 return n -- cypher_params: {"p0":"true"} -- pgsql_params:{"pi0":"true"} -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'isassignabletorole') = @pi0::text)) select s0.n0 as n from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'isassignabletorole')) = 'string' and (n0.properties ->> 'isassignabletorole') = @pi0::text))) select s0.n0 as n from s0; -- case: match (n) where n.isassignabletorole = $p0 return n -- cypher_params: {"p0":true} -- pgsql_params:{"pi0":true} with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'isassignabletorole'))::jsonb = to_jsonb((@pi0::bool)::bool)::jsonb)) select s0.n0 as n from s0; + diff --git a/cypher/models/pgsql/test/translation_cases/pattern_binding.sql b/cypher/models/pgsql/test/translation_cases/pattern_binding.sql index 5b06f866..35883e93 100644 --- a/cypher/models/pgsql/test/translation_cases/pattern_binding.sql +++ b/cypher/models/pgsql/test/translation_cases/pattern_binding.sql 
@@ -15,79 +15,80 @@ -- SPDX-License-Identifier: Apache-2.0 -- case: match p = (:NodeKind1) return p -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select (array [s0.n0]::nodecomposite[], array []::edgecomposite[])::pathcomposite as p from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select case when (s0.n0).id is null then null else (array [s0.n0]::nodecomposite[], array []::edgecomposite[])::pathcomposite end as p from s0; -- case: match p = (n:NodeKind1) where n.name contains 'test' return p -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'name') like '%test%') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select (array [s0.n0]::nodecomposite[], array []::edgecomposite[])::pathcomposite as p from s0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'name') like '%test%') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select case when (s0.n0).id is null then null else (array [s0.n0]::nodecomposite[], array []::edgecomposite[])::pathcomposite end as p from s0; -- case: match p = ()-[]->() return p -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id) select (array [s0.n0, s0.n1]::nodecomposite[], array [s0.e0]::edgecomposite[])::pathcomposite as p from s0; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id 
join node n1 on n1.id = e0.end_id) select case when (s0.n0).id is null or s0.e0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s0.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p = ()-[]->() return nodes(p) -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id) select (((array [s0.n0, s0.n1]::nodecomposite[], array [s0.e0]::edgecomposite[])::pathcomposite).nodes)::nodecomposite[] from s0; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id) select ((case when (s0.n0).id is null or s0.e0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s0.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end).nodes)::nodecomposite[] from s0; -- case: match p = (:NodeKind1)-[:EdgeKind1|EdgeKind2*1..1]->(:NodeKind2) where any(r in relationships(p) where type(r) STARTS WITH 'EdgeKind') return p -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids 
operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 1 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where (((select count(*)::int from unnest((s0.e0)::edgecomposite[]) as i0 where (kind_name(i0.kind_id)::text like 'EdgeKind%')) >= 1)::bool); +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator 
(pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 1 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 where (((select count(*)::int from unnest(((select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id))::edgecomposite[]) as i0 where (kind_name(i0.kind_id)::text like 'EdgeKind%')) >= 1)::bool); -- case: match p=(:NodeKind1)-[r]->(:NodeKind1) where r.isacl return p limit 100 -with s0 
as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id where (((e0.properties ->> 'isacl'))::bool) limit 100) select (array [s0.n0, s0.n1]::nodecomposite[], array [s0.e0]::edgecomposite[])::pathcomposite as p from s0 limit 100; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id where (((e0.properties ->> 'isacl'))::bool) limit 100) select case when (s0.n0).id is null or (s0.e0).id is null or (s0.n1).id is null then null else (array [s0.n0, s0.n1]::nodecomposite[], array [s0.e0]::edgecomposite[])::pathcomposite end as p from s0 limit 100; -- case: match p = ()-[r1]->()-[r2]->(e) return e -with s0 as (select (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id) select s1.n2 as e from s1; +with s0 as (select e0.id as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join 
node n2 on n2.id = e1.end_id where e1.id != s0.e0) select s1.n2 as e from s1; -- case: match ()-[r1]->()-[r2]->()-[]->() where r1.name = 'a' and r2.name = 'b' return r1 -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where (((e0.properties -> 'name'))::jsonb = to_jsonb(('a')::text)::jsonb)), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where (((e1.properties -> 'name'))::jsonb = to_jsonb(('b')::text)::jsonb)), s2 as (select s1.e0 as e0, s1.e1 as e1, s1.n1 as n1, s1.n2 as n2 from s1 join edge e2 on (s1.n2).id = e2.start_id join node n3 on n3.id = e2.end_id) select s2.e0 as r1 from s2; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where ((jsonb_typeof((e0.properties -> 'name')) = 'string' and (e0.properties ->> 'name') = 'a'))), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where ((jsonb_typeof((e1.properties -> 'name')) = 'string' and (e1.properties ->> 'name') = 'b')) and e1.id != (s0.e0).id), s2 as (select s1.e0 as e0, s1.e1 as e1, s1.n1 as n1, s1.n2 as n2 from s1 join edge e2 on (s1.n2).id = e2.start_id join node n3 on n3.id = e2.end_id where e2.id != (s1.e0).id and e2.id != (s1.e1).id) select s2.e0 as r1 from s2; -- case: match p = (a)-[]->()<-[]-(f) where 
a.name = 'value' and f.is_target return p -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on (((n0.properties -> 'name'))::jsonb = to_jsonb(('value')::text)::jsonb) and n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.end_id join node n2 on (((n2.properties ->> 'is_target'))::bool) and n2.id = e1.start_id) select (array [s1.n0, s1.n1, s1.n2]::nodecomposite[], array [s1.e0, s1.e1]::edgecomposite[])::pathcomposite as p from s1; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'value')) and n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, e1.id as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.end_id join node n2 on (((n2.properties ->> 'is_target'))::bool) and n2.id = e1.start_id where e1.id != s0.e0) select case when (s1.n0).id is null or s1.e0 is null or (s1.n1).id is null or s1.e1 is null or (s1.n2).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, 
_edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e1]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1, s1.n2]::nodecomposite[])::pathcomposite end as p from s1; -- case: match p = ()-[*..]->() return p limit 1 -with s0 as (with recursive s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, false, e0.start_id = e0.end_id, array [e0.id] from edge e0 union all select s1.root_id, e0.end_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true limit 1) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 limit 1; +with s0 as (with recursive s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, false, e0.start_id = e0.end_id, array [e0.id] from edge e0 union all select s1.root_id, e0.end_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) 
offset 0) e0 on true where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true limit 1) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 limit 1; -- case: match p = (s)-[*..]->(i)-[]->() where id(s) = 1 and i.name = 'n3' return p limit 1 -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (n0.id = 1)), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, (((n1.properties -> 'name'))::jsonb = to_jsonb(('n3')::text)::jsonb), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, (((n1.properties -> 'name'))::jsonb = to_jsonb(('n3')::text)::jsonb), false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from 
unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied), s2 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.ep0 as ep0, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id limit 1) select ordered_edges_to_path(s2.n0, s2.e0 || array [s2.e1]::edgecomposite[], array [s2.n0, s2.n1, s2.n2]::nodecomposite[])::pathcomposite as p from s2 limit 1; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'n3'))), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, (n0.id = 1), e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n0 on n0.id = e0.start_id union all select s1.root_id, e0.start_id, s1.depth + 1, (n0.id = 1), false, e0.id || s1.path from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n0 on n0.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.root_id offset 0) n1 on true join 
lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.next_id offset 0) n0 on true where s1.satisfied), s2 as (select e1.id as e1, s0.ep0 as ep0, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.id != all (s0.ep0) limit 1) select case when (s2.n0).id is null or s2.ep0 is null or (s2.n1).id is null or s2.e1 is null or (s2.n2).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s2.e1]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, s2.n1, s2.n2]::nodecomposite[])::pathcomposite end as p from s2 limit 1; -- case: match p = ()-[e:EdgeKind1]->()-[:EdgeKind1*..]->() return e, p -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.start_id, e1.end_id, 1, false, e1.start_id = e1.end_id, array [e1.id] from s2_seed join edge e1 on e1.start_id = s2_seed.root_id where e1.kind_id = any (array [3]::int2[]) union all select s2.root_id, e1.end_id, s2.depth + 
1, false, false, s2.path || e1.id from s2 join lateral (select e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties from edge e1 where e1.start_id = s2.next_id and e1.id != all (s2.path) and e1.kind_id = any (array [3]::int2[]) offset 0) e1 on true where s2.depth < 15 and not s2.is_cycle) select s0.e0 as e0, (select coalesce(array_agg((e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e1 on e1.id = _path.id) as e1, s2.path as ep0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s2 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.root_id offset 0) n1 on true join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s2.next_id offset 0) n2 on true where (s0.n1).id = s2.root_id) select s1.e0 as e, ordered_edges_to_path(s1.n0, array [s1.e0]::edgecomposite[] || s1.e1, array [s1.n0, s1.n1, s1.n2]::nodecomposite[])::pathcomposite as p from s1; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.start_id, e1.end_id, 1, false, e1.start_id = e1.end_id, array [e1.id] from s2_seed join edge e1 on e1.start_id = s2_seed.root_id where e1.kind_id = any (array [3]::int2[]) union all select s2.root_id, e1.end_id, s2.depth + 1, false, false, s2.path || e1.id from s2 join lateral (select e1.id, e1.start_id, e1.end_id, 
e1.kind_id, e1.properties from edge e1 where e1.start_id = s2.next_id and e1.id != all (s2.path) and e1.kind_id = any (array [3]::int2[]) offset 0) e1 on true where s2.depth < 15 and not s2.is_cycle) select s0.e0 as e0, s2.path as ep0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s2 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.root_id offset 0) n1 on true join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s2.next_id offset 0) n2 on true where (s0.n1).id = s2.root_id) select s1.e0 as e, case when (s1.n0).id is null or (s1.e0).id is null or (s1.n1).id is null or s1.ep0 is null or (s1.n2).id is null then null else ordered_edges_to_path(s1.n0, array [s1.e0]::edgecomposite[] || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1, s1.n2]::nodecomposite[])::pathcomposite end as p from s1; -- case: match p = (m:NodeKind1)-[:EdgeKind1]->(c:NodeKind2) where m.objectid ends with "-513" and not toUpper(c.operatingsystem) contains "SERVER" return p limit 1000 -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on ((n0.properties ->> 'objectid') like '%-513') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on (not upper((n1.properties ->> 'operatingsystem'))::text like '%SERVER%') and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) limit 1000) select (array [s0.n0, 
s0.n1]::nodecomposite[], array [s0.e0]::edgecomposite[])::pathcomposite as p from s0 limit 1000; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on ((n0.properties ->> 'objectid') like '%-513') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on (not upper((n1.properties ->> 'operatingsystem'))::text like '%SERVER%') and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) limit 1000) select case when (s0.n0).id is null or s0.e0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s0.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 limit 1000; -- case: match p = (:NodeKind1)-[:EdgeKind1|EdgeKind2]->(e:NodeKind2)-[:EdgeKind2]->(:NodeKind1) where 'a' in e.values or 'b' in e.values or size(e.values) = 0 return p -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on ('a' = any (jsonb_to_text_array((n1.properties -> 'values'))::text[]) or 'b' = any (jsonb_to_text_array((n1.properties -> 'values'))::text[]) or jsonb_array_length((n1.properties -> 'values'))::int = 0) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[])), s1 as (select s0.e0 as e0, (e1.id, 
e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[])) select (array [s1.n0, s1.n1, s1.n2]::nodecomposite[], array [s1.e0, s1.e1]::edgecomposite[])::pathcomposite as p from s1; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on ('a' = any (jsonb_to_text_array((n1.properties -> 'values'))::text[]) or 'b' = any (jsonb_to_text_array((n1.properties -> 'values'))::text[]) or jsonb_array_length((n1.properties -> 'values'))::int = 0) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[])), s1 as (select s0.e0 as e0, e1.id as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[]) and e1.id != s0.e0) select case when (s1.n0).id is null or s1.e0 is null or (s1.n1).id is null or s1.e1 is null or (s1.n2).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array 
[]::edgecomposite[]) from unnest(array [s1.e1]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1, s1.n2]::nodecomposite[])::pathcomposite end as p from s1; -- case: match p = (n:NodeKind1)-[r]-(m:NodeKind1) return p -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (n0.id = e0.end_id or n0.id = e0.start_id) join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (n1.id = e0.end_id or n1.id = e0.start_id) where (n0.id <> n1.id)) select (array [s0.n0, s0.n1]::nodecomposite[], array [s0.e0]::edgecomposite[])::pathcomposite as p from s0; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (n0.id = e0.end_id or n0.id = e0.start_id) join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (n1.id = e0.end_id or n1.id = e0.start_id) where (n0.id <> n1.id)) select case when (s0.n0).id is null or s0.e0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s0.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p = (:NodeKind1)-[:EdgeKind1]->(:NodeKind2)-[:EdgeKind2*1..]->(t:NodeKind2) where coalesce(t.system_tags, '') contains 'admin_tier_0' return p limit 1000 -with s0 as (select (e0.id, e0.start_id, e0.end_id, 
e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.start_id, e1.end_id, 1, (coalesce((n2.properties ->> 'system_tags'), '')::text like '%admin_tier_0%') and n2.kind_ids operator (pg_catalog.@>) array [2]::int2[], e1.start_id = e1.end_id, array [e1.id] from s2_seed join edge e1 on e1.start_id = s2_seed.root_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[]) union all select s2.root_id, e1.end_id, s2.depth + 1, (coalesce((n2.properties ->> 'system_tags'), '')::text like '%admin_tier_0%') and n2.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e1.id from s2 join lateral (select e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties from edge e1 where e1.start_id = s2.next_id and e1.id != all (s2.path) and e1.kind_id = any (array [4]::int2[]) offset 0) e1 on true join node n2 on n2.id = e1.end_id where s2.depth < 15 and not s2.is_cycle) select s0.e0 as e0, (select coalesce(array_agg((e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e1 on e1.id = _path.id) as e1, s2.path as ep0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s2 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.root_id offset 0) n1 on true join lateral 
(select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s2.next_id offset 0) n2 on true where s2.satisfied and (s0.n1).id = s2.root_id limit 1000) select ordered_edges_to_path(s1.n0, array [s1.e0]::edgecomposite[] || s1.e1, array [s1.n0, s1.n1, s1.n2]::nodecomposite[])::pathcomposite as p from s1 limit 1000; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.start_id, e1.end_id, 1, (coalesce((n2.properties ->> 'system_tags'), '')::text like '%admin_tier_0%') and n2.kind_ids operator (pg_catalog.@>) array [2]::int2[], e1.start_id = e1.end_id, array [e1.id] from s2_seed join edge e1 on e1.start_id = s2_seed.root_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[]) union all select s2.root_id, e1.end_id, s2.depth + 1, (coalesce((n2.properties ->> 'system_tags'), '')::text like '%admin_tier_0%') and n2.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e1.id from s2 join lateral (select e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties from edge e1 where e1.start_id = s2.next_id and e1.id != all (s2.path) and e1.kind_id = any (array [4]::int2[]) offset 0) e1 on true join node n2 on n2.id = e1.end_id where s2.depth < 15 and not s2.is_cycle) select s0.e0 as e0, s2.path as ep0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s2 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = 
s2.root_id offset 0) n1 on true join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s2.next_id offset 0) n2 on true where s2.satisfied and (s0.n1).id = s2.root_id limit 1000) select case when (s1.n0).id is null or s1.e0 is null or (s1.n1).id is null or s1.ep0 is null or (s1.n2).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1, s1.n2]::nodecomposite[])::pathcomposite end as p from s1 limit 1000; -- case: match (u:NodeKind1) where u.samaccountname in ["foo", "bar"] match p = (u)-[:EdgeKind1|EdgeKind2*1..3]->(t) where coalesce(t.system_tags, '') contains 'admin_tier_0' return p limit 1000 -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'samaccountname') = any (array ['foo', 'bar']::text[])) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n0).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, (coalesce((n1.properties ->> 'system_tags'), '')::text like '%admin_tier_0%'), e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, (coalesce((n1.properties ->> 
'system_tags'), '')::text like '%admin_tier_0%'), false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 3 and not s2.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and (s0.n0).id = s2.root_id) select ordered_edges_to_path(s1.n0, s1.e0, array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite as p from s1 limit 1000; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((n0.properties ->> 'samaccountname') = any (array ['foo', 'bar']::text[])) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n0).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, (coalesce((n1.properties ->> 'system_tags'), '')::text like '%admin_tier_0%'), e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s2.root_id, e0.end_id, s2.depth + 1, (coalesce((n1.properties ->> 'system_tags'), '')::text like 
'%admin_tier_0%'), false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 3 and not s2.is_cycle) select s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and (s0.n0).id = s2.root_id) select case when (s1.n0).id is null or s1.ep0 is null or (s1.n1).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite end as p from s1 limit 1000; -- case: match (x:NodeKind1) where x.name = 'foo' match (y:NodeKind2) where y.name = 'bar' match p=(x)-[:EdgeKind1]->(y) return p -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('foo')::text)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where (((n1.properties -> 'name'))::jsonb = to_jsonb(('bar')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s2 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e0 on (s1.n0).id = 
e0.start_id join node n1 on (s1.n1).id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select (array [s2.n0, s2.n1]::nodecomposite[], array [s2.e0]::edgecomposite[])::pathcomposite as p from s2; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'foo')) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'bar')) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s2 as (select e0.id as e0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e0 on (s1.n0).id = e0.start_id and (s1.n1).id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select case when (s2.n0).id is null or s2.e0 is null or (s2.n1).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s2.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, s2.n1]::nodecomposite[])::pathcomposite end as p from s2; -- case: match (x:NodeKind1{name:'foo'}) match (y:NodeKind2{name:'bar'}) match p=(x)-[:EdgeKind1]->(y) return p -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and ((n0.properties -> 'name'))::jsonb = to_jsonb(('foo')::text)::jsonb), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and ((n1.properties -> 'name'))::jsonb = to_jsonb(('bar')::text)::jsonb), s2 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, 
e0.properties)::edgecomposite as e0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e0 on (s1.n0).id = e0.start_id join node n1 on (s1.n1).id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select (array [s2.n0, s2.n1]::nodecomposite[], array [s2.e0]::edgecomposite[])::pathcomposite as p from s2; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'foo')), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and (jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'bar')), s2 as (select e0.id as e0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e0 on (s1.n0).id = e0.start_id and (s1.n1).id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select case when (s2.n0).id is null or s2.e0 is null or (s2.n1).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s2.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, s2.n1]::nodecomposite[])::pathcomposite end as p from s2; -- case: match (x:NodeKind1{name:'foo'}) match p=(x)-[:EdgeKind1]->(y:NodeKind2{name:'bar'}) return p -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and ((n0.properties -> 'name'))::jsonb = to_jsonb(('foo')::text)::jsonb), s1 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0 join edge e0 on (s0.n0).id = 
e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and ((n1.properties -> 'name'))::jsonb = to_jsonb(('bar')::text)::jsonb and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select (array [s1.n0, s1.n1]::nodecomposite[], array [s1.e0]::edgecomposite[])::pathcomposite as p from s1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'foo')), s1 as (select e0.id as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0 join edge e0 on (s0.n0).id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and (jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'bar') and n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select case when (s1.n0).id is null or s1.e0 is null or (s1.n1).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite end as p from s1; -- case: match (e) match p = ()-[]->(e) return p limit 1 -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0), s1 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0 join edge e0 on (s0.n0).id = e0.end_id join node n1 on n1.id = e0.start_id) select (array [s1.n1, s1.n0]::nodecomposite[], array [s1.e0]::edgecomposite[])::pathcomposite as p from s1 limit 1; +with s0 as (select (n0.id, n0.kind_ids, 
n0.properties)::nodecomposite as n0 from node n0), s1 as (select e0.id as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0 join edge e0 on (s0.n0).id = e0.end_id join node n1 on n1.id = e0.start_id) select case when (s1.n1).id is null or s1.e0 is null or (s1.n0).id is null then null else ordered_edges_to_path(s1.n1, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n1, s1.n0]::nodecomposite[])::pathcomposite end as p from s1 limit 1; -- case: match p = (a)-[]->() match q = ()-[]->(a) return p, q -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n0).id = e1.end_id join node n2 on n2.id = e1.start_id) select (array [s1.n0, s1.n1]::nodecomposite[], array [s1.e0]::edgecomposite[])::pathcomposite as p, (array [s1.n2, s1.n0]::nodecomposite[], array [s1.e1]::edgecomposite[])::pathcomposite as q from s1; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, e1.id as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n0).id = e1.end_id join node n2 on n2.id = e1.start_id) 
select case when (s1.n0).id is null or s1.e0 is null or (s1.n1).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite end as p, case when (s1.n2).id is null or s1.e1 is null or (s1.n0).id is null then null else ordered_edges_to_path(s1.n2, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e1]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n2, s1.n0]::nodecomposite[])::pathcomposite end as q from s1; -- case: match (m:NodeKind1)-[*1..]->(g:NodeKind2)-[]->(c3:NodeKind1) where not g.name in ["foo"] with collect(g.name) as bar match p=(m:NodeKind1)-[*1..]->(g:NodeKind2) where g.name in bar return p -with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, (not (n1.properties ->> 'name') = any (array ['foo']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id union all select s2.root_id, e0.end_id, s2.depth + 1, (not (n1.properties ->> 'name') = any (array ['foo']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id 
and e0.id != all (s2.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied), s3 as (select s1.e0 as e0, s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id) select array_remove(coalesce(array_agg(((s3.n1).properties ->> 'name'))::anyarray, array []::text[])::anyarray, null)::anyarray as i0 from s3), s4 as (with recursive s5_seed(root_id) as not materialized (select n4.id as root_id from s0, node n4 where n4.kind_ids operator (pg_catalog.@>) array [2]::int2[] and ((n4.properties ->> 'name') = any (s0.i0))), s5(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.end_id, e2.start_id, 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.end_id = e2.start_id, array [e2.id] from s5_seed join edge e2 on e2.end_id = s5_seed.root_id join node n3 on n3.id = e2.start_id union select s5.root_id, e2.start_id, s5.depth + 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e2.id || s5.path from s5 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.end_id = s5.next_id and e2.id != all (s5.path) offset 0) e2 on true join node n3 on n3.id = e2.start_id where s5.depth < 15 and not 
s5.is_cycle) select (select coalesce(array_agg((e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s5.path) with ordinality as _path(id, ordinality) join edge e2 on e2.id = _path.id) as e2, s5.path as ep1, s0.i0 as i0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s0, s5 join lateral (select n4.id, n4.kind_ids, n4.properties from node n4 where n4.id = s5.root_id offset 0) n4 on true join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s5.next_id offset 0) n3 on true where s5.satisfied) select ordered_edges_to_path(s4.n3, s4.e2, array [s4.n3, s4.n4]::nodecomposite[])::pathcomposite as p from s4; +with s0 as (with s1 as (with recursive s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, (not (n1.properties ->> 'name') = any (array ['foo']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id join node n1 on n1.id = e0.end_id union all select s2.root_id, e0.end_id, s2.depth + 1, (not (n1.properties ->> 'name') = any (array ['foo']::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s2.path || e0.id from s2 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s2.next_id and e0.id != all (s2.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s2.depth < 15 and not s2.is_cycle) select s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id 
= s2.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.next_id offset 0) n1 on true where s2.satisfied and exists (select 1 from edge e1 join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where n1.id = e1.start_id)), s3 as (select s1.ep0 as ep0, s1.n0 as n0, s1.n1 as n1 from s1 join edge e1 on (s1.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id = e1.end_id where e1.id != all (s1.ep0)) select array_remove(coalesce(array_agg(((s3.n1).properties ->> 'name'))::anyarray, array []::text[])::anyarray, null)::anyarray as i0 from s3), s4 as (with recursive s5_seed(root_id) as not materialized (select n4.id as root_id from s0, node n4 where n4.kind_ids operator (pg_catalog.@>) array [2]::int2[] and ((n4.properties ->> 'name') = any (s0.i0))), s5(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.end_id, e2.start_id, 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.end_id = e2.start_id, array [e2.id] from s5_seed join edge e2 on e2.end_id = s5_seed.root_id join node n3 on n3.id = e2.start_id union select s5.root_id, e2.start_id, s5.depth + 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e2.id || s5.path from s5 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.end_id = s5.next_id and e2.id != all (s5.path) offset 0) e2 on true join node n3 on n3.id = e2.start_id where s5.depth < 15 and not s5.is_cycle) select s5.path as ep1, s0.i0 as i0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3, (n4.id, n4.kind_ids, n4.properties)::nodecomposite as n4 from s0, s5 join lateral (select n4.id, n4.kind_ids, n4.properties from node n4 where n4.id = s5.root_id offset 0) n4 on true join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s5.next_id offset 0) n3 on true where s5.satisfied) select case when 
(s4.n3).id is null or s4.ep1 is null or (s4.n4).id is null then null else ordered_edges_to_path(s4.n3, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s4.ep1) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s4.n3, s4.n4]::nodecomposite[])::pathcomposite end as p from s4; -- case: MATCH p=(:Computer)-[r:HasSession]->(:User) WHERE r.lastseen >= datetime() - duration('P3D') RETURN p LIMIT 100 -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [5]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [6]::int2[] and n1.id = e0.end_id where (((e0.properties ->> 'lastseen'))::timestamp with time zone >= now()::timestamp with time zone - interval 'P3D') and e0.kind_id = any (array [7]::int2[]) limit 100) select (array [s0.n0, s0.n1]::nodecomposite[], array [s0.e0]::edgecomposite[])::pathcomposite as p from s0 limit 100; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [5]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [6]::int2[] and n1.id = e0.end_id where (((e0.properties ->> 'lastseen'))::timestamp with time zone >= now()::timestamp with time zone - interval 'P3D') and e0.kind_id = any (array [7]::int2[]) limit 100) select case when (s0.n0).id is null or (s0.e0).id is null or (s0.n1).id is null then null else (array [s0.n0, s0.n1]::nodecomposite[], 
array [s0.e0]::edgecomposite[])::pathcomposite end as p from s0 limit 100; -- case: MATCH p=(:GPO)-[r:GPLink|Contains*1..]->(:Base) WHERE HEAD(r).enforced OR NONE(n in TAIL(TAIL(NODES(p))) WHERE (n:OU AND n.blocksinheritance)) RETURN p -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [8]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [10]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [11, 12]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [10]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [11, 12]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where (((((s0.e0)[1]).properties ->> 'enforced'))::bool or ((select 
count(*)::int from unnest(coalesce((coalesce((((ordered_edges_to_path(((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, s0.e0, array [((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, ((s0.n1).id, (s0.n1).kind_ids, (s0.n1).properties)::nodecomposite]::nodecomposite[])::pathcomposite).nodes)::nodecomposite[])[2:cardinality(((ordered_edges_to_path(((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, s0.e0, array [((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, ((s0.n1).id, (s0.n1).kind_ids, (s0.n1).properties)::nodecomposite]::nodecomposite[])::pathcomposite).nodes)::nodecomposite[])::int], array []::nodecomposite[])::nodecomposite[])[2:cardinality(coalesce((((ordered_edges_to_path(((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, s0.e0, array [((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, ((s0.n1).id, (s0.n1).kind_ids, (s0.n1).properties)::nodecomposite]::nodecomposite[])::pathcomposite).nodes)::nodecomposite[])[2:cardinality(((ordered_edges_to_path(((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, s0.e0, array [((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, ((s0.n1).id, (s0.n1).kind_ids, (s0.n1).properties)::nodecomposite]::nodecomposite[])::pathcomposite).nodes)::nodecomposite[])::int], array []::nodecomposite[])::nodecomposite[])::int], array []::nodecomposite[])::nodecomposite[]) as i0 where ((i0.kind_ids operator (pg_catalog.@>) array [9]::int2[] and ((i0.properties ->> 'blocksinheritance'))::bool))) = 0 and coalesce((coalesce((((ordered_edges_to_path(((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, s0.e0, array [((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, ((s0.n1).id, (s0.n1).kind_ids, (s0.n1).properties)::nodecomposite]::nodecomposite[])::pathcomposite).nodes)::nodecomposite[])[2:cardinality(((ordered_edges_to_path(((s0.n0).id, (s0.n0).kind_ids, 
(s0.n0).properties)::nodecomposite, s0.e0, array [((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, ((s0.n1).id, (s0.n1).kind_ids, (s0.n1).properties)::nodecomposite]::nodecomposite[])::pathcomposite).nodes)::nodecomposite[])::int], array []::nodecomposite[])::nodecomposite[])[2:cardinality(coalesce((((ordered_edges_to_path(((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, s0.e0, array [((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, ((s0.n1).id, (s0.n1).kind_ids, (s0.n1).properties)::nodecomposite]::nodecomposite[])::pathcomposite).nodes)::nodecomposite[])[2:cardinality(((ordered_edges_to_path(((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, s0.e0, array [((s0.n0).id, (s0.n0).kind_ids, (s0.n0).properties)::nodecomposite, ((s0.n1).id, (s0.n1).kind_ids, (s0.n1).properties)::nodecomposite]::nodecomposite[])::pathcomposite).nodes)::nodecomposite[])::int], array []::nodecomposite[])::nodecomposite[])::int], array []::nodecomposite[])::nodecomposite[] is not null)::bool); +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [8]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [10]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [11, 12]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [10]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [11, 12]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select 
coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select s2.pc0 as p from s0, lateral (select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as pc0 offset 0) s2 where (((((s0.e0)[1]).properties ->> 'enforced'))::bool or ((select count(*)::int from unnest(coalesce((coalesce((((s2.pc0).nodes)::nodecomposite[])[2:], array []::nodecomposite[])::nodecomposite[])[2:], array []::nodecomposite[])::nodecomposite[]) as i0 where ((i0.kind_ids operator (pg_catalog.@>) array [9]::int2[] and ((i0.properties ->> 'blocksinheritance'))::bool))) = 0 and coalesce((coalesce((((s2.pc0).nodes)::nodecomposite[])[2:], array []::nodecomposite[])::nodecomposite[])[2:], array []::nodecomposite[])::nodecomposite[] is not null)::bool); -- case: MATCH p=(:GPO)-[r:GPLink|Contains*1..]->(:Base) WHERE NONE(x in TAIL(r) WHERE NOT type(x) = 'Contains') RETURN p -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array 
[8]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [10]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [11, 12]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [10]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [11, 12]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where (((select count(*)::int from unnest(coalesce((s0.e0)[2:cardinality(s0.e0)::int], array []::edgecomposite[])::edgecomposite[]) as i0 where (not i0.kind_id = 12)) = 0 and coalesce((s0.e0)[2:cardinality(s0.e0)::int], array []::edgecomposite[])::edgecomposite[] is not null)::bool); +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [8]::int2[]), 
s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [10]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [11, 12]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [10]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [11, 12]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 where (((select count(*)::int from unnest(coalesce((s0.e0)[2:], array 
[]::edgecomposite[])::edgecomposite[]) as i0 where (not i0.kind_id = 12)) = 0 and coalesce((s0.e0)[2:], array []::edgecomposite[])::edgecomposite[] is not null)::bool); + diff --git a/cypher/models/pgsql/test/translation_cases/pattern_expansion.sql b/cypher/models/pgsql/test/translation_cases/pattern_expansion.sql index 06878fc5..895d3c41 100644 --- a/cypher/models/pgsql/test/translation_cases/pattern_expansion.sql +++ b/cypher/models/pgsql/test/translation_cases/pattern_expansion.sql @@ -15,70 +15,71 @@ -- SPDX-License-Identifier: Apache-2.0 -- case: match (n)-[*..]->(e) return n, e -with s0 as (with recursive s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, false, e0.start_id = e0.end_id, array [e0.id] from edge e0 union all select s1.root_id, e0.end_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true) select s0.n0 as n, s0.n1 as e from s0; +with s0 as (with recursive s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, false, e0.start_id = e0.end_id, array [e0.id] from edge e0 union all select s1.root_id, e0.end_id, s1.depth + 1, false, false, 
s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 15 and not s1.is_cycle) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true) select s0.n0 as n, s0.n1 as e from s0; -- case: match (n)-[*1..2]->(e) return n, e -with s0 as (with recursive s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, false, e0.start_id = e0.end_id, array [e0.id] from edge e0 union all select s1.root_id, e0.end_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 2 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true) select s0.n0 as n, s0.n1 as e from s0; +with s0 as (with recursive s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, false, e0.start_id = e0.end_id, array 
[e0.id] from edge e0 union all select s1.root_id, e0.end_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 2 and not s1.is_cycle) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true) select s0.n0 as n, s0.n1 as e from s0; -- case: match (n)-[*3..5]->(e) return n, e -with s0 as (with recursive s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, false, e0.start_id = e0.end_id, array [e0.id] from edge e0 union all select s1.root_id, e0.end_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 5 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.depth >= 3) select s0.n0 as n, s0.n1 as e from s0; +with s0 as (with recursive s1(root_id, next_id, depth, 
satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, false, e0.start_id = e0.end_id, array [e0.id] from edge e0 union all select s1.root_id, e0.end_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 5 and not s1.is_cycle) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.depth >= 3) select s0.n0 as n, s0.n1 as e from s0; -- case: match (n)<-[*2..5]-(e) return n, e -with s0 as (with recursive s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, false, e0.end_id = e0.start_id, array [e0.id] from edge e0 union all select s1.root_id, e0.start_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 5 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on 
true where s1.depth >= 2) select s0.n0 as n, s0.n1 as e from s0; +with s0 as (with recursive s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, false, e0.end_id = e0.start_id, array [e0.id] from edge e0 union all select s1.root_id, e0.start_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 5 and not s1.is_cycle) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.depth >= 2) select s0.n0 as n, s0.n1 as e from s0; -- case: match p = (n)-[*..]->(e:NodeKind1) return p -with s0 as (with recursive s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where n1.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, false, e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id union all select s1.root_id, e0.start_id, s1.depth + 1, false, false, e0.id || s1.path from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, 
n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.next_id offset 0) n0 on true) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where n1.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, false, e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id union all select s1.root_id, e0.start_id, s1.depth + 1, false, false, e0.id || s1.path from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.next_id offset 0) n0 on true) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match (n)-[*..]->(e:NodeKind1) where 
n.name = 'n1' return e -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('n1')::text)::jsonb)), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select s0.n1 as e from s0; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n1'))), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = 
s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select s0.n1 as e from s0; -- case: match (n)-[*..]->(e:NodeKind1) where n.name = 'n2' return n -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('n2')::text)::jsonb)), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on 
e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select s0.n0 as n from s0; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n2'))), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select s0.n0 as n from s0; -- case: match (n)-[*..]->(e:NodeKind1)-[]->(l) where n.name = 'n1' return l -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('n1')::text)::jsonb)), s1(root_id, next_id, 
depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied), s2 as (select s0.e0 as e0, s0.ep0 as ep0, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id) select s2.n2 as l from s2; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n1'))), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node 
n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied), s2 as (select s0.ep0 as ep0, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.id != all (s0.ep0)) select s2.n2 as l from s2; -- case: match (n)-[*2..3]->(e:NodeKind1)-[]->(l) where n.name = 'n1' return l -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('n1')::text)::jsonb)), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 3 and not s1.is_cycle) select (select 
coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.depth >= 2 and s1.satisfied), s2 as (select s0.e0 as e0, s0.ep0 as ep0, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id) select s2.n2 as l from s2; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n1'))), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 3 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = 
s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.depth >= 2 and s1.satisfied), s2 as (select s0.ep0 as ep0, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.id != all (s0.ep0)) select s2.n2 as l from s2; -- case: match (n)-[]->(e:NodeKind1)-[*2..3]->(l) where n.name = 'n1' return l -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on (((n0.properties -> 'name'))::jsonb = to_jsonb(('n1')::text)::jsonb) and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.start_id, e1.end_id, 1, false, e1.start_id = e1.end_id, array [e1.id] from s2_seed join edge e1 on e1.start_id = s2_seed.root_id union all select s2.root_id, e1.end_id, s2.depth + 1, false, false, s2.path || e1.id from s2 join lateral (select e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties from edge e1 where e1.start_id = s2.next_id and e1.id != all (s2.path) offset 0) e1 on true where s2.depth < 3 and not s2.is_cycle) select (select coalesce(array_agg((e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e1 on e1.id = _path.id) as e1, s2.path as ep0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s2 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.root_id offset 
0) n1 on true join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s2.next_id offset 0) n2 on true where s2.depth >= 2 and (s0.n1).id = s2.root_id) select s1.n2 as l from s1; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n1')) and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id), s1 as (with recursive s2_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.start_id, e1.end_id, 1, false, e1.start_id = e1.end_id, array [e1.id] from s2_seed join edge e1 on e1.start_id = s2_seed.root_id union all select s2.root_id, e1.end_id, s2.depth + 1, false, false, s2.path || e1.id from s2 join lateral (select e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties from edge e1 where e1.start_id = s2.next_id and e1.id != all (s2.path) offset 0) e1 on true where s2.depth < 3 and not s2.is_cycle) select s0.e0 as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s2 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s2.root_id offset 0) n1 on true join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s2.next_id offset 0) n2 on true where s2.depth >= 2 and (s0.n1).id = s2.root_id) select s1.n2 as l from s1; -- case: match (n)-[*..]->(e)-[:EdgeKind1|EdgeKind2]->()-[*..]->(l) where n.name = 'n1' and e.name = 'n2' return l -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('n1')::text)::jsonb)), s1(root_id, next_id, depth, satisfied, 
is_cycle, path) as (select e0.start_id, e0.end_id, 1, (((n1.properties -> 'name'))::jsonb = to_jsonb(('n2')::text)::jsonb), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, (((n1.properties -> 'name'))::jsonb = to_jsonb(('n2')::text)::jsonb), false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied), s2 as (select s0.e0 as e0, s0.ep0 as ep0, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [3, 4]::int2[])), s3 as (with recursive s4_seed(root_id) as not materialized (select distinct (s2.n2).id as root_id from s2), s4(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.start_id, e2.end_id, 1, false, e2.start_id = e2.end_id, array [e2.id] from s4_seed join edge e2 on e2.start_id = s4_seed.root_id union all select s4.root_id, e2.end_id, s4.depth + 1, false, false, s4.path || e2.id from s4 join lateral (select e2.id, 
e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.start_id = s4.next_id and e2.id != all (s4.path) offset 0) e2 on true where s4.depth < 15 and not s4.is_cycle) select s2.e0 as e0, (select coalesce(array_agg((e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s4.path) with ordinality as _path(id, ordinality) join edge e2 on e2.id = _path.id) as e2, s2.ep0 as ep0, s4.path as ep1, s2.n0 as n0, s2.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3 from s2, s4 join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s4.root_id offset 0) n2 on true join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s4.next_id offset 0) n3 on true where (s2.n2).id = s4.root_id) select s3.n3 as l from s3; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n1'))), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'n2')), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'n2')), false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, 
n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied and exists (select 1 from edge e1 join node n2 on n2.id = e1.end_id where n1.id = e1.start_id and e1.kind_id = any (array [3, 4]::int2[]))), s2 as (select e1.id as e1, s0.ep0 as ep0, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [3, 4]::int2[]) and e1.id != all (s0.ep0)), s3 as (with recursive s4_seed(root_id) as not materialized (select distinct (s2.n2).id as root_id from s2), s4(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.start_id, e2.end_id, 1, false, e2.start_id = e2.end_id, array [e2.id] from s4_seed join edge e2 on e2.start_id = s4_seed.root_id union all select s4.root_id, e2.end_id, s4.depth + 1, false, false, s4.path || e2.id from s4 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.start_id = s4.next_id and e2.id != all (s4.path) offset 0) e2 on true where s4.depth < 15 and not s4.is_cycle) select s2.e1 as e1, s2.ep0 as ep0, s2.n0 as n0, s2.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3 from s2, s4 join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s4.root_id offset 0) n2 on true join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s4.next_id offset 0) n3 on true where (s2.n2).id = s4.root_id) select s3.n3 as l from s3; -- case: match p = (:NodeKind1)-[:EdgeKind1*1..]->(n:NodeKind2) where 'admin_tier_0' in split(n.system_tags, ' ') return p limit 1000 -with s0 as (with recursive s1_seed(root_id) as not materialized 
(select n1.id as root_id from node n1 where ('admin_tier_0' = any (string_to_array((n1.properties ->> 'system_tags'), ' ')::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n0.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n0 on n0.id = e0.start_id where e0.kind_id = any (array [3]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, n0.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e0.id || s1.path from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n0 on n0.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.next_id offset 0) n0 on true where s1.satisfied limit 1000) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 limit 1000; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ('admin_tier_0' = any (string_to_array((n1.properties ->> 'system_tags'), ' ')::text[])) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), 
s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n0.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n0 on n0.id = e0.start_id where e0.kind_id = any (array [3]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, n0.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e0.id || s1.path from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n0 on n0.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.next_id offset 0) n0 on true where s1.satisfied limit 1000) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 limit 1000; -- case: match p = (s:NodeKind1)-[*..]->(e:NodeKind2) where s <> e return p -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator 
(pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied and (n0.id <> n1.id)) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id union all select s1.root_id, e0.end_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, 
e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied and (n0.id <> n1.id)) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p = (g:NodeKind1)-[:EdgeKind1|EdgeKind2*]->(target:NodeKind1) where g.objectid ends with '1234' and target.objectid ends with '4567' return p -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.properties ->> 'objectid') like '%1234') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, ((n1.properties ->> 'objectid') like '%4567') and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, ((n1.properties ->> 'objectid') like '%4567') and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from 
s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.properties ->> 'objectid') like '%1234') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.start_id, e0.end_id, 1, ((n1.properties ->> 'objectid') like '%4567') and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, ((n1.properties ->> 'objectid') like '%4567') and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and 
e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p = (m:NodeKind2)-[:EdgeKind1*1..]->(n:NodeKind1) where n.objectid = '1234' return p limit 10 -with s0 as (with recursive s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where (((n1.properties -> 'objectid'))::jsonb = to_jsonb(('1234')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n0.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n0 on n0.id = e0.start_id where e0.kind_id = any (array [3]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, n0.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, e0.id || s1.path from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array 
[3]::int2[]) offset 0) e0 on true join node n0 on n0.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.next_id offset 0) n0 on true where s1.satisfied limit 10) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 limit 10; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((jsonb_typeof((n1.properties -> 'objectid')) = 'string' and (n1.properties ->> 'objectid') = '1234')) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n0.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n0 on n0.id = e0.start_id where e0.kind_id = any (array [3]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, n0.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, e0.id || s1.path from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n0 on n0.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as 
n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.next_id offset 0) n0 on true where s1.satisfied limit 10) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 limit 10; -- case: match p = (:NodeKind1)<-[:EdgeKind1|EdgeKind2*..]-() return p limit 10 -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, false, e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, 
n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true limit 10) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 limit 10; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, false, e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, false, false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true limit 10) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p 
from s0 limit 10; -- case: match p = (:NodeKind1)<-[:EdgeKind1|EdgeKind2*..]-(:NodeKind2)<-[:EdgeKind1|EdgeKind2*2..]-(:NodeKind1) return p limit 10 -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n1 on n1.id = e0.start_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied), s2 as (with recursive s3_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.end_id, e1.start_id, 1, n2.kind_ids operator (pg_catalog.@>) array [1]::int2[], e1.end_id = 
e1.start_id, array [e1.id] from s3_seed join edge e1 on e1.end_id = s3_seed.root_id join node n2 on n2.id = e1.start_id where e1.kind_id = any (array [3, 4]::int2[]) union all select s3.root_id, e1.start_id, s3.depth + 1, n2.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s3.path || e1.id from s3 join lateral (select e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties from edge e1 where e1.end_id = s3.next_id and e1.id != all (s3.path) and e1.kind_id = any (array [3, 4]::int2[]) offset 0) e1 on true join node n2 on n2.id = e1.start_id where s3.depth < 15 and not s3.is_cycle) select s0.e0 as e0, (select coalesce(array_agg((e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s3.path) with ordinality as _path(id, ordinality) join edge e1 on e1.id = _path.id) as e1, s0.ep0 as ep0, s3.path as ep1, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s3 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s3.root_id offset 0) n1 on true join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s3.next_id offset 0) n2 on true where s3.depth >= 2 and s3.satisfied and (s0.n1).id = s3.root_id limit 10) select ordered_edges_to_path(s2.n0, s2.e0 || s2.e1, array [s2.n0, s2.n1, s2.n2]::nodecomposite[])::pathcomposite as p from s2 limit 10; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n1 on n1.id = e0.start_id where e0.kind_id = any (array [3, 4]::int2[]) union all select 
s1.root_id, e0.start_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied), s2 as (with recursive s3_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.end_id, e1.start_id, 1, n2.kind_ids operator (pg_catalog.@>) array [1]::int2[], e1.end_id = e1.start_id, array [e1.id] from s3_seed join edge e1 on e1.end_id = s3_seed.root_id join node n2 on n2.id = e1.start_id where e1.kind_id = any (array [3, 4]::int2[]) union all select s3.root_id, e1.start_id, s3.depth + 1, n2.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s3.path || e1.id from s3 join lateral (select e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties from edge e1 where e1.end_id = s3.next_id and e1.id != all (s3.path) and e1.kind_id = any (array [3, 4]::int2[]) offset 0) e1 on true join node n2 on n2.id = e1.start_id where s3.depth < 15 and not s3.is_cycle) select s0.ep0 as ep0, s3.path as ep1, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s3 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s3.root_id offset 0) n1 on true join lateral (select n2.id, 
n2.kind_ids, n2.properties from node n2 where n2.id = s3.next_id offset 0) n2 on true where s3.depth >= 2 and s3.satisfied and (s0.n1).id = s3.root_id limit 10) select case when (s2.n0).id is null or s2.ep0 is null or (s2.n1).id is null or s2.ep1 is null or (s2.n2).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep1) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, s2.n1, s2.n2]::nodecomposite[])::pathcomposite end as p from s2 limit 10; -- case: match p = (:NodeKind1)<-[:EdgeKind1|EdgeKind2*..]-(:NodeKind2)<-[:EdgeKind1|EdgeKind2*..]-(:NodeKind1) return p limit 10 -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n1 on n1.id = e0.start_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.start_id where s1.depth < 15 and not 
s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied), s2 as (with recursive s3_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.end_id, e1.start_id, 1, n2.kind_ids operator (pg_catalog.@>) array [1]::int2[], e1.end_id = e1.start_id, array [e1.id] from s3_seed join edge e1 on e1.end_id = s3_seed.root_id join node n2 on n2.id = e1.start_id where e1.kind_id = any (array [3, 4]::int2[]) union all select s3.root_id, e1.start_id, s3.depth + 1, n2.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s3.path || e1.id from s3 join lateral (select e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties from edge e1 where e1.end_id = s3.next_id and e1.id != all (s3.path) and e1.kind_id = any (array [3, 4]::int2[]) offset 0) e1 on true join node n2 on n2.id = e1.start_id where s3.depth < 15 and not s3.is_cycle) select s0.e0 as e0, (select coalesce(array_agg((e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s3.path) with ordinality as _path(id, ordinality) join edge e1 on e1.id = _path.id) as e1, s0.ep0 as ep0, s3.path as ep1, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s3 join lateral 
(select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s3.root_id offset 0) n1 on true join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s3.next_id offset 0) n2 on true where s3.satisfied and (s0.n1).id = s3.root_id limit 10) select ordered_edges_to_path(s2.n0, s2.e0 || s2.e1, array [s2.n0, s2.n1, s2.n2]::nodecomposite[])::pathcomposite as p from s2 limit 10; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n1 on n1.id = e0.start_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, n1.kind_ids operator (pg_catalog.@>) array [2]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied), s2 as (with recursive s3_seed(root_id) as not materialized (select distinct (s0.n1).id as root_id from s0), s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select e1.end_id, e1.start_id, 1, n2.kind_ids operator (pg_catalog.@>) array [1]::int2[], e1.end_id = 
e1.start_id, array [e1.id] from s3_seed join edge e1 on e1.end_id = s3_seed.root_id join node n2 on n2.id = e1.start_id where e1.kind_id = any (array [3, 4]::int2[]) union all select s3.root_id, e1.start_id, s3.depth + 1, n2.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s3.path || e1.id from s3 join lateral (select e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties from edge e1 where e1.end_id = s3.next_id and e1.id != all (s3.path) and e1.kind_id = any (array [3, 4]::int2[]) offset 0) e1 on true join node n2 on n2.id = e1.start_id where s3.depth < 15 and not s3.is_cycle) select s0.ep0 as ep0, s3.path as ep1, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s3 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s3.root_id offset 0) n1 on true join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s3.next_id offset 0) n2 on true where s3.satisfied and (s0.n1).id = s3.root_id limit 10) select case when (s2.n0).id is null or s2.ep0 is null or (s2.n1).id is null or s2.ep1 is null or (s2.n2).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep1) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, s2.n1, s2.n2]::nodecomposite[])::pathcomposite end as p from s2 limit 10; -- case: match p = (n:NodeKind1)-[:EdgeKind1|EdgeKind2*1..2]->(r:NodeKind2) where r.name =~ '(?i)Global Administrator.*|User 
Administrator.*|Cloud Application Administrator.*|Authentication Policy Administrator.*|Exchange Administrator.*|Helpdesk Administrator.*|Privileged Authentication Administrator.*' return p limit 10 -with s0 as (with recursive s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((n1.properties ->> 'name') ~ '(?i)Global Administrator.*|User Administrator.*|Cloud Application Administrator.*|Authentication Policy Administrator.*|Exchange Administrator.*|Helpdesk Administrator.*|Privileged Authentication Administrator.*') and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n0.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n0 on n0.id = e0.start_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, n0.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e0.id || s1.path from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n0 on n0.id = e0.start_id where s1.depth < 2 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.next_id offset 0) n0 
on true where s1.satisfied limit 10) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 limit 10; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((n1.properties ->> 'name') ~ '(?i)Global Administrator.*|User Administrator.*|Cloud Application Administrator.*|Authentication Policy Administrator.*|Exchange Administrator.*|Helpdesk Administrator.*|Privileged Authentication Administrator.*') and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, n0.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n0 on n0.id = e0.start_id where e0.kind_id = any (array [3, 4]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, n0.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, e0.id || s1.path from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3, 4]::int2[]) offset 0) e0 on true join node n0 on n0.id = e0.start_id where s1.depth < 2 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.root_id offset 0) n1 on true join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.next_id offset 0) n0 on true where s1.satisfied limit 10) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), 
array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 limit 10; -- case: match p = (t:NodeKind2)<-[:EdgeKind1*1..]-(a) where (a:NodeKind1 or a:NodeKind2) and t.objectid ends with '-512' return p limit 1000 -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.properties ->> 'objectid') like '%-512') and n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, ((n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n1.kind_ids operator (pg_catalog.@>) array [2]::int2[])), e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n1 on n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, ((n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n1.kind_ids operator (pg_catalog.@>) array [2]::int2[])), false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, 
n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied limit 1000) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 limit 1000; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.properties ->> 'objectid') like '%-512') and n0.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select e0.end_id, e0.start_id, 1, ((n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n1.kind_ids operator (pg_catalog.@>) array [2]::int2[])), e0.end_id = e0.start_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id join node n1 on n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[]) union all select s1.root_id, e0.start_id, s1.depth + 1, ((n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] or n1.kind_ids operator (pg_catalog.@>) array [2]::int2[])), false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.end_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.start_id where s1.depth < 15 and not s1.is_cycle) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied limit 1000) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by 
_path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 limit 1000; -- case: match p=(n:NodeKind1)-[:EdgeKind1|EdgeKind2]->(g:NodeKind1)-[:EdgeKind2]->(:NodeKind2)-[:EdgeKind1*1..]->(m:NodeKind1) where n.objectid = m.objectid return p limit 100 -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[])), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[])), s2 as (with recursive s3_seed(root_id) as not materialized (select distinct (s1.n2).id as root_id from s1), s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select e2.start_id, e2.end_id, 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.start_id = e2.end_id, array [e2.id] from s3_seed join edge e2 on e2.start_id = s3_seed.root_id join node n3 on n3.id = e2.end_id where e2.kind_id = any (array [3]::int2[]) union all select s3.root_id, e2.end_id, s3.depth + 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s3.path || e2.id from s3 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.start_id = s3.next_id and e2.id != all (s3.path) and e2.kind_id = any 
(array [3]::int2[]) offset 0) e2 on true join node n3 on n3.id = e2.end_id where s3.depth < 15 and not s3.is_cycle) select s1.e0 as e0, s1.e1 as e1, (select coalesce(array_agg((e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s3.path) with ordinality as _path(id, ordinality) join edge e2 on e2.id = _path.id) as e2, s3.path as ep0, s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3 from s1, s3 join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s3.root_id offset 0) n2 on true join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s3.next_id offset 0) n3 on true where s3.satisfied and (s1.n2).id = s3.root_id and (((s1.n0).properties -> 'objectid') = (n3.properties -> 'objectid')) limit 100) select ordered_edges_to_path(s2.n0, array [s2.e0]::edgecomposite[] || array [s2.e1]::edgecomposite[] || s2.e2, array [s2.n0, s2.n1, s2.n2, s2.n3]::nodecomposite[])::pathcomposite as p from s2 limit 100; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[])), s1 as (select s0.e0 as e0, e1.id as e1, s0.n0 as n0, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e1.end_id where e1.kind_id = any (array [4]::int2[]) and e1.id != s0.e0), s2 as (with recursive s3_seed(root_id) as not materialized (select distinct (s1.n2).id as root_id from s1), s3(root_id, next_id, 
depth, satisfied, is_cycle, path) as (select e2.start_id, e2.end_id, 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], e2.start_id = e2.end_id, array [e2.id] from s3_seed join edge e2 on e2.start_id = s3_seed.root_id join node n3 on n3.id = e2.end_id where e2.kind_id = any (array [3]::int2[]) union all select s3.root_id, e2.end_id, s3.depth + 1, n3.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s3.path || e2.id from s3 join lateral (select e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties from edge e2 where e2.start_id = s3.next_id and e2.id != all (s3.path) and e2.kind_id = any (array [3]::int2[]) offset 0) e2 on true join node n3 on n3.id = e2.end_id where s3.depth < 15 and not s3.is_cycle) select s1.e0 as e0, s1.e1 as e1, s3.path as ep0, s1.n0 as n0, s1.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n3 from s1, s3 join lateral (select n2.id, n2.kind_ids, n2.properties from node n2 where n2.id = s3.root_id offset 0) n2 on true join lateral (select n3.id, n3.kind_ids, n3.properties from node n3 where n3.id = s3.next_id offset 0) n3 on true where s3.satisfied and (s1.n2).id = s3.root_id and (((s1.n0).properties -> 'objectid') = (n3.properties -> 'objectid')) limit 100) select case when (s2.n0).id is null or s2.e0 is null or (s2.n1).id is null or s2.e1 is null or (s2.n2).id is null or s2.ep0 is null or (s2.n3).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s2.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s2.e1]::int8[]) with 
ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id) || (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, s2.n1, s2.n2, s2.n3]::nodecomposite[])::pathcomposite end as p from s2 limit 100; -- case: match (a:NodeKind1)-[:EdgeKind1*0..]->(b:NodeKind1) where a.name = 'solo' and b.name = 'solo' return a.name, b.name -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('solo')::text)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select s1_seed.root_id, s1_seed.root_id, 0, (((n1.properties -> 'name'))::jsonb = to_jsonb(('solo')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, array []::int8[] from s1_seed join node n1 on n1.id = s1_seed.root_id union all select e0.start_id, e0.end_id, 1, (((n1.properties -> 'name'))::jsonb = to_jsonb(('solo')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, (((n1.properties -> 'name'))::jsonb = to_jsonb(('solo')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle and s1.depth > 0) 
select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select ((s0.n0).properties -> 'name'), ((s0.n1).properties -> 'name') from s0; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'solo')) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select s1_seed.root_id, s1_seed.root_id, 0, ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'solo')) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, array []::int8[] from s1_seed join node n1 on n1.id = s1_seed.root_id union all select e0.start_id, e0.end_id, 1, ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'solo')) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'solo')) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, 
e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle and s1.depth > 0) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select ((s0.n0).properties -> 'name'), ((s0.n1).properties -> 'name') from s0; -- case: match (a:NodeKind1)-[:EdgeKind1*0..]->(b:NodeKind1) where a.name = 'zero-source' and b.name = 'zero-target' return count(b) -with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('zero-source')::text)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select s1_seed.root_id, s1_seed.root_id, 0, (((n1.properties -> 'name'))::jsonb = to_jsonb(('zero-target')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, array []::int8[] from s1_seed join node n1 on n1.id = s1_seed.root_id union all select e0.start_id, e0.end_id, 1, (((n1.properties -> 'name'))::jsonb = to_jsonb(('zero-target')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select s1.root_id, e0.end_id, s1.depth + 1, (((n1.properties -> 'name'))::jsonb = to_jsonb(('zero-target')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join 
lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle and s1.depth > 0) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select count(s0.n1)::int8 from s0; +with s0 as (with recursive s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'zero-source')) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select s1_seed.root_id, s1_seed.root_id, 0, ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'zero-target')) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, array []::int8[] from s1_seed join node n1 on n1.id = s1_seed.root_id union all select e0.start_id, e0.end_id, 1, ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'zero-target')) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]) union all select 
s1.root_id, e0.end_id, s1.depth + 1, ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'zero-target')) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[], false, s1.path || e0.id from s1 join lateral (select e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties from edge e0 where e0.start_id = s1.next_id and e0.id != all (s1.path) and e0.kind_id = any (array [3]::int2[]) offset 0) e0 on true join node n1 on n1.id = e0.end_id where s1.depth < 15 and not s1.is_cycle and s1.depth > 0) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join lateral (select n0.id, n0.kind_ids, n0.properties from node n0 where n0.id = s1.root_id offset 0) n0 on true join lateral (select n1.id, n1.kind_ids, n1.properties from node n1 where n1.id = s1.next_id offset 0) n1 on true where s1.satisfied) select count(s0.n1)::int8 from s0; + diff --git a/cypher/models/pgsql/test/translation_cases/quantifiers.sql b/cypher/models/pgsql/test/translation_cases/quantifiers.sql index 198db68f..c4d2f249 100644 --- a/cypher/models/pgsql/test/translation_cases/quantifiers.sql +++ b/cypher/models/pgsql/test/translation_cases/quantifiers.sql @@ -37,6 +37,7 @@ with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposit -- case: MATCH (m:NodeKind1) WHERE ANY(name in m.serviceprincipalnames WHERE name CONTAINS "PHANTOM") WITH m MATCH (n:NodeKind1)-[:EdgeKind1]->(g:NodeKind2) WHERE g.objectid ENDS WITH '-525' WITH m, COLLECT(n) AS matchingNs WHERE NONE(t IN matchingNs WHERE t.objectid = m.objectid) RETURN m with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((select count(*)::int from unnest(jsonb_to_text_array((n0.properties -> 'serviceprincipalnames'))) as i0 where (i0 like '%PHANTOM%')) >= 1)::bool) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as n0 from s1), s2 as (with s3 as 
(select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, edge e0 join node n2 on ((n2.properties ->> 'objectid') like '%-525') and n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e0.end_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[])) select s3.n0 as n0, array_remove(coalesce(array_agg(s3.n1)::nodecomposite[], array []::nodecomposite[])::nodecomposite[], null)::nodecomposite[] as i1 from s3 group by n0) select s2.n0 as m from s2 where (((select count(*)::int from unnest(s2.i1) as i2 where ((i2.properties -> 'objectid') = ((s2.n0).properties -> 'objectid'))) = 0 and s2.i1 is not null)::bool); + -- case: WITH [1, 2] AS nums MATCH (n:NodeKind1) WHERE ANY(num IN nums + [3] WHERE num = 3) RETURN n with s0 as (select array [1, 2]::int8[] as i0), s1 as (select s0.i0 as i0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from s0, node n0 where (((select count(*)::int from unnest(s0.i0 || array [3]::int8[]) as i1 where (i1 = 3)) >= 1)::bool) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as n from s1; @@ -45,3 +46,4 @@ with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposit -- case: MATCH (m:NodeKind1) WHERE ANY(name in m.serviceprincipalnames WHERE name CONTAINS "PHANTOM") WITH m MATCH (n:NodeKind1)-[:EdgeKind1]->(g:NodeKind2) WHERE g.objectid ENDS WITH '-525' WITH m, COLLECT(n) AS matchingNs WHERE NONE(t IN matchingNs WHERE t.objectid = m.objectid) RETURN m with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((select count(*)::int from unnest(jsonb_to_text_array((n0.properties -> 'serviceprincipalnames'))) as i0 where (i0 like '%PHANTOM%')) >= 1)::bool) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) select s1.n0 as n0 from s1), s2 as (with s3 as 
(select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, edge e0 join node n2 on ((n2.properties ->> 'objectid') like '%-525') and n2.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n2.id = e0.end_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id = e0.start_id where e0.kind_id = any (array [3]::int2[])) select s3.n0 as n0, array_remove(coalesce(array_agg(s3.n1)::nodecomposite[], array []::nodecomposite[])::nodecomposite[], null)::nodecomposite[] as i1 from s3 group by n0) select s2.n0 as m from s2 where (((select count(*)::int from unnest(s2.i1) as i2 where ((i2.properties -> 'objectid') = ((s2.n0).properties -> 'objectid'))) = 0 and s2.i1 is not null)::bool); + diff --git a/cypher/models/pgsql/test/translation_cases/scalar_aggregation.sql b/cypher/models/pgsql/test/translation_cases/scalar_aggregation.sql index 755a4eb0..21458c72 100644 --- a/cypher/models/pgsql/test/translation_cases/scalar_aggregation.sql +++ b/cypher/models/pgsql/test/translation_cases/scalar_aggregation.sql @@ -92,9 +92,15 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from -- case: MATCH (n) RETURN count(n) AS total ORDER BY total DESC with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select count(s0.n0)::int8 as total from s0 order by total desc; +-- case: MATCH (n) RETURN toInteger(n.value) + count(n) +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select (((s0.n0).properties ->> 'value'))::int8 + count(s0.n0)::int8 from s0 group by (((s0.n0).properties ->> 'value'))::int8; + -- case: MATCH (n) WITH toInteger(n.value) AS value, count(n) AS node_count RETURN value + node_count with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select (((s1.n0).properties ->> 'value'))::int8 as i0, count(s1.n0)::int8 
as i1 from s1 group by (((s1.n0).properties ->> 'value'))::int8) select s0.i0 + s0.i1 from s0; +-- case: MATCH (n) WITH toInteger(n.value) + count(n) AS score RETURN score +with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select (((s1.n0).properties ->> 'value'))::int8 + count(s1.n0)::int8 as i0 from s1 group by (((s1.n0).properties ->> 'value'))::int8) select s0.i0 as score from s0; + -- case: MATCH (n) WITH toInteger(n.value) AS value, count(n) AS node_count WITH value + node_count AS score RETURN score with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select (((s1.n0).properties ->> 'value'))::int8 as i0, count(s1.n0)::int8 as i1 from s1 group by (((s1.n0).properties ->> 'value'))::int8), s2 as (select s0.i0 + s0.i1 as i2 from s0) select s2.i2 as score from s2; @@ -103,3 +109,4 @@ with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposit -- case: MATCH (n) WITH sum(n.age) / count(n) AS avg_age RETURN avg_age with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select sum((((s1.n0).properties ->> 'age'))::float8)::numeric / count(s1.n0)::int8 as i0 from s1) select s0.i0 as avg_age from s0; + diff --git a/cypher/models/pgsql/test/translation_cases/shortest_paths.sql b/cypher/models/pgsql/test/translation_cases/shortest_paths.sql index 928a1855..91bbce1d 100644 --- a/cypher/models/pgsql/test/translation_cases/shortest_paths.sql +++ b/cypher/models/pgsql/test/translation_cases/shortest_paths.sql @@ -16,80 +16,81 @@ -- case: match p = allShortestPaths((s:NodeKind1)-[*..]->()) return p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.start_id, e0.end_id, 1, exists (select 1 from edge where 
end_id = e0.start_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id where case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, exists (select 1 from edge where end_id = e0.start_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.start_id = s1.next_id where e0.id != all (s1.path);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_asp_harness(@pi0::text, @pi1::text, 15)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0; +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_asp_harness(@pi0::text, @pi1::text, 15)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select case when (s0.n0).id is null 
or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p = allShortestPaths((s:NodeKind1)-[*..]->({name: "123"})) return p --- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((n1.properties -\u003e 'name'))::jsonb = to_jsonb(('123')::text)::jsonb) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.end_id) = 0 then true else shortest_path_self_endpoint_error(e0.end_id, e0.end_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id), false, e0.id || s1.path from forward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.id != all (s1.path);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_asp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) select distinct n0.id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by 
_path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0; +-- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where (jsonb_typeof((n1.properties -\u003e 'name')) = 'string' and (n1.properties -\u003e\u003e 'name') = '123')) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.end_id) = 0 then true else shortest_path_self_endpoint_error(e0.end_id, e0.end_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id), false, e0.id || s1.path from forward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.id != all (s1.path);"} +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_asp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) select distinct n0.id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id is not null;')::text)) select 
s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p = allShortestPaths((s:NodeKind1)-[*..]->(e)) where e.name = '123' return p --- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where (((n1.properties -\u003e 'name'))::jsonb = to_jsonb(('123')::text)::jsonb)) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.end_id) = 0 then true else shortest_path_self_endpoint_error(e0.end_id, e0.end_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id), false, e0.id || s1.path from forward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.id != all (s1.path);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as 
(select * from unidirectional_asp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) select distinct n0.id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0; +-- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((jsonb_typeof((n1.properties -\u003e 'name')) = 'string' and (n1.properties -\u003e\u003e 'name') = '123'))) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.end_id) = 0 then true else shortest_path_self_endpoint_error(e0.end_id, e0.end_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id), false, e0.id || s1.path from forward_front s1 join edge e0 on e0.end_id = s1.next_id 
where e0.id != all (s1.path);"} +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_asp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) select distinct n0.id from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id is not null;')::text)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p=shortestPath((n:NodeKind1)-[:EdgeKind1*1..]->(m)) where 'admin_tier_0' in split(m.system_tags, ' ') and n.objectid ends with '-513' and n<>m return p limit 1000 -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.properties -\u003e\u003e 'objectid') like '%-513') and n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi1":"insert into 
next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s1.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.start_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from forward_visited where forward_visited.root_id = s1.root_id and forward_visited.id = e0.end_id);","pi2":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ('admin_tier_0' = any (string_to_array((n1.properties -\u003e\u003e 'system_tags'), ' ')::text[]))) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s1.root_id), false, e0.id || s1.path from backward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from backward_visited where backward_visited.root_id = s1.root_id and backward_visited.id = e0.start_id);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where 
((n0.properties ->> ''objectid'') like ''%-513'') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (''admin_tier_0'' = any (string_to_array((n1.properties ->> ''system_tags''), '' '')::text[])) and n0.id is not null and n1.id is not null;')::text, (1000)::int8)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where ((s0.n0).id <> (s0.n1).id) limit 1000; +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where ((n0.properties ->> ''objectid'') like ''%-513'') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (''admin_tier_0'' = any (string_to_array((n1.properties ->> ''system_tags''), '' '')::text[])) and n0.id is not null and n1.id is not null;')::text, (1000)::int8)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array 
[]::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 where ((s0.n0).id <> (s0.n1).id) limit 1000; -- case: match p=shortestPath((n:NodeKind1)-[:EdgeKind1*1..]->(m)) where 'admin_tier_0' in split(m.system_tags, ' ') and n.objectid ends with '-513' and m<>n return p limit 1000 -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((n0.properties -\u003e\u003e 'objectid') like '%-513') and n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s1.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.start_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from forward_visited where forward_visited.root_id = s1.root_id and forward_visited.id = e0.end_id);","pi2":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ('admin_tier_0' = any (string_to_array((n1.properties -\u003e\u003e 'system_tags'), ' ')::text[]))) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id 
and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s1.root_id), false, e0.id || s1.path from backward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from backward_visited where backward_visited.root_id = s1.root_id and backward_visited.id = e0.start_id);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where ((n0.properties ->> ''objectid'') like ''%-513'') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (''admin_tier_0'' = any (string_to_array((n1.properties ->> ''system_tags''), '' '')::text[])) and n0.id is not null and n1.id is not null;')::text, (1000)::int8)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where ((s0.n1).id <> (s0.n0).id) limit 1000; +with s0 as (with 
s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where ((n0.properties ->> ''objectid'') like ''%-513'') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (''admin_tier_0'' = any (string_to_array((n1.properties ->> ''system_tags''), '' '')::text[])) and n0.id is not null and n1.id is not null;')::text, (1000)::int8)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 where ((s0.n1).id <> (s0.n0).id) limit 1000; -- case: match p=shortestPath((t:NodeKind1)<-[:EdgeKind1|EdgeKind2*1..]-(s:NodeKind2)) where coalesce(t.system_tags, '') contains 'admin_tier_0' and t.name =~ 'name.*' and s<>t return p limit 1000 -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (coalesce((n0.properties -\u003e\u003e 'system_tags'), '')::text like '%admin_tier_0%' and (n0.properties -\u003e\u003e 'name') ~ 'name.*') and n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = 
e0.start_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3, 4]::int2[]);","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.kind_id = any (array [3, 4]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from visited where visited.root_id = s1.root_id and visited.id = e0.start_id);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) select distinct n1.id from node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id is not null;')::text, (1000)::int8)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where ((s0.n1).id <> (s0.n0).id) limit 1000; +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) select distinct n1.id from node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id is not null;')::text, (1000)::int8)) select s1.path as ep0, 
(n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 where ((s0.n1).id <> (s0.n0).id) limit 1000; -- case: match p=shortestPath((a)-[:EdgeKind1*]->(b)) where id(a) = 1 and id(b) = 2 return p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (n0.id = 1)) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]) and case when (select count(*)::int8 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s1.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.start_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) 
and not exists (select 1 from forward_visited where forward_visited.root_id = s1.root_id and forward_visited.id = e0.end_id);","pi2":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where (n1.id = 2)) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s1.root_id), false, e0.id || s1.path from backward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from backward_visited where backward_visited.root_id = s1.root_id and backward_visited.id = e0.start_id);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where (n0.id = 1) and (n1.id = 2) and n0.id is not null and n1.id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join 
node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0; +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where (n0.id = 1) and (n1.id = 2) and n0.id is not null and n1.id is not null;')::text)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p=shortestPath((a)-[:EdgeKind1*]->(b:NodeKind1)) where a <> b return p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where n1.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.end_id, e0.start_id, 1, exists (select 1 from edge where end_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = 
s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from edge where end_id = e0.end_id), false, e0.id || s1.path from forward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from visited where visited.root_id = s1.root_id and visited.id = e0.start_id);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where ((s0.n0).id <> (s0.n1).id); +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge 
_edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 where ((s0.n0).id <> (s0.n1).id); -- case: match p=shortestPath((a:NodeKind2)-[:EdgeKind1*]->(b)) where a <> b return p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@\u003e) array [2]::int2[]) select e0.start_id, e0.end_id, 1, exists (select 1 from edge where end_id = e0.start_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, exists (select 1 from edge where end_id = e0.start_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.start_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from visited where visited.root_id = s1.root_id and visited.id = e0.end_id);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where ((s0.n0).id <> (s0.n1).id); +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * 
from unidirectional_sp_harness(@pi0::text, @pi1::text, 15)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 where ((s0.n0).id <> (s0.n1).id); -- case: match p=shortestPath((b)<-[:EdgeKind1*]-(a)) where id(a) = 1 and id(b) = 2 return p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (n0.id = 2)) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.end_id and traversal_pair_filter.terminal_id = e0.start_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]) and case when (select count(*)::int8 from traversal_pair_filter where traversal_pair_filter.root_id = e0.end_id and traversal_pair_filter.terminal_id = e0.end_id) = 0 then true else shortest_path_self_endpoint_error(e0.end_id, e0.end_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s1.root_id and traversal_pair_filter.terminal_id = e0.start_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.end_id = s1.next_id where 
e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from forward_visited where forward_visited.root_id = s1.root_id and forward_visited.id = e0.start_id);","pi2":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where (n1.id = 1)) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.end_id and traversal_pair_filter.terminal_id = e0.start_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.end_id and traversal_pair_filter.terminal_id = s1.root_id), false, e0.id || s1.path from backward_front s1 join edge e0 on e0.start_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from backward_visited where backward_visited.root_id = s1.root_id and backward_visited.id = e0.end_id);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where (n0.id = 2) and (n1.id = 1) and n0.id is not null and n1.id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, 
n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0; +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where (n0.id = 2) and (n1.id = 1) and n0.id is not null and n1.id is not null;')::text)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p = allShortestPaths((m:NodeKind1)<-[:EdgeKind1*..]-(n)) where coalesce(m.system_tags, '') contains 'admin_tier_0' and n.name = '123' and n <> m return p --- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where (((n1.properties -\u003e 'name'))::jsonb = to_jsonb(('123')::text)::jsonb)) select e0.start_id, 
e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s1.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.start_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path);","pi2":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (coalesce((n0.properties -\u003e\u003e 'system_tags'), '')::text like '%admin_tier_0%') and n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s1.root_id), false, e0.id || s1.path from backward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_asp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, 
('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n1.id, n0.id from node n1, node n0 where (((n1.properties -> ''name''))::jsonb = to_jsonb((''123'')::text)::jsonb) and (coalesce((n0.properties ->> ''system_tags''), '''')::text like ''%admin_tier_0%'') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id is not null and n0.id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where ((s0.n1).id <> (s0.n0).id); +-- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((jsonb_typeof((n1.properties -\u003e 'name')) = 'string' and (n1.properties -\u003e\u003e 'name') = '123'))) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s1.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.start_id = 
s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path);","pi2":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (coalesce((n0.properties -\u003e\u003e 'system_tags'), '')::text like '%admin_tier_0%') and n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s1.root_id), false, e0.id || s1.path from backward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path);"} +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_asp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n1.id, n0.id from node n1, node n0 where ((jsonb_typeof((n1.properties -> ''name'')) = ''string'' and (n1.properties ->> ''name'') = ''123'')) and (coalesce((n0.properties ->> ''system_tags''), '''')::text like ''%admin_tier_0%'') and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n1.id is not null and n0.id is not null;')::text)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id) select 
case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 where ((s0.n1).id <> (s0.n0).id); -- case: match p=shortestPath((a)-[:EdgeKind1*]->(b:NodeKind1)) where a <> b return p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n1.id as root_id from node n1 where n1.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.end_id, e0.start_id, 1, exists (select 1 from edge where end_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.end_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.start_id, s1.depth + 1, exists (select 1 from edge where end_id = e0.end_id), false, e0.id || s1.path from forward_front s1 join edge e0 on e0.end_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from visited where visited.root_id = s1.root_id and visited.id = e0.start_id);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, 
n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 where ((s0.n0).id <> (s0.n1).id); +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n1 on n1.id = s1.root_id join node n0 on n0.id = s1.next_id) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 where ((s0.n0).id <> (s0.n1).id); -- case: match p=(c:NodeKind1)-[]->(u:NodeKind2) match p2=shortestPath((u:NodeKind2)-[*1..]->(d:NodeKind1)) return p, p2 limit 500 -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s2_seed(root_id) as not materialized (select distinct n1.id as root_id from traversal_root_filter s2_seed_filter join node n1 on n1.id = s2_seed_filter.id where n1.kind_ids operator (pg_catalog.@\u003e) array [2]::int2[]) select e1.start_id, e1.end_id, 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e1.end_id), e1.start_id = e1.end_id, array [e1.id] from s2_seed join edge e1 on e1.start_id = s2_seed.root_id where case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e1.start_id) = 0 then true else 
shortest_path_self_endpoint_error(e1.start_id, e1.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s2.root_id, e1.end_id, s2.depth + 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e1.end_id), false, s2.path || e1.id from forward_front s2 join edge e1 on e1.start_id = s2.next_id where e1.id != all (s2.path) and not exists (select 1 from visited where visited.root_id = s2.root_id and visited.id = e1.end_id);"} -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id), s1 as (with s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15, ('insert into traversal_root_filter (id) select distinct (s0.n1).id from s0 where (s0.n1).id is not null;')::text, ('insert into traversal_terminal_filter (id) select distinct n2.id from node n2 where n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id is not null;')::text)) select s0.e0 as e0, (select coalesce(array_agg((e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e1 on e1.id = _path.id) as e1, s2.path as ep0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s2 join node n1 on n1.id = s2.root_id join node n2 on n2.id = s2.next_id where (s0.n1).id = s2.root_id and case when s2.root_id != s2.next_id then true else shortest_path_self_endpoint_error(s2.root_id, 
s2.next_id) end) select (array [s1.n0, s1.n1]::nodecomposite[], array [s1.e0]::edgecomposite[])::pathcomposite as p, ordered_edges_to_path(s1.n1, s1.e1, array [s1.n1, s1.n2]::nodecomposite[])::pathcomposite as p2 from s1 limit 500; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id), s1 as (with s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15, ('insert into traversal_root_filter (id) select distinct (s0.n1).id from s0 where (s0.n1).id is not null;')::text, ('insert into traversal_terminal_filter (id) select distinct n2.id from node n2 where n2.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n2.id is not null;')::text)) select s0.e0 as e0, s2.path as ep0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, s2 join node n1 on n1.id = s2.root_id join node n2 on n2.id = s2.next_id where (s0.n1).id = s2.root_id and case when s2.root_id != s2.next_id then true else shortest_path_self_endpoint_error(s2.root_id, s2.next_id) end) select case when (s1.n0).id is null or s1.e0 is null or (s1.n1).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s1.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite end as p, case when (s1.n1).id is null or s1.ep0 is null or (s1.n2).id is null then null else ordered_edges_to_path(s1.n1, (select 
coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n1, s1.n2]::nodecomposite[])::pathcomposite end as p2 from s1 limit 500; -- case: match p = allShortestPaths((a)-[:EdgeKind1*..]->()) return p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select e0.start_id, e0.end_id, 1, exists (select 1 from edge where end_id = e0.start_id), e0.start_id = e0.end_id, array [e0.id] from edge e0 where e0.kind_id = any (array [3]::int2[]) and case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, exists (select 1 from edge where end_id = e0.start_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.start_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_asp_harness(@pi0::text, @pi1::text, 15)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select 
ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0; +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_asp_harness(@pi0::text, @pi1::text, 15)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0; -- case: match p=shortestPath((n:NodeKind1)-[:EdgeKind1*1..]->(m:NodeKind2)) return p limit 10 -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s1_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s1_seed join edge e0 on e0.start_id = s1_seed.root_id where e0.kind_id = any (array [3]::int2[]) and case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s1.root_id, e0.end_id, s1.depth + 1, 
exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.end_id), false, s1.path || e0.id from forward_front s1 join edge e0 on e0.start_id = s1.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s1.path) and not exists (select 1 from visited where visited.root_id = s1.root_id and visited.id = e0.end_id);"} -with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) select distinct n1.id from node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id is not null;')::text, (10)::int8)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select ordered_edges_to_path(s0.n0, s0.e0, array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite as p from s0 limit 10; +with s0 as (with s1(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) select distinct n1.id from node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id is not null;')::text, (10)::int8)) select s1.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1 join node n0 on n0.id = s1.root_id join node n1 on n1.id = s1.next_id where case when 
s1.root_id != s1.next_id then true else shortest_path_self_endpoint_error(s1.root_id, s1.next_id) end) select case when (s0.n0).id is null or s0.ep0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s0.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 limit 10; -- case: match (a:NodeKind1), (b:NodeKind2) match p=shortestPath((a)-[:EdgeKind1*]->(b)) return p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s3_seed(root_id) as not materialized (select s3_seed_filter.id as root_id from traversal_root_filter s3_seed_filter) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s3_seed join edge e0 on e0.start_id = s3_seed.root_id where e0.kind_id = any (array [3]::int2[]) and case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s3.root_id, e0.end_id, s3.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s3.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s3.path || e0.id from forward_front s3 join edge e0 on e0.start_id = s3.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s3.path) and not exists (select 1 from forward_visited where forward_visited.root_id = s3.root_id and forward_visited.id = e0.end_id);","pi2":"insert into 
next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s3_seed(root_id) as not materialized (select s3_seed_filter.id as root_id from traversal_terminal_filter s3_seed_filter) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s3_seed join edge e0 on e0.end_id = s3_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s3.root_id, e0.start_id, s3.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s3.root_id), false, e0.id || s3.path from backward_front s3 join edge e0 on e0.end_id = s3.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s3.path) and not exists (select 1 from backward_visited where backward_visited.root_id = s3.root_id and backward_visited.id = e0.start_id);"} -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s2 as (with s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct (s1.n0).id, (s1.n1).id from s1 where (s1.n0).id is not null and (s1.n1).id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s3.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id 
= _path.id) as e0, s3.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1, s3 join node n0 on n0.id = s3.root_id join node n1 on n1.id = s3.next_id where (s1.n0).id = s3.root_id and (s1.n1).id = s3.next_id and case when s3.root_id != s3.next_id then true else shortest_path_self_endpoint_error(s3.root_id, s3.next_id) end) select ordered_edges_to_path(s2.n0, s2.e0, array [s2.n0, s2.n1]::nodecomposite[])::pathcomposite as p from s2; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s2 as (with s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct (s1.n0).id, (s1.n1).id from s1 where (s1.n0).id is not null and (s1.n1).id is not null;')::text)) select s3.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1, s3 join node n0 on n0.id = s3.root_id join node n1 on n1.id = s3.next_id where (s1.n0).id = s3.root_id and (s1.n1).id = s3.next_id and case when s3.root_id != s3.next_id then true else shortest_path_self_endpoint_error(s3.root_id, s3.next_id) end) select case when (s2.n0).id is null or s2.ep0 is null or (s2.n1).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, 
s2.n1]::nodecomposite[])::pathcomposite end as p from s2; -- case: match (a:NodeKind1), (b:NodeKind2) match p=allShortestPaths((a)-[:EdgeKind1*..]->(b)) return p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s3_seed(root_id) as not materialized (select s3_seed_filter.id as root_id from traversal_root_filter s3_seed_filter) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s3_seed join edge e0 on e0.start_id = s3_seed.root_id where e0.kind_id = any (array [3]::int2[]) and case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s3.root_id, e0.end_id, s3.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s3.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s3.path || e0.id from forward_front s3 join edge e0 on e0.start_id = s3.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s3.path);","pi2":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s3_seed(root_id) as not materialized (select s3_seed_filter.id as root_id from traversal_terminal_filter s3_seed_filter) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s3_seed join edge e0 on e0.end_id = s3_seed.root_id where e0.kind_id = any (array [3]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s3.root_id, e0.start_id, s3.depth + 1, 
exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s3.root_id), false, e0.id || s3.path from backward_front s3 join edge e0 on e0.end_id = s3.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s3.path);"} -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s2 as (with s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_asp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct (s1.n0).id, (s1.n1).id from s1 where (s1.n0).id is not null and (s1.n1).id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s3.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s3.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1, s3 join node n0 on n0.id = s3.root_id join node n1 on n1.id = s3.next_id where (s1.n0).id = s3.root_id and (s1.n1).id = s3.next_id and case when s3.root_id != s3.next_id then true else shortest_path_self_endpoint_error(s3.root_id, s3.next_id) end) select ordered_edges_to_path(s2.n0, s2.e0, array [s2.n0, s2.n1]::nodecomposite[])::pathcomposite as p from s2; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node 
n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[]), s2 as (with s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_asp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct (s1.n0).id, (s1.n1).id from s1 where (s1.n0).id is not null and (s1.n1).id is not null;')::text)) select s3.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1, s3 join node n0 on n0.id = s3.root_id join node n1 on n1.id = s3.next_id where (s1.n0).id = s3.root_id and (s1.n1).id = s3.next_id and case when s3.root_id != s3.next_id then true else shortest_path_self_endpoint_error(s3.root_id, s3.next_id) end) select case when (s2.n0).id is null or s2.ep0 is null or (s2.n1).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, s2.n1]::nodecomposite[])::pathcomposite end as p from s2; -- case: match p=shortestPath((u:NodeKind1)-[:EdgeKind1*1..]->(g:NodeKind2)) with distinct g as Group, count(u) as UserCount return Group.name, UserCount order by UserCount desc limit 5 -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id where e0.kind_id = any (array [3]::int2[]) and case 
when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s2.root_id, e0.end_id, s2.depth + 1, exists (select 1 from traversal_terminal_filter where traversal_terminal_filter.id = e0.end_id), false, s2.path || e0.id from forward_front s2 join edge e0 on e0.start_id = s2.next_id where e0.kind_id = any (array [3]::int2[]) and e0.id != all (s2.path) and not exists (select 1 from visited where visited.root_id = s2.root_id and visited.id = e0.end_id);"} -with s0 as (with s1 as (with s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) select distinct n1.id from node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join node n0 on n0.id = s2.root_id join node n1 on n1.id = s2.next_id where case when s2.root_id != s2.next_id then true else shortest_path_self_endpoint_error(s2.root_id, s2.next_id) end) select distinct s1.n1 as n2, count(s1.n0)::int8 as i0 from s1 group by n1) select ((s0.n2).properties -> 'name'), s0.i0 as UserCount from s0 order by s0.i0 desc limit 5; +with s0 as (with s1 as (with s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from unidirectional_sp_harness(@pi0::text, @pi1::text, 15, ('')::text, ('insert into traversal_terminal_filter (id) 
select distinct n1.id from node n1 where n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id is not null;')::text)) select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join node n0 on n0.id = s2.root_id join node n1 on n1.id = s2.next_id where case when s2.root_id != s2.next_id then true else shortest_path_self_endpoint_error(s2.root_id, s2.next_id) end) select distinct s1.n1 as n2, count(s1.n0)::int8 as i0 from s1 group by n1) select ((s0.n2).properties -> 'name'), s0.i0 as UserCount from s0 order by s0.i0 desc limit 5; -- case: MATCH (g1:Group) MATCH (g2:Group) WHERE g1.name STARTS WITH 'DOMAIN USERS@' AND g2.name STARTS WITH 'DOMAIN ADMINS@' MATCH p=shortestPath((g1)-[:AddAllowedToAct|AddMember|AdminTo|AllExtendedRights|AllowedToDelegate|CanRDP|Contains|ForceChangePassword|GenericAll|GenericWrite|GetChangesAll|GetChanges|HasSession|MemberOf|Owns|ReadLAPSPassword|SQLAdmin|TrustedBy|WriteAccountRestrictions|WriteOwner*1..]->(g2)) WHERE NONE(r IN relationships(p) WHERE type(r) = 'HasSession' AND startNode(r).name = 'DF-WIN10-DEV01.DUMPSTER.FIRE') RETURN p -- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s3_seed(root_id) as not materialized (select s3_seed_filter.id as root_id from traversal_root_filter s3_seed_filter) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s3_seed join edge e0 on e0.start_id = s3_seed.root_id where e0.kind_id = any (array [14, 15, 16, 17, 18, 19, 12, 20, 21, 22, 23, 24, 7, 25, 26, 27, 28, 29, 30, 31]::int2[]) and case when (select count(*)::int8 from traversal_terminal_filter where traversal_terminal_filter.id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert 
into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s3.root_id, e0.end_id, s3.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s3.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s3.path || e0.id from forward_front s3 join edge e0 on e0.start_id = s3.next_id where e0.kind_id = any (array [14, 15, 16, 17, 18, 19, 12, 20, 21, 22, 23, 24, 7, 25, 26, 27, 28, 29, 30, 31]::int2[]) and e0.id != all (s3.path) and not exists (select 1 from forward_visited where forward_visited.root_id = s3.root_id and forward_visited.id = e0.end_id);","pi2":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s3_seed(root_id) as not materialized (select s3_seed_filter.id as root_id from traversal_terminal_filter s3_seed_filter) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s3_seed join edge e0 on e0.end_id = s3_seed.root_id where e0.kind_id = any (array [14, 15, 16, 17, 18, 19, 12, 20, 21, 22, 23, 24, 7, 25, 26, 27, 28, 29, 30, 31]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s3.root_id, e0.start_id, s3.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s3.root_id), false, e0.id || s3.path from backward_front s3 join edge e0 on e0.end_id = s3.next_id where e0.kind_id = any (array [14, 15, 16, 17, 18, 19, 12, 20, 21, 22, 23, 24, 7, 25, 26, 27, 28, 29, 30, 31]::int2[]) and e0.id != all (s3.path) and not exists (select 1 from backward_visited where backward_visited.root_id = s3.root_id and backward_visited.id = e0.start_id);"} -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) 
array [13]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((n1.properties ->> 'name') like 'DOMAIN ADMINS@%' and ((s0.n0).properties ->> 'name') like 'DOMAIN USERS@%') and n1.kind_ids operator (pg_catalog.@>) array [13]::int2[]), s2 as (with s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct (s1.n0).id, (s1.n1).id from s1 where (s1.n0).id is not null and (s1.n1).id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s3.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s3.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1, s3 join node n0 on n0.id = s3.root_id join node n1 on n1.id = s3.next_id where (s1.n0).id = s3.root_id and (s1.n1).id = s3.next_id and case when s3.root_id != s3.next_id then true else shortest_path_self_endpoint_error(s3.root_id, s3.next_id) end) select ordered_edges_to_path(s2.n0, s2.e0, array [s2.n0, s2.n1]::nodecomposite[])::pathcomposite as p from s2 where (((select count(*)::int from unnest((s2.e0)::edgecomposite[]) as i0 where ((((start_node(i0)::nodecomposite).properties -> 'name'))::jsonb = to_jsonb(('DF-WIN10-DEV01.DUMPSTER.FIRE')::text)::jsonb and i0.kind_id = 7)) = 0 and (s2.e0)::edgecomposite[] is not null)::bool); +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where n0.kind_ids operator (pg_catalog.@>) array [13]::int2[]), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((n1.properties ->> 'name') like 
'DOMAIN ADMINS@%' and ((s0.n0).properties ->> 'name') like 'DOMAIN USERS@%') and n1.kind_ids operator (pg_catalog.@>) array [13]::int2[]), s2 as (with s3(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct (s1.n0).id, (s1.n1).id from s1 where (s1.n0).id is not null and (s1.n1).id is not null;')::text)) select s3.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s1, s3 join node n0 on n0.id = s3.root_id join node n1 on n1.id = s3.next_id where (s1.n0).id = s3.root_id and (s1.n1).id = s3.next_id and case when s3.root_id != s3.next_id then true else shortest_path_self_endpoint_error(s3.root_id, s3.next_id) end) select case when (s2.n0).id is null or s2.ep0 is null or (s2.n1).id is null then null else ordered_edges_to_path(s2.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s2.n0, s2.n1]::nodecomposite[])::pathcomposite end as p from s2 where (((select count(*)::int from unnest(((select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id))::edgecomposite[]) as i0 where ((jsonb_typeof(((start_node(i0)::nodecomposite).properties -> 'name')) = 'string' and ((start_node(i0)::nodecomposite).properties ->> 'name') = 'DF-WIN10-DEV01.DUMPSTER.FIRE') and i0.kind_id = 7)) = 0 and ((select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, 
_edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s2.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id))::edgecomposite[] is not null)::bool); -- case: match p=shortestPath((s:NodeKind1)-[:EdgeKind1|HasSession*1..]->(d:NodeKind1)) where s.name = 'path-filter-src' and d.name = 'path-filter-dst' with p where none(r in relationships(p) where type(r) = 'HasSession' and startNode(r).name = 'blocked-session-host') return p --- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where (((n0.properties -\u003e 'name'))::jsonb = to_jsonb(('path-filter-src')::text)::jsonb) and n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id where e0.kind_id = any (array [3, 7]::int2[]) and case when (select count(*)::int8 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s2.root_id, e0.end_id, s2.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = s2.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s2.path || e0.id from forward_front s2 join edge e0 on e0.start_id = s2.next_id where e0.kind_id = any (array [3, 7]::int2[]) and e0.id != all (s2.path) and not exists (select 1 from forward_visited where forward_visited.root_id = s2.root_id and forward_visited.id = e0.end_id);","pi2":"insert into 
next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s2_seed(root_id) as not materialized (select n1.id as root_id from node n1 where (((n1.properties -\u003e 'name'))::jsonb = to_jsonb(('path-filter-dst')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.end_id = s2_seed.root_id where e0.kind_id = any (array [3, 7]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s2.root_id, e0.start_id, s2.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s2.root_id), false, e0.id || s2.path from backward_front s2 join edge e0 on e0.end_id = s2.next_id where e0.kind_id = any (array [3, 7]::int2[]) and e0.id != all (s2.path) and not exists (select 1 from backward_visited where backward_visited.root_id = s2.root_id and backward_visited.id = e0.start_id);"} -with s0 as (with s1 as (with s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where (((n0.properties -> ''name''))::jsonb = to_jsonb((''path-filter-src'')::text)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and (((n1.properties -> ''name''))::jsonb = to_jsonb((''path-filter-dst'')::text)::jsonb) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id is not null and n1.id is not null;')::text)) select (select coalesce(array_agg((e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite order by _path.ordinality), 
array []::edgecomposite[]) from unnest(s2.path) with ordinality as _path(id, ordinality) join edge e0 on e0.id = _path.id) as e0, s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join node n0 on n0.id = s2.root_id join node n1 on n1.id = s2.next_id where case when s2.root_id != s2.next_id then true else shortest_path_self_endpoint_error(s2.root_id, s2.next_id) end) select ordered_edges_to_path(s1.n0, s1.e0, array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite as pc0 from s1) select s0.pc0 as p from s0 where (((select count(*)::int from unnest(((s0.pc0).edges)::edgecomposite[]) as i0 where ((((start_node(i0)::nodecomposite).properties -> 'name'))::jsonb = to_jsonb(('blocked-session-host')::text)::jsonb and i0.kind_id = 7)) = 0 and ((s0.pc0).edges)::edgecomposite[] is not null)::bool); +-- pgsql_params:{"pi0":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s2_seed(root_id) as not materialized (select n0.id as root_id from node n0 where ((jsonb_typeof((n0.properties -\u003e 'name')) = 'string' and (n0.properties -\u003e\u003e 'name') = 'path-filter-src')) and n0.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.start_id, e0.end_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.start_id = s2_seed.root_id where e0.kind_id = any (array [3, 7]::int2[]) and case when (select count(*)::int8 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.start_id) = 0 then true else shortest_path_self_endpoint_error(e0.start_id, e0.start_id) end;","pi1":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s2.root_id, e0.end_id, s2.depth + 1, exists (select 1 from 
traversal_pair_filter where traversal_pair_filter.root_id = s2.root_id and traversal_pair_filter.terminal_id = e0.end_id), false, s2.path || e0.id from forward_front s2 join edge e0 on e0.start_id = s2.next_id where e0.kind_id = any (array [3, 7]::int2[]) and e0.id != all (s2.path) and not exists (select 1 from forward_visited where forward_visited.root_id = s2.root_id and forward_visited.id = e0.end_id);","pi2":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) with s2_seed(root_id) as not materialized (select n1.id as root_id from node n1 where ((jsonb_typeof((n1.properties -\u003e 'name')) = 'string' and (n1.properties -\u003e\u003e 'name') = 'path-filter-dst')) and n1.kind_ids operator (pg_catalog.@\u003e) array [1]::int2[]) select e0.end_id, e0.start_id, 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = e0.end_id), e0.start_id = e0.end_id, array [e0.id] from s2_seed join edge e0 on e0.end_id = s2_seed.root_id where e0.kind_id = any (array [3, 7]::int2[]);","pi3":"insert into next_front (root_id, next_id, depth, satisfied, is_cycle, path) select s2.root_id, e0.start_id, s2.depth + 1, exists (select 1 from traversal_pair_filter where traversal_pair_filter.root_id = e0.start_id and traversal_pair_filter.terminal_id = s2.root_id), false, e0.id || s2.path from backward_front s2 join edge e0 on e0.end_id = s2.next_id where e0.kind_id = any (array [3, 7]::int2[]) and e0.id != all (s2.path) and not exists (select 1 from backward_visited where backward_visited.root_id = s2.root_id and backward_visited.id = e0.start_id);"} +with s0 as (with s1 as (with s2(root_id, next_id, depth, satisfied, is_cycle, path) as (select * from bidirectional_sp_harness(@pi0::text, @pi1::text, @pi2::text, @pi3::text, 15, ('')::text, ('')::text, ('insert into traversal_pair_filter (root_id, terminal_id) select distinct n0.id, n1.id from node n0, node n1 where 
((jsonb_typeof((n0.properties -> ''name'')) = ''string'' and (n0.properties ->> ''name'') = ''path-filter-src'')) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and ((jsonb_typeof((n1.properties -> ''name'')) = ''string'' and (n1.properties ->> ''name'') = ''path-filter-dst'')) and n1.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id is not null and n1.id is not null;')::text)) select s2.path as ep0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s2 join node n0 on n0.id = s2.root_id join node n1 on n1.id = s2.next_id where case when s2.root_id != s2.next_id then true else shortest_path_self_endpoint_error(s2.root_id, s2.next_id) end) select case when (s1.n0).id is null or s1.ep0 is null or (s1.n1).id is null then null else ordered_edges_to_path(s1.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(s1.ep0) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s1.n0, s1.n1]::nodecomposite[])::pathcomposite end as pc0 from s1) select s0.pc0 as p from s0 where (((select count(*)::int from unnest(((s0.pc0).edges)::edgecomposite[]) as i0 where ((jsonb_typeof(((start_node(i0)::nodecomposite).properties -> 'name')) = 'string' and ((start_node(i0)::nodecomposite).properties ->> 'name') = 'blocked-session-host') and i0.kind_id = 7)) = 0 and ((s0.pc0).edges)::edgecomposite[] is not null)::bool); + diff --git a/cypher/models/pgsql/test/translation_cases/stepwise_traversal.sql b/cypher/models/pgsql/test/translation_cases/stepwise_traversal.sql index 1369b5f4..f48a2bcf 100644 --- a/cypher/models/pgsql/test/translation_cases/stepwise_traversal.sql +++ b/cypher/models/pgsql/test/translation_cases/stepwise_traversal.sql @@ -42,16 +42,16 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 
from with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1 from s0, edge e1 join node n2 on n2.id = e1.start_id join node n3 on n3.id = e1.end_id) select s1.e0 as r, s1.e1 as e from s1; -- case: match p = (:NodeKind1)-[:EdgeKind1|EdgeKind2]->(c:NodeKind2) where '123' in c.prop2 or '243' in c.prop2 or size(c.prop2) = 0 return p limit 10 -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on ('123' = any (jsonb_to_text_array((n1.properties -> 'prop2'))::text[]) or '243' = any (jsonb_to_text_array((n1.properties -> 'prop2'))::text[]) or jsonb_array_length((n1.properties -> 'prop2'))::int = 0) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) limit 10) select (array [s0.n0, s0.n1]::nodecomposite[], array [s0.e0]::edgecomposite[])::pathcomposite as p from s0 limit 10; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on ('123' = any (jsonb_to_text_array((n1.properties -> 'prop2'))::text[]) or '243' = any (jsonb_to_text_array((n1.properties -> 'prop2'))::text[]) or jsonb_array_length((n1.properties -> 'prop2'))::int = 0) and n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[]) limit 10) select case 
when (s0.n0).id is null or s0.e0 is null or (s0.n1).id is null then null else ordered_edges_to_path(s0.n0, (select coalesce(array_agg((_edge.id, _edge.start_id, _edge.end_id, _edge.kind_id, _edge.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(array [s0.e0]::int8[]) with ordinality as _path(id, ordinality) join edge _edge on _edge.id = _path.id), array [s0.n0, s0.n1]::nodecomposite[])::pathcomposite end as p from s0 limit 10; -- case: match ()-[r:EdgeKind1]->() return count(r) as the_count -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select count(s0.e0)::int8 as the_count from s0; +select count(*)::int8 as the_count from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[]); -- case: match ()-[r:EdgeKind1]->() return count(r) as the_count limit 1 with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])) select count(s0.e0)::int8 as the_count from s0 limit 1; -- case: match ()-[r:EdgeKind1]->({name: "123"}) return count(r) as the_count -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0 from edge e0 join node n1 on ((n1.properties -> 'name'))::jsonb = to_jsonb(('123')::text)::jsonb and n1.id = e0.end_id join node n0 on n0.id = e0.start_id where e0.kind_id = any (array [3]::int2[])) select count(s0.e0)::int8 as the_count from s0; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0 from edge e0 join node n1 on (jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = '123') and n1.id = 
e0.end_id join node n0 on n0.id = e0.start_id where e0.kind_id = any (array [3]::int2[])) select count(s0.e0)::int8 as the_count from s0; -- case: match (s)-[r]->(e) where id(e) = $a and not (id(s) = $b) and (r:EdgeKind1 or r:EdgeKind2) and not (s.objectid ends with $c or e.objectid ends with $d) return distinct id(s), id(r), id(e) -- cypher_params: {"a":1,"b":2,"c":"123","d":"456"} @@ -59,7 +59,7 @@ with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::e with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n1 on n1.id = e0.end_id join node n0 on (not (n0.id = @pi1::float8)) and n0.id = e0.start_id where ((e0.kind_id = any (array [3]::int2[]) or e0.kind_id = any (array [4]::int2[]))) and (not (cypher_ends_with((n0.properties ->> 'objectid'), (@pi2::text)::text)::bool or cypher_ends_with((n1.properties ->> 'objectid'), (@pi3::text)::text)::bool) and n1.id = @pi0::float8)) select distinct (s0.n0).id, (s0.e0).id, (s0.n1).id from s0; -- case: match (s)-[r]->(e) where s.name = '123' and e:NodeKind1 and not r.property return s, r, e -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on (((n0.properties -> 'name'))::jsonb = to_jsonb(('123')::text)::jsonb) and n0.id = e0.start_id join node n1 on (n1.kind_ids operator (pg_catalog.@>) array [1]::int2[]) and n1.id = e0.end_id where (not ((e0.properties ->> 'property'))::bool)) select s0.n0 as s, s0.e0 as r, s0.n1 as e from s0; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite 
as n1 from edge e0 join node n0 on ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '123')) and n0.id = e0.start_id join node n1 on (n1.kind_ids operator (pg_catalog.@>) array [1]::int2[]) and n1.id = e0.end_id where (not ((e0.properties ->> 'property'))::bool)) select s0.n0 as s, s0.e0 as r, s0.n1 as e from s0; -- case: match ()-[r]->() where r.value = 42 return r with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where (((e0.properties -> 'value'))::jsonb = to_jsonb((42)::int8)::jsonb)) select s0.e0 as r from s0; @@ -68,37 +68,37 @@ with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::e with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where (((e0.properties ->> 'bool_prop'))::bool)) select s0.e0 as r from s0; -- case: match (n)-[r]->() where n.name = '123' return n, r -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on (((n0.properties -> 'name'))::jsonb = to_jsonb(('123')::text)::jsonb) and n0.id = e0.start_id join node n1 on n1.id = e0.end_id) select s0.n0 as n, s0.e0 as r from s0; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '123')) and n0.id = e0.start_id join node n1 on n1.id = e0.end_id) select s0.n0 as n, s0.e0 as r from s0; -- case: match (n:NodeKind1)-[r]->() where n.name = '123' or n.name = '321' or n.name = '222' or n.name = '333' return n, r -with s0 as (select (e0.id, 
e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on (((n0.properties -> 'name'))::jsonb = to_jsonb(('123')::text)::jsonb or ((n0.properties -> 'name'))::jsonb = to_jsonb(('321')::text)::jsonb or ((n0.properties -> 'name'))::jsonb = to_jsonb(('222')::text)::jsonb or ((n0.properties -> 'name'))::jsonb = to_jsonb(('333')::text)::jsonb) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.id = e0.end_id) select s0.n0 as n, s0.e0 as r from s0; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '123') or (jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '321') or (jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '222') or (jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '333')) and n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.id = e0.end_id) select s0.n0 as n, s0.e0 as r from s0; -- case: match (s)-[r]->(e) where s.name = '123' and e.name = '321' return s, r, e -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on (((n0.properties -> 'name'))::jsonb = to_jsonb(('123')::text)::jsonb) and n0.id = e0.start_id join node n1 on (((n1.properties -> 'name'))::jsonb = to_jsonb(('321')::text)::jsonb) and n1.id = e0.end_id) select s0.n0 as s, s0.e0 as r, s0.n1 as e from s0; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite 
as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '123')) and n0.id = e0.start_id join node n1 on ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = '321')) and n1.id = e0.end_id) select s0.n0 as s, s0.e0 as r, s0.n1 as e from s0; -- case: match (f), (s)-[r]->(e) where not f.bool_field and s.name = '123' and e.name = '321' return f, s, r, e -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (not ((n0.properties ->> 'bool_field'))::bool)), s1 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, edge e0 join node n1 on (((n1.properties -> 'name'))::jsonb = to_jsonb(('123')::text)::jsonb) and n1.id = e0.start_id join node n2 on (((n2.properties -> 'name'))::jsonb = to_jsonb(('321')::text)::jsonb) and n2.id = e0.end_id) select s1.n0 as f, s1.n1 as s, s1.e0 as r, s1.n2 as e from s1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (not ((n0.properties ->> 'bool_field'))::bool)), s1 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0, edge e0 join node n1 on ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = '123')) and n1.id = e0.start_id join node n2 on ((jsonb_typeof((n2.properties -> 'name')) = 'string' and (n2.properties ->> 'name') = '321')) and n2.id = e0.end_id) select s1.n0 as f, s1.n1 as s, s1.e0 as r, s1.n2 as e from s1; -- case: match ()-[e0]->(n)<-[e1]-() return e0, n, 
e1 -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.end_id join node n2 on n2.id = e1.start_id) select s1.e0 as e0, s1.n1 as n, s1.e1 as e1 from s1; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.end_id join node n2 on n2.id = e1.start_id where e1.id != (s0.e0).id) select s1.e0 as e0, s1.n1 as n, s1.e1 as e1 from s1; -- case: match ()-[e0]->(n)-[e1]->() return e0, n, e1 -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id) select s1.e0 as e0, s1.n1 as n, s1.e1 as e1 from s1; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.start_id join 
node n2 on n2.id = e1.end_id where e1.id != (s0.e0).id) select s1.e0 as e0, s1.n1 as n, s1.e1 as e1 from s1; -- case: match ()<-[e0]-(n)<-[e1]-() return e0, n, e1 -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.end_id join node n1 on n1.id = e0.start_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.end_id join node n2 on n2.id = e1.start_id) select s1.e0 as e0, s1.n1 as n, s1.e1 as e1 from s1; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.end_id join node n1 on n1.id = e0.start_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.end_id join node n2 on n2.id = e1.start_id where e1.id != (s0.e0).id) select s1.e0 as e0, s1.n1 as n, s1.e1 as e1 from s1; -- case: match (s)<-[r:EdgeKind1|EdgeKind2]-(e) return s.name, e.name with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.end_id join node n1 on n1.id = e0.start_id where e0.kind_id = any (array [3, 4]::int2[])) select ((s0.n0).properties -> 'name'), ((s0.n1).properties -> 'name') from s0; -- case: match (s)-[:EdgeKind1|EdgeKind2]->(e)-[:EdgeKind1]->() return s.name as s_name, e.name as e_name -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[])), s1 as (select s0.n0 as 
n0, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [3]::int2[])) select ((s1.n0).properties -> 'name') as s_name, ((s1.n1).properties -> 'name') as e_name from s1; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[])), s1 as (select s0.e0 as e0, s0.n0 as n0, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.kind_id = any (array [3]::int2[]) and e1.id != s0.e0) select ((s1.n0).properties -> 'name') as s_name, ((s1.n1).properties -> 'name') as e_name from s1; -- case: match (s:NodeKind1)-[r:EdgeKind1|EdgeKind2]->(e:NodeKind2) return s.name, e.name with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[])) select ((s0.n0).properties -> 'name'), ((s0.n1).properties -> 'name') from s0; -- case: match (s)-[r:EdgeKind1]->() where (s)-[r {prop: 'a'}]->() return s -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where ((e0.properties -> 'prop'))::jsonb = to_jsonb(('a')::text)::jsonb and e0.kind_id = any (array [3]::int2[])) select s0.n0 as s from s0 where ((with s1 as (select s0.e0 as e0, s0.n0 as n0 from edge e0 join node n2 on n2.id = (s0.e0).end_id where (s0.n0).id = (s0.e0).start_id) select count(*) > 0 from s1)); 
+with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where (jsonb_typeof((e0.properties -> 'prop')) = 'string' and (e0.properties ->> 'prop') = 'a') and e0.kind_id = any (array [3]::int2[])) select s0.n0 as s from s0 where ((with s1 as (select s0.e0 as e0, s0.n0 as n0 from edge e0 join node n2 on n2.id = (s0.e0).end_id where (s0.n0).id = (s0.e0).start_id) select count(*) > 0 from s1)); -- case: match (s)-[r:EdgeKind1]->(e) where not (s.system_tags contains 'admin_tier_0') and id(e) = 1 return id(s), labels(s), id(r), type(r) with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n1 on (n1.id = 1) and n1.id = e0.end_id join node n0 on (not (coalesce((n0.properties ->> 'system_tags'), '')::text like '%admin\_tier\_0%')) and n0.id = e0.start_id where e0.kind_id = any (array [3]::int2[])) select (s0.n0).id, (array(select _kind.name from generate_subscripts((s0.n0).kind_ids, 1) as _kind_idx, kind _kind where _kind.id = ((s0.n0).kind_ids)[_kind_idx] order by _kind_idx))::text[], (s0.e0).id, kind_name((s0.e0).kind_id)::text from s0; @@ -113,7 +113,8 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1 with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where (n1.id <> n0.id)) select s0.n1 as n2 from s0; -- case: match ()-[r]->()-[e]->(n) where r <> e return n -with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 
from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where ((s0.e0).id <> e1.id)) select s1.n2 as n from s1; +with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n1 as n1, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n2 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where ((s0.e0).id <> e1.id) and e1.id != (s0.e0).id) select s1.n2 as n from s1; -- case: match (s:NodeKind1:NodeKind2)-[r:EdgeKind1|EdgeKind2]->(e:NodeKind2:NodeKind1) return s.name, e.name with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on n0.kind_ids operator (pg_catalog.@>) array [1, 2]::int2[] and n0.id = e0.start_id join node n1 on n1.kind_ids operator (pg_catalog.@>) array [2, 1]::int2[] and n1.id = e0.end_id where e0.kind_id = any (array [3, 4]::int2[])) select ((s0.n0).properties -> 'name'), ((s0.n1).properties -> 'name') from s0; + diff --git a/cypher/models/pgsql/test/translation_cases/unwind.sql b/cypher/models/pgsql/test/translation_cases/unwind.sql index 8c2c8160..4c00ab6e 100644 --- a/cypher/models/pgsql/test/translation_cases/unwind.sql +++ b/cypher/models/pgsql/test/translation_cases/unwind.sql @@ -48,7 +48,7 @@ with s0 as (select array [1, 2, 3]::int8[] as i0) select i1 as x from s0, unnest select i0 as x from unnest(array [1, 2, 3]::int8[]) as i0; 
-- case: MATCH (n) WHERE n.environmentid = '1234' UNWIND labels(n) AS kind RETURN kind, count(n) AS count -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'environmentid'))::jsonb = to_jsonb(('1234')::text)::jsonb)) select i0 as kind, count(s0.n0)::int8 as count from s0, unnest((array(select _kind.name from generate_subscripts((s0.n0).kind_ids, 1) as _kind_idx, kind _kind where _kind.id = ((s0.n0).kind_ids)[_kind_idx] order by _kind_idx))::text[]) as i0 group by i0; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'environmentid')) = 'string' and (n0.properties ->> 'environmentid') = '1234'))) select i0 as kind, count(s0.n0)::int8 as count from s0, unnest((array(select _kind.name from generate_subscripts((s0.n0).kind_ids, 1) as _kind_idx, kind _kind where _kind.id = ((s0.n0).kind_ids)[_kind_idx] order by _kind_idx))::text[]) as i0 group by i0; -- case: MATCH (n) UNWIND labels(n) AS label RETURN label, count(n) AS count with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select i0 as label, count(s0.n0)::int8 as count from s0, unnest((array(select _kind.name from generate_subscripts((s0.n0).kind_ids, 1) as _kind_idx, kind _kind where _kind.id = ((s0.n0).kind_ids)[_kind_idx] order by _kind_idx))::text[]) as i0 group by i0; @@ -67,3 +67,4 @@ with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposit -- case: MATCH (n) WITH collect(n.name) + ['tail'] AS names UNWIND names AS name RETURN name with s0 as (with s1 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0) select array_remove(coalesce(array_agg(((s1.n0).properties ->> 'name'))::anyarray, array []::text[])::anyarray, null)::anyarray || array ['tail']::text[] as i0 from s1) select i1 as name from s0, unnest(i0) as i1; + diff --git 
a/cypher/models/pgsql/test/translation_cases/update.sql b/cypher/models/pgsql/test/translation_cases/update.sql index 5fd99b66..7663e030 100644 --- a/cypher/models/pgsql/test/translation_cases/update.sql +++ b/cypher/models/pgsql/test/translation_cases/update.sql @@ -36,10 +36,10 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0), s1 as (update node n1 set kind_ids = n1.kind_ids - array [1]::int2[], properties = n1.properties - array ['prop']::text[] from s0 where (s0.n0).id = n1.id returning (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n0) select s1.n0 as n from s1; -- case: match (n) where n.name = '1234' set n.is_target = true -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('1234')::text)::jsonb)), s1 as (update node n1 set properties = n1.properties || jsonb_build_object('is_target', true)::jsonb from s0 where (s0.n0).id = n1.id returning (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n0) select 1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '1234'))), s1 as (update node n1 set properties = n1.properties || jsonb_build_object('is_target', true)::jsonb from s0 where (s0.n0).id = n1.id returning (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n0) select 1; -- case: match (n) where n.name = '1234' match (e) where e.tag = n.tag_id set e.is_target = true -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('1234')::text)::jsonb)), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((n1.properties -> 'tag') = ((s0.n0).properties -> 'tag_id'))), s2 as 
(update node n2 set properties = n2.properties || jsonb_build_object('is_target', true)::jsonb from s1 where (s1.n1).id = n2.id returning s1.n0 as n0, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n1) select 1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = '1234'))), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((n1.properties -> 'tag') = ((s0.n0).properties -> 'tag_id'))), s2 as (update node n2 set properties = n2.properties || jsonb_build_object('is_target', true)::jsonb from s1 where (s1.n1).id = n2.id returning s1.n0 as n0, (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n1) select 1; -- case: match (n1), (n3) set n1.target = true set n3.target = true return n1, n3 with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1), s2 as (update node n2 set properties = n2.properties || jsonb_build_object('target', true)::jsonb from s1 where (s1.n0).id = n2.id returning (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n0, s1.n1 as n1), s3 as (update node n3 set properties = n3.properties || jsonb_build_object('target', true)::jsonb from s2 where (s2.n1).id = n3.id returning s2.n0 as n0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n1) select s3.n0 as n1, s3.n1 as n3 from s3; @@ -60,13 +60,14 @@ with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0), s1 as (update node n1 set properties = n1.properties - array ['prop']::text[] || jsonb_build_object('name', 'n' || (s0.n0).id)::jsonb from s0 where (s0.n0).id = n1.id returning (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n0) select 1; -- case: 
match (n) where n.name = 'n3' set n.name = 'RENAMED' return n -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('n3')::text)::jsonb)), s1 as (update node n1 set properties = n1.properties || jsonb_build_object('name', 'RENAMED')::jsonb from s0 where (s0.n0).id = n1.id returning (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n0) select s1.n0 as n from s1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n3'))), s1 as (update node n1 set properties = n1.properties || jsonb_build_object('name', 'RENAMED')::jsonb from s0 where (s0.n0).id = n1.id returning (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n0) select s1.n0 as n from s1; -- case: match (n), (e) where n.name = 'n1' and e.name = 'n4' set n.name = e.name set e.name = 'RENAMED' -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where (((n0.properties -> 'name'))::jsonb = to_jsonb(('n1')::text)::jsonb)), s1 as (select s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where (((n1.properties -> 'name'))::jsonb = to_jsonb(('n4')::text)::jsonb)), s2 as (update node n2 set properties = n2.properties || jsonb_build_object('name', ((s1.n1).properties -> 'name'))::jsonb from s1 where (s1.n0).id = n2.id returning (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n0, s1.n1 as n1), s3 as (update node n3 set properties = n3.properties || jsonb_build_object('name', 'RENAMED')::jsonb from s2 where (s2.n1).id = n3.id returning s2.n0 as n0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n1) select 1; +with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from node n0 where ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n1'))), s1 as (select 
s0.n0 as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from s0, node n1 where ((jsonb_typeof((n1.properties -> 'name')) = 'string' and (n1.properties ->> 'name') = 'n4'))), s2 as (update node n2 set properties = n2.properties || jsonb_build_object('name', ((s1.n1).properties -> 'name'))::jsonb from s1 where (s1.n0).id = n2.id returning (n2.id, n2.kind_ids, n2.properties)::nodecomposite as n0, s1.n1 as n1), s3 as (update node n3 set properties = n3.properties || jsonb_build_object('name', 'RENAMED')::jsonb from s2 where (s2.n1).id = n3.id returning s2.n0 as n0, (n3.id, n3.kind_ids, n3.properties)::nodecomposite as n1) select 1; -- case: match (n)-[r:EdgeKind1]->() where n:NodeKind1 set r.visited = true return r with s0 as (select (e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0 from edge e0 join node n0 on (n0.kind_ids operator (pg_catalog.@>) array [1]::int2[]) and n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [3]::int2[])), s1 as (update edge e1 set properties = e1.properties || jsonb_build_object('visited', true)::jsonb from s0 where (s0.e0).id = e1.id returning (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e0, s0.n0 as n0) select s1.e0 as r from s1; -- case: match (n)-[]->()-[r]->() where n.name = 'n1' set r.visited = true return r.name -with s0 as (select (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on (((n0.properties -> 'name'))::jsonb = to_jsonb(('n1')::text)::jsonb) and n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n0 as n0, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id), s2 as (update edge e2 set properties = e2.properties || 
jsonb_build_object('visited', true)::jsonb from s1 where (s1.e1).id = e2.id returning (e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties)::edgecomposite as e1, s1.n0 as n0, s1.n1 as n1) select ((s2.e1).properties -> 'name') from s2; +with s0 as (select e0.id as e0, (n0.id, n0.kind_ids, n0.properties)::nodecomposite as n0, (n1.id, n1.kind_ids, n1.properties)::nodecomposite as n1 from edge e0 join node n0 on ((jsonb_typeof((n0.properties -> 'name')) = 'string' and (n0.properties ->> 'name') = 'n1')) and n0.id = e0.start_id join node n1 on n1.id = e0.end_id), s1 as (select s0.e0 as e0, (e1.id, e1.start_id, e1.end_id, e1.kind_id, e1.properties)::edgecomposite as e1, s0.n0 as n0, s0.n1 as n1 from s0 join edge e1 on (s0.n1).id = e1.start_id join node n2 on n2.id = e1.end_id where e1.id != s0.e0), s2 as (update edge e2 set properties = e2.properties || jsonb_build_object('visited', true)::jsonb from s1 where (s1.e1).id = e2.id returning s1.e0 as e0, (e2.id, e2.start_id, e2.end_id, e2.kind_id, e2.properties)::edgecomposite as e1, s1.n0 as n0, s1.n1 as n1) select ((s2.e1).properties -> 'name') from s2; + diff --git a/cypher/models/pgsql/test/validation_integration_test.go b/cypher/models/pgsql/test/validation_integration_test.go index fc84fe4d..7e741bea 100644 --- a/cypher/models/pgsql/test/validation_integration_test.go +++ b/cypher/models/pgsql/test/validation_integration_test.go @@ -33,6 +33,7 @@ func pgConnectionString(t *testing.T) string { connStr := os.Getenv(connectionStringEnv) require.NotEmpty(t, connStr) if isNeo4jConnectionString(connStr) { + // CONNECTION_STRING selects one active backend for integration runs. 
t.Skipf("%s is not a PostgreSQL connection string", connectionStringEnv) } @@ -185,15 +186,17 @@ func TestBidirectionalASPHarnessOverloads(t *testing.T) { ) require.NoError(t, err) - forwardPrimer := nextFrontValues( - "(1::int8, 10::int8, 1::int4, false, false, array [101]::int8[])", - "(3::int8, 10::int8, 1::int4, false, false, array [103]::int8[])", - ) - backwardPrimer := nextFrontValues( - "(2::int8, 10::int8, 1::int4, false, false, array [202]::int8[])", - "(4::int8, 10::int8, 1::int4, false, false, array [204]::int8[])", + var ( + forwardPrimer = nextFrontValues( + "(1::int8, 10::int8, 1::int4, false, false, array [101]::int8[])", + "(3::int8, 10::int8, 1::int4, false, false, array [103]::int8[])", + ) + backwardPrimer = nextFrontValues( + "(2::int8, 10::int8, 1::int4, false, false, array [202]::int8[])", + "(4::int8, 10::int8, 1::int4, false, false, array [204]::int8[])", + ) + pairFilter = pairFilterValues("(1::int8, 2::int8)") ) - pairFilter := pairFilterValues("(1::int8, 2::int8)") rows, err := tx.Query(testCtx, "select root_id, next_id from bidirectional_asp_harness($1::text, $2::text, $3::text, $4::text, 4, ''::text, ''::text, $5::text) order by root_id, next_id", @@ -236,18 +239,20 @@ func TestBidirectionalASPHarnessOverloads(t *testing.T) { ) require.NoError(t, err) - forwardPrimer := nextFrontValues( - "(1::int8, 2::int8, 1::int4, true, false, array [102]::int8[])", - "(1::int8, 2::int8, 1::int4, true, false, array [103]::int8[])", - "(3::int8, 30::int8, 1::int4, false, false, array [330]::int8[])", - ) - backwardPrimer := nextFrontValues( - "(4::int8, 30::int8, 1::int4, false, false, array [304]::int8[])", - "(4::int8, 30::int8, 1::int4, false, false, array [305]::int8[])", - ) - pairFilter := pairFilterValues( - "(1::int8, 2::int8)", - "(3::int8, 4::int8)", + var ( + forwardPrimer = nextFrontValues( + "(1::int8, 2::int8, 1::int4, true, false, array [102]::int8[])", + "(1::int8, 2::int8, 1::int4, true, false, array [103]::int8[])", + "(3::int8, 
30::int8, 1::int4, false, false, array [330]::int8[])", + ) + backwardPrimer = nextFrontValues( + "(4::int8, 30::int8, 1::int4, false, false, array [304]::int8[])", + "(4::int8, 30::int8, 1::int4, false, false, array [305]::int8[])", + ) + pairFilter = pairFilterValues( + "(1::int8, 2::int8)", + "(3::int8, 4::int8)", + ) ) rows, err := tx.Query(testCtx, @@ -333,14 +338,16 @@ func TestBidirectionalASPHarnessOverloads(t *testing.T) { require.NoError(t, err) defer tx.Rollback(testCtx) - forwardPrimer := nextFrontValues( - "(1::int8, 2::int8, 1::int4, true, false, array [102]::int8[])", - "(3::int8, 30::int8, 1::int4, false, false, array [330]::int8[])", - ) - backwardPrimer := nextFrontValues("(4::int8, 30::int8, 1::int4, false, false, array [304]::int8[])") - pairFilter := pairFilterValues( - "(1::int8, 2::int8)", - "(3::int8, 4::int8)", + var ( + forwardPrimer = nextFrontValues( + "(1::int8, 2::int8, 1::int4, true, false, array [102]::int8[])", + "(3::int8, 30::int8, 1::int4, false, false, array [330]::int8[])", + ) + backwardPrimer = nextFrontValues("(4::int8, 30::int8, 1::int4, false, false, array [304]::int8[])") + pairFilter = pairFilterValues( + "(1::int8, 2::int8)", + "(3::int8, 4::int8)", + ) ) rows, err := tx.Query(testCtx, diff --git a/cypher/models/pgsql/translate/aggregate_traversal_count.go b/cypher/models/pgsql/translate/aggregate_traversal_count.go new file mode 100644 index 00000000..330c5a48 --- /dev/null +++ b/cypher/models/pgsql/translate/aggregate_traversal_count.go @@ -0,0 +1,531 @@ +package translate + +import ( + "fmt" + "reflect" + + "github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" + "github.com/specterops/dawgs/cypher/models/walk" + "github.com/specterops/dawgs/graph" +) + +const ( + aggregateCandidateSourcesCTE pgsql.Identifier = "candidate_sources" + aggregateTraversalCTE pgsql.Identifier = "traversal" + 
aggregateTerminalNodesCTE pgsql.Identifier = "terminal_nodes" + aggregateTerminalHitsCTE pgsql.Identifier = "terminal_hits" + aggregateRankedCTE pgsql.Identifier = "ranked" + + aggregateSourceAlias pgsql.Identifier = "source_node" + aggregateEdgeAlias pgsql.Identifier = "e" + aggregateTerminalAlias pgsql.Identifier = "terminal_node" + + aggregateRootID pgsql.Identifier = "root_id" + aggregateNextID pgsql.Identifier = "next_id" + aggregateDepth pgsql.Identifier = "depth" + aggregatePath pgsql.Identifier = "path" + aggregateNodeID pgsql.Identifier = "id" +) + +func (s *Translator) translateAggregateTraversalCount(query *cypher.RegularQuery, plan optimize.LoweringPlan) (bool, error) { + if len(plan.AggregateTraversalCount) == 0 { + return false, nil + } + + shape, ok := optimize.AggregateTraversalCountShapeForQuery(query) + if !ok || shape.Target != plan.AggregateTraversalCount[0].Target { + return false, nil + } + + statement, err := s.aggregateTraversalCountQuery(shape) + if err != nil { + return false, err + } + + s.translation.Statement = statement + s.recordLowering(optimize.LoweringAggregateTraversalCount) + return true, nil +} + +func (s *Translator) aggregateTraversalCountQuery(shape optimize.AggregateTraversalCountShape) (pgsql.Query, error) { + candidateSources, err := s.buildAggregateCandidateSourcesCTE(shape) + if err != nil { + return pgsql.Query{}, err + } + + traversal, err := s.buildAggregateTraversalCTE(shape) + if err != nil { + return pgsql.Query{}, err + } + + terminalNodes, err := s.buildAggregateTerminalNodesCTE(shape) + if err != nil { + return pgsql.Query{}, err + } + + terminalHits, err := s.buildAggregateTerminalHitsCTE(shape) + if err != nil { + return pgsql.Query{}, err + } + + projection := pgsql.Projection{ + pgsql.AliasedExpression{ + Expression: aggregateNodeComposite(aggregateSourceAlias), + Alias: pgsql.AsOptionalIdentifier(pgsql.Identifier(shape.ReturnSourceAlias)), + }, + } + if shape.ReturnCount { + projection = append(projection, 
pgsql.AliasedExpression{ + Expression: pgsql.CompoundIdentifier{aggregateRankedCTE, pgsql.Identifier(shape.CountAlias)}, + Alias: pgsql.AsOptionalIdentifier(pgsql.Identifier(shape.ReturnCountAlias)), + }) + } + + return pgsql.Query{ + CommonTableExpressions: &pgsql.With{ + Recursive: true, + Expressions: []pgsql.CommonTableExpression{ + candidateSources, + traversal, + terminalNodes, + terminalHits, + s.buildAggregateRankedCTE(shape), + }, + }, + Body: pgsql.Select{ + Projection: projection, + From: []pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: aggregateRankedCTE.AsCompoundIdentifier(), + }, + Joins: []pgsql.Join{{ + Table: pgsql.TableReference{ + Name: pgsql.TableNode.AsCompoundIdentifier(), + Binding: pgsql.AsOptionalIdentifier(aggregateSourceAlias), + }, + JoinOperator: pgsql.JoinOperator{ + JoinType: pgsql.JoinTypeInner, + Constraint: pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{aggregateSourceAlias, pgsql.ColumnID}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{aggregateRankedCTE, aggregateRootID}, + ), + }, + }}, + }}, + }, + OrderBy: []*pgsql.OrderBy{{ + Expression: pgsql.CompoundIdentifier{aggregateRankedCTE, pgsql.Identifier(shape.CountAlias)}, + Ascending: false, + }}, + }, nil +} + +func (s *Translator) buildAggregateCandidateSourcesCTE(shape optimize.AggregateTraversalCountShape) (pgsql.CommonTableExpression, error) { + whereClause, err := s.aggregateSourceWhere(shape) + if err != nil { + return pgsql.CommonTableExpression{}, err + } + + return pgsql.CommonTableExpression{ + Alias: pgsql.TableAlias{ + Name: aggregateCandidateSourcesCTE, + Shape: pgsql.NewRecordShape([]pgsql.Identifier{aggregateRootID}), + }, + Query: pgsql.Query{ + Body: pgsql.Select{ + Projection: pgsql.Projection{ + pgsql.AliasedExpression{ + Expression: pgsql.CompoundIdentifier{aggregateSourceAlias, pgsql.ColumnID}, + Alias: pgsql.AsOptionalIdentifier(aggregateRootID), + }, + }, + From: []pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: 
pgsql.TableNode.AsCompoundIdentifier(), + Binding: pgsql.AsOptionalIdentifier(aggregateSourceAlias), + }, + }}, + Where: whereClause, + }, + }, + }, nil +} + +func (s *Translator) buildAggregateTraversalCTE(shape optimize.AggregateTraversalCountShape) (pgsql.CommonTableExpression, error) { + edgeKindConstraint, err := s.aggregateEdgeKindConstraint(aggregateEdgeAlias, shape.RelationshipKinds) + if err != nil { + return pgsql.CommonTableExpression{}, err + } + + sourceColumn, nextColumn := aggregateTraversalColumns(shape.Direction) + var ( + primerJoin = pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{aggregateEdgeAlias, sourceColumn}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{aggregateCandidateSourcesCTE, aggregateRootID}, + ) + recursiveJoin = pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{aggregateEdgeAlias, sourceColumn}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{aggregateTraversalCTE, aggregateNextID}, + ) + ) + + return pgsql.CommonTableExpression{ + Alias: pgsql.TableAlias{ + Name: aggregateTraversalCTE, + Shape: pgsql.NewRecordShape([]pgsql.Identifier{ + aggregateRootID, + aggregateNextID, + aggregateDepth, + aggregatePath, + }), + }, + Query: pgsql.Query{ + Body: pgsql.SetOperation{ + Operator: pgsql.OperatorUnion, + All: true, + LOperand: pgsql.Select{ + Projection: pgsql.Projection{ + pgsql.CompoundIdentifier{aggregateCandidateSourcesCTE, aggregateRootID}, + pgsql.CompoundIdentifier{aggregateEdgeAlias, nextColumn}, + pgsql.NewLiteral(int64(1), pgsql.Int8), + pgsql.ArrayLiteral{ + Values: []pgsql.Expression{ + pgsql.CompoundIdentifier{aggregateEdgeAlias, pgsql.ColumnID}, + }, + CastType: pgsql.Int8Array, + }, + }, + From: []pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: aggregateCandidateSourcesCTE.AsCompoundIdentifier(), + }, + Joins: []pgsql.Join{{ + Table: pgsql.TableReference{ + Name: pgsql.TableEdge.AsCompoundIdentifier(), + Binding: pgsql.AsOptionalIdentifier(aggregateEdgeAlias), + }, + JoinOperator: 
pgsql.JoinOperator{ + JoinType: pgsql.JoinTypeInner, + Constraint: primerJoin, + }, + }}, + }}, + Where: edgeKindConstraint, + }, + ROperand: pgsql.Select{ + Projection: pgsql.Projection{ + pgsql.CompoundIdentifier{aggregateTraversalCTE, aggregateRootID}, + pgsql.CompoundIdentifier{aggregateEdgeAlias, nextColumn}, + pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{aggregateTraversalCTE, aggregateDepth}, + pgsql.OperatorAdd, + pgsql.NewLiteral(int64(1), pgsql.Int8), + ), + pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{aggregateTraversalCTE, aggregatePath}, + pgsql.OperatorConcatenate, + pgsql.CompoundIdentifier{aggregateEdgeAlias, pgsql.ColumnID}, + ), + }, + From: []pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: aggregateTraversalCTE.AsCompoundIdentifier(), + }, + Joins: []pgsql.Join{{ + Table: pgsql.LateralSubquery{ + Query: pgsql.Query{ + Body: pgsql.Select{ + Projection: pgsql.Projection{ + pgsql.CompoundIdentifier{aggregateEdgeAlias, pgsql.ColumnID}, + pgsql.CompoundIdentifier{aggregateEdgeAlias, pgsql.ColumnStartID}, + pgsql.CompoundIdentifier{aggregateEdgeAlias, pgsql.ColumnEndID}, + }, + From: []pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: pgsql.TableEdge.AsCompoundIdentifier(), + Binding: pgsql.AsOptionalIdentifier(aggregateEdgeAlias), + }, + }}, + Where: pgsql.OptionalAnd( + pgsql.OptionalAnd( + recursiveJoin, + pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{aggregateEdgeAlias, pgsql.ColumnID}, + pgsql.OperatorNotEquals, + pgsql.NewAllExpression(pgsql.CompoundIdentifier{aggregateTraversalCTE, aggregatePath}), + ), + ), + edgeKindConstraint, + ), + }, + Offset: pgsql.NewLiteral(0, pgsql.Int), + }, + Binding: pgsql.AsOptionalIdentifier(aggregateEdgeAlias), + }, + JoinOperator: pgsql.JoinOperator{ + JoinType: pgsql.JoinTypeInner, + Constraint: pgsql.NewLiteral(true, pgsql.Boolean), + }, + }}, + }}, + Where: pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{aggregateTraversalCTE, aggregateDepth}, + 
pgsql.OperatorLessThan, + pgsql.NewLiteral(shape.MaxDepth, pgsql.Int8), + ), + }, + }, + }, + }, nil +} + +func (s *Translator) buildAggregateTerminalHitsCTE(shape optimize.AggregateTraversalCountShape) (pgsql.CommonTableExpression, error) { + terminalWhere := pgsql.Expression(nil) + + if shape.MinDepth > 1 { + terminalWhere = pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{aggregateTraversalCTE, aggregateDepth}, + pgsql.OperatorGreaterThanOrEqualTo, + pgsql.NewLiteral(shape.MinDepth, pgsql.Int8), + ) + } + + return pgsql.CommonTableExpression{ + Alias: pgsql.TableAlias{ + Name: aggregateTerminalHitsCTE, + Shape: pgsql.NewRecordShape([]pgsql.Identifier{aggregateRootID}), + }, + Query: pgsql.Query{ + Body: pgsql.Select{ + Projection: pgsql.Projection{ + pgsql.CompoundIdentifier{aggregateTraversalCTE, aggregateRootID}, + }, + From: []pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: aggregateTraversalCTE.AsCompoundIdentifier(), + }, + Joins: []pgsql.Join{{ + Table: pgsql.TableReference{ + Name: aggregateTerminalNodesCTE.AsCompoundIdentifier(), + }, + JoinOperator: pgsql.JoinOperator{ + JoinType: pgsql.JoinTypeInner, + Constraint: pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{aggregateTerminalNodesCTE, aggregateNodeID}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{aggregateTraversalCTE, aggregateNextID}, + ), + }, + }}, + }}, + Where: terminalWhere, + }, + }, + }, nil +} + +func (s *Translator) buildAggregateTerminalNodesCTE(shape optimize.AggregateTraversalCountShape) (pgsql.CommonTableExpression, error) { + terminalWhere, err := s.aggregateTerminalWhere(shape) + if err != nil { + return pgsql.CommonTableExpression{}, err + } + + return pgsql.CommonTableExpression{ + Materialized: &pgsql.Materialized{Materialized: true}, + Alias: pgsql.TableAlias{ + Name: aggregateTerminalNodesCTE, + Shape: pgsql.NewRecordShape([]pgsql.Identifier{aggregateNodeID}), + }, + Query: pgsql.Query{ + Body: pgsql.Select{ + Projection: pgsql.Projection{ + 
pgsql.CompoundIdentifier{aggregateTerminalAlias, pgsql.ColumnID}, + }, + From: []pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: pgsql.TableNode.AsCompoundIdentifier(), + Binding: pgsql.AsOptionalIdentifier(aggregateTerminalAlias), + }, + }}, + Where: terminalWhere, + }, + }, + }, nil +} + +func (s *Translator) buildAggregateRankedCTE(shape optimize.AggregateTraversalCountShape) pgsql.CommonTableExpression { + countAlias := pgsql.Identifier(shape.CountAlias) + + return pgsql.CommonTableExpression{ + Alias: pgsql.TableAlias{ + Name: aggregateRankedCTE, + Shape: pgsql.NewRecordShape([]pgsql.Identifier{ + aggregateRootID, + countAlias, + }), + }, + Query: pgsql.Query{ + Body: pgsql.Select{ + Projection: pgsql.Projection{ + pgsql.CompoundIdentifier{aggregateTerminalHitsCTE, aggregateRootID}, + pgsql.AliasedExpression{ + Expression: pgsql.FunctionCall{ + Function: pgsql.FunctionCount, + Parameters: []pgsql.Expression{pgsql.Wildcard{}}, + CastType: pgsql.Int8, + }, + Alias: pgsql.AsOptionalIdentifier(countAlias), + }, + }, + From: []pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: aggregateTerminalHitsCTE.AsCompoundIdentifier(), + }, + }}, + GroupBy: []pgsql.Expression{ + pgsql.CompoundIdentifier{aggregateTerminalHitsCTE, aggregateRootID}, + }, + }, + OrderBy: []*pgsql.OrderBy{{ + Expression: countAlias, + Ascending: false, + }}, + Limit: pgsql.NewLiteral(shape.Limit, pgsql.Int8), + }, + } +} + +func (s *Translator) aggregateSourceWhere(shape optimize.AggregateTraversalCountShape) (pgsql.Expression, error) { + sourceKindConstraint, err := s.aggregateNodeKindConstraint(aggregateSourceAlias, shape.SourceKinds) + if err != nil { + return nil, err + } + + sourcePredicate, err := s.aggregateSourcePredicate(shape) + if err != nil { + return nil, err + } + + return pgsql.OptionalAnd(sourcePredicate, sourceKindConstraint), nil +} + +func (s *Translator) aggregateTerminalWhere(shape optimize.AggregateTraversalCountShape) (pgsql.Expression, error) { + 
terminalKindConstraint, err := s.aggregateNodeKindConstraint(aggregateTerminalAlias, shape.TerminalKinds) + if err != nil { + return nil, err + } + + terminalPredicate, err := s.aggregateBindingPredicate(shape.TerminalMatch, shape.TerminalSymbol, aggregateTerminalAlias) + if err != nil { + return nil, err + } + + return pgsql.OptionalAnd(terminalPredicate, terminalKindConstraint), nil +} + +func (s *Translator) aggregateSourcePredicate(shape optimize.AggregateTraversalCountShape) (pgsql.Expression, error) { + return s.aggregateBindingPredicate(shape.SourceMatch, shape.SourceSymbol, aggregateSourceAlias) +} + +func (s *Translator) aggregateBindingPredicate(match *cypher.Match, symbol string, alias pgsql.Identifier) (pgsql.Expression, error) { + if match == nil || match.Where == nil { + return nil, nil + } + + var ( + translator = NewTranslator(s.ctx, s.kindMapper.kindMapper, s.parameters, s.graphID) + binding = translator.scope.Define(alias, pgsql.NodeComposite) + ) + translator.scope.Alias(pgsql.Identifier(symbol), binding) + + if err := walk.Cypher(match.Where, translator); err != nil { + return nil, err + } + + sourceConstraints, err := translator.treeTranslator.ConsumeConstraintsFromVisibleSet(pgsql.AsIdentifierSet(alias)) + if err != nil { + return nil, err + } + + remainingConstraints, err := translator.treeTranslator.ConsumeAllConstraints() + if err != nil { + return nil, err + } + if remainingConstraints.Expression != nil { + return nil, fmt.Errorf("unsupported aggregate traversal predicate dependencies: %v", remainingConstraints.Dependencies.Slice()) + } + + for key, value := range translator.translation.Parameters { + if existingValue, hasExisting := s.translation.Parameters[key]; hasExisting && !reflect.DeepEqual(existingValue, value) { + return nil, fmt.Errorf("aggregate traversal parameter collision for %s", key) + } + + s.translation.Parameters[key] = value + } + + return sourceConstraints.Expression, nil +} + +func (s *Translator) 
aggregateNodeKindConstraint(alias pgsql.Identifier, kinds graph.Kinds) (pgsql.Expression, error) { + if len(kinds) == 0 { + return nil, nil + } + + kindIDs, err := s.kindMapper.MapKinds(kinds) + if err != nil { + return nil, err + } + + kindIDsLiteral, err := pgsql.AsLiteral(kindIDs) + if err != nil { + return nil, err + } + + return pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{alias, pgsql.ColumnKindIDs}, + pgsql.OperatorPGArrayLHSContainsRHS, + kindIDsLiteral, + ), nil +} + +func (s *Translator) aggregateEdgeKindConstraint(alias pgsql.Identifier, kinds graph.Kinds) (pgsql.Expression, error) { + if len(kinds) == 0 { + return nil, nil + } + + kindIDs, err := s.kindMapper.MapKinds(kinds) + if err != nil { + return nil, err + } + + return pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{alias, pgsql.ColumnKindID}, + pgsql.OperatorEquals, + pgsql.NewAnyExpressionHinted(pgsql.NewLiteral(kindIDs, pgsql.Int2Array)), + ), nil +} + +func aggregateTraversalColumns(direction graph.Direction) (pgsql.Identifier, pgsql.Identifier) { + switch direction { + case graph.DirectionInbound: + return pgsql.ColumnEndID, pgsql.ColumnStartID + default: + return pgsql.ColumnStartID, pgsql.ColumnEndID + } +} + +func aggregateNodeComposite(alias pgsql.Identifier) pgsql.CompositeValue { + return pgsql.CompositeValue{ + Values: []pgsql.Expression{ + pgsql.CompoundIdentifier{alias, pgsql.ColumnID}, + pgsql.CompoundIdentifier{alias, pgsql.ColumnKindIDs}, + pgsql.CompoundIdentifier{alias, pgsql.ColumnProperties}, + }, + DataType: pgsql.NodeComposite, + } +} diff --git a/cypher/models/pgsql/translate/collect_id_membership.go b/cypher/models/pgsql/translate/collect_id_membership.go new file mode 100644 index 00000000..d710b8ce --- /dev/null +++ b/cypher/models/pgsql/translate/collect_id_membership.go @@ -0,0 +1,120 @@ +package translate + +import ( + "strings" + + "github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/cypher/models/pgsql" + 
"github.com/specterops/dawgs/cypher/models/walk" +) + +type collectIDMembershipUsage struct { + membershipReferences int + otherReferences int +} + +type collectIDMembershipCollector struct { + walk.VisitorHandler + candidates map[pgsql.Identifier]struct{} + usages map[pgsql.Identifier]*collectIDMembershipUsage + stack []cypher.SyntaxNode +} + +func collectIDMembershipAliases(root *cypher.RegularQuery) (map[pgsql.Identifier]struct{}, error) { + candidates, err := collectIDMembershipCandidates(root) + if err != nil || len(candidates) == 0 { + return nil, err + } + + collector := &collectIDMembershipCollector{ + VisitorHandler: walk.NewCancelableErrorHandler(), + candidates: candidates, + usages: map[pgsql.Identifier]*collectIDMembershipUsage{}, + } + if err := walk.Cypher(root, collector); err != nil { + return nil, err + } + + aliases := map[pgsql.Identifier]struct{}{} + for alias := range candidates { + usage := collector.usages[alias] + if usage != nil && usage.membershipReferences > 0 && usage.otherReferences == 0 { + aliases[alias] = struct{}{} + } + } + return aliases, nil +} + +func collectIDMembershipCandidates(root *cypher.RegularQuery) (map[pgsql.Identifier]struct{}, error) { + candidates := map[pgsql.Identifier]struct{}{} + + err := walk.Cypher(root, walk.NewSimpleVisitor[cypher.SyntaxNode](func(node cypher.SyntaxNode, handler walk.VisitorHandler) { + projectionItem, isProjectionItem := node.(*cypher.ProjectionItem) + if !isProjectionItem || projectionItem.Alias == nil { + return + } + + function, isFunction := projectionItem.Expression.(*cypher.FunctionInvocation) + if !isFunction || !strings.EqualFold(function.Name, cypher.CollectFunction) || len(function.Arguments) != 1 { + return + } + + if _, isVariable := function.Arguments[0].(*cypher.Variable); isVariable { + candidates[pgsql.Identifier(projectionItem.Alias.Symbol)] = struct{}{} + } + })) + return candidates, err +} + +func (s *collectIDMembershipCollector) usage(alias pgsql.Identifier) 
*collectIDMembershipUsage { + usage := s.usages[alias] + if usage == nil { + usage = &collectIDMembershipUsage{} + s.usages[alias] = usage + } + return usage +} + +func (s *collectIDMembershipCollector) Enter(node cypher.SyntaxNode) { + variable, isVariable := node.(*cypher.Variable) + if !isVariable { + s.stack = append(s.stack, node) + return + } + + alias := pgsql.Identifier(variable.Symbol) + if _, isCandidate := s.candidates[alias]; isCandidate { + usage := s.usage(alias) + if s.isProjectionAliasDeclaration(variable) { + // The alias declaration is not a read. + } else if s.isMembershipCollectionOperand(variable) { + usage.membershipReferences++ + } else { + usage.otherReferences++ + } + } + + s.stack = append(s.stack, node) +} + +func (s *collectIDMembershipCollector) Visit(cypher.SyntaxNode) {} + +func (s *collectIDMembershipCollector) Exit(cypher.SyntaxNode) { + s.stack = s.stack[:len(s.stack)-1] +} + +func (s *collectIDMembershipCollector) isProjectionAliasDeclaration(variable *cypher.Variable) bool { + if len(s.stack) == 0 { + return false + } + projectionItem, isProjectionItem := s.stack[len(s.stack)-1].(*cypher.ProjectionItem) + return isProjectionItem && projectionItem.Alias == variable +} + +func (s *collectIDMembershipCollector) isMembershipCollectionOperand(variable *cypher.Variable) bool { + if len(s.stack) == 0 { + return false + } + partial, isPartialComparison := s.stack[len(s.stack)-1].(*cypher.PartialComparison) + return isPartialComparison && partial.Operator == cypher.OperatorIn && partial.Right == variable +} diff --git a/cypher/models/pgsql/translate/constraints.go b/cypher/models/pgsql/translate/constraints.go index dc9ea733..49dd0a63 100644 --- a/cypher/models/pgsql/translate/constraints.go +++ b/cypher/models/pgsql/translate/constraints.go @@ -6,6 +6,7 @@ import ( "github.com/specterops/dawgs/cypher/models/walk" "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" 
"github.com/specterops/dawgs/graph" ) @@ -438,42 +439,39 @@ type PatternConstraints struct { // of the traversal has an extreme disparity in search space. // // In cases that match this heuristic, it's beneficial to begin the traversal with the most tightly constrained set -// of nodes. To accomplish this we flip the order of the traversal step. -func (s *PatternConstraints) OptimizePatternConstraintBalance(scope *Scope, traversalStep *TraversalStep) error { +// of nodes. The optimizer selectivity model decides whether the step should flip; this method only applies that +// decision to the translated constraint and traversal state. +func (s *PatternConstraints) OptimizePatternConstraintBalance(scope *Scope, traversalStep *TraversalStep) (bool, error) { // If the left node is already materialized from a previous step, it is the anchor // for this expansion. Flipping the traversal direction would detach it from the // previous frame and produce invalid SQL (missing FROM-clause entry). if traversalStep.LeftNodeBound { - return nil + return false, nil } - if traversalStep.RightNodeBound { - // Only flip when a previous frame exists to serve as the FROM source for the - // now-left bound node. In self-referential patterns such as (u)-[]->(u) the - // right node is "bound" because it reuses the left node's variable, but there - // is no preceding CTE to reference. Flipping in that case would set - // LeftNodeBound = true while Frame.Previous is nil, causing a nil-pointer - // dereference in buildTraversalPatternRoot. - if traversalStep.hasPreviousFrameBinding() { - traversalStep.FlipNodes() - s.FlipNodes() - } - - return nil + // Only flip a right-bound segment when a previous frame exists to serve as the + // FROM source for the now-left bound node. Self-referential patterns such as + // (u)-[]->(u) can mark the right node as bound without a preceding CTE. 
+ if traversalStep.RightNodeBound && !traversalStep.hasPreviousFrameBinding() { + return false, nil } - if leftNodeSelectivity, err := MeasureSelectivity(scope, s.LeftNode.Expression); err != nil { - return err - } else if rightNodeSelectivity, err := MeasureSelectivity(scope, s.RightNode.Expression); err != nil { - return err - } else if rightNodeSelectivity-leftNodeSelectivity >= selectivityFlipThreshold { + if shouldFlip, err := optimize.NewSelectivityModel(scope).ShouldFlipTraversalDirection( + traversalStep.LeftNodeBound, + traversalStep.RightNodeBound, + s.LeftNode.Expression, + s.RightNode.Expression, + ); err != nil { + return false, err + } else if shouldFlip { // (a)-[*..]->(b:Constraint) // (a)<-[*..]-(b:Constraint) traversalStep.FlipNodes() s.FlipNodes() + return true, nil } - return nil + return false, nil } func (s *PatternConstraints) FlipNodes() { diff --git a/cypher/models/pgsql/translate/constraints_test.go b/cypher/models/pgsql/translate/constraints_test.go index 94f41619..cdde76b4 100644 --- a/cypher/models/pgsql/translate/constraints_test.go +++ b/cypher/models/pgsql/translate/constraints_test.go @@ -19,16 +19,18 @@ func TestMeasureSelectivity(t *testing.T) { } func TestCanExecuteSelectiveBidirectionalSearch(t *testing.T) { - lowSelectivity := pgd.Equals( - pgsql.Identifier("123"), - pgsql.Identifier("456"), - ) - idLookup := func(identifier pgsql.Identifier, id int64) pgsql.Expression { - return pgd.Equals( - pgsql.CompoundIdentifier{identifier, pgsql.ColumnID}, - pgd.IntLiteral(id), + var ( + lowSelectivity = pgd.Equals( + pgsql.Identifier("123"), + pgsql.Identifier("456"), ) - } + idLookup = func(identifier pgsql.Identifier, id int64) pgsql.Expression { + return pgd.Equals( + pgsql.CompoundIdentifier{identifier, pgsql.ColumnID}, + pgd.IntLiteral(id), + ) + } + ) t.Run("rejects low selectivity endpoints", func(t *testing.T) { step := &TraversalStep{ @@ -136,42 +138,46 @@ func TestCanExecuteSelectiveBidirectionalSearch(t *testing.T) { } func 
TestCanExecutePairAwareBidirectionalSearch(t *testing.T) { - scopeWithNodeBindings := func(identifiers ...pgsql.Identifier) *Scope { - scope := NewScope() - for _, identifier := range identifiers { - scope.Define(identifier, pgsql.NodeComposite) + var ( + scopeWithNodeBindings = func(identifiers ...pgsql.Identifier) *Scope { + scope := NewScope() + for _, identifier := range identifiers { + scope.Define(identifier, pgsql.NodeComposite) + } + + return scope } - - return scope - } - localSelectivePropertyConstraint := func(identifier pgsql.Identifier) pgsql.Expression { - return pgd.Equals( - pgd.PropertyLookup(identifier, "name"), - pgd.TextLiteral("123"), - ) - } - localBroadPropertyConstraint := func(identifier pgsql.Identifier) pgsql.Expression { - return pgd.Equals( - pgsql.CompoundIdentifier{identifier, pgsql.ColumnProperties}, - pgd.IntLiteral(1), - ) - } - localKindConstraint := func(identifier pgsql.Identifier) pgsql.Expression { - return pgd.And( - pgd.Equals( - pgsql.CompoundIdentifier{identifier, pgsql.ColumnKindIDs}, + localSelectivePropertyConstraint = func(identifier pgsql.Identifier) pgsql.Expression { + return pgd.Equals( + pgd.PropertyLookup(identifier, "name"), + pgd.TextLiteral("123"), + ) + } + localBroadPropertyConstraint = func(identifier pgsql.Identifier) pgsql.Expression { + return pgd.Equals( + pgsql.CompoundIdentifier{identifier, pgsql.ColumnProperties}, pgd.IntLiteral(1), - ), - pgd.Equals( - pgsql.CompoundIdentifier{identifier, pgsql.ColumnKindIDs}, - pgd.IntLiteral(2), - ), - ) - } + ) + } + localKindConstraint = func(identifier pgsql.Identifier) pgsql.Expression { + return pgd.And( + pgd.Equals( + pgsql.CompoundIdentifier{identifier, pgsql.ColumnKindIDs}, + pgd.IntLiteral(1), + ), + pgd.Equals( + pgsql.CompoundIdentifier{identifier, pgsql.ColumnKindIDs}, + pgd.IntLiteral(2), + ), + ) + } + ) t.Run("accepts selective property-backed local endpoint constraints for shortest path", func(t *testing.T) { - leftIdentifier := 
pgsql.Identifier("n0") - rightIdentifier := pgsql.Identifier("n1") + var ( + leftIdentifier = pgsql.Identifier("n0") + rightIdentifier = pgsql.Identifier("n1") + ) step := &TraversalStep{ LeftNode: &BoundIdentifier{ Identifier: leftIdentifier, @@ -195,8 +201,10 @@ func TestCanExecutePairAwareBidirectionalSearch(t *testing.T) { }) t.Run("rejects broad non-kind local endpoint constraints for shortest path", func(t *testing.T) { - leftIdentifier := pgsql.Identifier("n0") - rightIdentifier := pgsql.Identifier("n1") + var ( + leftIdentifier = pgsql.Identifier("n0") + rightIdentifier = pgsql.Identifier("n1") + ) step := &TraversalStep{ LeftNode: &BoundIdentifier{ Identifier: leftIdentifier, @@ -220,8 +228,10 @@ func TestCanExecutePairAwareBidirectionalSearch(t *testing.T) { }) t.Run("rejects pair-aware search when only one endpoint is selective", func(t *testing.T) { - leftIdentifier := pgsql.Identifier("n0") - rightIdentifier := pgsql.Identifier("n1") + var ( + leftIdentifier = pgsql.Identifier("n0") + rightIdentifier = pgsql.Identifier("n1") + ) step := &TraversalStep{ LeftNode: &BoundIdentifier{ Identifier: leftIdentifier, @@ -268,8 +278,10 @@ func TestCanExecutePairAwareBidirectionalSearch(t *testing.T) { }) t.Run("accepts selective property-backed local endpoint constraints for all shortest paths", func(t *testing.T) { - leftIdentifier := pgsql.Identifier("n0") - rightIdentifier := pgsql.Identifier("n1") + var ( + leftIdentifier = pgsql.Identifier("n0") + rightIdentifier = pgsql.Identifier("n1") + ) step := &TraversalStep{ LeftNode: &BoundIdentifier{ Identifier: leftIdentifier, @@ -293,8 +305,10 @@ func TestCanExecutePairAwareBidirectionalSearch(t *testing.T) { }) t.Run("rejects endpoint constraints that reference the other endpoint", func(t *testing.T) { - leftIdentifier := pgsql.Identifier("n0") - rightIdentifier := pgsql.Identifier("n1") + var ( + leftIdentifier = pgsql.Identifier("n0") + rightIdentifier = pgsql.Identifier("n1") + ) step := &TraversalStep{ 
LeftNode: &BoundIdentifier{ Identifier: leftIdentifier, @@ -323,3 +337,36 @@ func TestCanExecutePairAwareBidirectionalSearch(t *testing.T) { require.False(t, canExecute) }) } + +func TestCanMaterializeEndpointPairFilterRequiresPairAwareConstraints(t *testing.T) { + var ( + leftIdentifier = pgsql.Identifier("n0") + rightIdentifier = pgsql.Identifier("n1") + kindOnlyConstraint = func(identifier pgsql.Identifier) pgsql.Expression { + return pgd.Equals( + pgsql.CompoundIdentifier{identifier, pgsql.ColumnKindIDs}, + pgd.IntLiteral(1), + ) + } + propertyConstraint = func(identifier pgsql.Identifier) pgsql.Expression { + return pgd.Equals( + pgd.PropertyLookup(identifier, "name"), + pgd.TextLiteral("target"), + ) + } + ) + + step := &TraversalStep{ + LeftNode: &BoundIdentifier{Identifier: leftIdentifier}, + RightNode: &BoundIdentifier{Identifier: rightIdentifier}, + } + + require.False(t, canMaterializeEndpointPairFilterForStep(step, &Expansion{ + PrimerNodeConstraints: kindOnlyConstraint(leftIdentifier), + TerminalNodeConstraints: propertyConstraint(rightIdentifier), + })) + require.True(t, canMaterializeEndpointPairFilterForStep(step, &Expansion{ + PrimerNodeConstraints: propertyConstraint(leftIdentifier), + TerminalNodeConstraints: propertyConstraint(rightIdentifier), + })) +} diff --git a/cypher/models/pgsql/translate/count_fast_path.go b/cypher/models/pgsql/translate/count_fast_path.go new file mode 100644 index 00000000..29387061 --- /dev/null +++ b/cypher/models/pgsql/translate/count_fast_path.go @@ -0,0 +1,274 @@ +package translate + +import ( + "strings" + + "github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" + "github.com/specterops/dawgs/graph" +) + +const ( + countStoreNodeAlias pgsql.Identifier = "n0" + countStoreEdgeAlias pgsql.Identifier = "e0" + countStoreStartEndpointAlias pgsql.Identifier = "n0" + countStoreEndEndpointAlias pgsql.Identifier = 
"n1" +) + +type countStoreFastPathShape struct { + Target optimize.CountStoreFastPathTarget + Alias string + Kinds graph.Kinds +} + +func (s *Translator) translateCountStoreFastPath(query *cypher.RegularQuery, plan optimize.LoweringPlan) (bool, error) { + if len(plan.CountStoreFastPath) == 0 { + return false, nil + } + + shape, ok := countStoreFastPathShapeForQuery(query) + if !ok || shape.Target != plan.CountStoreFastPath[0].Target { + return false, nil + } + + countExpression := pgsql.FunctionCall{ + Function: pgsql.FunctionCount, + Parameters: []pgsql.Expression{pgsql.Wildcard{}}, + CastType: pgsql.Int8, + } + + var countProjection pgsql.SelectItem = countExpression + if shape.Alias != "" { + countProjection = pgsql.AliasedExpression{ + Expression: countExpression, + Alias: pgsql.AsOptionalIdentifier(pgsql.Identifier(shape.Alias)), + } + } + + fromClause, whereClause, err := s.countStoreFastPathFromAndWhere(shape) + if err != nil { + return false, err + } + + s.translation.Statement = pgsql.Query{ + Body: pgsql.Select{ + Projection: pgsql.Projection{countProjection}, + From: []pgsql.FromClause{fromClause}, + Where: whereClause, + }, + } + s.recordLowering(optimize.LoweringCountStoreFastPath) + return true, nil +} + +func (s *Translator) countStoreFastPathFromAndWhere(shape countStoreFastPathShape) (pgsql.FromClause, pgsql.Expression, error) { + switch shape.Target { + case optimize.CountStoreFastPathNode: + where, err := s.countStoreNodeKindConstraint(shape.Kinds) + return pgsql.FromClause{ + Source: pgsql.TableReference{ + Name: pgsql.TableNode.AsCompoundIdentifier(), + Binding: pgsql.AsOptionalIdentifier(countStoreNodeAlias), + }, + }, where, err + + case optimize.CountStoreFastPathEdge: + where, err := s.countStoreEdgeKindConstraint(shape.Kinds) + return pgsql.FromClause{ + Source: pgsql.TableReference{ + Name: pgsql.TableEdge.AsCompoundIdentifier(), + Binding: pgsql.AsOptionalIdentifier(countStoreEdgeAlias), + }, + Joins: []pgsql.Join{ + 
countStoreEndpointJoin(countStoreStartEndpointAlias, pgsql.ColumnStartID), + countStoreEndpointJoin(countStoreEndEndpointAlias, pgsql.ColumnEndID), + }, + }, where, err + + default: + return pgsql.FromClause{}, nil, nil + } +} + +func countStoreEndpointJoin(nodeAlias pgsql.Identifier, edgeEndpoint pgsql.Identifier) pgsql.Join { + return pgsql.Join{ + Table: pgsql.TableReference{ + Name: pgsql.TableNode.AsCompoundIdentifier(), + Binding: pgsql.AsOptionalIdentifier(nodeAlias), + }, + JoinOperator: pgsql.JoinOperator{ + JoinType: pgsql.JoinTypeInner, + Constraint: pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{nodeAlias, pgsql.ColumnID}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{countStoreEdgeAlias, edgeEndpoint}, + ), + }, + } +} + +func (s *Translator) countStoreNodeKindConstraint(kinds graph.Kinds) (pgsql.Expression, error) { + if len(kinds) == 0 { + return nil, nil + } + + kindIDs, err := s.kindMapper.MapKinds(kinds) + if err != nil { + return nil, err + } + + kindIDsLiteral, err := pgsql.AsLiteral(kindIDs) + if err != nil { + return nil, err + } + + return pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{countStoreNodeAlias, pgsql.ColumnKindIDs}, + pgsql.OperatorPGArrayLHSContainsRHS, + kindIDsLiteral, + ), nil +} + +func (s *Translator) countStoreEdgeKindConstraint(kinds graph.Kinds) (pgsql.Expression, error) { + if len(kinds) == 0 { + return nil, nil + } + + kindIDs, err := s.kindMapper.MapKinds(kinds) + if err != nil { + return nil, err + } + + return pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{countStoreEdgeAlias, pgsql.ColumnKindID}, + pgsql.OperatorEquals, + pgsql.NewAnyExpressionHinted(pgsql.NewLiteral(kindIDs, pgsql.Int2Array)), + ), nil +} + +func countStoreFastPathShapeForQuery(query *cypher.RegularQuery) (countStoreFastPathShape, bool) { + if query == nil || query.SingleQuery == nil || query.SingleQuery.SinglePartQuery == nil { + return countStoreFastPathShape{}, false + } + + queryPart := 
query.SingleQuery.SinglePartQuery + if len(queryPart.UpdatingClauses) > 0 || len(queryPart.ReadingClauses) != 1 { + return countStoreFastPathShape{}, false + } + + countArgument, alias, ok := simpleCountProjection(queryPart.Return) + if !ok { + return countStoreFastPathShape{}, false + } + + readingClause := queryPart.ReadingClauses[0] + if readingClause == nil || readingClause.Match == nil { + return countStoreFastPathShape{}, false + } + + match := readingClause.Match + if match.Optional || match.Where != nil || len(match.Pattern) != 1 { + return countStoreFastPathShape{}, false + } + + patternPart := match.Pattern[0] + if patternPart == nil || patternPart.Variable != nil || patternPart.ShortestPathPattern || patternPart.AllShortestPathsPattern { + return countStoreFastPathShape{}, false + } + + if len(patternPart.PatternElements) == 1 { + nodePattern, ok := patternPart.PatternElements[0].AsNodePattern() + if !ok || nodePattern == nil || nodePattern.Properties != nil { + return countStoreFastPathShape{}, false + } + + bindingSymbol := countStoreVariableSymbol(nodePattern.Variable) + if countArgument != cypher.TokenLiteralAsterisk && countArgument != bindingSymbol { + return countStoreFastPathShape{}, false + } + + return countStoreFastPathShape{ + Target: optimize.CountStoreFastPathNode, + Alias: alias, + Kinds: nodePattern.Kinds, + }, true + } + + if len(patternPart.PatternElements) != 3 { + return countStoreFastPathShape{}, false + } + + leftNode, leftOK := patternPart.PatternElements[0].AsNodePattern() + relationship, relationshipOK := patternPart.PatternElements[1].AsRelationshipPattern() + rightNode, rightOK := patternPart.PatternElements[2].AsNodePattern() + if !leftOK || !relationshipOK || !rightOK { + return countStoreFastPathShape{}, false + } + + if constrainedCountStoreEndpoint(leftNode) || constrainedCountStoreEndpoint(rightNode) || + relationship == nil || relationship.Range != nil || relationship.Properties != nil || + relationship.Direction == 
graph.DirectionBoth { + return countStoreFastPathShape{}, false + } + + bindingSymbol := countStoreVariableSymbol(relationship.Variable) + if countArgument != cypher.TokenLiteralAsterisk && countArgument != bindingSymbol { + return countStoreFastPathShape{}, false + } + + return countStoreFastPathShape{ + Target: optimize.CountStoreFastPathEdge, + Alias: alias, + Kinds: relationship.Kinds, + }, true +} + +func simpleCountProjection(returnClause *cypher.Return) (string, string, bool) { + if returnClause == nil || returnClause.Projection == nil { + return "", "", false + } + + projection := returnClause.Projection + if projection.Distinct || projection.All || projection.Order != nil || projection.Skip != nil || projection.Limit != nil || len(projection.Items) != 1 { + return "", "", false + } + + projectionItem, ok := projection.Items[0].(*cypher.ProjectionItem) + if !ok || projectionItem == nil { + return "", "", false + } + + function, ok := projectionItem.Expression.(*cypher.FunctionInvocation) + if !ok || function == nil || !strings.EqualFold(function.Name, cypher.CountFunction) || + function.Distinct || len(function.Namespace) > 0 || len(function.Arguments) != 1 { + return "", "", false + } + + switch argument := function.Arguments[0].(type) { + case *cypher.Variable: + if argument == nil { + return "", "", false + } + + return argument.Symbol, countStoreVariableSymbol(projectionItem.Alias), true + case *cypher.RangeQuantifier: + if argument != nil && argument.Value == cypher.TokenLiteralAsterisk { + return cypher.TokenLiteralAsterisk, countStoreVariableSymbol(projectionItem.Alias), true + } + } + + return "", "", false +} + +func constrainedCountStoreEndpoint(nodePattern *cypher.NodePattern) bool { + return nodePattern == nil || nodePattern.Variable != nil || len(nodePattern.Kinds) > 0 || nodePattern.Properties != nil +} + +func countStoreVariableSymbol(variable *cypher.Variable) string { + if variable == nil { + return "" + } + + return variable.Symbol +} diff 
--git a/cypher/models/pgsql/translate/expansion.go b/cypher/models/pgsql/translate/expansion.go index e018a41d..1d49d44b 100644 --- a/cypher/models/pgsql/translate/expansion.go +++ b/cypher/models/pgsql/translate/expansion.go @@ -7,7 +7,9 @@ import ( "github.com/specterops/dawgs/cypher/models" "github.com/specterops/dawgs/cypher/models/pgsql" "github.com/specterops/dawgs/cypher/models/pgsql/format" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" "github.com/specterops/dawgs/cypher/models/pgsql/pgd" + "github.com/specterops/dawgs/graph" ) const translateDefaultMaxTraversalDepth int64 = 15 @@ -57,6 +59,8 @@ type ExpansionBuilder struct { queryParameters map[string]any traversalStep *TraversalStep model *Expansion + unwindClauses []UnwindClause + unwindSources []pgsql.FromClause } func NewExpansionBuilder(queryParameters map[string]any, traversalStep *TraversalStep) (*ExpansionBuilder, error) { @@ -71,6 +75,11 @@ func NewExpansionBuilder(queryParameters map[string]any, traversalStep *Traversa }, nil } +func (s *ExpansionBuilder) SetUnwindClauses(clauses []UnwindClause) { + s.unwindClauses = clauses + s.unwindSources = unwindFromClauses(clauses) +} + func nextFrontInsert(body pgsql.SetExpression) pgsql.Insert { return pgsql.Insert{ Table: pgsql.TableReference{ @@ -135,8 +144,10 @@ func newExpansionNodeSeed(identifier, nodeIdentifier pgsql.Identifier, constrain } func newExpansionNodeFilterSeed(identifier, filterIdentifier, nodeIdentifier pgsql.Identifier, constraints pgsql.Expression) expansionSeed { - filterAlias := pgsql.Identifier(string(identifier) + "_filter") - filterID := pgsql.CompoundIdentifier{filterAlias, pgsql.ColumnID} + var ( + filterAlias = pgsql.Identifier(string(identifier) + "_filter") + filterID = pgsql.CompoundIdentifier{filterAlias, pgsql.ColumnID} + ) if constraints == nil { return newExpansionSeed(identifier, filterID, []pgsql.FromClause{{ @@ -185,6 +196,87 @@ func newExpansionBoundNodeSeed(identifier pgsql.Identifier, previousFrame 
*Frame return seed } +func fromClausesContainSource(fromClauses []pgsql.FromClause, identifier pgsql.Identifier) bool { + for _, fromClause := range fromClauses { + if tableReference, isTableReference := fromClause.Source.(pgsql.TableReference); isTableReference && + len(tableReference.Name) == 1 && + tableReference.Name[0] == identifier { + return true + } + } + + return false +} + +func prependFrameSourceIfMissing(fromClauses []pgsql.FromClause, frame *Frame) []pgsql.FromClause { + if frame == nil || fromClausesContainSource(fromClauses, frame.Binding.Identifier) { + return fromClauses + } + + return append([]pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: pgsql.CompoundIdentifier{frame.Binding.Identifier}, + }, + }}, fromClauses...) +} + +func expressionReferencesUnwindBinding(expression pgsql.Expression, unwindClauses []UnwindClause) (bool, error) { + if expression == nil || len(unwindClauses) == 0 { + return false, nil + } + + references, err := ExtractSyntaxNodeReferences(expression) + if err != nil { + return false, err + } + + for _, clause := range unwindClauses { + if clause.Binding != nil && references.Contains(clause.Binding.Identifier) { + return true, nil + } + } + + return false, nil +} + +func (s *ExpansionBuilder) seedEndpointConstraintSplit(expression pgsql.Expression, nodeIdentifier pgsql.Identifier, previousFrameIdentifier pgsql.Identifier) (pgsql.Expression, pgsql.Expression) { + var ( + seedExpression = rewriteBoundEndpointSeedReference(expression, previousFrameIdentifier, nodeIdentifier) + localScope = pgsql.AsIdentifierSet(nodeIdentifier) + ) + + for _, clause := range s.unwindClauses { + if clause.Binding != nil { + localScope.Add(clause.Binding.Identifier) + } + } + + return partitionConstraintByLocality(seedExpression, localScope) +} + +func (s *ExpansionBuilder) appendUnwindSourcesIfReferenced(selectBody *pgsql.Select, expressions ...pgsql.Expression) error { + for _, expression := range expressions { + if referencesUnwind, 
err := expressionReferencesUnwindBinding(expression, s.unwindClauses); err != nil { + return err + } else if referencesUnwind { + var previousFrame *Frame + if s.traversalStep != nil && s.traversalStep.Frame != nil { + previousFrame = s.traversalStep.Frame.Previous + } + + selectBody.From = prependFrameSourceIfMissing(selectBody.From, previousFrame) + selectBody.From = append(selectBody.From, s.unwindSources...) + return nil + } + } + + return nil +} + +func (s *ExpansionBuilder) appendUnwindSources(selectBody *pgsql.Select) { + selectBody.From = append(selectBody.From, s.unwindSources...) +} + func newExpansionRootIDsParameterSeed(identifier, nodeIdentifier pgsql.Identifier, constraints pgsql.Expression) expansionSeed { return newExpansionNodeFilterSeed(identifier, expansionRootFilter, nodeIdentifier, constraints) } @@ -532,13 +624,6 @@ func rewriteBoundEndpointSeedReference(expression pgsql.Expression, previousFram } } -func seedEndpointConstraintSplit(expression pgsql.Expression, nodeIdentifier pgsql.Identifier, previousFrameIdentifier pgsql.Identifier) (pgsql.Expression, pgsql.Expression) { - // Harness seed fragments only range over the endpoint node alias and an optional ID filter. - // Reframe safe endpoint references first, then leave anything still non-local for the outer projection. 
- seedExpression := rewriteBoundEndpointSeedReference(expression, previousFrameIdentifier, nodeIdentifier) - return partitionConstraintByLocality(seedExpression, pgsql.AsIdentifierSet(nodeIdentifier)) -} - func seededFrontPrimerQuery(seed expansionSeed, primer pgsql.Select) pgsql.Query { return pgsql.Query{ CommonTableExpressions: &pgsql.With{ @@ -652,11 +737,13 @@ func (s *ExpansionBuilder) usesBoundEndpointPairs() bool { } func (s *ExpansionBuilder) boundNodeIDsFilterStatement(filterIdentifier pgsql.Identifier, nodeIdentifier pgsql.Identifier) pgsql.Insert { - previousFrameIdentifier := s.traversalStep.Frame.Previous.Binding.Identifier - nodeIDExpression := pgsql.RowColumnReference{ - Identifier: pgsql.CompoundIdentifier{previousFrameIdentifier, nodeIdentifier}, - Column: pgsql.ColumnID, - } + var ( + previousFrameIdentifier = s.traversalStep.Frame.Previous.Binding.Identifier + nodeIDExpression = pgsql.RowColumnReference{ + Identifier: pgsql.CompoundIdentifier{previousFrameIdentifier, nodeIdentifier}, + Column: pgsql.ColumnID, + } + ) return pgsql.Insert{ Table: pgsql.TableReference{ @@ -744,15 +831,17 @@ func (s *ExpansionBuilder) boundEndpointPairFilterStatement() (pgsql.Insert, boo return pgsql.Insert{}, false } - previousFrameIdentifier := s.traversalStep.Frame.Previous.Binding.Identifier - rootIDExpression := pgsql.RowColumnReference{ - Identifier: pgsql.CompoundIdentifier{previousFrameIdentifier, s.traversalStep.LeftNode.Identifier}, - Column: pgsql.ColumnID, - } - terminalIDExpression := pgsql.RowColumnReference{ - Identifier: pgsql.CompoundIdentifier{previousFrameIdentifier, s.traversalStep.RightNode.Identifier}, - Column: pgsql.ColumnID, - } + var ( + previousFrameIdentifier = s.traversalStep.Frame.Previous.Binding.Identifier + rootIDExpression = pgsql.RowColumnReference{ + Identifier: pgsql.CompoundIdentifier{previousFrameIdentifier, s.traversalStep.LeftNode.Identifier}, + Column: pgsql.ColumnID, + } + terminalIDExpression = pgsql.RowColumnReference{ + 
Identifier: pgsql.CompoundIdentifier{previousFrameIdentifier, s.traversalStep.RightNode.Identifier}, + Column: pgsql.ColumnID, + } + ) return pgsql.Insert{ Table: pgsql.TableReference{ @@ -794,9 +883,11 @@ func (s *ExpansionBuilder) materializedEndpointPairFilterStatement() (pgsql.Inse return pgsql.Insert{}, false } - rootIDExpression := pgsql.CompoundIdentifier{s.traversalStep.LeftNode.Identifier, pgsql.ColumnID} - terminalIDExpression := pgsql.CompoundIdentifier{s.traversalStep.RightNode.Identifier, pgsql.ColumnID} - pairConstraints := pgsql.OptionalAnd(expansionModel.PrimerNodeConstraints, expansionModel.TerminalNodeConstraints) + var ( + rootIDExpression = pgsql.CompoundIdentifier{s.traversalStep.LeftNode.Identifier, pgsql.ColumnID} + terminalIDExpression = pgsql.CompoundIdentifier{s.traversalStep.RightNode.Identifier, pgsql.ColumnID} + pairConstraints = pgsql.OptionalAnd(expansionModel.PrimerNodeConstraints, expansionModel.TerminalNodeConstraints) + ) pairConstraints = pgsql.OptionalAnd(pairConstraints, pgsql.NewBinaryExpression( rootIDExpression, pgsql.OperatorIsNot, @@ -984,6 +1075,16 @@ func (s *ExpansionBuilder) forwardTerminalSatisfaction(expansionModel *Expansion return satisfiedSelectItem } +func forwardTerminalSatisfactionProjection(expansionModel *Expansion) pgsql.Expression { + if expansionModel.TerminalNodeSatisfactionProjection != nil && + !expansionModel.UseMaterializedTerminalFilter && + !expansionModel.UseMaterializedEndpointPairFilter { + return pgsql.Expression(expansionModel.TerminalNodeSatisfactionProjection) + } + + return nil +} + func backwardContinuationSatisfaction(expansionModel *Expansion) pgsql.Expression { return pgsql.ExistsExpression{ Subquery: pgsql.Subquery{ @@ -1027,7 +1128,15 @@ func (s *ExpansionBuilder) backwardTerminalSatisfaction(expansionModel *Expansio return satisfiedSelectItem } -func (s *ExpansionBuilder) prepareForwardFrontPrimerQuery(expansionModel *Expansion) (pgsql.Query, pgsql.Expression) { +func 
backwardTerminalSatisfactionProjection(expansionModel *Expansion) pgsql.Expression { + if expansionModel.PrimerNodeSatisfactionProjection != nil && !expansionModel.UseMaterializedEndpointPairFilter { + return pgsql.Expression(expansionModel.PrimerNodeSatisfactionProjection) + } + + return nil +} + +func (s *ExpansionBuilder) prepareForwardFrontPrimerQuery(expansionModel *Expansion) (pgsql.Query, pgsql.Expression, error) { var ( primerSeedConstraints pgsql.Expression primerProjectionPredicate pgsql.Expression @@ -1042,7 +1151,7 @@ func (s *ExpansionBuilder) prepareForwardFrontPrimerQuery(expansionModel *Expans previousFrameIdentifier = s.traversalStep.Frame.Previous.Binding.Identifier } - primerSeedConstraints, primerProjectionPredicate = seedEndpointConstraintSplit( + primerSeedConstraints, primerProjectionPredicate = s.seedEndpointConstraintSplit( expansionModel.PrimerNodeConstraints, s.traversalStep.LeftNode.Identifier, previousFrameIdentifier, @@ -1064,6 +1173,12 @@ func (s *ExpansionBuilder) prepareForwardFrontPrimerQuery(expansionModel *Expans seed = &nodeSeed } + if seed != nil { + if err := s.appendUnwindSourcesIfReferenced(&seed.query, primerSeedConstraints); err != nil { + return pgsql.Query{}, nil, err + } + } + // The returned projection predicate is the part of the endpoint predicate // that cannot be evaluated in the seed CTE because it still references an // outer frame. 
@@ -1106,6 +1221,9 @@ func (s *ExpansionBuilder) prepareForwardFrontPrimerQuery(expansionModel *Expans } nextQuery.From = []pgsql.FromClause{nextQueryFrom} + if err := s.appendUnwindSourcesIfReferenced(&nextQuery, expansionModel.EdgeConstraints, forwardTerminalSatisfactionProjection(expansionModel)); err != nil { + return pgsql.Query{}, nil, err + } if !expansionModel.HasExplicitEndpointInequality { nextQuery.Where = pgsql.OptionalAnd( @@ -1114,10 +1232,10 @@ func (s *ExpansionBuilder) prepareForwardFrontPrimerQuery(expansionModel *Expans ) } - return frontPrimerQuery(seed, nextQuery), primerProjectionPredicate + return frontPrimerQuery(seed, nextQuery), primerProjectionPredicate, nil } -func (s *ExpansionBuilder) prepareForwardFrontRecursiveQuery(expansionModel *Expansion) pgsql.Select { +func (s *ExpansionBuilder) prepareForwardFrontRecursiveQuery(expansionModel *Expansion) (pgsql.Select, error) { nextQuery := pgsql.Select{ Where: expansionModel.EdgeConstraints, } @@ -1197,10 +1315,14 @@ func (s *ExpansionBuilder) prepareForwardFrontRecursiveQuery(expansionModel *Exp } nextQuery.From = []pgsql.FromClause{nextQueryFrom} - return nextQuery + if err := s.appendUnwindSourcesIfReferenced(&nextQuery, expansionModel.EdgeConstraints, forwardTerminalSatisfactionProjection(expansionModel)); err != nil { + return pgsql.Select{}, err + } + + return nextQuery, nil } -func (s *ExpansionBuilder) prepareBackwardFrontPrimerQuery(expansionModel *Expansion) (pgsql.Query, pgsql.Expression) { +func (s *ExpansionBuilder) prepareBackwardFrontPrimerQuery(expansionModel *Expansion) (pgsql.Query, pgsql.Expression, error) { var ( terminalSeedConstraints pgsql.Expression terminalProjectionPredicate pgsql.Expression @@ -1215,7 +1337,7 @@ func (s *ExpansionBuilder) prepareBackwardFrontPrimerQuery(expansionModel *Expan previousFrameIdentifier = s.traversalStep.Frame.Previous.Binding.Identifier } - terminalSeedConstraints, terminalProjectionPredicate = seedEndpointConstraintSplit( + 
terminalSeedConstraints, terminalProjectionPredicate = s.seedEndpointConstraintSplit( expansionModel.TerminalNodeConstraints, s.traversalStep.RightNode.Identifier, previousFrameIdentifier, @@ -1237,6 +1359,12 @@ func (s *ExpansionBuilder) prepareBackwardFrontPrimerQuery(expansionModel *Expan seed = &nodeSeed } + if seed != nil { + if err := s.appendUnwindSourcesIfReferenced(&seed.query, terminalSeedConstraints); err != nil { + return pgsql.Query{}, nil, err + } + } + // The returned projection predicate is applied after the harness materializes // endpoints, where any outer-frame references are back in scope. nextQuery.Projection = []pgsql.SelectItem{ @@ -1276,10 +1404,14 @@ func (s *ExpansionBuilder) prepareBackwardFrontPrimerQuery(expansionModel *Expan } nextQuery.From = []pgsql.FromClause{nextQueryFrom} - return frontPrimerQuery(seed, nextQuery), terminalProjectionPredicate + if err := s.appendUnwindSourcesIfReferenced(&nextQuery, expansionModel.EdgeConstraints, backwardTerminalSatisfactionProjection(expansionModel)); err != nil { + return pgsql.Query{}, nil, err + } + + return frontPrimerQuery(seed, nextQuery), terminalProjectionPredicate, nil } -func (s *ExpansionBuilder) prepareBackwardFrontRecursiveQuery(expansionModel *Expansion) pgsql.Select { +func (s *ExpansionBuilder) prepareBackwardFrontRecursiveQuery(expansionModel *Expansion) (pgsql.Select, error) { nextQuery := pgsql.Select{ Where: expansionModel.EdgeConstraints, } @@ -1344,7 +1476,11 @@ func (s *ExpansionBuilder) prepareBackwardFrontRecursiveQuery(expansionModel *Ex } nextQuery.From = []pgsql.FromClause{nextQueryFrom} - return nextQuery + if err := s.appendUnwindSourcesIfReferenced(&nextQuery, expansionModel.EdgeConstraints, backwardTerminalSatisfactionProjection(expansionModel)); err != nil { + return pgsql.Select{}, err + } + + return nextQuery, nil } func shortestPathSearchCTE(functionName pgsql.Identifier, expansionModel *Expansion, harnessParameters []pgsql.Expression) 
pgsql.CommonTableExpression { @@ -1452,8 +1588,10 @@ func (s *ExpansionBuilder) applyShortestPathSeedProjectionConstraints(projection // Match Neo4j's shortest-path behavior by surfacing an error for result rows // where the resolved root and terminal endpoints are the same node. func shortestPathSelfEndpointGuard(expansionFrame pgsql.Identifier) pgsql.Expression { - rootID := pgsql.CompoundIdentifier{expansionFrame, expansionRootID} - terminalID := pgsql.CompoundIdentifier{expansionFrame, expansionNextID} + var ( + rootID = pgsql.CompoundIdentifier{expansionFrame, expansionRootID} + terminalID = pgsql.CompoundIdentifier{expansionFrame, expansionNextID} + ) return shortestPathSelfEndpointGuardCase(rootID, terminalID) } @@ -1606,10 +1744,15 @@ func (s *ExpansionBuilder) buildShortestPathsHarnessCall(harnessFunctionName pgs expansionModel.UseMaterializedTerminalFilter = s.canMaterializeTerminalFilter(expansionModel) - var ( - forwardFrontPrimerQuery, forwardSeedProjectionConstraints = s.prepareForwardFrontPrimerQuery(expansionModel) - forwardFrontRecursiveQuery = s.prepareForwardFrontRecursiveQuery(expansionModel) - ) + forwardFrontPrimerQuery, forwardSeedProjectionConstraints, err := s.prepareForwardFrontPrimerQuery(expansionModel) + if err != nil { + return pgsql.Query{}, err + } + + forwardFrontRecursiveQuery, err := s.prepareForwardFrontRecursiveQuery(expansionModel) + if err != nil { + return pgsql.Query{}, err + } projectionQuery.Projection = expansionModel.Projection @@ -1644,6 +1787,7 @@ func (s *ExpansionBuilder) buildShortestPathsHarnessCall(harnessFunctionName pgs s.applyBoundEndpointProjectionConstraints(&projectionQuery, expansionModel) s.applyShortestPathSeedProjectionConstraints(&projectionQuery, forwardSeedProjectionConstraints) + s.appendUnwindSources(&projectionQuery) s.applyShortestPathSelfEndpointGuard(&projectionQuery, expansionModel) if harnessParameters, err := s.shortestPathsParameters(expansionModel, forwardFrontPrimerQuery, 
forwardFrontRecursiveQuery); err != nil { @@ -1668,33 +1812,11 @@ func (s *ExpansionBuilder) BuildAllShortestPathsRoot() (pgsql.Query, error) { } func (s *ExpansionBuilder) canMaterializeTerminalFilter(expansionModel *Expansion) bool { - if expansionModel.TerminalNodeConstraints == nil || s.usesBoundEndpointPairs() || s.usesBoundTerminalIDs() { - return false - } - - // Terminal filters are only useful as standalone SQL when they depend solely - // on the terminal node; external references must stay in the main query. - _, externalConstraints := partitionConstraintByLocality( - expansionModel.TerminalNodeConstraints, - pgsql.AsIdentifierSet(s.traversalStep.RightNode.Identifier), - ) - - return externalConstraints == nil + return canMaterializeTerminalFilterForStep(s.traversalStep, expansionModel) } func (s *ExpansionBuilder) canMaterializeEndpointPairFilter(expansionModel *Expansion) bool { - // Pair filters enumerate the exact root/terminal combinations the - // bidirectional harness must resolve. Kind-only endpoint predicates are not - // enough because they do not constrain the search columns used by the harness. 
- if s.usesBoundEndpointPairs() || - expansionModel.PrimerNodeConstraints == nil || - expansionModel.TerminalNodeConstraints == nil || - !hasLocalEndpointConstraint(expansionModel.PrimerNodeConstraints, s.traversalStep.LeftNode.Identifier) || - !hasLocalEndpointConstraint(expansionModel.TerminalNodeConstraints, s.traversalStep.RightNode.Identifier) { - return false - } - - return true + return canMaterializeEndpointPairFilterForStep(s.traversalStep, expansionModel) } func (s *ExpansionBuilder) buildBiDirectionalShortestPathsHarnessCall(harnessFunctionName pgsql.Identifier) (pgsql.Query, error) { @@ -1705,12 +1827,25 @@ func (s *ExpansionBuilder) buildBiDirectionalShortestPathsHarnessCall(harnessFun expansionModel.UseMaterializedEndpointPairFilter = s.canMaterializeEndpointPairFilter(expansionModel) - var ( - forwardFrontPrimerQuery, forwardSeedProjectionConstraints = s.prepareForwardFrontPrimerQuery(expansionModel) - forwardFrontRecursiveQuery = s.prepareForwardFrontRecursiveQuery(expansionModel) - backwardFrontPrimerQuery, backwardSeedProjectionConstraints = s.prepareBackwardFrontPrimerQuery(expansionModel) - backwardFrontRecursiveQuery = s.prepareBackwardFrontRecursiveQuery(expansionModel) - ) + forwardFrontPrimerQuery, forwardSeedProjectionConstraints, err := s.prepareForwardFrontPrimerQuery(expansionModel) + if err != nil { + return pgsql.Query{}, err + } + + forwardFrontRecursiveQuery, err := s.prepareForwardFrontRecursiveQuery(expansionModel) + if err != nil { + return pgsql.Query{}, err + } + + backwardFrontPrimerQuery, backwardSeedProjectionConstraints, err := s.prepareBackwardFrontPrimerQuery(expansionModel) + if err != nil { + return pgsql.Query{}, err + } + + backwardFrontRecursiveQuery, err := s.prepareBackwardFrontRecursiveQuery(expansionModel) + if err != nil { + return pgsql.Query{}, err + } projectionQuery.Projection = expansionModel.Projection @@ -1745,6 +1880,7 @@ func (s *ExpansionBuilder) buildBiDirectionalShortestPathsHarnessCall(harnessFun 
s.applyBoundEndpointProjectionConstraints(&projectionQuery, expansionModel) s.applyShortestPathSeedProjectionConstraints(&projectionQuery, pgsql.OptionalAnd(forwardSeedProjectionConstraints, backwardSeedProjectionConstraints)) + s.appendUnwindSources(&projectionQuery) s.applyShortestPathSelfEndpointGuard(&projectionQuery, expansionModel) if harnessParameters, err := s.bidirectionalAllShortestPathsParameters(expansionModel, forwardFrontPrimerQuery, forwardFrontRecursiveQuery, backwardFrontPrimerQuery, backwardFrontRecursiveQuery); err != nil { @@ -1993,11 +2129,293 @@ func (s *ExpansionBuilder) Build(expansionIdentifier pgsql.Identifier, commonTab return query } +func projectionAliasExpressions(projection pgsql.Projection) map[pgsql.Identifier]pgsql.Expression { + aliases := make(map[pgsql.Identifier]pgsql.Expression) + + for _, selectItem := range projection { + switch typedSelectItem := selectItem.(type) { + case *pgsql.AliasedExpression: + if typedSelectItem.Alias.Set { + aliases[typedSelectItem.Alias.Value] = typedSelectItem.Expression + } + + case pgsql.AliasedExpression: + if typedSelectItem.Alias.Set { + aliases[typedSelectItem.Alias.Value] = typedSelectItem.Expression + } + + case pgsql.Identifier: + aliases[typedSelectItem] = typedSelectItem + + case pgsql.CompoundIdentifier: + if len(typedSelectItem) > 0 { + aliases[typedSelectItem[len(typedSelectItem)-1]] = typedSelectItem + } + } + } + + return aliases +} + +func rewriteCurrentFrameProjectionSetExpression(setExpression pgsql.SetExpression, frameID pgsql.Identifier, aliases map[pgsql.Identifier]pgsql.Expression) pgsql.SetExpression { + switch typedSetExpression := setExpression.(type) { + case pgsql.Select: + return rewriteCurrentFrameProjectionSelect(typedSetExpression, frameID, aliases) + + case pgsql.SetOperation: + typedSetExpression.LOperand = rewriteCurrentFrameProjectionSetExpression(typedSetExpression.LOperand, frameID, aliases) + typedSetExpression.ROperand = 
rewriteCurrentFrameProjectionSetExpression(typedSetExpression.ROperand, frameID, aliases) + return typedSetExpression + + default: + return setExpression + } +} + +func rewriteCurrentFrameProjectionQuery(query pgsql.Query, frameID pgsql.Identifier, aliases map[pgsql.Identifier]pgsql.Expression) pgsql.Query { + query.Body = rewriteCurrentFrameProjectionSetExpression(query.Body, frameID, aliases) + + for idx, orderBy := range query.OrderBy { + if orderBy != nil { + query.OrderBy[idx].Expression = rewriteCurrentFrameProjectionReferences(orderBy.Expression, frameID, aliases) + } + } + + query.Offset = rewriteCurrentFrameProjectionReferences(query.Offset, frameID, aliases) + query.Limit = rewriteCurrentFrameProjectionReferences(query.Limit, frameID, aliases) + + return query +} + +func rewriteCurrentFrameProjectionSelect(selectBody pgsql.Select, frameID pgsql.Identifier, aliases map[pgsql.Identifier]pgsql.Expression) pgsql.Select { + for idx, selectItem := range selectBody.Projection { + if rewritten, isSelectItem := rewriteCurrentFrameProjectionReferences(selectItem, frameID, aliases).(pgsql.SelectItem); isSelectItem { + selectBody.Projection[idx] = rewritten + } + } + + for idx := range selectBody.From { + selectBody.From[idx].Source = rewriteCurrentFrameProjectionReferences(selectBody.From[idx].Source, frameID, aliases) + + for joinIdx := range selectBody.From[idx].Joins { + selectBody.From[idx].Joins[joinIdx].Table = rewriteCurrentFrameProjectionReferences(selectBody.From[idx].Joins[joinIdx].Table, frameID, aliases) + selectBody.From[idx].Joins[joinIdx].JoinOperator.Constraint = rewriteCurrentFrameProjectionReferences(selectBody.From[idx].Joins[joinIdx].JoinOperator.Constraint, frameID, aliases) + } + } + + selectBody.Where = rewriteCurrentFrameProjectionReferences(selectBody.Where, frameID, aliases) + + for idx, groupByExpression := range selectBody.GroupBy { + selectBody.GroupBy[idx] = rewriteCurrentFrameProjectionReferences(groupByExpression, frameID, aliases) + 
} + + selectBody.Having = rewriteCurrentFrameProjectionReferences(selectBody.Having, frameID, aliases) + + return selectBody +} + +func rewriteCurrentFrameProjectionReferences(expression pgsql.Expression, frameID pgsql.Identifier, aliases map[pgsql.Identifier]pgsql.Expression) pgsql.Expression { + if expression == nil { + return nil + } + + switch typedExpression := expression.(type) { + case pgsql.CompoundIdentifier: + if len(typedExpression) == 2 && typedExpression[0] == frameID { + if replacement, hasReplacement := aliases[typedExpression[1]]; hasReplacement { + return replacement + } + } + + return typedExpression + + case pgsql.RowColumnReference: + typedExpression.Identifier = rewriteCurrentFrameProjectionReferences(typedExpression.Identifier, frameID, aliases) + return typedExpression + + case pgsql.UnaryExpression: + typedExpression.Operand = rewriteCurrentFrameProjectionReferences(typedExpression.Operand, frameID, aliases) + return typedExpression + + case *pgsql.UnaryExpression: + typedExpression.Operand = rewriteCurrentFrameProjectionReferences(typedExpression.Operand, frameID, aliases) + return typedExpression + + case pgsql.BinaryExpression: + typedExpression.LOperand = rewriteCurrentFrameProjectionReferences(typedExpression.LOperand, frameID, aliases) + typedExpression.ROperand = rewriteCurrentFrameProjectionReferences(typedExpression.ROperand, frameID, aliases) + return typedExpression + + case *pgsql.BinaryExpression: + typedExpression.LOperand = rewriteCurrentFrameProjectionReferences(typedExpression.LOperand, frameID, aliases) + typedExpression.ROperand = rewriteCurrentFrameProjectionReferences(typedExpression.ROperand, frameID, aliases) + return typedExpression + + case pgsql.FunctionCall: + for idx, parameter := range typedExpression.Parameters { + typedExpression.Parameters[idx] = rewriteCurrentFrameProjectionReferences(parameter, frameID, aliases) + } + return typedExpression + + case *pgsql.FunctionCall: + for idx, parameter := range 
typedExpression.Parameters { + typedExpression.Parameters[idx] = rewriteCurrentFrameProjectionReferences(parameter, frameID, aliases) + } + return typedExpression + + case pgsql.TypeCast: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case pgsql.CompositeValue: + for idx, value := range typedExpression.Values { + typedExpression.Values[idx] = rewriteCurrentFrameProjectionReferences(value, frameID, aliases) + } + return typedExpression + + case *pgsql.Parenthetical: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case *pgsql.EdgeArrayFromPathIDs: + typedExpression.PathIDs = rewriteCurrentFrameProjectionReferences(typedExpression.PathIDs, frameID, aliases) + return typedExpression + + case pgsql.ArrayLiteral: + for idx, value := range typedExpression.Values { + typedExpression.Values[idx] = rewriteCurrentFrameProjectionReferences(value, frameID, aliases) + } + return typedExpression + + case pgsql.ArrayExpression: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case pgsql.ArrayIndex: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + for idx, index := range typedExpression.Indexes { + typedExpression.Indexes[idx] = rewriteCurrentFrameProjectionReferences(index, frameID, aliases) + } + return typedExpression + + case *pgsql.ArrayIndex: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + for idx, index := range typedExpression.Indexes { + typedExpression.Indexes[idx] = rewriteCurrentFrameProjectionReferences(index, frameID, aliases) + } + return typedExpression + + case pgsql.ArraySlice: + typedExpression.Expression = 
rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + typedExpression.Lower = rewriteCurrentFrameProjectionReferences(typedExpression.Lower, frameID, aliases) + typedExpression.Upper = rewriteCurrentFrameProjectionReferences(typedExpression.Upper, frameID, aliases) + return typedExpression + + case *pgsql.ArraySlice: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + typedExpression.Lower = rewriteCurrentFrameProjectionReferences(typedExpression.Lower, frameID, aliases) + typedExpression.Upper = rewriteCurrentFrameProjectionReferences(typedExpression.Upper, frameID, aliases) + return typedExpression + + case pgsql.AllExpression: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case *pgsql.AllExpression: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case pgsql.AnyExpression: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case *pgsql.AnyExpression: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case pgsql.Case: + typedExpression.Operand = rewriteCurrentFrameProjectionReferences(typedExpression.Operand, frameID, aliases) + for idx, condition := range typedExpression.Conditions { + typedExpression.Conditions[idx] = rewriteCurrentFrameProjectionReferences(condition, frameID, aliases) + } + for idx, then := range typedExpression.Then { + typedExpression.Then[idx] = rewriteCurrentFrameProjectionReferences(then, frameID, aliases) + } + typedExpression.Else = rewriteCurrentFrameProjectionReferences(typedExpression.Else, frameID, aliases) + return typedExpression + + case *pgsql.Case: + 
typedExpression.Operand = rewriteCurrentFrameProjectionReferences(typedExpression.Operand, frameID, aliases) + for idx, condition := range typedExpression.Conditions { + typedExpression.Conditions[idx] = rewriteCurrentFrameProjectionReferences(condition, frameID, aliases) + } + for idx, then := range typedExpression.Then { + typedExpression.Then[idx] = rewriteCurrentFrameProjectionReferences(then, frameID, aliases) + } + typedExpression.Else = rewriteCurrentFrameProjectionReferences(typedExpression.Else, frameID, aliases) + return typedExpression + + case pgsql.ExistsExpression: + typedExpression.Subquery.Query = rewriteCurrentFrameProjectionQuery(typedExpression.Subquery.Query, frameID, aliases) + return typedExpression + + case pgsql.Subquery: + typedExpression.Query = rewriteCurrentFrameProjectionQuery(typedExpression.Query, frameID, aliases) + return typedExpression + + case pgsql.Query: + return rewriteCurrentFrameProjectionQuery(typedExpression, frameID, aliases) + + case pgsql.Select: + return rewriteCurrentFrameProjectionSelect(typedExpression, frameID, aliases) + + case pgsql.SetOperation: + typedExpression.LOperand = rewriteCurrentFrameProjectionSetExpression(typedExpression.LOperand, frameID, aliases) + typedExpression.ROperand = rewriteCurrentFrameProjectionSetExpression(typedExpression.ROperand, frameID, aliases) + return typedExpression + + case pgsql.ProjectionFrom: + for idx, selectItem := range typedExpression.Projection { + if rewritten, isSelectItem := rewriteCurrentFrameProjectionReferences(selectItem, frameID, aliases).(pgsql.SelectItem); isSelectItem { + typedExpression.Projection[idx] = rewritten + } + } + for idx := range typedExpression.From { + typedExpression.From[idx].Source = rewriteCurrentFrameProjectionReferences(typedExpression.From[idx].Source, frameID, aliases) + for joinIdx := range typedExpression.From[idx].Joins { + typedExpression.From[idx].Joins[joinIdx].JoinOperator.Constraint = 
rewriteCurrentFrameProjectionReferences(typedExpression.From[idx].Joins[joinIdx].JoinOperator.Constraint, frameID, aliases) + } + } + return typedExpression + + case pgsql.AliasedExpression: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case *pgsql.AliasedExpression: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case pgsql.Variadic: + typedExpression.Expression = rewriteCurrentFrameProjectionReferences(typedExpression.Expression, frameID, aliases) + return typedExpression + + case pgsql.LateralSubquery: + typedExpression.Query = rewriteCurrentFrameProjectionQuery(typedExpression.Query, frameID, aliases) + return typedExpression + + default: + return expression + } +} + func (s *Translator) buildExpansionPatternRoot(traversalStepContext TraversalStepContext, expansion *ExpansionBuilder) (pgsql.Query, error) { var ( traversalStep = traversalStepContext.CurrentStep expansionModel = traversalStep.Expansion seedIdentifier = expansionSeedIdentifier(expansionModel.Frame.Binding.Identifier) + unwindClauses = s.query.CurrentPart().ConsumeUnwindClauses() + unwindSources = unwindFromClauses(unwindClauses) ) // Determine local scope of the primer: the edge and both nodes. @@ -2040,6 +2458,15 @@ func (s *Translator) buildExpansionPatternRoot(traversalStepContext TraversalSte expansion.UseUnionAll = true } + if seed != nil { + if seedNeedsUnwind, err := expressionReferencesUnwindBinding(seedConstraints, unwindClauses); err != nil { + return pgsql.Query{}, err + } else if seedNeedsUnwind { + seed.query.From = prependFrameSourceIfMissing(seed.query.From, traversalStep.Frame.Previous) + seed.query.From = append(seed.query.From, unwindSources...) 
+ } + } + expansion.PrimerStatement.Where = expansionModel.EdgeConstraints expansion.ProjectionStatement.Projection = expansionModel.Projection @@ -2076,6 +2503,12 @@ func (s *Translator) buildExpansionPatternRoot(traversalStepContext TraversalSte } expansion.PrimerStatement.From = append(expansion.PrimerStatement.From, nextQueryFrom) + if primerNeedsUnwind, err := expressionReferencesUnwindBinding(expansionModel.EdgeConstraints, unwindClauses); err != nil { + return pgsql.Query{}, err + } else if primerNeedsUnwind { + expansion.PrimerStatement.From = prependFrameSourceIfMissing(expansion.PrimerStatement.From, traversalStep.Frame.Previous) + expansion.PrimerStatement.From = append(expansion.PrimerStatement.From, unwindSources...) + } if expansionAllowsZeroDepth(expansionModel) { zeroDepthStatement, err := expansion.buildZeroDepthExpansionSelect(seed) @@ -2121,6 +2554,8 @@ func (s *Translator) buildExpansionPatternRoot(traversalStepContext TraversalSte }) } + expansion.ProjectionStatement.From = append(expansion.ProjectionStatement.From, unwindSources...) 
+ // Select the expansion components for the projection statement expansion.ProjectionStatement.From = append(expansion.ProjectionStatement.From, pgsql.FromClause{ Source: pgsql.TableReference{ @@ -2153,7 +2588,23 @@ func (s *Translator) buildExpansionPatternRoot(traversalStepContext TraversalSte ), ) } + if previousProjectionFrameID != "" && traversalStep.RightNodeBound { + projectionConstraints = pgsql.OptionalAnd( + projectionConstraints, + boundEndpointProjectionConstraint( + previousProjectionFrameID, + traversalStep.RightNode.Identifier, + expansionModel.Frame.Binding.Identifier, + expansionNextID, + ), + ) + } + projectionConstraints = rewriteCurrentFrameProjectionReferences( + projectionConstraints, + traversalStep.Frame.Binding.Identifier, + projectionAliasExpressions(expansion.ProjectionStatement.Projection), + ) expansion.ProjectionStatement.Where = projectionConstraints } @@ -2268,6 +2719,11 @@ func (s *Translator) buildExpansionPatternStep(traversalStepContext TraversalSte if projectionConstraints, err := s.buildExpansionProjectionConstraints(traversalStepContext); err != nil { return pgsql.Query{}, err } else { + projectionConstraints = rewriteCurrentFrameProjectionReferences( + projectionConstraints, + traversalStep.Frame.Binding.Identifier, + projectionAliasExpressions(expansion.ProjectionStatement.Projection), + ) expansion.ProjectionStatement.Where = projectionConstraints } @@ -2285,6 +2741,194 @@ func expansionTerminalSatisfactionLocality(traversalStep *TraversalStep) (pgsql. 
) } +func applyExpansionSuffixPushdown(part *PatternPart) (int, error) { + var applied int + + for idx := 0; idx+1 < len(part.TraversalSteps); idx++ { + var ( + currentStep = part.TraversalSteps[idx] + suffixSteps = part.TraversalSteps[idx+1:] + ) + + if candidateApplied, err := applyExpansionSuffixPushdownCandidate(currentStep, suffixSteps); err != nil { + return applied, err + } else if candidateApplied { + applied++ + } + } + + return applied, nil +} + +func applyExpansionSuffixPushdownCandidate(currentStep *TraversalStep, suffixSteps []*TraversalStep) (bool, error) { + if suffixSatisfaction, satisfied := expansionSuffixTerminalSatisfaction(currentStep, suffixSteps); satisfied { + currentStep.Expansion.TerminalNodeConstraints = pgsql.OptionalAnd( + currentStep.Expansion.TerminalNodeConstraints, + suffixSatisfaction, + ) + + if terminalCriteriaProjection, err := pgsql.As[pgsql.SelectItem](currentStep.Expansion.TerminalNodeConstraints); err != nil { + return false, err + } else { + currentStep.Expansion.TerminalNodeSatisfactionProjection = terminalCriteriaProjection + } + + return true, nil + } + + return false, nil +} + +func suffixEdgeLeftEndpoint(edgeIdentifier pgsql.Identifier, direction graph.Direction) (pgsql.Expression, bool) { + switch direction { + case graph.DirectionOutbound: + return pgsql.CompoundIdentifier{edgeIdentifier, pgsql.ColumnStartID}, true + case graph.DirectionInbound: + return pgsql.CompoundIdentifier{edgeIdentifier, pgsql.ColumnEndID}, true + default: + return nil, false + } +} + +func suffixEdgeRightEndpoint(edgeIdentifier pgsql.Identifier, direction graph.Direction) (pgsql.Expression, bool) { + switch direction { + case graph.DirectionOutbound: + return pgsql.CompoundIdentifier{edgeIdentifier, pgsql.ColumnEndID}, true + case graph.DirectionInbound: + return pgsql.CompoundIdentifier{edgeIdentifier, pgsql.ColumnStartID}, true + default: + return nil, false + } +} + +func suffixBoundNodeIDReference(currentStep *TraversalStep, node 
*BoundIdentifier) (pgsql.Expression, bool) { + if currentStep == nil || + currentStep.Frame == nil || + currentStep.Frame.Previous == nil || + currentStep.Frame.Previous.Binding == nil || + node == nil || + !currentStep.Frame.Previous.Known().Contains(node.Identifier) { + return nil, false + } + + return pgsql.RowColumnReference{ + Identifier: pgsql.CompoundIdentifier{currentStep.Frame.Previous.Binding.Identifier, node.Identifier}, + Column: pgsql.ColumnID, + }, true +} + +func suffixStepEdgeConstraints(step *TraversalStep) pgsql.Expression { + if step == nil || step.EdgeConstraints == nil { + return nil + } + + localConstraints, _ := partitionConstraintByLocality( + step.EdgeConstraints.Expression, + pgsql.AsIdentifierSet(step.Edge.Identifier), + ) + + return localConstraints +} + +func expansionSuffixTerminalSatisfaction(currentStep *TraversalStep, suffixSteps []*TraversalStep) (pgsql.Expression, bool) { + if currentStep == nil || + currentStep.Expansion == nil || + currentStep.RightNode == nil || + len(suffixSteps) == 0 || + suffixSteps[0] == nil || + suffixSteps[0].LeftNode == nil || + currentStep.RightNode.Identifier != suffixSteps[0].LeftNode.Identifier { + return nil, false + } + + var ( + fromClause pgsql.FromClause + where pgsql.Expression + previousID pgsql.Expression = pgsql.CompoundIdentifier{currentStep.RightNode.Identifier, pgsql.ColumnID} + ) + + for idx, step := range suffixSteps { + if step == nil || + step.Expansion != nil || + step.LeftNode == nil || + step.Edge == nil || + step.RightNode == nil || + step.Direction == graph.DirectionBoth { + break + } + + if idx > 0 && suffixSteps[idx-1].RightNode.Identifier != step.LeftNode.Identifier { + break + } + + leftEndpoint, validDirection := suffixEdgeLeftEndpoint(step.Edge.Identifier, step.Direction) + if !validDirection { + return nil, false + } + + edgeJoin := pgd.Equals(previousID, leftEndpoint) + if idx == 0 { + fromClause = expansionEdgeFromClause(step.Edge.Identifier) + where = 
pgsql.OptionalAnd(where, edgeJoin) + } else { + fromClause.Joins = append(fromClause.Joins, pgsql.Join{ + Table: expansionEdgeTableReference(step.Edge.Identifier), + JoinOperator: pgsql.JoinOperator{ + JoinType: pgsql.JoinTypeInner, + Constraint: edgeJoin, + }, + }) + } + + where = pgsql.OptionalAnd(where, suffixStepEdgeConstraints(step)) + + rightEndpoint, validDirection := suffixEdgeRightEndpoint(step.Edge.Identifier, step.Direction) + if !validDirection { + return nil, false + } + + if step.RightNodeBound { + boundRightNodeID, hasBoundRightNodeID := suffixBoundNodeIDReference(currentStep, step.RightNode) + if !hasBoundRightNodeID { + return nil, false + } + + where = pgsql.OptionalAnd(where, step.RightNodeConstraints) + where = pgsql.OptionalAnd(where, pgd.Equals(rightEndpoint, boundRightNodeID)) + previousID = boundRightNodeID + } else { + fromClause.Joins = append(fromClause.Joins, pgsql.Join{ + Table: expansionNodeTableReference(step.RightNode.Identifier), + JoinOperator: pgsql.JoinOperator{ + JoinType: pgsql.JoinTypeInner, + Constraint: pgsql.OptionalAnd( + step.RightNodeConstraints, + pgd.Equals(pgsql.CompoundIdentifier{step.RightNode.Identifier, pgsql.ColumnID}, rightEndpoint), + ), + }, + }) + + previousID = pgsql.CompoundIdentifier{step.RightNode.Identifier, pgsql.ColumnID} + } + } + + if fromClause.Source == nil { + return nil, false + } + + return pgsql.ExistsExpression{ + Subquery: pgsql.Subquery{ + Query: pgsql.Query{ + Body: pgsql.Select{ + Projection: pgsql.Projection{pgd.IntLiteral(1)}, + From: []pgsql.FromClause{fromClause}, + Where: where, + }, + }, + }, + }, true +} + func expansionLocalTerminalSatisfactionProjection(traversalStep *TraversalStep) (pgsql.SelectItem, error) { localSatisfiedConstraint, _ := expansionTerminalSatisfactionLocality(traversalStep) @@ -2464,17 +3108,28 @@ func (s *Translator) buildExpansionProjectionConstraints(traversalStepContext Tr return projectionConstraints, nil } -func (s *Translator) 
translateTraversalPatternPartWithExpansion(isFirstTraversalStep bool, traversalStep *TraversalStep) error { +func (s *Translator) translateTraversalPatternPartWithExpansion(part *PatternPart, stepIndex int, isFirstTraversalStep bool, traversalStep *TraversalStep, allowProjectionPruning bool) error { expansionModel := traversalStep.Expansion // Translate the expansion's constraints - this has the side effect of making the pattern identifiers visible in // the current scope frame - if err := s.translateExpansionConstraints(isFirstTraversalStep, traversalStep, expansionModel); err != nil { + if err := s.translateExpansionConstraints(part, stepIndex, isFirstTraversalStep, traversalStep, expansionModel); err != nil { return err } // Export the path from the traversal's scope traversalStep.Frame.Export(expansionModel.PathBinding.Identifier) + if allowProjectionPruning { + _, hasDecision := s.projectionPruningDecision(part, stepIndex) + if hasDecision && pruneExpansionStepProjectionExports(part, stepIndex, traversalStep) { + s.recordLowering(optimize.LoweringProjectionPruning) + } + + if _, hasDecision := s.latePathMaterializationDecision(part, stepIndex, optimize.LatePathMaterializationExpansionPath); hasDecision && + traversalStep.Frame.Exported.Contains(expansionModel.PathBinding.Identifier) { + s.recordLowering(optimize.LoweringLatePathMaterialization) + } + } // Push a new frame that contains currently projected scope from the expansion recursive CTE if expansionFrame, err := s.scope.PushFrame(); err != nil { @@ -2529,7 +3184,7 @@ func (s *Translator) translateTraversalPatternPartWithExpansion(isFirstTraversal } if expansionModel.Options.FindShortestPath || expansionModel.Options.FindAllShortestPaths { - if err := s.translateShortestPathTraversal(traversalStep, expansionModel); err != nil { + if err := s.translateShortestPathTraversal(part, stepIndex, traversalStep, expansionModel); err != nil { return err } } @@ -2537,16 +3192,18 @@ func (s *Translator) 
translateTraversalPatternPartWithExpansion(isFirstTraversal return nil } -func (s *Translator) translateExpansionConstraints(isFirstTraversalStep bool, step *TraversalStep, expansionModel *Expansion) error { +func (s *Translator) translateExpansionConstraints(part *PatternPart, stepIndex int, isFirstTraversalStep bool, step *TraversalStep, expansionModel *Expansion) error { if constraints, err := consumePatternConstraints(isFirstTraversalStep, recursivePattern, step, s.treeTranslator); err != nil { return err } else { // If one side of the expansion has constraints but the other does not this may be an opportunity to reorder the traversal // to start with tighter search bounds - if err := constraints.OptimizePatternConstraintBalance(s.scope, step); err != nil { + if err := s.applyPatternConstraintBalance(part, stepIndex, &constraints, step); err != nil { return err } + s.recordPredicatePlacementConsumption(part, stepIndex, step, constraints) + // Left node if leftNodeJoinCondition, err := leftNodeTraversalStepConstraint(step); err != nil { return err @@ -2610,13 +3267,13 @@ func (s *Translator) translateExpansionConstraints(isFirstTraversalStep bool, st return nil } -func (s *Translator) translateShortestPathTraversal(traversalStep *TraversalStep, expansionModel *Expansion) error { +func (s *Translator) translateShortestPathTraversal(part *PatternPart, stepIndex int, traversalStep *TraversalStep, expansionModel *Expansion) error { var ( useBidirectionalSearch bool err error ) - useBidirectionalSearch, err = traversalStep.CanExecutePairAwareBidirectionalSearch(s.scope) + useBidirectionalSearch, err = s.useBidirectionalShortestPathStrategy(part, stepIndex, traversalStep) if err != nil { return err @@ -2627,6 +3284,7 @@ func (s *Translator) translateShortestPathTraversal(traversalStep *TraversalStep traversalStep.LeftNode.Identifier, traversalStep.RightNode.Identifier, ) + s.applyShortestPathFilterMaterialization(part, stepIndex, traversalStep, expansionModel) // If 
this query is a shortest-path look up, the translator will have to use a function harness for // traversal. As such, query fragments for the traversal harness will have to be passed by the parameters diff --git a/cypher/models/pgsql/translate/expression.go b/cypher/models/pgsql/translate/expression.go index 7161db3f..6123f8a5 100644 --- a/cypher/models/pgsql/translate/expression.go +++ b/cypher/models/pgsql/translate/expression.go @@ -240,7 +240,7 @@ func rewritePropertyLookupOperator(propertyLookup *pgsql.BinaryExpression, dataT func isJSONScalarEqualityType(dataType pgsql.DataType) bool { switch dataType { - case pgsql.Boolean, pgsql.Float4, pgsql.Float8, pgsql.Int, pgsql.Int2, pgsql.Int4, pgsql.Int8, pgsql.Numeric, pgsql.Text: + case pgsql.Boolean, pgsql.Float4, pgsql.Float8, pgsql.Int, pgsql.Int2, pgsql.Int4, pgsql.Int8, pgsql.Numeric: return true default: @@ -248,51 +248,9 @@ func isJSONScalarEqualityType(dataType pgsql.DataType) bool { } } -func isBooleanTextCompatibilityValue(value any) bool { - switch value { - case "true", "false": - return true - - default: - return false - } -} - -func isBooleanTextCompatibilityParameter(kindMapper *contextAwareKindMapper, parameter pgsql.Parameter) bool { - if kindMapper == nil || parameter.TypeHint() != pgsql.Text { - return false - } - - value, hasValue := kindMapper.parameters[parameter.Identifier.String()] - return hasValue && isBooleanTextCompatibilityValue(value) -} - -func isBooleanTextCompatibilityOperand(kindMapper *contextAwareKindMapper, expression pgsql.Expression) bool { - switch typedExpression := expression.(type) { - case pgsql.Literal: - return typedExpression.TypeHint() == pgsql.Text && isBooleanTextCompatibilityValue(typedExpression.Value) - - case pgsql.Parameter: - return isBooleanTextCompatibilityParameter(kindMapper, typedExpression) - - case *pgsql.Parameter: - if typedExpression == nil { - return false - } - - return isBooleanTextCompatibilityParameter(kindMapper, *typedExpression) - - default: 
- return false - } -} - -func rewriteJSONScalarEqualityOperand(kindMapper *contextAwareKindMapper, expression pgsql.Expression) (pgsql.Expression, bool) { +func rewriteJSONScalarEqualityOperand(expression pgsql.Expression) (pgsql.Expression, bool) { if literal, isLiteral := expression.(pgsql.Literal); isLiteral && literal.Null { return nil, false - } else if isBooleanTextCompatibilityOperand(kindMapper, expression) { - // Preserve compatibility for existing callers that compare JSON boolean properties to stringified booleans. - return nil, false } if typedExpression, isTypeHinted := expression.(pgsql.TypeHinted); !isTypeHinted { @@ -310,6 +268,20 @@ func rewriteJSONScalarEqualityOperand(kindMapper *contextAwareKindMapper, expres } } +func rewriteStringEqualityOperand(expression pgsql.Expression) (pgsql.Expression, bool) { + if literal, isLiteral := expression.(pgsql.Literal); isLiteral && literal.Null { + return nil, false + } + + if typedExpression, isTypeHinted := expression.(pgsql.TypeHinted); !isTypeHinted { + return nil, false + } else if typedExpression.TypeHint() != pgsql.Text { + return nil, false + } + + return expression, true +} + func lookupRequiresElementType(typeHint pgsql.DataType, operator pgsql.Operator, otherOperand pgsql.SyntaxNode) bool { if typeHint.IsArrayType() { switch operator { @@ -381,7 +353,10 @@ func rewritePropertyLookupOperands(kindMapper *contextAwareKindMapper, expressio } case pgsql.OperatorEquals, pgsql.OperatorCypherNotEquals: - if rewrittenROperand, rewritten := rewriteJSONScalarEqualityOperand(kindMapper, expression.ROperand); rewritten { + if rewrittenROperand, rewritten := rewriteStringEqualityOperand(expression.ROperand); rewritten { + expression.LOperand = rewritePropertyLookupOperator(leftPropertyLookup, pgsql.Text) + expression.ROperand = rewrittenROperand + } else if rewrittenROperand, rewritten := rewriteJSONScalarEqualityOperand(expression.ROperand); rewritten { leftPropertyLookup.Operator = pgsql.OperatorJSONField 
expression.ROperand = rewrittenROperand } else if rOperandTypeHint == pgsql.AnyArray { @@ -415,7 +390,10 @@ func rewritePropertyLookupOperands(kindMapper *contextAwareKindMapper, expressio // for special (like, ilike, etc.) character classes case pgsql.OperatorEquals, pgsql.OperatorCypherNotEquals: - if rewrittenLOperand, rewritten := rewriteJSONScalarEqualityOperand(kindMapper, expression.LOperand); rewritten { + if rewrittenLOperand, rewritten := rewriteStringEqualityOperand(expression.LOperand); rewritten { + expression.LOperand = rewrittenLOperand + expression.ROperand = rewritePropertyLookupOperator(rightPropertyLookup, pgsql.Text) + } else if rewrittenLOperand, rewritten := rewriteJSONScalarEqualityOperand(expression.LOperand); rewritten { expression.LOperand = rewrittenLOperand rightPropertyLookup.Operator = pgsql.OperatorJSONField } else if lOperandTypeHint == pgsql.AnyArray { @@ -474,6 +452,8 @@ func (s *Builder) PopOperand(kindMapper *contextAwareKindMapper) (pgsql.Expressi case *pgsql.BinaryExpression: if err := applyBinaryExpressionTypeHints(kindMapper, typedNext); err != nil { return nil, err + } else if rewrittenExpression, rewritten := buildStringPropertyEqualityPredicate(typedNext); rewritten { + next = rewrittenExpression } } @@ -704,6 +684,13 @@ func rewriteIdentityOperands(scope *Scope, newExpression *pgsql.BinaryExpression newExpression.LOperand = pgsql.CompoundIdentifier{typedLOperand, pgsql.ColumnID} newExpression.ROperand = pgsql.CompoundIdentifier{typedROperand, pgsql.ColumnID} + case pgsql.Int8Array: + if newExpression.Operator == pgsql.OperatorIn { + newExpression.LOperand = pgsql.CompoundIdentifier{typedLOperand, pgsql.ColumnID} + } else { + return fmt.Errorf("invalid comparison between types %s and %s", boundLOperand.DataType, boundROperand.DataType) + } + case pgsql.NodeCompositeArray: const unnestElemAlias pgsql.Identifier = "_unnest_elem" newExpression.LOperand = pgsql.CompoundIdentifier{typedLOperand, pgsql.ColumnID} @@ -743,6 
+730,13 @@ func rewriteIdentityOperands(scope *Scope, newExpression *pgsql.BinaryExpression newExpression.LOperand = pgsql.CompoundIdentifier{typedLOperand, pgsql.ColumnID} newExpression.ROperand = pgsql.CompoundIdentifier{typedROperand, pgsql.ColumnID} + case pgsql.Int8Array: + if newExpression.Operator == pgsql.OperatorIn { + newExpression.LOperand = pgsql.CompoundIdentifier{typedLOperand, pgsql.ColumnID} + } else { + return fmt.Errorf("invalid comparison between types %s and %s", boundLOperand.DataType, boundROperand.DataType) + } + case pgsql.EdgeCompositeArray: newExpression.LOperand = pgsql.CompoundIdentifier{typedLOperand, pgsql.ColumnID} newExpression.ROperand = pgsql.CompoundIdentifier{typedROperand, pgsql.ColumnID} @@ -856,15 +850,17 @@ func jsonEmptyArrayLiteral() pgsql.Expression { func rewritePropertyLookupNullCheck(propertyLookup *pgsql.BinaryExpression, isNotNull bool) pgsql.Expression { propertyLookup.Operator = pgsql.OperatorJSONField - existsExpression := pgsql.NewBinaryExpression( - propertyLookup.LOperand, - pgsql.OperatorJSONBFieldExists, - propertyLookup.ROperand, - ) - jsonNullExpression := pgsql.NewBinaryExpression( - propertyLookup, - pgsql.OperatorEquals, - jsonNullLiteral(), + var ( + existsExpression = pgsql.NewBinaryExpression( + propertyLookup.LOperand, + pgsql.OperatorJSONBFieldExists, + propertyLookup.ROperand, + ) + jsonNullExpression = pgsql.NewBinaryExpression( + propertyLookup, + pgsql.OperatorEquals, + jsonNullLiteral(), + ) ) if isNotNull { @@ -886,21 +882,122 @@ func jsonFieldPropertyLookup(propertyLookup *pgsql.BinaryExpression) *pgsql.Bina return pgsql.NewBinaryExpression(propertyLookup.LOperand, pgsql.OperatorJSONField, propertyLookup.ROperand) } -func buildEmptyArrayPropertyComparison(propertyLookup *pgsql.BinaryExpression, negated bool) *pgsql.BinaryExpression { - emptyArrayExpression := pgsql.NewBinaryExpression( - jsonFieldPropertyLookup(propertyLookup), +func jsonTextPropertyLookup(propertyLookup 
*pgsql.BinaryExpression) *pgsql.BinaryExpression { + return pgsql.NewBinaryExpression(propertyLookup.LOperand, pgsql.OperatorJSONTextField, propertyLookup.ROperand) +} + +func jsonbTypeof(expression pgsql.Expression) pgsql.Expression { + return pgsql.FunctionCall{ + Function: pgsql.FunctionJSONBTypeof, + Parameters: []pgsql.Expression{expression}, + } +} + +func jsonbStringTypeCheck(propertyLookup *pgsql.BinaryExpression) pgsql.Expression { + return pgsql.NewBinaryExpression( + jsonbTypeof(jsonFieldPropertyLookup(propertyLookup)), pgsql.OperatorEquals, - jsonEmptyArrayLiteral(), + pgsql.NewLiteral("string", pgsql.Text), ) - nullExpression := pgsql.NewBinaryExpression( - jsonFieldPropertyLookup(propertyLookup), - pgsql.OperatorEquals, - jsonNullLiteral(), +} + +func toJSONBTextOperand(expression pgsql.Expression) pgsql.Expression { + return pgsql.FunctionCall{ + Function: pgsql.FunctionToJSONB, + Parameters: []pgsql.Expression{ + pgsql.NewTypeCast(expression, pgsql.Text), + }, + CastType: pgsql.JSONB, + } +} + +func buildStringPropertyEqualityComparison(propertyLookup *pgsql.BinaryExpression, textOperand pgsql.Expression, propertyOnLeft bool, operator pgsql.Operator) pgsql.Expression { + textPropertyLookup := jsonTextPropertyLookup(propertyLookup) + + if propertyOnLeft { + return pgsql.NewBinaryExpression(textPropertyLookup, operator, textOperand) + } + + return pgsql.NewBinaryExpression(textOperand, operator, textPropertyLookup) +} + +func buildStringPropertyEqualityPredicate(expression *pgsql.BinaryExpression) (pgsql.Expression, bool) { + if !expression.Operator.IsIn(pgsql.OperatorEquals, pgsql.OperatorCypherNotEquals) { + return nil, false + } + + leftPropertyLookup, hasLeftPropertyLookup := expressionToPropertyLookupBinaryExpression(expression.LOperand) + rightPropertyLookup, hasRightPropertyLookup := expressionToPropertyLookupBinaryExpression(expression.ROperand) + + if hasLeftPropertyLookup { + if rewrittenROperand, rewritten := 
rewriteStringEqualityOperand(expression.ROperand); rewritten { + rewritePropertyLookupOperator(leftPropertyLookup, pgsql.Text) + return buildStringPropertyComparisonPredicate(leftPropertyLookup, rewrittenROperand, true, expression.Operator), true + } + } + + if hasRightPropertyLookup { + if rewrittenLOperand, rewritten := rewriteStringEqualityOperand(expression.LOperand); rewritten { + rewritePropertyLookupOperator(rightPropertyLookup, pgsql.Text) + return buildStringPropertyComparisonPredicate(rightPropertyLookup, rewrittenLOperand, false, expression.Operator), true + } + } + + return nil, false +} + +func buildStringPropertyComparisonPredicate(propertyLookup *pgsql.BinaryExpression, textOperand pgsql.Expression, propertyOnLeft bool, operator pgsql.Operator) pgsql.Expression { + stringComparison := buildStringPropertyEqualityComparison(propertyLookup, textOperand, propertyOnLeft, operator) + + if operator == pgsql.OperatorEquals { + return pgsql.NewParenthetical(pgsql.NewBinaryExpression( + jsonbStringTypeCheck(propertyLookup), + pgsql.OperatorAnd, + stringComparison, + )) + } + + var ( + nonStringTypeCheck = pgsql.NewBinaryExpression( + jsonbTypeof(jsonFieldPropertyLookup(propertyLookup)), + pgsql.OperatorCypherNotEquals, + pgsql.NewLiteral("string", pgsql.Text), + ) + nonStringComparison = pgsql.NewBinaryExpression( + jsonFieldPropertyLookup(propertyLookup), + pgsql.OperatorCypherNotEquals, + toJSONBTextOperand(textOperand), + ) ) - nullTaintExpression := pgsql.NewBinaryExpression( - nullExpression, - pgsql.OperatorAnd, - pgsql.NullLiteral(), + + return pgsql.NewParenthetical(pgsql.NewBinaryExpression( + pgsql.NewBinaryExpression( + jsonbStringTypeCheck(propertyLookup), + pgsql.OperatorAnd, + stringComparison, + ), + pgsql.OperatorOr, + pgsql.NewBinaryExpression(nonStringTypeCheck, pgsql.OperatorAnd, nonStringComparison), + )) +} + +func buildEmptyArrayPropertyComparison(propertyLookup *pgsql.BinaryExpression, negated bool) *pgsql.BinaryExpression { + var ( + 
emptyArrayExpression = pgsql.NewBinaryExpression( + jsonFieldPropertyLookup(propertyLookup), + pgsql.OperatorEquals, + jsonEmptyArrayLiteral(), + ) + nullExpression = pgsql.NewBinaryExpression( + jsonFieldPropertyLookup(propertyLookup), + pgsql.OperatorEquals, + jsonNullLiteral(), + ) + nullTaintExpression = pgsql.NewBinaryExpression( + nullExpression, + pgsql.OperatorAnd, + pgsql.NullLiteral(), + ) ) if negated { @@ -1197,6 +1294,10 @@ func (s *ExpressionTreeTranslator) rewriteBinaryExpression(newExpression *pgsql. s.PushOperand(newExpression) case pgsql.OperatorEquals: + if err := applyBinaryExpressionTypeHints(s.kindMapper, newExpression); err != nil { + return err + } + if propertyLookup, hasEmptyArrayLiteralPropertyComparison := isEmptyArrayLiteralPropertyComparison(newExpression); hasEmptyArrayLiteralPropertyComparison { expandedExpression := buildEmptyArrayPropertyComparison(propertyLookup, false) @@ -1205,11 +1306,17 @@ func (s *ExpressionTreeTranslator) rewriteBinaryExpression(newExpression *pgsql. } s.PushOperand(pgsql.NewParenthetical(expandedExpression)) + } else if rewrittenExpression, rewritten := buildStringPropertyEqualityPredicate(newExpression); rewritten { + s.PushOperand(rewrittenExpression) } else { s.PushOperand(newExpression) } case pgsql.OperatorCypherNotEquals: + if err := applyBinaryExpressionTypeHints(s.kindMapper, newExpression); err != nil { + return err + } + if propertyLookup, hasEmptyArrayLiteralPropertyComparison := isEmptyArrayLiteralPropertyComparison(newExpression); hasEmptyArrayLiteralPropertyComparison { expandedExpression := buildEmptyArrayPropertyComparison(propertyLookup, true) @@ -1218,6 +1325,8 @@ func (s *ExpressionTreeTranslator) rewriteBinaryExpression(newExpression *pgsql. 
} s.PushOperand(pgsql.NewParenthetical(expandedExpression)) + } else if rewrittenExpression, rewritten := buildStringPropertyEqualityPredicate(newExpression); rewritten { + s.PushOperand(rewrittenExpression) } else { s.PushOperand(newExpression) } diff --git a/cypher/models/pgsql/translate/expression_test.go b/cypher/models/pgsql/translate/expression_test.go index c95edb1c..48faddab 100644 --- a/cypher/models/pgsql/translate/expression_test.go +++ b/cypher/models/pgsql/translate/expression_test.go @@ -298,66 +298,107 @@ func TestInferWrappedExpressionType(t *testing.T) { } func TestPropertyLookupEqualityScalarRewrites(t *testing.T) { - propertyLookup := func(property string) *pgsql.BinaryExpression { - return pgsql.NewPropertyLookup( - pgsql.CompoundIdentifier{"n", pgsql.ColumnProperties}, - mustAsLiteral(property), - ) - } - renderEquality := func(t *testing.T, lOperand, rOperand pgsql.Expression) string { - t.Helper() + var ( + propertyLookup = func(property string) *pgsql.BinaryExpression { + return pgsql.NewPropertyLookup( + pgsql.CompoundIdentifier{"n", pgsql.ColumnProperties}, + mustAsLiteral(property), + ) + } + renderComparison = func(t *testing.T, lOperand pgsql.Expression, operator pgsql.Operator, rOperand pgsql.Expression) string { + t.Helper() - treeTranslator := translate.NewExpressionTreeTranslator(nil) - treeTranslator.PushOperand(lOperand) - treeTranslator.PushOperand(rOperand) - require.NoError(t, treeTranslator.CompleteBinaryExpression(translate.NewScope(), pgsql.OperatorEquals)) + treeTranslator := translate.NewExpressionTreeTranslator(nil) + treeTranslator.PushOperand(lOperand) + treeTranslator.PushOperand(rOperand) + require.NoError(t, treeTranslator.CompleteBinaryExpression(translate.NewScope(), operator)) - formatted, err := format.Expression(treeTranslator.PeekOperand(), format.NewOutputBuilder()) - require.NoError(t, err) + formatted, err := format.Expression(treeTranslator.PeekOperand(), format.NewOutputBuilder()) + require.NoError(t, err) 
- return formatted - } + return formatted + } + ) testCases := []struct { Name string LOperand pgsql.Expression + Operator pgsql.Operator ROperand pgsql.Expression Expected string }{{ - Name: "boolean string literal keeps text property lookup", + Name: "string literal uses typed text property lookup", LOperand: propertyLookup("isassignabletorole"), + Operator: pgsql.OperatorEquals, ROperand: mustAsLiteral("true"), - Expected: "(n.properties ->> 'isassignabletorole') = 'true'", + Expected: "(jsonb_typeof((n.properties -> 'isassignabletorole')) = 'string' and (n.properties ->> 'isassignabletorole') = 'true')", }, { - Name: "boolean string literal keeps text property lookup when reversed", + Name: "string literal uses typed text property lookup when reversed", LOperand: mustAsLiteral("true"), + Operator: pgsql.OperatorEquals, ROperand: propertyLookup("isassignabletorole"), - Expected: "'true' = (n.properties ->> 'isassignabletorole')", + Expected: "(jsonb_typeof((n.properties -> 'isassignabletorole')) = 'string' and 'true' = (n.properties ->> 'isassignabletorole'))", }, { - Name: "non-boolean string literal keeps jsonb scalar equality", + Name: "numeric-looking string literal remains string typed", LOperand: propertyLookup("rank"), + Operator: pgsql.OperatorEquals, ROperand: mustAsLiteral("1"), - Expected: "((n.properties -> 'rank'))::jsonb = to_jsonb(('1')::text)::jsonb", + Expected: "(jsonb_typeof((n.properties -> 'rank')) = 'string' and (n.properties ->> 'rank') = '1')", + }, { + Name: "text parameter uses typed text property lookup", + LOperand: propertyLookup("objectid"), + Operator: pgsql.OperatorEquals, + ROperand: pgsql.Parameter{Identifier: "pi0", CastType: pgsql.Text}, + Expected: "(jsonb_typeof((n.properties -> 'objectid')) = 'string' and (n.properties ->> 'objectid') = @pi0::text)", + }, { + Name: "text function uses typed text property lookup", + LOperand: propertyLookup("distinguishedname"), + Operator: pgsql.OperatorEquals, + ROperand: 
pgsql.FunctionCall{ + Function: pgsql.FunctionToUpper, + Parameters: []pgsql.Expression{mustAsLiteral("admin")}, + CastType: pgsql.Text, + }, + Expected: "(jsonb_typeof((n.properties -> 'distinguishedname')) = 'string' and (n.properties ->> 'distinguishedname') = upper('admin')::text)", + }, { + Name: "text function uses typed text property lookup when reversed", + LOperand: pgsql.FunctionCall{ + Function: pgsql.FunctionToUpper, + Parameters: []pgsql.Expression{mustAsLiteral("admin")}, + CastType: pgsql.Text, + }, + Operator: pgsql.OperatorEquals, + ROperand: propertyLookup("distinguishedname"), + Expected: "(jsonb_typeof((n.properties -> 'distinguishedname')) = 'string' and upper('admin')::text = (n.properties ->> 'distinguishedname'))", + }, { + Name: "string inequality keeps non-string JSONB branch", + LOperand: propertyLookup("rank"), + Operator: pgsql.OperatorCypherNotEquals, + ROperand: mustAsLiteral("1"), + Expected: "(jsonb_typeof((n.properties -> 'rank')) = 'string' and (n.properties ->> 'rank') <> '1' or jsonb_typeof((n.properties -> 'rank')) <> 'string' and (n.properties -> 'rank') <> to_jsonb(('1')::text)::jsonb)", }, { Name: "boolean literal keeps jsonb scalar equality", LOperand: propertyLookup("isassignabletorole"), + Operator: pgsql.OperatorEquals, ROperand: mustAsLiteral(true), Expected: "((n.properties -> 'isassignabletorole'))::jsonb = to_jsonb((true)::bool)::jsonb", }, { Name: "numeric literal keeps jsonb scalar equality", LOperand: propertyLookup("count"), + Operator: pgsql.OperatorEquals, ROperand: mustAsLiteral(1), Expected: "((n.properties -> 'count'))::jsonb = to_jsonb((1)::int8)::jsonb", }, { Name: "property to property equality keeps jsonb operands", LOperand: propertyLookup("left"), + Operator: pgsql.OperatorEquals, ROperand: propertyLookup("right"), Expected: "(n.properties -> 'left') = (n.properties -> 'right')", }} for _, testCase := range testCases { t.Run(testCase.Name, func(t *testing.T) { - require.Equal(t, testCase.Expected, 
renderEquality(t, testCase.LOperand, testCase.ROperand)) + require.Equal(t, testCase.Expected, renderComparison(t, testCase.LOperand, testCase.Operator, testCase.ROperand)) }) } } @@ -443,8 +484,10 @@ func TestExpressionTreeTranslator(t *testing.T) { treeTranslator.PopRemainingExpressionsAsUserConstraints() // Pull out the 'a' constraint - aIdentifier := pgsql.AsIdentifierSet("a") - expectedTranslation := "(a.name = 'a' and a.num_a > 1)" + var ( + aIdentifier = pgsql.AsIdentifierSet("a") + expectedTranslation = "(a.name = 'a' and a.num_a > 1)" + ) validateConstraints(t, treeTranslator, aIdentifier, expectedTranslation) // Pull out the 'b' constraint next diff --git a/cypher/models/pgsql/translate/function.go b/cypher/models/pgsql/translate/function.go index 5a475e3b..942e6d22 100644 --- a/cypher/models/pgsql/translate/function.go +++ b/cypher/models/pgsql/translate/function.go @@ -8,6 +8,7 @@ import ( "github.com/specterops/dawgs/cypher/models/cypher" "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" ) const legacyToIntegerFunction = "toint" @@ -279,6 +280,9 @@ func bindingExpressionType(binding *BoundIdentifier) pgsql.DataType { case pgsql.ExpansionRootNode, pgsql.ExpansionTerminalNode: return pgsql.NodeComposite + case pgsql.PathEdge: + return pgsql.Int8 + default: return binding.DataType } @@ -412,14 +416,7 @@ func (s *Translator) translateTailFunction(functionInvocation *cypher.FunctionIn &pgsql.ArraySlice{ Expression: pgsql.NewParenthetical(argument), Lower: pgsql.NewLiteral(2, pgsql.Int), - Upper: pgsql.FunctionCall{ - Function: pgsql.FunctionCardinality, - Parameters: []pgsql.Expression{ - argument, - }, - CastType: pgsql.Int, - }, - CastType: arrayType, + CastType: arrayType, }, pgsql.ArrayLiteral{ CastType: arrayType, @@ -469,25 +466,23 @@ func (s *Translator) translatePathComponentFunction(functionInvocation *cypher.F } else if literal, isLiteral := argument.(pgsql.Literal); isLiteral && 
literal.Null { s.treeTranslator.PushOperand(pgsql.NewTypeCast(literal, castType)) } else { - if column == pgsql.ColumnEdges { - if identifier, isIdentifier := unwrapParenthetical(argument).(pgsql.Identifier); isIdentifier { - binding, bound := s.scope.Lookup(identifier) - if !bound { - binding, bound = s.scope.AliasedLookup(identifier) - } - - if !bound { - return fmt.Errorf("unable to resolve path identifier %s", identifier) - } else if binding.DataType != pgsql.PathComposite { - return fmt.Errorf("expected path expression but received %s", binding.DataType) - } + if identifier, isIdentifier := unwrapParenthetical(argument).(pgsql.Identifier); isIdentifier { + binding, bound := s.scope.Lookup(identifier) + if !bound { + binding, bound = s.scope.AliasedLookup(identifier) + } - s.treeTranslator.PushOperand(pgsql.NewTypeCast(pgsql.RowColumnReference{ - Identifier: argument, - Column: column, - }, castType)) - return nil + if !bound { + return fmt.Errorf("unable to resolve path identifier %s", identifier) + } else if binding.DataType != pgsql.PathComposite { + return fmt.Errorf("expected path expression but received %s", binding.DataType) } + + s.treeTranslator.PushOperand(pgsql.NewTypeCast(pgsql.RowColumnReference{ + Identifier: identifier, + Column: column, + }, castType)) + return nil } if pathExpression, err := s.expressionForPath(argument); err != nil { @@ -534,6 +529,28 @@ func prepareCollectExpression(scope *Scope, collectedExpression pgsql.Expression return collectedExpression, castType, nil } +func prepareCollectIDExpression(scope *Scope, collectedExpression pgsql.Expression) (pgsql.Expression, bool) { + identifier, isIdentifier := unwrapParenthetical(collectedExpression).(pgsql.Identifier) + if !isIdentifier { + return nil, false + } + + binding, bound := scope.Lookup(identifier) + if !bound { + return nil, false + } + + switch binding.DataType { + case pgsql.NodeComposite, pgsql.EdgeComposite, pgsql.ExpansionRootNode, pgsql.ExpansionTerminalNode, 
pgsql.ExpansionEdge: + return pgsql.RowColumnReference{ + Identifier: identifier, + Column: pgsql.ColumnID, + }, true + default: + return nil, false + } +} + func translateNodeLabelsExpression(identifier pgsql.Identifier) pgsql.TypeHinted { const ( kindAlias pgsql.Identifier = "_kind" @@ -845,6 +862,19 @@ func (s *Translator) translateFunction(typedExpression *cypher.FunctionInvocatio s.SetError(fmt.Errorf("expected only one argument for cypher function: %s", typedExpression.Name)) } else if collectedExpression, err := s.treeTranslator.PopOperand(); err != nil { s.SetError(err) + } else if s.collectIDProjectionDepth > 0 { + if idExpression, collectIDs := prepareCollectIDExpression(s.scope, collectedExpression); collectIDs { + s.treeTranslator.PushOperand( + functionWrapCollectToArray(typedExpression.Distinct, idExpression, pgsql.Int8Array), + ) + s.recordLowering(optimize.LoweringCollectIDMembership) + } else if preparedExpression, castType, err := prepareCollectExpression(s.scope, collectedExpression, typedExpression.Name); err != nil { + s.SetError(err) + } else { + s.treeTranslator.PushOperand( + functionWrapCollectToArray(typedExpression.Distinct, preparedExpression, castType), + ) + } } else if preparedExpression, castType, err := prepareCollectExpression(s.scope, collectedExpression, typedExpression.Name); err != nil { s.SetError(err) } else { diff --git a/cypher/models/pgsql/translate/function_test.go b/cypher/models/pgsql/translate/function_test.go index fbb517bd..42242991 100644 --- a/cypher/models/pgsql/translate/function_test.go +++ b/cypher/models/pgsql/translate/function_test.go @@ -2,6 +2,7 @@ package translate import ( "context" + "strings" "testing" "github.com/specterops/dawgs/cypher/frontend" @@ -56,6 +57,72 @@ func TestPathComponentFunctionsTranslateNullArguments(t *testing.T) { require.Contains(t, formatted, "(null)::edgecomposite[]") } +func TestTailFunctionDoesNotDuplicatePathComponentExpression(t *testing.T) { + kindMapper := 
pgutil.NewInMemoryKindMapper() + + query, err := frontend.ParseCypher(frontend.NewContext(), `MATCH p = ()-[*1..]->() RETURN tail(tail(nodes(p)))`) + require.NoError(t, err) + + translation, err := Translate(context.Background(), query, kindMapper, nil, DefaultGraphID) + require.NoError(t, err) + + formatted, err := Translated(translation) + require.NoError(t, err) + require.Equal(t, 1, strings.Count(formatted, "ordered_edges_to_path"), formatted) + require.NotContains(t, formatted, "cardinality(((case when") +} + +func TestTailPredicateStagesPathComponentExpression(t *testing.T) { + kindMapper := pgutil.NewInMemoryKindMapper() + + query, err := frontend.ParseCypher(frontend.NewContext(), `MATCH p = ()-[*1..]->() WHERE NONE(n IN TAIL(TAIL(NODES(p))) WHERE true) RETURN p`) + require.NoError(t, err) + + translation, err := Translate(context.Background(), query, kindMapper, nil, DefaultGraphID) + require.NoError(t, err) + + formatted, err := Translated(translation) + require.NoError(t, err) + require.Equal(t, 1, strings.Count(formatted, "ordered_edges_to_path")) + require.Contains(t, formatted, "lateral (select") + require.Contains(t, formatted, ".nodes") +} + +func TestProjectionStagesPathBeforeReadingComponents(t *testing.T) { + kindMapper := pgutil.NewInMemoryKindMapper() + + query, err := frontend.ParseCypher(frontend.NewContext(), `MATCH p = ()-[*1..]->() RETURN p, nodes(p), relationships(p)`) + require.NoError(t, err) + + translation, err := Translate(context.Background(), query, kindMapper, nil, DefaultGraphID) + require.NoError(t, err) + + formatted, err := Translated(translation) + require.NoError(t, err) + require.Contains(t, formatted, "lateral (select") + require.Equal(t, 1, strings.Count(formatted, "ordered_edges_to_path"), formatted) + require.Contains(t, formatted, ".nodes") + require.Contains(t, formatted, ".edges") +} + +func TestProjectionStagesRepeatedPathComponents(t *testing.T) { + kindMapper := pgutil.NewInMemoryKindMapper() + + query, err := 
frontend.ParseCypher(frontend.NewContext(), `MATCH p = ()-[*1..]->() RETURN size(relationships(p)), nodes(p), relationships(p)`) + require.NoError(t, err) + + translation, err := Translate(context.Background(), query, kindMapper, nil, DefaultGraphID) + require.NoError(t, err) + + formatted, err := Translated(translation) + require.NoError(t, err) + require.Contains(t, formatted, "lateral (select") + require.Equal(t, 1, strings.Count(formatted, "ordered_edges_to_path"), formatted) + require.Equal(t, 1, strings.Count(formatted, "from unnest"), formatted) + require.Contains(t, formatted, ".nodes") + require.Contains(t, formatted, ".edges") +} + func TestPrepareCollectExpressionMissingBindingErrorNamesArgument(t *testing.T) { t.Parallel() @@ -63,3 +130,51 @@ func TestPrepareCollectExpressionMissingBindingErrorNamesArgument(t *testing.T) require.EqualError(t, err, "binding not found for collect function argument missing") } + +func TestCollectMembershipOnlyProjectionUsesIDs(t *testing.T) { + t.Parallel() + + kindMapper := pgutil.NewInMemoryKindMapper() + + query, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (s) + WITH collect(s) AS exclude + MATCH (c) + WHERE NOT c IN exclude + RETURN c + `) + require.NoError(t, err) + + translation, err := Translate(context.Background(), query, kindMapper, nil, DefaultGraphID) + require.NoError(t, err) + + formatted, err := Translated(translation) + require.NoError(t, err) + normalized := strings.Join(strings.Fields(formatted), " ") + + require.Contains(t, normalized, "array_agg((n0).id)") + require.Contains(t, normalized, "array []::int8[]") + require.Contains(t, normalized, "not n1.id = any (s0.") + require.NotContains(t, normalized, "array []::nodecomposite[]") + requireOptimizationLowering(t, translation.Optimization, "CollectIDMembership") +} + +func TestReturnedCollectNodeKeepsCompositeArray(t *testing.T) { + t.Parallel() + + kindMapper := pgutil.NewInMemoryKindMapper() + + query, err := 
frontend.ParseCypher(frontend.NewContext(), `MATCH (s) RETURN collect(s) AS nodes`) + require.NoError(t, err) + + translation, err := Translate(context.Background(), query, kindMapper, nil, DefaultGraphID) + require.NoError(t, err) + + formatted, err := Translated(translation) + require.NoError(t, err) + normalized := strings.Join(strings.Fields(formatted), " ") + + require.Contains(t, normalized, "array []::nodecomposite[]") + require.NotContains(t, normalized, "array_agg((n0).id)") + requireNoOptimizationLowering(t, translation.Optimization, "CollectIDMembership") +} diff --git a/cypher/models/pgsql/translate/hinting.go b/cypher/models/pgsql/translate/hinting.go index ee743442..5d837c4e 100644 --- a/cypher/models/pgsql/translate/hinting.go +++ b/cypher/models/pgsql/translate/hinting.go @@ -417,7 +417,11 @@ func applyTypeFunctionLikeTypeHints(kindMapper *contextAwareKindMapper, expressi typedROperand.CastType = lOperandTypeHint expression.ROperand = typedROperand } else if !lOperandTypeHint.IsKnown() { - expression.LOperand = pgsql.NewTypeCast(expression.LOperand, typedROperand.CastType.ArrayBaseType()) + if propertyLookup, isPropertyLookup := expressionToPropertyLookupBinaryExpression(expression.LOperand); isPropertyLookup && typedROperand.CastType == pgsql.Text { + expression.LOperand = rewritePropertyLookupOperator(propertyLookup, pgsql.Text) + } else { + expression.LOperand = pgsql.NewTypeCast(expression.LOperand, typedROperand.CastType.ArrayBaseType()) + } } else if pgsql.OperatorIsComparator(expression.Operator) && !typedROperand.CastType.IsComparable(lOperandTypeHint, expression.Operator) { return newFunctionCallComparatorError(typedROperand, expression.Operator, lOperandTypeHint) } @@ -439,5 +443,9 @@ func applyBinaryExpressionTypeHints(kindMapper *contextAwareKindMapper, expressi return err } - return applyTypeFunctionLikeTypeHints(kindMapper, expression) + if err := applyTypeFunctionLikeTypeHints(kindMapper, expression); err != nil { + return err + } + + 
return nil } diff --git a/cypher/models/pgsql/translate/limit_pushdown_test.go b/cypher/models/pgsql/translate/limit_pushdown_test.go index 7334b2fb..e7edcb5f 100644 --- a/cypher/models/pgsql/translate/limit_pushdown_test.go +++ b/cypher/models/pgsql/translate/limit_pushdown_test.go @@ -75,6 +75,7 @@ func limitPushdownTestJoin(nodeAlias, expansionColumn pgsql.Identifier) pgsql.Jo func limitPushdownTestPart(harnessFunction pgsql.Identifier) *QueryPart { part := NewQueryPart(1, 0) part.Limit = pgsql.NewLiteral(10, pgsql.Int) + part.AllowLimitPushdown(limitPushdownTestSourceFrame) part.Model.AddCTE(pgsql.CommonTableExpression{ Alias: pgsql.TableAlias{Name: limitPushdownTestSourceFrame}, Query: pgsql.Query{ @@ -109,11 +110,13 @@ func limitPushdownTestTail(where pgsql.Expression) pgsql.Select { } func TestLimitPushdownTailSourceAllowsUnidirectionalShortestPathEndpointInequality(t *testing.T) { - part := limitPushdownTestPart(pgsql.FunctionUnidirectionalSPHarness) - tailSelect := limitPushdownTestTail(limitPushdownTestEndpointInequality( - limitPushdownTestRootAlias, - limitPushdownTestTerminalAlias, - )) + var ( + part = limitPushdownTestPart(pgsql.FunctionUnidirectionalSPHarness) + tailSelect = limitPushdownTestTail(limitPushdownTestEndpointInequality( + limitPushdownTestRootAlias, + limitPushdownTestTerminalAlias, + )) + ) sourceFrame, canPushDown := limitPushdownTailSource(part, tailSelect) require.True(t, canPushDown) @@ -121,11 +124,13 @@ func TestLimitPushdownTailSourceAllowsUnidirectionalShortestPathEndpointInequali } func TestLimitPushdownTailSourceAllowsReversedUnidirectionalShortestPathEndpointInequality(t *testing.T) { - part := limitPushdownTestPart(pgsql.FunctionUnidirectionalSPHarness) - tailSelect := limitPushdownTestTail(limitPushdownTestEndpointInequality( - limitPushdownTestTerminalAlias, - limitPushdownTestRootAlias, - )) + var ( + part = limitPushdownTestPart(pgsql.FunctionUnidirectionalSPHarness) + tailSelect = 
limitPushdownTestTail(limitPushdownTestEndpointInequality( + limitPushdownTestTerminalAlias, + limitPushdownTestRootAlias, + )) + ) sourceFrame, canPushDown := limitPushdownTailSource(part, tailSelect) require.True(t, canPushDown) @@ -133,16 +138,18 @@ func TestLimitPushdownTailSourceAllowsReversedUnidirectionalShortestPathEndpoint } func TestLimitPushdownTailSourceBlocksMixedShortestPathWherePredicate(t *testing.T) { - part := limitPushdownTestPart(pgsql.FunctionUnidirectionalSPHarness) - tailSelect := limitPushdownTestTail(pgsql.NewBinaryExpression( - limitPushdownTestEndpointInequality(limitPushdownTestRootAlias, limitPushdownTestTerminalAlias), - pgsql.OperatorAnd, - pgsql.NewBinaryExpression( - limitPushdownTestEndpointRef(limitPushdownTestTerminalAlias), - pgsql.OperatorGreaterThan, - pgsql.NewLiteral(0, pgsql.Int), - ), - )) + var ( + part = limitPushdownTestPart(pgsql.FunctionUnidirectionalSPHarness) + tailSelect = limitPushdownTestTail(pgsql.NewBinaryExpression( + limitPushdownTestEndpointInequality(limitPushdownTestRootAlias, limitPushdownTestTerminalAlias), + pgsql.OperatorAnd, + pgsql.NewBinaryExpression( + limitPushdownTestEndpointRef(limitPushdownTestTerminalAlias), + pgsql.OperatorGreaterThan, + pgsql.NewLiteral(0, pgsql.Int), + ), + )) + ) _, canPushDown := limitPushdownTailSource(part, tailSelect) require.False(t, canPushDown) @@ -208,11 +215,13 @@ func TestLimitPushdownTailSourceAllowsBoundEndpointShortestPathSourceWithoutTail } func TestLimitPushdownTailSourceAllowsBidirectionalShortestPathEndpointInequality(t *testing.T) { - part := limitPushdownTestPart(pgsql.FunctionBidirectionalSPHarness) - tailSelect := limitPushdownTestTail(limitPushdownTestEndpointInequality( - limitPushdownTestRootAlias, - limitPushdownTestTerminalAlias, - )) + var ( + part = limitPushdownTestPart(pgsql.FunctionBidirectionalSPHarness) + tailSelect = limitPushdownTestTail(limitPushdownTestEndpointInequality( + limitPushdownTestRootAlias, + limitPushdownTestTerminalAlias, + 
)) + ) sourceFrame, canPushDown := limitPushdownTailSource(part, tailSelect) require.True(t, canPushDown) @@ -220,11 +229,13 @@ func TestLimitPushdownTailSourceAllowsBidirectionalShortestPathEndpointInequalit } func TestPushDownShortestPathLimitAppendsHarnessLimitWithEndpointInequality(t *testing.T) { - part := limitPushdownTestPart(pgsql.FunctionUnidirectionalSPHarness) - tailSelect := limitPushdownTestTail(limitPushdownTestEndpointInequality( - limitPushdownTestRootAlias, - limitPushdownTestTerminalAlias, - )) + var ( + part = limitPushdownTestPart(pgsql.FunctionUnidirectionalSPHarness) + tailSelect = limitPushdownTestTail(limitPushdownTestEndpointInequality( + limitPushdownTestRootAlias, + limitPushdownTestTerminalAlias, + )) + ) pushDownShortestPathLimit(part, tailSelect) @@ -245,11 +256,13 @@ func TestPushDownShortestPathLimitAppendsHarnessLimitWithEndpointInequality(t *t } func TestPushDownBidirectionalShortestPathLimitAppendsHarnessLimitWithEndpointInequality(t *testing.T) { - part := limitPushdownTestPart(pgsql.FunctionBidirectionalSPHarness) - tailSelect := limitPushdownTestTail(limitPushdownTestEndpointInequality( - limitPushdownTestRootAlias, - limitPushdownTestTerminalAlias, - )) + var ( + part = limitPushdownTestPart(pgsql.FunctionBidirectionalSPHarness) + tailSelect = limitPushdownTestTail(limitPushdownTestEndpointInequality( + limitPushdownTestRootAlias, + limitPushdownTestTerminalAlias, + )) + ) pushDownShortestPathLimit(part, tailSelect) diff --git a/cypher/models/pgsql/translate/match.go b/cypher/models/pgsql/translate/match.go index ba86e010..c49c3dba 100644 --- a/cypher/models/pgsql/translate/match.go +++ b/cypher/models/pgsql/translate/match.go @@ -16,7 +16,7 @@ func (s *Translator) translateMatch(match *cypher.Match) error { return err } } else { - if err := s.translateTraversalPatternPart(part, false); err != nil { + if err := s.translateTraversalPatternPart(part, false, !match.Optional); err != nil { return err } } @@ -84,8 +84,10 @@ func (s 
*Translator) buildOptionalMatchAggregationStep(aggregationFrame *Frame) // An "aggregation" frame like this will only be triggered after an OPTIONAL MATCH, which should only // take place AFTER `n>=1` previous MATCH expressions. To properly base the aggregation, we need to // join to the origin frame (prior to the OPTIONAL MATCH) based on the OPTIONAL MATCH's frame. - optMatchFrame := aggregationFrame.Previous - originFrame := optMatchFrame.Previous + var ( + optMatchFrame = aggregationFrame.Previous + originFrame = optMatchFrame.Previous + ) // originFrame could be nil if no previous frame is defined (for ex., leading OPTIONAL MATCH, which is // valid but effectively a plain MATCH) if originFrame == nil { @@ -112,8 +114,10 @@ func (s *Translator) buildOptionalMatchAggregationStep(aggregationFrame *Frame) // Construct the projection for this frame. Just take all of the exports for the "origin" frame // and optional match frame and re-export them // TODO: Does there need to be additional logic for visible/defined bindings, instead of only exports? 
- originIDExclusions := map[string]struct{}{} - projection := pgsql.Projection{} + var ( + originIDExclusions = map[string]struct{}{} + projection = pgsql.Projection{} + ) for _, exported := range originFrame.Exported.Slice() { projection = append(projection, &pgsql.AliasedExpression{ Expression: pgsql.CompoundIdentifier{originFrame.Binding.Identifier, exported}, diff --git a/cypher/models/pgsql/translate/model.go b/cypher/models/pgsql/translate/model.go index e1dd8a62..6bc434cf 100644 --- a/cypher/models/pgsql/translate/model.go +++ b/cypher/models/pgsql/translate/model.go @@ -6,6 +6,7 @@ import ( "github.com/specterops/dawgs/cypher/models" "github.com/specterops/dawgs/cypher/models/cypher" "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" "github.com/specterops/dawgs/cypher/models/walk" "github.com/specterops/dawgs/graph" ) @@ -144,21 +145,52 @@ func (s *TraversalStep) usesBoundEndpointPairs() bool { return s.LeftNodeBound && s.RightNodeBound && s.hasPreviousFrameBinding() } -func (s *TraversalStep) endpointSelectivity(scope *Scope, expression pgsql.Expression, bound bool) (int, error) { - selectivity, err := MeasureSelectivity(scope, expression) - if err != nil { - return 0, err +func (s *TraversalStep) usesBoundTerminalIDs() bool { + return s.RightNodeBound && s.hasPreviousFrameBinding() +} + +func canMaterializeTerminalFilterForStep(traversalStep *TraversalStep, expansionModel *Expansion) bool { + if traversalStep == nil || expansionModel == nil || traversalStep.RightNode == nil || + expansionModel.TerminalNodeConstraints == nil || + traversalStep.usesBoundEndpointPairs() || + traversalStep.usesBoundTerminalIDs() { + return false } - if bound && s.hasPreviousFrameBinding() { - selectivity += selectivityWeightBoundIdentifier + // Terminal filters are only useful as standalone SQL when they depend solely + // on the terminal node; external references must stay in the main query. 
+ _, externalConstraints := partitionConstraintByLocality( + expansionModel.TerminalNodeConstraints, + pgsql.AsIdentifierSet(traversalStep.RightNode.Identifier), + ) + + return externalConstraints == nil +} + +func canMaterializeEndpointPairFilterForStep(traversalStep *TraversalStep, expansionModel *Expansion) bool { + // Pair filters enumerate the exact root/terminal combinations the + // bidirectional harness must resolve. Kind-only endpoint predicates are not + // enough because they do not constrain the search columns used by the harness. + if traversalStep == nil || expansionModel == nil || + traversalStep.LeftNode == nil || + traversalStep.RightNode == nil || + traversalStep.usesBoundEndpointPairs() || + expansionModel.PrimerNodeConstraints == nil || + expansionModel.TerminalNodeConstraints == nil || + !hasPairAwareEndpointConstraint(expansionModel.PrimerNodeConstraints, traversalStep.LeftNode.Identifier) || + !hasPairAwareEndpointConstraint(expansionModel.TerminalNodeConstraints, traversalStep.RightNode.Identifier) { + return false } - return selectivity, nil + return true +} + +func (s *TraversalStep) endpointSelectivity(scope *Scope, expression pgsql.Expression, bound bool) (int, error) { + return optimize.NewSelectivityModel(scope).EndpointSelectivity(expression, bound, s.hasPreviousFrameBinding()) } func isBidirectionalSearchAnchor(selectivity int) bool { - return selectivity >= selectivityBidirectionalAnchorThreshold + return optimize.IsBidirectionalSearchAnchor(selectivity) } func hasIDEqualityConstraint(expression pgsql.Expression, identifier pgsql.Identifier) bool { @@ -168,8 +200,10 @@ func hasIDEqualityConstraint(expression pgsql.Expression, identifier pgsql.Ident continue } - leftIsID := isIdentifierIDReference(binaryExpression.LOperand, identifier) - rightIsID := isIdentifierIDReference(binaryExpression.ROperand, identifier) + var ( + leftIsID = isIdentifierIDReference(binaryExpression.LOperand, identifier) + rightIsID = 
isIdentifierIDReference(binaryExpression.ROperand, identifier) + ) if leftIsID && isStaticIDEqualityOperand(binaryExpression.ROperand) { return true @@ -341,80 +375,51 @@ func (s *TraversalStep) CanExecutePairAwareBidirectionalSearch(scope *Scope) (bo } } -// flattenConjunction collects the leaf operands of a left-recursive AND chain. func flattenConjunction(expr pgsql.Expression) []pgsql.Expression { - if bin, typeOK := expr.(*pgsql.BinaryExpression); !typeOK || bin.Operator != pgsql.OperatorAnd { - return []pgsql.Expression{expr} - } else { - return append(flattenConjunction(bin.LOperand), flattenConjunction(bin.ROperand)...) - } + return optimize.FlattenConjunction(expr) } -// expressionReferencesOnlyLocalIdentifiers returns true only when every binding -// reference found in the expression is a member of localScope. func expressionReferencesOnlyLocalIdentifiers(expression pgsql.Expression, localScope *pgsql.IdentifierSet) bool { - isLocal := true + return optimize.ExpressionReferencesOnlyLocalIdentifiers(expression, localScope) +} - walk.PgSQL(expression, walk.NewSimpleVisitor[pgsql.SyntaxNode]( - func(node pgsql.SyntaxNode, handler walk.VisitorHandler) { - switch typedNode := node.(type) { - case pgsql.CompoundIdentifier: - if len(typedNode) > 0 && !localScope.Contains(typedNode[0]) { - isLocal = false - handler.SetDone() - } +func subqueryReferencesOnlyLocalIdentifiers(subquery pgsql.Subquery, localScope *pgsql.IdentifierSet) bool { + return optimize.SubqueryReferencesOnlyLocalIdentifiers(subquery, localScope) +} - case pgsql.Identifier: - if !localScope.Contains(typedNode) { - isLocal = false - handler.SetDone() - } +func queryReferencesOnlyLocalIdentifiers(query pgsql.Query, localScope *pgsql.IdentifierSet) bool { + return optimize.QueryReferencesOnlyLocalIdentifiers(query, localScope) +} - case pgsql.RowColumnReference: - if !expressionReferencesOnlyLocalIdentifiers(typedNode.Identifier, localScope) { - isLocal = false - handler.SetDone() - } else { - 
handler.Consume() - } - } - }, - )) +func addFromClauseBindings(localScope *pgsql.IdentifierSet, fromClauses []pgsql.FromClause) { + optimize.AddFromClauseBindings(localScope, fromClauses) +} - return isLocal +func addFromExpressionBinding(localScope *pgsql.IdentifierSet, expression pgsql.Expression) { + optimize.AddFromExpressionBinding(localScope, expression) } -func isLocalToScope(expression pgsql.Expression, localScope *pgsql.IdentifierSet) bool { - if expression == nil { - return true - } +func selectReferencesOnlyLocalIdentifiers(selectBody pgsql.Select, localScope *pgsql.IdentifierSet) bool { + return optimize.SelectReferencesOnlyLocalIdentifiers(selectBody, localScope) +} - return expressionReferencesOnlyLocalIdentifiers(expression, localScope) +func fromExpressionReferencesOnlyLocalIdentifiers(expression pgsql.Expression) bool { + return optimize.FromExpressionReferencesOnlyLocalIdentifiers(expression) } -// partitionConstraintByLocality splits a conjunction (A AND B AND ...) into -// two expressions: one whose every binding reference is contained in -// localScope (safe for JOIN ON), and one that references outside identifiers -// (must stay in WHERE). -// -// Only top-level AND operands are split. If an expression is not a -// BinaryExpression with OperatorAnd, the whole expression is tested as a unit. 
-func partitionConstraintByLocality(expression pgsql.Expression, localScope *pgsql.IdentifierSet) (pgsql.Expression, pgsql.Expression) { - var ( - joinConstraints pgsql.Expression - whereConstraints pgsql.Expression - terms = flattenConjunction(expression) - ) +func isLocalToScope(expression pgsql.Expression, localScope *pgsql.IdentifierSet) bool { + return optimize.IsLocalToScope(expression, localScope) +} - for _, term := range terms { - if isLocalToScope(term, localScope) { - joinConstraints = pgsql.OptionalAnd(joinConstraints, term) - } else { - whereConstraints = pgsql.OptionalAnd(whereConstraints, term) - } - } +func partitionConstraintByLocality(expression pgsql.Expression, localScope *pgsql.IdentifierSet) (pgsql.Expression, pgsql.Expression) { + return optimize.PartitionConstraintByLocality(expression, localScope) +} - return joinConstraints, whereConstraints +type ProjectionPruningApplication struct { + LeftNode *BoundIdentifier + Relationship *BoundIdentifier + RightNode *BoundIdentifier + PathBinding *BoundIdentifier } type TraversalStep struct { @@ -422,8 +427,10 @@ type TraversalStep struct { Direction graph.Direction Expansion *Expansion PathReversed bool + ProjectionPruning ProjectionPruningApplication LeftNode *BoundIdentifier LeftNodeBound bool + UseExpandInto bool LeftNodeConstraints pgsql.Expression LeftNodeJoinCondition pgsql.Expression Edge *BoundIdentifier @@ -491,6 +498,8 @@ type PatternPart struct { ShortestPath bool AllShortestPaths bool PatternBinding *BoundIdentifier + Target optimize.PatternTarget + HasTarget bool TraversalSteps []*TraversalStep NodeSelect NodeSelect Constraints *ConstraintTracker diff --git a/cypher/models/pgsql/translate/optimizer_safety_test.go b/cypher/models/pgsql/translate/optimizer_safety_test.go new file mode 100644 index 00000000..aa8ecd7e --- /dev/null +++ b/cypher/models/pgsql/translate/optimizer_safety_test.go @@ -0,0 +1,1151 @@ +package translate + +import ( + "context" + "strings" + "testing" + + 
"github.com/specterops/dawgs/cypher/frontend" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" + "github.com/specterops/dawgs/drivers/pg/pgutil" + "github.com/specterops/dawgs/graph" + "github.com/stretchr/testify/require" +) + +const optimizerADCSQuery = ` +MATCH (n:Group) +WHERE n.objectid = 'S-1-5-21-2643190041-1319121918-239771340-513' +MATCH p1 = (n)-[:MemberOf*0..]->()-[:Enroll]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(:NTAuthStore)-[:NTAuthStoreFor]->(d:Domain) +MATCH p2 = (n)-[:MemberOf*0..]->()-[:GenericAll|Enroll|AllExtendedRights]->(ct:CertTemplate)-[:PublishedTo]->(ca)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(:RootCA)-[:RootCAFor]->(d) +WHERE ct.authenticationenabled = true +AND ct.requiresmanagerapproval = false +AND ct.enrolleesuppliessubject = true +AND (ct.schemaversion = 1 OR ct.authorizedsignatures = 0) +RETURN p1, p2 +` + +func optimizerSafetyKindMapper() *pgutil.InMemoryKindMapper { + mapper := pgutil.NewInMemoryKindMapper() + + for _, kind := range graph.StringsToKinds([]string{ + "AllExtendedRights", + "CertTemplate", + "Domain", + "Enroll", + "EnterpriseCA", + "EnterpriseCAFor", + "GenericAll", + "Group", + "IssuedSignedBy", + "MemberOf", + "NTAuthStore", + "NTAuthStoreFor", + "PublishedTo", + "RootCA", + "RootCAFor", + "TrustedForNTAuth", + "AdminTo", + "Computer", + "Tag_Tier_Zero", + "User", + }) { + mapper.Put(kind) + } + + return mapper +} + +func optimizerSafetySQL(t *testing.T, cypherQuery string) string { + t.Helper() + + translation := optimizerSafetyTranslation(t, cypherQuery) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + + return strings.Join(strings.Fields(formattedQuery), " ") +} + +func optimizerSafetyTranslation(t *testing.T, cypherQuery string) Result { + t.Helper() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), cypherQuery) + require.NoError(t, err) + + translation, err := Translate(context.Background(), regularQuery, optimizerSafetyKindMapper(), nil, 
DefaultGraphID) + require.NoError(t, err) + + return translation +} + +func requireOptimizationLowering(t *testing.T, summary OptimizationSummary, name string) { + t.Helper() + + for _, lowering := range summary.Lowerings { + if lowering.Name == name { + return + } + } + + require.Failf(t, "missing optimization lowering", "expected lowering %q in %#v", name, summary.Lowerings) +} + +func requireNoOptimizationLowering(t *testing.T, summary OptimizationSummary, name string) { + t.Helper() + + for _, lowering := range summary.Lowerings { + require.NotEqualf(t, name, lowering.Name, "unexpected applied lowering %q in %#v", name, summary.Lowerings) + } +} + +func requirePlannedOptimizationLowering(t *testing.T, summary OptimizationSummary, name string) { + t.Helper() + + for _, lowering := range summary.PlannedLowerings { + if lowering.Name == name { + return + } + } + + require.Failf(t, "missing planned optimization lowering", "expected planned lowering %q in %#v", name, summary.PlannedLowerings) +} + +func requireNoPlannedOptimizationLowering(t *testing.T, summary OptimizationSummary, name string) { + t.Helper() + + for _, lowering := range summary.PlannedLowerings { + require.NotEqualf(t, name, lowering.Name, "unexpected planned lowering %q in %#v", name, summary.PlannedLowerings) + } +} + +func requirePlanParameterContains(t *testing.T, translation Result, expected string) { + t.Helper() + + for _, parameter := range translation.Parameters { + if planQuery, ok := parameter.(string); ok && strings.Contains(planQuery, expected) { + return + } + } + + require.Failf(t, "missing plan parameter content", "expected a plan parameter to contain %q in %#v", expected, translation.Parameters) +} + +func requireSkippedOptimizationLowering(t *testing.T, summary OptimizationSummary, name string, reason string) { + t.Helper() + + for _, lowering := range summary.SkippedLowerings { + if lowering.Name == name { + require.Equal(t, reason, lowering.Reason) + return + } + } + + 
require.Failf(t, "missing skipped optimization lowering", "expected skipped lowering %q in %#v", name, summary.SkippedLowerings) +} + +func requireSkippedOptimizationLoweringCount(t *testing.T, summary OptimizationSummary, name string, count int) { + t.Helper() + + for _, lowering := range summary.SkippedLowerings { + if lowering.Name == name { + require.Equal(t, count, lowering.Count) + return + } + } + + require.Failf(t, "missing skipped optimization lowering", "expected skipped lowering %q in %#v", name, summary.SkippedLowerings) +} + +func requireNoSkippedOptimizationLowering(t *testing.T, summary OptimizationSummary, name string) { + t.Helper() + + for _, lowering := range summary.SkippedLowerings { + require.NotEqualf(t, name, lowering.Name, "unexpected skipped lowering %q in %#v", name, summary.SkippedLowerings) + } +} + +func TestOptimizerSafetyReportsPartiallySkippedLowerings(t *testing.T) { + t.Parallel() + + translator := NewTranslator(context.Background(), optimizerSafetyKindMapper(), nil, DefaultGraphID) + translator.translation.Optimization.LoweringPlan = &optimize.LoweringPlan{ + PredicatePlacement: []optimize.PredicatePlacementDecision{ + {Target: optimize.TraversalStepTarget{StepIndex: 0}}, + {Target: optimize.TraversalStepTarget{StepIndex: 1}}, + }, + } + + translator.recordLowering(optimize.LoweringPredicatePlacement) + translator.recordSkippedLowerings() + + requireOptimizationLowering(t, translator.translation.Optimization, optimize.LoweringPredicatePlacement) + requireSkippedOptimizationLowering(t, translator.translation.Optimization, optimize.LoweringPredicatePlacement, "planned predicate placements were not consumed by this translation shape") + requireSkippedOptimizationLoweringCount(t, translator.translation.Optimization, optimize.LoweringPredicatePlacement, 1) +} + +func requireSQLContainsInOrder(t *testing.T, sql string, parts ...string) { + t.Helper() + + offset := 0 + for _, part := range parts { + nextIndex := 
strings.Index(sql[offset:], part) + require.NotEqualf(t, -1, nextIndex, "expected SQL to contain %q after offset %d:\n%s", part, offset, sql) + offset += nextIndex + len(part) + } +} + +func TestOptimizerSafetyCountStoreFastPathUsesBaseNodeCount(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, `MATCH (n) RETURN count(n)`) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + require.Empty(t, translation.Optimization.SkippedLowerings) + require.Equal(t, "select count(*)::int8 from node n0;", strings.Join(strings.Fields(formattedQuery), " ")) +} + +func TestOptimizerSafetyCountStoreFastPathKeepsKindConstraintAndAlias(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, `MATCH (n:Group) RETURN count(n) AS total`) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + require.Equal(t, "select count(*)::int8 as total from node n0 where n0.kind_ids operator (pg_catalog.@>) array [8]::int2[];", strings.Join(strings.Fields(formattedQuery), " ")) +} + +func TestOptimizerSafetyCountStoreFastPathSupportsNodeCountStar(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, `MATCH (:Group) RETURN count(*) AS total`) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + require.Equal(t, "select count(*)::int8 as total from node n0 where 
n0.kind_ids operator (pg_catalog.@>) array [8]::int2[];", strings.Join(strings.Fields(formattedQuery), " ")) +} + +func TestOptimizerSafetyCountStoreFastPathUsesBaseEdgeCount(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, `MATCH ()-[r:MemberOf]->() RETURN count(r)`) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireSkippedOptimizationLowering(t, translation.Optimization, optimize.LoweringProjectionPruning, "superseded by CountStoreFastPath") + require.Equal(t, "select count(*)::int8 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [10]::int2[]);", strings.Join(strings.Fields(formattedQuery), " ")) +} + +func TestOptimizerSafetyCountStoreFastPathUsesSparseEdgeKindCount(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, `MATCH ()-[r:Enroll]->() RETURN count(r)`) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireSkippedOptimizationLowering(t, translation.Optimization, optimize.LoweringProjectionPruning, "superseded by CountStoreFastPath") + require.NotContains(t, normalizedQuery, "with recursive") + require.NotContains(t, normalizedQuery, "ordered_edges_to_path") + require.Equal(t, "select count(*)::int8 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [4]::int2[]);", normalizedQuery) +} + +func 
TestOptimizerSafetyCountStoreFastPathUsesUntypedEdgeCount(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, `MATCH ()-[r]->() RETURN count(r)`) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireSkippedOptimizationLowering(t, translation.Optimization, optimize.LoweringProjectionPruning, "superseded by CountStoreFastPath") + require.NotContains(t, normalizedQuery, "with recursive") + require.NotContains(t, normalizedQuery, "ordered_edges_to_path") + require.Equal(t, "select count(*)::int8 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id;", normalizedQuery) +} + +func TestOptimizerSafetyCountStoreFastPathSupportsEdgeCountStar(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, `MATCH ()-[:MemberOf]->() RETURN count(*)`) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringCountStoreFastPath) + requireSkippedOptimizationLowering(t, translation.Optimization, optimize.LoweringProjectionPruning, "superseded by CountStoreFastPath") + require.Equal(t, "select count(*)::int8 from edge e0 join node n0 on n0.id = e0.start_id join node n1 on n1.id = e0.end_id where e0.kind_id = any (array [10]::int2[]);", strings.Join(strings.Fields(formattedQuery), " ")) +} + +func TestOptimizerSafetyADCSQueryPrunesExpansionEdgeCarry(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, optimizerADCSQuery) + formattedQuery, err := Translated(translation) + require.NoError(t, 
err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + requirePlannedOptimizationLowering(t, translation.Optimization, "ExpansionSuffixPushdown") + requirePlannedOptimizationLowering(t, translation.Optimization, "PredicatePlacement") + requirePlannedOptimizationLowering(t, translation.Optimization, "ExpandIntoDetection") + requireOptimizationLowering(t, translation.Optimization, "ExpansionSuffixPushdown") + requireOptimizationLowering(t, translation.Optimization, "PredicatePlacement") + requireOptimizationLowering(t, translation.Optimization, "ExpandIntoDetection") + + require.Contains(t, normalizedQuery, "select distinct (s0.n0).id as root_id from s0") + require.Contains(t, normalizedQuery, "select distinct (s5.n0).id as root_id from s5") + require.Contains(t, normalizedQuery, "select distinct (s9.n2).id as root_id from s9") + require.Contains(t, normalizedQuery, "s5.ep0 as ep0") + require.NotContains(t, normalizedQuery, "s5.e0 as e0") + require.Contains(t, normalizedQuery, "from unnest(s12.ep0)") + require.Contains(t, normalizedQuery, "from unnest(array [s12.e1]::int8[])") + require.NotContains(t, normalizedQuery, "array [s12.e1]::edgecomposite[]") + require.Contains(t, normalizedQuery, "from s5, s7") + requireSQLContainsInOrder(t, normalizedQuery, + "where s7.satisfied and exists (select 1 from edge e5 join node n6", + "properties -> 'authenticationenabled'", + "join edge e6 on n6.id = e6.start_id", + "e6.end_id = (s5.n2).id", + "and (s5.n0).id = s7.root_id", + ) + requireSQLContainsInOrder(t, normalizedQuery, + "where s11.satisfied and (s9.n2).id = s11.root_id and exists", + "from edge e8 where n7.id = e8.start_id", + "e8.end_id = (s9.n4).id", + ) +} + +func assertOptimizerSafetyRelationshipStaysComposite(t *testing.T, cypherQuery string) { + t.Helper() + + normalizedQuery := optimizerSafetySQL(t, cypherQuery) + + require.Contains(t, normalizedQuery, "(e0.id, e0.start_id, e0.end_id, e0.kind_id, e0.properties)::edgecomposite as e0") + 
require.Contains(t, normalizedQuery, "::edgecomposite") + require.NotContains(t, normalizedQuery, "e0.id as e0") + require.NotContains(t, normalizedQuery, "::int8[]") +} + +func TestOptimizerSafetyReferencedRelationshipStaysComposite(t *testing.T) { + t.Parallel() + + assertOptimizerSafetyRelationshipStaysComposite(t, ` +MATCH p = (n:Group)-[r:MemberOf]->(m:Group) +RETURN p, r +`) +} + +func TestOptimizerSafetyRelationshipExpressionReferencesStayComposite(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + query string + }{ + { + name: "type return", + query: ` +MATCH p = (n:Group)-[r:MemberOf]->(m:Group) +RETURN p, type(r) +`, + }, + { + name: "property predicate", + query: ` +MATCH p = (n:Group)-[r:MemberOf]->(m:Group) +WHERE r.label = 'member' +RETURN p +`, + }, + { + name: "start node return", + query: ` +MATCH p = (n:Group)-[r:MemberOf]->(m:Group) +RETURN p, startNode(r) +`, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + assertOptimizerSafetyRelationshipStaysComposite(t, testCase.query) + }) + } +} + +func TestOptimizerSafetyOptionalMatchPathStaysComposite(t *testing.T) { + t.Parallel() + + normalizedQuery := optimizerSafetySQL(t, ` +MATCH (n:Group) +OPTIONAL MATCH p = (n)-[:MemberOf]->(m:Group) +RETURN n, p +`) + + require.Contains(t, normalizedQuery, "::edgecomposite[]") + require.NotContains(t, normalizedQuery, "::int8[]") +} + +func TestOptimizerSafetyFixedHopExpandIntoUsesBoundEndpoints(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` +MATCH (a:Group) +MATCH (b:Group) +MATCH p = (a)-[:MemberOf]->(b) +RETURN p +`) + require.NoError(t, err) + + translation, err := Translate(context.Background(), regularQuery, optimizerSafetyKindMapper(), nil, DefaultGraphID) + require.NoError(t, err) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := 
strings.Join(strings.Fields(formattedQuery), " ") + + require.Contains(t, normalizedQuery, "(s1.n0).id = e0.start_id") + require.Contains(t, normalizedQuery, "(s1.n1).id = e0.end_id") + require.NotContains(t, normalizedQuery, "join node") + require.NotNil(t, translation.Optimization.LoweringPlan) + require.NotEmpty(t, translation.Optimization.LoweringPlan.ExpandInto) + requirePlannedOptimizationLowering(t, translation.Optimization, "ExpandIntoDetection") + requireOptimizationLowering(t, translation.Optimization, "ExpandIntoDetection") +} + +func TestOptimizerSafetyReordersIndependentNodeAnchor(t *testing.T) { + t.Parallel() + + normalizedQuery := optimizerSafetySQL(t, ` +MATCH (a) +MATCH (b:EnterpriseCA {name: 'target'}) +MATCH p = (a)-[:MemberOf]->(b) +RETURN p +`) + var ( + enterpriseAnchorIndex = strings.Index(normalizedQuery, "array [5]::int2[]") + broadScanIndex = strings.Index(normalizedQuery, "from s0, node n1") + ) + + require.NotEqual(t, -1, enterpriseAnchorIndex) + require.NotEqual(t, -1, broadScanIndex) + require.Less(t, enterpriseAnchorIndex, broadScanIndex) + require.Contains(t, normalizedQuery, "(s1.n1).id = e0.start_id") + require.Contains(t, normalizedQuery, "(s1.n0).id = e0.end_id") +} + +func TestOptimizerSafetyExpansionTerminalPushdownForFixedSuffix(t *testing.T) { + t.Parallel() + + normalizedQuery := optimizerSafetySQL(t, ` +MATCH p = (n:Group)-[:MemberOf*1..]->(m)-[:Enroll]->(ca:EnterpriseCA) +RETURN p +`) + + require.Contains(t, normalizedQuery, "exists (select 1 from edge e1 join node n2") + require.Contains(t, normalizedQuery, "n1.id = e1.start_id") + require.Contains(t, normalizedQuery, "e1.kind_id = any (array [4]::int2[])") + require.Contains(t, normalizedQuery, "n2.kind_ids operator (pg_catalog.@>) array [5]::int2[]") +} + +func TestOptimizerSafetySuffixPredicatePlacementStaysInsideTerminalExists(t *testing.T) { + t.Parallel() + + normalizedQuery := optimizerSafetySQL(t, ` +MATCH p = 
(n:Group)-[:MemberOf*1..]->(m)-[:Enroll]->(ca:EnterpriseCA) +WHERE ca.name = 'target' +RETURN p +`) + + requireSQLContainsInOrder(t, normalizedQuery, + "exists (select 1 from edge e1 join node n2", + "properties -> 'name'", + "where n1.id = e1.start_id", + ) +} + +func TestOptimizerSafetyPredicatePlacementRecordsExpansionRootConstraint(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH p = (src:Group)-[:MemberOf*1..]->(mid)-[:Enroll]->(ca:EnterpriseCA) +WHERE src.name = 'source' +RETURN p +`) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringPredicatePlacement) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringPredicatePlacement) + requireNoSkippedOptimizationLowering(t, translation.Optimization, optimize.LoweringPredicatePlacement) + requireSQLContainsInOrder(t, normalizedQuery, + "select n0.id as root_id from node n0 where", + "properties -> 'name'", + ) +} + +func TestOptimizerSafetyPredicatePlacementRecordsFixedTraversalConstraint(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (src:Group)-[:MemberOf]->(dst) +WHERE src.name = 'source' +RETURN dst +`) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringPredicatePlacement) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringPredicatePlacement) + requireNoSkippedOptimizationLowering(t, translation.Optimization, optimize.LoweringPredicatePlacement) + requireSQLContainsInOrder(t, normalizedQuery, + "join node n0 on", + "properties -> 'name'", + "join node n1", + ) +} + +func 
TestOptimizerSafetyPatternPredicateExistencePlacementIsPlanned(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (s) +WHERE NOT (s)-[]-() +RETURN s +`) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + require.Contains(t, normalizedQuery, "not exists (select 1 from edge e0") + requirePlannedOptimizationLowering(t, translation.Optimization, "PredicatePlacement") + requireOptimizationLowering(t, translation.Optimization, "PredicatePlacement") +} + +func TestOptimizerSafetyContinuationRelationshipsExcludePriorPathRelationships(t *testing.T) { + t.Parallel() + + expandedPrefixQuery := optimizerSafetySQL(t, ` +MATCH p = (n:Group)-[:MemberOf*1..]->(m)-[:Enroll]-(ca:EnterpriseCA) +RETURN p +`) + + require.Contains(t, expandedPrefixQuery, "e1.id != all") + require.Contains(t, expandedPrefixQuery, "ep0") + + fixedPrefixQuery := optimizerSafetySQL(t, ` +MATCH p = (n:Group)-[:MemberOf]->(m)-[:Enroll]->(ca:EnterpriseCA) +RETURN p +`) + + require.Contains(t, fixedPrefixQuery, "e1.id != s0.e0") +} + +func TestOptimizerSafetyDirectionBalancedExpansionDoesNotPlanStaleSuffixPushdown(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH p = (n)-[:MemberOf*1..]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(d:Domain) +RETURN p + `) + + requirePlannedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireNoPlannedOptimizationLowering(t, translation.Optimization, "ExpansionSuffixPushdown") + requireNoOptimizationLowering(t, translation.Optimization, "ExpansionSuffixPushdown") +} + +func TestOptimizerSafetyTraversalDirectionUsesRightEndpointPredicate(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH p = (n)-[:MemberOf*1..]->(ca) +WHERE ca.name = 'target' 
+RETURN p + `) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + requirePlannedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + require.Contains(t, normalizedQuery, "jsonb_typeof((n1.properties -> 'name')) = 'string'") + require.Contains(t, normalizedQuery, "(n1.properties ->> 'name') = 'target'") + require.Contains(t, normalizedQuery, "join edge e0 on e0.end_id = s1_seed.root_id") +} + +func TestOptimizerSafetyAggregateTraversalCountUsesIDOnlySourceAnchoredShape(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.hasspn = true AND u.enabled = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, COUNT(c) AS adminCount +RETURN u +ORDER BY adminCount DESC +LIMIT 100 + `) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + var ( + normalizedQuery = strings.Join(strings.Fields(formattedQuery), " ") + lowerQuery = strings.ToLower(normalizedQuery) + ) + + requirePlannedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireNoOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireSkippedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection", "superseded by AggregateTraversalCount") + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + require.Contains(t, lowerQuery, "with recursive candidate_sources(root_id)") + require.Contains(t, lowerQuery, "traversal(root_id, next_id, depth, path)") + require.Contains(t, lowerQuery, "terminal_nodes(id) as materialized") + require.Contains(t, lowerQuery, 
"terminal_hits(root_id)") + require.Contains(t, lowerQuery, "ranked(root_id, admincount)") + require.Contains(t, lowerQuery, "join edge e on e.start_id = candidate_sources.root_id") + require.Contains(t, lowerQuery, "e.start_id = traversal.next_id") + require.Contains(t, lowerQuery, "e.id != all (traversal.path)") + require.Contains(t, lowerQuery, "join terminal_nodes on terminal_nodes.id = traversal.next_id") + require.Contains(t, lowerQuery, "count(*)::int8 as admincount") + require.Contains(t, lowerQuery, "group by terminal_hits.root_id") + require.Contains(t, lowerQuery, "from ranked join node source_node on source_node.id = ranked.root_id") + require.NotContains(t, lowerQuery, "group by (") + require.NotContains(t, lowerQuery, "::nodecomposite as n0 from") +} + +func TestOptimizerSafetyTraversalDirectionReportsKindOnlyTerminalSkip(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +RETURN count(c) + `) + + requirePlannedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireNoOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireSkippedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection", "terminal kind-only estimate too broad") +} + +func TestOptimizerSafetyTraversalDirectionReportsSelectiveSourceSkip(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.objectid = 'S-1-5-21-1-1100' +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer {name: 'target'}) +RETURN c + `) + + requirePlannedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireNoOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireSkippedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection", "bound source estimate selective") +} + +func 
TestOptimizerSafetyTraversalDirectionReportsPriorLimitSourceSkip(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.hasspn = true +WITH u +LIMIT 10 +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer {name: 'target'}) +RETURN c + `) + + requirePlannedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireNoOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection") + requireSkippedOptimizationLowering(t, translation.Optimization, "TraversalDirectionSelection", "bound source estimate selective") +} + +func TestOptimizerSafetyAggregateTraversalCountAcceptsRowCount(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.hasspn = true AND u.enabled = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, COUNT(*) AS adminCount +RETURN u +ORDER BY adminCount DESC +LIMIT 100 + `) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(strings.ToLower(formattedQuery)), " ") + + requirePlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + require.Contains(t, normalizedQuery, "count(*)::int8 as admincount") + require.Contains(t, normalizedQuery, "group by terminal_hits.root_id") +} + +func TestOptimizerSafetyAggregateTraversalCountHonorsExplicitDepthBounds(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH (u)-[:MemberOf|AdminTo*2..4]->(c:Computer) +WITH DISTINCT u, COUNT(c) AS adminCount +RETURN u +ORDER BY adminCount DESC +LIMIT 100 + `) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(strings.ToLower(formattedQuery)), " ") + + 
requireOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + require.Contains(t, normalizedQuery, "where traversal.depth < 4") + require.Contains(t, normalizedQuery, "where traversal.depth >= 2") +} + +func TestOptimizerSafetyAggregateTraversalCountSupportsInboundSourceAnchoring(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH (u)<-[:MemberOf|AdminTo*1..]-(c:Computer) +WITH DISTINCT u, COUNT(c) AS adminCount +RETURN u +ORDER BY adminCount DESC +LIMIT 100 + `) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(strings.ToLower(formattedQuery)), " ") + + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + require.Contains(t, normalizedQuery, "join edge e on e.end_id = candidate_sources.root_id") + require.Contains(t, normalizedQuery, "e.end_id = traversal.next_id") +} + +func TestOptimizerSafetyAggregateTraversalCountReturnsCountAlias(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, COUNT(c) AS adminCount +RETURN u AS user, adminCount AS privileges +ORDER BY privileges DESC +LIMIT 100 + `) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(strings.ToLower(formattedQuery)), " ") + + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + require.Contains(t, normalizedQuery, "(source_node.id, source_node.kind_ids, source_node.properties)::nodecomposite as user") + require.Contains(t, normalizedQuery, "ranked.admincount as privileges") + require.Contains(t, normalizedQuery, "order by ranked.admincount desc") +} + +func TestOptimizerSafetyAggregateTraversalCountFoldsTerminalFilter(t 
*testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WHERE c.enabled = true +WITH DISTINCT u, COUNT(c) AS adminCount +RETURN u +ORDER BY adminCount DESC +LIMIT 100 + `) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(strings.ToLower(formattedQuery)), " ") + + requireOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + require.Contains(t, normalizedQuery, "terminal_nodes(id) as materialized") + require.Contains(t, normalizedQuery, "terminal_node.properties -> 'enabled'") + require.Contains(t, normalizedQuery, "join terminal_nodes on terminal_nodes.id = traversal.next_id") +} + +func TestOptimizerSafetyAggregateTraversalCountSkipsUnsafeWideningCandidates(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + query string + }{{ + name: "distinct terminal count", + query: ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, COUNT(DISTINCT c) AS adminCount +RETURN u +ORDER BY adminCount DESC +LIMIT 100 + `, + }, { + name: "optional traversal", + query: ` +MATCH (u:User) +WHERE u.hasspn = true +OPTIONAL MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, COUNT(c) AS adminCount +RETURN u +ORDER BY adminCount DESC +LIMIT 100 + `, + }, { + name: "path binding observed", + query: ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH p = (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, p, COUNT(c) AS adminCount +RETURN u, p +ORDER BY adminCount DESC +LIMIT 100 + `, + }, { + name: "relationship binding observed", + query: ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH (u)-[r:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, r, COUNT(c) AS adminCount +RETURN u, r +ORDER BY adminCount DESC +LIMIT 100 + `, + }, { + name: "correlated terminal filter", + 
query: ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WHERE c.name = u.name +WITH DISTINCT u, COUNT(c) AS adminCount +RETURN u +ORDER BY adminCount DESC +LIMIT 100 + `, + }, { + name: "post aggregation filter", + query: ` +MATCH (u:User) +WHERE u.hasspn = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, COUNT(c) AS adminCount +WHERE adminCount > 1 +RETURN u +ORDER BY adminCount DESC +LIMIT 100 + `, + }} + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + translation := optimizerSafetyTranslation(t, testCase.query) + + requireNoPlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + requireNoOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + }) + } +} + +func TestOptimizerSafetyAggregateTraversalCountSkipsObservedTerminal(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (u:User) +WHERE u.hasspn = true AND u.enabled = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, c, COUNT(c) AS adminCount +RETURN u, c +ORDER BY adminCount DESC +LIMIT 100 + `) + + requireNoPlannedOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) + requireNoOptimizationLowering(t, translation.Optimization, optimize.LoweringAggregateTraversalCount) +} + +func TestOptimizerSafetyShortestPathStrategyUsesPlannedBidirectionalSearch(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH p = allShortestPaths((s)-[:MemberOf*1..]->(e)) +WHERE s.name = 'source' AND e.name = 'target' +RETURN p + `) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + require.Contains(t, normalizedQuery, "bidirectional_asp_harness") + requirePlannedOptimizationLowering(t, translation.Optimization, 
"ShortestPathStrategySelection") + requirePlannedOptimizationLowering(t, translation.Optimization, "ShortestPathFilterMaterialization") + requireOptimizationLowering(t, translation.Optimization, "ShortestPathStrategySelection") + requireOptimizationLowering(t, translation.Optimization, "ShortestPathFilterMaterialization") +} + +func TestOptimizerSafetyShortestPathTerminalFilterUsesPlannedMaterialization(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (s:Group {name: 'source'}) +MATCH p = shortestPath((s)-[:MemberOf*1..]->(e)) +WHERE e.name = 'target' +RETURN p + `) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + require.Contains(t, normalizedQuery, "unidirectional_sp_harness") + require.Contains(t, normalizedQuery, "traversal_terminal_filter") + requirePlannedOptimizationLowering(t, translation.Optimization, "ShortestPathFilterMaterialization") + requireOptimizationLowering(t, translation.Optimization, "ShortestPathFilterMaterialization") +} + +func TestOptimizerSafetyShortestPathKindOnlyTerminalFilterUsesPlannedMaterialization(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH p = shortestPath((s:Group)-[:MemberOf|GenericAll|AdminTo*1..]->(t:Tag_Tier_Zero)) +WHERE s.objectid ENDS WITH '-513' AND s <> t +RETURN p +LIMIT 1000 + `) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + require.Contains(t, normalizedQuery, "unidirectional_sp_harness") + require.Contains(t, normalizedQuery, "traversal_terminal_filter") + requirePlannedOptimizationLowering(t, translation.Optimization, "ShortestPathFilterMaterialization") + requireOptimizationLowering(t, translation.Optimization, "ShortestPathFilterMaterialization") +} + +func TestOptimizerSafetyLimitPushdownUsesPlannedTraversalFrame(t 
*testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH p = (n:Group)-[:MemberOf]->(m:Group) +RETURN p +LIMIT 1 + `) + + requirePlannedOptimizationLowering(t, translation.Optimization, "LimitPushdown") + requireOptimizationLowering(t, translation.Optimization, "LimitPushdown") +} + +func TestOptimizerSafetyShortestPathLimitPushdownUsesPlannedHarness(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH p = shortestPath((s)-[:MemberOf*1..]->(e)) +WHERE s.name = 'source' AND e.name = 'target' +RETURN p +LIMIT 1 + `) + + requirePlannedOptimizationLowering(t, translation.Optimization, "LimitPushdown") + requireOptimizationLowering(t, translation.Optimization, "LimitPushdown") +} + +func TestOptimizerSafetyShortestPathRootCarriesUnwindSources(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` + UNWIND ['source'] AS sourceName + MATCH p = shortestPath((s:Group)-[:MemberOf*1..]->(e:Group)) + WHERE s.name = sourceName AND e.name = 'target' + RETURN sourceName, p + `) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := strings.Join(strings.Fields(formattedQuery), " ") + + require.Contains(t, normalizedQuery, "unidirectional_sp_harness") + require.Contains(t, normalizedQuery, "unnest(array ['source']::text[]) as i0") + requirePlanParameterContains(t, translation, "jsonb_typeof((n1.properties -> 'name')) = 'string'") + requirePlanParameterContains(t, translation, "(n0.properties ->> 'name') = i0") +} + +func TestOptimizerSafetyShortestPathTerminalCarriesUnwindSources(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` + UNWIND ['target'] AS targetName + MATCH p = shortestPath((s:Group)-[:MemberOf*1..]->(e:Group)) + WHERE s.name = 'source' AND e.name = targetName + RETURN targetName, p + `) + + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := 
strings.Join(strings.Fields(formattedQuery), " ") + + require.Contains(t, normalizedQuery, "unidirectional_sp_harness") + require.Contains(t, normalizedQuery, "unnest(array ['target']::text[]) as i0") + requirePlanParameterContains(t, translation, "(n1.properties ->> 'name') = i0") +} + +func TestOptimizerSafetyTranslationReportsOptimizerMetadata(t *testing.T) { + t.Parallel() + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), ` +MATCH p = (n:Group)-[:MemberOf*1..]->(m)-[:Enroll]->(ca:EnterpriseCA) +WHERE ca.name = 'target' +RETURN p +`) + require.NoError(t, err) + + translation, err := Translate(context.Background(), regularQuery, optimizerSafetyKindMapper(), nil, DefaultGraphID) + require.NoError(t, err) + + require.NotEmpty(t, translation.Optimization.Rules) + require.NotEmpty(t, translation.Optimization.PredicateAttachments) + require.NotNil(t, translation.Optimization.LoweringPlan) + require.NotEmpty(t, translation.Optimization.LoweringPlan.ProjectionPruning) + require.NotEmpty(t, translation.Optimization.LoweringPlan.LatePathMaterialization) + require.NotEmpty(t, translation.Optimization.LoweringPlan.ExpansionSuffixPushdown) + require.NotEmpty(t, translation.Optimization.LoweringPlan.PredicatePlacement) + requirePlannedOptimizationLowering(t, translation.Optimization, "ProjectionPruning") + requirePlannedOptimizationLowering(t, translation.Optimization, "LatePathMaterialization") + requirePlannedOptimizationLowering(t, translation.Optimization, "ExpansionSuffixPushdown") + requirePlannedOptimizationLowering(t, translation.Optimization, "PredicatePlacement") + requireOptimizationLowering(t, translation.Optimization, "ProjectionPruning") + requireOptimizationLowering(t, translation.Optimization, "LatePathMaterialization") + requireOptimizationLowering(t, translation.Optimization, "ExpansionSuffixPushdown") + requireOptimizationLowering(t, translation.Optimization, "PredicatePlacement") +} + +func 
TestOptimizerSafetyExpansionTerminalPushdownForZeroDepthExpansion(t *testing.T) { + t.Parallel() + + normalizedQuery := optimizerSafetySQL(t, ` +MATCH p = (n:Group)-[:MemberOf*0..]->(m)-[:Enroll]->(ca:EnterpriseCA) +RETURN p +`) + + require.Contains(t, normalizedQuery, "exists (select 1 from edge e1 join node n2") + require.Contains(t, normalizedQuery, "n1.id = e1.start_id") + require.Contains(t, normalizedQuery, "e1.kind_id = any (array [4]::int2[])") + require.Contains(t, normalizedQuery, "n2.kind_ids operator (pg_catalog.@>) array [5]::int2[]") +} + +func TestOptimizerSafetyExpansionTerminalPushdownForBoundEndpointSuffixChain(t *testing.T) { + t.Parallel() + + normalizedQuery := optimizerSafetySQL(t, ` +MATCH (ca:EnterpriseCA {name: 'target'}) +MATCH p = (n:Group)-[:MemberOf*0..]->(m)-[:Enroll]->(ct:CertTemplate)-[:PublishedTo]->(ca) +WHERE ct.authenticationenabled = true +RETURN p +`) + + require.Contains(t, normalizedQuery, "exists (select 1 from edge e1 join node n3") + require.Contains(t, normalizedQuery, "join edge e2 on n3.id = e2.start_id") + require.Contains(t, normalizedQuery, "n2.id = e1.start_id") + require.Contains(t, normalizedQuery, "e1.kind_id = any") + require.Contains(t, normalizedQuery, "n3.kind_ids operator (pg_catalog.@>)") + require.Contains(t, normalizedQuery, "e2.kind_id = any") + require.Contains(t, normalizedQuery, "e2.end_id = (s0.n0).id") + requireSQLContainsInOrder(t, normalizedQuery, + "exists (select 1 from edge e1 join node n3", + "properties -> 'authenticationenabled'", + "join edge e2 on n3.id = e2.start_id", + "e2.end_id = (s0.n0).id", + ) +} + +func TestOptimizerSafetyExpansionTerminalPushdownIncludesConstrainedBoundEndpoint(t *testing.T) { + t.Parallel() + + translation := optimizerSafetyTranslation(t, ` +MATCH (ca) +MATCH p = (n:Group)-[:MemberOf*0..]->(m)-[:Enroll]->(ct:CertTemplate)-[:PublishedTo]->(ca:EnterpriseCA) +RETURN p +`) + formattedQuery, err := Translated(translation) + require.NoError(t, err) + normalizedQuery := 
strings.Join(strings.Fields(formattedQuery), " ") + + requirePlannedOptimizationLowering(t, translation.Optimization, "ExpansionSuffixPushdown") + requireOptimizationLowering(t, translation.Optimization, "ExpansionSuffixPushdown") + requireSQLContainsInOrder(t, normalizedQuery, + "exists (select 1 from edge e1 join node n3", + "join edge e2 on n3.id = e2.start_id", + "e2.end_id = (s0.n0).id", + ) + require.Contains(t, normalizedQuery, "(s0.n0).kind_ids operator (pg_catalog.@>)") +} + +func TestOptimizerSafetyExpansionTerminalPushdownForBoundDomainSuffix(t *testing.T) { + t.Parallel() + + normalizedQuery := optimizerSafetySQL(t, ` +MATCH (d:Domain {name: 'target'}) +MATCH p = (ca:EnterpriseCA)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(root:RootCA)-[:RootCAFor]->(d) +RETURN p +`) + + require.Contains(t, normalizedQuery, "exists (select 1 from edge e1") + require.Contains(t, normalizedQuery, "e1.kind_id = any") + require.Contains(t, normalizedQuery, "n2.kind_ids operator (pg_catalog.@>)") + require.Contains(t, normalizedQuery, "n2.id = e1.start_id") + require.Contains(t, normalizedQuery, "e1.end_id = (s0.n0).id") +} + +func TestOptimizerSafetyExpansionTerminalPushdownForInboundFixedSuffix(t *testing.T) { + t.Parallel() + + normalizedQuery := optimizerSafetySQL(t, ` +MATCH p = (ca:EnterpriseCA)<-[:PublishedTo*1..]-(ct)<-[:Enroll]-(m:Group) +RETURN p +`) + + require.Contains(t, normalizedQuery, "exists (select 1 from edge e1 join node n2") + require.Contains(t, normalizedQuery, "n1.id = e1.end_id") + require.Contains(t, normalizedQuery, "e1.kind_id = any (array [4]::int2[])") + require.Contains(t, normalizedQuery, "n2.kind_ids operator (pg_catalog.@>)") +} + +func TestOptimizerSafetyExpansionTerminalPushdownSkipsDirectionlessSuffix(t *testing.T) { + t.Parallel() + + normalizedQuery := optimizerSafetySQL(t, ` +MATCH p = (n:Group)-[:MemberOf*1..]->(m)-[:Enroll]-(ca:EnterpriseCA) +RETURN p +`) + + require.NotContains(t, normalizedQuery, "exists (select 1 from edge e1 join 
node n2") +} diff --git a/cypher/models/pgsql/translate/path_functions.go b/cypher/models/pgsql/translate/path_functions.go index a73be813..516f3022 100644 --- a/cypher/models/pgsql/translate/path_functions.go +++ b/cypher/models/pgsql/translate/path_functions.go @@ -12,7 +12,7 @@ func pathCompositeEdgesExpression(scope *Scope, pathBinding *BoundIdentifier) (p for _, dependency := range pathBinding.Dependencies { switch dependency.DataType { case pgsql.ExpansionPath: - if edgeArrayReference, err := expansionPathEdgeArrayReference(scope, dependency); err != nil { + if edgeArrayReference, err := expansionPathEdgeArrayExpression(scope, dependency); err != nil { return nil, err } else { edgeArrayReferences = append(edgeArrayReferences, edgeArrayReference) @@ -25,6 +25,9 @@ func pathCompositeEdgesExpression(scope *Scope, pathBinding *BoundIdentifier) (p CastType: pgsql.EdgeCompositeArray, }) + case pgsql.PathEdge: + edgeArrayReferences = append(edgeArrayReferences, pathEdgeArrayExpression(scope, dependency)) + default: // Path bindings also depend on their node endpoints. Those are not part of relationships(p). 
} @@ -38,7 +41,7 @@ func pathCompositeEdgesExpression(scope *Scope, pathBinding *BoundIdentifier) (p } func resolvePathCompositeFieldReference(scope *Scope, reference pgsql.RowColumnReference) (pgsql.Expression, bool, error) { - identifier, isIdentifier := reference.Identifier.(pgsql.Identifier) + identifier, isIdentifier := unwrapParenthetical(reference.Identifier).(pgsql.Identifier) if !isIdentifier { return nil, false, nil } @@ -62,6 +65,15 @@ func resolvePathCompositeFieldReference(scope *Scope, reference pgsql.RowColumnR case pgsql.ColumnEdges: expression, err := pathCompositeEdgesExpression(scope, binding) return expression, true, err + case pgsql.ColumnNodes: + if expression, err := expressionForPathComposite(binding, scope); err != nil { + return nil, false, err + } else { + return pgsql.RowColumnReference{ + Identifier: expression, + Column: reference.Column, + }, true, nil + } default: return nil, false, fmt.Errorf("unsupported path composite field reference: %s", reference.Column) } @@ -141,6 +153,24 @@ func resolvePathCompositeFieldReferences(scope *Scope, expression pgsql.Expressi case nil: return nil, nil + case pgsql.Identifier: + if binding, bound := scope.Lookup(typedExpression); !bound { + if aliasedBinding, aliasBound := scope.AliasedLookup(typedExpression); aliasBound { + binding = aliasedBinding + bound = true + } + + if !bound || binding.DataType != pgsql.PathComposite { + return expression, nil + } + + return expressionForPathComposite(binding, scope) + } else if binding.DataType == pgsql.PathComposite { + return expressionForPathComposite(binding, scope) + } + + return expression, nil + case pgsql.RowColumnReference: if resolved, rewritten, err := resolvePathCompositeFieldReference(scope, typedExpression); rewritten || err != nil { return resolved, err @@ -214,6 +244,44 @@ func resolvePathCompositeFieldReferences(scope *Scope, expression pgsql.Expressi return typedExpression, nil } + case pgsql.ArraySlice: + if resolved, err := 
resolvePathCompositeFieldReferences(scope, typedExpression.Expression); err != nil { + return nil, err + } else { + typedExpression.Expression = resolved + } + + if typedExpression.Lower != nil { + if resolved, err := resolvePathCompositeFieldReferences(scope, typedExpression.Lower); err != nil { + return nil, err + } else { + typedExpression.Lower = resolved + } + } + + if typedExpression.Upper != nil { + if resolved, err := resolvePathCompositeFieldReferences(scope, typedExpression.Upper); err != nil { + return nil, err + } else { + typedExpression.Upper = resolved + } + } + + return typedExpression, nil + + case *pgsql.ArraySlice: + if typedExpression == nil { + return nil, nil + } + + resolved, err := resolvePathCompositeFieldReferences(scope, *typedExpression) + if err != nil { + return nil, err + } + + arraySlice := resolved.(pgsql.ArraySlice) + return &arraySlice, nil + case pgsql.ArrayLiteral: for idx, value := range typedExpression.Values { if resolved, err := resolvePathCompositeFieldReferences(scope, value); err != nil { diff --git a/cypher/models/pgsql/translate/pattern.go b/cypher/models/pgsql/translate/pattern.go index 57eff0d2..a77d03ce 100644 --- a/cypher/models/pgsql/translate/pattern.go +++ b/cypher/models/pgsql/translate/pattern.go @@ -38,6 +38,10 @@ func (s *Translator) translatePatternPart(patternPart *cypher.PatternPart) error newPatternPart.IsTraversal = len(patternPart.PatternElements) > 1 newPatternPart.ShortestPath = patternPart.ShortestPathPattern newPatternPart.AllShortestPaths = patternPart.AllShortestPathsPattern + if target, hasTarget := s.patternTargets[patternPart]; hasTarget { + newPatternPart.Target = target + newPatternPart.HasTarget = true + } if cypherBinding, hasCypherSymbol, err := extractIdentifierFromCypherExpression(patternPart); err != nil { return err @@ -80,7 +84,6 @@ func (s *Translator) buildTraversalPattern(traversalStep *TraversalStep, isRootS }, Query: traversalStepQuery, }) - 
s.query.CurrentPart().AllowLimitPushdown(traversalStep.Frame.Binding.Identifier) } } else { if traversalStepQuery, err := s.buildTraversalPatternStep(traversalStep.Frame, traversalStep); err != nil { @@ -92,7 +95,6 @@ func (s *Translator) buildTraversalPattern(traversalStep *TraversalStep, isRootS }, Query: traversalStepQuery, }) - s.query.CurrentPart().AllowLimitPushdown(traversalStep.Frame.Binding.Identifier) } } @@ -112,7 +114,6 @@ func (s *Translator) buildExpansionPattern(traversalStepContext TraversalStepCon }, Query: traversalStepQuery, }) - s.query.CurrentPart().AllowLimitPushdown(traversalStep.Frame.Binding.Identifier) } } else { if traversalStepQuery, err := s.buildExpansionPatternStep(traversalStepContext, expansion); err != nil { @@ -124,7 +125,6 @@ func (s *Translator) buildExpansionPattern(traversalStepContext TraversalStepCon }, Query: traversalStepQuery, }) - s.query.CurrentPart().AllowLimitPushdown(traversalStep.Frame.Binding.Identifier) } } @@ -135,6 +135,8 @@ func (s *Translator) buildShortestPathsExpansionPattern(traversalStepContext Tra traversalStep := traversalStepContext.CurrentStep if traversalStepContext.IsRootStep { + expansion.SetUnwindClauses(s.query.CurrentPart().ConsumeUnwindClauses()) + if allPaths { if traversalStep.Expansion.UseBidirectionalSearch { if traversalStepQuery, err := expansion.BuildBiDirectionalAllShortestPathsRoot(); err != nil { @@ -229,6 +231,8 @@ func (s *Translator) buildTraversalPatternPart(part *PatternPart) error { } else if err := s.buildTraversalPattern(traversalStep, isRootStep); err != nil { return err } + + s.allowLimitPushdownForStep(part, idx, traversalStep) } return nil diff --git a/cypher/models/pgsql/translate/predicate.go b/cypher/models/pgsql/translate/predicate.go index d14b37f0..f9f3b38b 100644 --- a/cypher/models/pgsql/translate/predicate.go +++ b/cypher/models/pgsql/translate/predicate.go @@ -4,11 +4,13 @@ import ( "fmt" "github.com/specterops/dawgs/cypher/models" + 
"github.com/specterops/dawgs/cypher/models/cypher" "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" "github.com/specterops/dawgs/graph" ) -func (s *Translator) preparePatternPredicate() error { +func (s *Translator) preparePatternPredicate(predicate *cypher.PatternPredicate) error { currentQueryPart := s.query.CurrentPart() // Stash the match pattern @@ -17,12 +19,16 @@ func (s *Translator) preparePatternPredicate() error { // All pattern predicates must be relationship patterns newPatternPart := currentQueryPart.currentPattern.NewPart() newPatternPart.IsTraversal = true + if target, hasTarget := s.patternPredicateTargets[predicate]; hasTarget { + newPatternPart.Target = target + newPatternPart.HasTarget = true + } return nil } func (s *Translator) buildOptimizedRelationshipExistPredicate(part *PatternPart, traversalStep *TraversalStep) (pgsql.Expression, error) { - whereClause := pgsql.NewBinaryExpression( + var whereClause pgsql.Expression = pgsql.NewBinaryExpression( pgsql.NewBinaryExpression( pgsql.CompoundIdentifier{traversalStep.Edge.Identifier, pgsql.ColumnStartID}, pgsql.OperatorEquals, @@ -34,6 +40,40 @@ func (s *Translator) buildOptimizedRelationshipExistPredicate(part *PatternPart, pgsql.CompoundIdentifier{traversalStep.LeftNode.Identifier, pgsql.ColumnID}), ) + if traversalStep.RightNodeBound { + var ( + forward = pgsql.NewBinaryExpression( + pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{traversalStep.Edge.Identifier, pgsql.ColumnStartID}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{traversalStep.LeftNode.Identifier, pgsql.ColumnID}), + pgsql.OperatorAnd, + pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{traversalStep.Edge.Identifier, pgsql.ColumnEndID}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{traversalStep.RightNode.Identifier, pgsql.ColumnID}), + ) + reverse = pgsql.NewBinaryExpression( + pgsql.NewBinaryExpression( + 
pgsql.CompoundIdentifier{traversalStep.Edge.Identifier, pgsql.ColumnEndID}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{traversalStep.LeftNode.Identifier, pgsql.ColumnID}), + pgsql.OperatorAnd, + pgsql.NewBinaryExpression( + pgsql.CompoundIdentifier{traversalStep.Edge.Identifier, pgsql.ColumnStartID}, + pgsql.OperatorEquals, + pgsql.CompoundIdentifier{traversalStep.RightNode.Identifier, pgsql.ColumnID}), + ) + ) + whereClause = pgsql.NewBinaryExpression(forward, pgsql.OperatorOr, reverse) + } + + if constraint, err := s.treeTranslator.ConsumeConstraintsFromVisibleSet(pgsql.AsIdentifierSet(traversalStep.Edge.Identifier)); err != nil { + return nil, err + } else { + whereClause = pgsql.OptionalAnd(constraint.Expression, pgsql.NewParenthetical(whereClause)) + } + if err := RewriteFrameBindings(s.scope, whereClause); err != nil { return nil, err } @@ -83,6 +123,19 @@ func (s *Translator) translatePatternPredicate() error { return nil } +func (s *Translator) usePatternPredicateExistencePlacement(patternPart *PatternPart, traversalStep *TraversalStep) (bool, error) { + if patternPart == nil || !patternPart.HasTarget || traversalStep == nil || traversalStep.Direction != graph.DirectionBoth { + return false, nil + } + + decision, hasDecision := s.patternPredicateDecisions[patternPart.Target.TraversalStep(0)] + if !hasDecision || decision.Mode != optimize.PatternPredicatePlacementExistence { + return false, nil + } + + return true, nil +} + // buildPatternPredicates is used by translateMatch to resolve deferred pattern predicate // futures collected for the current MATCH/OPTIONAL MATCH query part's WHERE expressions func (s *Translator) buildPatternPredicates() error { @@ -97,33 +150,22 @@ func (s *Translator) buildPatternPredicates() error { ) if len(patternPart.TraversalSteps) == 1 { - var ( - traversalStep = patternPart.TraversalSteps[0] - traversalStepIdentifiers = pgsql.AsIdentifierSet( - traversalStep.LeftNode.Identifier, - traversalStep.Edge.Identifier, - 
traversalStep.RightNode.Identifier, - ) - ) - - if traversalStep.Direction == graph.DirectionBoth { - if hasGlobalConstraints, err := s.treeTranslator.HasAnyConstraints(traversalStepIdentifiers); err != nil { + traversalStep := patternPart.TraversalSteps[0] + if useExistencePlacement, err := s.usePatternPredicateExistencePlacement(patternPart, traversalStep); err != nil { + return err + } else if useExistencePlacement { + if predicateExpression, err := s.buildOptimizedRelationshipExistPredicate(patternPart, traversalStep); err != nil { return err - } else if hasPredicateConstraints, err := patternPart.Constraints.HasConstraints(traversalStepIdentifiers); err != nil { - return err - } else if !hasPredicateConstraints && !hasGlobalConstraints { - if predicateExpression, err := s.buildOptimizedRelationshipExistPredicate(patternPart, traversalStep); err != nil { - return err - } else { - predicateFuture.SyntaxNode = predicateExpression - } - - return nil + } else { + predicateFuture.SyntaxNode = predicateExpression + s.recordLowering(optimize.LoweringPredicatePlacement) } + + continue } } - if err := s.translateTraversalPatternPart(patternPart, true); err != nil { + if err := s.translateTraversalPatternPart(patternPart, true, true); err != nil { return err } diff --git a/cypher/models/pgsql/translate/predicate_placement.go b/cypher/models/pgsql/translate/predicate_placement.go new file mode 100644 index 00000000..6ed48f32 --- /dev/null +++ b/cypher/models/pgsql/translate/predicate_placement.go @@ -0,0 +1,46 @@ +package translate + +import ( + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" +) + +func (s *Translator) recordPredicatePlacementConsumption(part *PatternPart, stepIndex int, traversalStep *TraversalStep, constraints PatternConstraints) { + if part == nil || !part.HasTarget || traversalStep == nil { + return + } + + for _, decision := range 
s.predicatePlacementDecisions[part.Target.TraversalStep(stepIndex)] { + if predicatePlacementDecisionConsumed(decision, traversalStep, constraints) { + s.recordLowering(optimize.LoweringPredicatePlacement) + return + } + } +} + +func predicatePlacementDecisionConsumed(decision optimize.PredicatePlacementDecision, traversalStep *TraversalStep, constraints PatternConstraints) bool { + for _, symbol := range decision.Attachment.BindingSymbols { + if bindingConstraintConsumed(symbol, traversalStep.LeftNode, constraints.LeftNode) || + bindingConstraintConsumed(symbol, traversalStep.Edge, constraints.Edge) || + bindingConstraintConsumed(symbol, traversalStep.RightNode, constraints.RightNode) { + return true + } + } + + return false +} + +func bindingConstraintConsumed(symbol string, binding *BoundIdentifier, constraint *Constraint) bool { + return constraint != nil && + constraint.Expression != nil && + bindingMatchesSymbol(binding, pgsql.Identifier(symbol)) +} + +func bindingMatchesSymbol(binding *BoundIdentifier, symbol pgsql.Identifier) bool { + if binding == nil { + return false + } + + return binding.Identifier == symbol || + (binding.Alias.Set && binding.Alias.Value == symbol) +} diff --git a/cypher/models/pgsql/translate/predicate_test.go b/cypher/models/pgsql/translate/predicate_test.go index c94fb85e..d9182a10 100644 --- a/cypher/models/pgsql/translate/predicate_test.go +++ b/cypher/models/pgsql/translate/predicate_test.go @@ -30,8 +30,35 @@ RETURN p`) require.NoError(t, err) require.Contains(t, formatted, "as p from s0 where") - require.Contains(t, formatted, "with s1 as") - require.NotContains(t, formatted, "as p from s1 where") + require.Contains(t, formatted, "exists (select 1 from edge") + require.Contains(t, formatted, "kind_id = any (array [3, 4]::int2[])") + require.Contains(t, formatted, "start_id = (s0.n0).id") + require.Contains(t, formatted, "end_id = (s0.n1).id") + require.NotContains(t, formatted, "with s1 as") +} + +func 
TestOptimizedPatternPredicatesContinueAfterFirstPlacement(t *testing.T) { + kindMapper := pgutil.NewInMemoryKindMapper() + kindMapper.Put(graph.StringKind("Domain")) + kindMapper.Put(graph.StringKind("SpoofSIDHistory")) + kindMapper.Put(graph.StringKind("AbuseTGTDelegation")) + + query, err := frontend.ParseCypher(frontend.NewContext(), ` + MATCH (n:Domain), (m:Domain) + WHERE (n)-[:SpoofSIDHistory]-(m) + AND (n)-[:AbuseTGTDelegation]-(m) + RETURN n + `) + require.NoError(t, err) + + translation, err := Translate(context.Background(), query, kindMapper, nil, DefaultGraphID) + require.NoError(t, err) + + formatted, err := Translated(translation) + require.NoError(t, err) + + require.Contains(t, formatted, "array [2]::int2[]") + require.Contains(t, formatted, "array [3]::int2[]") } func translatePredicateQuery(t *testing.T, cypherQuery string, parameters map[string]any) string { diff --git a/cypher/models/pgsql/translate/projection.go b/cypher/models/pgsql/translate/projection.go index f8422068..af3eff99 100644 --- a/cypher/models/pgsql/translate/projection.go +++ b/cypher/models/pgsql/translate/projection.go @@ -8,6 +8,7 @@ import ( "github.com/specterops/dawgs/cypher/models" "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" ) type BoundProjections struct { @@ -162,14 +163,6 @@ func bindingFrameReference(scope *Scope, binding *BoundIdentifier) pgsql.Compoun return pgsql.CompoundIdentifier{frameIdentifier, binding.Identifier} } -func expansionPathEdgeArrayReference(scope *Scope, expansionPath *BoundIdentifier) (pgsql.Expression, error) { - for _, dependency := range expansionPath.Dependencies { - return bindingFrameReference(scope, dependency), nil - } - - return nil, fmt.Errorf("expansion path %s does not reference an expansion edge binding", expansionPath.Identifier) -} - func pathBindingReference(scope *Scope, binding *BoundIdentifier) pgsql.Expression { if binding.LastProjection != nil { return 
pgsql.CompoundIdentifier{binding.LastProjection.Binding.Identifier, binding.Identifier} @@ -209,16 +202,78 @@ func pathCompositeColumnReference(scope *Scope, binding *BoundIdentifier, column return pgsql.CompoundIdentifier{binding.Identifier, column} } +func pathEdgeIDReference(scope *Scope, binding *BoundIdentifier) pgsql.Expression { + if binding.LastProjection != nil || scope.CurrentFrameBinding() != nil { + return pathBindingReference(scope, binding) + } + + return pgsql.CompoundIdentifier{binding.Identifier, pgsql.ColumnID} +} + +func pathEdgeArrayExpression(scope *Scope, edge *BoundIdentifier) pgsql.Expression { + return &pgsql.EdgeArrayFromPathIDs{ + PathIDs: pgsql.ArrayLiteral{ + Values: []pgsql.Expression{ + pathEdgeIDReference(scope, edge), + }, + CastType: pgsql.Int8Array, + }, + } +} + func expansionPathEdgeArrayExpression(scope *Scope, expansionPath *BoundIdentifier) (pgsql.Expression, error) { - if scope.CurrentFrameBinding() != nil || expansionPath.LastProjection != nil { - return expansionPathEdgeArrayReference(scope, expansionPath) + return &pgsql.EdgeArrayFromPathIDs{ + PathIDs: pathBindingReference(scope, expansionPath), + }, nil +} + +func optionalOr(leftOperand, rightOperand pgsql.Expression) pgsql.Expression { + if leftOperand == nil { + return rightOperand + } else if rightOperand == nil { + return leftOperand } - for _, dependency := range expansionPath.Dependencies { - return dependency.Identifier, nil + return pgsql.NewBinaryExpression(leftOperand, pgsql.OperatorOr, rightOperand) +} + +func expressionIsNull(expression pgsql.Expression) pgsql.Expression { + return pgsql.NewBinaryExpression(expression, pgsql.OperatorIs, pgsql.NullLiteral()) +} + +func pathCompositeDependencyNullGuard(scope *Scope, dependency *BoundIdentifier) pgsql.Expression { + if dependency == nil { + return nil } - return nil, fmt.Errorf("expansion path %s does not reference an expansion edge binding", expansionPath.Identifier) + switch dependency.DataType { + case 
pgsql.ExpansionPath: + return expressionIsNull(pathBindingReference(scope, dependency)) + + case pgsql.EdgeComposite: + return expressionIsNull(pathCompositeColumnReference(scope, dependency, pgsql.ColumnID)) + + case pgsql.PathEdge: + return expressionIsNull(pathEdgeIDReference(scope, dependency)) + + case pgsql.NodeComposite, pgsql.ExpansionRootNode, pgsql.ExpansionTerminalNode: + return expressionIsNull(pathCompositeColumnReference(scope, dependency, pgsql.ColumnID)) + + default: + return nil + } +} + +func nullGuardPathCompositeExpression(expression, nullGuard pgsql.Expression) pgsql.Expression { + if nullGuard == nil { + return expression + } + + return pgsql.Case{ + Conditions: []pgsql.Expression{nullGuard}, + Then: []pgsql.Expression{pgsql.NullLiteral()}, + Else: expression, + } } func expressionForPathComposite(projected *BoundIdentifier, scope *Scope) (pgsql.Expression, error) { @@ -232,12 +287,16 @@ func expressionForPathComposite(projected *BoundIdentifier, scope *Scope) (pgsql directNodeReferences []pgsql.Expression directEdgeReferences []pgsql.Expression seenExpansionPath = false + seenPathEdge = false + nullGuard pgsql.Expression ) // Path composite components are encoded as dependencies on the bound identifier representing the // path. This is not ideal as it escapes normal translation flow as driven by the structure of the // originating cypher AST. 
for _, dependency := range projected.Dependencies { + nullGuard = optionalOr(nullGuard, pathCompositeDependencyNullGuard(scope, dependency)) + switch dependency.DataType { case pgsql.ExpansionPath: seenExpansionPath = true @@ -256,6 +315,10 @@ func expressionForPathComposite(projected *BoundIdentifier, scope *Scope) (pgsql CastType: pgsql.EdgeCompositeArray, }) + case pgsql.PathEdge: + seenPathEdge = true + edgeArrayReferences = append(edgeArrayReferences, pathEdgeArrayExpression(scope, dependency)) + case pgsql.NodeComposite, pgsql.ExpansionRootNode, pgsql.ExpansionTerminalNode: directNodeReferences = append(directNodeReferences, pathCompositeReference(scope, dependency, pgsql.NodeTableColumns)) nodeReferences = append(nodeReferences, pathCompositeColumnReference(scope, dependency, pgsql.ColumnID)) @@ -269,8 +332,8 @@ func expressionForPathComposite(projected *BoundIdentifier, scope *Scope) (pgsql // those explicit components instead of reconstructing the path from edge IDs: this preserves path // order and duplicate nodes, and it also works for rows produced by data-modifying CTEs where // re-reading node/edge tables in the same statement may not see the RETURNING values. 
- if !seenExpansionPath && len(directNodeReferences) > 0 { - return pgsql.CompositeValue{ + if !seenExpansionPath && !seenPathEdge && len(directNodeReferences) > 0 { + return nullGuardPathCompositeExpression(pgsql.CompositeValue{ DataType: pgsql.PathComposite, Values: []pgsql.Expression{ pgsql.ArrayLiteral{ @@ -282,10 +345,10 @@ func expressionForPathComposite(projected *BoundIdentifier, scope *Scope) (pgsql CastType: pgsql.EdgeCompositeArray, }, }, - }, nil + }, nullGuard), nil } - if seenExpansionPath { + if seenExpansionPath || seenPathEdge { if len(directNodeReferences) == 0 { return nil, fmt.Errorf("expansion path %s does not contain a root node reference", projected.Identifier) } @@ -295,7 +358,7 @@ func expressionForPathComposite(projected *BoundIdentifier, scope *Scope) (pgsql edgeArrayExpression = pgsql.ArrayLiteral{CastType: pgsql.EdgeCompositeArray} } - return pgsql.FunctionCall{ + return nullGuardPathCompositeExpression(pgsql.FunctionCall{ Function: pgsql.FunctionOrderedEdgesToPath, Parameters: []pgsql.Expression{ directNodeReferences[0], @@ -306,9 +369,9 @@ func expressionForPathComposite(projected *BoundIdentifier, scope *Scope) (pgsql }, }, CastType: pgsql.PathComposite, - }, nil + }, nullGuard), nil } else if len(nodeReferences) > 0 { - return pgsql.FunctionCall{ + return nullGuardPathCompositeExpression(pgsql.FunctionCall{ Function: pgsql.FunctionNodesToPath, Parameters: []pgsql.Expression{ pgsql.Variadic{ @@ -319,7 +382,7 @@ func expressionForPathComposite(projected *BoundIdentifier, scope *Scope) (pgsql }, }, CastType: pgsql.PathComposite, - }, nil + }, nullGuard), nil } return nil, fmt.Errorf("path variable does not contain valid components") @@ -402,12 +465,11 @@ func buildProjectionForExpansionEdge(alias pgsql.Identifier, projected *BoundIde // Create a new final projection that's aliased to the visible binding's identifier return []pgsql.SelectItem{ &pgsql.AliasedExpression{ - Expression: &pgsql.Parenthetical{ - Expression: 
pgsql.FormattingLiteral(fmt.Sprintf( - "select coalesce(array_agg((%[1]s.id, %[1]s.start_id, %[1]s.end_id, %[1]s.kind_id, %[1]s.properties)::edgecomposite order by _path.ordinality), array []::edgecomposite[]) from unnest(%[2]s.path) with ordinality as _path(id, ordinality) join edge %[1]s on %[1]s.id = _path.id", - projected.Identifier, + Expression: &pgsql.EdgeArrayFromPathIDs{ + PathIDs: pgsql.CompoundIdentifier{ scope.CurrentFrame().Binding.Identifier, - )), + pgsql.ColumnPath, + }, }, Alias: pgsql.AsOptionalIdentifier(alias), }, @@ -441,6 +503,27 @@ func buildProjectionForEdgeComposite(alias pgsql.Identifier, projected *BoundIde }, nil } +func buildProjectionForPathEdge(alias pgsql.Identifier, projected *BoundIdentifier, referenceFrame *Frame) ([]pgsql.SelectItem, error) { + var expression pgsql.Expression + + if projected.LastProjection != nil { + if referenceFrame == nil { + referenceFrame = projected.LastProjection + } + + expression = pgsql.CompoundIdentifier{referenceFrame.Binding.Identifier, projected.Identifier} + } else { + expression = pgsql.CompoundIdentifier{projected.Identifier, pgsql.ColumnID} + } + + return []pgsql.SelectItem{ + &pgsql.AliasedExpression{ + Expression: expression, + Alias: pgsql.AsOptionalIdentifier(alias), + }, + }, nil +} + func buildProjection(alias pgsql.Identifier, projected *BoundIdentifier, scope *Scope, referenceFrame *Frame) ([]pgsql.SelectItem, error) { switch projected.DataType { case pgsql.ExpansionPath: @@ -461,6 +544,9 @@ func buildProjection(alias pgsql.Identifier, projected *BoundIdentifier, scope * case pgsql.EdgeComposite: return buildProjectionForEdgeComposite(alias, projected, referenceFrame) + case pgsql.PathEdge: + return buildProjectionForPathEdge(alias, projected, referenceFrame) + default: // If this isn't a type that requires a unique projection, reflect the identifier as-is with its alias var expression pgsql.Expression @@ -928,18 +1014,22 @@ func limitPushdownTailSource(currentPart *QueryPart, 
tailSelect pgsql.Select) (p return sourceFrame, true } -func pushDownShortestPathLimit(currentPart *QueryPart, tailSelect pgsql.Select) { +func pushDownShortestPathLimit(currentPart *QueryPart, tailSelect pgsql.Select) bool { sourceFrame, canPushDown := limitPushdownTailSource(currentPart, tailSelect) if !canPushDown { - return + return false } if sourceCTE := findCTE(currentPart.Model, sourceFrame); sourceCTE != nil && + currentPart.CanPushDownLimitTo(sourceFrame) && countLimitPushdownShortestPathHarnessCalls(sourceCTE.Query) == 1 { // Multiple harness calls in one source CTE would make one outer LIMIT // ambiguous, so only the single-harness case is rewritten. appendLimitToShortestPathHarness(&sourceCTE.Query, currentPart.Limit) + return true } + + return false } func findCTE(query *pgsql.Query, cteName pgsql.Identifier) *pgsql.CommonTableExpression { @@ -967,13 +1057,13 @@ func applyLimitToCTE(query *pgsql.Query, cteName pgsql.Identifier, limit pgsql.E return false } -func pushDownTraversalLimit(currentPart *QueryPart, tailSelect pgsql.Select) { +func pushDownTraversalLimit(currentPart *QueryPart, tailSelect pgsql.Select) bool { sourceFrame, canPushDown := limitPushdownTailSource(currentPart, tailSelect) if !canPushDown || !currentPart.CanPushDownLimitTo(sourceFrame) { - return + return false } - applyLimitToCTE(currentPart.Model, sourceFrame, currentPart.Limit) + return applyLimitToCTE(currentPart.Model, sourceFrame, currentPart.Limit) } func projectionAliasBindings(scope *Scope, projections []*Projection) map[pgsql.Identifier]pgsql.Identifier { @@ -1003,6 +1093,228 @@ func rewriteOrderByProjectionAlias(orderBy *pgsql.OrderBy, aliases map[pgsql.Ide } } +type pathCompositeReferenceCount struct { + binding *BoundIdentifier + full int + nodes int + edges int +} + +func (s pathCompositeReferenceCount) componentReferences() int { + return s.nodes + s.edges +} + +func (s pathCompositeReferenceCount) totalReferences() int { + return s.full + s.componentReferences() +} 
+ +func pathCompositeBinding(scope *Scope, identifier pgsql.Identifier) (*BoundIdentifier, bool) { + binding, bound := scope.Lookup(identifier) + if !bound { + binding, bound = scope.AliasedLookup(identifier) + } + + if !bound || binding.DataType != pgsql.PathComposite || binding.LastProjection != nil { + return nil, false + } + + return binding, true +} + +func ensurePathCompositeReferenceCount( + counts map[pgsql.Identifier]*pathCompositeReferenceCount, + orderedCounts *[]*pathCompositeReferenceCount, + binding *BoundIdentifier, +) *pathCompositeReferenceCount { + if count, seen := counts[binding.Identifier]; seen { + return count + } + + count := &pathCompositeReferenceCount{ + binding: binding, + } + + counts[binding.Identifier] = count + *orderedCounts = append(*orderedCounts, count) + + return count +} + +func countPathCompositeComponents(scope *Scope, expressions ...pgsql.Expression) ([]*pathCompositeReferenceCount, error) { + var ( + counts = map[pgsql.Identifier]*pathCompositeReferenceCount{} + orderedCounts []*pathCompositeReferenceCount + ) + + for _, expression := range expressions { + if expression == nil { + continue + } + + if err := walk.PgSQL(expression, walk.NewSimpleVisitor[pgsql.SyntaxNode](func(node pgsql.SyntaxNode, _ walk.VisitorHandler) { + reference, isRowColumnReference := node.(pgsql.RowColumnReference) + if !isRowColumnReference || (reference.Column != pgsql.ColumnNodes && reference.Column != pgsql.ColumnEdges) { + return + } + + identifier, isIdentifier := unwrapParenthetical(reference.Identifier).(pgsql.Identifier) + if !isIdentifier { + return + } + + binding, bound := pathCompositeBinding(scope, identifier) + if !bound { + return + } + + count := ensurePathCompositeReferenceCount(counts, &orderedCounts, binding) + switch reference.Column { + case pgsql.ColumnNodes: + count.nodes += 1 + case pgsql.ColumnEdges: + count.edges += 1 + } + })); err != nil { + return nil, err + } + } + + return orderedCounts, nil +} + +func 
countPathCompositeProjectionReferences(scope *Scope, projections []*Projection) ([]*pathCompositeReferenceCount, error) { + var ( + counts = map[pgsql.Identifier]*pathCompositeReferenceCount{} + orderedCounts []*pathCompositeReferenceCount + expressions = make([]pgsql.Expression, 0, len(projections)) + ) + + for _, projection := range projections { + expressions = append(expressions, projection.SelectItem) + + identifier, isIdentifier := projection.SelectItem.(pgsql.Identifier) + if !isIdentifier { + continue + } + + binding, bound := pathCompositeBinding(scope, identifier) + if !bound { + continue + } + + ensurePathCompositeReferenceCount(counts, &orderedCounts, binding).full += 1 + } + + componentCounts, err := countPathCompositeComponents(scope, expressions...) + if err != nil { + return nil, err + } + + for _, componentCount := range componentCounts { + count := ensurePathCompositeReferenceCount(counts, &orderedCounts, componentCount.binding) + count.nodes += componentCount.nodes + count.edges += componentCount.edges + } + + return orderedCounts, nil +} + +func tailPathCompositeStageBindings(scope *Scope, expression pgsql.Expression) ([]*BoundIdentifier, error) { + counts, err := countPathCompositeComponents(scope, expression) + if err != nil { + return nil, err + } + + bindings := make([]*BoundIdentifier, 0, len(counts)) + for _, count := range counts { + if count.nodes > 0 { + bindings = append(bindings, count.binding) + } + } + + return bindings, nil +} + +func projectionPathCompositeStageBindings(scope *Scope, projections []*Projection) ([]*BoundIdentifier, error) { + counts, err := countPathCompositeProjectionReferences(scope, projections) + if err != nil { + return nil, err + } + + bindings := make([]*BoundIdentifier, 0, len(counts)) + for _, count := range counts { + switch { + case count.full > 0 && count.totalReferences() > count.full: + bindings = append(bindings, count.binding) + case count.full > 1: + bindings = append(bindings, count.binding) + 
case count.nodes > 0 && count.componentReferences() > 1: + bindings = append(bindings, count.binding) + } + } + + return bindings, nil +} + +func mergePathCompositeStageBindings(bindingSets ...[]*BoundIdentifier) []*BoundIdentifier { + var ( + merged = make([]*BoundIdentifier, 0) + seen = map[pgsql.Identifier]struct{}{} + ) + + for _, bindings := range bindingSets { + for _, binding := range bindings { + if _, alreadySeen := seen[binding.Identifier]; alreadySeen { + continue + } + + seen[binding.Identifier] = struct{}{} + merged = append(merged, binding) + } + } + + return merged +} + +func (s *Translator) stagePathCompositeBindings(fromClauses []pgsql.FromClause, bindings []*BoundIdentifier) ([]pgsql.FromClause, error) { + for _, binding := range bindings { + stageBinding, err := s.scope.DefineNew(pgsql.Scope) + if err != nil { + return nil, err + } + + stageFrame := &Frame{ + Binding: stageBinding, + Visible: pgsql.AsIdentifierSet(binding.Identifier), + Exported: pgsql.AsIdentifierSet(binding.Identifier), + stashedVisible: pgsql.NewIdentifierSet(), + stashedExported: pgsql.NewIdentifierSet(), + Synthetic: true, + } + + stageProjection, err := buildProjection(binding.Identifier, binding, s.scope, binding.LastProjection) + if err != nil { + return nil, err + } + + fromClauses = append(fromClauses, pgsql.FromClause{ + Source: pgsql.LateralSubquery{ + Query: pgsql.Query{ + Body: pgsql.Select{ + Projection: stageProjection, + }, + Offset: pgsql.NewLiteral(0, pgsql.Int), + }, + Binding: models.OptionalValue(stageBinding.Identifier), + }, + }) + + binding.MaterializedBy(stageFrame) + } + + return fromClauses, nil +} + func (s *Translator) buildTailProjection() error { var ( currentPart = s.query.CurrentPart() @@ -1016,6 +1328,15 @@ func (s *Translator) buildTailProjection() error { if projectionConstraint, err := s.treeTranslator.ConsumeAllConstraints(); err != nil { return err + } else if constraintStagedBindings, err := tailPathCompositeStageBindings(s.scope, 
projectionConstraint.Expression); err != nil { + return err + } else if projectionStagedBindings, err := projectionPathCompositeStageBindings(s.scope, currentPart.projections.Items); err != nil { + return err + } else if stagedFromClauses, err := s.stagePathCompositeBindings( + singlePartQuerySelect.From, + mergePathCompositeStageBindings(constraintStagedBindings, projectionStagedBindings), + ); err != nil { + return err } else if projection, err := buildExternalProjection(s.scope, currentPart.projections.Items); err != nil { return err } else if resolvedConstraint, err := resolvePathCompositeFieldReferences(s.scope, projectionConstraint.Expression); err != nil { @@ -1023,6 +1344,7 @@ func (s *Translator) buildTailProjection() error { } else if err := RewriteFrameBindings(s.scope, resolvedConstraint); err != nil { return err } else { + singlePartQuerySelect.From = stagedFromClauses singlePartQuerySelect.Projection = projection singlePartQuerySelect.Where = resolvedConstraint @@ -1060,8 +1382,10 @@ func (s *Translator) buildTailProjection() error { } currentPart.Model.Body = singlePartQuerySelect - pushDownShortestPathLimit(currentPart, singlePartQuerySelect) - pushDownTraversalLimit(currentPart, singlePartQuerySelect) + if pushDownShortestPathLimit(currentPart, singlePartQuerySelect) || + pushDownTraversalLimit(currentPart, singlePartQuerySelect) { + s.recordLowering(optimize.LoweringLimitPushdown) + } if currentPart.Skip != nil { currentPart.Model.Offset = currentPart.Skip diff --git a/cypher/models/pgsql/translate/property.go b/cypher/models/pgsql/translate/property.go index 0d34f0a2..29f44fb7 100644 --- a/cypher/models/pgsql/translate/property.go +++ b/cypher/models/pgsql/translate/property.go @@ -77,6 +77,8 @@ func (s *Translator) buildPatternPropertyConstraints(binding *BoundIdentifier, p if newConstraint, err := s.treeTranslator.PopBinaryExpression(pgsql.OperatorEquals); err != nil { return nil, err + } else if rewrittenConstraint, rewritten := 
buildStringPropertyEqualityPredicate(newConstraint); rewritten { + propertyConstraints = pgsql.OptionalAnd(propertyConstraints, rewrittenConstraint) } else { propertyConstraints = pgsql.OptionalAnd(propertyConstraints, newConstraint) } diff --git a/cypher/models/pgsql/translate/renamer.go b/cypher/models/pgsql/translate/renamer.go index c2a49c3c..0b80a499 100644 --- a/cypher/models/pgsql/translate/renamer.go +++ b/cypher/models/pgsql/translate/renamer.go @@ -387,6 +387,9 @@ func (s *FrameBindingRewriter) enter(node pgsql.SyntaxNode) error { } } + case *pgsql.EdgeArrayFromPathIDs: + return s.rewriteExpression(&typedExpression.PathIDs) + case *pgsql.AliasedExpression: switch typedInnerExpression := typedExpression.Expression.(type) { case pgsql.Identifier: diff --git a/cypher/models/pgsql/translate/renamer_test.go b/cypher/models/pgsql/translate/renamer_test.go index 27999e28..50b349e0 100644 --- a/cypher/models/pgsql/translate/renamer_test.go +++ b/cypher/models/pgsql/translate/renamer_test.go @@ -63,6 +63,13 @@ func TestRewriteFrameBindings(t *testing.T) { Expression: rewrittenA, Alias: pgsql.AsOptionalIdentifier("name"), }, + }, { + Case: &pgsql.EdgeArrayFromPathIDs{ + PathIDs: a.Identifier, + }, + Expected: &pgsql.EdgeArrayFromPathIDs{ + PathIDs: rewrittenA, + }, }, { Case: pgsql.NewBinaryExpression( pgsql.ArraySlice{ diff --git a/cypher/models/pgsql/translate/selectivity.go b/cypher/models/pgsql/translate/selectivity.go index c75b0a9f..715e58ce 100644 --- a/cypher/models/pgsql/translate/selectivity.go +++ b/cypher/models/pgsql/translate/selectivity.go @@ -2,226 +2,9 @@ package translate import ( "github.com/specterops/dawgs/cypher/models/pgsql" - "github.com/specterops/dawgs/cypher/models/walk" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" ) -const ( - // Below are a select set of constants to represent different weights to represent, roughly, the selectivity - // of a given PGSQL expression. 
These weights are meant to be inexact and are only useful in comparison to other - // summed weights. - // - // The goal of these weights are to enable reordering of queries such that the more selective side of a traversal - // step is expanded first. Eventually, these weights may also enable reordering of multipart queries. - - // Entity ID references are a safe selectivity bet. A direct reference will typically take the form of: - // `n0.id = 1` or some other direct comparison against the entity's ID. All entity IDs are covered by a unique - // b-tree index, making them both highly selective and lucrative to weight higher. - selectivityWeightEntityIDReference = 125 - - // Unique node properties are both covered by a compatible index and unique, making them highly selective - selectivityWeightUniqueNodeProperty = 100 - - // Bound identifiers are heavily weighted for preserving join order integrity - selectivityWeightBoundIdentifier = 700 - - // Operators that narrow the search space are given a higher selectivity - selectivityWeightNarrowSearch = 30 - - // Operators that perform string searches are given a higher selectivity - selectivityWeightStringSearch = 20 - - // Operators that perform range comparisons are reasonably selective - selectivityWeightRangeComparison = 10 - - // Conjunctions can narrow search space, especially when compounded, but may be order dependent and unreliable as - // a good selectivity heuristic - selectivityWeightConjunction = 5 - - // Exclusions can narrow the search space but often only slightly - selectivityWeightNotEquals = 1 - - // Disjunctions expand search space by adding a secondary, conditional operation - selectivityWeightDisjunction = -100 - - // selectivityFlipThreshold is the minimum score advantage the right-hand node must hold - // over the left-hand node before OptimizePatternConstraintBalance commits to a traversal - // direction flip. 
It is set to selectivityWeightNarrowSearch so that structural AST noise - // — in particular the per-AND-node conjunction bonus — cannot trigger a flip on its own. - // A single meaningful narrowing predicate (=, IN, kind filter) on the right side is - // sufficient to clear this bar; a bare AND connector (weight 5) or a range comparison on - // an unindexed property (weight 10) is not. - selectivityFlipThreshold = selectivityWeightNarrowSearch - - // selectivityBidirectionalAnchorThreshold is the minimum score each endpoint must carry - // before shortest-path translation starts a bidirectional search from both sides. This - // keeps broad label-only endpoints out of bidirectional BFS; a single kind predicate - // scores below this threshold, while a materially narrower property predicate can clear it. - selectivityBidirectionalAnchorThreshold = selectivityWeightNarrowSearch * 2 -) - -// knownNodePropertySelectivity is a hack to enable the selectivity measurement to take advantage of known property indexes -// or uniqueness constraints. -// -// Eventually, this should be replaced by a tool that can introspect a graph schema and derive this map. -var knownNodePropertySelectivity = map[string]int{ - "objectid": selectivityWeightUniqueNodeProperty, // Object ID contains a unique constraint giving this a high degree of selectivity - "name": selectivityWeightUniqueNodeProperty, // Name contains a unique constraint giving this a high degree of selectivity - "system_tags": selectivityWeightNarrowSearch, // Searches that use the system_tags property are likely to have a higher degree of selectivity. 
-} - -type measureSelectivityVisitor struct { - walk.Visitor[pgsql.SyntaxNode] - - scope *Scope - selectivityStack []int -} - -func newMeasureSelectivityVisitor(scope *Scope) *measureSelectivityVisitor { - return &measureSelectivityVisitor{ - Visitor: walk.NewVisitor[pgsql.SyntaxNode](), - scope: scope, - selectivityStack: []int{0}, - } -} - -func (s *measureSelectivityVisitor) Selectivity() int { - return s.selectivityStack[0] -} - -func (s *measureSelectivityVisitor) popSelectivity() int { - value := s.Selectivity() - s.selectivityStack = s.selectivityStack[:len(s.selectivityStack)-1] - - return value -} - -func (s *measureSelectivityVisitor) pushSelectivity(value int) { - s.selectivityStack = append(s.selectivityStack, value) -} - -func (s *measureSelectivityVisitor) addSelectivity(value int) { - if len(s.selectivityStack) == 0 { - s.pushSelectivity(value) - } else { - s.selectivityStack[len(s.selectivityStack)-1] += value - } -} - -func isColumnIDRef(expression pgsql.Expression) bool { - switch typedExpression := expression.(type) { - case pgsql.CompoundIdentifier: - if typedExpression.HasField() { - switch typedExpression.Field() { - case pgsql.ColumnID: - return true - } - } - } - - return false -} - -func (s *measureSelectivityVisitor) Enter(node pgsql.SyntaxNode) { - switch typedNode := node.(type) { - case *pgsql.UnaryExpression: - switch typedNode.Operator { - case pgsql.OperatorNot: - s.pushSelectivity(0) - } - - case *pgsql.BinaryExpression: - var ( - lOperandIsID = isColumnIDRef(typedNode.LOperand) - rOperandIsID = isColumnIDRef(typedNode.ROperand) - ) - - if lOperandIsID && !rOperandIsID { - // Point lookup: n0.id = — highly selective - s.addSelectivity(selectivityWeightEntityIDReference) - } else if rOperandIsID && !lOperandIsID { - // Canonically unusual, but handle it the same - s.addSelectivity(selectivityWeightEntityIDReference) - } - - // If both sides are ID refs, this is a join condition — do not score as a point lookup - - switch 
typedNode.Operator { - case pgsql.OperatorOr: - s.addSelectivity(selectivityWeightDisjunction) - - case pgsql.OperatorNotEquals: - s.addSelectivity(selectivityWeightNotEquals) - - case pgsql.OperatorAnd: - s.addSelectivity(selectivityWeightConjunction) - - case pgsql.OperatorLessThan, pgsql.OperatorGreaterThan, pgsql.OperatorLessThanOrEqualTo, pgsql.OperatorGreaterThanOrEqualTo: - s.addSelectivity(selectivityWeightRangeComparison) - - case pgsql.OperatorLike, pgsql.OperatorILike, pgsql.OperatorRegexMatch, pgsql.OperatorSimilarTo: - s.addSelectivity(selectivityWeightStringSearch) - - case pgsql.OperatorIn, pgsql.OperatorEquals, pgsql.OperatorIs: - s.addSelectivity(selectivityWeightNarrowSearch) - - case pgsql.OperatorPGArrayOverlap, pgsql.OperatorArrayOverlap: - s.addSelectivity(selectivityWeightNarrowSearch) - - case pgsql.OperatorPGArrayLHSContainsRHS: - // @> is strictly more selective than &&: all kind_ids must be present. - s.addSelectivity(selectivityWeightNarrowSearch + selectivityWeightConjunction) - - case pgsql.OperatorJSONField, pgsql.OperatorJSONTextField, pgsql.OperatorPropertyLookup: - if propertyLookup, err := binaryExpressionToPropertyLookup(typedNode); err != nil { - s.SetError(err) - } else { - // Lookup the reference - leftIdentifier := propertyLookup.Reference.Root() - - if binding, bound := s.scope.Lookup(leftIdentifier); !bound { - s.SetErrorf("unable to lookup identifier %s", leftIdentifier) - } else { - switch binding.DataType { - case pgsql.ExpansionRootNode, pgsql.ExpansionTerminalNode, pgsql.NodeComposite: - // This is a node property, search through the available node property selectivity weights - if selectivity, hasKnownSelectivity := knownNodePropertySelectivity[propertyLookup.Field]; hasKnownSelectivity { - s.addSelectivity(selectivity) - } - } - } - } - } - } -} - -func (s *measureSelectivityVisitor) Exit(node pgsql.SyntaxNode) { - switch typedNode := node.(type) { - case *pgsql.UnaryExpression: - switch typedNode.Operator { - case 
pgsql.OperatorNot: - selectivity := s.popSelectivity() - s.addSelectivity(-selectivity) - } - } -} - -// MeasureSelectivity attempts to measure how selective (i.e. how narrow) the query expression passed in is. This is -// a simple heuristic that is best-effort for attempting to find which side of a traversal step ()-[]->() is more -// selective. -// -// The boolean parameter owningIdentifierBound is intended to represent if the identifier the expression constraints -// is part of a materialized set of nodes where the entity IDs of each are known at time of query. In this case the -// bound component is considered to be highly-selective. -// -// Many numbers are magic values selected based on implementor's perception of selectivity of certain operators. func MeasureSelectivity(scope *Scope, expression pgsql.Expression) (int, error) { - visitor := newMeasureSelectivityVisitor(scope) - - if expression != nil { - if err := walk.PgSQL(expression, visitor); err != nil { - return 0, err - } - } - - return visitor.Selectivity(), nil + return optimize.MeasureSelectivity(scope, expression) } diff --git a/cypher/models/pgsql/translate/tracking.go b/cypher/models/pgsql/translate/tracking.go index a5c736fe..8d325cfe 100644 --- a/cypher/models/pgsql/translate/tracking.go +++ b/cypher/models/pgsql/translate/tracking.go @@ -26,6 +26,8 @@ func (s IdentifierGenerator) NewIdentifier(dataType pgsql.DataType) (pgsql.Ident prefixStr = "n" case pgsql.EdgeComposite: prefixStr = "e" + case pgsql.PathEdge: + prefixStr = "e" case pgsql.Scope: prefixStr = "s" case pgsql.ParameterIdentifier: @@ -342,6 +344,18 @@ func (s *Scope) LookupString(identifierString string) (*BoundIdentifier, bool) { return s.AliasedLookup(pgsql.Identifier(identifierString)) } +func (s *Scope) LookupDataType(identifier pgsql.Identifier) (pgsql.DataType, bool) { + if binding, bound := s.Lookup(identifier); bound { + return binding.DataType, true + } + + if binding, bound := s.AliasedLookup(identifier); bound { + return 
binding.DataType, true + } + + return "", false +} + func (s *Scope) Define(identifier pgsql.Identifier, dataType pgsql.DataType) *BoundIdentifier { boundIdentifier := &BoundIdentifier{ Identifier: identifier, diff --git a/cypher/models/pgsql/translate/tracking_test.go b/cypher/models/pgsql/translate/tracking_test.go index 3ea6c67c..3f8a02ca 100644 --- a/cypher/models/pgsql/translate/tracking_test.go +++ b/cypher/models/pgsql/translate/tracking_test.go @@ -3,6 +3,7 @@ package translate import ( "testing" + "github.com/specterops/dawgs/cypher/models/pgsql" "github.com/stretchr/testify/require" ) @@ -27,3 +28,16 @@ func TestScope(t *testing.T) { require.Nil(t, scope.UnwindToFrame(parent)) require.Equal(t, parent.id, scope.CurrentFrame().id) } + +func TestScopeLookupDataTypeResolvesAliases(t *testing.T) { + var ( + scope = NewScope() + binding = scope.Define(pgsql.Identifier("n0"), pgsql.NodeComposite) + ) + scope.Alias(pgsql.Identifier("n"), binding) + + dataType, found := scope.LookupDataType(pgsql.Identifier("n")) + + require.True(t, found) + require.Equal(t, pgsql.NodeComposite, dataType) +} diff --git a/cypher/models/pgsql/translate/translator.go b/cypher/models/pgsql/translate/translator.go index 18b96334..297237a5 100644 --- a/cypher/models/pgsql/translate/translator.go +++ b/cypher/models/pgsql/translate/translator.go @@ -7,6 +7,7 @@ import ( "github.com/specterops/dawgs/cypher/models/cypher" "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" "github.com/specterops/dawgs/cypher/models/walk" "github.com/specterops/dawgs/graph" ) @@ -28,6 +29,23 @@ type Translator struct { query *Query scope *Scope unwindTargets map[*cypher.Variable]struct{} + + collectIDMembershipAliases map[pgsql.Identifier]struct{} + collectIDProjectionDepth int + + appliedLoweringCounts map[string]int + patternTargets map[*cypher.PatternPart]optimize.PatternTarget + patternPredicateTargets 
map[*cypher.PatternPredicate]optimize.PatternTarget + projectionPruningDecisions map[optimize.TraversalStepTarget]optimize.ProjectionPruningDecision + latePathDecisions map[optimize.TraversalStepTarget][]optimize.LatePathMaterializationDecision + suffixPushdownDecisions map[optimize.TraversalStepTarget][]optimize.ExpansionSuffixPushdownDecision + predicatePlacementDecisions map[optimize.TraversalStepTarget][]optimize.PredicatePlacementDecision + expandIntoDecisions map[optimize.TraversalStepTarget]optimize.ExpandIntoDecision + traversalDirectionDecisions map[optimize.TraversalStepTarget]optimize.TraversalDirectionDecision + shortestPathStrategyDecisions map[optimize.TraversalStepTarget]optimize.ShortestPathStrategyDecision + shortestPathFilterDecisions map[optimize.TraversalStepTarget][]optimize.ShortestPathFilterDecision + limitPushdownDecisions map[optimize.TraversalStepTarget][]optimize.LimitPushdownDecision + patternPredicateDecisions map[optimize.TraversalStepTarget]optimize.PatternPredicatePlacementDecision } func NewTranslator(ctx context.Context, kindMapper pgsql.KindMapper, parameters map[string]any, graphID int32) *Translator { @@ -40,8 +58,10 @@ func NewTranslator(ctx context.Context, kindMapper pgsql.KindMapper, parameters inputParameters[key] = value } - translatedParameters := map[string]any{} - ctxAwareKindMapper := newContextAwareKindMapper(ctx, kindMapper, translatedParameters) + var ( + translatedParameters = map[string]any{} + ctxAwareKindMapper = newContextAwareKindMapper(ctx, kindMapper, translatedParameters) + ) return &Translator{ Visitor: walk.NewVisitor[cypher.SyntaxNode](), @@ -59,6 +79,61 @@ func NewTranslator(ctx context.Context, kindMapper pgsql.KindMapper, parameters } } +func (s *Translator) SetOptimizationPlan(plan optimize.Plan) { + s.patternTargets = optimize.IndexPatternTargets(plan.Query) + s.patternPredicateTargets = optimize.IndexPatternPredicateTargets(plan.Query) + s.projectionPruningDecisions = 
map[optimize.TraversalStepTarget]optimize.ProjectionPruningDecision{} + s.latePathDecisions = map[optimize.TraversalStepTarget][]optimize.LatePathMaterializationDecision{} + s.suffixPushdownDecisions = map[optimize.TraversalStepTarget][]optimize.ExpansionSuffixPushdownDecision{} + s.predicatePlacementDecisions = map[optimize.TraversalStepTarget][]optimize.PredicatePlacementDecision{} + s.expandIntoDecisions = map[optimize.TraversalStepTarget]optimize.ExpandIntoDecision{} + s.traversalDirectionDecisions = map[optimize.TraversalStepTarget]optimize.TraversalDirectionDecision{} + s.shortestPathStrategyDecisions = map[optimize.TraversalStepTarget]optimize.ShortestPathStrategyDecision{} + s.shortestPathFilterDecisions = map[optimize.TraversalStepTarget][]optimize.ShortestPathFilterDecision{} + s.limitPushdownDecisions = map[optimize.TraversalStepTarget][]optimize.LimitPushdownDecision{} + s.patternPredicateDecisions = map[optimize.TraversalStepTarget]optimize.PatternPredicatePlacementDecision{} + + for _, decision := range plan.LoweringPlan.ProjectionPruning { + s.projectionPruningDecisions[decision.Target] = decision + } + + for _, decision := range plan.LoweringPlan.LatePathMaterialization { + s.latePathDecisions[decision.Target] = append(s.latePathDecisions[decision.Target], decision) + } + + for _, decision := range plan.LoweringPlan.ExpansionSuffixPushdown { + s.suffixPushdownDecisions[decision.Target] = append(s.suffixPushdownDecisions[decision.Target], decision) + } + + for _, decision := range plan.LoweringPlan.PredicatePlacement { + s.predicatePlacementDecisions[decision.Target] = append(s.predicatePlacementDecisions[decision.Target], decision) + } + + for _, decision := range plan.LoweringPlan.ExpandInto { + s.expandIntoDecisions[decision.Target] = decision + } + + for _, decision := range plan.LoweringPlan.TraversalDirection { + s.traversalDirectionDecisions[decision.Target] = decision + } + + for _, decision := range plan.LoweringPlan.ShortestPathStrategy { + 
s.shortestPathStrategyDecisions[decision.Target] = decision + } + + for _, decision := range plan.LoweringPlan.ShortestPathFilter { + s.shortestPathFilterDecisions[decision.Target] = append(s.shortestPathFilterDecisions[decision.Target], decision) + } + + for _, decision := range plan.LoweringPlan.LimitPushdown { + s.limitPushdownDecisions[decision.Target] = append(s.limitPushdownDecisions[decision.Target], decision) + } + + for _, decision := range plan.LoweringPlan.PatternPredicate { + s.patternPredicateDecisions[decision.Target] = decision + } +} + func (s *Translator) Enter(expression cypher.SyntaxNode) { switch typedExpression := expression.(type) { case *cypher.RegularQuery, *cypher.SingleQuery, *cypher.PatternElement, @@ -71,6 +146,13 @@ func (s *Translator) Enter(expression cypher.SyntaxNode) { *cypher.Return, *cypher.MultiPartQuery, *cypher.Properties, *cypher.KindMatcher, *cypher.Quantifier, *cypher.IDInCollection: + case *cypher.RangeQuantifier: + if typedExpression.Value != string(pgsql.WildcardIdentifier) { + s.SetErrorf("unsupported range quantifier expression: %s", typedExpression.Value) + } else { + s.treeTranslator.PushOperand(pgsql.WildcardIdentifier) + } + case *cypher.Unwind: if typedExpression.Variable != nil { // The UNWIND target is declared by the UNWIND clause itself, so later @@ -188,10 +270,15 @@ func (s *Translator) Enter(expression cypher.SyntaxNode) { } case *cypher.ProjectionItem: + if typedExpression.Alias != nil { + if _, collectIDs := s.collectIDMembershipAliases[pgsql.Identifier(typedExpression.Alias.Symbol)]; collectIDs { + s.collectIDProjectionDepth++ + } + } s.query.CurrentPart().PrepareProjection() case *cypher.PatternPredicate: - if err := s.preparePatternPredicate(); err != nil { + if err := s.preparePatternPredicate(typedExpression); err != nil { s.SetError(err) } @@ -483,6 +570,11 @@ func (s *Translator) Exit(expression cypher.SyntaxNode) { if err := s.translateProjectionItem(s.scope, typedExpression); err != nil { 
s.SetError(err) } + if typedExpression.Alias != nil { + if _, collectIDs := s.collectIDMembershipAliases[pgsql.Identifier(typedExpression.Alias.Symbol)]; collectIDs { + s.collectIDProjectionDepth-- + } + } case *cypher.Match: if err := s.translateMatch(typedExpression); err != nil { @@ -519,17 +611,168 @@ func (s *Translator) Exit(expression cypher.SyntaxNode) { } type Result struct { - Statement pgsql.Statement - Parameters map[string]any + Statement pgsql.Statement + Parameters map[string]any + Optimization OptimizationSummary +} + +type OptimizationSummary struct { + Rules []optimize.RuleResult `json:"rules,omitempty"` + PredicateAttachments []optimize.PredicateAttachment `json:"predicate_attachments,omitempty"` + PlannedLowerings []optimize.LoweringDecision `json:"planned_lowerings,omitempty"` + Lowerings []optimize.LoweringDecision `json:"lowerings,omitempty"` + SkippedLowerings []SkippedLowering `json:"skipped_lowerings,omitempty"` + LoweringPlan *optimize.LoweringPlan `json:"lowering_plan,omitempty"` +} + +type SkippedLowering struct { + Name string `json:"name"` + Reason string `json:"reason"` + Count int `json:"count,omitempty"` +} + +func (s *Translator) recordLowering(name string) { + if s.appliedLoweringCounts == nil { + s.appliedLoweringCounts = map[string]int{} + } + s.appliedLoweringCounts[name]++ + + for _, lowering := range s.translation.Optimization.Lowerings { + if lowering.Name == name { + return + } + } + + s.translation.Optimization.Lowerings = append(s.translation.Optimization.Lowerings, optimize.LoweringDecision{Name: name}) +} + +func (s *Translator) appliedLoweringCountSnapshot() map[string]int { + applied := map[string]int{} + + for _, lowering := range s.translation.Optimization.Lowerings { + applied[lowering.Name] = 1 + } + + for name, count := range s.appliedLoweringCounts { + applied[name] = count + } + + return applied +} + +func (s *Translator) recordSkippedLowerings() { + if s.translation.Optimization.LoweringPlan == nil { + return 
+ } + + applied := s.appliedLoweringCountSnapshot() + + for _, planned := range plannedLoweringCounts(*s.translation.Optimization.LoweringPlan) { + if planned.Count == 0 { + continue + } + + skippedCount := planned.Count - applied[planned.Name] + if skippedCount <= 0 { + continue + } + + s.translation.Optimization.SkippedLowerings = append(s.translation.Optimization.SkippedLowerings, SkippedLowering{ + Name: planned.Name, + Reason: skippedLoweringReason(planned.Name, applied, *s.translation.Optimization.LoweringPlan), + Count: skippedCount, + }) + } +} + +func plannedLoweringCounts(plan optimize.LoweringPlan) []SkippedLowering { + return []SkippedLowering{ + {Name: optimize.LoweringProjectionPruning, Count: len(plan.ProjectionPruning)}, + {Name: optimize.LoweringLatePathMaterialization, Count: len(plan.LatePathMaterialization)}, + {Name: optimize.LoweringExpandIntoDetection, Count: len(plan.ExpandInto)}, + {Name: optimize.LoweringTraversalDirection, Count: len(plan.TraversalDirection)}, + {Name: optimize.LoweringShortestPathStrategy, Count: len(plan.ShortestPathStrategy)}, + {Name: optimize.LoweringShortestPathFilter, Count: len(plan.ShortestPathFilter)}, + {Name: optimize.LoweringLimitPushdown, Count: len(plan.LimitPushdown)}, + {Name: optimize.LoweringExpansionSuffixPushdown, Count: len(plan.ExpansionSuffixPushdown)}, + {Name: optimize.LoweringPredicatePlacement, Count: len(plan.PredicatePlacement) + len(plan.PatternPredicate)}, + {Name: optimize.LoweringCountStoreFastPath, Count: len(plan.CountStoreFastPath)}, + {Name: optimize.LoweringAggregateTraversalCount, Count: len(plan.AggregateTraversalCount)}, + } +} + +func skippedLoweringReason(name string, applied map[string]int, plan optimize.LoweringPlan) string { + if applied[optimize.LoweringCountStoreFastPath] > 0 && name != optimize.LoweringCountStoreFastPath { + return "superseded by CountStoreFastPath" + } + if applied[optimize.LoweringAggregateTraversalCount] > 0 && name != 
optimize.LoweringAggregateTraversalCount { + return "superseded by AggregateTraversalCount" + } + + switch name { + case optimize.LoweringPredicatePlacement: + return "planned predicate placements were not consumed by this translation shape" + case optimize.LoweringTraversalDirection: + if reason := skippedTraversalDirectionReason(plan); reason != "" { + return reason + } + default: + return "planned lowering did not change the emitted SQL" + } + + return "planned lowering did not change the emitted SQL" +} + +func skippedTraversalDirectionReason(plan optimize.LoweringPlan) string { + for _, decision := range plan.TraversalDirection { + if !decision.Flip && decision.Reason != "" { + return decision.Reason + } + } + + return "" } func Translate(ctx context.Context, cypherQuery *cypher.RegularQuery, kindMapper pgsql.KindMapper, parameters map[string]any, graphID int32) (Result, error) { + optimizedPlan, err := optimize.Optimize(cypherQuery) + if err != nil { + return Result{}, err + } + translator := NewTranslator(ctx, kindMapper, parameters, graphID) + if membershipAliases, err := collectIDMembershipAliases(optimizedPlan.Query); err != nil { + return Result{}, err + } else { + translator.collectIDMembershipAliases = membershipAliases + } + translator.SetOptimizationPlan(optimizedPlan) + translator.translation.Optimization.Rules = optimizedPlan.Rules + translator.translation.Optimization.PredicateAttachments = optimizedPlan.PredicateAttachments + if !optimizedPlan.LoweringPlan.Empty() { + loweringPlan := optimizedPlan.LoweringPlan + translator.translation.Optimization.LoweringPlan = &loweringPlan + translator.translation.Optimization.PlannedLowerings = loweringPlan.Decisions() + } + + if translated, err := translator.translateCountStoreFastPath(optimizedPlan.Query, optimizedPlan.LoweringPlan); err != nil { + return Result{}, err + } else if translated { + translator.recordSkippedLowerings() + return translator.translation, nil + } + + if translated, err := 
translator.translateAggregateTraversalCount(optimizedPlan.Query, optimizedPlan.LoweringPlan); err != nil { + return Result{}, err + } else if translated { + translator.recordSkippedLowerings() + return translator.translation, nil + } - if err := walk.Cypher(cypherQuery, translator); err != nil { + if err := walk.Cypher(optimizedPlan.Query, translator); err != nil { return Result{}, err } + translator.recordSkippedLowerings() return translator.translation, nil } diff --git a/cypher/models/pgsql/translate/traversal.go b/cypher/models/pgsql/translate/traversal.go index 26485677..799e0610 100644 --- a/cypher/models/pgsql/translate/traversal.go +++ b/cypher/models/pgsql/translate/traversal.go @@ -6,10 +6,223 @@ import ( "github.com/specterops/dawgs/cypher/models" "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" "github.com/specterops/dawgs/graph" ) +func boundEndpointIDReference(frame *Frame, binding *BoundIdentifier) pgsql.RowColumnReference { + return pgsql.RowColumnReference{ + Identifier: pgsql.CompoundIdentifier{frame.Binding.Identifier, binding.Identifier}, + Column: pgsql.ColumnID, + } +} + +func boundEndpointInequality(frame *Frame, traversalStep *TraversalStep) pgsql.Expression { + return pgsql.NewParenthetical( + pgsql.NewBinaryExpression( + boundEndpointIDReference(frame, traversalStep.LeftNode), + pgsql.OperatorCypherNotEquals, + boundEndpointIDReference(frame, traversalStep.RightNode), + ), + ) +} + +func (s *Translator) shouldUseExpandInto(part *PatternPart, stepIndex int, traversalStep *TraversalStep) bool { + if traversalStep == nil || traversalStep.Expansion != nil || !traversalStep.LeftNodeBound || !traversalStep.RightNodeBound { + return false + } + + if part != nil && part.HasTarget { + if _, hasDecision := s.expandIntoDecisions[part.Target.TraversalStep(stepIndex)]; hasDecision { + return true + } + + return false + } + + return true +} + +func (s *Translator) 
traversalDirectionDecision(part *PatternPart, stepIndex int) (optimize.TraversalDirectionDecision, bool) { + if part == nil || !part.HasTarget { + return optimize.TraversalDirectionDecision{}, false + } + + decision, hasDecision := s.traversalDirectionDecisions[part.Target.TraversalStep(stepIndex)] + return decision, hasDecision +} + +func (s *Translator) applyPatternConstraintBalance(part *PatternPart, stepIndex int, constraints *PatternConstraints, traversalStep *TraversalStep) error { + if decision, hasDecision := s.traversalDirectionDecision(part, stepIndex); hasDecision { + if decision.Flip { + if traversalStep.LeftNodeBound { + if traversalStep.Expansion == nil || !traversalStep.hasPreviousFrameBinding() { + return nil + } + } else if traversalStep.RightNodeBound && !traversalStep.hasPreviousFrameBinding() { + return nil + } + + traversalStep.FlipNodes() + constraints.FlipNodes() + s.recordLowering(optimize.LoweringTraversalDirection) + } + + return nil + } + + if flipped, err := constraints.OptimizePatternConstraintBalance(s.scope, traversalStep); err != nil { + return err + } else if flipped { + s.recordLowering(optimize.LoweringTraversalDirection) + } + + return nil +} + +func (s *Translator) shortestPathStrategyDecision(part *PatternPart, stepIndex int) (optimize.ShortestPathStrategyDecision, bool) { + if part == nil || !part.HasTarget { + return optimize.ShortestPathStrategyDecision{}, false + } + + decision, hasDecision := s.shortestPathStrategyDecisions[part.Target.TraversalStep(stepIndex)] + return decision, hasDecision +} + +func (s *Translator) useBidirectionalShortestPathStrategy(part *PatternPart, stepIndex int, traversalStep *TraversalStep) (bool, error) { + if decision, hasDecision := s.shortestPathStrategyDecision(part, stepIndex); hasDecision { + if decision.Strategy != optimize.ShortestPathStrategyBidirectional { + return false, nil + } + + if canExecute, err := traversalStep.CanExecutePairAwareBidirectionalSearch(s.scope); err != nil { + 
return false, err + } else if canExecute { + s.recordLowering(optimize.LoweringShortestPathStrategy) + return true, nil + } + + return false, nil + } + + if canExecute, err := traversalStep.CanExecutePairAwareBidirectionalSearch(s.scope); err != nil { + return false, err + } else if canExecute { + s.recordLowering(optimize.LoweringShortestPathStrategy) + return true, nil + } + + return false, nil +} + +func (s *Translator) shortestPathFilterDecisionsForStep(part *PatternPart, stepIndex int) []optimize.ShortestPathFilterDecision { + if part == nil || !part.HasTarget { + return nil + } + + return s.shortestPathFilterDecisions[part.Target.TraversalStep(stepIndex)] +} + +func (s *Translator) applyShortestPathFilterMaterialization(part *PatternPart, stepIndex int, traversalStep *TraversalStep, expansionModel *Expansion) { + for _, decision := range s.shortestPathFilterDecisionsForStep(part, stepIndex) { + switch decision.Mode { + case optimize.ShortestPathFilterTerminal: + if canMaterializeTerminalFilterForStep(traversalStep, expansionModel) { + expansionModel.UseMaterializedTerminalFilter = true + s.recordLowering(optimize.LoweringShortestPathFilter) + } + + case optimize.ShortestPathFilterEndpointPair: + if expansionModel.UseBidirectionalSearch && canMaterializeEndpointPairFilterForStep(traversalStep, expansionModel) { + expansionModel.UseMaterializedEndpointPairFilter = true + s.recordLowering(optimize.LoweringShortestPathFilter) + } + } + } +} + +func (s *Translator) hasLimitPushdownDecision(part *PatternPart, stepIndex int, mode optimize.LimitPushdownMode) bool { + if part == nil || !part.HasTarget { + return true + } + + for _, decision := range s.limitPushdownDecisions[part.Target.TraversalStep(stepIndex)] { + if decision.Mode == mode { + return true + } + } + + return false +} + +func (s *Translator) allowLimitPushdownForStep(part *PatternPart, stepIndex int, traversalStep *TraversalStep) { + if traversalStep == nil || traversalStep.Frame == nil { + return + } + 
if traversalStep.Expansion != nil && traversalStep.Expansion.Options.FindAllShortestPaths { + return + } + + mode := optimize.LimitPushdownTraversalCTE + if traversalStep.Expansion != nil && + traversalStep.Expansion.Options.FindShortestPath && + !traversalStep.Expansion.Options.FindAllShortestPaths { + mode = optimize.LimitPushdownShortestPathHarness + } + + if s.hasLimitPushdownDecision(part, stepIndex, mode) { + s.query.CurrentPart().AllowLimitPushdown(traversalStep.Frame.Binding.Identifier) + } +} + +func (s *Translator) buildBoundEndpointTraversalPattern(partFrame *Frame, traversalStep *TraversalStep) (pgsql.Query, error) { + if partFrame == nil || partFrame.Previous == nil { + return pgsql.Query{}, errors.New("expected previous frame for bound endpoint traversal") + } + + var ( + previousFrame = partFrame.Previous + nextSelect = pgsql.Select{ + Projection: traversalStep.Projection, + From: []pgsql.FromClause{{ + Source: pgsql.TableReference{ + Name: pgsql.CompoundIdentifier{previousFrame.Binding.Identifier}, + }, + Joins: []pgsql.Join{{ + Table: pgsql.TableReference{ + Name: pgsql.CompoundIdentifier{pgsql.TableEdge}, + Binding: models.OptionalValue(traversalStep.Edge.Identifier), + }, + JoinOperator: pgsql.JoinOperator{ + JoinType: pgsql.JoinTypeInner, + Constraint: pgsql.OptionalAnd( + traversalStep.EdgeJoinCondition, + traversalStep.RightNodeJoinCondition, + ), + }, + }}, + }}, + } + ) + + nextSelect.Where = pgsql.OptionalAnd(traversalStep.LeftNodeConstraints, nextSelect.Where) + nextSelect.Where = pgsql.OptionalAnd(traversalStep.EdgeConstraints.Expression, nextSelect.Where) + nextSelect.Where = pgsql.OptionalAnd(traversalStep.RightNodeConstraints, nextSelect.Where) + + if traversalStep.Direction == graph.DirectionBoth && traversalStep.LeftNode.Identifier != traversalStep.RightNode.Identifier { + nextSelect.Where = pgsql.OptionalAnd(boundEndpointInequality(previousFrame, traversalStep), nextSelect.Where) + } + + return pgsql.Query{ + Body: nextSelect, + }, 
nil +} + func (s *Translator) buildDirectionlessTraversalPatternRoot(traversalStep *TraversalStep) (pgsql.Query, error) { + if traversalStep.UseExpandInto { + return s.buildBoundEndpointTraversalPattern(traversalStep.Frame, traversalStep) + } + var ( // Partition node constraints rightJoinLocal, rightJoinExternal = partitionConstraintByLocality( @@ -258,6 +471,10 @@ func (s *Translator) buildTraversalPatternRoot(partFrame *Frame, traversalStep * return s.buildDirectionlessTraversalPatternRoot(traversalStep) } + if traversalStep.UseExpandInto { + return s.buildBoundEndpointTraversalPattern(partFrame, traversalStep) + } + var ( // Partition right-node constraints: only locally-scoped terms go into JOIN ON. // Constraints that reference comma-connected CTEs (e.g. s0.i0 from a prior WITH) @@ -440,6 +657,10 @@ func (s *Translator) buildTraversalPatternRoot(partFrame *Frame, traversalStep * } func (s *Translator) buildTraversalPatternStep(partFrame *Frame, traversalStep *TraversalStep) (pgsql.Query, error) { + if traversalStep.UseExpandInto { + return s.buildBoundEndpointTraversalPattern(partFrame, traversalStep) + } + nextSelect := pgsql.Select{ Projection: traversalStep.Projection, } @@ -503,7 +724,7 @@ func (s *Translator) buildTraversalPatternStep(partFrame *Frame, traversalStep * }, nil } -func (s *Translator) translateTraversalPatternPart(part *PatternPart, isolatedProjection bool) error { +func (s *Translator) translateTraversalPatternPart(part *PatternPart, isolatedProjection bool, allowProjectionPruning bool) error { var scopeSnapshot *Scope if isolatedProjection { @@ -511,6 +732,12 @@ func (s *Translator) translateTraversalPatternPart(part *PatternPart, isolatedPr } for idx, traversalStep := range part.TraversalSteps { + if traversalStep.UseExpandInto = s.shouldUseExpandInto(part, idx, traversalStep); traversalStep.UseExpandInto { + s.recordLowering(optimize.LoweringExpandIntoDetection) + } + + s.prepareProjectionPruning(part, idx, traversalStep) + if 
traversalStepFrame, err := s.scope.PushFrame(); err != nil { return err } else { @@ -519,16 +746,22 @@ func (s *Translator) translateTraversalPatternPart(part *PatternPart, isolatedPr } if traversalStep.Expansion != nil { - if err := s.translateTraversalPatternPartWithExpansion(idx == 0, traversalStep); err != nil { + if err := s.translateTraversalPatternPartWithExpansion(part, idx, idx == 0, traversalStep, allowProjectionPruning); err != nil { return err } } else if part.AllShortestPaths || part.ShortestPath { return fmt.Errorf("expected shortest path search to utilize variable expansion: ()-[*..]->()") - } else if err := s.translateTraversalPatternPartWithoutExpansion(part, idx, traversalStep); err != nil { + } else if err := s.translateTraversalPatternPartWithoutExpansion(part, idx, traversalStep, allowProjectionPruning); err != nil { return err } } + if applied, err := s.applyExpansionSuffixPushdown(part); err != nil { + return err + } else if applied > 0 { + s.recordLowering(optimize.LoweringExpansionSuffixPushdown) + } + if isolatedProjection { s.scope = scopeSnapshot } @@ -536,72 +769,247 @@ func (s *Translator) translateTraversalPatternPart(part *PatternPart, isolatedPr return nil } -func patternBindingDependsOn(queryPart *QueryPart, part *PatternPart, binding *BoundIdentifier) bool { - if queryPart == nil || part == nil || part.PatternBinding == nil || binding == nil { +func (s *Translator) applyExpansionSuffixPushdown(part *PatternPart) (int, error) { + if part == nil || !part.HasTarget { + return applyExpansionSuffixPushdown(part) + } + + var applied int + for stepIndex := range part.TraversalSteps { + var ( + target = part.Target.TraversalStep(stepIndex) + decisions = s.suffixPushdownDecisions[target] + ) + if len(decisions) == 0 { + continue + } + + for _, decision := range decisions { + if decision.SuffixLength <= 0 || + decision.SuffixStartStep <= stepIndex || + decision.SuffixEndStep < decision.SuffixStartStep || + decision.SuffixEndStep >= 
len(part.TraversalSteps) || + decision.SuffixEndStep-decision.SuffixStartStep+1 != decision.SuffixLength { + continue + } + + var ( + currentStep = part.TraversalSteps[stepIndex] + suffixSteps = part.TraversalSteps[decision.SuffixStartStep : decision.SuffixEndStep+1] + ) + if candidateApplied, err := applyExpansionSuffixPushdownCandidate(currentStep, suffixSteps); err != nil { + return applied, err + } else if candidateApplied { + if len(decision.PredicateAttachments) > 0 { + s.recordLowering(optimize.LoweringPredicatePlacement) + } + + applied++ + } + } + } + + return applied, nil +} + +func traversalStepHasContinuation(part *PatternPart, stepIndex int) bool { + return part != nil && stepIndex+1 < len(part.TraversalSteps) +} + +func relationshipIDReference(scope *Scope, binding *BoundIdentifier) pgsql.Expression { + if binding != nil && binding.DataType == pgsql.EdgeComposite { + return pathCompositeColumnReference(scope, binding, pgsql.ColumnID) + } + + return pathEdgeIDReference(scope, binding) +} + +func relationshipIDNotInPath(edgeID, pathIDs pgsql.Expression) pgsql.Expression { + return pgsql.NewBinaryExpression( + edgeID, + pgsql.OperatorNotEquals, + pgsql.NewAllExpression(pathIDs), + ) +} + +func previousRelationshipUniquenessConstraint(scope *Scope, part *PatternPart, stepIndex int, traversalStep *TraversalStep) pgsql.Expression { + if scope == nil || part == nil || stepIndex <= 0 || traversalStep == nil || traversalStep.Edge == nil { + return nil + } + + var ( + currentEdgeID pgsql.Expression = pgsql.CompoundIdentifier{traversalStep.Edge.Identifier, pgsql.ColumnID} + constraint pgsql.Expression + ) + + for _, previousStep := range part.TraversalSteps[:stepIndex] { + if previousStep == nil || previousStep.Edge == nil { + continue + } + + if previousStep.Expansion != nil { + if previousStep.Expansion.PathBinding != nil { + constraint = pgsql.OptionalAnd( + constraint, + relationshipIDNotInPath(currentEdgeID, pathBindingReference(scope, 
previousStep.Expansion.PathBinding)), + ) + } + + continue + } + + constraint = pgsql.OptionalAnd( + constraint, + pgsql.NewBinaryExpression( + currentEdgeID, + pgsql.OperatorNotEquals, + relationshipIDReference(scope, previousStep.Edge), + ), + ) + } + + return constraint +} + +func (s *Translator) projectionPruningDecision(part *PatternPart, stepIndex int) (optimize.ProjectionPruningDecision, bool) { + if part == nil || !part.HasTarget { + return optimize.ProjectionPruningDecision{}, false + } + + decision, hasDecision := s.projectionPruningDecisions[part.Target.TraversalStep(stepIndex)] + return decision, hasDecision +} + +func (s *Translator) prepareProjectionPruning(part *PatternPart, stepIndex int, traversalStep *TraversalStep) { + decision, hasDecision := s.projectionPruningDecision(part, stepIndex) + if !hasDecision || traversalStep == nil { + return + } + + if decision.OmitLeftNode { + traversalStep.ProjectionPruning.LeftNode = traversalStep.LeftNode + } + + if decision.OmitRelationship { + traversalStep.ProjectionPruning.Relationship = traversalStep.Edge + } + + if decision.OmitRightNode { + traversalStep.ProjectionPruning.RightNode = traversalStep.RightNode + } + + if decision.OmitPathBinding && traversalStep.Expansion != nil { + traversalStep.ProjectionPruning.PathBinding = traversalStep.Expansion.PathBinding + } +} + +func (s *Translator) latePathMaterializationDecision(part *PatternPart, stepIndex int, mode optimize.LatePathMaterializationMode) (optimize.LatePathMaterializationDecision, bool) { + if part == nil || !part.HasTarget { + return optimize.LatePathMaterializationDecision{}, false + } + + for _, decision := range s.latePathDecisions[part.Target.TraversalStep(stepIndex)] { + if decision.Mode == mode { + return decision, true + } + } + + return optimize.LatePathMaterializationDecision{}, false +} + +func (s *Translator) applyPathEdgeIDMaterialization(part *PatternPart, stepIndex int, traversalStep *TraversalStep) bool { + if traversalStep == 
nil || + traversalStep.Edge == nil || + traversalStep.Edge.DataType != pgsql.EdgeComposite { return false } - if !queryPart.ReferencesBinding(part.PatternBinding) { + if _, hasDecision := s.latePathMaterializationDecision(part, stepIndex, optimize.LatePathMaterializationPathEdgeID); !hasDecision { return false } - for _, dependency := range part.PatternBinding.Dependencies { - if dependency.Identifier == binding.Identifier { - return true - } + traversalStep.Edge.DataType = pgsql.PathEdge + return true +} + +func unexportFrameBinding(frame *Frame, identifier pgsql.Identifier) bool { + if frame == nil { + return false } - return false + exported := frame.Exported.Contains(identifier) + frame.Unexport(identifier) + return exported } -func traversalStepProjectsBinding(queryPart *QueryPart, part *PatternPart, stepIndex int, binding *BoundIdentifier) bool { - if binding == nil { +func traversalStepBindingBound(traversalStep *TraversalStep, binding *BoundIdentifier) bool { + if traversalStep == nil || binding == nil { return false } - // Keep aliases referenced by later clauses and bindings needed to materialize - // a referenced path pattern. Everything else can stay internal to this step. - if (binding.Alias.Set && queryPart.ReferencesBinding(binding)) || patternBindingDependsOn(queryPart, part, binding) { - return true + if traversalStep.LeftNode == binding { + return traversalStep.LeftNodeBound } - if stepIndex+1 < len(part.TraversalSteps) { - // A multi-hop pattern needs the right node from this step as the next - // step's left node even when the user never projects it. 
- nextStep := part.TraversalSteps[stepIndex+1] - return nextStep.LeftNode != nil && nextStep.LeftNode.Identifier == binding.Identifier + if traversalStep.RightNode == binding { + return traversalStep.RightNodeBound } return false } -func pruneTraversalStepProjectionExports(queryPart *QueryPart, part *PatternPart, stepIndex int, traversalStep *TraversalStep) { - // Bound endpoints already exist in an outer frame. Only unexport unbound - // values that later clauses and continuation steps cannot observe. - if !traversalStep.LeftNodeBound && !traversalStepProjectsBinding(queryPart, part, stepIndex, traversalStep.LeftNode) { - traversalStep.Frame.Unexport(traversalStep.LeftNode.Identifier) +func unexportPrunedNodeBinding(traversalStep *TraversalStep, binding *BoundIdentifier) bool { + if binding == nil || traversalStepBindingBound(traversalStep, binding) { + return false } - if !traversalStepProjectsBinding(queryPart, part, stepIndex, traversalStep.Edge) { - traversalStep.Frame.Unexport(traversalStep.Edge.Identifier) + return unexportFrameBinding(traversalStep.Frame, binding.Identifier) +} + +func pruneTraversalStepProjectionExports(part *PatternPart, stepIndex int, traversalStep *TraversalStep) bool { + var applied bool + + applied = unexportPrunedNodeBinding(traversalStep, traversalStep.ProjectionPruning.LeftNode) || applied + if traversalStep.ProjectionPruning.Relationship != nil && !traversalStepHasContinuation(part, stepIndex) { + applied = unexportFrameBinding(traversalStep.Frame, traversalStep.ProjectionPruning.Relationship.Identifier) || applied } + applied = unexportPrunedNodeBinding(traversalStep, traversalStep.ProjectionPruning.RightNode) || applied + + return applied +} - if !traversalStep.RightNodeBound && !traversalStepProjectsBinding(queryPart, part, stepIndex, traversalStep.RightNode) { - traversalStep.Frame.Unexport(traversalStep.RightNode.Identifier) +func pruneExpansionStepProjectionExports(part *PatternPart, stepIndex int, traversalStep 
*TraversalStep) bool { + if traversalStep == nil || traversalStep.Expansion == nil { + return false } + + var applied bool + if traversalStep.ProjectionPruning.Relationship != nil { + applied = unexportFrameBinding(traversalStep.Frame, traversalStep.ProjectionPruning.Relationship.Identifier) || applied + } + + if traversalStep.ProjectionPruning.PathBinding != nil && !traversalStepHasContinuation(part, stepIndex) { + applied = unexportFrameBinding(traversalStep.Frame, traversalStep.ProjectionPruning.PathBinding.Identifier) || applied + } + + return applied } -func (s *Translator) translateTraversalPatternPartWithoutExpansion(part *PatternPart, stepIndex int, traversalStep *TraversalStep) error { +func (s *Translator) translateTraversalPatternPartWithoutExpansion(part *PatternPart, stepIndex int, traversalStep *TraversalStep, allowProjectionPruning bool) error { isFirstTraversalStep := stepIndex == 0 if constraints, err := consumePatternConstraints(isFirstTraversalStep, nonRecursivePattern, traversalStep, s.treeTranslator); err != nil { return err } else { if isFirstTraversalStep { - if err := constraints.OptimizePatternConstraintBalance(s.scope, traversalStep); err != nil { + if err := s.applyPatternConstraintBalance(part, stepIndex, &constraints, traversalStep); err != nil { return err } + } + + s.recordPredicatePlacementConsumption(part, stepIndex, traversalStep, constraints) + if isFirstTraversalStep { hasPreviousFrame := traversalStep.Frame.Previous != nil if hasPreviousFrame { @@ -650,6 +1058,10 @@ func (s *Translator) translateTraversalPatternPartWithoutExpansion(part *Pattern } else { traversalStep.EdgeConstraints = constraints.Edge } + traversalStep.EdgeConstraints.Expression = pgsql.OptionalAnd( + traversalStep.EdgeConstraints.Expression, + previousRelationshipUniquenessConstraint(s.scope, part, stepIndex, traversalStep), + ) traversalStep.Frame.Export(traversalStep.RightNode.Identifier) @@ -668,7 +1080,16 @@ func (s *Translator) 
translateTraversalPatternPartWithoutExpansion(part *Pattern } } - pruneTraversalStepProjectionExports(s.query.CurrentPart(), part, stepIndex, traversalStep) + if allowProjectionPruning { + if s.applyPathEdgeIDMaterialization(part, stepIndex, traversalStep) { + s.recordLowering(optimize.LoweringLatePathMaterialization) + } + + _, hasDecision := s.projectionPruningDecision(part, stepIndex) + if hasDecision && pruneTraversalStepProjectionExports(part, stepIndex, traversalStep) { + s.recordLowering(optimize.LoweringProjectionPruning) + } + } if boundProjections, err := buildVisibleProjections(s.scope); err != nil { return err diff --git a/cypher/models/pgsql/translate/with.go b/cypher/models/pgsql/translate/with.go index 6009940d..b8b66f60 100644 --- a/cypher/models/pgsql/translate/with.go +++ b/cypher/models/pgsql/translate/with.go @@ -25,6 +25,14 @@ func (s *Translator) translateWith() error { for _, projectionItem := range currentPart.projections.Items { if err := RewriteFrameBindings(s.scope, projectionItem.SelectItem); err != nil { return err + } else if _, isIdentifier := projectionItem.SelectItem.(pgsql.Identifier); isIdentifier { + continue + } else if resolvedSelectItem, err := resolvePathCompositeFieldReferences(s.scope, projectionItem.SelectItem); err != nil { + return err + } else if selectItem, isSelectItem := resolvedSelectItem.(pgsql.SelectItem); !isSelectItem { + return fmt.Errorf("resolved with projection item is not selectable: %T", resolvedSelectItem) + } else { + projectionItem.SelectItem = selectItem } } diff --git a/cypher/models/walk/walk_pgsql.go b/cypher/models/walk/walk_pgsql.go index a140fb66..37e0a8c1 100644 --- a/cypher/models/walk/walk_pgsql.go +++ b/cypher/models/walk/walk_pgsql.go @@ -205,6 +205,12 @@ func newSQLWalkCursor(node pgsql.SyntaxNode) (*Cursor[pgsql.SyntaxNode], error) Branches: []pgsql.SyntaxNode{typedNode.Expression}, }, nil + case *pgsql.EdgeArrayFromPathIDs: + return &Cursor[pgsql.SyntaxNode]{ + Node: node, + Branches: 
[]pgsql.SyntaxNode{typedNode.PathIDs}, + }, nil + case pgsql.FunctionCall: if branches, err := pgsqlSyntaxNodeSliceTypeConvert(typedNode.Parameters); err != nil { return nil, err @@ -252,6 +258,18 @@ func newSQLWalkCursor(node pgsql.SyntaxNode) (*Cursor[pgsql.SyntaxNode], error) Branches: []pgsql.SyntaxNode{typedNode.Expression}, }, nil + case pgsql.AllExpression: + return &Cursor[pgsql.SyntaxNode]{ + Node: node, + Branches: []pgsql.SyntaxNode{typedNode.Expression}, + }, nil + + case *pgsql.AllExpression: + return &Cursor[pgsql.SyntaxNode]{ + Node: node, + Branches: []pgsql.SyntaxNode{typedNode.Expression}, + }, nil + case *pgsql.AnyExpression: return &Cursor[pgsql.SyntaxNode]{ Node: node, @@ -382,6 +400,11 @@ func newSQLWalkCursor(node pgsql.SyntaxNode) (*Cursor[pgsql.SyntaxNode], error) Branches: []pgsql.SyntaxNode{typedNode.Query}, }, nil + case pgsql.FormattingLiteral: + return &Cursor[pgsql.SyntaxNode]{ + Node: node, + }, nil + case pgsql.SyntaxNodeFuture: cursor := &Cursor[pgsql.SyntaxNode]{ Node: typedNode, diff --git a/drivers/neo4j/batch_integration_test.go b/drivers/neo4j/batch_integration_test.go index 80eb96c6..798db901 100644 --- a/drivers/neo4j/batch_integration_test.go +++ b/drivers/neo4j/batch_integration_test.go @@ -42,7 +42,7 @@ func prepareNode(index int) *graph.Node { func isNeo4jConnectionString(connStr string) bool { u, err := url.Parse(connStr) - return err == nil && u.Scheme == neo4j.DriverName + return err == nil && (u.Scheme == neo4j.DriverName || u.Scheme == "neo4j+s" || u.Scheme == "neo4j+ssc") } func TestBatchTransaction_NodeUpdate(t *testing.T) { diff --git a/drivers/neo4j/driver.go b/drivers/neo4j/driver.go index d78b8511..3c9fa317 100644 --- a/drivers/neo4j/driver.go +++ b/drivers/neo4j/driver.go @@ -14,16 +14,19 @@ const ( DriverName = "neo4j" ) -func readCfg() neo4j.SessionConfig { +func sessionConfig(accessMode neo4j.AccessMode, databaseName string) neo4j.SessionConfig { return neo4j.SessionConfig{ - AccessMode: 
neo4j.AccessModeRead, + AccessMode: accessMode, + DatabaseName: databaseName, } } -func writeCfg() neo4j.SessionConfig { - return neo4j.SessionConfig{ - AccessMode: neo4j.AccessModeWrite, - } +func (s *driver) readCfg() neo4j.SessionConfig { + return sessionConfig(neo4j.AccessModeRead, s.databaseName) +} + +func (s *driver) writeCfg() neo4j.SessionConfig { + return sessionConfig(neo4j.AccessModeWrite, s.databaseName) } type driver struct { @@ -33,6 +36,7 @@ type driver struct { batchWriteSize int writeFlushSize int graphQueryMemoryLimit size.Size + databaseName string } func (s *driver) SetBatchWriteSize(size int) { @@ -64,7 +68,7 @@ func (s *driver) BatchOperation(ctx context.Context, batchDelegate graph.BatchDe Timeout: s.defaultTransactionTimeout, } - session = s.driver.NewSession(writeCfg()) + session = s.driver.NewSession(s.writeCfg()) batch = newBatchOperation(ctx, session, cfg, s.writeFlushSize, config.BatchSize, s.graphQueryMemoryLimit) ) @@ -110,14 +114,14 @@ func (s *driver) transaction(ctx context.Context, txDelegate graph.TransactionDe } func (s *driver) ReadTransaction(ctx context.Context, txDelegate graph.TransactionDelegate, options ...graph.TransactionOption) error { - session := s.driver.NewSession(readCfg()) + session := s.driver.NewSession(s.readCfg()) defer session.Close() return s.transaction(ctx, txDelegate, session, options) } func (s *driver) WriteTransaction(ctx context.Context, txDelegate graph.TransactionDelegate, options ...graph.TransactionOption) error { - session := s.driver.NewSession(writeCfg()) + session := s.driver.NewSession(s.writeCfg()) defer session.Close() return s.transaction(ctx, txDelegate, session, options) diff --git a/drivers/neo4j/neo4j.go b/drivers/neo4j/neo4j.go index c6a5ea2a..ffddb6ea 100644 --- a/drivers/neo4j/neo4j.go +++ b/drivers/neo4j/neo4j.go @@ -5,6 +5,7 @@ import ( "fmt" "math" "net/url" + "strings" "github.com/neo4j/neo4j-go-driver/v5/neo4j" "github.com/specterops/dawgs" @@ -21,14 +22,16 @@ const ( func 
newNeo4jDB(_ context.Context, cfg dawgs.Config) (graph.Database, error) { if connectionURL, err := url.Parse(cfg.ConnectionString); err != nil { return nil, err - } else if connectionURL.Scheme != DriverName { + } else if !isNeo4jConnectionScheme(connectionURL.Scheme) { return nil, fmt.Errorf("expected connection URL scheme %s for Neo4J but got %s", DriverName, connectionURL.Scheme) } else if password, isSet := connectionURL.User.Password(); !isSet { return nil, fmt.Errorf("no password provided in connection URL") + } else if target, err := neo4jConnectionTarget(connectionURL); err != nil { + return nil, err + } else if databaseName, err := neo4jConnectionDatabaseName(connectionURL); err != nil { + return nil, err } else { - boltURL := fmt.Sprintf("bolt://%s:%s", connectionURL.Hostname(), connectionURL.Port()) - - if internalDriver, err := neo4j.NewDriver(boltURL, neo4j.BasicAuth(connectionURL.User.Username(), password, "")); err != nil { + if internalDriver, err := neo4j.NewDriver(target, neo4j.BasicAuth(connectionURL.User.Username(), password, "")); err != nil { return nil, fmt.Errorf("unable to connect to Neo4J: %w", err) } else { return &driver{ @@ -38,11 +41,49 @@ func newNeo4jDB(_ context.Context, cfg dawgs.Config) (graph.Database, error) { writeFlushSize: DefaultWriteFlushSize, batchWriteSize: DefaultBatchWriteSize, graphQueryMemoryLimit: cfg.GraphQueryMemoryLimit, + databaseName: databaseName, }, nil } } } +func isNeo4jConnectionScheme(scheme string) bool { + return scheme == DriverName || scheme == "neo4j+s" || scheme == "neo4j+ssc" +} + +func neo4jConnectionTarget(connectionURL *url.URL) (string, error) { + if connectionURL.Host == "" { + return "", fmt.Errorf("Neo4j connection string host is required") + } + + return (&url.URL{ + Scheme: connectionURL.Scheme, + Host: connectionURL.Host, + RawQuery: connectionURL.RawQuery, + }).String(), nil +} + +func neo4jConnectionDatabaseName(connectionURL *url.URL) (string, error) { + databasePath := 
strings.Trim(connectionURL.EscapedPath(), "/") + if databasePath == "" { + return "", nil + } + + if strings.Contains(databasePath, "/") { + return "", fmt.Errorf("Neo4j database path must contain a single database name") + } + + databaseName, err := url.PathUnescape(databasePath) + if err != nil { + return "", fmt.Errorf("parse Neo4j database name: %w", err) + } + if strings.Contains(databaseName, "/") { + return "", fmt.Errorf("Neo4j database path must contain a single database name") + } + + return databaseName, nil +} + func init() { dawgs.Register(DriverName, func(ctx context.Context, cfg dawgs.Config) (graph.Database, error) { return newNeo4jDB(ctx, cfg) diff --git a/drivers/neo4j/neo4j_internal_test.go b/drivers/neo4j/neo4j_internal_test.go new file mode 100644 index 00000000..78086b87 --- /dev/null +++ b/drivers/neo4j/neo4j_internal_test.go @@ -0,0 +1,61 @@ +package neo4j + +import ( + "net/url" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNeo4jConnectionTargetPreservesAcceptedSchemes(t *testing.T) { + testCases := []struct { + name string + connStr string + expectedTarget string + expectedDatabase string + }{{ + name: "plain routing", + connStr: "neo4j://neo4j:password@localhost:7687", + expectedTarget: "neo4j://localhost:7687", + expectedDatabase: "", + }, { + name: "secure routing", + connStr: "neo4j+s://neo4j:password@cluster.example:7687", + expectedTarget: "neo4j+s://cluster.example:7687", + expectedDatabase: "", + }, { + name: "self signed routing with database and query", + connStr: "neo4j+ssc://neo4j:password@cluster.example:7687/analytics?policy=fast", + expectedTarget: "neo4j+ssc://cluster.example:7687?policy=fast", + expectedDatabase: "analytics", + }} + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + connectionURL, err := url.Parse(testCase.connStr) + require.NoError(t, err) + require.True(t, isNeo4jConnectionScheme(connectionURL.Scheme)) + + target, err := 
neo4jConnectionTarget(connectionURL) + require.NoError(t, err) + require.Equal(t, testCase.expectedTarget, target) + + databaseName, err := neo4jConnectionDatabaseName(connectionURL) + require.NoError(t, err) + require.Equal(t, testCase.expectedDatabase, databaseName) + }) + } +} + +func TestNeo4jConnectionDatabaseNameRejectsNestedPath(t *testing.T) { + for _, connStr := range []string{ + "neo4j://neo4j:password@localhost:7687/db/extra", + "neo4j://neo4j:password@localhost:7687/db%2Fextra", + } { + connectionURL, err := url.Parse(connStr) + require.NoError(t, err) + + _, err = neo4jConnectionDatabaseName(connectionURL) + require.ErrorContains(t, err, "single database name") + } +} diff --git a/drivers/pg/batch.go b/drivers/pg/batch.go index ebea05cd..6fda114b 100644 --- a/drivers/pg/batch.go +++ b/drivers/pg/batch.go @@ -4,23 +4,15 @@ import ( "bytes" "context" "fmt" - "log/slog" "strconv" - "strings" - "github.com/jackc/pgtype" "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgxpool" - "github.com/specterops/dawgs/cypher/models/pgsql" "github.com/specterops/dawgs/drivers/pg/model" sql "github.com/specterops/dawgs/drivers/pg/query" "github.com/specterops/dawgs/graph" ) -const ( - LargeNodeUpdateThreshold = 1_000_000 -) - type Int2ArrayEncoder struct { buffer *bytes.Buffer } @@ -95,112 +87,7 @@ func (s *batch) UpdateNodeBy(update graph.NodeUpdate) error { return s.tryFlush(s.batchWriteSize) } -// largeUpdate performs a bulk node update using PostgreSQL's COPY FROM to stream -// nodes into a temporary staging table and then MERGE INTO the live node partition. -// This path is more efficient than a parameterised UPDATE for very large batches -// (see LargeUpdateThreshold). 
-func (s *batch) largeUpdate(nodes []*graph.Node) error { - tx, err := s.innerTransaction.conn.Begin(s.ctx) - if err != nil { - return err - } - - defer tx.Rollback(s.ctx) - - if _, err := tx.Exec(s.ctx, sql.FormatCreateNodeUpdateStagingTable(sql.NodeUpdateStagingTable)); err != nil { - return fmt.Errorf("creating node update staging table: %w", err) - } - - nodeRows := NewLargeNodeUpdateRows(len(nodes)) - if err := nodeRows.AppendAll(s.ctx, nodes, s.schemaManager, s.kindIDEncoder); err != nil { - return err - } - - // Stream the rows into the staging table via COPY FROM. - if _, err := tx.Conn().CopyFrom( - s.ctx, - pgx.Identifier{sql.NodeUpdateStagingTable}, - sql.NodeUpdateStagingColumns, - pgx.CopyFromRows(nodeRows.Rows()), - ); err != nil { - return fmt.Errorf("copying nodes into staging table: %w", err) - } - - graphTarget, err := s.innerTransaction.getTargetGraph() - if err != nil { - return err - } - - if _, err := tx.Exec(s.ctx, sql.FormatMergeNodeLargeUpdate(graphTarget, sql.NodeUpdateStagingTable)); err != nil { - return fmt.Errorf("merging node updates from staging table: %w", err) - } - - if err := tx.Commit(s.ctx); err != nil { - return err - } - - return nil -} - -// LargeNodeUpdateRows accumulates encoded node rows for bulk loading via COPY FROM. -// The column order matches sql.NodeUpdateStagingColumns. 
-type LargeNodeUpdateRows struct { - rows [][]any -} - -func NewLargeNodeUpdateRows(size int) *LargeNodeUpdateRows { - return &LargeNodeUpdateRows{ - rows: make([][]any, 0, size), - } -} - -func (s *LargeNodeUpdateRows) Rows() [][]any { - return s.rows -} - -func (s *LargeNodeUpdateRows) Append(ctx context.Context, node *graph.Node, schemaManager *SchemaManager, kindIDEncoder Int2ArrayEncoder) error { - addedKindIDs, err := schemaManager.AssertKinds(ctx, node.Kinds) - if err != nil { - return fmt.Errorf("mapping added kinds for node %d: %w", node.ID, err) - } - - deletedKindIDs, err := schemaManager.AssertKinds(ctx, node.DeletedKinds) - if err != nil { - return fmt.Errorf("mapping deleted kinds for node %d: %w", node.ID, err) - } - - propertiesJSONB, err := pgsql.PropertiesToJSONB(node.Properties) - if err != nil { - return fmt.Errorf("encoding properties for node %d: %w", node.ID, err) - } - - s.rows = append(s.rows, []any{ - node.ID.Int64(), - kindIDEncoder.Encode(addedKindIDs), - kindIDEncoder.Encode(deletedKindIDs), - string(propertiesJSONB.Bytes), - pgsql.DeletedPropertiesToString(node.Properties), - }) - - return nil -} - -// AppendAll encodes every node in the slice and appends its row to the accumulator. 
-func (s *LargeNodeUpdateRows) AppendAll(ctx context.Context, nodes []*graph.Node, schemaManager *SchemaManager, kindIDEncoder Int2ArrayEncoder) error { - for _, node := range nodes { - if err := s.Append(ctx, node, schemaManager, kindIDEncoder); err != nil { - return err - } - } - - return nil -} - func (s *batch) UpdateNodes(nodes []*graph.Node) error { - if len(nodes) > LargeNodeUpdateThreshold { - return s.largeUpdate(nodes) - } - for _, node := range nodes { s.nodeUpdateBuffer = append(s.nodeUpdateBuffer, node) @@ -212,8 +99,31 @@ func (s *batch) UpdateNodes(nodes []*graph.Node) error { return nil } +func encodeDeleteIDCopyRow(_ context.Context, id graph.ID) ([]any, error) { + return []any{id.Int64()}, nil +} + func (s *batch) flushNodeDeleteBuffer() error { - if _, err := s.innerTransaction.conn.Exec(s.ctx, deleteNodeWithIDStatement, s.nodeDeletionBuffer); err != nil { + if len(s.nodeDeletionBuffer) == 0 { + return nil + } + + stage := batchCopyStage{ + Name: "node delete", + TableIdentifier: pgx.Identifier{sql.NodeDeleteStagingTable}, + Columns: sql.DeleteStagingColumns, + Source: newBatchSliceCopySource(s.ctx, s.nodeDeletionBuffer, encodeDeleteIDCopyRow), + BeforeCopy: []batchChunkStatement{{ + Name: "create node delete staging table", + Statement: sql.FormatCreateDeleteStagingTable(sql.NodeDeleteStagingTable), + }}, + AfterCopy: []batchChunkStatement{{ + Name: "merge node delete staging table", + Statement: sql.FormatMergeNodeDeleteStaging(sql.NodeDeleteStagingTable), + }}, + } + + if _, err := copyBatchStageChunk(s.ctx, s.innerTransaction.conn, stage); err != nil { return err } @@ -222,7 +132,26 @@ func (s *batch) flushNodeDeleteBuffer() error { } func (s *batch) flushRelationshipDeleteBuffer() error { - if _, err := s.innerTransaction.conn.Exec(s.ctx, deleteEdgeWithIDStatement, s.relationshipDeletionBuffer); err != nil { + if len(s.relationshipDeletionBuffer) == 0 { + return nil + } + + stage := batchCopyStage{ + Name: "relationship delete", + 
TableIdentifier: pgx.Identifier{sql.RelationshipDeleteStagingTable}, + Columns: sql.DeleteStagingColumns, + Source: newBatchSliceCopySource(s.ctx, s.relationshipDeletionBuffer, encodeDeleteIDCopyRow), + BeforeCopy: []batchChunkStatement{{ + Name: "create relationship delete staging table", + Statement: sql.FormatCreateDeleteStagingTable(sql.RelationshipDeleteStagingTable), + }}, + AfterCopy: []batchChunkStatement{{ + Name: "merge relationship delete staging table", + Statement: sql.FormatMergeRelationshipDeleteStaging(sql.RelationshipDeleteStagingTable), + }}, + } + + if _, err := copyBatchStageChunk(s.ctx, s.innerTransaction.conn, stage); err != nil { return err } @@ -255,111 +184,154 @@ func (s *batch) flushNodeCreateBuffer() error { return s.flushNodeCreateBufferWithIDs() } -func (s *batch) flushNodeCreateBufferWithIDs() error { - var ( - numCreates = len(s.nodeCreateBuffer) - nodeIDs = make([]uint64, numCreates) - kindIDSlices = make([]string, numCreates) - kindIDEncoder = Int2ArrayEncoder{ - buffer: &bytes.Buffer{}, +func (s *batch) assertNodeCreateKinds() error { + for _, nextNode := range s.nodeCreateBuffer { + if _, err := s.schemaManager.AssertKinds(s.ctx, nextNode.Kinds); err != nil { + return fmt.Errorf("unable to map kinds %w", err) } - properties = make([]pgtype.JSONB, numCreates) - ) + } - for idx, nextNode := range s.nodeCreateBuffer { - nodeIDs[idx] = nextNode.ID.Uint64() + return nil +} - if mappedKindIDs, err := s.schemaManager.AssertKinds(s.ctx, nextNode.Kinds); err != nil { - return fmt.Errorf("unable to map kinds %w", err) - } else { - kindIDSlices[idx] = kindIDEncoder.Encode(mappedKindIDs) - } +func (s *batch) flushNodeCreateCopyStage(includeID bool, columns []string, mergeStatement func(model.Graph, string) string) error { + if len(s.nodeCreateBuffer) == 0 { + return nil + } - if propertiesJSONB, err := pgsql.PropertiesToJSONB(nextNode.Properties); err != nil { - return err - } else { - properties[idx] = propertiesJSONB - } + if err := 
s.assertNodeCreateKinds(); err != nil { + return err } if graphTarget, err := s.innerTransaction.getTargetGraph(); err != nil { return err - } else if _, err := s.innerTransaction.conn.Exec(s.ctx, createNodeWithIDBatchStatement, graphTarget.ID, nodeIDs, kindIDSlices, properties); err != nil { - return err + } else { + stage := batchCopyStage{ + Name: "node create", + TableIdentifier: pgx.Identifier{sql.NodeCreateStagingTable}, + Columns: columns, + Source: newNodeCreateCopySource(s.ctx, graphTarget.ID, s.nodeCreateBuffer, s.schemaManager, includeID), + BeforeCopy: []batchChunkStatement{{ + Name: "create node create staging table", + Statement: sql.FormatCreateNodeCreateStagingTable(sql.NodeCreateStagingTable), + }}, + AfterCopy: []batchChunkStatement{{ + Name: "merge node create staging table", + Statement: mergeStatement(graphTarget, sql.NodeCreateStagingTable), + }}, + } + + if _, err := copyBatchStageChunk(s.ctx, s.innerTransaction.conn, stage); err != nil { + return err + } } s.nodeCreateBuffer = s.nodeCreateBuffer[:0] return nil } -func (s *batch) flushNodeCreateBufferWithoutIDs() error { - var ( - numCreates = len(s.nodeCreateBuffer) - kindIDSlices = make([]string, numCreates) - kindIDEncoder = Int2ArrayEncoder{ - buffer: &bytes.Buffer{}, - } - properties = make([]pgtype.JSONB, numCreates) - ) +func (s *batch) flushNodeCreateBufferWithIDs() error { + return s.flushNodeCreateCopyStage(true, sql.NodeCreateWithIDStagingColumns, sql.FormatMergeNodeCreateStagingWithIDs) +} - for idx, nextNode := range s.nodeCreateBuffer { - if mappedKindIDs, err := s.schemaManager.AssertKinds(s.ctx, nextNode.Kinds); err != nil { - return fmt.Errorf("unable to map kinds %w", err) - } else { - kindIDSlices[idx] = kindIDEncoder.Encode(mappedKindIDs) - } +func (s *batch) flushNodeCreateBufferWithoutIDs() error { + return s.flushNodeCreateCopyStage(false, sql.NodeCreateWithoutIDStagingColumns, sql.FormatMergeNodeCreateStagingWithoutIDs) +} - if propertiesJSONB, err := 
pgsql.PropertiesToJSONB(nextNode.Properties); err != nil { - return err - } else { - properties[idx] = propertiesJSONB - } +func nodeUpsertValues(updates *sql.NodeUpdateBatch) []*sql.NodeUpdate { + values := make([]*sql.NodeUpdate, 0, len(updates.Updates)) + for _, update := range updates.Updates { + values = append(values, update) } - if graphTarget, err := s.innerTransaction.getTargetGraph(); err != nil { - return err - } else if _, err := s.innerTransaction.conn.Exec(s.ctx, createNodeWithoutIDBatchStatement, graphTarget.ID, kindIDSlices, properties); err != nil { - return err + return values +} + +func (s *batch) assertNodeUpsertKinds(updates []*sql.NodeUpdate) error { + for _, update := range updates { + if _, err := s.schemaManager.AssertKinds(s.ctx, update.Node.Kinds); err != nil { + return fmt.Errorf("unable to map kinds %w", err) + } } - s.nodeCreateBuffer = s.nodeCreateBuffer[:0] return nil } func (s *batch) flushNodeUpsertBatch(updates *sql.NodeUpdateBatch) error { - parameters := NewNodeUpsertParameters(len(updates.Updates)) + values := nodeUpsertValues(updates) + if len(values) == 0 { + return nil + } - if err := parameters.AppendAll(s.ctx, updates, s.schemaManager, s.kindIDEncoder); err != nil { + if err := s.assertNodeUpsertKinds(values); err != nil { return err } if graphTarget, err := s.innerTransaction.getTargetGraph(); err != nil { return err } else { - query := sql.FormatNodeUpsert(graphTarget, updates.IdentityProperties) + return runBatchChunk(s.ctx, s.innerTransaction.conn, func(tx pgx.Tx) error { + if err := execBatchChunkStatements(s.ctx, tx, []batchChunkStatement{{ + Name: "create node upsert staging table", + Statement: sql.FormatCreateNodeUpsertStagingTable(sql.NodeUpsertStagingTable), + }}); err != nil { + return err + } - if rows, err := s.innerTransaction.conn.Query(s.ctx, query, parameters.Format(graphTarget)...); err != nil { - return err - } else { + if _, err := tx.CopyFrom( + s.ctx, + pgx.Identifier{sql.NodeUpsertStagingTable}, + 
sql.NodeUpsertStagingColumns, + newNodeUpsertCopySource(s.ctx, graphTarget.ID, values, s.schemaManager), + ); err != nil { + return fmt.Errorf("node upsert copy: %w", err) + } + + rows, err := tx.Query(s.ctx, sql.FormatMergeNodeUpsertStaging(graphTarget, updates.IdentityProperties, sql.NodeUpsertStagingTable)) + if err != nil { + return err + } defer rows.Close() - idFutureIndex := 0 + var ( + idsResolved = 0 + seenRows = make([]bool, len(values)) + ) for rows.Next() { - if err := rows.Scan(¶meters.IDFutures[idFutureIndex].Value); err != nil { + var ( + rowOrdinal int + id graph.ID + ) + + if err := rows.Scan(&rowOrdinal, &id); err != nil { return err } - idFutureIndex++ + if rowOrdinal < 0 || rowOrdinal >= len(values) { + return fmt.Errorf("node upsert returned row ordinal %d outside staged row count %d", rowOrdinal, len(values)) + } + + if seenRows[rowOrdinal] { + return fmt.Errorf("node upsert returned duplicate row ordinal %d", rowOrdinal) + } + + values[rowOrdinal].IDFuture.Value = id + seenRows[rowOrdinal] = true + idsResolved++ } if err := rows.Err(); err != nil { return err } - } - } - return nil + if idsResolved != len(values) { + return fmt.Errorf("node upsert returned %d ids for %d staged rows", idsResolved, len(values)) + } + + return nil + }) + } } func (s *batch) tryFlushNodeUpdateByBuffer() error { @@ -373,144 +345,86 @@ func (s *batch) tryFlushNodeUpdateByBuffer() error { return nil } -func (s *batch) flushNodeUpdateBatch(nodes []*graph.Node) error { - parameters := NewNodeUpdateParameters(len(nodes)) - - if err := parameters.AppendAll(s.ctx, nodes, s.schemaManager, s.kindIDEncoder); err != nil { - return err - } - - if graphTarget, err := s.innerTransaction.getTargetGraph(); err != nil { - return err - } else { - query := sql.FormatNodesUpdate(graphTarget) - - if rows, err := s.innerTransaction.conn.Query(s.ctx, query, parameters.Format()...); err != nil { - return err - } else { - rows.Close() - - return rows.Err() +func (s *batch) 
assertNodeUpdateKinds(nodes []*graph.Node) error { + for _, node := range nodes { + if _, err := s.schemaManager.AssertKinds(s.ctx, node.Kinds); err != nil { + return fmt.Errorf("unable to map kinds %w", err) } - } -} -func (s *batch) tryFlushNodeUpdateBuffer() error { - if err := s.flushNodeUpdateBatch(s.nodeUpdateBuffer); err != nil { - return err + if _, err := s.schemaManager.AssertKinds(s.ctx, node.DeletedKinds); err != nil { + return fmt.Errorf("unable to map kinds %w", err) + } } - s.nodeUpdateBuffer = s.nodeUpdateBuffer[:0] return nil } -type NodeUpdateParameters struct { - NodeIDs []graph.ID - KindSlices []string - DeletedKindSlices []string - Properties []pgtype.JSONB - DeletedProperties []string -} - -func NewNodeUpdateParameters(size int) *NodeUpdateParameters { - return &NodeUpdateParameters{ - NodeIDs: make([]graph.ID, 0, size), - KindSlices: make([]string, 0, size), - DeletedKindSlices: make([]string, 0, size), - Properties: make([]pgtype.JSONB, 0, size), - DeletedProperties: make([]string, 0, size), +func cloneProperties(properties *graph.Properties) *graph.Properties { + if properties == nil { + return graph.NewProperties() } -} -func (s *NodeUpdateParameters) Format() []any { - return []any{ - s.NodeIDs, - s.KindSlices, - s.DeletedKindSlices, - s.Properties, - s.DeletedProperties, - } + return properties.Clone() } -func (s *NodeUpdateParameters) Append(ctx context.Context, node *graph.Node, schemaManager *SchemaManager, kindIDEncoder Int2ArrayEncoder) error { - s.NodeIDs = append(s.NodeIDs, node.ID) - - if mappedKindIDs, err := schemaManager.AssertKinds(ctx, node.Kinds); err != nil { - return fmt.Errorf("unable to map kinds %w", err) - } else { - s.KindSlices = append(s.KindSlices, kindIDEncoder.Encode(mappedKindIDs)) - } - - if mappedKindIDs, err := schemaManager.AssertKinds(ctx, node.DeletedKinds); err != nil { - return fmt.Errorf("unable to map kinds %w", err) - } else { - s.DeletedKindSlices = append(s.DeletedKindSlices, 
kindIDEncoder.Encode(mappedKindIDs)) +func cloneNodeUpdate(node *graph.Node) *graph.Node { + return &graph.Node{ + ID: node.ID, + Kinds: node.Kinds.Copy(), + AddedKinds: node.AddedKinds.Copy(), + DeletedKinds: node.DeletedKinds.Copy(), + Properties: cloneProperties(node.Properties), } - - if propertiesJSONB, err := pgsql.PropertiesToJSONB(node.Properties); err != nil { - return err - } else { - s.Properties = append(s.Properties, propertiesJSONB) - } - - s.DeletedProperties = append(s.DeletedProperties, pgsql.DeletedPropertiesToString(node.Properties)) - - return nil } -func (s *NodeUpdateParameters) AppendAll(ctx context.Context, nodes []*graph.Node, schemaManager *SchemaManager, kindIDEncoder Int2ArrayEncoder) error { +func coalesceNodeUpdates(nodes []*graph.Node) []*graph.Node { + var ( + coalesced = make([]*graph.Node, 0, len(nodes)) + nodeIndexes = make(map[graph.ID]int, len(nodes)) + ) + for _, node := range nodes { - if err := s.Append(ctx, node, schemaManager, kindIDEncoder); err != nil { - return err + if existingIndex, hasExisting := nodeIndexes[node.ID]; hasExisting { + coalesced[existingIndex].Merge(cloneNodeUpdate(node)) + } else { + nodeIndexes[node.ID] = len(coalesced) + coalesced = append(coalesced, cloneNodeUpdate(node)) } } - return nil -} - -type NodeUpsertParameters struct { - IDFutures []*sql.Future[graph.ID] - KindIDSlices []string - Properties []pgtype.JSONB + return coalesced } -func NewNodeUpsertParameters(size int) *NodeUpsertParameters { - return &NodeUpsertParameters{ - IDFutures: make([]*sql.Future[graph.ID], 0, size), - KindIDSlices: make([]string, 0, size), - Properties: make([]pgtype.JSONB, 0, size), +func (s *batch) flushNodeUpdateBatch(nodes []*graph.Node) error { + if len(nodes) == 0 { + return nil } -} -func (s *NodeUpsertParameters) Format(graphTarget model.Graph) []any { - return []any{ - graphTarget.ID, - s.KindIDSlices, - s.Properties, - } -} + nodes = coalesceNodeUpdates(nodes) -func (s *NodeUpsertParameters) Append(ctx 
context.Context, update *sql.NodeUpdate, schemaManager *SchemaManager, kindIDEncoder Int2ArrayEncoder) error { - s.IDFutures = append(s.IDFutures, update.IDFuture) - - if mappedKindIDs, err := schemaManager.AssertKinds(ctx, update.Node.Kinds); err != nil { - return fmt.Errorf("unable to map kinds %w", err) - } else { - s.KindIDSlices = append(s.KindIDSlices, kindIDEncoder.Encode(mappedKindIDs)) + if err := s.assertNodeUpdateKinds(nodes); err != nil { + return err } - if propertiesJSONB, err := pgsql.PropertiesToJSONB(update.Node.Properties); err != nil { + if graphTarget, err := s.innerTransaction.getTargetGraph(); err != nil { return err } else { - s.Properties = append(s.Properties, propertiesJSONB) - } - - return nil -} + stage := batchCopyStage{ + Name: "node update", + TableIdentifier: pgx.Identifier{sql.NodeUpdateStagingTable}, + Columns: sql.NodeUpdateStagingColumns, + Source: newNodeUpdateCopySource(s.ctx, nodes, s.schemaManager), + BeforeCopy: []batchChunkStatement{{ + Name: "create node update staging table", + Statement: sql.FormatCreateNodeUpdateStagingTable(sql.NodeUpdateStagingTable), + }}, + AfterCopy: []batchChunkStatement{{ + Name: "merge node update staging table", + Statement: sql.FormatMergeNodeLargeUpdate(graphTarget, sql.NodeUpdateStagingTable), + }}, + } -func (s *NodeUpsertParameters) AppendAll(ctx context.Context, updates *sql.NodeUpdateBatch, schemaManager *SchemaManager, kindIDEncoder Int2ArrayEncoder) error { - for _, nextUpdate := range updates.Updates { - if err := s.Append(ctx, nextUpdate, schemaManager, kindIDEncoder); err != nil { + if _, err := copyBatchStageChunk(s.ctx, s.innerTransaction.conn, stage); err != nil { return err } } @@ -518,53 +432,27 @@ func (s *NodeUpsertParameters) AppendAll(ctx context.Context, updates *sql.NodeU return nil } -type RelationshipUpdateByParameters struct { - StartIDs []graph.ID - EndIDs []graph.ID - KindIDs []int16 - Properties []pgtype.JSONB -} - -func NewRelationshipUpdateByParameters(size int) 
*RelationshipUpdateByParameters { - return &RelationshipUpdateByParameters{ - StartIDs: make([]graph.ID, 0, size), - EndIDs: make([]graph.ID, 0, size), - KindIDs: make([]int16, 0, size), - Properties: make([]pgtype.JSONB, 0, size), +func (s *batch) tryFlushNodeUpdateBuffer() error { + if err := s.flushNodeUpdateBatch(s.nodeUpdateBuffer); err != nil { + return err } -} -func (s *RelationshipUpdateByParameters) Format(graphTarget model.Graph) []any { - return []any{ - graphTarget.ID, - s.StartIDs, - s.EndIDs, - s.KindIDs, - s.Properties, - } + s.nodeUpdateBuffer = s.nodeUpdateBuffer[:0] + return nil } -func (s *RelationshipUpdateByParameters) Append(ctx context.Context, update *sql.RelationshipUpdate, schemaManager *SchemaManager) error { - s.StartIDs = append(s.StartIDs, update.StartID.Value) - s.EndIDs = append(s.EndIDs, update.EndID.Value) - - if mappedKindIDs, err := schemaManager.AssertKinds(ctx, []graph.Kind{update.Relationship.Kind}); err != nil { - return err - } else { - s.KindIDs = append(s.KindIDs, mappedKindIDs...) 
+func relationshipUpdateValues(updates *sql.RelationshipUpdateBatch) []*sql.RelationshipUpdate { + values := make([]*sql.RelationshipUpdate, 0, len(updates.Updates)) + for _, update := range updates.Updates { + values = append(values, update) } - if propertiesJSONB, err := pgsql.PropertiesToJSONB(update.Relationship.Properties); err != nil { - return err - } else { - s.Properties = append(s.Properties, propertiesJSONB) - } - return nil + return values } -func (s *RelationshipUpdateByParameters) AppendAll(ctx context.Context, updates *sql.RelationshipUpdateBatch, schemaManager *SchemaManager) error { - for _, nextUpdate := range updates.Updates { - if err := s.Append(ctx, nextUpdate, schemaManager); err != nil { +func (s *batch) assertRelationshipUpdateKinds(updates []*sql.RelationshipUpdate) error { + for _, update := range updates { + if _, err := s.schemaManager.AssertKinds(s.ctx, []graph.Kind{update.Relationship.Kind}); err != nil { return err } } @@ -577,18 +465,34 @@ func (s *batch) flushRelationshipUpdateByBuffer(updates *sql.RelationshipUpdateB return err } - parameters := NewRelationshipUpdateByParameters(len(updates.Updates)) + values := relationshipUpdateValues(updates) + if len(values) == 0 { + return nil + } - if err := parameters.AppendAll(s.ctx, updates, s.schemaManager); err != nil { + if err := s.assertRelationshipUpdateKinds(values); err != nil { return err } if graphTarget, err := s.innerTransaction.getTargetGraph(); err != nil { return err } else { - query := sql.FormatRelationshipPartitionUpsert(graphTarget, updates.IdentityProperties) + stage := batchCopyStage{ + Name: "relationship update", + TableIdentifier: pgx.Identifier{sql.RelationshipCreateStagingTable}, + Columns: sql.RelationshipCreateStagingColumns, + Source: newRelationshipUpdateCopySource(s.ctx, graphTarget.ID, values, s.schemaManager), + BeforeCopy: []batchChunkStatement{{ + Name: "create relationship update staging table", + Statement: 
sql.FormatCreateRelationshipCreateStagingTable(sql.RelationshipCreateStagingTable), + }}, + AfterCopy: []batchChunkStatement{{ + Name: "merge relationship update staging table", + Statement: sql.FormatMergeRelationshipUpdateStaging(graphTarget, updates.IdentityProperties, sql.RelationshipCreateStagingTable), + }}, + } - if _, err := s.innerTransaction.conn.Exec(s.ctx, query, parameters.Format(graphTarget)...); err != nil { + if _, err := copyBatchStageChunk(s.ctx, s.innerTransaction.conn, stage); err != nil { return err } } @@ -607,111 +511,34 @@ func (s *batch) tryFlushRelationshipUpdateByBuffer() error { return nil } -type relationshipCreateBatch struct { - startIDs []uint64 - endIDs []uint64 - edgeKindIDs []int16 - edgePropertyBags []pgtype.JSONB -} - -func newRelationshipCreateBatch(size int) *relationshipCreateBatch { - return &relationshipCreateBatch{ - startIDs: make([]uint64, 0, size), - endIDs: make([]uint64, 0, size), - edgeKindIDs: make([]int16, 0, size), - edgePropertyBags: make([]pgtype.JSONB, 0, size), - } -} - -func (s *relationshipCreateBatch) Add(startID, endID uint64, edgeKindID int16) { - s.startIDs = append(s.startIDs, startID) - s.edgeKindIDs = append(s.edgeKindIDs, edgeKindID) - s.endIDs = append(s.endIDs, endID) -} - -func (s *relationshipCreateBatch) EncodeProperties(edgePropertiesBatch []*graph.Properties) error { - for _, edgeProperties := range edgePropertiesBatch { - if propertiesJSONB, err := pgsql.PropertiesToJSONB(edgeProperties); err != nil { - return err - } else { - s.edgePropertyBags = append(s.edgePropertyBags, propertiesJSONB) - } - } - - return nil -} - -type relationshipCreateBatchBuilder struct { - keyToEdgeID map[string]uint64 - relationshipUpdateBatch *relationshipCreateBatch - edgePropertiesIndex map[uint64]int - edgePropertiesBatch []*graph.Properties -} - -func newRelationshipCreateBatchBuilder(size int) *relationshipCreateBatchBuilder { - return &relationshipCreateBatchBuilder{ - keyToEdgeID: map[string]uint64{}, - 
relationshipUpdateBatch: newRelationshipCreateBatch(size), - edgePropertiesIndex: map[uint64]int{}, +func (s *batch) flushRelationshipCreateBuffer() error { + if len(s.relationshipCreateBuffer) == 0 { + return nil } -} -func (s *relationshipCreateBatchBuilder) Build() (*relationshipCreateBatch, error) { - return s.relationshipUpdateBatch, s.relationshipUpdateBatch.EncodeProperties(s.edgePropertiesBatch) -} - -func (s *relationshipCreateBatchBuilder) Add(ctx context.Context, kindMapper KindMapper, edge *graph.Relationship) error { - keyBuilder := strings.Builder{} - - keyBuilder.WriteString(edge.StartID.String()) - keyBuilder.WriteString(edge.EndID.String()) - keyBuilder.WriteString(edge.Kind.String()) - - key := keyBuilder.String() - - if existingPropertiesIdx, hasExisting := s.keyToEdgeID[key]; hasExisting { - s.edgePropertiesBatch[existingPropertiesIdx].Merge(edge.Properties) + if graphTarget, err := s.innerTransaction.getTargetGraph(); err != nil { + return err } else { - var ( - startID = edge.StartID.Uint64() - edgeID = edge.ID.Uint64() - endID = edge.EndID.Uint64() - edgeProperties = edge.Properties.Clone() - ) - - if edgeKindID, err := kindMapper.MapKind(ctx, edge.Kind); err != nil { - return err - } else { - s.relationshipUpdateBatch.Add(startID, endID, edgeKindID) + stage := batchCopyStage{ + Name: "relationship create", + TableIdentifier: pgx.Identifier{sql.RelationshipCreateStagingTable}, + Columns: sql.RelationshipCreateStagingColumns, + Source: newRelationshipCreateCopySource(s.ctx, graphTarget.ID, s.relationshipCreateBuffer, s.schemaManager), + BeforeCopy: []batchChunkStatement{{ + Name: "create relationship create staging table", + Statement: sql.FormatCreateRelationshipCreateStagingTable(sql.RelationshipCreateStagingTable), + }}, + AfterCopy: []batchChunkStatement{{ + Name: "merge relationship create staging table", + Statement: sql.FormatMergeRelationshipCreateStaging(graphTarget, sql.RelationshipCreateStagingTable), + }}, } - s.keyToEdgeID[key] = 
edgeID - - s.edgePropertiesBatch = append(s.edgePropertiesBatch, edgeProperties) - s.edgePropertiesIndex[edgeID] = len(s.edgePropertiesBatch) - 1 - } - - return nil -} - -func (s *batch) flushRelationshipCreateBuffer() error { - batchBuilder := newRelationshipCreateBatchBuilder(len(s.relationshipCreateBuffer)) - - for _, nextRel := range s.relationshipCreateBuffer { - if err := batchBuilder.Add(s.ctx, s.schemaManager, nextRel); err != nil { + if _, err := copyBatchStageChunk(s.ctx, s.innerTransaction.conn, stage); err != nil { return err } } - if createBatch, err := batchBuilder.Build(); err != nil { - return err - } else if graphTarget, err := s.innerTransaction.getTargetGraph(); err != nil { - return err - } else if _, err := s.innerTransaction.conn.Exec(s.ctx, createEdgeBatchStatement, graphTarget.ID, createBatch.startIDs, createBatch.endIDs, createBatch.edgeKindIDs, createBatch.edgePropertyBags); err != nil { - slog.Info(fmt.Sprintf("Num merged property bags: %d - Num edge keys: %d - StartID batch size: %d", len(batchBuilder.edgePropertiesIndex), len(batchBuilder.keyToEdgeID), len(batchBuilder.relationshipUpdateBatch.startIDs))) - return err - } - s.relationshipCreateBuffer = s.relationshipCreateBuffer[:0] return nil } diff --git a/drivers/pg/batch_copy.go b/drivers/pg/batch_copy.go new file mode 100644 index 00000000..056bf9ca --- /dev/null +++ b/drivers/pg/batch_copy.go @@ -0,0 +1,96 @@ +package pg + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +type batchChunkStatement struct { + Name string + Statement string + Args []any +} + +type batchCopyStage struct { + Name string + TableIdentifier pgx.Identifier + Columns []string + Source pgx.CopyFromSource + BeforeCopy []batchChunkStatement + AfterCopy []batchChunkStatement +} + +func runBatchChunk(ctx context.Context, conn *pgxpool.Conn, delegate func(pgx.Tx) error) error { + tx, err := conn.Begin(ctx) + if err != nil { + return err + } + + committed := false 
+ defer func() { + if !committed { + _ = tx.Rollback(ctx) + } + }() + + if err := delegate(tx); err != nil { + return err + } + + if err := tx.Commit(ctx); err != nil { + return err + } + + committed = true + return nil +} + +func execBatchChunkStatements(ctx context.Context, tx pgx.Tx, statements []batchChunkStatement) error { + for _, statement := range statements { + if _, err := tx.Exec(ctx, statement.Statement, statement.Args...); err != nil { + if statement.Name == "" { + return err + } + + return fmt.Errorf("%s: %w", statement.Name, err) + } + } + + return nil +} + +func copyBatchStage(ctx context.Context, tx pgx.Tx, stage batchCopyStage) (int64, error) { + if err := execBatchChunkStatements(ctx, tx, stage.BeforeCopy); err != nil { + return 0, err + } + + copied, err := tx.CopyFrom(ctx, stage.TableIdentifier, stage.Columns, stage.Source) + if err != nil { + if stage.Name == "" { + return copied, err + } + + return copied, fmt.Errorf("%s copy: %w", stage.Name, err) + } + + if err := execBatchChunkStatements(ctx, tx, stage.AfterCopy); err != nil { + return copied, err + } + + return copied, nil +} + +func copyBatchStageChunk(ctx context.Context, conn *pgxpool.Conn, stage batchCopyStage) (int64, error) { + var copied int64 + + err := runBatchChunk(ctx, conn, func(tx pgx.Tx) error { + var err error + copied, err = copyBatchStage(ctx, tx, stage) + return err + }) + + return copied, err +} diff --git a/drivers/pg/batch_copy_source.go b/drivers/pg/batch_copy_source.go new file mode 100644 index 00000000..6df4a2dd --- /dev/null +++ b/drivers/pg/batch_copy_source.go @@ -0,0 +1,55 @@ +package pg + +import ( + "context" + + "github.com/jackc/pgx/v5" +) + +type batchCopyRowEncoder[T any] func(context.Context, T) ([]any, error) + +type batchSliceCopySource[T any] struct { + ctx context.Context + values []T + encoder batchCopyRowEncoder[T] + idx int + row []any + err error +} + +var _ pgx.CopyFromSource = (*batchSliceCopySource[int])(nil) + +func newBatchSliceCopySource[T 
any](ctx context.Context, values []T, encoder batchCopyRowEncoder[T]) *batchSliceCopySource[T] { + return &batchSliceCopySource[T]{ + ctx: ctx, + values: values, + encoder: encoder, + idx: -1, + } +} + +func (s *batchSliceCopySource[T]) Next() bool { + if s.err != nil { + return false + } + + s.idx++ + if s.idx >= len(s.values) { + return false + } + + s.row, s.err = s.encoder(s.ctx, s.values[s.idx]) + return s.err == nil +} + +func (s *batchSliceCopySource[T]) Values() ([]any, error) { + if s.err != nil { + return nil, s.err + } + + return s.row, nil +} + +func (s *batchSliceCopySource[T]) Err() error { + return s.err +} diff --git a/drivers/pg/batch_copy_source_test.go b/drivers/pg/batch_copy_source_test.go new file mode 100644 index 00000000..44288523 --- /dev/null +++ b/drivers/pg/batch_copy_source_test.go @@ -0,0 +1,53 @@ +package pg + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestBatchSliceCopySourceStreamsRows(t *testing.T) { + source := newBatchSliceCopySource(context.Background(), []int{1, 2}, func(_ context.Context, value int) ([]any, error) { + return []any{value, value * 10}, nil + }) + + require.True(t, source.Next()) + values, err := source.Values() + require.NoError(t, err) + require.Equal(t, []any{1, 10}, values) + + require.True(t, source.Next()) + values, err = source.Values() + require.NoError(t, err) + require.Equal(t, []any{2, 20}, values) + + require.False(t, source.Next()) + require.NoError(t, source.Err()) +} + +func TestBatchSliceCopySourceStopsOnEncodeError(t *testing.T) { + var ( + expectedErr = errors.New("encode failed") + source = newBatchSliceCopySource(context.Background(), []int{1, 2}, func(_ context.Context, value int) ([]any, error) { + if value == 2 { + return nil, expectedErr + } + + return []any{value}, nil + }) + ) + + require.True(t, source.Next()) + values, err := source.Values() + require.NoError(t, err) + require.Equal(t, []any{1}, values) + + require.False(t, 
source.Next()) + require.ErrorIs(t, source.Err(), expectedErr) + + values, err = source.Values() + require.Nil(t, values) + require.ErrorIs(t, err, expectedErr) +} diff --git a/drivers/pg/batch_node_source.go b/drivers/pg/batch_node_source.go new file mode 100644 index 00000000..1d049a88 --- /dev/null +++ b/drivers/pg/batch_node_source.go @@ -0,0 +1,97 @@ +package pg + +import ( + "bytes" + "context" + + "github.com/jackc/pgx/v5" + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/graph" +) + +type nodeCreateCopySource struct { + ctx context.Context + graphID int32 + nodes []*graph.Node + kindMapper KindMapper + includeID bool + kindIDEncoder Int2ArrayEncoder + idx int + row []any + err error +} + +var _ pgx.CopyFromSource = (*nodeCreateCopySource)(nil) + +func newNodeCreateCopySource(ctx context.Context, graphID int32, nodes []*graph.Node, kindMapper KindMapper, includeID bool) *nodeCreateCopySource { + return &nodeCreateCopySource{ + ctx: ctx, + graphID: graphID, + nodes: nodes, + kindMapper: kindMapper, + includeID: includeID, + kindIDEncoder: Int2ArrayEncoder{ + buffer: &bytes.Buffer{}, + }, + idx: -1, + } +} + +func (s *nodeCreateCopySource) Next() bool { + if s.err != nil { + return false + } + + s.idx++ + if s.idx >= len(s.nodes) { + return false + } + + nextNode := s.nodes[s.idx] + + kindIDs, err := s.kindMapper.MapKinds(s.ctx, nextNode.Kinds) + if err != nil { + s.err = err + return false + } + + propertiesJSONB, err := pgsql.PropertiesToJSONB(nextNode.Properties) + if err != nil { + s.err = err + return false + } + + var ( + kindIDsText = s.kindIDEncoder.Encode(kindIDs) + propertiesText = string(propertiesJSONB.Bytes) + ) + + if s.includeID { + s.row = []any{ + nextNode.ID.Int64(), + s.graphID, + kindIDsText, + propertiesText, + } + } else { + s.row = []any{ + s.graphID, + kindIDsText, + propertiesText, + } + } + + return true +} + +func (s *nodeCreateCopySource) Values() ([]any, error) { + if s.err != nil { + return nil, 
s.err + } + + return s.row, nil +} + +func (s *nodeCreateCopySource) Err() error { + return s.err +} diff --git a/drivers/pg/batch_node_source_test.go b/drivers/pg/batch_node_source_test.go new file mode 100644 index 00000000..d4c03406 --- /dev/null +++ b/drivers/pg/batch_node_source_test.go @@ -0,0 +1,75 @@ +package pg + +import ( + "context" + "encoding/json" + "strconv" + "testing" + + "github.com/specterops/dawgs/drivers/pg/pgutil" + "github.com/specterops/dawgs/graph" + "github.com/stretchr/testify/require" +) + +func TestNodeCreateCopySourceStreamsNodesWithIDs(t *testing.T) { + var ( + ctx = context.Background() + kindMapper = pgutil.NewInMemoryKindMapper() + userKind = graph.StringKind("User") + userKindID = kindMapper.Put(userKind) + node = graph.NewNode(graph.ID(10), graph.NewProperties().Set("name", "alice"), userKind) + source = newNodeCreateCopySource(ctx, 7, []*graph.Node{node}, kindMapper, true) + ) + + require.True(t, source.Next()) + values, err := source.Values() + require.NoError(t, err) + require.Len(t, values, 4) + require.Equal(t, int64(10), values[0]) + require.Equal(t, int32(7), values[1]) + require.Equal(t, "{"+strconv.Itoa(int(userKindID))+"}", values[2]) + + propertiesText, ok := values[3].(string) + require.True(t, ok) + + var properties map[string]any + require.NoError(t, json.Unmarshal([]byte(propertiesText), &properties)) + require.Equal(t, map[string]any{"name": "alice"}, properties) + + require.False(t, source.Next()) + require.NoError(t, source.Err()) +} + +func TestNodeCreateCopySourceStreamsNodesWithoutIDs(t *testing.T) { + var ( + ctx = context.Background() + kindMapper = pgutil.NewInMemoryKindMapper() + userKind = graph.StringKind("User") + ) + kindMapper.Put(userKind) + + var ( + node = graph.PrepareNode(graph.NewProperties(), userKind) + source = newNodeCreateCopySource(ctx, 7, []*graph.Node{node}, kindMapper, false) + ) + + require.True(t, source.Next()) + values, err := source.Values() + require.NoError(t, err) + 
require.Len(t, values, 3) + require.Equal(t, int32(7), values[0]) + require.Equal(t, "{}", values[2]) +} + +func TestNodeCreateCopySourceStopsWhenKindIsMissing(t *testing.T) { + source := newNodeCreateCopySource( + context.Background(), + 7, + []*graph.Node{graph.NewNode(1, graph.NewProperties(), graph.StringKind("Missing"))}, + pgutil.NewInMemoryKindMapper(), + true, + ) + + require.False(t, source.Next()) + require.Error(t, source.Err()) +} diff --git a/drivers/pg/batch_node_update_source.go b/drivers/pg/batch_node_update_source.go new file mode 100644 index 00000000..8cf7a048 --- /dev/null +++ b/drivers/pg/batch_node_update_source.go @@ -0,0 +1,87 @@ +package pg + +import ( + "bytes" + "context" + + "github.com/jackc/pgx/v5" + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/graph" +) + +type nodeUpdateCopySource struct { + ctx context.Context + nodes []*graph.Node + kindMapper KindMapper + kindIDEncoder Int2ArrayEncoder + idx int + row []any + err error +} + +var _ pgx.CopyFromSource = (*nodeUpdateCopySource)(nil) + +func newNodeUpdateCopySource(ctx context.Context, nodes []*graph.Node, kindMapper KindMapper) *nodeUpdateCopySource { + return &nodeUpdateCopySource{ + ctx: ctx, + nodes: nodes, + kindMapper: kindMapper, + kindIDEncoder: Int2ArrayEncoder{ + buffer: &bytes.Buffer{}, + }, + idx: -1, + } +} + +func (s *nodeUpdateCopySource) Next() bool { + if s.err != nil { + return false + } + + s.idx++ + if s.idx >= len(s.nodes) { + return false + } + + nextNode := s.nodes[s.idx] + + addedKindIDs, err := s.kindMapper.MapKinds(s.ctx, nextNode.Kinds) + if err != nil { + s.err = err + return false + } + + deletedKindIDs, err := s.kindMapper.MapKinds(s.ctx, nextNode.DeletedKinds) + if err != nil { + s.err = err + return false + } + + propertiesJSONB, err := pgsql.PropertiesToJSONB(nextNode.Properties) + if err != nil { + s.err = err + return false + } + + s.row = []any{ + nextNode.ID.Int64(), + s.kindIDEncoder.Encode(addedKindIDs), + 
s.kindIDEncoder.Encode(deletedKindIDs), + string(propertiesJSONB.Bytes), + pgsql.DeletedPropertiesToString(nextNode.Properties), + } + + return true +} + +func (s *nodeUpdateCopySource) Values() ([]any, error) { + if s.err != nil { + return nil, s.err + } + + return s.row, nil +} + +func (s *nodeUpdateCopySource) Err() error { + return s.err +} diff --git a/drivers/pg/batch_node_update_source_test.go b/drivers/pg/batch_node_update_source_test.go new file mode 100644 index 00000000..170aa115 --- /dev/null +++ b/drivers/pg/batch_node_update_source_test.go @@ -0,0 +1,62 @@ +package pg + +import ( + "context" + "encoding/json" + "strconv" + "testing" + + "github.com/specterops/dawgs/drivers/pg/pgutil" + "github.com/specterops/dawgs/graph" + "github.com/stretchr/testify/require" +) + +func TestNodeUpdateCopySourceStreamsNodes(t *testing.T) { + var ( + ctx = context.Background() + kindMapper = pgutil.NewInMemoryKindMapper() + userKind = graph.StringKind("User") + groupKind = graph.StringKind("Group") + userKindID = kindMapper.Put(userKind) + groupKindID = kindMapper.Put(groupKind) + ) + + properties := graph.NewProperties() + properties.Set("name", "alice") + properties.Delete("stale") + + node := graph.NewNode(graph.ID(10), properties, userKind) + node.DeletedKinds = graph.Kinds{groupKind} + + source := newNodeUpdateCopySource(ctx, []*graph.Node{node}, kindMapper) + + require.True(t, source.Next()) + values, err := source.Values() + require.NoError(t, err) + require.Len(t, values, 5) + require.Equal(t, int64(10), values[0]) + require.Equal(t, "{"+strconv.Itoa(int(userKindID))+"}", values[1]) + require.Equal(t, "{"+strconv.Itoa(int(groupKindID))+"}", values[2]) + require.Equal(t, `{"stale"}`, values[4]) + + propertiesText, ok := values[3].(string) + require.True(t, ok) + + var encodedProperties map[string]any + require.NoError(t, json.Unmarshal([]byte(propertiesText), &encodedProperties)) + require.Equal(t, map[string]any{"name": "alice"}, encodedProperties) + + 
require.False(t, source.Next()) + require.NoError(t, source.Err()) +} + +func TestNodeUpdateCopySourceStopsWhenKindIsMissing(t *testing.T) { + source := newNodeUpdateCopySource( + context.Background(), + []*graph.Node{graph.NewNode(1, graph.NewProperties(), graph.StringKind("Missing"))}, + pgutil.NewInMemoryKindMapper(), + ) + + require.False(t, source.Next()) + require.Error(t, source.Err()) +} diff --git a/drivers/pg/batch_node_upsert_source.go b/drivers/pg/batch_node_upsert_source.go new file mode 100644 index 00000000..b1a1be58 --- /dev/null +++ b/drivers/pg/batch_node_upsert_source.go @@ -0,0 +1,82 @@ +package pg + +import ( + "bytes" + "context" + + "github.com/jackc/pgx/v5" + "github.com/specterops/dawgs/cypher/models/pgsql" + sql "github.com/specterops/dawgs/drivers/pg/query" +) + +type nodeUpsertCopySource struct { + ctx context.Context + graphID int32 + updates []*sql.NodeUpdate + kindMapper KindMapper + kindIDEncoder Int2ArrayEncoder + idx int + row []any + err error +} + +var _ pgx.CopyFromSource = (*nodeUpsertCopySource)(nil) + +func newNodeUpsertCopySource(ctx context.Context, graphID int32, updates []*sql.NodeUpdate, kindMapper KindMapper) *nodeUpsertCopySource { + return &nodeUpsertCopySource{ + ctx: ctx, + graphID: graphID, + updates: updates, + kindMapper: kindMapper, + kindIDEncoder: Int2ArrayEncoder{ + buffer: &bytes.Buffer{}, + }, + idx: -1, + } +} + +func (s *nodeUpsertCopySource) Next() bool { + if s.err != nil { + return false + } + + s.idx++ + if s.idx >= len(s.updates) { + return false + } + + nextUpdate := s.updates[s.idx] + + kindIDs, err := s.kindMapper.MapKinds(s.ctx, nextUpdate.Node.Kinds) + if err != nil { + s.err = err + return false + } + + propertiesJSONB, err := pgsql.PropertiesToJSONB(nextUpdate.Node.Properties) + if err != nil { + s.err = err + return false + } + + s.row = []any{ + s.idx, + s.graphID, + s.kindIDEncoder.Encode(kindIDs), + string(propertiesJSONB.Bytes), + } + + return true +} + +func (s *nodeUpsertCopySource) 
Values() ([]any, error) { + if s.err != nil { + return nil, s.err + } + + return s.row, nil +} + +func (s *nodeUpsertCopySource) Err() error { + return s.err +} diff --git a/drivers/pg/batch_node_upsert_source_test.go b/drivers/pg/batch_node_upsert_source_test.go new file mode 100644 index 00000000..1d61a943 --- /dev/null +++ b/drivers/pg/batch_node_upsert_source_test.go @@ -0,0 +1,57 @@ +package pg + +import ( + "context" + "encoding/json" + "strconv" + "testing" + + "github.com/specterops/dawgs/drivers/pg/pgutil" + sql "github.com/specterops/dawgs/drivers/pg/query" + "github.com/specterops/dawgs/graph" + "github.com/stretchr/testify/require" +) + +func TestNodeUpsertCopySourceStreamsUpdates(t *testing.T) { + var ( + ctx = context.Background() + kindMapper = pgutil.NewInMemoryKindMapper() + userKind = graph.StringKind("User") + userKindID = kindMapper.Put(userKind) + update = &sql.NodeUpdate{ + IDFuture: sql.NewFuture(graph.ID(0)), + Node: graph.NewNode(0, graph.NewProperties().Set("objectid", "alice"), userKind), + } + source = newNodeUpsertCopySource(ctx, 7, []*sql.NodeUpdate{update}, kindMapper) + ) + + require.True(t, source.Next()) + values, err := source.Values() + require.NoError(t, err) + require.Len(t, values, 4) + require.Equal(t, 0, values[0]) + require.Equal(t, int32(7), values[1]) + require.Equal(t, "{"+strconv.Itoa(int(userKindID))+"}", values[2]) + + propertiesText, ok := values[3].(string) + require.True(t, ok) + + var properties map[string]any + require.NoError(t, json.Unmarshal([]byte(propertiesText), &properties)) + require.Equal(t, map[string]any{"objectid": "alice"}, properties) +} + +func TestNodeUpsertCopySourceStopsWhenKindIsMissing(t *testing.T) { + source := newNodeUpsertCopySource( + context.Background(), + 7, + []*sql.NodeUpdate{{ + IDFuture: sql.NewFuture(graph.ID(0)), + Node: graph.NewNode(0, graph.NewProperties(), graph.StringKind("Missing")), + }}, + pgutil.NewInMemoryKindMapper(), + ) + + require.False(t, source.Next()) + 
require.Error(t, source.Err()) +} diff --git a/drivers/pg/batch_relationship_source.go b/drivers/pg/batch_relationship_source.go new file mode 100644 index 00000000..2cf1b026 --- /dev/null +++ b/drivers/pg/batch_relationship_source.go @@ -0,0 +1,79 @@ +package pg + +import ( + "context" + + "github.com/jackc/pgx/v5" + "github.com/specterops/dawgs/cypher/models/pgsql" + "github.com/specterops/dawgs/graph" +) + +type relationshipCreateCopySource struct { + ctx context.Context + graphID int32 + relationships []*graph.Relationship + kindMapper KindMapper + idx int + row []any + err error +} + +var _ pgx.CopyFromSource = (*relationshipCreateCopySource)(nil) + +func newRelationshipCreateCopySource(ctx context.Context, graphID int32, relationships []*graph.Relationship, kindMapper KindMapper) *relationshipCreateCopySource { + return &relationshipCreateCopySource{ + ctx: ctx, + graphID: graphID, + relationships: relationships, + kindMapper: kindMapper, + idx: -1, + } +} + +func (s *relationshipCreateCopySource) Next() bool { + if s.err != nil { + return false + } + + s.idx++ + if s.idx >= len(s.relationships) { + return false + } + + nextRelationship := s.relationships[s.idx] + + kindID, err := s.kindMapper.MapKind(s.ctx, nextRelationship.Kind) + if err != nil { + s.err = err + return false + } + + propertiesJSONB, err := pgsql.PropertiesToJSONB(nextRelationship.Properties) + if err != nil { + s.err = err + return false + } + + s.row = []any{ + s.idx, + s.graphID, + nextRelationship.StartID.Int64(), + nextRelationship.EndID.Int64(), + kindID, + string(propertiesJSONB.Bytes), + } + + return true +} + +func (s *relationshipCreateCopySource) Values() ([]any, error) { + if s.err != nil { + return nil, s.err + } + + return s.row, nil +} + +func (s *relationshipCreateCopySource) Err() error { + return s.err +} diff --git a/drivers/pg/batch_relationship_source_test.go b/drivers/pg/batch_relationship_source_test.go new file mode 100644 index 00000000..83b07860 --- /dev/null +++ 
b/drivers/pg/batch_relationship_source_test.go @@ -0,0 +1,62 @@ +package pg + +import ( + "context" + "encoding/json" + "testing" + + "github.com/specterops/dawgs/drivers/pg/pgutil" + "github.com/specterops/dawgs/graph" + "github.com/stretchr/testify/require" +) + +func TestRelationshipCreateCopySourceStreamsRelationships(t *testing.T) { + var ( + ctx = context.Background() + kindMapper = pgutil.NewInMemoryKindMapper() + edgeKind = graph.StringKind("MemberOf") + edgeKindID = kindMapper.Put(edgeKind) + ) + + relationship := graph.NewRelationship( + graph.ID(5), + graph.ID(10), + graph.ID(20), + graph.NewProperties().Set("name", "alpha"), + edgeKind, + ) + + source := newRelationshipCreateCopySource(ctx, 7, []*graph.Relationship{relationship}, kindMapper) + + require.True(t, source.Next()) + values, err := source.Values() + require.NoError(t, err) + require.Len(t, values, 6) + require.Equal(t, 0, values[0]) + require.Equal(t, int32(7), values[1]) + require.Equal(t, int64(10), values[2]) + require.Equal(t, int64(20), values[3]) + require.Equal(t, edgeKindID, values[4]) + + propertiesText, ok := values[5].(string) + require.True(t, ok) + + var properties map[string]any + require.NoError(t, json.Unmarshal([]byte(propertiesText), &properties)) + require.Equal(t, map[string]any{"name": "alpha"}, properties) + + require.False(t, source.Next()) + require.NoError(t, source.Err()) +} + +func TestRelationshipCreateCopySourceStopsWhenKindIsMissing(t *testing.T) { + source := newRelationshipCreateCopySource( + context.Background(), + 7, + []*graph.Relationship{graph.NewRelationship(1, 2, 3, graph.NewProperties(), graph.StringKind("Missing"))}, + pgutil.NewInMemoryKindMapper(), + ) + + require.False(t, source.Next()) + require.Error(t, source.Err()) +} diff --git a/drivers/pg/batch_relationship_update_source.go b/drivers/pg/batch_relationship_update_source.go new file mode 100644 index 00000000..c93cc424 --- /dev/null +++ b/drivers/pg/batch_relationship_update_source.go @@ -0,0 
+1,79 @@ +package pg + +import ( + "context" + + "github.com/jackc/pgx/v5" + "github.com/specterops/dawgs/cypher/models/pgsql" + sql "github.com/specterops/dawgs/drivers/pg/query" +) + +type relationshipUpdateCopySource struct { + ctx context.Context + graphID int32 + updates []*sql.RelationshipUpdate + kindMapper KindMapper + idx int + row []any + err error +} + +var _ pgx.CopyFromSource = (*relationshipUpdateCopySource)(nil) + +func newRelationshipUpdateCopySource(ctx context.Context, graphID int32, updates []*sql.RelationshipUpdate, kindMapper KindMapper) *relationshipUpdateCopySource { + return &relationshipUpdateCopySource{ + ctx: ctx, + graphID: graphID, + updates: updates, + kindMapper: kindMapper, + idx: -1, + } +} + +func (s *relationshipUpdateCopySource) Next() bool { + if s.err != nil { + return false + } + + s.idx++ + if s.idx >= len(s.updates) { + return false + } + + nextUpdate := s.updates[s.idx] + + kindID, err := s.kindMapper.MapKind(s.ctx, nextUpdate.Relationship.Kind) + if err != nil { + s.err = err + return false + } + + propertiesJSONB, err := pgsql.PropertiesToJSONB(nextUpdate.Relationship.Properties) + if err != nil { + s.err = err + return false + } + + s.row = []any{ + s.idx, + s.graphID, + nextUpdate.StartID.Value.Int64(), + nextUpdate.EndID.Value.Int64(), + kindID, + string(propertiesJSONB.Bytes), + } + + return true +} + +func (s *relationshipUpdateCopySource) Values() ([]any, error) { + if s.err != nil { + return nil, s.err + } + + return s.row, nil +} + +func (s *relationshipUpdateCopySource) Err() error { + return s.err +} diff --git a/drivers/pg/batch_relationship_update_source_test.go b/drivers/pg/batch_relationship_update_source_test.go new file mode 100644 index 00000000..22a1c00d --- /dev/null +++ b/drivers/pg/batch_relationship_update_source_test.go @@ -0,0 +1,66 @@ +package pg + +import ( + "context" + "encoding/json" + "testing" + + "github.com/specterops/dawgs/drivers/pg/pgutil" + sql 
"github.com/specterops/dawgs/drivers/pg/query" + "github.com/specterops/dawgs/graph" + "github.com/stretchr/testify/require" +) + +func TestRelationshipUpdateCopySourceStreamsUpdates(t *testing.T) { + var ( + ctx = context.Background() + kindMapper = pgutil.NewInMemoryKindMapper() + edgeKind = graph.StringKind("MemberOf") + edgeKindID = kindMapper.Put(edgeKind) + update = &sql.RelationshipUpdate{ + StartID: sql.NewFuture(graph.ID(10)), + EndID: sql.NewFuture(graph.ID(20)), + Relationship: graph.NewRelationship( + 0, + 0, + 0, + graph.NewProperties().Set("objectid", "edge-1"), + edgeKind, + ), + } + source = newRelationshipUpdateCopySource(ctx, 7, []*sql.RelationshipUpdate{update}, kindMapper) + ) + + require.True(t, source.Next()) + values, err := source.Values() + require.NoError(t, err) + require.Len(t, values, 6) + require.Equal(t, 0, values[0]) + require.Equal(t, int32(7), values[1]) + require.Equal(t, int64(10), values[2]) + require.Equal(t, int64(20), values[3]) + require.Equal(t, edgeKindID, values[4]) + + propertiesText, ok := values[5].(string) + require.True(t, ok) + + var properties map[string]any + require.NoError(t, json.Unmarshal([]byte(propertiesText), &properties)) + require.Equal(t, map[string]any{"objectid": "edge-1"}, properties) +} + +func TestRelationshipUpdateCopySourceStopsWhenKindIsMissing(t *testing.T) { + source := newRelationshipUpdateCopySource( + context.Background(), + 7, + []*sql.RelationshipUpdate{{ + StartID: sql.NewFuture(graph.ID(10)), + EndID: sql.NewFuture(graph.ID(20)), + Relationship: graph.NewRelationship(0, 0, 0, graph.NewProperties(), graph.StringKind("Missing")), + }}, + pgutil.NewInMemoryKindMapper(), + ) + + require.False(t, source.Next()) + require.Error(t, source.Err()) +} diff --git a/drivers/pg/batch_test.go b/drivers/pg/batch_test.go new file mode 100644 index 00000000..4df8a948 --- /dev/null +++ b/drivers/pg/batch_test.go @@ -0,0 +1,33 @@ +package pg + +import ( + "testing" + + "github.com/specterops/dawgs/graph" + 
"github.com/stretchr/testify/require" +) + +func TestCoalesceNodeUpdates(t *testing.T) { + t.Parallel() + + var ( + nodeKind = graph.StringKind("Node") + extraKind = graph.StringKind("Extra") + nodeID = graph.ID(1) + first = graph.NewNode(nodeID, graph.NewProperties().Set("status", "first").Set("kept", "yes"), nodeKind) + second = graph.NewNode(nodeID, graph.NewProperties().Set("status", "second"), nodeKind) + ) + + second.Properties.Delete("removed") + second.AddKinds(extraKind) + + coalesced := coalesceNodeUpdates([]*graph.Node{first, second}) + require.Len(t, coalesced, 1) + + require.Equal(t, "second", coalesced[0].Properties.Get("status").Any()) + require.Equal(t, "yes", coalesced[0].Properties.Get("kept").Any()) + _, deleted := coalesced[0].Properties.Deleted["removed"] + require.True(t, deleted) + require.True(t, coalesced[0].Kinds.ContainsOneOf(extraKind)) + require.False(t, first.Kinds.ContainsOneOf(extraKind)) +} diff --git a/drivers/pg/manager.go b/drivers/pg/manager.go index 4ce56419..b52ffc8c 100644 --- a/drivers/pg/manager.go +++ b/drivers/pg/manager.go @@ -406,6 +406,10 @@ func (s *SchemaManager) AssertSchema(ctx context.Context, schema graph.Schema) e s.lock.Lock() defer s.lock.Unlock() + s.graphs = map[string]model.Graph{} + s.defaultGraph = model.Graph{} + s.hasDefaultGraph = false + return s.WriteTransaction(ctx, func(tx graph.Transaction) error { return s.assertSchema(tx, schema) }, OptionSetQueryExecMode(pgx.QueryExecModeSimpleProtocol)) diff --git a/drivers/pg/query/format.go b/drivers/pg/query/format.go index bcf9d878..8d028f2a 100644 --- a/drivers/pg/query/format.go +++ b/drivers/pg/query/format.go @@ -118,10 +118,10 @@ func FormatNodesUpdate(graphTarget model.Graph) string { ) } -// NodeUpdateStagingTable is the name of the temporary staging table used by largeUpdate. +// NodeUpdateStagingTable is the name of the temporary staging table used by node update COPY flushes. 
const NodeUpdateStagingTable = "node_update_staging" -// NodeUpdateStagingColumns lists the columns (in order) written by a COPY FROM during largeUpdate. +// NodeUpdateStagingColumns lists the columns (in order) written by node update COPY flushes. var NodeUpdateStagingColumns = []string{"id", "added_kinds", "deleted_kinds", "properties", "deleted_props"} func FormatCreateNodeUpdateStagingTable(stagingTable string) string { @@ -157,6 +157,126 @@ func FormatNodeUpsert(graphTarget model.Graph, identityProperties []string) stri ) } +const NodeUpsertStagingTable = "node_upsert_staging" + +var NodeUpsertStagingColumns = []string{"row_ord", "graph_id", "kind_ids", "properties"} + +func FormatCreateNodeUpsertStagingTable(stagingTable string) string { + return join( + "create temp table if not exists ", stagingTable, " (", + "row_ord integer not null, ", + "graph_id integer not null, ", + "kind_ids text not null, ", + "properties text not null", + ") on commit drop;", + ) +} + +func FormatMergeNodeUpsertStaging(graphTarget model.Graph, identityProperties []string, stagingTable string) string { + return join( + "with upserted as (", + "insert into ", graphTarget.Partitions.Node.Name, " as n ", + "(graph_id, kind_ids, properties) ", + "select graph_id, kind_ids::int2[], properties::jsonb ", + "from ", stagingTable, " ", + "order by row_ord ", + formatConflictMatcher(identityProperties, "id, graph_id"), + "do update set properties = n.properties || excluded.properties, kind_ids = uniq(sort(n.kind_ids || excluded.kind_ids)) ", + "returning id, graph_id, properties", + ") ", + "select s.row_ord, u.id ", + "from ", stagingTable, " as s ", + "join upserted as u on ", formatNodeUpsertStagingMatcher(identityProperties), " ", + "order by s.row_ord;", + ) +} + +func formatNodeUpsertStagingMatcher(identityProperties []string) string { + if len(identityProperties) == 0 { + return "u.graph_id = s.graph_id" + } + + builder := strings.Builder{} + builder.WriteString("u.graph_id = 
s.graph_id") + + for _, identityProperty := range identityProperties { + builder.WriteString(" and u.properties->>'") + builder.WriteString(identityProperty) + builder.WriteString("' = s.properties::jsonb->>'") + builder.WriteString(identityProperty) + builder.WriteString("'") + } + + return builder.String() +} + +const NodeCreateStagingTable = "node_create_staging" + +var ( + NodeCreateWithIDStagingColumns = []string{"id", "graph_id", "kind_ids", "properties"} + NodeCreateWithoutIDStagingColumns = []string{"graph_id", "kind_ids", "properties"} +) + +func FormatCreateNodeCreateStagingTable(stagingTable string) string { + return join( + "create temp table if not exists ", stagingTable, " (", + "id bigint, ", + "graph_id integer not null, ", + "kind_ids text not null, ", + "properties text not null", + ") on commit drop;", + ) +} + +func FormatMergeNodeCreateStagingWithIDs(graphTarget model.Graph, stagingTable string) string { + return join( + "insert into ", graphTarget.Partitions.Node.Name, " ", + "(id, graph_id, kind_ids, properties) ", + "select id, graph_id, kind_ids::int2[], properties::jsonb ", + "from ", stagingTable, ";", + ) +} + +func FormatMergeNodeCreateStagingWithoutIDs(graphTarget model.Graph, stagingTable string) string { + return join( + "insert into ", graphTarget.Partitions.Node.Name, " ", + "(graph_id, kind_ids, properties) ", + "select graph_id, kind_ids::int2[], properties::jsonb ", + "from ", stagingTable, ";", + ) +} + +const ( + NodeDeleteStagingTable = "node_delete_staging" + RelationshipDeleteStagingTable = "relationship_delete_staging" +) + +var DeleteStagingColumns = []string{"id"} + +func FormatCreateDeleteStagingTable(stagingTable string) string { + return join( + "create temp table if not exists ", stagingTable, " (", + "id bigint not null", + ") on commit drop;", + ) +} + +func FormatMergeNodeDeleteStaging(stagingTable string) string { + return join( + "delete from node as n ", + "using ", stagingTable, " as d ", + "where n.id = 
d.id;", + ) +} + +func FormatMergeRelationshipDeleteStaging(stagingTable string) string { + return join( + "delete from edge as e ", + "using ", stagingTable, " as d ", + "where e.id = d.id;", + ) +} + func FormatRelationshipPartitionUpsert(graphTarget model.Graph, identityProperties []string) string { return join("insert into ", graphTarget.Partitions.Edge.Name, " as e ", "(graph_id, start_id, end_id, kind_id, properties) ", @@ -166,6 +286,58 @@ func FormatRelationshipPartitionUpsert(graphTarget model.Graph, identityProperti ) } +const RelationshipCreateStagingTable = "relationship_create_staging" + +var RelationshipCreateStagingColumns = []string{"row_ord", "graph_id", "start_id", "end_id", "kind_id", "properties"} + +func FormatCreateRelationshipCreateStagingTable(stagingTable string) string { + return join( + "create temp table if not exists ", stagingTable, " (", + "row_ord integer not null, ", + "graph_id integer not null, ", + "start_id bigint not null, ", + "end_id bigint not null, ", + "kind_id smallint not null, ", + "properties text not null", + ") on commit drop;", + ) +} + +func FormatMergeRelationshipStaging(graphTarget model.Graph, identityProperties []string, stagingTable string) string { + return join( + "insert into ", graphTarget.Partitions.Edge.Name, " as e ", + "(graph_id, start_id, end_id, kind_id, properties) ", + "select graph_id, start_id, end_id, kind_id, ", + "coalesce(jsonb_object_agg(key, value) filter (where key is not null), '{}'::jsonb) as properties ", + "from (", + "select distinct on (graph_id, start_id, end_id, kind_id, key) ", + "graph_id, start_id, end_id, kind_id, key, value, row_ord ", + "from ", stagingTable, " ", + "left join lateral jsonb_each(properties::jsonb) as property(key, value) on true ", + "order by graph_id, start_id, end_id, kind_id, key, row_ord desc", + ") as deduped ", + "group by graph_id, start_id, end_id, kind_id ", + formatConflictMatcher(identityProperties, "start_id, end_id, kind_id, graph_id"), + "do 
update set properties = e.properties || excluded.properties;", + ) +} + +func FormatMergeRelationshipUpdateStaging(graphTarget model.Graph, identityProperties []string, stagingTable string) string { + return join( + "insert into ", graphTarget.Partitions.Edge.Name, " as e ", + "(graph_id, start_id, end_id, kind_id, properties) ", + "select graph_id, start_id, end_id, kind_id, properties::jsonb ", + "from ", stagingTable, " ", + "order by row_ord ", + formatConflictMatcher(identityProperties, "start_id, end_id, kind_id, graph_id"), + "do update set properties = e.properties || excluded.properties;", + ) +} + +func FormatMergeRelationshipCreateStaging(graphTarget model.Graph, stagingTable string) string { + return FormatMergeRelationshipStaging(graphTarget, nil, stagingTable) +} + type NodeUpdate struct { IDFuture *Future[graph.ID] Node *graph.Node diff --git a/drivers/pg/query/format_test.go b/drivers/pg/query/format_test.go index 2092182b..258d7aa1 100644 --- a/drivers/pg/query/format_test.go +++ b/drivers/pg/query/format_test.go @@ -91,3 +91,232 @@ func TestNodeUpdateStagingColumns(t *testing.T) { assert.Equal(t, expected, query.NodeUpdateStagingColumns) } + +func TestFormatCreateNodeCreateStagingTable(t *testing.T) { + t.Parallel() + + var ( + tableName = "my_node_staging" + expected = strings.Join([]string{ + "create temp table if not exists my_node_staging (", + "id bigint, ", + "graph_id integer not null, ", + "kind_ids text not null, ", + "properties text not null", + ") on commit drop;", + }, "") + result = query.FormatCreateNodeCreateStagingTable(tableName) + ) + + assert.Equal(t, expected, result) +} + +func TestFormatMergeNodeCreateStagingWithIDs(t *testing.T) { + t.Parallel() + + var ( + stagingTable = "my_node_staging" + graphTarget = generateTestGraphTarget("node_part_1") + expected = strings.Join([]string{ + "insert into node_part_1 ", + "(id, graph_id, kind_ids, properties) ", + "select id, graph_id, kind_ids::int2[], properties::jsonb ", + "from 
my_node_staging;", + }, "") + result = query.FormatMergeNodeCreateStagingWithIDs(graphTarget, stagingTable) + ) + + assert.Equal(t, expected, result) +} + +func TestFormatMergeNodeCreateStagingWithoutIDs(t *testing.T) { + t.Parallel() + + var ( + stagingTable = "my_node_staging" + graphTarget = generateTestGraphTarget("node_part_1") + expected = strings.Join([]string{ + "insert into node_part_1 ", + "(graph_id, kind_ids, properties) ", + "select graph_id, kind_ids::int2[], properties::jsonb ", + "from my_node_staging;", + }, "") + result = query.FormatMergeNodeCreateStagingWithoutIDs(graphTarget, stagingTable) + ) + + assert.Equal(t, expected, result) +} + +func TestNodeCreateStagingColumns(t *testing.T) { + t.Parallel() + + assert.Equal(t, []string{"id", "graph_id", "kind_ids", "properties"}, query.NodeCreateWithIDStagingColumns) + assert.Equal(t, []string{"graph_id", "kind_ids", "properties"}, query.NodeCreateWithoutIDStagingColumns) +} + +func TestFormatCreateDeleteStagingTable(t *testing.T) { + t.Parallel() + + var ( + tableName = "my_delete_staging" + expected = strings.Join([]string{ + "create temp table if not exists my_delete_staging (", + "id bigint not null", + ") on commit drop;", + }, "") + result = query.FormatCreateDeleteStagingTable(tableName) + ) + + assert.Equal(t, expected, result) +} + +func TestFormatMergeDeleteStaging(t *testing.T) { + t.Parallel() + + assert.Equal(t, "delete from node as n using my_delete_staging as d where n.id = d.id;", query.FormatMergeNodeDeleteStaging("my_delete_staging")) + assert.Equal(t, "delete from edge as e using my_delete_staging as d where e.id = d.id;", query.FormatMergeRelationshipDeleteStaging("my_delete_staging")) +} + +func TestDeleteStagingColumns(t *testing.T) { + t.Parallel() + + assert.Equal(t, []string{"id"}, query.DeleteStagingColumns) +} + +func TestFormatCreateNodeUpsertStagingTable(t *testing.T) { + t.Parallel() + + var ( + tableName = "my_node_upsert_staging" + expected = strings.Join([]string{ + 
"create temp table if not exists my_node_upsert_staging (", + "row_ord integer not null, ", + "graph_id integer not null, ", + "kind_ids text not null, ", + "properties text not null", + ") on commit drop;", + }, "") + result = query.FormatCreateNodeUpsertStagingTable(tableName) + ) + + assert.Equal(t, expected, result) +} + +func TestFormatMergeNodeUpsertStaging(t *testing.T) { + t.Parallel() + + var ( + stagingTable = "my_node_upsert_staging" + graphTarget = generateTestGraphTarget("node_part_1") + expected = strings.Join([]string{ + "with upserted as (", + "insert into node_part_1 as n ", + "(graph_id, kind_ids, properties) ", + "select graph_id, kind_ids::int2[], properties::jsonb ", + "from my_node_upsert_staging ", + "order by row_ord ", + "on conflict ((properties->>'objectid')) ", + "do update set properties = n.properties || excluded.properties, kind_ids = uniq(sort(n.kind_ids || excluded.kind_ids)) ", + "returning id, graph_id, properties", + ") ", + "select s.row_ord, u.id ", + "from my_node_upsert_staging as s ", + "join upserted as u on u.graph_id = s.graph_id and u.properties->>'objectid' = s.properties::jsonb->>'objectid' ", + "order by s.row_ord;", + }, "") + result = query.FormatMergeNodeUpsertStaging(graphTarget, []string{"objectid"}, stagingTable) + ) + + assert.Equal(t, expected, result) +} + +func TestNodeUpsertStagingColumns(t *testing.T) { + t.Parallel() + + assert.Equal(t, []string{"row_ord", "graph_id", "kind_ids", "properties"}, query.NodeUpsertStagingColumns) +} + +func TestFormatCreateRelationshipCreateStagingTable(t *testing.T) { + t.Parallel() + + var ( + tableName = "my_relationship_staging" + expected = strings.Join([]string{ + "create temp table if not exists my_relationship_staging (", + "row_ord integer not null, ", + "graph_id integer not null, ", + "start_id bigint not null, ", + "end_id bigint not null, ", + "kind_id smallint not null, ", + "properties text not null", + ") on commit drop;", + }, "") + result = 
query.FormatCreateRelationshipCreateStagingTable(tableName) + ) + + assert.Equal(t, expected, result) +} + +func TestFormatMergeRelationshipCreateStaging(t *testing.T) { + t.Parallel() + + var ( + stagingTable = "my_relationship_staging" + graphTarget = model.Graph{ + Partitions: model.GraphPartitions{ + Edge: model.NewGraphPartition("edge_part_1"), + }, + } + expected = strings.Join([]string{ + "insert into edge_part_1 as e ", + "(graph_id, start_id, end_id, kind_id, properties) ", + "select graph_id, start_id, end_id, kind_id, ", + "coalesce(jsonb_object_agg(key, value) filter (where key is not null), '{}'::jsonb) as properties ", + "from (", + "select distinct on (graph_id, start_id, end_id, kind_id, key) ", + "graph_id, start_id, end_id, kind_id, key, value, row_ord ", + "from my_relationship_staging ", + "left join lateral jsonb_each(properties::jsonb) as property(key, value) on true ", + "order by graph_id, start_id, end_id, kind_id, key, row_ord desc", + ") as deduped ", + "group by graph_id, start_id, end_id, kind_id ", + "on conflict (start_id, end_id, kind_id, graph_id) do update set properties = e.properties || excluded.properties;", + }, "") + result = query.FormatMergeRelationshipCreateStaging(graphTarget, stagingTable) + ) + + assert.Equal(t, expected, result) +} + +func TestFormatMergeRelationshipUpdateStagingWithIdentityProperties(t *testing.T) { + t.Parallel() + + var ( + stagingTable = "my_relationship_staging" + graphTarget = model.Graph{ + Partitions: model.GraphPartitions{ + Edge: model.NewGraphPartition("edge_part_1"), + }, + } + expected = strings.Join([]string{ + "insert into edge_part_1 as e ", + "(graph_id, start_id, end_id, kind_id, properties) ", + "select graph_id, start_id, end_id, kind_id, properties::jsonb ", + "from my_relationship_staging ", + "order by row_ord ", + "on conflict ((properties->>'objectid')) ", + "do update set properties = e.properties || excluded.properties;", + }, "") + result = 
query.FormatMergeRelationshipUpdateStaging(graphTarget, []string{"objectid"}, stagingTable) + ) + + assert.Equal(t, expected, result) +} + +func TestRelationshipCreateStagingColumns(t *testing.T) { + t.Parallel() + + expected := []string{"row_ord", "graph_id", "start_id", "end_id", "kind_id", "properties"} + + assert.Equal(t, expected, query.RelationshipCreateStagingColumns) +} diff --git a/drivers/pg/query/sql/schema_up.sql b/drivers/pg/query/sql/schema_up.sql index 27912cbe..d3500afd 100644 --- a/drivers/pg/query/sql/schema_up.sql +++ b/drivers/pg/query/sql/schema_up.sql @@ -184,12 +184,14 @@ drop index if exists edge_kind_index; drop index if exists edge_start_kind_index; drop index if exists edge_end_kind_index; --- Covering indexes for traversal joins. The INCLUDE columns allow index-only scans for the common case where --- the join needs (id, start_id, end_id, kind_id) without fetching from the heap. The standalone start_id, --- end_id, and kind_id indexes are intentionally omitted: the composite indexes satisfy left-prefix lookups --- on start_id or end_id alone, and kind_id is never queried in isolation during traversal. +-- Covering indexes for traversal joins and relationship counts. The INCLUDE columns allow index-only scans for +-- the common case where the join needs (id, start_id, end_id, kind_id) without fetching from the heap. The standalone +-- start_id and end_id indexes are intentionally omitted: the composite indexes satisfy left-prefix lookups on start_id +-- or end_id alone. Relationship count fast paths query kind_id without an endpoint anchor, so keep a kind_id-first +-- covering index for those shapes. 
create index if not exists edge_start_id_kind_id_id_end_id_index on edge using btree (start_id, kind_id) include (id, end_id); create index if not exists edge_end_id_kind_id_id_start_id_index on edge using btree (end_id, kind_id) include (id, start_id); +create index if not exists edge_kind_id_id_start_id_end_id_index on edge using btree (kind_id) include (id, start_id, end_id); -- Path composite type do diff --git a/go.mod b/go.mod index d507391d..8d3a5014 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,6 @@ require ( github.com/fzipp/gocyclo v0.6.0 github.com/gammazero/deque v1.2.1 github.com/jackc/pgtype v1.14.4 - github.com/jackc/pgx/v4 v4.18.2 github.com/jackc/pgx/v5 v5.9.2 github.com/neo4j/neo4j-go-driver/v5 v5.28.4 github.com/stretchr/testify v1.11.1 @@ -125,13 +124,9 @@ require ( github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/hexops/gotextdiff v1.0.3 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/jackc/chunkreader/v2 v2.0.1 // indirect - github.com/jackc/pgconn v1.14.3 // indirect github.com/jackc/pgio v1.0.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect - github.com/jackc/pgproto3/v2 v2.3.3 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect - github.com/jackc/puddle v1.3.0 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect github.com/jgautheron/goconst v1.8.2 // indirect github.com/jingyugao/rowserrcheck v1.1.1 // indirect diff --git a/go.sum b/go.sum index 1c7c760d..40995d1e 100644 --- a/go.sum +++ b/go.sum @@ -110,7 +110,6 @@ github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQ github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg= github.com/ckaznocha/intrange v0.3.1 h1:j1onQyXvHUsPWujDH6WIjhyH26gkRt/txNlV7LspvJs= github.com/ckaznocha/intrange v0.3.1/go.mod h1:QVepyz1AkUoFQkpEqksSYpNpUo3c5W7nWh/s6SHIJJk= -github.com/cockroachdb/apd v1.1.0 
h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= github.com/cockroachdb/apd/v3 v3.2.2 h1:R1VaDQkMR321HBM6+6b2eYZfxi0ybPJgUh0Ztr7twzU= github.com/cockroachdb/apd/v3 v3.2.2/go.mod h1:klXJcjp+FffLTHlhIG69tezTDvdP065naDsHzKhYSqc= @@ -196,7 +195,6 @@ github.com/godoc-lint/godoc-lint v0.11.2 h1:Bp0FkJWoSdNsBikdNgIcgtaoo+xz6I/Y9s5W github.com/godoc-lint/godoc-lint v0.11.2/go.mod h1:iVpGdL1JCikNH2gGeAn3Hh+AgN5Gx/I/cxV+91L41jo= github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= -github.com/gofrs/uuid v4.0.0+incompatible h1:1SD/1F5pU8p29ybwgQSwpQk+mwdRrXCYuPhW6m+TnJw= github.com/gofrs/uuid v4.0.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= github.com/golangci/asciicheck v0.5.0 h1:jczN/BorERZwK8oiFBOGvlGPknhvq0bjnysTj4nUfo0= github.com/golangci/asciicheck v0.5.0/go.mod h1:5RMNAInbNFw2krqN6ibBxN/zfRFa9S6tA1nPdM0l8qQ= @@ -259,6 +257,7 @@ github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUq github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jackc/chunkreader v1.0.0 h1:4s39bBR8ByfqH+DKm8rQA3E1LHZWB9XWcrz8fqaZbe0= github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo= github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= @@ -275,10 +274,10 @@ github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE= github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8= github.com/jackc/pgmock 
v0.0.0-20190831213851-13a1b77aafa2/go.mod h1:fGZlG77KXmcq05nJLRkk0+p82V8B8Dw8KN2/V9c/OAE= github.com/jackc/pgmock v0.0.0-20201204152224-4fe30f7445fd/go.mod h1:hrBW0Enj2AZTNpt/7Y5rr2xe/9Mn757Wtb2xeBzPv2c= -github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65 h1:DadwsjnMwFjfWc9y5Wi/+Zz7xoE5ALHsRQlOctkOiHc= github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65/go.mod h1:5R2h2EEX+qri8jOWMbJCtaPWkrrNc7OHwsp2TCqp7ak= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgproto3 v1.1.0 h1:FYYE4yRw+AgI8wXIinMlNjBbp/UitDJwfj5LqqewP1A= github.com/jackc/pgproto3 v1.1.0/go.mod h1:eR5FA3leWg7p9aeAqi37XOTgTIbkABlvcPB3E5rlc78= github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190420180111-c116219b62db/go.mod h1:bhq50y+xrl9n5mRYyCBFKkpRVTLYJVWeCc+mEAI3yXA= github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190609003834-432c2951c711/go.mod h1:uH0AWtUmuShn0bcesswc4aBTWGvw0cAxIJp+6OB//Wg= @@ -310,7 +309,6 @@ github.com/jackc/pgx/v5 v5.9.2/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= -github.com/jackc/puddle v1.3.0 h1:eHK/5clGOatcjX3oWGBO/MpxpbHzSwud5EWTSCI+MX0= github.com/jackc/puddle v1.3.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= @@ -496,7 +494,6 @@ github.com/securego/gosec/v2 v2.24.8-0.20260309165252-619ce2117e08/go.mod h1:+XL github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= github.com/sergi/go-diff v1.2.0/go.mod 
h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4= -github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/shurcooL/go v0.0.0-20180423040247-9e1955d9fb6e/go.mod h1:TDJrrUr11Vxrven61rcy3hJMUqaf/CLWYhHNPmT14Lk= github.com/shurcooL/go-goon v0.0.0-20170922171312-37c2f522c041/go.mod h1:N5mDOmsrJOB+vfqUK+7DmDyjhSLIIBnXo9lvZJj3MWQ= diff --git a/graph/graph.go b/graph/graph.go index da8ea3c7..5fb9bdbd 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -354,7 +354,7 @@ type Transaction interface { // return value) results in a transactional commit of work done within the TransactionDelegate. type TransactionDelegate func(tx Transaction) error -// BatchDelegate represents a transactional database context actor. +// BatchDelegate represents a buffered write context actor. type BatchDelegate func(batch Batch) error // TransactionConfig is a generic configuration that may apply to all supported databases. @@ -396,10 +396,12 @@ type Database interface { // given logic function. WriteTransaction(ctx context.Context, txDelegate TransactionDelegate, options ...TransactionOption) error - // BatchOperation opens up a new write transactional context in the database and then defers the context to the - // given logic function. Batch operations are fundamentally different between databases supported by DAWGS, - // necessitating a different interface that lacks many of the convenience features of a regular read or write - // transaction. + // BatchOperation opens a buffered write context and passes it to the given logic function. + // + // BatchOperation is not transactional across the whole delegate. 
Implementations may flush and commit chunks before + // the delegate returns, and successfully flushed chunks may remain persisted if a later flush fails or if the delegate + // returns an error. Callers that need all-or-nothing behavior should use WriteTransaction or another transactional + // workflow instead. BatchOperation(ctx context.Context, batchDelegate BatchDelegate, options ...BatchOption) error // AssertSchema will apply the given schema to the underlying database. diff --git a/integration/cypher_template_test.go b/integration/cypher_template_test.go index 7dde0d51..acdc0412 100644 --- a/integration/cypher_template_test.go +++ b/integration/cypher_template_test.go @@ -70,9 +70,10 @@ type cypherMetamorphicQuery struct { func TestCypherTemplates(t *testing.T) { templateFiles := loadCypherTemplateFiles(t) - nodeKinds, edgeKinds := cypherTemplateKinds(templateFiles) + nodeKinds, edgeKinds := cypherTemplateKinds(t, templateFiles) db, ctx := SetupDBWithKindsNoGraphCleanup(t, nodeKinds, edgeKinds) + ClearGraph(t, db, ctx) for _, templateFile := range templateFiles { fileName := strings.TrimSuffix(filepath.Base(templateFile.path), filepath.Ext(templateFile.path)) @@ -81,8 +82,10 @@ func TestCypherTemplates(t *testing.T) { t.Run(family.Name, func(t *testing.T) { for _, variant := range family.Variants { t.Run(variant.Name, func(t *testing.T) { - cypher := renderCypherTemplate(t, family.Template, variant.Vars) - check := parseAssertion(t, variant.Assert) + var ( + cypher = renderCypherTemplate(t, family.Template, variant.Vars) + check = parseAssertion(t, variant.Assert) + ) tc := testCase{ Name: variant.Name, Cypher: cypher, @@ -135,23 +138,33 @@ func loadCypherTemplateFiles(t *testing.T) []cypherTemplateFile { return templateFiles } -func cypherTemplateKinds(templateFiles []cypherTemplateFile) (graph.Kinds, graph.Kinds) { +func cypherTemplateKinds(t *testing.T, templateFiles []cypherTemplateFile) (graph.Kinds, graph.Kinds) { + t.Helper() + var nodeKinds, 
edgeKinds graph.Kinds for _, templateFile := range templateFiles { for _, family := range templateFile.Families { - if family.Fixture != nil { - familyNodeKinds, familyEdgeKinds := family.Fixture.Kinds() - nodeKinds = nodeKinds.Add(familyNodeKinds...) - edgeKinds = edgeKinds.Add(familyEdgeKinds...) + fixtureNodeKinds, fixtureEdgeKinds := collectFixtureKinds(family.Fixture) + nodeKinds = nodeKinds.Add(fixtureNodeKinds...) + edgeKinds = edgeKinds.Add(fixtureEdgeKinds...) + + for _, variant := range family.Variants { + queryNodeKinds, queryEdgeKinds := collectCypherKinds(t, renderCypherTemplate(t, family.Template, variant.Vars)) + nodeKinds = nodeKinds.Add(queryNodeKinds...) + edgeKinds = edgeKinds.Add(queryEdgeKinds...) } } for _, family := range templateFile.Metamorphic { - if family.Fixture != nil { - familyNodeKinds, familyEdgeKinds := family.Fixture.Kinds() - nodeKinds = nodeKinds.Add(familyNodeKinds...) - edgeKinds = edgeKinds.Add(familyEdgeKinds...) + fixtureNodeKinds, fixtureEdgeKinds := collectFixtureKinds(family.Fixture) + nodeKinds = nodeKinds.Add(fixtureNodeKinds...) + edgeKinds = edgeKinds.Add(fixtureEdgeKinds...) + + for _, query := range family.Queries { + queryNodeKinds, queryEdgeKinds := collectCypherKinds(t, query.Cypher) + nodeKinds = nodeKinds.Add(queryNodeKinds...) + edgeKinds = edgeKinds.Add(queryEdgeKinds...) 
} } } @@ -197,22 +210,24 @@ func runWithTemplateFixture(t *testing.T, ctx context.Context, db graph.Database t.Fatal("template cases must define an inline fixture") } - queryErrorObserved := false - err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { - idMap, err := opengraph.WriteGraphTx(tx, tc.Fixture) - if err != nil { - return fmt.Errorf("creating fixture: %w", err) - } + var ( + queryErrorObserved = false + err = db.WriteTransaction(ctx, func(tx graph.Transaction) error { + idMap, err := opengraph.WriteGraphTx(tx, tc.Fixture) + if err != nil { + return fmt.Errorf("creating fixture: %w", err) + } - result := tx.Query(tc.Cypher, tc.Params) - defer result.Close() - assertion.checkResult(t, result, newAssertionContext(idMap)) - if assertion.expectQueryError { - queryErrorObserved = true - } + result := tx.Query(tc.Cypher, tc.Params) + defer result.Close() + assertion.checkResult(t, result, newAssertionContext(idMap)) + if assertion.expectQueryError { + queryErrorObserved = true + } - return errFixtureRollback - }) + return errFixtureRollback + }) + ) if assertion.expectQueryError && queryErrorObserved && err != nil { return @@ -245,8 +260,10 @@ func runMetamorphicFamily(t *testing.T, ctx context.Context, db graph.Database, var baseline []string for _, query := range family.Queries { - result := tx.Query(query.Cypher, query.Params) - collected := collectResult(t, result) + var ( + result = tx.Query(query.Cypher, query.Params) + collected = collectResult(t, result) + ) result.Close() signature := comparisonSignature(t, collected, assertCtx, family.Compare) diff --git a/integration/cypher_test.go b/integration/cypher_test.go index c66e2745..01563635 100644 --- a/integration/cypher_test.go +++ b/integration/cypher_test.go @@ -68,6 +68,7 @@ func TestCypher(t *testing.T) { } groups := map[string]*group{} var datasetNames []string + var extraNodeKinds, extraEdgeKinds graph.Kinds for _, path := range files { raw, err := os.ReadFile(path) @@ -90,9 +91,13 @@ 
func TestCypher(t *testing.T) { datasetNames = append(datasetNames, ds) } groups[ds].files = append(groups[ds].files, cf) + + caseNodeKinds, caseEdgeKinds := collectCypherCaseKinds(t, cf.Cases) + extraNodeKinds = extraNodeKinds.Add(caseNodeKinds...) + extraEdgeKinds = extraEdgeKinds.Add(caseEdgeKinds...) } - db, ctx := SetupDB(t, datasetNames...) + db, ctx := SetupDBWithKinds(t, extraNodeKinds, extraEdgeKinds, datasetNames...) for _, g := range groups { ClearGraph(t, db, ctx) @@ -120,6 +125,24 @@ func TestCypher(t *testing.T) { } } +func collectCypherCaseKinds(t *testing.T, cases []testCase) (graph.Kinds, graph.Kinds) { + t.Helper() + + var nodeKinds, edgeKinds graph.Kinds + + for _, tc := range cases { + queryNodeKinds, queryEdgeKinds := collectCypherKinds(t, tc.Cypher) + fixtureNodeKinds, fixtureEdgeKinds := collectFixtureKinds(tc.Fixture) + + nodeKinds = nodeKinds.Add(queryNodeKinds...) + nodeKinds = nodeKinds.Add(fixtureNodeKinds...) + edgeKinds = edgeKinds.Add(queryEdgeKinds...) + edgeKinds = edgeKinds.Add(fixtureEdgeKinds...) + } + + return nodeKinds, edgeKinds +} + // parseAssertion converts a JSON assertion value into a function that checks // a query result. Supports: // @@ -145,6 +168,7 @@ func TestCypher(t *testing.T) { // {"path_node_ids": [["a", "b"]]} — exact multiset of returned path node ID sequences // {"path_lengths": [N...]} — exact multiset of returned path edge counts // {"path_edge_kinds": [["K"...]]} — exact multiset of returned path edge kind sequences +// {"relationship_list_kinds": [["K"...]]} — exact multiset of returned relationship-list kind sequences // // Object assertions may combine multiple keys; every assertion must pass. 
func parseAssertion(t *testing.T, raw json.RawMessage) caseAssertion { @@ -231,6 +255,9 @@ func parseAssertion(t *testing.T, raw json.RawMessage) caseAssertion { case "path_edge_kinds": assertions = append(assertions, assertPathEdgeKinds(decodeAssertionValue[[][]string](t, key, val))) + case "relationship_list_kinds": + assertions = append(assertions, assertRelationshipListKinds(decodeAssertionValue[[][]string](t, key, val))) + default: t.Fatalf("unknown assertion key: %q", key) } @@ -258,16 +285,18 @@ var errFixtureRollback = errors.New("fixture rollback") func runReadOnly(t *testing.T, ctx context.Context, db graph.Database, idMap opengraph.IDMap, tc testCase, assertion caseAssertion) { t.Helper() - queryErrorObserved := false - err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { - result := tx.Query(tc.Cypher, tc.Params) - defer result.Close() - assertion.checkResult(t, result, newAssertionContext(idMap)) - if assertion.expectQueryError { - queryErrorObserved = true - } - return nil - }) + var ( + queryErrorObserved = false + err = db.ReadTransaction(ctx, func(tx graph.Transaction) error { + result := tx.Query(tc.Cypher, tc.Params) + defer result.Close() + assertion.checkResult(t, result, newAssertionContext(idMap)) + if assertion.expectQueryError { + queryErrorObserved = true + } + return nil + }) + ) if err != nil { if assertion.expectQueryError && queryErrorObserved { return @@ -282,26 +311,28 @@ func runReadOnly(t *testing.T, ctx context.Context, db graph.Database, idMap ope func runWithFixture(t *testing.T, ctx context.Context, db graph.Database, tc testCase, assertion caseAssertion) { t.Helper() - queryErrorObserved := false - err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { - if err := tx.Nodes().Delete(); err != nil { - return fmt.Errorf("clearing graph before fixture: %w", err) - } + var ( + queryErrorObserved = false + err = db.WriteTransaction(ctx, func(tx graph.Transaction) error { + if err := tx.Nodes().Delete(); err 
!= nil { + return fmt.Errorf("clearing graph before fixture: %w", err) + } - idMap, err := opengraph.WriteGraphTx(tx, tc.Fixture) - if err != nil { - return fmt.Errorf("creating fixture: %w", err) - } + idMap, err := opengraph.WriteGraphTx(tx, tc.Fixture) + if err != nil { + return fmt.Errorf("creating fixture: %w", err) + } - result := tx.Query(tc.Cypher, tc.Params) - defer result.Close() - assertion.checkResult(t, result, newAssertionContext(idMap)) - if assertion.expectQueryError { - queryErrorObserved = true - } + result := tx.Query(tc.Cypher, tc.Params) + defer result.Close() + assertion.checkResult(t, result, newAssertionContext(idMap)) + if assertion.expectQueryError { + queryErrorObserved = true + } - return errFixtureRollback - }) + return errFixtureRollback + }) + ) if assertion.expectQueryError && queryErrorObserved && err != nil { return @@ -788,8 +819,10 @@ func assertNodeListIDs(expected [][]string) resultAssertion { func collectNodeIDs(t *testing.T, result queryResult, ctx assertionContext, unique bool) []string { t.Helper() - ids := make([]string, 0, len(result.rows)) - seen := map[string]bool{} + var ( + ids = make([]string, 0, len(result.rows)) + seen = map[string]bool{} + ) for _, row := range result.rows { for _, rawVal := range row.values { @@ -885,6 +918,35 @@ func assertPathEdgeKinds(expected [][]string) resultAssertion { } } +func assertRelationshipListKinds(expected [][]string) resultAssertion { + return func(t *testing.T, result queryResult, _ assertionContext) { + t.Helper() + + got := make([]string, 0, len(result.rows)) + for _, row := range result.rows { + for _, rawVal := range row.values { + var relationshipPointers []*graph.Relationship + if result.mapper.Map(rawVal, &relationshipPointers) { + got = append(got, relationshipListKindSignature(t, relationshipPointers)) + continue + } + + var relationships []graph.Relationship + if result.mapper.Map(rawVal, &relationships) { + got = append(got, relationshipValueListKindSignature(t, 
relationships)) + } + } + } + + want := make([]string, len(expected)) + for idx, expectedKinds := range expected { + want[idx] = strings.Join(expectedKinds, "->") + } + + assertStringMultiset(t, got, want, "relationship-list kind sequences") + } +} + func pathNodeIDSignature(t *testing.T, path graph.Path, ctx assertionContext) string { t.Helper() @@ -919,6 +981,40 @@ func pathEdgeKindSignature(t *testing.T, path graph.Path) string { return strings.Join(edgeKinds, "->") } +func relationshipListKindSignature(t *testing.T, relationships []*graph.Relationship) string { + t.Helper() + + edgeKinds := make([]string, len(relationships)) + for idx, relationship := range relationships { + if relationship == nil { + t.Fatalf("relationship list contains nil relationship at index %d", idx) + } + + if relationship.Kind == nil { + t.Fatalf("relationship list item at index %d has nil kind", idx) + } + + edgeKinds[idx] = relationship.Kind.String() + } + + return strings.Join(edgeKinds, "->") +} + +func relationshipValueListKindSignature(t *testing.T, relationships []graph.Relationship) string { + t.Helper() + + edgeKinds := make([]string, len(relationships)) + for idx, relationship := range relationships { + if relationship.Kind == nil { + t.Fatalf("relationship list item at index %d has nil kind", idx) + } + + edgeKinds[idx] = relationship.Kind.String() + } + + return strings.Join(edgeKinds, "->") +} + func collectPaths(t *testing.T, result queryResult) []graph.Path { t.Helper() diff --git a/integration/harness.go b/integration/harness.go index 88111efc..9603c631 100644 --- a/integration/harness.go +++ b/integration/harness.go @@ -28,6 +28,9 @@ import ( "github.com/jackc/pgx/v5/pgxpool" "github.com/specterops/dawgs" + "github.com/specterops/dawgs/cypher/frontend" + "github.com/specterops/dawgs/cypher/models/cypher" + "github.com/specterops/dawgs/cypher/models/walk" "github.com/specterops/dawgs/drivers/pg" "github.com/specterops/dawgs/graph" "github.com/specterops/dawgs/opengraph" 
@@ -90,7 +93,7 @@ func setupDB(t *testing.T, cleanupGraph bool, extraNodeKinds, extraEdgeKinds gra connStr := os.Getenv("CONNECTION_STRING") if connStr == "" { - t.Fatal("CONNECTION_STRING env var is not set") + t.Skip("CONNECTION_STRING env var is not set") } driver, err := driverFromConnStr(connStr) @@ -175,6 +178,59 @@ func collectKinds(t *testing.T, datasets []string) (graph.Kinds, graph.Kinds) { return nodeKinds, edgeKinds } +func collectCypherKinds(t *testing.T, queries ...string) (graph.Kinds, graph.Kinds) { + t.Helper() + + var nodeKinds, edgeKinds graph.Kinds + + for _, queryText := range queries { + if strings.TrimSpace(queryText) == "" { + continue + } + + queryModel, err := frontend.ParseCypher(frontend.NewContext(), queryText) + if err != nil { + t.Fatalf("failed to parse Cypher query for kind scanning: %v\nquery: %s", err, queryText) + } + + visitor := walk.NewSimpleVisitor[cypher.SyntaxNode](func(node cypher.SyntaxNode, _ walk.VisitorHandler) { + switch typedNode := node.(type) { + case *cypher.NodePattern: + nodeKinds = nodeKinds.Add(typedNode.Kinds...) + + case *cypher.RelationshipPattern: + edgeKinds = edgeKinds.Add(typedNode.Kinds...) + + case *cypher.KindMatcher: + nodeKinds = nodeKinds.Add(typedNode.Kinds...) + edgeKinds = edgeKinds.Add(typedNode.Kinds...) + } + }) + + if err := walk.Cypher(queryModel, visitor); err != nil { + t.Fatalf("failed to walk Cypher query for kind scanning: %v\nquery: %s", err, queryText) + } + } + + return nodeKinds, edgeKinds +} + +func collectFixtureKinds(fixtures ...*opengraph.Graph) (graph.Kinds, graph.Kinds) { + var nodeKinds, edgeKinds graph.Kinds + + for _, fixture := range fixtures { + if fixture == nil { + continue + } + + fixtureNodeKinds, fixtureEdgeKinds := fixture.Kinds() + nodeKinds = nodeKinds.Add(fixtureNodeKinds...) + edgeKinds = edgeKinds.Add(fixtureEdgeKinds...) + } + + return nodeKinds, edgeKinds +} + // ClearGraph deletes all nodes (and cascading edges) from the database. 
func ClearGraph(t *testing.T, db graph.Database, ctx context.Context) { t.Helper() diff --git a/integration/harness_test.go b/integration/harness_test.go new file mode 100644 index 00000000..84a272f9 --- /dev/null +++ b/integration/harness_test.go @@ -0,0 +1,24 @@ +package integration + +import ( + "testing" + + "github.com/specterops/dawgs/graph" +) + +func TestCollectCypherKindsIncludesQueryOnlyRelationshipKinds(t *testing.T) { + nodeKinds, edgeKinds := collectCypherKinds(t, "MATCH p=(n:Domain)-[:CrossForestTrust|SpoofSIDHistory|AbuseTGTDelegation]-(m:Domain) WHERE (n)-[:SpoofSIDHistory|AbuseTGTDelegation]-(m) RETURN p") + + assertKindsContain(t, nodeKinds, "Domain") + assertKindsContain(t, edgeKinds, "CrossForestTrust", "SpoofSIDHistory", "AbuseTGTDelegation") +} + +func assertKindsContain(t *testing.T, kinds graph.Kinds, expected ...string) { + t.Helper() + + for _, name := range expected { + if !kinds.ContainsOneOf(graph.StringKind(name)) { + t.Fatalf("expected kinds %v to contain %q", kinds.Strings(), name) + } + } +} diff --git a/integration/pgsql_aggregate_traversal_plan_test.go b/integration/pgsql_aggregate_traversal_plan_test.go new file mode 100644 index 00000000..92a42cd7 --- /dev/null +++ b/integration/pgsql_aggregate_traversal_plan_test.go @@ -0,0 +1,307 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +//go:build manual_integration + +package integration + +import ( + "context" + "fmt" + "os" + "regexp" + "strings" + "testing" + "time" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" + "github.com/specterops/dawgs/cypher/frontend" + "github.com/specterops/dawgs/cypher/models/pgsql/optimize" + "github.com/specterops/dawgs/cypher/models/pgsql/translate" + "github.com/specterops/dawgs/drivers/pg" + "github.com/specterops/dawgs/graph" +) + +const liveAggregateTraversalCypher = ` +MATCH (u:User) +WHERE u.hasspn = true + AND u.enabled = true + AND NOT u.objectid ENDS WITH '-502' + AND NOT COALESCE(u.gmsa, false) = true + AND NOT COALESCE(u.msa, false) = true +MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) +WITH DISTINCT u, COUNT(c) AS adminCount +RETURN u +ORDER BY adminCount DESC +LIMIT 100 +` + +type livePGKindMapper struct { + pool *pgxpool.Pool +} + +func (s livePGKindMapper) MapKinds(ctx context.Context, kinds graph.Kinds) ([]int16, error) { + ids := make([]int16, 0, len(kinds)) + + for _, kind := range kinds { + id, err := liveKindID(ctx, s.pool, kind.String()) + if err != nil { + return nil, err + } + + ids = append(ids, id) + } + + return ids, nil +} + +func (s livePGKindMapper) AssertKinds(ctx context.Context, kinds graph.Kinds) ([]int16, error) { + return s.MapKinds(ctx, kinds) +} + +func TestPostgreSQLLiveAggregateTraversalCountPlanShape(t *testing.T) { + connStr := os.Getenv("CONNECTION_STRING") + if connStr == "" { + t.Skip("CONNECTION_STRING env var is not set") + } + + driver, err := driverFromConnStr(connStr) + if err != nil { + t.Fatalf("failed to detect driver: %v", err) + } + if driver != pg.DriverName { + t.Skipf("CONNECTION_STRING is not a PostgreSQL connection string") + } + + ctx, cancel := context.WithTimeout(context.Background(), 45*time.Second) + defer cancel() + + poolCfg, err := pgxpool.ParseConfig(connStr) + if err != nil { + t.Fatalf("failed to parse PG connection string: 
%v", err) + } + pool, err := pgxpool.NewWithConfig(ctx, poolCfg) + if err != nil { + t.Fatalf("failed to connect to PostgreSQL: %v", err) + } + defer pool.Close() + + liveStats, ok := liveAggregateTraversalStats(ctx, t, pool) + if !ok { + return + } + if liveStats.candidateUsers == 0 || liveStats.computers < 1000 || liveStats.adminEdges < 10000 { + t.Skipf( + "connected PostgreSQL database does not look like the live aggregate traversal dataset: %+v", + liveStats, + ) + } + + regularQuery, err := frontend.ParseCypher(frontend.NewContext(), liveAggregateTraversalCypher) + if err != nil { + t.Fatalf("failed to parse live aggregate traversal query: %v", err) + } + + translation, err := translate.Translate(ctx, regularQuery, livePGKindMapper{pool: pool}, nil, translate.DefaultGraphID) + if err != nil { + t.Fatalf("failed to translate live aggregate traversal query: %v", err) + } + requireLoweringDecision(t, translation.Optimization.PlannedLowerings, optimize.LoweringAggregateTraversalCount) + requireLoweringDecision(t, translation.Optimization.Lowerings, optimize.LoweringAggregateTraversalCount) + + sqlQuery, err := translate.Translated(translation) + if err != nil { + t.Fatalf("failed to render live aggregate traversal SQL: %v", err) + } + normalizedSQL := strings.Join(strings.Fields(strings.ToLower(sqlQuery)), " ") + + for _, expected := range []string{ + "with recursive candidate_sources(root_id)", + "traversal(root_id, next_id, depth, path)", + "terminal_nodes(id) as materialized", + "terminal_hits(root_id)", + "ranked(root_id, admincount)", + "join edge e on e.start_id = candidate_sources.root_id", + "e.start_id = traversal.next_id", + "group by terminal_hits.root_id", + "from ranked join node source_node on source_node.id = ranked.root_id", + } { + if !strings.Contains(normalizedSQL, expected) { + t.Fatalf("expected translated SQL to contain %q, got:\n%s", expected, sqlQuery) + } + } + if strings.Contains(normalizedSQL, "group by (") { + t.Fatalf("expected 
aggregate traversal SQL to avoid grouping by composites, got:\n%s", sqlQuery) + } + + plan := explainAggregateTraversalPlan(ctx, t, pool, sqlQuery, translation.Parameters) + for _, expected := range []string{ + "CTE traversal", + "Recursive Union", + "start_id = source_node", + "start_id = traversal", + "Group Key: traversal.root_id", + "Hash Cond: (traversal.next_id = terminal_nodes.id)", + } { + if !strings.Contains(plan, expected) { + t.Fatalf("expected PostgreSQL plan to contain %q, got:\n%s", expected, plan) + } + } + for _, unexpected := range []string{ + "end_id = source_node", + "end_id = traversal", + "Group Key: (", + } { + if strings.Contains(plan, unexpected) { + t.Fatalf("expected PostgreSQL plan to avoid %q, got:\n%s", unexpected, plan) + } + } + + var ( + limitMatch = regexp.MustCompile(`(?m)->\s+Limit\b`).FindStringIndex(plan) + sourceMaterializationIndex = strings.LastIndex(plan, "Index Scan using node_") + ) + if limitMatch == nil || sourceMaterializationIndex < 0 || sourceMaterializationIndex < limitMatch[0] { + t.Fatalf("expected source node materialization after top-N limiting, got:\n%s", plan) + } +} + +type liveAggregateStats struct { + candidateUsers int64 + computers int64 + adminEdges int64 +} + +func liveAggregateTraversalStats(ctx context.Context, t *testing.T, pool *pgxpool.Pool) (liveAggregateStats, bool) { + t.Helper() + + userKindID, err := liveKindID(ctx, pool, "User") + if err != nil { + t.Skipf("connected PostgreSQL database has no User kind: %v", err) + return liveAggregateStats{}, false + } + computerKindID, err := liveKindID(ctx, pool, "Computer") + if err != nil { + t.Skipf("connected PostgreSQL database has no Computer kind: %v", err) + return liveAggregateStats{}, false + } + memberOfKindID, err := liveKindID(ctx, pool, "MemberOf") + if err != nil { + t.Skipf("connected PostgreSQL database has no MemberOf kind: %v", err) + return liveAggregateStats{}, false + } + adminToKindID, err := liveKindID(ctx, pool, "AdminTo") + if 
err != nil { + t.Skipf("connected PostgreSQL database has no AdminTo kind: %v", err) + return liveAggregateStats{}, false + } + + var stats liveAggregateStats + if err := pool.QueryRow(ctx, ` + select + ( + select count(*) + from node n + where n.kind_ids operator (pg_catalog.@>) array[$1::int2] + and (n.properties -> 'hasspn') = to_jsonb(true) + and (n.properties -> 'enabled') = to_jsonb(true) + and coalesce(n.properties ->> 'objectid', '') not like '%-502' + and not coalesce((n.properties ->> 'gmsa')::bool, false) + and not coalesce((n.properties ->> 'msa')::bool, false) + ), + ( + select count(*) + from node n + where n.kind_ids operator (pg_catalog.@>) array[$2::int2] + ), + ( + select count(*) + from edge e + where e.kind_id = any(array[$3::int2, $4::int2]) + ) + `, userKindID, computerKindID, memberOfKindID, adminToKindID).Scan( + &stats.candidateUsers, + &stats.computers, + &stats.adminEdges, + ); err != nil { + t.Fatalf("failed to inspect live aggregate traversal dataset: %v", err) + } + + return stats, true +} + +func liveKindID(ctx context.Context, pool *pgxpool.Pool, name string) (int16, error) { + var id int16 + if err := pool.QueryRow(ctx, `select id from kind where name = $1`, name).Scan(&id); err != nil { + return 0, fmt.Errorf("map kind %q: %w", name, err) + } + + return id, nil +} + +func explainAggregateTraversalPlan(ctx context.Context, t *testing.T, pool *pgxpool.Pool, sqlQuery string, params map[string]any) string { + t.Helper() + + tx, err := pool.Begin(ctx) + if err != nil { + t.Fatalf("failed to begin PostgreSQL explain transaction: %v", err) + } + defer func() { + _ = tx.Rollback(context.Background()) + }() + + if _, err := tx.Exec(ctx, `set local statement_timeout = '30s'`); err != nil { + t.Fatalf("failed to set PostgreSQL statement timeout: %v", err) + } + + args := []any{} + if len(params) > 0 { + args = append(args, pgx.NamedArgs(params)) + } + + rows, err := tx.Query(ctx, "explain (analyze, buffers, timing off, summary off) 
"+sqlQuery, args...) + if err != nil { + t.Fatalf("failed to explain live aggregate traversal query: %v", err) + } + defer rows.Close() + + var planLines []string + for rows.Next() { + var line string + if err := rows.Scan(&line); err != nil { + t.Fatalf("failed to scan live aggregate traversal plan line: %v", err) + } + planLines = append(planLines, line) + } + if err := rows.Err(); err != nil { + t.Fatalf("failed while reading live aggregate traversal plan: %v", err) + } + + return strings.Join(planLines, "\n") +} + +func requireLoweringDecision(t *testing.T, lowerings []optimize.LoweringDecision, name string) { + t.Helper() + + for _, lowering := range lowerings { + if lowering.Name == name { + return + } + } + + t.Fatalf("expected lowering %s in %v", name, lowerings) +} diff --git a/integration/pgsql_batch_operation_test.go b/integration/pgsql_batch_operation_test.go new file mode 100644 index 00000000..9e67c55a --- /dev/null +++ b/integration/pgsql_batch_operation_test.go @@ -0,0 +1,439 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +//go:build manual_integration + +package integration + +import ( + "context" + "errors" + "os" + "testing" + + "github.com/specterops/dawgs/drivers/pg" + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/query" +) + +func requirePostgreSQLBatchConnection(t *testing.T) { + t.Helper() + + connStr := os.Getenv("CONNECTION_STRING") + if connStr == "" { + t.Skip("CONNECTION_STRING env var is not set") + } + + driver, err := driverFromConnStr(connStr) + if err != nil { + t.Fatalf("failed to detect driver: %v", err) + } + if driver != pg.DriverName { + t.Skip("CONNECTION_STRING is not a PostgreSQL connection string") + } +} + +func assertBatchOperationSchema(t *testing.T, ctx context.Context, db graph.Database, nodeKinds, edgeKinds graph.Kinds, nodeConstraints, edgeConstraints []graph.Constraint) { + t.Helper() + + graphSchema := graph.Graph{ + Name: "integration_test", + Nodes: nodeKinds, + Edges: edgeKinds, + NodeConstraints: nodeConstraints, + EdgeConstraints: edgeConstraints, + } + + schema := graph.Schema{ + Graphs: []graph.Graph{graphSchema}, + DefaultGraph: graphSchema, + } + + if err := db.AssertSchema(ctx, schema); err != nil { + t.Fatalf("failed to assert batch operation schema: %v", err) + } +} + +func countNodesByKind(t *testing.T, ctx context.Context, db graph.Database, kind graph.Kind) int64 { + t.Helper() + + var count int64 + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + var err error + count, err = tx.Nodes().Filter(query.KindIn(query.Node(), kind)).Count() + return err + }); err != nil { + t.Fatalf("failed to count nodes: %v", err) + } + + return count +} + +func countRelationshipsByKind(t *testing.T, ctx context.Context, db graph.Database, kind graph.Kind) int64 { + t.Helper() + + var count int64 + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + var err error + count, err = tx.Relationships().Filter(query.KindIn(query.Relationship(), 
kind)).Count() + return err + }); err != nil { + t.Fatalf("failed to count relationships: %v", err) + } + + return count +} + +func fetchNodeByID(t *testing.T, ctx context.Context, db graph.Database, id graph.ID) *graph.Node { + t.Helper() + + var node *graph.Node + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + var err error + node, err = tx.Nodes().Filter(query.Equals(query.NodeID(), id)).First() + return err + }); err != nil { + t.Fatalf("failed to fetch node %d: %v", id, err) + } + + return node +} + +func firstRelationshipByKind(t *testing.T, ctx context.Context, db graph.Database, kind graph.Kind) *graph.Relationship { + t.Helper() + + var relationship *graph.Relationship + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + var err error + relationship, err = tx.Relationships().Filter(query.KindIn(query.Relationship(), kind)).First() + return err + }); err != nil { + t.Fatalf("failed to fetch relationship: %v", err) + } + + return relationship +} + +func requireStringProperty(t *testing.T, properties *graph.Properties, key, expected string) { + t.Helper() + + if actual, err := properties.Get(key).String(); err != nil { + t.Fatalf("property %q: %v", key, err) + } else if actual != expected { + t.Fatalf("property %q: got %q, want %q", key, actual, expected) + } +} + +func TestPostgreSQLBatchOperationFlushPersistsBeforeDelegateError(t *testing.T) { + requirePostgreSQLBatchConnection(t) + + var ( + nodeKind = graph.StringKind("PgBatchFlushNode") + db, ctx = SetupDBWithKinds(t, graph.Kinds{nodeKind}, nil) + sentinel = errors.New("delegate failed after flush") + ) + + err := db.BatchOperation(ctx, func(batch graph.Batch) error { + if err := batch.CreateNode(graph.PrepareNode(graph.NewProperties().Set("name", "first"), nodeKind)); err != nil { + return err + } + if err := batch.CreateNode(graph.PrepareNode(graph.NewProperties().Set("name", "second"), nodeKind)); err != nil { + return err + } + + return sentinel + }, 
graph.WithBatchSize(1)) + + if !errors.Is(err, sentinel) { + t.Fatalf("BatchOperation error: got %v, want %v", err, sentinel) + } + + if count := countNodesByKind(t, ctx, db, nodeKind); count != 2 { + t.Fatalf("persisted node count: got %d, want 2", count) + } +} + +func TestPostgreSQLBatchOperationNodeCreateWithAndWithoutIDs(t *testing.T) { + requirePostgreSQLBatchConnection(t) + + var ( + nodeKind = graph.StringKind("PgBatchCreateNode") + db, ctx = SetupDBWithKinds(t, graph.Kinds{nodeKind}, nil) + presetID = graph.ID(424242) + ) + + if err := db.BatchOperation(ctx, func(batch graph.Batch) error { + return batch.CreateNode(graph.NewNode(presetID, graph.NewProperties().Set("name", "preset"), nodeKind)) + }, graph.WithBatchSize(1)); err != nil { + t.Fatalf("failed to create preset-id node: %v", err) + } + + if err := db.BatchOperation(ctx, func(batch graph.Batch) error { + return batch.CreateNode(graph.PrepareNode(graph.NewProperties().Set("name", "generated"), nodeKind)) + }, graph.WithBatchSize(1)); err != nil { + t.Fatalf("failed to create generated-id node: %v", err) + } + + requireStringProperty(t, fetchNodeByID(t, ctx, db, presetID).Properties, "name", "preset") + + if count := countNodesByKind(t, ctx, db, nodeKind); count != 2 { + t.Fatalf("node count: got %d, want 2", count) + } +} + +func TestPostgreSQLBatchOperationRelationshipCreateCoalescesDuplicates(t *testing.T) { + requirePostgreSQLBatchConnection(t) + + var ( + nodeKind = graph.StringKind("PgBatchRelationshipNode") + edgeKind = graph.StringKind("PgBatchRelationshipEdge") + db, ctx = SetupDBWithKinds(t, graph.Kinds{nodeKind}, graph.Kinds{edgeKind}) + start *graph.Node + end *graph.Node + ) + + if err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { + var err error + if start, err = tx.CreateNode(graph.NewProperties().Set("name", "start"), nodeKind); err != nil { + return err + } + if end, err = tx.CreateNode(graph.NewProperties().Set("name", "end"), nodeKind); err != nil { + return err + 
} + + return nil + }); err != nil { + t.Fatalf("failed to create relationship endpoints: %v", err) + } + + if err := db.BatchOperation(ctx, func(batch graph.Batch) error { + if err := batch.CreateRelationship(graph.NewRelationship(0, start.ID, end.ID, graph.NewProperties().Set("first", "one").Set("shared", "old"), edgeKind)); err != nil { + return err + } + + return batch.CreateRelationship(graph.NewRelationship(0, start.ID, end.ID, graph.NewProperties().Set("second", "two").Set("shared", "new"), edgeKind)) + }, graph.WithBatchSize(1)); err != nil { + t.Fatalf("failed to create duplicate relationships: %v", err) + } + + if count := countRelationshipsByKind(t, ctx, db, edgeKind); count != 1 { + t.Fatalf("relationship count: got %d, want 1", count) + } + + relationship := firstRelationshipByKind(t, ctx, db, edgeKind) + requireStringProperty(t, relationship.Properties, "first", "one") + requireStringProperty(t, relationship.Properties, "second", "two") + requireStringProperty(t, relationship.Properties, "shared", "new") +} + +func TestPostgreSQLBatchOperationNodeUpdateUsesStaging(t *testing.T) { + requirePostgreSQLBatchConnection(t) + + var ( + nodeKind = graph.StringKind("PgBatchUpdateNode") + extraKind = graph.StringKind("PgBatchUpdateNodeExtra") + db, ctx = SetupDBWithKinds(t, graph.Kinds{nodeKind, extraKind}, nil) + node *graph.Node + ) + + if err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { + var err error + node, err = tx.CreateNode(graph.NewProperties().Set("status", "old").Set("removed", "yes"), nodeKind) + return err + }); err != nil { + t.Fatalf("failed to create node: %v", err) + } + + var ( + firstUpdate = graph.NewNode(node.ID, graph.NewProperties().Set("status", "first").Set("kept", "yes"), nodeKind) + secondUpdate = graph.NewNode(node.ID, graph.NewProperties().Set("status", "new"), nodeKind) + ) + secondUpdate.Properties.Delete("removed") + secondUpdate.AddKinds(extraKind) + + if err := db.BatchOperation(ctx, func(batch graph.Batch) 
error { + return batch.UpdateNodes([]*graph.Node{firstUpdate, secondUpdate}) + }, graph.WithBatchSize(2)); err != nil { + t.Fatalf("failed to update node: %v", err) + } + + updated := fetchNodeByID(t, ctx, db, node.ID) + requireStringProperty(t, updated.Properties, "status", "new") + requireStringProperty(t, updated.Properties, "kept", "yes") + + if updated.Properties.Exists("removed") { + t.Fatalf("expected removed property to be deleted") + } + if !updated.Kinds.ContainsOneOf(extraKind) { + t.Fatalf("expected updated node to contain kind %q", extraKind.String()) + } +} + +func TestPostgreSQLBatchOperationDeleteUsesStaging(t *testing.T) { + requirePostgreSQLBatchConnection(t) + + var ( + nodeKind = graph.StringKind("PgBatchDeleteNode") + edgeKind = graph.StringKind("PgBatchDeleteEdge") + db, ctx = SetupDBWithKinds(t, graph.Kinds{nodeKind}, graph.Kinds{edgeKind}) + start *graph.Node + end *graph.Node + relationship *graph.Relationship + ) + + if err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { + var err error + if start, err = tx.CreateNode(graph.NewProperties().Set("name", "start"), nodeKind); err != nil { + return err + } + if end, err = tx.CreateNode(graph.NewProperties().Set("name", "end"), nodeKind); err != nil { + return err + } + if relationship, err = tx.CreateRelationshipByIDs(start.ID, end.ID, edgeKind, graph.NewProperties().Set("name", "edge")); err != nil { + return err + } + + return nil + }); err != nil { + t.Fatalf("failed to create delete fixture: %v", err) + } + + if err := db.BatchOperation(ctx, func(batch graph.Batch) error { + if err := batch.DeleteRelationship(relationship.ID); err != nil { + return err + } + + return batch.DeleteNode(start.ID) + }, graph.WithBatchSize(2)); err != nil { + t.Fatalf("failed to delete batch fixture: %v", err) + } + + if count := countRelationshipsByKind(t, ctx, db, edgeKind); count != 0 { + t.Fatalf("relationship count after delete: got %d, want 0", count) + } + if count := countNodesByKind(t, 
ctx, db, nodeKind); count != 1 { + t.Fatalf("node count after delete: got %d, want 1", count) + } + requireStringProperty(t, fetchNodeByID(t, ctx, db, end.ID).Properties, "name", "end") +} + +func TestPostgreSQLBatchOperationUpdateByUsesStaging(t *testing.T) { + requirePostgreSQLBatchConnection(t) + + var ( + nodeKind = graph.StringKind("PgBatchUpsertNode") + edgeKind = graph.StringKind("PgBatchUpsertEdge") + db, ctx = SetupDBWithKinds(t, graph.Kinds{nodeKind}, graph.Kinds{edgeKind}) + ) + + assertBatchOperationSchema(t, ctx, db, graph.Kinds{nodeKind}, graph.Kinds{edgeKind}, []graph.Constraint{{ + Field: "node_key", + Type: graph.BTreeIndex, + }}, []graph.Constraint{{ + Field: "edge_key", + Type: graph.BTreeIndex, + }}) + + if err := db.BatchOperation(ctx, func(batch graph.Batch) error { + return batch.UpdateNodeBy(graph.NodeUpdate{ + Node: graph.NewNode(0, graph.NewProperties().Set("node_key", "standalone").Set("value", "first"), nodeKind), + IdentityKind: nodeKind, + IdentityProperties: []string{"node_key"}, + }) + }, graph.WithBatchSize(1)); err != nil { + t.Fatalf("failed to create node by identity: %v", err) + } + + if err := db.BatchOperation(ctx, func(batch graph.Batch) error { + return batch.UpdateNodeBy(graph.NodeUpdate{ + Node: graph.NewNode(0, graph.NewProperties().Set("node_key", "standalone").Set("value", "second"), nodeKind), + IdentityKind: nodeKind, + IdentityProperties: []string{"node_key"}, + }) + }, graph.WithBatchSize(1)); err != nil { + t.Fatalf("failed to update node by identity: %v", err) + } + + if count := countNodesByKind(t, ctx, db, nodeKind); count != 1 { + t.Fatalf("upserted node count: got %d, want 1", count) + } + + var node *graph.Node + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + var err error + node, err = tx.Nodes().Filter(query.Equals(query.Property(query.Node(), "node_key"), "standalone")).First() + return err + }); err != nil { + t.Fatalf("failed to fetch upserted node: %v", err) + } + 
requireStringProperty(t, node.Properties, "value", "second") + + for _, value := range []string{"first", "second"} { + value := value + if err := db.BatchOperation(ctx, func(batch graph.Batch) error { + return batch.UpdateRelationshipBy(graph.RelationshipUpdate{ + Relationship: graph.NewRelationship(0, 0, 0, graph.NewProperties().Set("edge_key", "connected").Set("value", value), edgeKind), + IdentityProperties: []string{"edge_key"}, + Start: graph.NewNode(0, graph.NewProperties().Set("node_key", "start").Set("name", "start"), nodeKind), + StartIdentityKind: nodeKind, + StartIdentityProperties: []string{ + "node_key", + }, + End: graph.NewNode(0, graph.NewProperties().Set("node_key", "end").Set("name", "end"), nodeKind), + EndIdentityKind: nodeKind, + EndIdentityProperties: []string{ + "node_key", + }, + }) + }, graph.WithBatchSize(1)); err != nil { + t.Fatalf("failed to upsert relationship by identity: %v", err) + } + } + + if count := countRelationshipsByKind(t, ctx, db, edgeKind); count != 1 { + t.Fatalf("upserted relationship count: got %d, want 1", count) + } + + relationship := firstRelationshipByKind(t, ctx, db, edgeKind) + requireStringProperty(t, relationship.Properties, "value", "second") + + var ( + start = fetchNodeByID(t, ctx, db, relationship.StartID) + end = fetchNodeByID(t, ctx, db, relationship.EndID) + ) + requireStringProperty(t, start.Properties, "node_key", "start") + requireStringProperty(t, end.Properties, "node_key", "end") + + err := db.BatchOperation(ctx, func(batch graph.Batch) error { + return batch.UpdateRelationshipBy(graph.RelationshipUpdate{ + Relationship: graph.NewRelationship(0, 0, 0, graph.NewProperties().Set("edge_key", "same-endpoints").Set("value", "conflict"), edgeKind), + IdentityProperties: []string{"edge_key"}, + Start: graph.NewNode(0, graph.NewProperties().Set("node_key", "start").Set("name", "start"), nodeKind), + StartIdentityKind: nodeKind, + StartIdentityProperties: []string{ + "node_key", + }, + End: graph.NewNode(0, 
graph.NewProperties().Set("node_key", "end").Set("name", "end"), nodeKind), + EndIdentityKind: nodeKind, + EndIdentityProperties: []string{ + "node_key", + }, + }) + }, graph.WithBatchSize(1)) + if err == nil { + t.Fatalf("expected relationship identity update with duplicate endpoints to fail") + } +} diff --git a/integration/pgsql_count_fast_path_test.go b/integration/pgsql_count_fast_path_test.go new file mode 100644 index 00000000..a29a9610 --- /dev/null +++ b/integration/pgsql_count_fast_path_test.go @@ -0,0 +1,94 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +//go:build manual_integration + +package integration + +import ( + "errors" + "os" + "testing" + + "github.com/specterops/dawgs/drivers/pg" + "github.com/specterops/dawgs/graph" +) + +func TestPostgreSQLCountStoreFastPathRequiresRelationshipEndpoints(t *testing.T) { + connStr := os.Getenv("CONNECTION_STRING") + if connStr == "" { + t.Skip("CONNECTION_STRING env var is not set") + } + + driver, err := driverFromConnStr(connStr) + if err != nil { + t.Fatalf("failed to detect driver: %v", err) + } + if driver != pg.DriverName { + t.Skip("CONNECTION_STRING is not a PostgreSQL connection string") + } + + var ( + nodeKind = graph.StringKind("CountFastPathNode") + edgeKind = graph.StringKind("CountFastPathEdge") + db, ctx = SetupDBWithKinds(t, graph.Kinds{nodeKind}, graph.Kinds{edgeKind}) + ) + + if err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { + start, err := tx.CreateNode(graph.NewProperties(), nodeKind) + if err != nil { + return err + } + + if _, err := tx.CreateRelationshipByIDs(start.ID, 0, edgeKind, graph.NewProperties()); err != nil { + return err + } + + return nil + }); err != nil { + t.Fatalf("failed to create dangling relationship fixture: %v", err) + } + + var count int64 + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + result := tx.Query("MATCH ()-[r:CountFastPathEdge]->() RETURN count(r)", nil) + defer result.Close() + + if !result.Next() { + if err := result.Error(); err != nil { + return err + } + + return errors.New("expected count row") + } + + if err := result.Scan(&count); err != nil { + return err + } + + if result.Next() { + return errors.New("expected one count row") + } + + return result.Error() + }); err != nil { + t.Fatalf("query failed: %v", err) + } + + if count != 0 { + t.Fatalf("relationship count: got %d, want 0", count) + } +} diff --git a/integration/pgsql_property_equality_test.go b/integration/pgsql_property_equality_test.go index 
51dcddb0..5db0293e 100644 --- a/integration/pgsql_property_equality_test.go +++ b/integration/pgsql_property_equality_test.go @@ -19,9 +19,14 @@ package integration import ( + "context" "os" + "strings" "testing" + "time" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" "github.com/specterops/dawgs/drivers/pg" "github.com/specterops/dawgs/graph" "github.com/specterops/dawgs/query" @@ -62,13 +67,14 @@ func TestPostgreSQLPropertyTextEqualityCompatibility(t *testing.T) { } var ( - userKind = graph.StringKind("User") - groupKind = graph.StringKind("Group") - memberOf = graph.StringKind("MemberOf") - db, ctx = SetupDBWithKinds(t, graph.Kinds{userKind, groupKind}, graph.Kinds{memberOf}) - boolTrue *graph.Relationship - boolFalse *graph.Relationship - stringTrue *graph.Relationship + userKind = graph.StringKind("User") + groupKind = graph.StringKind("Group") + memberOf = graph.StringKind("MemberOf") + db, ctx = SetupDBWithKinds(t, graph.Kinds{userKind, groupKind}, graph.Kinds{memberOf}) + boolTrue *graph.Relationship + boolFalse *graph.Relationship + stringTrue *graph.Relationship + stringFalse *graph.Relationship ) if err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { @@ -103,6 +109,14 @@ func TestPostgreSQLPropertyTextEqualityCompatibility(t *testing.T) { return err } + stringFalseGroup, err := tx.CreateNode(graph.AsProperties(map[string]any{ + "isassignabletorole": "false", + "rank": "2", + }), groupKind) + if err != nil { + return err + } + if boolTrue, err = tx.CreateRelationshipByIDs(user.ID, boolTrueGroup.ID, memberOf, graph.NewProperties()); err != nil { return err } @@ -112,6 +126,9 @@ func TestPostgreSQLPropertyTextEqualityCompatibility(t *testing.T) { if stringTrue, err = tx.CreateRelationshipByIDs(user.ID, stringTrueGroup.ID, memberOf, graph.NewProperties()); err != nil { return err } + if stringFalse, err = tx.CreateRelationshipByIDs(user.ID, stringFalseGroup.ID, memberOf, graph.NewProperties()); err != nil { + return err + } 
return nil }); err != nil { @@ -124,20 +141,20 @@ func TestPostgreSQLPropertyTextEqualityCompatibility(t *testing.T) { value any expected []graph.ID }{{ - name: "text true matches JSON boolean and string true", + name: "text true only matches JSON string true", property: "isassignabletorole", value: "true", - expected: []graph.ID{boolTrue.ID, stringTrue.ID}, + expected: []graph.ID{stringTrue.ID}, }, { name: "boolean true remains strict", property: "isassignabletorole", value: true, expected: []graph.ID{boolTrue.ID}, }, { - name: "text false matches JSON boolean false text", + name: "text false only matches JSON string false", property: "isassignabletorole", value: "false", - expected: []graph.ID{boolFalse.ID}, + expected: []graph.ID{stringFalse.ID}, }, { name: "boolean false remains strict", property: "isassignabletorole", @@ -181,3 +198,92 @@ func TestPostgreSQLPropertyTextEqualityCompatibility(t *testing.T) { }) } } + +func TestPostgreSQLLiveObjectIDEqualityPlanUsesTextExpressionIndex(t *testing.T) { + connStr := os.Getenv("CONNECTION_STRING") + if connStr == "" { + t.Skip("CONNECTION_STRING env var is not set") + } + + driver, err := driverFromConnStr(connStr) + if err != nil { + t.Fatalf("failed to detect driver: %v", err) + } + if driver != pg.DriverName { + t.Skipf("CONNECTION_STRING is not a PostgreSQL connection string") + } + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + poolCfg, err := pgxpool.ParseConfig(connStr) + if err != nil { + t.Fatalf("failed to parse PG connection string: %v", err) + } + pool, err := pgxpool.NewWithConfig(ctx, poolCfg) + if err != nil { + t.Fatalf("failed to connect to PostgreSQL: %v", err) + } + defer pool.Close() + + var hasObjectIDIndex bool + if err := pool.QueryRow(ctx, ` + select exists ( + select 1 + from pg_indexes + where tablename like 'node\_%' escape '\' + and indexdef like '%properties ->> ''objectid''%' + ) + `).Scan(&hasObjectIDIndex); err != nil { + 
t.Fatalf("failed to inspect node indexes: %v", err) + } + if !hasObjectIDIndex { + t.Skip("connected PostgreSQL database has no node objectid text expression index") + } + + var objectID string + if err := pool.QueryRow(ctx, ` + select n.properties ->> 'objectid' + from node n + join kind k on k.name = 'Group' + where n.kind_ids operator (pg_catalog.@>) array[k.id]::int2[] + and jsonb_typeof(n.properties -> 'objectid') = 'string' + limit 1 + `).Scan(&objectID); err != nil { + if err == pgx.ErrNoRows { + t.Skip("connected PostgreSQL database has no Group node with a string objectid") + } + + t.Fatalf("failed to find live objectid sample: %v", err) + } + + rows, err := pool.Query(ctx, ` + explain (analyze, buffers, timing off, summary off) + select n.id + from node n + where jsonb_typeof(n.properties -> 'objectid') = 'string' + and n.properties ->> 'objectid' = $1 + limit 1 + `, objectID) + if err != nil { + t.Fatalf("failed to explain objectid lookup: %v", err) + } + defer rows.Close() + + var planLines []string + for rows.Next() { + var line string + if err := rows.Scan(&line); err != nil { + t.Fatalf("failed to scan plan line: %v", err) + } + planLines = append(planLines, line) + } + if err := rows.Err(); err != nil { + t.Fatalf("failed while reading plan: %v", err) + } + + plan := strings.Join(planLines, "\n") + if !strings.Contains(plan, "Index") || !strings.Contains(plan, "objectid") { + t.Fatalf("expected objectid text expression index plan, got:\n%s", plan) + } +} diff --git a/integration/testdata/adcs_fanout.json b/integration/testdata/adcs_fanout.json new file mode 100644 index 00000000..dafbb835 --- /dev/null +++ b/integration/testdata/adcs_fanout.json @@ -0,0 +1,50 @@ +{ + "graph": { + "nodes": [ + {"id": "n", "kinds": ["Group"], "properties": {"objectid": "S-1-5-21-2643190041-1319121918-239771340-513"}}, + {"id": "p1-a", "kinds": ["Group"]}, + {"id": "p1-b", "kinds": ["Group"]}, + {"id": "p1-c", "kinds": ["Group"]}, + {"id": "p2-good", "kinds": 
["Group"]}, + {"id": "p2-disabled", "kinds": ["Group"]}, + {"id": "p2-wrong-ca", "kinds": ["Group"]}, + {"id": "ca", "kinds": ["EnterpriseCA"]}, + {"id": "other-ca", "kinds": ["EnterpriseCA"]}, + {"id": "store", "kinds": ["NTAuthStore"]}, + {"id": "domain", "kinds": ["Domain"]}, + {"id": "other-domain", "kinds": ["Domain"]}, + {"id": "template-good", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 1, "authorizedsignatures": 1}}, + {"id": "template-alt", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 2, "authorizedsignatures": 0}}, + {"id": "template-disabled", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": false, "requiresmanagerapproval": true, "enrolleesuppliessubject": false, "schemaversion": 2, "authorizedsignatures": 1}}, + {"id": "template-wrong-ca", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 1, "authorizedsignatures": 1}}, + {"id": "root", "kinds": ["RootCA"]}, + {"id": "other-root", "kinds": ["RootCA"]} + ], + "edges": [ + {"start_id": "n", "end_id": "p1-a", "kind": "MemberOf"}, + {"start_id": "n", "end_id": "p1-b", "kind": "MemberOf"}, + {"start_id": "p1-b", "end_id": "p1-c", "kind": "MemberOf"}, + {"start_id": "n", "end_id": "p2-good", "kind": "MemberOf"}, + {"start_id": "n", "end_id": "p2-disabled", "kind": "MemberOf"}, + {"start_id": "n", "end_id": "p2-wrong-ca", "kind": "MemberOf"}, + {"start_id": "n", "end_id": "ca", "kind": "Enroll"}, + {"start_id": "p1-a", "end_id": "ca", "kind": "Enroll"}, + {"start_id": "p1-b", "end_id": "ca", "kind": "Enroll"}, + {"start_id": "p1-c", "end_id": "ca", "kind": "Enroll"}, + {"start_id": "ca", "end_id": "store", "kind": "TrustedForNTAuth"}, + {"start_id": "store", 
"end_id": "domain", "kind": "NTAuthStoreFor"}, + {"start_id": "p2-good", "end_id": "template-good", "kind": "GenericAll"}, + {"start_id": "p2-good", "end_id": "template-alt", "kind": "Enroll"}, + {"start_id": "p2-disabled", "end_id": "template-disabled", "kind": "AllExtendedRights"}, + {"start_id": "p2-wrong-ca", "end_id": "template-wrong-ca", "kind": "GenericAll"}, + {"start_id": "template-good", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "template-alt", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "template-disabled", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "template-wrong-ca", "end_id": "other-ca", "kind": "PublishedTo"}, + {"start_id": "ca", "end_id": "root", "kind": "IssuedSignedBy"}, + {"start_id": "ca", "end_id": "other-root", "kind": "EnterpriseCAFor"}, + {"start_id": "root", "end_id": "domain", "kind": "RootCAFor"}, + {"start_id": "other-root", "end_id": "other-domain", "kind": "RootCAFor"} + ] + } +} diff --git a/integration/testdata/cases/aggregation_inline.json b/integration/testdata/cases/aggregation_inline.json index e78589b8..3c5a9309 100644 --- a/integration/testdata/cases/aggregation_inline.json +++ b/integration/testdata/cases/aggregation_inline.json @@ -202,7 +202,20 @@ } }, { - "name": "add grouped property expression to aggregate count", + "name": "add grouped property expression to aggregate count inline", + "cypher": "MATCH (n) RETURN toInteger(n.value) + count(n)", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"value": 10}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"value": 10}}, + {"id": "c", "kinds": ["NodeKind1"], "properties": {"value": 20}} + ], + "edges": [] + }, + "assert": {"scalar_values": [12, 21]} + }, + { + "name": "add grouped property expression to aggregate count through aliases", "cypher": "MATCH (n) WITH toInteger(n.value) AS value, count(n) AS node_count RETURN value + node_count", "fixture": { "nodes": [ @@ -215,7 +228,20 @@ "assert": 
{"scalar_values": [12, 21]} }, { - "name": "project grouped property plus aggregate through with", + "name": "project grouped property plus aggregate through with inline expression", + "cypher": "MATCH (n) WITH toInteger(n.value) + count(n) AS score RETURN score", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"value": 10}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"value": 10}}, + {"id": "c", "kinds": ["NodeKind1"], "properties": {"value": 20}} + ], + "edges": [] + }, + "assert": {"scalar_values": [12, 21]} + }, + { + "name": "project grouped property plus aggregate through with aliases", "cypher": "MATCH (n) WITH toInteger(n.value) AS value, count(n) AS node_count WITH value + node_count AS score RETURN score", "fixture": { "nodes": [ @@ -250,6 +276,25 @@ "edges": [] }, "assert": {"scalar_values": [40]} + }, + { + "name": "aggregate after optimized expansion groups by terminal node", + "cypher": "MATCH p = (src:NodeKind1)-[:EdgeKind1*1..]->(mid)-[:EdgeKind2]->(dst:NodeKind2) WHERE src.name = 'aggregation-expansion-src' WITH dst, count(p) AS path_count RETURN dst.name, path_count", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "aggregation-expansion-src"}}, + {"id": "mid-a", "kinds": ["NodeKind1"]}, + {"id": "mid-b", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"], "properties": {"name": "aggregation-expansion-dst"}} + ], + "edges": [ + {"start_id": "src", "end_id": "mid-a", "kind": "EdgeKind1"}, + {"start_id": "src", "end_id": "mid-b", "kind": "EdgeKind1"}, + {"start_id": "mid-a", "end_id": "dst", "kind": "EdgeKind2"}, + {"start_id": "mid-b", "end_id": "dst", "kind": "EdgeKind2"} + ] + }, + "assert": {"row_values": [["aggregation-expansion-dst", 2]]} } ] } diff --git a/integration/testdata/cases/expansion_inline.json b/integration/testdata/cases/expansion_inline.json index 28e18cfa..8eeb3782 100644 --- a/integration/testdata/cases/expansion_inline.json +++ 
b/integration/testdata/cases/expansion_inline.json @@ -48,6 +48,82 @@ }, "assert": {"node_ids": ["leaf"]} }, + { + "name": "bind expansion path through only terminals that satisfy a fixed suffix", + "cypher": "match p = (src:NodeKind1)-[:EdgeKind1*1..]->(mid)-[:EdgeKind2]->(dst:NodeKind2) where src.name = 'terminal-pushdown-src' return p", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "terminal-pushdown-src"}}, + {"id": "good-mid", "kinds": ["NodeKind1"]}, + {"id": "dead-mid", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"]} + ], + "edges": [ + {"start_id": "src", "end_id": "good-mid", "kind": "EdgeKind1"}, + {"start_id": "src", "end_id": "dead-mid", "kind": "EdgeKind1"}, + {"start_id": "good-mid", "end_id": "dst", "kind": "EdgeKind2"} + ] + }, + "assert": {"path_lengths": [2], "path_node_ids": [["src", "good-mid", "dst"]], "path_edge_kinds": [["EdgeKind1", "EdgeKind2"]]} + }, + { + "name": "bind zero hop expansion path through only terminals that satisfy a fixed suffix", + "cypher": "match p = (src:NodeKind1)-[:EdgeKind1*0..]->(mid)-[:EdgeKind2]->(dst:NodeKind2) where src.name = 'terminal-pushdown-zero-src' return p", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "terminal-pushdown-zero-src"}}, + {"id": "dead-mid", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"]} + ], + "edges": [ + {"start_id": "src", "end_id": "dead-mid", "kind": "EdgeKind1"}, + {"start_id": "src", "end_id": "dst", "kind": "EdgeKind2"} + ] + }, + "assert": {"path_lengths": [1], "path_node_ids": [["src", "dst"]], "path_edge_kinds": [["EdgeKind2"]]} + }, + { + "name": "bind expansion path through a fixed suffix chain to a bound endpoint", + "cypher": "match (dst:NodeKind2 {name: 'terminal-pushdown-bound-dst'}) match p = (src:NodeKind1)-[:EdgeKind1*0..]->(mid)-[:EdgeKind2]->(bridge:NodeKind1)-[:EdgeKind1]->(dst) where src.name = 'terminal-pushdown-bound-src' return p", + 
"fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "terminal-pushdown-bound-src"}}, + {"id": "good-mid", "kinds": ["NodeKind1"]}, + {"id": "bad-mid", "kinds": ["NodeKind1"]}, + {"id": "bridge", "kinds": ["NodeKind1"]}, + {"id": "bad-bridge", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"], "properties": {"name": "terminal-pushdown-bound-dst"}}, + {"id": "other-dst", "kinds": ["NodeKind2"], "properties": {"name": "terminal-pushdown-other-dst"}} + ], + "edges": [ + {"start_id": "src", "end_id": "good-mid", "kind": "EdgeKind1"}, + {"start_id": "src", "end_id": "bad-mid", "kind": "EdgeKind1"}, + {"start_id": "good-mid", "end_id": "bridge", "kind": "EdgeKind2"}, + {"start_id": "bad-mid", "end_id": "bad-bridge", "kind": "EdgeKind2"}, + {"start_id": "bridge", "end_id": "dst", "kind": "EdgeKind1"}, + {"start_id": "bad-bridge", "end_id": "other-dst", "kind": "EdgeKind1"} + ] + }, + "assert": {"row_count": 1, "path_lengths": [3], "path_node_ids": [["src", "good-mid", "bridge", "dst"]], "path_edge_kinds": [["EdgeKind1", "EdgeKind2", "EdgeKind1"]]} + }, + { + "name": "relationships of optimized expansion path preserve suffix order", + "cypher": "match p = (src:NodeKind1)-[:EdgeKind1*1..]->(mid)-[:EdgeKind2]->(dst:NodeKind2) where src.name = 'terminal-pushdown-src' return relationships(p)", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "terminal-pushdown-src"}}, + {"id": "good-mid", "kinds": ["NodeKind1"]}, + {"id": "dead-mid", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"]} + ], + "edges": [ + {"start_id": "src", "end_id": "good-mid", "kind": "EdgeKind1"}, + {"start_id": "src", "end_id": "dead-mid", "kind": "EdgeKind1"}, + {"start_id": "good-mid", "end_id": "dst", "kind": "EdgeKind2"} + ] + }, + "assert": {"row_count": 1, "relationship_list_kinds": [["EdgeKind1", "EdgeKind2"]]} + }, { "name": "fixed step followed by a bounded variable-length expansion", "cypher": 
"match (n)-[]->(e:NodeKind1)-[*2..3]->(l) where n.name = 'start' return l", @@ -175,6 +251,76 @@ "edges": [{"start_id": "src", "end_id": "tgt", "kind": "EdgeKind1"}] }, "assert": {"path_node_ids": [["src", "tgt"]], "path_edge_kinds": [["EdgeKind1"]]} + }, + { + "name": "expansion followed by fixed suffix cannot reuse an expansion relationship", + "cypher": "match p = (src:NodeKind1)-[:EdgeKind1*1..]->(mid)-[:EdgeKind1]-(dst:NodeKind1) where src.name = 'reuse-source' and dst.name = 'reuse-source' return p", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "reuse-source"}}, + {"id": "mid", "kinds": ["NodeKind1"], "properties": {"name": "reuse-mid"}} + ], + "edges": [ + {"start_id": "src", "end_id": "mid", "kind": "EdgeKind1"} + ] + }, + "assert": "empty" + }, + { + "name": "anonymous continuation suffix reaches a bound endpoint after expansion", + "cypher": "match (dst:NodeKind2 {name: 'anonymous-bound-dst'}) match p = (src:NodeKind1)-[:EdgeKind1*1..]->(:NodeKind1)-[:EdgeKind2]->(dst) where src.name = 'anonymous-bound-src' return p", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "anonymous-bound-src"}}, + {"id": "root", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"], "properties": {"name": "anonymous-bound-dst"}}, + {"id": "decoy", "kinds": ["NodeKind2"], "properties": {"name": "anonymous-decoy-dst"}} + ], + "edges": [ + {"start_id": "src", "end_id": "root", "kind": "EdgeKind1"}, + {"start_id": "root", "end_id": "dst", "kind": "EdgeKind2"}, + {"start_id": "root", "end_id": "decoy", "kind": "EdgeKind2"} + ] + }, + "assert": {"row_count": 1, "path_node_ids": [["src", "root", "dst"]], "path_edge_kinds": [["EdgeKind1", "EdgeKind2"]]} + }, + { + "name": "directionless fixed suffix after expansion preserves path semantics", + "cypher": "match p = (src:NodeKind1)-[:EdgeKind1*1..]->(mid)-[:EdgeKind2]-(dst:NodeKind2) where src.name = 'directionless-suffix-src' return p", + 
"fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "directionless-suffix-src"}}, + {"id": "mid", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"]}, + {"id": "dead", "kinds": ["NodeKind1"]} + ], + "edges": [ + {"start_id": "src", "end_id": "mid", "kind": "EdgeKind1"}, + {"start_id": "src", "end_id": "dead", "kind": "EdgeKind1"}, + {"start_id": "mid", "end_id": "dst", "kind": "EdgeKind2"} + ] + }, + "assert": {"path_node_ids": [["src", "mid", "dst"]], "path_edge_kinds": [["EdgeKind1", "EdgeKind2"]]} + }, + { + "name": "inbound expansion suffix preserves path functions", + "cypher": "match p = (ca:EnterpriseCA)<-[:PublishedTo*1..]-(ct)<-[:Enroll]-(m:Group) return size(relationships(p)), nodes(p), relationships(p)", + "fixture": { + "nodes": [ + {"id": "ca", "kinds": ["EnterpriseCA"]}, + {"id": "template", "kinds": ["CertTemplate"]}, + {"id": "member", "kinds": ["Group"]} + ], + "edges": [ + {"start_id": "template", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "member", "end_id": "template", "kind": "Enroll"} + ] + }, + "assert": { + "scalar_values": [2], + "node_list_ids": [["ca", "template", "member"]], + "relationship_list_kinds": [["PublishedTo", "Enroll"]] + } } ] } diff --git a/integration/testdata/cases/multipart_inline.json b/integration/testdata/cases/multipart_inline.json index 2c69227c..8a637fb9 100644 --- a/integration/testdata/cases/multipart_inline.json +++ b/integration/testdata/cases/multipart_inline.json @@ -129,6 +129,40 @@ "edges": [{"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}] }, "assert": {"row_count": 1, "node_ids": ["a", "b"]} + }, + { + "name": "with barrier keeps optimized expansion path semantics", + "cypher": "match p = (src:NodeKind1)-[:EdgeKind1*1..]->(mid)-[:EdgeKind2]->(dst:NodeKind2) where src.name = 'with-expansion-src' with p, size(relationships(p)) as hops where hops = 2 return p", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": 
{"name": "with-expansion-src"}}, + {"id": "mid", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"]}, + {"id": "dead", "kinds": ["NodeKind1"]} + ], + "edges": [ + {"start_id": "src", "end_id": "mid", "kind": "EdgeKind1"}, + {"start_id": "mid", "end_id": "dst", "kind": "EdgeKind2"}, + {"start_id": "src", "end_id": "dead", "kind": "EdgeKind1"} + ] + }, + "assert": {"path_node_ids": [["src", "mid", "dst"]], "path_edge_kinds": [["EdgeKind1", "EdgeKind2"]]} + }, + { + "name": "optional match barrier preserves anchor row around optimized expansion", + "cypher": "match (src:NodeKind1) where src.name = 'optional-expansion-src' optional match p = (src)-[:EdgeKind1*1..]->(mid)-[:EdgeKind2]->(dst:NodeKind2) where dst.name = 'missing-dst' return count(p)", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "optional-expansion-src"}}, + {"id": "mid", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"], "properties": {"name": "present-dst"}} + ], + "edges": [ + {"start_id": "src", "end_id": "mid", "kind": "EdgeKind1"}, + {"start_id": "mid", "end_id": "dst", "kind": "EdgeKind2"} + ] + }, + "assert": {"exact_int": 0} } ] } diff --git a/integration/testdata/cases/optimizer_inline.json b/integration/testdata/cases/optimizer_inline.json new file mode 100644 index 00000000..fe2253d5 --- /dev/null +++ b/integration/testdata/cases/optimizer_inline.json @@ -0,0 +1,345 @@ +{ + "cases": [ + { + "name": "return two ADCS-style paths with shared CA and domain endpoints", + "cypher": "MATCH (n:Group) WHERE n.objectid = 'S-1-5-21-2643190041-1319121918-239771340-513' MATCH p1 = (n)-[:MemberOf*0..]->()-[:Enroll]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(:NTAuthStore)-[:NTAuthStoreFor]->(d:Domain) MATCH p2 = (n)-[:MemberOf*0..]->()-[:GenericAll|Enroll|AllExtendedRights]->(ct:CertTemplate)-[:PublishedTo]->(ca)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(:RootCA)-[:RootCAFor]->(d) WHERE ct.authenticationenabled = true AND 
ct.requiresmanagerapproval = false AND ct.enrolleesuppliessubject = true AND (ct.schemaversion = 1 OR ct.authorizedsignatures = 0) RETURN p1, p2", + "fixture": { + "nodes": [ + {"id": "n", "kinds": ["Group"], "properties": {"objectid": "S-1-5-21-2643190041-1319121918-239771340-513"}}, + {"id": "p1-mid", "kinds": ["Group"]}, + {"id": "p2-mid", "kinds": ["Group"]}, + {"id": "ca", "kinds": ["EnterpriseCA"]}, + {"id": "store", "kinds": ["NTAuthStore"]}, + {"id": "domain", "kinds": ["Domain"]}, + {"id": "template", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 1, "authorizedsignatures": 1}}, + {"id": "root", "kinds": ["RootCA"]}, + {"id": "unused-root", "kinds": ["RootCA"]}, + {"id": "unused-template", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": false, "requiresmanagerapproval": true, "enrolleesuppliessubject": false, "schemaversion": 2, "authorizedsignatures": 1}} + ], + "edges": [ + {"start_id": "n", "end_id": "p1-mid", "kind": "MemberOf"}, + {"start_id": "p1-mid", "end_id": "ca", "kind": "Enroll"}, + {"start_id": "ca", "end_id": "store", "kind": "TrustedForNTAuth"}, + {"start_id": "store", "end_id": "domain", "kind": "NTAuthStoreFor"}, + {"start_id": "n", "end_id": "p2-mid", "kind": "MemberOf"}, + {"start_id": "p2-mid", "end_id": "template", "kind": "GenericAll"}, + {"start_id": "template", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "ca", "end_id": "root", "kind": "IssuedSignedBy"}, + {"start_id": "root", "end_id": "domain", "kind": "RootCAFor"}, + {"start_id": "ca", "end_id": "unused-root", "kind": "EnterpriseCAFor"}, + {"start_id": "p2-mid", "end_id": "unused-template", "kind": "AllExtendedRights"} + ] + }, + "assert": { + "keys": ["p1", "p2"], + "row_count": 1, + "path_lengths": [4, 5], + "path_node_ids": [ + ["n", "p1-mid", "ca", "store", "domain"], + ["n", "p2-mid", "template", "ca", "root", "domain"] + ], + 
"path_edge_kinds": [ + ["MemberOf", "Enroll", "TrustedForNTAuth", "NTAuthStoreFor"], + ["MemberOf", "GenericAll", "PublishedTo", "IssuedSignedBy", "RootCAFor"] + ], + "contains_node_with_props": {"objectid": "S-1-5-21-2643190041-1319121918-239771340-513"}, + "contains_edge": {"start": "template", "end": "ca", "kind": "PublishedTo"} + } + }, + { + "name": "ADCS template predicate accepts both OR branches and rejects false alternatives", + "cypher": "MATCH (n:Group) WHERE n.objectid = 'optimizer-or-source' MATCH p = (n)-[:MemberOf*0..]->()-[:GenericAll|Enroll|AllExtendedRights]->(ct:CertTemplate)-[:PublishedTo]->(ca:EnterpriseCA)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(:RootCA)-[:RootCAFor]->(d:Domain) WHERE ct.authenticationenabled = true AND ct.requiresmanagerapproval = false AND ct.enrolleesuppliessubject = true AND (ct.schemaversion = 1 OR ct.authorizedsignatures = 0) RETURN p", + "fixture": { + "nodes": [ + {"id": "n", "kinds": ["Group"], "properties": {"objectid": "optimizer-or-source"}}, + {"id": "mid-v1", "kinds": ["Group"]}, + {"id": "mid-sig", "kinds": ["Group"]}, + {"id": "mid-bad", "kinds": ["Group"]}, + {"id": "template-v1", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 1, "authorizedsignatures": 2}}, + {"id": "template-sig", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 2, "authorizedsignatures": 0}}, + {"id": "template-bad", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 2, "authorizedsignatures": 1}}, + {"id": "ca", "kinds": ["EnterpriseCA"]}, + {"id": "root", "kinds": ["RootCA"]}, + {"id": "domain", "kinds": ["Domain"]} + ], + "edges": [ + {"start_id": "n", "end_id": "mid-v1", "kind": "MemberOf"}, + {"start_id": "mid-v1", 
"end_id": "template-v1", "kind": "GenericAll"}, + {"start_id": "n", "end_id": "mid-sig", "kind": "MemberOf"}, + {"start_id": "mid-sig", "end_id": "template-sig", "kind": "Enroll"}, + {"start_id": "n", "end_id": "mid-bad", "kind": "MemberOf"}, + {"start_id": "mid-bad", "end_id": "template-bad", "kind": "AllExtendedRights"}, + {"start_id": "template-v1", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "template-sig", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "template-bad", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "ca", "end_id": "root", "kind": "IssuedSignedBy"}, + {"start_id": "root", "end_id": "domain", "kind": "RootCAFor"} + ] + }, + "assert": { + "row_count": 2, + "path_node_ids": [ + ["n", "mid-v1", "template-v1", "ca", "root", "domain"], + ["n", "mid-sig", "template-sig", "ca", "root", "domain"] + ], + "path_edge_kinds": [ + ["MemberOf", "GenericAll", "PublishedTo", "IssuedSignedBy", "RootCAFor"], + ["MemberOf", "Enroll", "PublishedTo", "IssuedSignedBy", "RootCAFor"] + ] + } + }, + { + "name": "ADCS fanout returns every p1 and p2 path pair without endpoint collapse", + "cypher": "MATCH (n:Group) WHERE n.objectid = 'optimizer-fanout-source' MATCH p1 = (n)-[:MemberOf*0..]->()-[:Enroll]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(:NTAuthStore)-[:NTAuthStoreFor]->(d:Domain) MATCH p2 = (n)-[:MemberOf*0..]->()-[:GenericAll|Enroll|AllExtendedRights]->(ct:CertTemplate)-[:PublishedTo]->(ca)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(:RootCA)-[:RootCAFor]->(d) WHERE ct.authenticationenabled = true AND ct.requiresmanagerapproval = false AND ct.enrolleesuppliessubject = true AND (ct.schemaversion = 1 OR ct.authorizedsignatures = 0) RETURN p1, p2", + "fixture": { + "nodes": [ + {"id": "n", "kinds": ["Group"], "properties": {"objectid": "optimizer-fanout-source"}}, + {"id": "p1-a", "kinds": ["Group"]}, + {"id": "p1-b", "kinds": ["Group"]}, + {"id": "p2-a", "kinds": ["Group"]}, + {"id": "p2-b", "kinds": ["Group"]}, + {"id": "template-a", "kinds": 
["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 1, "authorizedsignatures": 1}}, + {"id": "template-b", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 2, "authorizedsignatures": 0}}, + {"id": "ca", "kinds": ["EnterpriseCA"]}, + {"id": "store", "kinds": ["NTAuthStore"]}, + {"id": "domain", "kinds": ["Domain"]}, + {"id": "root", "kinds": ["RootCA"]} + ], + "edges": [ + {"start_id": "n", "end_id": "p1-a", "kind": "MemberOf"}, + {"start_id": "p1-a", "end_id": "ca", "kind": "Enroll"}, + {"start_id": "n", "end_id": "p1-b", "kind": "MemberOf"}, + {"start_id": "p1-b", "end_id": "ca", "kind": "Enroll"}, + {"start_id": "ca", "end_id": "store", "kind": "TrustedForNTAuth"}, + {"start_id": "store", "end_id": "domain", "kind": "NTAuthStoreFor"}, + {"start_id": "n", "end_id": "p2-a", "kind": "MemberOf"}, + {"start_id": "p2-a", "end_id": "template-a", "kind": "GenericAll"}, + {"start_id": "n", "end_id": "p2-b", "kind": "MemberOf"}, + {"start_id": "p2-b", "end_id": "template-b", "kind": "AllExtendedRights"}, + {"start_id": "template-a", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "template-b", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "ca", "end_id": "root", "kind": "IssuedSignedBy"}, + {"start_id": "root", "end_id": "domain", "kind": "RootCAFor"} + ] + }, + "assert": { + "row_count": 4, + "path_node_ids": [ + ["n", "p1-a", "ca", "store", "domain"], + ["n", "p1-a", "ca", "store", "domain"], + ["n", "p1-b", "ca", "store", "domain"], + ["n", "p1-b", "ca", "store", "domain"], + ["n", "p2-a", "template-a", "ca", "root", "domain"], + ["n", "p2-a", "template-a", "ca", "root", "domain"], + ["n", "p2-b", "template-b", "ca", "root", "domain"], + ["n", "p2-b", "template-b", "ca", "root", "domain"] + ], + "path_edge_kinds": [ + ["MemberOf", "Enroll", 
"TrustedForNTAuth", "NTAuthStoreFor"], + ["MemberOf", "Enroll", "TrustedForNTAuth", "NTAuthStoreFor"], + ["MemberOf", "Enroll", "TrustedForNTAuth", "NTAuthStoreFor"], + ["MemberOf", "Enroll", "TrustedForNTAuth", "NTAuthStoreFor"], + ["MemberOf", "GenericAll", "PublishedTo", "IssuedSignedBy", "RootCAFor"], + ["MemberOf", "GenericAll", "PublishedTo", "IssuedSignedBy", "RootCAFor"], + ["MemberOf", "AllExtendedRights", "PublishedTo", "IssuedSignedBy", "RootCAFor"], + ["MemberOf", "AllExtendedRights", "PublishedTo", "IssuedSignedBy", "RootCAFor"] + ] + } + }, + { + "name": "ADCS fanout endpoint projection preserves row multiplicity", + "cypher": "MATCH (n:Group) WHERE n.objectid = 'optimizer-endpoint-fanout-source' MATCH p1 = (n)-[:MemberOf*0..]->()-[:Enroll]->(ca:EnterpriseCA)-[:TrustedForNTAuth]->(:NTAuthStore)-[:NTAuthStoreFor]->(d:Domain) MATCH p2 = (n)-[:MemberOf*0..]->()-[:GenericAll|Enroll|AllExtendedRights]->(ct:CertTemplate)-[:PublishedTo]->(ca)-[:IssuedSignedBy|EnterpriseCAFor*1..]->(:RootCA)-[:RootCAFor]->(d) WHERE ct.authenticationenabled = true AND ct.requiresmanagerapproval = false AND ct.enrolleesuppliessubject = true AND (ct.schemaversion = 1 OR ct.authorizedsignatures = 0) RETURN count(*) AS rows, count(distinct id(ca)) AS ca_count, count(distinct id(d)) AS domain_count, count(distinct id(ct)) AS template_count", + "fixture": { + "nodes": [ + {"id": "n", "kinds": ["Group"], "properties": {"objectid": "optimizer-endpoint-fanout-source"}}, + {"id": "p1-a", "kinds": ["Group"]}, + {"id": "p1-b", "kinds": ["Group"]}, + {"id": "p2-a", "kinds": ["Group"]}, + {"id": "p2-b", "kinds": ["Group"]}, + {"id": "template-a", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": true, "schemaversion": 1, "authorizedsignatures": 1}}, + {"id": "template-b", "kinds": ["CertTemplate"], "properties": {"authenticationenabled": true, "requiresmanagerapproval": false, "enrolleesuppliessubject": 
true, "schemaversion": 2, "authorizedsignatures": 0}}, + {"id": "ca", "kinds": ["EnterpriseCA"]}, + {"id": "store", "kinds": ["NTAuthStore"]}, + {"id": "domain", "kinds": ["Domain"]}, + {"id": "root", "kinds": ["RootCA"]} + ], + "edges": [ + {"start_id": "n", "end_id": "p1-a", "kind": "MemberOf"}, + {"start_id": "p1-a", "end_id": "ca", "kind": "Enroll"}, + {"start_id": "n", "end_id": "p1-b", "kind": "MemberOf"}, + {"start_id": "p1-b", "end_id": "ca", "kind": "Enroll"}, + {"start_id": "ca", "end_id": "store", "kind": "TrustedForNTAuth"}, + {"start_id": "store", "end_id": "domain", "kind": "NTAuthStoreFor"}, + {"start_id": "n", "end_id": "p2-a", "kind": "MemberOf"}, + {"start_id": "p2-a", "end_id": "template-a", "kind": "GenericAll"}, + {"start_id": "n", "end_id": "p2-b", "kind": "MemberOf"}, + {"start_id": "p2-b", "end_id": "template-b", "kind": "AllExtendedRights"}, + {"start_id": "template-a", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "template-b", "end_id": "ca", "kind": "PublishedTo"}, + {"start_id": "ca", "end_id": "root", "kind": "IssuedSignedBy"}, + {"start_id": "root", "end_id": "domain", "kind": "RootCAFor"} + ] + }, + "assert": {"row_values": [[4, 1, 1, 2]]} + }, + { + "name": "common search domain admins reverse membership source label disjunction", + "cypher": "MATCH p = (t:Group)<-[:MemberOf*1..]-(a) WHERE (a:User OR a:Computer) AND t.objectid ENDS WITH '-512' RETURN p LIMIT 1000", + "fixture": { + "nodes": [ + {"id": "admins", "kinds": ["Group"], "properties": {"objectid": "S-1-5-21-1-512"}}, + {"id": "user", "kinds": ["User"]}, + {"id": "computer", "kinds": ["Computer"]}, + {"id": "mid", "kinds": ["Group"]}, + {"id": "other", "kinds": ["Group"], "properties": {"objectid": "S-1-5-21-1-513"}}, + {"id": "ignored", "kinds": ["Base"]} + ], + "edges": [ + {"start_id": "user", "end_id": "admins", "kind": "MemberOf"}, + {"start_id": "computer", "end_id": "mid", "kind": "MemberOf"}, + {"start_id": "mid", "end_id": "admins", "kind": "MemberOf"}, + 
{"start_id": "ignored", "end_id": "admins", "kind": "MemberOf"}, + {"start_id": "user", "end_id": "other", "kind": "MemberOf"} + ] + }, + "assert": { + "row_count": 2, + "path_node_ids": [["admins", "user"], ["admins", "mid", "computer"]], + "path_edge_kinds": [["MemberOf"], ["MemberOf", "MemberOf"]] + } + }, + { + "name": "common search dangerous domain users privileges exclude memberof relationships", + "cypher": "MATCH p=(s:Group)-[r:MemberOf|GenericAll|GenericWrite]->(t:Base) WHERE s.objectid ENDS WITH '-513' AND NOT r:MemberOf RETURN p LIMIT 1000", + "fixture": { + "nodes": [ + {"id": "domain-users", "kinds": ["Group"], "properties": {"objectid": "S-1-5-21-1-513"}}, + {"id": "member-target", "kinds": ["Base"]}, + {"id": "generic-target", "kinds": ["Base"]}, + {"id": "other-group", "kinds": ["Group"], "properties": {"objectid": "S-1-5-21-1-512"}}, + {"id": "other-target", "kinds": ["Base"]} + ], + "edges": [ + {"start_id": "domain-users", "end_id": "member-target", "kind": "MemberOf"}, + {"start_id": "domain-users", "end_id": "generic-target", "kind": "GenericAll"}, + {"start_id": "other-group", "end_id": "other-target", "kind": "GenericWrite"} + ] + }, + "assert": { + "row_count": 1, + "path_node_ids": [["domain-users", "generic-target"]], + "path_edge_kinds": [["GenericAll"]] + } + }, + { + "name": "common search domain admins logons excludes domain controllers", + "cypher": "MATCH (s)-[:MemberOf*0..]->(g:Group) WHERE g.objectid ENDS WITH '-516' WITH COLLECT(s) AS exclude MATCH p = (c:Computer)-[:HasSession]->(:User)-[:MemberOf*1..]->(g:Group) WHERE g.objectid ENDS WITH '-512' AND NOT c IN exclude RETURN p LIMIT 1000", + "fixture": { + "nodes": [ + {"id": "dc", "kinds": ["Computer"]}, + {"id": "dc-group", "kinds": ["Group"], "properties": {"objectid": "S-1-5-21-1-516"}}, + {"id": "workstation", "kinds": ["Computer"]}, + {"id": "admin-user", "kinds": ["User"]}, + {"id": "domain-admins", "kinds": ["Group"], "properties": {"objectid": "S-1-5-21-1-512"}} + ], + 
"edges": [ + {"start_id": "dc", "end_id": "dc-group", "kind": "MemberOf"}, + {"start_id": "workstation", "end_id": "admin-user", "kind": "HasSession"}, + {"start_id": "dc", "end_id": "admin-user", "kind": "HasSession"}, + {"start_id": "admin-user", "end_id": "domain-admins", "kind": "MemberOf"} + ] + }, + "assert": { + "row_count": 1, + "path_node_ids": [["workstation", "admin-user", "domain-admins"]], + "path_edge_kinds": [["HasSession", "MemberOf"]] + } + }, + { + "name": "common search kerberoastable users ordered by reachable admin privilege count", + "cypher": "MATCH (u:User) WHERE u.hasspn = true AND u.enabled = true AND NOT u.objectid ENDS WITH '-502' AND NOT COALESCE(u.gmsa, false) = true AND NOT COALESCE(u.msa, false) = true MATCH (u)-[:MemberOf|AdminTo*1..]->(c:Computer) WITH DISTINCT u, COUNT(c) AS adminCount RETURN u ORDER BY adminCount DESC LIMIT 100", + "fixture": { + "nodes": [ + {"id": "roastable", "kinds": ["User"], "properties": {"objectid": "S-1-5-21-1-1100", "hasspn": true, "enabled": true, "gmsa": false, "msa": false}}, + {"id": "disabled", "kinds": ["User"], "properties": {"objectid": "S-1-5-21-1-1101", "hasspn": true, "enabled": false}}, + {"id": "krbtgt", "kinds": ["User"], "properties": {"objectid": "S-1-5-21-1-502", "hasspn": true, "enabled": true}}, + {"id": "ops-group", "kinds": ["Group"]}, + {"id": "computer-a", "kinds": ["Computer"]}, + {"id": "computer-b", "kinds": ["Computer"]}, + {"id": "computer-c", "kinds": ["Computer"]} + ], + "edges": [ + {"start_id": "roastable", "end_id": "computer-a", "kind": "AdminTo"}, + {"start_id": "roastable", "end_id": "ops-group", "kind": "MemberOf"}, + {"start_id": "ops-group", "end_id": "computer-b", "kind": "AdminTo"}, + {"start_id": "disabled", "end_id": "computer-c", "kind": "AdminTo"}, + {"start_id": "krbtgt", "end_id": "computer-a", "kind": "AdminTo"} + ] + }, + "assert": {"node_ids": ["roastable"]} + }, + { + "name": "common search shortest path from domain users to tier zero target", + 
"cypher": "MATCH p=shortestPath((s:Group)-[:MemberOf|GenericAll|AdminTo*1..]->(t:Tag_Tier_Zero)) WHERE s.objectid ENDS WITH '-513' AND s<>t RETURN p LIMIT 1000", + "fixture": { + "nodes": [ + {"id": "domain-users", "kinds": ["Group"], "properties": {"objectid": "S-1-5-21-1-513"}}, + {"id": "bridge", "kinds": ["Group"]}, + {"id": "tier-zero", "kinds": ["Base", "Tag_Tier_Zero"]}, + {"id": "other", "kinds": ["Base"]} + ], + "edges": [ + {"start_id": "domain-users", "end_id": "bridge", "kind": "MemberOf"}, + {"start_id": "bridge", "end_id": "tier-zero", "kind": "GenericAll"}, + {"start_id": "domain-users", "end_id": "other", "kind": "AdminTo"} + ] + }, + "assert": { + "row_count": 1, + "path_node_ids": [["domain-users", "bridge", "tier-zero"]], + "path_edge_kinds": [["MemberOf", "GenericAll"]] + } + }, + { + "name": "common search cross forest trusts require connected abuse edge", + "cypher": "MATCH p=(n:Domain)-[:CrossForestTrust|SpoofSIDHistory|AbuseTGTDelegation]-(m:Domain) WHERE (n)-[:SpoofSIDHistory|AbuseTGTDelegation]-(m) RETURN p LIMIT 1000", + "fixture": { + "nodes": [ + {"id": "domain-a", "kinds": ["Domain"]}, + {"id": "domain-b", "kinds": ["Domain"]}, + {"id": "domain-c", "kinds": ["Domain"]} + ], + "edges": [ + {"start_id": "domain-a", "end_id": "domain-b", "kind": "CrossForestTrust"}, + {"start_id": "domain-a", "end_id": "domain-b", "kind": "SpoofSIDHistory"}, + {"start_id": "domain-a", "end_id": "domain-c", "kind": "CrossForestTrust"} + ] + }, + "assert": "non_empty" + }, + { + "name": "common search azure high privileged role bounded membership expansion", + "cypher": "MATCH p=(t:AZRole)<-[:AZHasRole|AZMemberOf*1..2]-(a:AZBase) WHERE t.name =~ '(?i)Global Administrator.*' RETURN p LIMIT 1000", + "fixture": { + "nodes": [ + {"id": "role", "kinds": ["AZRole"], "properties": {"name": "Global Administrator"}}, + {"id": "direct-user", "kinds": ["AZUser", "AZBase"]}, + {"id": "delegated-user", "kinds": ["AZUser", "AZBase"]}, + {"id": "delegated-group", "kinds": 
["AZGroup", "AZBase"]}, + {"id": "other-role", "kinds": ["AZRole"], "properties": {"name": "Reader"}} + ], + "edges": [ + {"start_id": "direct-user", "end_id": "role", "kind": "AZHasRole"}, + {"start_id": "delegated-user", "end_id": "delegated-group", "kind": "AZMemberOf"}, + {"start_id": "delegated-group", "end_id": "role", "kind": "AZHasRole"}, + {"start_id": "direct-user", "end_id": "other-role", "kind": "AZHasRole"} + ] + }, + "assert": { + "row_count": 3, + "path_node_ids": [["role", "direct-user"], ["role", "delegated-group"], ["role", "delegated-group", "delegated-user"]], + "path_edge_kinds": [["AZHasRole"], ["AZHasRole"], ["AZHasRole", "AZMemberOf"]] + } + } + ] +} diff --git a/integration/testdata/cases/pattern_binding_inline.json b/integration/testdata/cases/pattern_binding_inline.json index 02d482a7..e08bfb7c 100644 --- a/integration/testdata/cases/pattern_binding_inline.json +++ b/integration/testdata/cases/pattern_binding_inline.json @@ -144,7 +144,7 @@ {"start_id": "b", "end_id": "t", "kind": "EdgeKind2"} ] }, - "assert": "non_empty" + "assert": {"path_lengths": [2], "path_node_ids": [["a", "b", "t"]], "path_edge_kinds": [["EdgeKind1", "EdgeKind2"]]} }, { "name": "filter a typed node with WHERE then bind its variable-length expansion path", @@ -168,7 +168,23 @@ ], "edges": [{"start_id": "x", "end_id": "y", "kind": "EdgeKind1"}] }, - "assert": "non_empty" + "assert": {"path_lengths": [1], "path_node_ids": [["x", "y"]], "path_edge_kinds": [["EdgeKind1"]]} + }, + { + "name": "reorder independent node anchor before binding a connecting path", + "cypher": "match (x) match (y:NodeKind2 {name: 'target'}) match p=(x)-[:EdgeKind1]->(y) return p", + "fixture": { + "nodes": [ + {"id": "x", "kinds": ["NodeKind1"]}, + {"id": "target", "kinds": ["NodeKind2"], "properties": {"name": "target"}}, + {"id": "other", "kinds": ["NodeKind2"], "properties": {"name": "other"}} + ], + "edges": [ + {"start_id": "x", "end_id": "target", "kind": "EdgeKind1"}, + {"start_id": 
"x", "end_id": "other", "kind": "EdgeKind1"} + ] + }, + "assert": {"path_lengths": [1], "path_node_ids": [["x", "target"]], "path_edge_kinds": [["EdgeKind1"]]} }, { "name": "match a node with an inline property map then bind its outgoing path to a second inline-map node", diff --git a/integration/testdata/cases/unwind_inline.json b/integration/testdata/cases/unwind_inline.json index 389d7438..d87cd43b 100644 --- a/integration/testdata/cases/unwind_inline.json +++ b/integration/testdata/cases/unwind_inline.json @@ -154,6 +154,24 @@ "edges": [] }, "assert": {"scalar_values": ["alpha", "beta", "tail"]} + }, + { + "name": "unwind barrier feeds an optimized expansion predicate", + "cypher": "WITH ['unwind-expansion-dst'] AS names UNWIND names AS name MATCH p = (src:NodeKind1)-[:EdgeKind1*1..]->(mid)-[:EdgeKind2]->(dst:NodeKind2) WHERE src.name = 'unwind-expansion-src' AND dst.name = name RETURN p", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"name": "unwind-expansion-src"}}, + {"id": "mid", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"], "properties": {"name": "unwind-expansion-dst"}}, + {"id": "decoy", "kinds": ["NodeKind2"], "properties": {"name": "unwind-expansion-decoy"}} + ], + "edges": [ + {"start_id": "src", "end_id": "mid", "kind": "EdgeKind1"}, + {"start_id": "mid", "end_id": "dst", "kind": "EdgeKind2"}, + {"start_id": "mid", "end_id": "decoy", "kind": "EdgeKind2"} + ] + }, + "assert": {"path_node_ids": [["src", "mid", "dst"]], "path_edge_kinds": [["EdgeKind1", "EdgeKind2"]]} } ] } diff --git a/integration/testdata/templates/pattern_shapes.json b/integration/testdata/templates/pattern_shapes.json index f7e117dc..563cceb7 100644 --- a/integration/testdata/templates/pattern_shapes.json +++ b/integration/testdata/templates/pattern_shapes.json @@ -77,7 +77,7 @@ ] }, { - "name": "path node-list functions", + "name": "path component functions", "template": "{{query}}", "fixture": { "nodes": [ @@ -107,6 +107,13 
@@ "query": "match p=(e:TemplateNodeKind1)<-[:TemplateEdgeKind1]-(d:TemplateNodeKind2) where e.name = 'inbound' return nodes(p)" }, "assert": {"node_list_ids": [["e", "d"]]} + }, + { + "name": "relationships function returns path traversal order", + "vars": { + "query": "match p=(a:TemplateNodeKind1)-[:TemplateEdgeKind1]->(b:TemplateNodeKind2)-[:TemplateEdgeKind2]->(c:TemplateNodeKind1) where a.name = 'src' return relationships(p)" + }, + "assert": {"relationship_list_kinds": [["TemplateEdgeKind1", "TemplateEdgeKind2"]]} } ] }, diff --git a/tools/metrics/internal/metrics/quality.go b/tools/metrics/internal/metrics/quality.go index a260dadd..fce8b1d0 100644 --- a/tools/metrics/internal/metrics/quality.go +++ b/tools/metrics/internal/metrics/quality.go @@ -472,8 +472,10 @@ func analyzeBackendEquivalence(results []NamedPath) BackendEquivalenceReport { sort.Strings(sortedKeys) for _, key := range sortedKeys { - statuses := map[string]string{} - missing := false + var ( + statuses = map[string]string{} + missing = false + ) for driverName, tests := range driverTests { status, found := tests[key] if !found { @@ -529,9 +531,11 @@ type goTestEvent struct { } func parseBackendTestResult(result NamedPath) (map[string]string, BackendDriverResult, []QualityFinding) { - summary := BackendDriverResult{Name: result.Name, Path: result.Path} - findings := []QualityFinding{} - tests := map[string]string{} + var ( + summary = BackendDriverResult{Name: result.Name, Path: result.Path} + findings = []QualityFinding{} + tests = map[string]string{} + ) file, err := os.Open(result.Path) if err != nil { @@ -687,8 +691,10 @@ func validateTemplateFile(path string, doc qualityTemplateFile, report *Invarian findings = append(findings, fileFinding("invariants", "high", "template_family_missing_template", path, family.Name+" has no template")) } - placeholders := placeholderNames(family.Template) - variantNames := map[string]struct{}{} + var ( + placeholders = placeholderNames(family.Template) + 
variantNames = map[string]struct{}{} + ) for _, variant := range family.Variants { contextName := family.Name + "/" + variant.Name if variant.Name == "" { @@ -821,14 +827,16 @@ func discoverFuzzTargets(sourceRoot string) ([]FuzzTarget, []QualityFinding) { continue } - position := fileSet.Position(function.Pos()) - target := FuzzTarget{ - Package: parsedFile.Name.Name, - Name: function.Name.Name, - File: relativePath, - Line: position.Line, - CorpusFiles: countCorpusFiles(filepath.Join(filepath.Dir(path), "testdata", "fuzz", function.Name.Name)), - } + var ( + position = fileSet.Position(function.Pos()) + target = FuzzTarget{ + Package: parsedFile.Name.Name, + Name: function.Name.Name, + File: relativePath, + Line: position.Line, + CorpusFiles: countCorpusFiles(filepath.Join(filepath.Dir(path), "testdata", "fuzz", function.Name.Name)), + } + ) targets = append(targets, target) } @@ -845,8 +853,10 @@ func discoverFuzzTargets(sourceRoot string) ([]FuzzTarget, []QualityFinding) { } sort.SliceStable(targets, func(leftIndex, rightIndex int) bool { - left := targets[leftIndex] - right := targets[rightIndex] + var ( + left = targets[leftIndex] + right = targets[rightIndex] + ) if left.File != right.File { return left.File < right.File } @@ -1046,8 +1056,10 @@ func analyzeBenchmarkDrift(options QualityOptions) BenchmarkDriftReport { return report } - currentByKey := benchmarkResultsByKey(current) - baselineByKey := benchmarkResultsByKey(baseline) + var ( + currentByKey = benchmarkResultsByKey(current) + baselineByKey = benchmarkResultsByKey(baseline) + ) report.Results = len(currentByKey) report.BaselineResults = len(baselineByKey) @@ -1093,14 +1105,16 @@ func (s *BenchmarkDriftReport) compareBenchmarkMetric(key, metric string, baseli return } - delta := (float64(current) - float64(baseline)) / float64(baseline) - record := BenchmarkRegression{ - Key: key, - Metric: metric, - BaselineNanos: baseline, - CurrentNanos: current, - DeltaFraction: delta, - } + var ( + delta = 
(float64(current) - float64(baseline)) / float64(baseline) + record = BenchmarkRegression{ + Key: key, + Metric: metric, + BaselineNanos: baseline, + CurrentNanos: current, + DeltaFraction: delta, + } + ) if delta > s.RegressionThreshold { s.Regressions = append(s.Regressions, record) } else if delta < -s.RegressionThreshold { @@ -1128,15 +1142,17 @@ func benchmarkResultsByKey(report benchmarkInputReport) map[string]benchmarkInpu } func summarizeQuality(report QualityReport) QualitySummary { - summary := QualitySummary{} - statuses := []string{ - report.SemanticDrift.Status, - report.BackendEquivalence.Status, - report.Invariants.Status, - report.Fuzz.Status, - report.Mutation.Status, - report.BenchmarkDrift.Status, - } + var ( + summary = QualitySummary{} + statuses = []string{ + report.SemanticDrift.Status, + report.BackendEquivalence.Status, + report.Invariants.Status, + report.Fuzz.Status, + report.Mutation.Status, + report.BenchmarkDrift.Status, + } + ) for _, status := range statuses { switch status { @@ -1175,8 +1191,10 @@ func qualityFindings(report QualityReport) []QualityFinding { findings = append(findings, report.BenchmarkDrift.Findings...) sort.SliceStable(findings, func(leftIndex, rightIndex int) bool { - left := findings[leftIndex] - right := findings[rightIndex] + var ( + left = findings[leftIndex] + right = findings[rightIndex] + ) if severityRank(left.Severity) != severityRank(right.Severity) { return severityRank(left.Severity) > severityRank(right.Severity) } @@ -1380,6 +1398,7 @@ var allowedObjectAssertions = map[string]struct{}{ "path_edge_kinds": {}, "path_lengths": {}, "path_node_ids": {}, + "relationship_list_kinds": {}, "row_count": {}, "row_values": {}, "scalar_values": {},