From 4fdf5f1858feafe7187cb527b7e259fa8ae5bb8c Mon Sep 17 00:00:00 2001
From: "roller100 (BearingNode)" <contact@bearingnode.com>
Date: Wed, 18 Mar 2026 13:04:59 +0000
Subject: [PATCH 1/3] feat(dbt): add dbt test assertions coverage to
 csv_to_postgres scenario

Add `dbt-ol test` to test/run.sh so test-phase events are captured
alongside seed and run events.

Add three expected event files for the dataQualityAssertions facet:
- events/customers/data_quality_event.json (not_null + unique on customer_id)
- events/orders/data_quality_event.json (not_null on customer_id, not_null + unique on order_id)
- events/analytics/data_quality_event.json (not_null on customer_id + total_revenue, unique on customer_id)

Register all three as test cases in config.json with the dataQualityAssertions
facet tag. Expected event content derived from actual dbt-ol 1.23.0 output
against a live PostgreSQL 15 instance.

Signed-off-by: roller100 (BearingNode) <contact@bearingnode.com>
---
 .../dbt/scenarios/csv_to_postgres/config.json | 51 +++++++++++++++++++
 .../events/analytics/data_quality_event.json  | 34 +++++++++++++
 .../events/customers/data_quality_event.json  | 29 +++++++++++
 .../events/orders/data_quality_event.json     | 34 +++++++++++++
 .../dbt/scenarios/csv_to_postgres/test/run.sh |  1 +
 5 files changed, 149 insertions(+)
 create mode 100644 producer/dbt/scenarios/csv_to_postgres/events/analytics/data_quality_event.json
 create mode 100644 producer/dbt/scenarios/csv_to_postgres/events/customers/data_quality_event.json
 create mode 100644 producer/dbt/scenarios/csv_to_postgres/events/orders/data_quality_event.json

diff --git a/producer/dbt/scenarios/csv_to_postgres/config.json b/producer/dbt/scenarios/csv_to_postgres/config.json
index 6a737208..c28fc7f2 100644
--- a/producer/dbt/scenarios/csv_to_postgres/config.json
+++ b/producer/dbt/scenarios/csv_to_postgres/config.json
@@ -265,6 +265,57 @@
           ]
         }
       }
+    },
+    {
+      "name": "customers_data_quality_assertions_test",
+      "path": "events/customers/data_quality_event.json",
+      "openlineage_versions": {
+        "min": "1.38.0"
+      },
+      "tags": {
+        "facets": [
+          "dataQualityAssertions"
+        ],
+        "lineage_level": {
+          "postgres": [
+            "dataset"
+          ]
+        }
+      }
+    },
+    {
+      "name": "orders_data_quality_assertions_test",
+      "path": "events/orders/data_quality_event.json",
+      "openlineage_versions": {
+        "min": "1.38.0"
+      },
+      "tags": {
+        "facets": [
+          "dataQualityAssertions"
+        ],
+        "lineage_level": {
+          "postgres": [
+            "dataset"
+          ]
+        }
+      }
+    },
+    {
+      "name": "analytics_data_quality_assertions_test",
+      "path": "events/analytics/data_quality_event.json",
+      "openlineage_versions": {
+        "min": "1.38.0"
+      },
+      "tags": {
+        "facets": [
+          "dataQualityAssertions"
+        ],
+        "lineage_level": {
+          "postgres": [
+            "dataset"
+          ]
+        }
+      }
     }
   ]
 }
\ No newline at end of file
diff --git a/producer/dbt/scenarios/csv_to_postgres/events/analytics/data_quality_event.json b/producer/dbt/scenarios/csv_to_postgres/events/analytics/data_quality_event.json
new file mode 100644
index 00000000..d7d89c16
--- /dev/null
+++ b/producer/dbt/scenarios/csv_to_postgres/events/analytics/data_quality_event.json
@@ -0,0 +1,34 @@
+{
+  "eventType": "COMPLETE",
+  "job": {
+    "namespace": "dbt",
+    "name": "dbt_test.main.openlineage_compatibility_test.customer_analytics.test"
+  },
+  "inputs": [
+    {
+      "name": "dbt_test.main.customer_analytics",
+      "namespace": "postgres://localhost:5432",
+      "facets": {
+        "dataQualityAssertions": {
+          "assertions": [
+            {
+              "assertion": "not_null",
+              "column": "customer_id",
+              "success": true
+            },
+            {
+              "assertion": "not_null",
+              "column": "total_revenue",
+              "success": true
+            },
+            {
+              "assertion": "unique",
+              "column": "customer_id",
+              "success": true
+            }
+          ]
+        }
+      }
+    }
+  ]
+}
diff --git a/producer/dbt/scenarios/csv_to_postgres/events/customers/data_quality_event.json b/producer/dbt/scenarios/csv_to_postgres/events/customers/data_quality_event.json
new file mode 100644
index 00000000..97635709
--- /dev/null
+++ b/producer/dbt/scenarios/csv_to_postgres/events/customers/data_quality_event.json
@@ -0,0 +1,29 @@
+{
+  "eventType": "COMPLETE",
+  "job": {
+    "namespace": "dbt",
+    "name": "dbt_test.main.openlineage_compatibility_test.stg_customers.test"
+  },
+  "inputs": [
+    {
+      "name": "dbt_test.main.stg_customers",
+      "namespace": "postgres://localhost:5432",
+      "facets": {
+        "dataQualityAssertions": {
+          "assertions": [
+            {
+              "assertion": "not_null",
+              "column": "customer_id",
+              "success": true
+            },
+            {
+              "assertion": "unique",
+              "column": "customer_id",
+              "success": true
+            }
+          ]
+        }
+      }
+    }
+  ]
+}
diff --git a/producer/dbt/scenarios/csv_to_postgres/events/orders/data_quality_event.json b/producer/dbt/scenarios/csv_to_postgres/events/orders/data_quality_event.json
new file mode 100644
index 00000000..4d9efe83
--- /dev/null
+++ b/producer/dbt/scenarios/csv_to_postgres/events/orders/data_quality_event.json
@@ -0,0 +1,34 @@
+{
+  "eventType": "COMPLETE",
+  "job": {
+    "namespace": "dbt",
+    "name": "dbt_test.main.openlineage_compatibility_test.stg_orders.test"
+  },
+  "inputs": [
+    {
+      "name": "dbt_test.main.stg_orders",
+      "namespace": "postgres://localhost:5432",
+      "facets": {
+        "dataQualityAssertions": {
+          "assertions": [
+            {
+              "assertion": "not_null",
+              "column": "customer_id",
+              "success": true
+            },
+            {
+              "assertion": "not_null",
+              "column": "order_id",
+              "success": true
+            },
+            {
+              "assertion": "unique",
+              "column": "order_id",
+              "success": true
+            }
+          ]
+        }
+      }
+    }
+  ]
+}
diff --git a/producer/dbt/scenarios/csv_to_postgres/test/run.sh b/producer/dbt/scenarios/csv_to_postgres/test/run.sh
index acb9dbd3..63035380 100644
--- a/producer/dbt/scenarios/csv_to_postgres/test/run.sh
+++ b/producer/dbt/scenarios/csv_to_postgres/test/run.sh
@@ -14,6 +14,7 @@ EOF
 
   dbt-ol seed --project-dir="../../../runner" --profiles-dir="../../../runner" --target=postgres --no-version-check
   dbt-ol run --project-dir="../../../runner" --profiles-dir="../../../runner" --target=postgres --no-version-check
+  dbt-ol test --project-dir="../../../runner" --profiles-dir="../../../runner" --target=postgres --no-version-check
 
   jq -c '.' "${PRODUCER_OUTPUT_EVENTS_DIR}/events.jsonl" | nl -w1 -s' ' | while read -r i line; do
     echo "$line" | jq '.' > "${PRODUCER_OUTPUT_EVENTS_DIR}/event-$i.json"

From 9816cb3be445a769293c618982a8c4f5d2237085 Mon Sep 17 00:00:00 2001
From: "roller100 (BearingNode)" <contact@bearingnode.com>
Date: Wed, 18 Mar 2026 13:05:26 +0000
Subject: [PATCH 2/3] fix(dbt): update README to reflect actual run.sh workflow

The local debugging section referenced run_dbt_tests.sh, which was
removed during the PR #211 cleanup. Replace with accurate instructions
using docker compose + run.sh directly.

Update the workflow description, test structure layout, and validation
scope (add dataQualityAssertions) to match the current architecture.

Signed-off-by: roller100 (BearingNode) <contact@bearingnode.com>
---
 producer/dbt/README.md | 58 +++++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/producer/dbt/README.md b/producer/dbt/README.md
index 282a8da3..bf153fc2 100644
--- a/producer/dbt/README.md
+++ b/producer/dbt/README.md
@@ -13,23 +13,21 @@ It is important to note that this is a **compatibility validation framework** us
 
 ## Test Architecture and Workflow
 
-The test is orchestrated by the `run_dbt_tests.sh` script and follows a clear, sequential workflow designed for reliability and ease of use. This structure ensures that each component of the integration is validated systematically.
+The test is orchestrated by the scenario's `test/run.sh` script and follows a clear, sequential workflow designed for reliability and ease of use. This structure ensures that each component of the integration is validated systematically.
 
 The end-to-end process is as follows:
 
-1.  **Test Orchestration**: The `run_dbt_tests.sh` script serves as the main entry point. It sets up the environment and initiates over the scenarios folder to execute each test scenario.
+1.  **Scenario Execution**: The `test/run.sh` script executes the dbt project defined in the `runner/` directory using `dbt-ol seed`, `dbt-ol run`, and `dbt-ol test`.
 
-2.  **Scenario Execution**: The test runner executes the dbt project defined in the `runner/` directory. The specific dbt commands to be run (e.g., `dbt seed`, `dbt run`, `dbt test`) are defined in the test scenarios run script (`test/run.sh`).
-
-3.  **Event Generation and Capture**: During the execution, the `dbt-ol` wrapper intercepts the dbt commands and emits OpenLineage events. The `test/openlineage.yml` configuration directs these events to be captured as a local file (`{directory_input_param}/events.jsonl`) using the `file` transport.
+2.  **Event Generation and Capture**: During execution, the `dbt-ol` wrapper intercepts the dbt commands and emits OpenLineage events. The `test/run.sh` script writes an `openlineage.yml` configuration that directs these events to be captured as a local file (`{output_dir}/events.jsonl`) using the `file` transport.
  
-4.  **Extract events**: OpenLineage emits events reliable to one file ('append: true' causes overwrites and events to be lost) so it is required to extract the before validation.
+3.  **Extract events**: OpenLineage emits all events to one file, so `run.sh` splits them into individual numbered files (`event-1.json`, `event-2.json`, …) before deleting the combined `.jsonl`.
 
-5.  **Event Validation**: Once the dbt process is complete, the test framework performs a two-stage validation on the generated events:
-    *   **Syntax Validation**: Each event is validated against the official OpenLineage JSON schema (e.g., version `1.40.1`) to ensure it is structurally correct.
-    *   **Semantic Validation**: The content of the events is compared against expected templates. This deep comparison, powered by the `scripts/compare_events.py` utility, verifies the accuracy of job names, dataset identifiers, lineage relationships, and the presence and structure of key facets.
+4.  **Event Validation**: Once the dbt process is complete, the shared framework (`scripts/validate_ol_events.py`) performs a two-stage validation on the generated events:
+    *   **Syntax Validation**: Each event is validated against the official OpenLineage JSON schema to ensure it is structurally correct.
+    *   **Semantic Validation**: The content of the events is compared against expected templates in `scenarios/csv_to_postgres/events/`. This comparison, powered by the `scripts/compare_events.py` utility, verifies the accuracy of job names, dataset identifiers, lineage relationships, and the presence and structure of key facets.
 
-6.  **Reporting**: Upon completion, the test runner generates a standardized JSON report (`dbt_producer_report.json`) that details the results of each validation step. This report is designed to be consumed by higher-level aggregation scripts in a CI/CD environment.
+5.  **Reporting**: Upon completion, the framework generates a standardized JSON report that details the results of each validation step for consumption by CI/CD aggregation scripts.
 
 ## Validation Scope
 
@@ -38,6 +36,7 @@ This test validates that the `openlineage-dbt` integration correctly generates O
 #### dbt Operations Covered:
 -   `dbt seed`: To load initial data.
 -   `dbt run`: To execute dbt models.
+-   `dbt test`: To run data quality tests and capture `dataQualityAssertions` facets.
 
 #### Validation Checks:
 -   **Event Generation**: Correctly creates `START` and `COMPLETE` events for jobs and runs.
@@ -50,6 +49,7 @@ This test validates that the `openlineage-dbt` integration correctly generates O
     -   `schema`
     -   `dataSource`
     -   `columnLineage`
+    -   `dataQualityAssertions`
 -   **Specification Compliance**: Events are validated against the OpenLineage specification schema (version `2-0-2`).
 
 ## Test Structure
@@ -58,16 +58,14 @@ The test is organized into the following key directories, each with a specific r
 
 ```
 producer/dbt/
-├── run_dbt_tests.sh           # Main test execution script
-├── scenarios/                 # Defines the dbt commands and expected outcomes for each test case
-├── output/                    # Default output directory for generated OpenLineage events (generated during execution)
+├── scenarios/                 # Test scenarios; each defines expected events and a run script
 ├── runner/                    # A self-contained dbt project used as the test target
-└── specs/                     # Stores OpenLineage spcification get from local repository (generated during execution)
+├── versions.json              # Supported component and OpenLineage version ranges
+└── maintainers.json           # Maintainer contact information
 ```
 
 -   **`runner/`**: A self-contained dbt project with models, seeds, and configuration. This is the target of the `dbt-ol` command.
--   **`scenarios/`**: Defines the dbt commands to be executed and contains the expected event templates for validation.
--   **`output/`**: The default output directory for the generated `events.jsonl` file and extracted events.
+-   **`scenarios/`**: Contains one directory per scenario. Each scenario has a `config.json` defining expected event templates, an `events/` directory of expected event JSON files, and a `test/` directory with `run.sh` and `compose.yml`.
 
 ## How to Run the Tests
 
@@ -106,34 +104,30 @@ The GitHub Actions workflow:
 
 If you need to debug event generation locally:
 
-1.  **Start PostgreSQL (Optional)**:
+1.  **Start PostgreSQL**:
     ```bash
-    cd producer/dbt/scenarions/csv_to_postgres/test
-    docker compose up
+    docker compose -f producer/dbt/scenarios/csv_to_postgres/test/compose.yml up -d
     ```
 
-2.  **Install Python Dependencies**:
+2.  **Install dbt and the OpenLineage wrapper** (use a virtual environment outside the repo):
     ```bash
-    # Activate virtual environment (recommended)
-    python -m venv venv
-    source venv/bin/activate  # On Windows: venv\Scripts\activate
+    python -m venv ~/.venvs/dbt-compat-test
+    source ~/.venvs/dbt-compat-test/bin/activate
+    pip install dbt-core==1.8.0 dbt-postgres openlineage-dbt==1.23.0
     ```
     
-3.  **Run Test Scenario**:
+3.  **Run the scenario**:
     ```bash
-    ./producer/dbt/run_dbt_tests.sh  --openlineage-directory <open_lineage_directory>
+    mkdir -p /tmp/dbt-events
+    bash producer/dbt/scenarios/csv_to_postgres/test/run.sh /tmp/dbt-events
     ```
 
-4.  **Inspect Generated Events**:
+4.  **Inspect generated events**:
     ```bash
-    # View events
-    cat ./producer/dbt/output/csv_to_postgres/event-{id}.json | jq '.'
-    
-    # check report
-    cat ./producer/dbt/dbt_producer_report.json | jq '.'
+    cat /tmp/dbt-events/event-1.json | jq '.'
     ```
 
-**Note**: Local debugging is entirely optional. All official validation happens in GitHub Actions with PostgreSQL service containers. The test runner (`test/run.sh`) is the same code used by CI/CD, ensuring consistency.
+**Note**: Local debugging is entirely optional. All official validation happens in GitHub Actions with PostgreSQL service containers. The `test/run.sh` script is the same code used by CI/CD, ensuring consistency.
 
 ## Important dbt Integration Notes
 

From 4322364b11d8f7cf202c6cdc58a338b9ebaf3dcd Mon Sep 17 00:00:00 2001
From: "roller100 (BearingNode)" <contact@bearingnode.com>
Date: Wed, 18 Mar 2026 13:34:12 +0000
Subject: [PATCH 3/3] chore: ignore jsonl event files generated during local
 testing

Add **/events/*.jsonl to the existing local-testing ignore block.
dbt-ol writes events.jsonl before extraction; this prevents it from
being accidentally staged in any producer's events directory.

Signed-off-by: roller100 (BearingNode) <contact@bearingnode.com>
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 2c95801b..db6ef1f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -173,4 +173,5 @@ bin/
 **/specs/
 **/output/
 **/test/openlineage.yml
+**/events/*.jsonl
 dbt_producer_report.json
\ No newline at end of file