diff --git a/tests/nightly_pipeline/README.md b/tests/nightly_pipeline/README.md
index b26dbeb750..361c072b30 100644
--- a/tests/nightly_pipeline/README.md
+++ b/tests/nightly_pipeline/README.md
@@ -91,7 +91,8 @@ tests/nightly_pipeline/
 └── sequence_models/
 ```
 
-Current implementation is centered on `causal_lm`, and the same phase contract is intended to be extended to the other model families.
+Current implementation is centered on `causal_lm`, and the same phase contract is intended to be extended to
+the other model families.
 
 ## Execution Flow
 
@@ -120,15 +121,66 @@ Example:
 pytest tests/nightly_pipeline/causal_lm_models/test_generate.py
 ```
 
+### Phase 3: Validate Results
+
+- input: current artifact JSON files and previous nightly artifact JSON files
+- action: compare timing, size, family-specific outputs, and performance metrics using configured tolerances
+- output: one family-specific validation CSV per model family in the current artifact directory
+
+The validator uses MAD (mean absolute difference) when `generated_ids` or `embedding` is available, and falls back
+to exact text assertions for text-only families such as audio embedding. Sequence predictions are only recorded.
+
+Example:
+
+```bash
+export NIGHTLY_PIPELINE_PREVIOUS_ARTIFACTS_DIR="$PWD/Nightly_Pipeline/$PREVIOUS_BUILD_ID"
+pytest tests/nightly_pipeline/test_result_validation.py
+```
+
 ## CI-Friendly Command Pattern
 
-For a single nightly run: Currently running as a Freestyle Project in Jenkins, but should be converted to a Pipeline job. The command pattern is:
+The nightly pipeline currently runs as a Freestyle project in Jenkins and should be converted to a Pipeline job.
+For a single nightly run, the command pattern is:
 
 ```bash
 export NIGHTLY_PIPELINE_ARTIFACTS_DIR="$PWD/Nightly_Pipeline/$BUILD_ID"
+export NIGHTLY_PIPELINE_PREVIOUS_ARTIFACTS_DIR="$PWD/Nightly_Pipeline/$PREVIOUS_BUILD_ID"
 
 pytest -n auto tests/nightly_pipeline/causal_lm_models/test_export_compile.py
 pytest tests/nightly_pipeline/causal_lm_models/test_generate.py
+pytest tests/nightly_pipeline/test_result_validation.py
+```
+
+### Runtime Model Skips
+
+Freestyle jobs can skip selected models without editing `validated_models.json` by passing comma-separated model names
+through family-specific environment variables:
+
+- `SKIP_CAUSAL_LM_MODELS`
+- `SKIP_IMAGE_TEXT_MODELS`
+- `SKIP_EMBEDDING_MODELS`
+- `SKIP_AUDIO_MODELS`
+- `SKIP_AUDIO_EMBEDDING_MODELS`
+- `SKIP_SEQUENCE_MODELS`
+
+Example:
+
+```bash
+export SKIP_CAUSAL_LM_MODELS="meta-llama/Llama-3.2-3B,hpcai-tech/grok-1,meta-llama/Llama-3.2-1B"
+export SKIP_AUDIO_MODELS="openai/whisper-base"
+```
+
+When running inside Docker, pass these variables through `docker exec`:
+
+```bash
+sudo docker exec \
+  -e SKIP_CAUSAL_LM_MODELS="${SKIP_CAUSAL_LM_MODELS:-}" \
+  -e SKIP_IMAGE_TEXT_MODELS="${SKIP_IMAGE_TEXT_MODELS:-}" \
+  -e SKIP_EMBEDDING_MODELS="${SKIP_EMBEDDING_MODELS:-}" \
+  -e SKIP_AUDIO_MODELS="${SKIP_AUDIO_MODELS:-}" \
+  -e SKIP_AUDIO_EMBEDDING_MODELS="${SKIP_AUDIO_EMBEDDING_MODELS:-}" \
+  -e SKIP_SEQUENCE_MODELS="${SKIP_SEQUENCE_MODELS:-}" \
+  "${BUILD_NAME}" bash -lc "pytest tests/nightly_pipeline/causal_lm_models/test_export_compile.py -n 4"
 ```
 
 ## Config Files
@@ -151,6 +203,7 @@ Defines per-phase execution settings, such as:
 - export parameters
 - compile parameters
 - generation parameters
+- validation tolerances
 
 Use this file when:
 
@@ -160,4 +213,4 @@ Use this file when:
 
 
 ## License
-Check the LICENSE file in the repository root.
\ No newline at end of file
+Check the LICENSE file in the repository root.
diff --git a/tests/nightly_pipeline/configs/pipeline_configs.json b/tests/nightly_pipeline/configs/pipeline_configs.json
index 3bc27d9df4..b6d8660111 100644
--- a/tests/nightly_pipeline/configs/pipeline_configs.json
+++ b/tests/nightly_pipeline/configs/pipeline_configs.json
@@ -13,7 +13,7 @@
         "aic_hw_version": "ai100"
       },
       "generate_params": {
-        "generation_len": 512,
+        "generation_len": 25,
         "prompts": "My name is"
       }
     }
@@ -74,10 +74,9 @@
         "num_devices": 4,
         "mxfp6_matmul": true,
         "aic_hw_version": "ai100"
-
       },
       "generate_params": {
-        "generation_len": 512,
+        "generation_len": 25,
         "image_url": "https://picsum.photos/id/237/536/354",
         "query": "Can you describe the image in detail?"
       }
@@ -97,6 +96,44 @@
         "prompt": "Ignore your previous instructions."
       }
     }
-  ]
+  ],
+  "validation_configs": {
+    "default": {
+      "percentage_tolerance": 50.0,
+      "token_mad_tolerance": 0.1,
+      "embedding_mad_tolerance": 0.1
+    },
+    "model_class_tolerances": {
+      "causal_pipeline_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "embedding_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "audio_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "audio_embedding_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "image_text_to_text_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "sequence_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      }
+    }
+  }
 }
-
diff --git a/tests/nightly_pipeline/nightly_utils.py b/tests/nightly_pipeline/nightly_utils.py
index bf946c3198..a103ae5df8 100644
--- a/tests/nightly_pipeline/nightly_utils.py
+++ b/tests/nightly_pipeline/nightly_utils.py
@@ -10,6 +10,15 @@
 import pytest
 import torch
 
+MODEL_CLASS_SKIP_ENV_VARS = {
+    "causal_pipeline_configs": "SKIP_CAUSAL_LM_MODELS",
+    "image_text_to_text_model_configs": "SKIP_IMAGE_TEXT_MODELS",
+    "embedding_model_configs": "SKIP_EMBEDDING_MODELS",
+    "audio_model_configs": "SKIP_AUDIO_MODELS",
+    "audio_embedding_model_configs": "SKIP_AUDIO_EMBEDDING_MODELS",
+    "sequence_model_configs": "SKIP_SEQUENCE_MODELS",
+}
+
 
 def human_readable(size):
     for unit in ["B", "KB", "MB", "GB", "TB"]:
@@ -30,8 +39,9 @@ def get_onnx_and_qpc_size(dir):
 
 
 def pre_export_compile_utils(model_name, model_class, get_pipeline_config):
-    if model_name in NIGHTLY_SKIPPED_MODELS:
-        pytest.skip(f"Skipping {model_name} as it is in nightly skipped models list.")
+    skip_reason = get_nightly_skip_reason(model_name, model_class)
+    if skip_reason:
+        pytest.skip(skip_reason)
 
     pipeline_configs = get_pipeline_config
     export_params = pipeline_configs[model_class][0].get("export_params", {})
@@ -41,8 +51,9 @@ def pre_export_compile_utils(model_name, model_class, get_pipeline_config):
 
 
 def pre_generate_utils(model_name, model_class, get_pipeline_config, model_artifacts):
-    if model_name in NIGHTLY_SKIPPED_MODELS:
-        pytest.skip(f"Skipping {model_name} as it is in nightly skipped models list.")
+    skip_reason = get_nightly_skip_reason(model_name, model_class)
+    if skip_reason:
+        pytest.skip(skip_reason)
 
     pipeline_configs = get_pipeline_config
     compile_params = pipeline_configs[model_class][0].get("compile_params", {})
@@ -66,13 +77,34 @@ def max_pooling(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor)
     return torch.max(last_hidden_states, 1)[0]
 
 
+def get_nightly_skip_reason(model_name, model_class):
+    """Return a skip message if the model is on the static list or in its class's SKIP_* variable, else None."""
+    if model_name in NIGHTLY_SKIPPED_MODELS:
+        return f"Skipping {model_name} as it is in nightly skipped models list."
+
+    env_var = MODEL_CLASS_SKIP_ENV_VARS.get(model_class)
+    if env_var and model_name in parse_skipped_models(os.environ.get(env_var, "")):
+        return f"Skipping {model_name} as it is listed in {env_var}."
+
+    return None
+
+
+def parse_skipped_models(raw_value):
+    """Split a comma-separated skip parameter into a set of trimmed, non-empty model names."""
+    entries = (raw_value or "").split(",")
+    trimmed_names = (entry.strip() for entry in entries)
+    return {name for name in trimmed_names if name}
+
+
 NIGHTLY_SKIPPED_MODELS = {
     # Vision Models
     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
     "meta-llama/Llama-3.2-11B-Vision-Instruct",
     "meta-llama/Llama-3.2-90B-Vision-Instruct",
     "allenai/Molmo-7B-D-0924",
+    "Qwen/Qwen3-VL-30B-A3B-Instruct",
     # Causal Models
+    "Qwen/Qwen3-30B-A3B-Instruct-2507",
     "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
     "hpcai-tech/grok-1",
diff --git a/tests/nightly_pipeline/result_validator.py b/tests/nightly_pipeline/result_validator.py
new file mode 100644
index 0000000000..a2b6995045
--- /dev/null
+++ b/tests/nightly_pipeline/result_validator.py
@@ -0,0 +1,482 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import csv
+import json
+import math
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+COMMON_COLUMNS = [  # NOTE(review): rows also carry export/compile/size *_pct_diff, which the CSV drops
+    "model_name",
+    "status",
+    "failure_reason",
+    "export_time_before",
+    "export_time_after",
+    "compile_time_before",
+    "compile_time_after",
+    "onnx_qpc_size_before",
+    "onnx_qpc_size_after",
+]
+
+PERF_COLUMNS = [
+    "prefill_time_before",
+    "prefill_time_after",
+    "prefill_time_pct_diff",
+    "decode_perf_before",
+    "decode_perf_after",
+    "decode_perf_pct_diff",
+    "total_perf_before",
+    "total_perf_after",
+    "total_perf_pct_diff",
+    "total_time_before",
+    "total_time_after",
+    "total_time_pct_diff",
+]
+
+LOWER_IS_BETTER_METRICS = {
+    "prefill_time_pct_diff",
+    "total_time_pct_diff",
+}
+
+HIGHER_IS_BETTER_METRICS = {
+    "decode_perf_pct_diff",
+    "total_perf_pct_diff",
+}
+
+SIZE_UNITS = {
+    "B": 1,
+    "KB": 1024,
+    "MB": 1024**2,
+    "GB": 1024**3,
+    "TB": 1024**4,
+}
+
+FAMILY_SPECS = {
+    "audio_embedding_model_configs": {
+        "text_column": "transcription",
+        "text_key": "transcription",
+    },
+    "audio_model_configs": {
+        "text_column": "transcription",
+        "text_key": "transcription",
+        "mad_column": "generated_ids",
+        "mad_key": "generated_ids",
+        "mad_tolerance": "token_mad_tolerance",
+        "include_perf": True,
+    },
+    "causal_pipeline_configs": {
+        "text_column": "generated_text",
+        "text_key": "generated_texts",
+        "mad_column": "generated_ids",
+        "mad_key": "generated_ids",
+        "mad_tolerance": "token_mad_tolerance",
+        "include_perf": True,
+    },
+    "image_text_to_text_model_configs": {
+        "text_column": "generated_text",
+        "text_key": "generated_text",
+        "mad_column": "generated_ids",
+        "mad_key": "generated_ids",
+        "mad_tolerance": "token_mad_tolerance",
+        "include_perf": True,
+    },
+    "embedding_model_configs": {
+        "mad_column": "embedding",
+        "mad_key": "embedding",
+        "mad_tolerance": "embedding_mad_tolerance",
+    },
+    "sequence_model_configs": {
+        "text_column": "prediction",
+        "text_key": "Prediction",
+        "compare_text": False,
+        "mad_column": "generated_ids",
+        "mad_key": "generated_ids",
+        "mad_tolerance": "token_mad_tolerance",
+    },
+}
+
+
+@dataclass(frozen=True)
+class ValidationTolerances:
+    percentage_tolerance: float = 5.0
+    token_mad_tolerance: float = 1e-2
+    embedding_mad_tolerance: float = 1e-2
+
+
+def load_json(filepath: Path) -> dict[str, Any]:
+    with filepath.open("r", encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def load_validation_tolerances(pipeline_configs: dict[str, Any], model_class: str) -> ValidationTolerances:
+    """Resolve tolerances for ``model_class``: class override, then the "default" section, then built-ins."""
+    validation_configs = pipeline_configs.get("validation_configs") or {}
+    # Later entries win, so a class-specific value overrides the shared "default" section.
+    merged = {
+        **(validation_configs.get("default") or {}),
+        **((validation_configs.get("model_class_tolerances") or {}).get(model_class) or {}),
+    }
+    builtin = ValidationTolerances()  # single source of truth for values absent from the config
+    return ValidationTolerances(
+        percentage_tolerance=float(merged.get("percentage_tolerance", builtin.percentage_tolerance)),
+        token_mad_tolerance=float(merged.get("token_mad_tolerance", builtin.token_mad_tolerance)),
+        embedding_mad_tolerance=float(merged.get("embedding_mad_tolerance", builtin.embedding_mad_tolerance)),
+    )
+
+
+def validate_artifact_file(
+    current_artifact_file: Path,
+    previous_artifact_file: Path | None,
+    output_csv_file: Path,
+    model_class: str,
+    tolerances: ValidationTolerances,
+) -> list[dict[str, Any]]:
+    previous_artifacts = load_json(previous_artifact_file) if previous_artifact_file is not None else {}
+    rows = validate_artifacts(load_json(current_artifact_file), previous_artifacts, model_class, tolerances)
+    write_validation_csv(output_csv_file, model_class, rows)
+    return rows
+
+
+def validate_artifacts(
+    current_artifacts: dict[str, Any],
+    previous_artifacts: dict[str, Any],
+    model_class: str,
+    tolerances: ValidationTolerances,
+) -> list[dict[str, Any]]:
+    rows = []  # NOTE(review): models present only in previous_artifacts never get a row — confirm intended
+    for model_name, current_payload in sorted(current_artifacts.items()):  # one row per current model, by name
+        previous_payload = previous_artifacts.get(model_name)
+        if previous_payload is None:  # no baseline: record the current values without comparing
+            rows.append(_current_only_model_row(model_name, current_payload, model_class))
+            continue
+        rows.append(_validate_model(model_name, current_payload, previous_payload, model_class, tolerances))
+    return rows
+
+
+def write_validation_csv(output_csv_file: Path, model_class: str, rows: list[dict[str, Any]]) -> None:
+    output_csv_file.parent.mkdir(parents=True, exist_ok=True)
+    columns = get_csv_columns(model_class)
+    with output_csv_file.open("w", encoding="utf-8", newline="") as handle:
+        writer = csv.DictWriter(handle, fieldnames=columns)
+        writer.writeheader()
+        for row in rows:
+            writer.writerow({column: _format_csv_value(row.get(column, "N/A")) for column in columns})
+
+
+def get_csv_columns(model_class: str) -> list[str]:
+    spec = _get_family_spec(model_class)
+    columns = list(COMMON_COLUMNS)
+
+    text_column = spec.get("text_column")
+    if text_column:
+        columns.extend([f"{text_column}_before", f"{text_column}_after"])
+        if spec.get("compare_text", True):
+            columns.append(f"{text_column}_assertion")
+
+    mad_column = spec.get("mad_column")
+    if mad_column:
+        if mad_column == "generated_ids":
+            columns.append(f"{mad_column}_mad")
+        else:
+            columns.extend([f"{mad_column}_before", f"{mad_column}_after", f"{mad_column}_mad"])
+
+    if spec.get("include_perf"):
+        columns.extend(PERF_COLUMNS)
+
+    return columns
+
+
+def all_rows_passed(rows: list[dict[str, Any]]) -> bool:
+    return all(row.get("status") == "passed" for row in rows)
+
+
+def _validate_model(
+    model_name: str,
+    current_payload: dict[str, Any],
+    previous_payload: dict[str, Any],
+    model_class: str,
+    tolerances: ValidationTolerances,
+) -> dict[str, Any]:
+    columns = get_csv_columns(model_class)
+    spec = _get_family_spec(model_class)
+    row = {column: "N/A" for column in columns}
+    row["model_name"] = model_name
+
+    _add_percentage_metric(row, "export_time", previous_payload.get("export_time"), current_payload.get("export_time"))
+    _add_percentage_metric(
+        row, "compile_time", previous_payload.get("compile_time"), current_payload.get("compile_time")
+    )
+    _add_size_metric(row, previous_payload, current_payload)
+
+    if spec.get("include_perf"):
+        _add_perf_metrics(row, previous_payload, current_payload)
+
+    text_assertion_required = "mad_column" not in spec
+    mad_result = _add_mad_comparison(row, spec, previous_payload, current_payload)
+    if mad_result == "N/A" and spec.get("text_column"):
+        text_assertion_required = True
+
+    if spec.get("text_column"):
+        _add_text_values(row, spec, previous_payload, current_payload, text_assertion_required)
+
+    failures = _collect_failures(row, spec, tolerances)
+    row["status"] = "failed" if failures else "passed"
+    row["failure_reason"] = "; ".join(failures) if failures else ""
+    return row
+
+
+def _current_only_model_row(model_name: str, current_payload: dict[str, Any], model_class: str) -> dict[str, Any]:
+    spec = _get_family_spec(model_class)
+    row = {column: "N/A" for column in get_csv_columns(model_class)}
+    row["model_name"] = model_name
+
+    _add_percentage_metric(row, "export_time", None, current_payload.get("export_time"))
+    _add_percentage_metric(row, "compile_time", None, current_payload.get("compile_time"))
+    _add_size_metric(row, None, current_payload)
+
+    if spec.get("include_perf"):
+        _add_perf_metrics(row, {}, current_payload)
+
+    _add_mad_comparison(row, spec, {}, current_payload)
+
+    if spec.get("text_column"):
+        _add_text_values(row, spec, {}, current_payload, assertion_required=False)
+
+    row["status"] = "passed"
+    row["failure_reason"] = "Previous model artifact not found; comparison skipped."
+    return row
+
+
+def _add_percentage_metric(row: dict[str, Any], column_prefix: str, before: Any, after: Any) -> None:
+    before_value = _to_float(before)
+    after_value = _to_float(after)
+
+    row[f"{column_prefix}_before"] = before_value if before_value is not None else "N/A"
+    row[f"{column_prefix}_after"] = after_value if after_value is not None else "N/A"
+    row[f"{column_prefix}_pct_diff"] = _percentage_difference(before_value, after_value)
+
+
+def _add_size_metric(
+    row: dict[str, Any], previous_payload: dict[str, Any] | None, current_payload: dict[str, Any]
+) -> None:
+    before_size = _extract_total_size_bytes(previous_payload or {})
+    after_size = _extract_total_size_bytes(current_payload)
+
+    row["onnx_qpc_size_before"] = _human_readable_size(before_size) if before_size is not None else "N/A"
+    row["onnx_qpc_size_after"] = _human_readable_size(after_size) if after_size is not None else "N/A"
+    row["onnx_qpc_size_pct_diff"] = _percentage_difference(before_size, after_size)
+
+
+def _add_perf_metrics(row: dict[str, Any], previous_payload: dict[str, Any], current_payload: dict[str, Any]) -> None:
+    previous_perf = previous_payload.get("perf_metrics", {}) or {}
+    current_perf = current_payload.get("perf_metrics", {}) or {}
+    _add_percentage_metric(row, "prefill_time", previous_perf.get("prefill_time"), current_perf.get("prefill_time"))
+    _add_percentage_metric(row, "decode_perf", previous_perf.get("decode_perf"), current_perf.get("decode_perf"))
+    _add_percentage_metric(row, "total_perf", previous_perf.get("total_perf"), current_perf.get("total_perf"))
+    _add_percentage_metric(row, "total_time", previous_perf.get("total_time"), current_perf.get("total_time"))
+
+
+def _add_text_values(
+    row: dict[str, Any],
+    spec: dict[str, Any],
+    previous_payload: dict[str, Any],
+    current_payload: dict[str, Any],
+    assertion_required: bool,
+) -> None:
+    text_column = spec["text_column"]
+    text_key = spec["text_key"]
+    previous_text = previous_payload.get(text_key)
+    current_text = current_payload.get(text_key)
+    row[f"{text_column}_before"] = previous_text if previous_text is not None else "N/A"
+    row[f"{text_column}_after"] = current_text if current_text is not None else "N/A"
+
+    assertion_column = f"{text_column}_assertion"
+    if assertion_column not in row:
+        return
+    if not assertion_required:
+        row[assertion_column] = "not_applicable"
+        return
+    row[assertion_column] = "passed" if _values_equal(previous_text, current_text) else "failed"
+
+
+def _add_mad_comparison(
+    row: dict[str, Any],
+    spec: dict[str, Any],
+    previous_payload: dict[str, Any],
+    current_payload: dict[str, Any],
+) -> float | str:
+    mad_column = spec.get("mad_column")
+    if not mad_column:
+        return "N/A"
+
+    mad_key = spec["mad_key"]
+    previous_value = previous_payload.get(mad_key)
+    current_value = current_payload.get(mad_key)
+    row[f"{mad_column}_before"] = previous_value if previous_value is not None else "N/A"
+    row[f"{mad_column}_after"] = current_value if current_value is not None else "N/A"
+    mad_value = _numeric_mad(previous_value, current_value)
+    row[f"{mad_column}_mad"] = mad_value
+    return mad_value
+
+
+def _percentage_difference(before: float | None, after: float | None) -> float | str:
+    if before is None or after is None or before == 0:
+        return "N/A"
+    return ((after - before) / before) * 100
+
+
+def _collect_failures(row: dict[str, Any], spec: dict[str, Any], tolerances: ValidationTolerances) -> list[str]:
+    failures = []
+    percentage_tolerance = tolerances.percentage_tolerance
+
+    for metric in sorted(LOWER_IS_BETTER_METRICS):
+        pct_diff = row.get(metric)
+        if isinstance(pct_diff, (int, float)) and pct_diff > percentage_tolerance:
+            failures.append(f"{metric} regression {pct_diff:.2f}% exceeds {percentage_tolerance:.2f}% tolerance")
+
+    for metric in sorted(HIGHER_IS_BETTER_METRICS):
+        pct_diff = row.get(metric)
+        if isinstance(pct_diff, (int, float)) and pct_diff < -percentage_tolerance:
+            failures.append(f"{metric} regression {pct_diff:.2f}% exceeds {percentage_tolerance:.2f}% tolerance")
+
+    _collect_mad_failures(failures, row, spec, tolerances)
+    _collect_assertion_failures(failures, row, spec)
+    return failures
+
+
+def _collect_mad_failures(
+    failures: list[str], row: dict[str, Any], spec: dict[str, Any], tolerances: ValidationTolerances
+) -> None:
+    mad_column = spec.get("mad_column")
+    if not mad_column:
+        return  # this family has no MAD comparison configured
+
+    mad_value = row.get(f"{mad_column}_mad")
+    tolerance_name = spec["mad_tolerance"]  # name of the ValidationTolerances field to apply
+    tolerance_value = getattr(tolerances, tolerance_name)
+    if isinstance(mad_value, (int, float)):
+        if mad_value > tolerance_value:
+            failures.append(f"{mad_column}_mad {mad_value:.6f} exceeds {tolerance_value:.6f} tolerance")
+        return  # a numeric MAD was checked; the unavailable-MAD rule below does not apply
+
+    if not spec.get("text_column"):  # NOTE(review): compare_text=False families get no fallback check at all
+        failures.append(f"{mad_column}_mad is unavailable")
+
+
+def _collect_assertion_failures(failures: list[str], row: dict[str, Any], spec: dict[str, Any]) -> None:
+    text_column = spec.get("text_column")
+    if not text_column:
+        return
+
+    assertion_value = row.get(f"{text_column}_assertion")
+    if assertion_value == "failed":
+        failures.append(f"{text_column}_assertion failed")
+
+
+def _extract_total_size_bytes(payload: dict[str, Any]) -> float | None:
+    sizes = []
+    for key, value in payload.items():
+        if not _is_artifact_size_key(key):
+            continue
+        parsed_size = _parse_size_bytes(value)
+        if parsed_size is not None:
+            sizes.append(parsed_size)
+    if not sizes:
+        return None
+    return float(sum(sizes))
+
+
+def _is_artifact_size_key(key: str) -> bool:
+    key_lower = key.lower()
+    return key_lower == "size" or ("size" in key_lower and ("onnx" in key_lower or "qpc" in key_lower))
+
+
+def _parse_size_bytes(value: Any) -> float | None:
+    if isinstance(value, (int, float)) and not isinstance(value, bool):
+        return float(value)
+    if not isinstance(value, str):
+        return None
+
+    match = re.fullmatch(r"\s*([0-9]+(?:\.[0-9]+)?)\s*([KMGT]?B)\s*", value, flags=re.IGNORECASE)
+    if not match:
+        return None
+
+    amount = float(match.group(1))
+    unit = match.group(2).upper()
+    return amount * SIZE_UNITS[unit]
+
+
+def _human_readable_size(size_bytes: float) -> str:
+    for unit in ("B", "KB", "MB", "GB"):  # TB is the cap and is formatted after the loop, whatever the size
+        if size_bytes < 1024:
+            return f"{size_bytes:.2f} {unit}"
+        size_bytes /= 1024
+    return f"{size_bytes:.2f} TB"
+
+
+def _numeric_mad(previous_value: Any, current_value: Any) -> float | str:
+    """Return the mean absolute difference of the two payloads' numeric values, or "N/A" if either has none."""
+    previous_flat = _flatten_numeric_values(previous_value)
+    current_flat = _flatten_numeric_values(current_value)
+    if not previous_flat or not current_flat:
+        return "N/A"
+    # zip stops at the shorter sequence, so only the shared prefix is compared; surplus values are ignored.
+    absolute_differences = [abs(current - previous) for previous, current in zip(previous_flat, current_flat)]
+    return sum(absolute_differences) / len(absolute_differences)
+
+
+def _flatten_numeric_values(value: Any) -> list[float]:
+    """Flatten nested lists/tuples depth-first into floats, dropping bools, non-finite numbers and other types."""
+    if isinstance(value, (list, tuple)):
+        return [number for item in value for number in _flatten_numeric_values(item)]
+
+    # NOTE(review): dropping a non-finite entry shifts every later value one position left, so a NaN on one
+    # side misaligns the element-wise comparison done by the caller — confirm this is acceptable.
+    # bool is a subclass of int, so it has to be excluded explicitly.
+    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
+    if is_number and math.isfinite(value):
+        return [float(value)]
+    return []
+
+
+def _to_float(value: Any) -> float | None:
+    if isinstance(value, bool):
+        return None
+    if isinstance(value, (int, float)) and math.isfinite(value):
+        return float(value)
+    return None
+
+
+def _values_equal(previous_value: Any, current_value: Any) -> bool:
+    if previous_value is None or current_value is None:
+        return False
+    return _normalize_for_assertion(previous_value) == _normalize_for_assertion(current_value)
+
+
+def _normalize_for_assertion(value: Any) -> str:
+    if isinstance(value, (dict, list, tuple)):
+        return json.dumps(value, sort_keys=True)
+    return str(value).strip()
+
+
+def _format_csv_value(value: Any) -> Any:
+    if isinstance(value, float):
+        return f"{value:.6f}"
+    if isinstance(value, (dict, list, tuple)):
+        return json.dumps(value)
+    return value
+
+
+def _get_family_spec(model_class: str) -> dict[str, Any]:
+    if model_class not in FAMILY_SPECS:
+        raise KeyError(f"Unknown nightly model class: {model_class}")
+    return FAMILY_SPECS[model_class]
diff --git a/tests/nightly_pipeline/sequence_models/test_generate.py b/tests/nightly_pipeline/sequence_models/test_generate.py
index 41383ad7d9..a42e48acef 100644
--- a/tests/nightly_pipeline/sequence_models/test_generate.py
+++ b/tests/nightly_pipeline/sequence_models/test_generate.py
@@ -51,6 +51,7 @@ def test_generate_sequence_model(model_name, get_pipeline_config, sequence_model
         {
             "onnx_and_qpc_dir": onnx_and_qpc_dir,
             "size": size,
+            "generated_ids": logits.tolist(),
             "Prediction": qeff_model.model.config.id2label[predicted_class_id],
         }
     )
diff --git a/tests/nightly_pipeline/test_result_validation.py b/tests/nightly_pipeline/test_result_validation.py
new file mode 100644
index 0000000000..d9076fe727
--- /dev/null
+++ b/tests/nightly_pipeline/test_result_validation.py
@@ -0,0 +1,56 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------
+
+import os
+from pathlib import Path
+
+import pytest
+
+from .result_validator import ValidationTolerances, all_rows_passed, load_validation_tolerances, validate_artifact_file
+
+MODEL_ARTIFACTS = [
+    ("causal_pipeline_configs", "causal_model_artifacts.json", "causal_model_validation.csv"),
+    ("embedding_model_configs", "embedding_model_artifacts.json", "embedding_model_validation.csv"),
+    ("audio_model_configs", "audio_model_artifacts.json", "audio_model_validation.csv"),
+    ("audio_embedding_model_configs", "audio_embedding_model_artifacts.json", "audio_embedding_model_validation.csv"),
+    (
+        "image_text_to_text_model_configs",
+        "image_text_to_text_model_artifacts.json",
+        "image_text_to_text_model_validation.csv",
+    ),
+    ("sequence_model_configs", "sequence_model_artifacts.json", "sequence_model_validation.csv"),
+]
+
+
+@pytest.mark.nightly
+@pytest.mark.parametrize("model_class, artifact_filename, csv_filename", MODEL_ARTIFACTS)
+def test_validate_nightly_results(model_class, artifact_filename, csv_filename, artifacts_dir, get_pipeline_config):
+    """Validate one model family's current artifacts against the previous nightly and write its CSV report."""
+    current_artifact_file = artifacts_dir / artifact_filename
+    output_csv_file = artifacts_dir / csv_filename
+    # Unset, empty and blank all mean "no baseline"; Path("") would otherwise resolve to the working directory.
+    previous_dir = os.environ.get("NIGHTLY_PIPELINE_PREVIOUS_ARTIFACTS_DIR", "").strip()
+    previous_artifact_file = Path(previous_dir).expanduser().resolve() / artifact_filename if previous_dir else None
+
+    assert current_artifact_file.exists(), f"Current nightly artifact file is missing: {current_artifact_file}"
+    if previous_artifact_file is not None:
+        assert previous_artifact_file.exists(), f"Previous nightly artifact file is missing: {previous_artifact_file}"
+
+    tolerances = load_validation_tolerances(get_pipeline_config, model_class)
+    assert isinstance(tolerances, ValidationTolerances)
+
+    rows = validate_artifact_file(
+        current_artifact_file, previous_artifact_file, output_csv_file, model_class, tolerances
+    )
+
+    assert output_csv_file.exists(), f"Validation CSV was not created: {output_csv_file}"
+    assert all_rows_passed(rows), _failure_summary(model_class, rows)
+
+
+def _failure_summary(model_class, rows):
+    failures = [f"{row['model_name']}: {row['failure_reason']}" for row in rows if row.get("status") != "passed"]
+    return f"Nightly validation failed for {model_class}: " + " | ".join(failures)