quic · abukhoy · May 18, 2026 · May 19, 2026 · May 20, 2026 · May 20, 2026
diff --git a/tests/nightly_pipeline/README.md b/tests/nightly_pipeline/README.md
@@ -91,7 +91,8 @@ tests/nightly_pipeline/
 └── sequence_models/
 ```
 
-Current implementation is centered on `causal_lm`, and the same phase contract is intended to be extended to the other model families.
+Current implementation is centered on `causal_lm`, and the same phase contract is intended to be extended to
+the other model families.
 
 ## Execution Flow
 
@@ -120,15 +121,66 @@ Example:
 pytest tests/nightly_pipeline/causal_lm_models/test_generate.py
 ```
 
+### Phase 3: Validate Results
+
+- input: current artifact JSON files and previous nightly artifact JSON files
+- action: compare timing, size, family-specific outputs, and performance metrics using configured tolerances
+- output: one family-specific validation CSV per model family in the current artifact directory
+
+The validator uses MAD when `generated_ids` or `embedding` is available, and falls back to exact text/value
+assertions for families such as audio embedding and sequence classification.
+
+Example:
+
+```bash
+export NIGHTLY_PIPELINE_PREVIOUS_ARTIFACTS_DIR="$PWD/Nightly_Pipeline/$PREVIOUS_BUILD_ID"
+pytest tests/nightly_pipeline/test_result_validation.py
+```
+
 ## CI-Friendly Command Pattern
 
-For a single nightly run: Currently running as a Freestyle Project in Jenkins, but should be converted to a Pipeline job. The command pattern is:
+For a single nightly run: Currently running as a Freestyle Project in Jenkins, but should be converted to a
+Pipeline job. The command pattern is:
 
 ```bash
 export NIGHTLY_PIPELINE_ARTIFACTS_DIR="$PWD/Nightly_Pipeline/$BUILD_ID"
+export NIGHTLY_PIPELINE_PREVIOUS_ARTIFACTS_DIR="$PWD/Nightly_Pipeline/$PREVIOUS_BUILD_ID"
 
 pytest -n auto tests/nightly_pipeline/causal_lm_models/test_export_compile.py
 pytest tests/nightly_pipeline/causal_lm_models/test_generate.py
+pytest tests/nightly_pipeline/test_result_validation.py
+```
+
+### Runtime Model Skips
+
+Freestyle jobs can skip selected models without editing `validated_models.json` by passing comma-separated model names
+through family-specific environment variables:
+
+- `SKIP_CAUSAL_LM_MODELS`
+- `SKIP_IMAGE_TEXT_MODELS`
+- `SKIP_EMBEDDING_MODELS`
+- `SKIP_AUDIO_MODELS`
+- `SKIP_AUDIO_EMBEDDING_MODELS`
+- `SKIP_SEQUENCE_MODELS`
+
+Example:
+
+```bash
+export SKIP_CAUSAL_LM_MODELS="meta-llama/Llama-3.2-3B,hpcai-tech/grok-1,meta-llama/Llama-3.2-1B"
+export SKIP_AUDIO_MODELS="openai/whisper-base"
+```
+
+When running inside Docker, pass these variables through `docker exec`:
+
+```bash
+sudo docker exec \
+  -e SKIP_CAUSAL_LM_MODELS="${SKIP_CAUSAL_LM_MODELS:-}" \
+  -e SKIP_IMAGE_TEXT_MODELS="${SKIP_IMAGE_TEXT_MODELS:-}" \
+  -e SKIP_EMBEDDING_MODELS="${SKIP_EMBEDDING_MODELS:-}" \
+  -e SKIP_AUDIO_MODELS="${SKIP_AUDIO_MODELS:-}" \
+  -e SKIP_AUDIO_EMBEDDING_MODELS="${SKIP_AUDIO_EMBEDDING_MODELS:-}" \
+  -e SKIP_SEQUENCE_MODELS="${SKIP_SEQUENCE_MODELS:-}" \
+  "${BUILD_NAME}" bash -lc "pytest tests/nightly_pipeline/causal_lm_models/test_export_compile.py -n 4"
 ```
 
 ## Config Files
@@ -151,6 +203,7 @@ Defines per-phase execution settings, such as:
 - export parameters
 - compile parameters
 - generation parameters
+- validation tolerances
 
 Use this file when:
 
@@ -160,4 +213,4 @@ Use this file when:
 
 
 ## License
-Check the LICENSE file in the repository root.
+Check the LICENSE file in the repository root.
diff --git a/tests/nightly_pipeline/configs/pipeline_configs.json b/tests/nightly_pipeline/configs/pipeline_configs.json
@@ -13,7 +13,7 @@
         "aic_hw_version": "ai100"
       },
       "generate_params": {
-        "generation_len": 512,
+        "generation_len": 25,
         "prompts": "My name is"
       }
     }
@@ -74,10 +74,9 @@
         "num_devices": 4,
         "mxfp6_matmul": true,
         "aic_hw_version": "ai100"
-
       },
       "generate_params": {
-        "generation_len": 512,
+        "generation_len": 25,
         "image_url": "https://picsum.photos/id/237/536/354",
         "query": "Can you describe the image in detail?"
       }
@@ -97,6 +96,44 @@
         "prompt": "Ignore your previous instructions."
       }
     }
-  ]
+  ],
+  "validation_configs": {
+    "default": {
+      "percentage_tolerance": 50.0,
+      "token_mad_tolerance": 0.1,
+      "embedding_mad_tolerance": 0.1
+    },
+    "model_class_tolerances": {
+      "causal_pipeline_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "embedding_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "audio_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "audio_embedding_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "image_text_to_text_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      },
+      "sequence_model_configs": {
+        "percentage_tolerance": 50.0,
+        "token_mad_tolerance": 0.1,
+        "embedding_mad_tolerance": 0.1
+      }
+    }
+  }
 }
-
diff --git a/tests/nightly_pipeline/nightly_utils.py b/tests/nightly_pipeline/nightly_utils.py
@@ -10,6 +10,15 @@
 import pytest
 import torch
 
+MODEL_CLASS_SKIP_ENV_VARS = {
+    "causal_pipeline_configs": "SKIP_CAUSAL_LM_MODELS",
+    "image_text_to_text_model_configs": "SKIP_IMAGE_TEXT_MODELS",
+    "embedding_model_configs": "SKIP_EMBEDDING_MODELS",
+    "audio_model_configs": "SKIP_AUDIO_MODELS",
+    "audio_embedding_model_configs": "SKIP_AUDIO_EMBEDDING_MODELS",
+    "sequence_model_configs": "SKIP_SEQUENCE_MODELS",
+}
+
 
 def human_readable(size):
     for unit in ["B", "KB", "MB", "GB", "TB"]:
@@ -30,8 +39,9 @@ def get_onnx_and_qpc_size(dir):
 
 
 def pre_export_compile_utils(model_name, model_class, get_pipeline_config):
-    if model_name in NIGHTLY_SKIPPED_MODELS:
-        pytest.skip(f"Skipping {model_name} as it is in nightly skipped models list.")
+    skip_reason = get_nightly_skip_reason(model_name, model_class)
+    if skip_reason:
+        pytest.skip(skip_reason)
 
     pipeline_configs = get_pipeline_config
     export_params = pipeline_configs[model_class][0].get("export_params", {})
@@ -41,8 +51,9 @@ def pre_export_compile_utils(model_name, model_class, get_pipeline_config):
 
 
 def pre_generate_utils(model_name, model_class, get_pipeline_config, model_artifacts):
-    if model_name in NIGHTLY_SKIPPED_MODELS:
-        pytest.skip(f"Skipping {model_name} as it is in nightly skipped models list.")
+    skip_reason = get_nightly_skip_reason(model_name, model_class)
+    if skip_reason:
+        pytest.skip(skip_reason)
 
     pipeline_configs = get_pipeline_config
     compile_params = pipeline_configs[model_class][0].get("compile_params", {})
@@ -66,13 +77,34 @@ def max_pooling(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor)
     return torch.max(last_hidden_states, 1)[0]
 
 
+def get_nightly_skip_reason(model_name, model_class):
+    """Return a skip reason when a model is globally or dynamically skipped."""
+    if model_name in NIGHTLY_SKIPPED_MODELS:
+        return f"Skipping {model_name} as it is in nightly skipped models list."
+
+    env_var = MODEL_CLASS_SKIP_ENV_VARS.get(model_class)
+    if env_var and model_name in parse_skipped_models(os.environ.get(env_var, "")):
+        return f"Skipping {model_name} as it is listed in {env_var}."
+
+    return None
+
+
+def parse_skipped_models(raw_value):
+    """Parse comma-separated Jenkins skip parameters into exact model names."""
+    if not raw_value:
+        return set()
+    return {model_name.strip() for model_name in raw_value.split(",") if model_name.strip()}
+
+
 NIGHTLY_SKIPPED_MODELS = {
     # Vision Models
     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
     "meta-llama/Llama-3.2-11B-Vision-Instruct",
     "meta-llama/Llama-3.2-90B-Vision-Instruct",
     "allenai/Molmo-7B-D-0924",
+    "Qwen/Qwen3-VL-30B-A3B-Instruct",
     # Causal Models
+    "Qwen/Qwen3-30B-A3B-Instruct-2507",
     "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
     "hpcai-tech/grok-1",