diff --git a/tools/launcher/examples/Qwen/qwen3-v0353a-eagle3/step2_hidden.yaml b/tools/launcher/examples/Qwen/qwen3-v0353a-eagle3/step2_hidden.yaml
new file mode 100644
index 00000000000..e0be7958ad5
--- /dev/null
+++ b/tools/launcher/examples/Qwen/qwen3-v0353a-eagle3/step2_hidden.yaml
@@ -0,0 +1,38 @@
+# Step 2 hidden-state dump for qwen3-v0353a-eagle3.
+#
+# Standalone task that dumps hidden states from the target model.
+# Output is written to /scratchspace/offline_hidden_states for consumption
+# by the downstream EAGLE3 training step.
+#
+# Usage:
+#   uv run slurm.py --yaml modules/Model-Optimizer/tools/launcher/examples/Qwen/qwen3-v0353a-eagle3/step2_hidden.yaml --dry-run
+
+job_name: qwen3-v0353a-eagle3_EAGLE3_hidden_dump
+pipeline:
+  allow_to_fail: false
+  skip: false
+  note: null
+
+  global_vars:
+    hf_model: /hf-local/Qwen/qwen3-v0353a-eagle3
+
+  # Step 2: Dump hidden states from target model
+  task_0:
+    script: common/eagle3/dump_offline_data.sh
+    args:
+      - --input-data /scratchspace/data
+      - --output-dir /scratchspace/offline_hidden_states
+      - --max-seq-len 8192
+      - --tp 8
+      - --moe-ep 8
+    environment:
+      # NOTE(review): the source showed an empty `<>` here; restored as a
+      # reference to global_vars.hf_model defined above - confirm that this
+      # matches the launcher's placeholder syntax.
+      - HF_MODEL_CKPT: <<global_vars.hf_model>>
+    slurm_config:
+      _factory_: "slurm_factory"
+      nodes: 1
+      ntasks_per_node: 8
+      gpus_per_node: 8
+      container: nvcr.io/nvidia/tensorrt-llm/release:1.2.0