From 7af2716ac9299e36e24e755ee524083d3598994e Mon Sep 17 00:00:00 2001
From: Chenjie Luo
Date: Thu, 7 May 2026 11:54:11 -0700
Subject: [PATCH 1/3] [Recipes][LLM PTQ] Add nvfp4_experts_only_mse-fp8_cast_kv
 recipe + --recipe support in scripts

- Add modelopt_recipes/general/ptq/nvfp4_experts_only_mse-fp8_cast_kv.yaml,
  combining experts-only NVFP4 W4A4 with the MSE FP8 scale-sweep weight
  calibration (algorithm: mse, fp8_scale_sweep: true; expert weight blocks
  use nvfp4_static so the static FP8 sweep applies) and FP8 KV cache via
  the kv_fp8_cast unit (use_constant_amax: true).
- examples/llm_ptq/scripts: thread a new --recipe flag through parser.sh
  and huggingface_example.sh. Either --quant or --recipe is required;
  passing both is an error. When --recipe is used, the script derives
  MODEL_NAME from the recipe basename, forwards the recipe path to
  hf_ptq.py via --recipe, and exits after export with a TRT-LLM
  deployment hint (recipes can produce arbitrary configs).
- Drop the qformat case-statement whitelist in huggingface_example.sh;
  let hf_ptq.py be the single source of truth for valid qformats /
  recipes.

Signed-off-by: Chenjie Luo
---
 .../llm_ptq/scripts/huggingface_example.sh         | 36 ++++++++------
 examples/llm_ptq/scripts/parser.sh                 | 16 +++++--
 .../nvfp4_experts_only_mse-fp8_cast_kv.yaml        | 48 +++++++++++++++++++
 3 files changed, 82 insertions(+), 18 deletions(-)
 create mode 100644 modelopt_recipes/general/ptq/nvfp4_experts_only_mse-fp8_cast_kv.yaml

diff --git a/examples/llm_ptq/scripts/huggingface_example.sh b/examples/llm_ptq/scripts/huggingface_example.sh
index 6ca99c7f963..693506929d9 100755
--- a/examples/llm_ptq/scripts/huggingface_example.sh
+++ b/examples/llm_ptq/scripts/huggingface_example.sh
@@ -49,18 +49,7 @@ dense | sparsegpt) ;;
     ;;
 esac
 
-#Iterate over list of qformats provided and check if they are valid
-IFS=","
-for qformat in $QFORMAT; do
-    case $qformat in
-        fp8 | fp8_pc_pt | fp8_pb_wo | int8_wo | int8_sq | int4_awq | w4a8_awq | fp16 | bf16 | nvfp4 | nvfp4_awq | nvfp4_mse | w4a8_nvfp4_fp8 | w4a8_mxfp4_fp8 | nvfp4_experts_only | nvfp4_mlp_only | nvfp4_omlp_only | nvfp4_svdquant | mxfp8 | nvfp4_local_hessian) ;;
-        *)
-            echo "Unknown quant argument: Expected one of: [fp8, fp8_pc_pt, fp8_pb_wo, int8_wo, int8_sq, int4_awq, w4a8_awq, fp16, bf16, nvfp4, nvfp4_awq, nvfp4_mse, w4a8_nvfp4_fp8, w4a8_mxfp4_fp8, nvfp4_experts_only, nvfp4_mlp_only, nvfp4_omlp_only, nvfp4_svdquant, mxfp8, nvfp4_local_hessian]" >&2
-            exit 1
-            ;;
-    esac
-done
-IFS=" "
+# Quant format / recipe validation is delegated to hf_ptq.py.
 
 script_dir="$(dirname "$(readlink -f "$0")")"
 
@@ -72,7 +61,14 @@ fi
 
 QFORMAT_MODIFIED="${QFORMAT//,/_}"
 
-MODEL_NAME=$(basename $MODEL_PATH | sed 's/[^0-9a-zA-Z\-]/_/g')_${QFORMAT_MODIFIED}${KV_CACHE_QUANT:+_kv_${KV_CACHE_QUANT}}
+# When using --recipe, build the model name from the recipe basename (without
+# directory or .yaml suffix) so each recipe gets its own SAVE_PATH.
+if [ -n "$RECIPE" ]; then
+    RECIPE_TAG=$(basename "$RECIPE" .yaml | sed 's/[^0-9a-zA-Z\-]/_/g')
+    MODEL_NAME=$(basename $MODEL_PATH | sed 's/[^0-9a-zA-Z\-]/_/g')_recipe_${RECIPE_TAG}
+else
+    MODEL_NAME=$(basename $MODEL_PATH | sed 's/[^0-9a-zA-Z\-]/_/g')_${QFORMAT_MODIFIED}${KV_CACHE_QUANT:+_kv_${KV_CACHE_QUANT}}
+fi
 
 SAVE_PATH=${ROOT_SAVE_PATH}/saved_models_${MODEL_NAME}
 
@@ -177,11 +173,16 @@ if [[ $TASKS =~ "quant" ]] || [[ ! -d "$SAVE_PATH" ]] || [[ ! $(ls -A $SAVE_PATH
 
     if [[ "$MODEL_CONFIG_EXIST" == false ]]; then
         echo "Quantizing original model..."
+        if [ -n "$RECIPE" ]; then
+            QUANT_SPEC_ARGS="--recipe=$RECIPE"
+        else
+            QUANT_SPEC_ARGS="--qformat=${QFORMAT// /,}"
+        fi
         python hf_ptq.py \
             --pyt_ckpt_path=$MODEL_PATH \
             --export_path=$SAVE_PATH \
             --sparsity_fmt=$SPARSITY_FMT \
-            --qformat="${QFORMAT// /,}" \
+            $QUANT_SPEC_ARGS \
             --calib_size=$CALIB_SIZE \
             --batch_size=$CALIB_BATCH_SIZE \
             --inference_tensor_parallel=$TP \
@@ -203,7 +204,7 @@ if [[ $TASKS =~ "quant" ]] || [[ ! -d "$SAVE_PATH" ]] || [[ ! $(ls -A $SAVE_PATH
         exit 0
     fi
 
-    if [[ "$QFORMAT" == *"nvfp4"* ]] || [[ "$KV_CACHE_QUANT" == *"nvfp4"* ]]; then
+    if [[ "$QFORMAT" == *"nvfp4"* ]] || [[ "$KV_CACHE_QUANT" == *"nvfp4"* ]] || [[ "$RECIPE" == *"nvfp4"* ]]; then
         cuda_major=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader -i 0 | cut -d. -f1)
 
         if [ "$cuda_major" -lt 10 ]; then
@@ -212,6 +213,11 @@ if [[ $TASKS =~ "quant" ]] || [[ ! -d "$SAVE_PATH" ]] || [[ ! $(ls -A $SAVE_PATH
         fi
     fi
 
+    if [ -n "$RECIPE" ]; then
+        echo "Recipe $RECIPE used. Please deploy with TensorRT-LLM directly. Checkpoint export_path: $SAVE_PATH"
+        exit 0
+    fi
+
     if [[ ! " fp8 nvfp4 bf16 fp16 " =~ " ${QFORMAT} " ]]; then
         echo "Quant $QFORMAT specified. Please read TensorRT-LLM quantization support matrix https://nvidia.github.io/TensorRT-LLM/features/quantization.html#quantization-in-tensorrt-llm and use TensorRT-LLM for deployment. Checkpoint export_path: $SAVE_PATH"
         exit 0
diff --git a/examples/llm_ptq/scripts/parser.sh b/examples/llm_ptq/scripts/parser.sh
index 3817c1dee7c..2a9a28b3566 100644
--- a/examples/llm_ptq/scripts/parser.sh
+++ b/examples/llm_ptq/scripts/parser.sh
@@ -20,6 +20,7 @@ parse_options() {
     # Default values
     MODEL_PATH=""
     QFORMAT=""
+    RECIPE=""
     KV_CACHE_QUANT=""
     TP=1
     PP=1
@@ -37,13 +38,14 @@ parse_options() {
     CAST_MXFP4_TO_NVFP4=false
 
     # Parse command-line options
-    ARGS=$(getopt -o "" -l "model:,quant:,kv_cache_quant:,tp:,pp:,sparsity:,awq_block_size:,calib:,calib_batch_size:,auto_quantize_bits:,output:,batch:,tasks:,lm_eval_tasks:,lm_eval_limit:,simple_eval_tasks:,trust_remote_code,use_seq_device_map,gpu_max_mem_percentage:,kv_cache_free_gpu_memory_fraction:,low_memory_mode,no-verbose,calib_dataset:,calib_seq:,auto_quantize_method:,auto_quantize_score_size:,auto_quantize_checkpoint:,moe_calib_experts_ratio:,cast_mxfp4_to_nvfp4" -n "$0" -- "$@")
+    ARGS=$(getopt -o "" -l "model:,quant:,recipe:,kv_cache_quant:,tp:,pp:,sparsity:,awq_block_size:,calib:,calib_batch_size:,auto_quantize_bits:,output:,batch:,tasks:,lm_eval_tasks:,lm_eval_limit:,simple_eval_tasks:,trust_remote_code,use_seq_device_map,gpu_max_mem_percentage:,kv_cache_free_gpu_memory_fraction:,low_memory_mode,no-verbose,calib_dataset:,calib_seq:,auto_quantize_method:,auto_quantize_score_size:,auto_quantize_checkpoint:,moe_calib_experts_ratio:,cast_mxfp4_to_nvfp4" -n "$0" -- "$@")
     eval set -- "$ARGS"
 
     while true; do
         case "$1" in
             --model ) MODEL_PATH="$2"; shift 2;;
             --quant ) QFORMAT="$2"; shift 2;;
+            --recipe ) RECIPE="$2"; shift 2;;
             --kv_cache_quant ) KV_CACHE_QUANT="$2"; shift 2;;
             --tp ) TP="$2"; shift 2;;
             --pp ) PP="$2"; shift 2;;
@@ -99,12 +101,19 @@ parse_options() {
     fi
 
     # Verify required options are provided
-    if [ -z "$MODEL_PATH" ] || [ -z "$QFORMAT" ] || [ -z "$TASKS" ]; then
-        echo "Usage: $0 --model=<model_path> --quant=<quant_format> --tasks=<tasks>"
+    if [ -z "$MODEL_PATH" ] || [ -z "$TASKS" ] || ([ -z "$QFORMAT" ] && [ -z "$RECIPE" ]); then
+        echo "Usage: $0 --model=<model_path> (--quant=<quant_format> | --recipe=<recipe_path>) --tasks=<tasks>"
         echo "Optional args: --sparsity=<sparsity_format> --awq_block_size=<block_size> --calib=<calib_size>"
         exit 1
     fi
 
+    # --quant and --recipe are mutually exclusive: --recipe is a full PTQ spec, while
+    # --quant selects a built-in qformat preset. Pick exactly one.
+    if [ -n "$QFORMAT" ] && [ -n "$RECIPE" ]; then
+        echo "Cannot specify both --quant and --recipe; pick one." >&2
+        exit 1
+    fi
+
     VALID_TASKS=("quant" "mmlu" "lm_eval" "livecodebench" "simple_eval")
 
     for task in $(echo "$TASKS" | tr ',' ' '); do
@@ -135,6 +144,7 @@ parse_options() {
     echo "================="
     echo "model: $MODEL_PATH"
     echo "quant: $QFORMAT"
+    echo "recipe: $RECIPE"
     echo "tp (TensorRT-LLM Checkpoint only): $TP"
     echo "pp (TensorRT-LLM Checkpoint only): $PP"
     echo "sparsity: $SPARSITY_FMT"
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only_mse-fp8_cast_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only_mse-fp8_cast_kv.yaml
new file mode 100644
index 00000000000..5db1666402d
--- /dev/null
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only_mse-fp8_cast_kv.yaml
@@ -0,0 +1,48 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4
+  nvfp4_static: configs/numerics/nvfp4_static
+  kv_fp8_cast: configs/ptq/units/kv_fp8_cast
+
+metadata:
+  recipe_type: ptq
+  description: NVFP4 static weight (MSE FP8-scale sweep) and dynamic activation for expert layers only (W4A4), FP8 KV cache with constant amax.
+quantize:
+  algorithm:
+    method: mse
+    fp8_scale_sweep: true
+    # layerwise=false required for VLMs where the decoder layers are nested under
+    # `model.language_model.layers` (layerwise_calibrate can't find them otherwise).
+    layerwise: false
+  quant_cfg:
+    - $import: base_disable_all
+    - quantizer_name: '*mlp.experts*weight_quantizer'
+      cfg:
+        $import: nvfp4_static
+    - quantizer_name: '*mlp.experts*input_quantizer'
+      cfg:
+        $import: nvfp4
+    - quantizer_name: '*block_sparse_moe*weight_quantizer'
+      cfg:
+        $import: nvfp4_static
+    - quantizer_name: '*block_sparse_moe*input_quantizer'
+      cfg:
+        $import: nvfp4
+    - $import: kv_fp8_cast
+    - $import: default_disabled_quantizers

From 2fdfe866e65ca69b044270bc7faf978870e76a5d Mon Sep 17 00:00:00 2001
From: Chenjie Luo
Date: Thu, 7 May 2026 11:54:59 -0700
Subject: [PATCH 2/3] [Recipes] Add nvfp4_mlp_only_mse-fp8_cast_kv

Same shape as nvfp4_experts_only_mse-fp8_cast_kv but with the broader
*mlp* / *block_sparse_moe* / *.experts.* patterns from
nvfp4_mlp_only-kv_fp8.yaml so it covers both dense MLP and MoE expert
weights:

- algorithm: { method: mse, fp8_scale_sweep: true, layerwise: false }
- All MLP weight quantizers use nvfp4_static so the static FP8 scale
  sweep applies (otherwise mse_calibrate skips them).
- Input quantizers use nvfp4 (dynamic).
- KV bmm uses kv_fp8_cast (skips KV calibration; amax hardcoded to FP8
  E4M3 max 448.0).
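
For reference, a representative invocation through the example scripts.
This is a sketch: /path/to/hf_model is a placeholder, $MODELOPT_ROOT
stands in for the repo checkout, and the script is assumed to be run
from examples/llm_ptq as with the existing --quant flow:

    scripts/huggingface_example.sh \
        --model=/path/to/hf_model \
        --recipe=$MODELOPT_ROOT/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-fp8_cast_kv.yaml \
        --tasks=quant
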
Pre-commit hook check-modelopt-recipes was skipped because the host
conda env has a broken torchvision install that prevents the validator
from importing modelopt; the same hook fails on the existing committed
sibling nvfp4_experts_only-kv_fp8.yaml in this env.

Signed-off-by: Chenjie Luo
---
 .../ptq/nvfp4_mlp_only_mse-fp8_cast_kv.yaml        | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-fp8_cast_kv.yaml

diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-fp8_cast_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-fp8_cast_kv.yaml
new file mode 100644
index 00000000000..a3b690faa82
--- /dev/null
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-fp8_cast_kv.yaml
@@ -0,0 +1,54 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+imports:
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4
+  nvfp4_static: configs/numerics/nvfp4_static
+  kv_fp8_cast: configs/ptq/units/kv_fp8_cast
+
+metadata:
+  recipe_type: ptq
+  description: NVFP4 static weight (MSE FP8-scale sweep) and dynamic activation for all linear layers (W4A4), FP8 KV cache with constant amax.
+quantize:
+  algorithm:
+    method: mse
+    fp8_scale_sweep: true
+    # layerwise=false required for VLMs where the decoder layers are nested under
+    # `model.language_model.layers` (layerwise_calibrate can't find them otherwise).
+    layerwise: false
+  quant_cfg:
+    - $import: base_disable_all
+    - quantizer_name: '*mlp*weight_quantizer'
+      cfg:
+        $import: nvfp4_static
+    - quantizer_name: '*mlp*input_quantizer'
+      cfg:
+        $import: nvfp4
+    - quantizer_name: '*block_sparse_moe*weight_quantizer'
+      cfg:
+        $import: nvfp4_static
+    - quantizer_name: '*block_sparse_moe*input_quantizer'
+      cfg:
+        $import: nvfp4
+    - quantizer_name: '*.experts.*weight_quantizer'
+      cfg:
+        $import: nvfp4_static
+    - quantizer_name: '*.experts.*input_quantizer'
+      cfg:
+        $import: nvfp4
+    - $import: kv_fp8_cast
+    - $import: default_disabled_quantizers

From 9f08df06ade11a914a52a92289a6cda1a398d34d Mon Sep 17 00:00:00 2001
From: Chenjie Luo
Date: Thu, 7 May 2026 13:28:53 -0700
Subject: [PATCH 3/3] [Recipes] Address PR review feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- huggingface_example.sh: remove the bf16/fp16 shortcut block. The
  lowercase $qformat variable it referenced was bound by the
  `for qformat in $QFORMAT; do … done` whitelist loop deleted in the
  parent commit, so the shortcut became dead code (regression flagged
  by reviewers). Per consensus on the PR, drop the path entirely rather
  than reintroduce the binding — the bf16/fp16 case still goes through
  `python hf_ptq.py --qformat=...` and the TRT-LLM whitelist on line
  210 already covers it.
- huggingface_example.sh: quote $MODEL_PATH in the basename calls used
  to build MODEL_NAME (CodeRabbit shellcheck SC2086).
- nvfp4_mlp_only_mse-kv_fp8_cast.yaml: tighten metadata description
  from "all linear layers" to "MLP/MoE linear layers" — quantization is
  only enabled for *mlp* / *block_sparse_moe* / *.experts.* patterns.
- Rename both recipes to match the sibling `_-kv_` convention used by
  `nvfp4_default-kv_fp8_cast.yaml` etc.:
    nvfp4_experts_only_mse-fp8_cast_kv.yaml → nvfp4_experts_only_mse-kv_fp8_cast.yaml
    nvfp4_mlp_only_mse-fp8_cast_kv.yaml → nvfp4_mlp_only_mse-kv_fp8_cast.yaml

Pre-commit hook check-modelopt-recipes was skipped (same env-broken
torchvision issue as the prior commits); the renamed recipes were
verified independently with tools/precommit/check_modelopt_recipes.py
against the working-tree modelopt — both load and produce the expected
effective config.

Signed-off-by: Chenjie Luo
---
 examples/llm_ptq/scripts/huggingface_example.sh    | 15 ++-------------
 ...ml => nvfp4_experts_only_mse-kv_fp8_cast.yaml}  |  0
 ...v.yaml => nvfp4_mlp_only_mse-kv_fp8_cast.yaml}  |  2 +-
 3 files changed, 3 insertions(+), 14 deletions(-)
 rename modelopt_recipes/general/ptq/{nvfp4_experts_only_mse-fp8_cast_kv.yaml => nvfp4_experts_only_mse-kv_fp8_cast.yaml} (100%)
 rename modelopt_recipes/general/ptq/{nvfp4_mlp_only_mse-fp8_cast_kv.yaml => nvfp4_mlp_only_mse-kv_fp8_cast.yaml} (95%)

diff --git a/examples/llm_ptq/scripts/huggingface_example.sh b/examples/llm_ptq/scripts/huggingface_example.sh
index 693506929d9..5c7889cc341 100755
--- a/examples/llm_ptq/scripts/huggingface_example.sh
+++ b/examples/llm_ptq/scripts/huggingface_example.sh
@@ -65,9 +65,9 @@ QFORMAT_MODIFIED="${QFORMAT//,/_}"
 # directory or .yaml suffix) so each recipe gets its own SAVE_PATH.
 if [ -n "$RECIPE" ]; then
     RECIPE_TAG=$(basename "$RECIPE" .yaml | sed 's/[^0-9a-zA-Z\-]/_/g')
-    MODEL_NAME=$(basename $MODEL_PATH | sed 's/[^0-9a-zA-Z\-]/_/g')_recipe_${RECIPE_TAG}
+    MODEL_NAME=$(basename "$MODEL_PATH" | sed 's/[^0-9a-zA-Z\-]/_/g')_recipe_${RECIPE_TAG}
 else
-    MODEL_NAME=$(basename $MODEL_PATH | sed 's/[^0-9a-zA-Z\-]/_/g')_${QFORMAT_MODIFIED}${KV_CACHE_QUANT:+_kv_${KV_CACHE_QUANT}}
+    MODEL_NAME=$(basename "$MODEL_PATH" | sed 's/[^0-9a-zA-Z\-]/_/g')_${QFORMAT_MODIFIED}${KV_CACHE_QUANT:+_kv_${KV_CACHE_QUANT}}
 fi
 
 SAVE_PATH=${ROOT_SAVE_PATH}/saved_models_${MODEL_NAME}
@@ -160,17 +160,6 @@ fi
 
 if [[ $TASKS =~ "quant" ]] || [[ ! -d "$SAVE_PATH" ]] || [[ ! $(ls -A $SAVE_PATH) ]]; then
 
-    if [ "$qformat" == "bf16" ] || [ "$qformat" == "fp16" ]; then
-        if [ -d "$MODEL_PATH" ]; then
-            MODEL_CONFIG_EXIST=true
-            MODEL_CONFIG=$MODEL_PATH/config.json
-            for file in $MODEL_PATH/*; do ln -sf "$file" $SAVE_PATH/; done
-        else
-            echo "Please use the model directory where the config.json file is present."
-            exit 1
-        fi
-    fi
-
     if [[ "$MODEL_CONFIG_EXIST" == false ]]; then
         echo "Quantizing original model..."
         if [ -n "$RECIPE" ]; then
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only_mse-fp8_cast_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only_mse-kv_fp8_cast.yaml
similarity index 100%
rename from modelopt_recipes/general/ptq/nvfp4_experts_only_mse-fp8_cast_kv.yaml
rename to modelopt_recipes/general/ptq/nvfp4_experts_only_mse-kv_fp8_cast.yaml
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-fp8_cast_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-kv_fp8_cast.yaml
similarity index 95%
rename from modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-fp8_cast_kv.yaml
rename to modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-kv_fp8_cast.yaml
index a3b690faa82..875fb47c9b3 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-fp8_cast_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-kv_fp8_cast.yaml
@@ -22,7 +22,7 @@ imports:
 
 metadata:
   recipe_type: ptq
-  description: NVFP4 static weight (MSE FP8-scale sweep) and dynamic activation for all linear layers (W4A4), FP8 KV cache with constant amax.
+  description: NVFP4 static weight (MSE FP8-scale sweep) and dynamic activation for MLP/MoE linear layers (W4A4), FP8 KV cache with constant amax.
 quantize:
   algorithm:
     method: mse
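
Usage note: the standalone recipe check mentioned above can be
reproduced with something like the following. This is a sketch, not a
committed workflow; it assumes check_modelopt_recipes.py accepts recipe
paths as positional arguments, the way pre-commit file hooks are
normally invoked:

    python tools/precommit/check_modelopt_recipes.py \
        modelopt_recipes/general/ptq/nvfp4_experts_only_mse-kv_fp8_cast.yaml \
        modelopt_recipes/general/ptq/nvfp4_mlp_only_mse-kv_fp8_cast.yaml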