pytorch · meta-codesync · Mar 18, 2026 · Mar 10, 2026 · Mar 11, 2026 · Mar 11, 2026
@@ -217,6 +217,21 @@
         "rhs": "Darwin"
       }
     },
+    {
+      "name": "llm-debug-vulkan",
+      "displayName": "LLM debug build with Vulkan",
+      "inherits": [
+        "llm-debug"
+      ],
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_VULKAN": "ON"
+      },
+      "condition": {
+        "type": "inList",
+        "string": "${hostSystemName}",
+        "list": ["Linux", "Windows"]
+      }
+    },
     {
       "name": "llm-metal-stats",
       "displayName": "LLM Metal build with stats collection and logging",
@@ -354,6 +369,15 @@
       ],
       "jobs": 0
     },
+    {
+      "name": "llm-debug-vulkan-install",
+      "displayName": "Build and install LLM extension debug artifacts (Vulkan)",
+      "configurePreset": "llm-debug-vulkan",
+      "targets": [
+        "install"
+      ],
+      "jobs": 0
+    },
     {
       "name": "llm-metal-stats-install",
       "displayName": "Build and install LLM extension artifacts with Metal stats",
@@ -449,6 +473,20 @@
         }
       ]
     },
+    {
+      "name": "llm-debug-vulkan",
+      "displayName": "Configure, build and install ExecuTorch LLM extension with Vulkan enabled (Debug)",
+      "steps": [
+        {
+          "type": "configure",
+          "name": "llm-debug-vulkan"
+        },
+        {
+          "type": "build",
+          "name": "llm-debug-vulkan-install"
+        }
+      ]
+    },
     {
       "name": "llm-metal-stats",
       "displayName": "Configure, build and install ExecuTorch LLM extension with Metal stats and logging",

diff --git a/Makefile b/Makefile
@@ -91,7 +91,7 @@
 #
 # ==============================================================================
 
-.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
+.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-vulkan sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
 
 help:
 	@echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:"
@@ -109,6 +109,7 @@ help:
 	@echo "  parakeet-cuda-debug - Build Parakeet runner with CUDA backend (debug mode)"
 	@echo "  parakeet-cpu        - Build Parakeet runner with CPU backend"
 	@echo "  parakeet-metal      - Build Parakeet runner with Metal backend (macOS only)"
+	@echo "  parakeet-vulkan     - Build Parakeet runner with Vulkan backend"
 	@echo "  sortformer-cuda     - Build Sortformer runner with CUDA backend"
 	@echo "  sortformer-cpu      - Build Sortformer runner with CPU backend"
 	@echo "  silero-vad-cpu      - Build Silero VAD runner with CPU backend"
@@ -219,6 +220,15 @@ parakeet-metal:
 	@echo "✓ Build complete!"
 	@echo "  Binary: cmake-out/examples/models/parakeet/parakeet_runner"
 
+# Build the Parakeet runner with the Vulkan backend in two steps:
+#   1. run the top-level `llm-debug-vulkan` CMake workflow preset;
+#   2. run the `parakeet-vulkan` workflow preset from examples/models/parakeet.
+# NOTE(review): step 1 uses a debug preset — confirm this is intended for the
+# runner built in step 2.
+parakeet-vulkan:
+	@echo "==> Building and installing ExecuTorch with Vulkan..."
+	cmake --workflow --preset llm-debug-vulkan
+	@echo "==> Building Parakeet runner with Vulkan..."
+	cd examples/models/parakeet && cmake --workflow --preset parakeet-vulkan
+	@echo ""
+	@echo "✓ Build complete!"
+	@echo "  Binary: cmake-out/examples/models/parakeet/parakeet_runner"
+
 sortformer-cuda:
 	@echo "==> Building and installing ExecuTorch with CUDA..."
 	cmake --workflow --preset llm-release-cuda

@@ -91,6 +91,11 @@ if(EXECUTORCH_BUILD_METAL)
   executorch_target_link_options_shared_lib(metal_backend)
 endif()
 
+# When EXECUTORCH_BUILD_VULKAN is set, append vulkan_backend to the
+# link_libraries list and apply executorch_target_link_options_shared_lib to it.
+# NOTE(review): presumably link_libraries is later linked into parakeet_runner
+# and the helper keeps the backend's registration symbols — confirm.
+if(EXECUTORCH_BUILD_VULKAN)
+  list(APPEND link_libraries vulkan_backend)
+  executorch_target_link_options_shared_lib(vulkan_backend)
+endif()
+
 add_executable(parakeet_runner main.cpp timestamp_utils.cpp tokenizer_utils.cpp)
 if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
   target_link_options_gc_sections(parakeet_runner)

@@ -55,6 +55,19 @@
                 "type": "equals",
                 "rhs": "Darwin"
             }
+        },
+        {
+            "name": "parakeet-vulkan",
+            "displayName": "Parakeet runner (Vulkan)",
+            "inherits": ["parakeet-base"],
+            "cacheVariables": {
+                "EXECUTORCH_BUILD_VULKAN": "ON"
+            },
+            "condition": {
+                "type": "inList",
+                "string": "${hostSystemName}",
+                "list": ["Linux", "Windows"]
+            }
         }
     ],
     "buildPresets": [
@@ -85,6 +98,13 @@
             "configurePreset": "parakeet-metal",
             "configuration": "Release",
             "targets": ["parakeet_runner"]
+        },
+        {
+            "name": "parakeet-vulkan",
+            "displayName": "Build Parakeet runner (Vulkan)",
+            "configurePreset": "parakeet-vulkan",
+            "configuration": "Release",
+            "targets": ["parakeet_runner"]
         }
     ],
     "workflowPresets": [
@@ -143,6 +163,20 @@
                     "name": "parakeet-metal"
                 }
             ]
+        },
+        {
+            "name": "parakeet-vulkan",
+            "displayName": "Configure and build Parakeet runner (Vulkan)",
+            "steps": [
+                {
+                    "type": "configure",
+                    "name": "parakeet-vulkan"
+                },
+                {
+                    "type": "build",
+                    "name": "parakeet-vulkan"
+                }
+            ]
         }
     ]
 }
@@ -9,7 +9,6 @@
 
 import torch
 import torchaudio
-
 from executorch.examples.models.parakeet.quantize import quantize_model_
 from executorch.exir import (
     EdgeCompileConfig,
@@ -560,7 +559,25 @@ def _create_cuda_partitioners(programs, is_windows=False):
     return partitioner, updated_programs
 
 
-def lower_to_executorch(programs, metadata=None, backend="portable"):
+def _create_vulkan_partitioners(programs, vulkan_force_fp16=False):
+    """Build the per-program partitioner mapping for the Vulkan backend.
+
+    Args:
+        programs: Mapping keyed by program name; only its keys are read here.
+        vulkan_force_fp16: Value passed to each VulkanPartitioner under the
+            "force_fp16" option key.
+
+    Returns:
+        A tuple ``(partitioner, programs)``. ``partitioner`` maps every key of
+        ``programs`` to a list holding one VulkanPartitioner, except
+        "preprocessor", which maps to an empty list. ``programs`` is returned
+        unchanged.
+    """
+    # Function-local import: the Vulkan partitioner module is only imported
+    # when this function runs.
+    from executorch.backends.vulkan.partitioner.vulkan_partitioner import (
+        VulkanPartitioner,
+    )
+
+    print("\nLowering to ExecuTorch with Vulkan...")
+    partitioner = {}
+    for key in programs.keys():
+        if key == "preprocessor":
+            # The preprocessor gets no partitioners (presumably so it is not
+            # delegated to Vulkan — confirm).
+            partitioner[key] = []
+        else:
+            partitioner[key] = [VulkanPartitioner({"force_fp16": vulkan_force_fp16})]
+    return partitioner, programs
+
+
+def lower_to_executorch(
+    programs, metadata=None, backend="portable", vulkan_force_fp16=False
+):
     if backend == "xnnpack":
         partitioner, programs = _create_xnnpack_partitioners(programs)
     elif backend == "metal":
@@ -569,6 +586,10 @@ def lower_to_executorch(programs, metadata=None, backend="portable"):
         partitioner, programs = _create_cuda_partitioners(
             programs, is_windows=(backend == "cuda-windows")
         )
+    elif backend == "vulkan":
+        partitioner, programs = _create_vulkan_partitioners(
+            programs, vulkan_force_fp16=vulkan_force_fp16
+        )
     else:
         print("\nLowering to ExecuTorch...")
         partitioner = []
@@ -607,7 +628,7 @@ def main():
         "--backend",
         type=str,
         default="xnnpack",
-        choices=["portable", "xnnpack", "metal", "cuda", "cuda-windows"],
+        choices=["portable", "xnnpack", "metal", "cuda", "cuda-windows", "vulkan"],
         help="Backend for acceleration (default: xnnpack)",
     )
     parser.add_argument(
@@ -672,6 +693,8 @@ def main():
         help="Group size for embedding quantization (default: 0 = per-axis)",
     )
 
+    parser.add_argument("--vulkan_force_fp16", action="store_true")
+
     args = parser.parse_args()
 
     # Validate dtype
@@ -719,7 +742,12 @@ def main():
         qembedding_group_size=args.qembedding_group_size,
     )
 
-    et = lower_to_executorch(programs, metadata=metadata, backend=args.backend)
+    et = lower_to_executorch(
+        programs,
+        metadata=metadata,
+        backend=args.backend,
+        vulkan_force_fp16=args.vulkan_force_fp16,
+    )
 
     pte_path = os.path.join(args.output_dir, "model.pte")
     print(f"\nSaving ExecuTorch program to: {pte_path}")