Skip to content
38 changes: 38 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,21 @@
"rhs": "Darwin"
}
},
{
"name": "llm-debug-vulkan",
"displayName": "LLM debug build with Vulkan",
"inherits": [
"llm-debug"
],
"cacheVariables": {
"EXECUTORCH_BUILD_VULKAN": "ON"
},
"condition": {
"type": "inList",
"string": "${hostSystemName}",
"list": ["Linux", "Windows"]
}
},
{
"name": "llm-metal-stats",
"displayName": "LLM Metal build with stats collection and logging",
Expand Down Expand Up @@ -354,6 +369,15 @@
],
"jobs": 0
},
{
"name": "llm-debug-vulkan-install",
"displayName": "Build and install LLM extension debug artifacts (Vulkan)",
"configurePreset": "llm-debug-vulkan",
"targets": [
"install"
],
"jobs": 0
},
{
"name": "llm-metal-stats-install",
"displayName": "Build and install LLM extension artifacts with Metal stats",
Expand Down Expand Up @@ -449,6 +473,20 @@
}
]
},
{
"name": "llm-debug-vulkan",
"displayName": "Configure, build and install ExecuTorch LLM extension with Vulkan enabled (Debug)",
"steps": [
{
"type": "configure",
"name": "llm-debug-vulkan"
},
{
"type": "build",
"name": "llm-debug-vulkan-install"
}
]
},
{
"name": "llm-metal-stats",
"displayName": "Configure, build and install ExecuTorch LLM extension with Metal stats and logging",
Expand Down
12 changes: 11 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
#
# ==============================================================================

.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-vulkan sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help

help:
@echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:"
Expand All @@ -109,6 +109,7 @@ help:
@echo " parakeet-cuda-debug - Build Parakeet runner with CUDA backend (debug mode)"
@echo " parakeet-cpu - Build Parakeet runner with CPU backend"
@echo " parakeet-metal - Build Parakeet runner with Metal backend (macOS only)"
@echo " parakeet-vulkan - Build Parakeet runner with Vulkan backend"
@echo " sortformer-cuda - Build Sortformer runner with CUDA backend"
@echo " sortformer-cpu - Build Sortformer runner with CPU backend"
@echo " silero-vad-cpu - Build Silero VAD runner with CPU backend"
Expand Down Expand Up @@ -219,6 +220,15 @@ parakeet-metal:
@echo "✓ Build complete!"
@echo " Binary: cmake-out/examples/models/parakeet/parakeet_runner"

# parakeet-vulkan: build the Parakeet runner against the Vulkan backend in two stages.
#   1. Run the top-level `llm-debug-vulkan` workflow preset, which configures with
#      EXECUTORCH_BUILD_VULKAN=ON and builds the `install` target.
#   2. Run the `parakeet-vulkan` workflow preset from examples/models/parakeet to
#      configure and build the `parakeet_runner` target.
# Stage 2 must follow stage 1.
# NOTE(review): stage 1 is a Debug preset, while the `parakeet-vulkan` build preset
# requests the Release configuration - confirm this mix is intentional.
# NOTE(review): both configure presets carry a Linux/Windows host condition, so this
# target is presumably not usable on macOS - verify and consider saying so in `help`.
parakeet-vulkan:
@echo "==> Building and installing ExecuTorch with Vulkan..."
cmake --workflow --preset llm-debug-vulkan
@echo "==> Building Parakeet runner with Vulkan..."
cd examples/models/parakeet && cmake --workflow --preset parakeet-vulkan
@echo ""
@echo "✓ Build complete!"
@echo " Binary: cmake-out/examples/models/parakeet/parakeet_runner"

sortformer-cuda:
@echo "==> Building and installing ExecuTorch with CUDA..."
cmake --workflow --preset llm-release-cuda
Expand Down
5 changes: 5 additions & 0 deletions examples/models/parakeet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ if(EXECUTORCH_BUILD_METAL)
executorch_target_link_options_shared_lib(metal_backend)
endif()

# When EXECUTORCH_BUILD_VULKAN is enabled, add the vulkan_backend target to the
# link_libraries list and apply the project's shared-lib link options to it.
# NOTE(review): executorch_target_link_options_shared_lib is defined outside this
# file; presumably it forces the whole backend library to be linked so that its
# static registration code is not discarded - confirm against its definition.
if(EXECUTORCH_BUILD_VULKAN)
list(APPEND link_libraries vulkan_backend)
executorch_target_link_options_shared_lib(vulkan_backend)
endif()

add_executable(parakeet_runner main.cpp timestamp_utils.cpp tokenizer_utils.cpp)
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
target_link_options_gc_sections(parakeet_runner)
Expand Down
34 changes: 34 additions & 0 deletions examples/models/parakeet/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,19 @@
"type": "equals",
"rhs": "Darwin"
}
},
{
"name": "parakeet-vulkan",
"displayName": "Parakeet runner (Vulkan)",
"inherits": ["parakeet-base"],
"cacheVariables": {
"EXECUTORCH_BUILD_VULKAN": "ON"
},
"condition": {
"type": "inList",
"string": "${hostSystemName}",
"list": ["Linux", "Windows"]
}
}
],
"buildPresets": [
Expand Down Expand Up @@ -85,6 +98,13 @@
"configurePreset": "parakeet-metal",
"configuration": "Release",
"targets": ["parakeet_runner"]
},
{
"name": "parakeet-vulkan",
"displayName": "Build Parakeet runner (Vulkan)",
"configurePreset": "parakeet-vulkan",
"configuration": "Release",
"targets": ["parakeet_runner"]
}
],
"workflowPresets": [
Expand Down Expand Up @@ -143,6 +163,20 @@
"name": "parakeet-metal"
}
]
},
{
"name": "parakeet-vulkan",
"displayName": "Configure and build Parakeet runner (Vulkan)",
"steps": [
{
"type": "configure",
"name": "parakeet-vulkan"
},
{
"type": "build",
"name": "parakeet-vulkan"
}
]
}
]
}
36 changes: 32 additions & 4 deletions examples/models/parakeet/export_parakeet_tdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import torch
import torchaudio

from executorch.examples.models.parakeet.quantize import quantize_model_
from executorch.exir import (
EdgeCompileConfig,
Expand Down Expand Up @@ -560,7 +559,25 @@ def _create_cuda_partitioners(programs, is_windows=False):
return partitioner, updated_programs


def lower_to_executorch(programs, metadata=None, backend="portable"):
def _create_vulkan_partitioners(programs, vulkan_force_fp16=False):
    """Build the per-program partitioner mapping for the Vulkan backend.

    Args:
        programs: Mapping keyed by program name. Only the keys are inspected
            here; the mapping itself is returned unchanged.
        vulkan_force_fp16: Forwarded to every ``VulkanPartitioner`` as the
            ``force_fp16`` compile option. Defaults to ``False``.

    Returns:
        A ``(partitioner, programs)`` tuple. ``partitioner`` has one entry per
        key of ``programs``: an empty list for ``"preprocessor"`` (so that
        program gets no Vulkan partitioner) and a list holding one freshly
        constructed ``VulkanPartitioner`` for every other key.
    """
    # Imported inside the function so the Vulkan backend package is only
    # imported when this lowering path is actually taken.
    from executorch.backends.vulkan.partitioner.vulkan_partitioner import (
        VulkanPartitioner,
    )

    print("\nLowering to ExecuTorch with Vulkan...")

    # Each non-preprocessor program gets its own partitioner instance and its
    # own options dict, exactly as the loop-based version did.
    partitioner = {
        name: (
            []
            if name == "preprocessor"
            else [VulkanPartitioner({"force_fp16": vulkan_force_fp16})]
        )
        for name in programs
    }
    return partitioner, programs


def lower_to_executorch(
programs, metadata=None, backend="portable", vulkan_force_fp16=False
):
if backend == "xnnpack":
partitioner, programs = _create_xnnpack_partitioners(programs)
elif backend == "metal":
Expand All @@ -569,6 +586,10 @@ def lower_to_executorch(programs, metadata=None, backend="portable"):
partitioner, programs = _create_cuda_partitioners(
programs, is_windows=(backend == "cuda-windows")
)
elif backend == "vulkan":
partitioner, programs = _create_vulkan_partitioners(
programs, vulkan_force_fp16=vulkan_force_fp16
)
else:
print("\nLowering to ExecuTorch...")
partitioner = []
Expand Down Expand Up @@ -607,7 +628,7 @@ def main():
"--backend",
type=str,
default="xnnpack",
choices=["portable", "xnnpack", "metal", "cuda", "cuda-windows"],
choices=["portable", "xnnpack", "metal", "cuda", "cuda-windows", "vulkan"],
help="Backend for acceleration (default: xnnpack)",
)
parser.add_argument(
Expand Down Expand Up @@ -672,6 +693,8 @@ def main():
help="Group size for embedding quantization (default: 0 = per-axis)",
)

parser.add_argument("--vulkan_force_fp16", action="store_true")

args = parser.parse_args()

# Validate dtype
Expand Down Expand Up @@ -719,7 +742,12 @@ def main():
qembedding_group_size=args.qembedding_group_size,
)

et = lower_to_executorch(programs, metadata=metadata, backend=args.backend)
et = lower_to_executorch(
programs,
metadata=metadata,
backend=args.backend,
vulkan_force_fp16=args.vulkan_force_fp16,
)

pte_path = os.path.join(args.output_dir, "model.pte")
print(f"\nSaving ExecuTorch program to: {pte_path}")
Expand Down
Loading