From 9ea327256cfae5c592b71adcc2e7e133ea05070e Mon Sep 17 00:00:00 2001 From: Siddartha Pothapragada Date: Thu, 11 Jun 2026 11:53:24 -0400 Subject: [PATCH 1/2] Add Arduino library support for ExecuTorch Add tooling to package the ExecuTorch runtime as an Arduino library, enabling PyTorch model inference on Arduino microcontrollers. The library vendors ET runtime sources, CMSIS-NN kernels, and portable ops into a self-contained package that compiles under the Arduino build system. Key components: - `build_arduino_library.sh` assembles the distributable library from repository sources (no vendored copies checked in) - `ExecuTorchArduino.h` configures the build environment for Arduino (fixes for std::variant, cmake_macros.h stub, build defines) - `platform_stubs.c` provides C library stubs for the LLEXT environment - Example sketches using the native ExecuTorch C++ API (no wrapper layer) - Zephyr board config for Arduino Uno Q (STM32U585, Cortex-M33) Validated on Arduino Uno Q with DS-CNN keyword spotting model (int8, CMSIS-NN): 390+ source files compile, 106 KB flash (13%), 91 KB RAM. Authored with assistance from Claude. 
--- examples/arduino/.gitignore | 2 + examples/arduino/ExecuTorchArduino.h | 24 ++ examples/arduino/README.md | 203 ++++++++++++++++ examples/arduino/build_arduino_library.sh | 230 ++++++++++++++++++ .../HelloExecuTorch/HelloExecuTorch.ino | 22 ++ .../KeywordSpotting/KeywordSpotting.ino | 121 +++++++++ examples/arduino/library.properties | 10 + examples/arduino/platform_stubs.c | 18 ++ .../boards/arduino_uno_q.conf | 14 ++ 9 files changed, 644 insertions(+) create mode 100644 examples/arduino/.gitignore create mode 100644 examples/arduino/ExecuTorchArduino.h create mode 100644 examples/arduino/README.md create mode 100755 examples/arduino/build_arduino_library.sh create mode 100644 examples/arduino/examples/HelloExecuTorch/HelloExecuTorch.ino create mode 100644 examples/arduino/examples/KeywordSpotting/KeywordSpotting.ino create mode 100644 examples/arduino/library.properties create mode 100644 examples/arduino/platform_stubs.c create mode 100644 zephyr/samples/hello-executorch/boards/arduino_uno_q.conf diff --git a/examples/arduino/.gitignore b/examples/arduino/.gitignore new file mode 100644 index 00000000000..453c723c671 --- /dev/null +++ b/examples/arduino/.gitignore @@ -0,0 +1,2 @@ +# Generated by build_arduino_library.sh — do not check in +arduino_lib/ diff --git a/examples/arduino/ExecuTorchArduino.h b/examples/arduino/ExecuTorchArduino.h new file mode 100644 index 00000000000..240b99c0555 --- /dev/null +++ b/examples/arduino/ExecuTorchArduino.h @@ -0,0 +1,24 @@ +#pragma once + +// ExecuTorchArduino — ExecuTorch runtime for Arduino boards +// +// This header configures ExecuTorch for the Arduino build environment +// and exposes the native ET C++ API. There is no wrapper — use +// Program::load(), Method::execute(), etc. directly. + +// Arduino's custom header omits , which breaks +// std::bad_variant_access in . Include it first. 
+#include + +// ExecuTorch build configuration for Arduino +#define C10_USING_CUSTOM_GENERATED_MACROS +#define ET_ENABLE_DEPRECATED_CONSTANT_BUFFER 0 +#define FLATBUFFERS_MAX_ALIGNMENT 1024 + +// Core runtime headers +#include +#include +#include +#include +#include +#include diff --git a/examples/arduino/README.md b/examples/arduino/README.md new file mode 100644 index 00000000000..efa7f251a18 --- /dev/null +++ b/examples/arduino/README.md @@ -0,0 +1,203 @@ +# ExecuTorch Arduino Library + +Run PyTorch models on Arduino microcontrollers using ExecuTorch. + +This directory contains everything needed to package ExecuTorch as an +Arduino library. A build script vendors the runtime sources from this +repository into a self-contained library that Arduino users install +through the Library Manager or by copying into their libraries folder. + +## How It Works + +``` +PyTorch Model ──► torch.export ──► .pte file ──► model.h (C array) + │ + Arduino Sketch (.ino) + #include + #include "model.h" + │ + arduino-cli compile ──► Upload ──► Runs on board +``` + +### The three pieces + +1. **The library** (`arduino_lib/ExecuTorchArduino/`) — the ExecuTorch + runtime, CMSIS-NN kernels, and portable ops packaged for the Arduino + build system. Generated by `build_arduino_library.sh`; not checked in. + +2. **The model** (`model.h`) — a `.pte` file converted to a C byte array. + Each user brings their own model, exported from PyTorch with the + Cortex-M backend. + +3. **The sketch** (`.ino`) — a standard Arduino program that loads the + model, feeds it input, and reads the output. Uses the native + ExecuTorch C++ API (`Program::load`, `Method::execute`, etc.). 
+ +## Supported Boards + +| Board | MCU | Status | +|-------|-----|--------| +| Arduino Uno Q | STM32U585 (Cortex-M33) | Tested | +| Arduino Nano 33 BLE | nRF52840 (Cortex-M4F) | Untested | +| Arduino Giga R1 WiFi | STM32H747 (Cortex-M7) | Untested | +| Arduino Portenta H7 | STM32H747 (Cortex-M7) | Untested | + +CMSIS-NN accelerated ops work on any board with an ARM Cortex-M processor +with DSP extensions. Portable ops work on any architecture. + +## Quick Start + +### 1. Build the Arduino library + +```bash +cd examples/arduino +./build_arduino_library.sh +``` + +This copies the required ExecuTorch sources from the repository into +`arduino_lib/ExecuTorchArduino/`, ready for Arduino. + +### 2. Install the library + +Copy the generated library into your Arduino libraries folder: + +```bash +cp -r arduino_lib/ExecuTorchArduino ~/Arduino/libraries/ +``` + +Or zip the folder and install the archive with `arduino-cli` (requires `library.enable_unsafe_install`): + +```bash +(cd arduino_lib && zip -r ExecuTorchArduino.zip ExecuTorchArduino) +arduino-cli lib install --zip-path arduino_lib/ExecuTorchArduino.zip +``` + +### 3. Export a model + +Export a PyTorch model to `.pte` format with Cortex-M quantization, then +convert to a C header: + +```bash +python export_model.py --model my_model.pt --target cortex-m33 --output model.h +``` + +Or use one of the pre-exported models in the `examples/` directory. + +### 4. 
Write a sketch + +```cpp +#include <ExecuTorchArduino.h> +#include "model.h" + +using executorch::extension::BufferDataLoader; +using executorch::runtime::Error; +using executorch::runtime::HierarchicalAllocator; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::MemoryManager; +using executorch::runtime::Method; +using executorch::runtime::MethodMeta; +using executorch::runtime::Program; +using executorch::runtime::Result; +using executorch::runtime::Span; + +alignas(16) uint8_t method_pool[64 * 1024]; +alignas(16) uint8_t temp_pool[8 * 1024]; + +void setup() { + Serial.begin(115200); + delay(2000); + + executorch::runtime::runtime_init(); + + auto loader = BufferDataLoader(model_pte, model_pte_size); + Result<Program> program = Program::load(&loader); + if (!program.ok()) { + Serial.println("Failed to load program"); + return; + } + + // ... load method, set inputs, execute, read outputs + // See examples/ for complete working sketches. +} + +void loop() { + // Run inference periodically + delay(2000); +} +``` + +The sketch uses the **native ExecuTorch C++ API** — the same API used on +Linux, Android, and bare-metal targets. No wrapper layer, no +Arduino-specific abstractions. + +### 5. 
Compile and upload + +```bash +arduino-cli compile --fqbn arduino:zephyr:unoq MySketch +arduino-cli upload --fqbn arduino:zephyr:unoq -p /dev/ttyUSB0 MySketch +``` + +## What is inside the library + +The `build_arduino_library.sh` script assembles these components from +the ExecuTorch repository: + +| Component | Source in repo | Purpose | +|-----------|---------------|---------| +| ET Runtime | `runtime/executor/`, `runtime/core/`, `runtime/kernel/`, `runtime/platform/` | Model loading, memory management, op dispatch | +| Portable Ops | `kernels/portable/` | Software op implementations (any CPU) | +| Cortex-M Ops | `backends/cortex_m/ops/` | CMSIS-NN accelerated int8 ops | +| CMSIS-NN | fetched by cmake / Zephyr module | ARM's optimized DSP kernels | +| flatcc | `third-party/flatcc/` | .pte file parsing | +| flatbuffers | `third-party/flatbuffers/` | Schema headers | +| c10 | `runtime/core/portable_type/c10/` | Core type definitions | + +The library uses no external dependencies beyond what the Arduino board +core provides. + +## Arduino-specific patches + +The build script applies these patches to make ExecuTorch compile under +Arduino's build system: + +1. **`#include ` before ``** — Arduino's custom + `` header omits ``, breaking `std::bad_variant_access`. + +2. **`cmake_macros.h` stub** — c10/torch headers expect a cmake-generated + file. The stub defines `C10_BUILD_SHARED_LIBS` and sets the `C10_USE_*` flags to 0. + +3. **`platform_stubs.c`** — provides `_Exit()` and `fprintf()` for the + LLEXT environment on boards that lack them. 
+ +## Development + +### Updating the library + +After modifying ExecuTorch sources, regenerate the library: + +```bash +./build_arduino_library.sh # rebuild +./build_arduino_library.sh --clean # remove generated output +``` + +### Testing + +```bash +arduino-cli compile --fqbn arduino:zephyr:unoq examples/HelloExecuTorch +arduino-cli upload --fqbn arduino:zephyr:unoq -p /dev/ttyUSB0 examples/HelloExecuTorch +arduino-cli monitor -p /dev/ttyUSB0 --config baudrate=115200 +``` + +### Publishing to Arduino Library Manager + +The library is published by adding its repository URL to the +[Arduino Library Registry](https://github.com/arduino/library-registry). +After the initial registration, new git tags are picked up automatically. + +## Build Validation + +Tested on Arduino Uno Q (STM32U585, Cortex-M33 @ 160 MHz): + +- **390+ source files** compile with zero errors +- **Flash**: 106 KB used (13% of 786 KB) +- **RAM**: 91 KB used (69% of 131 KB) +- **Model**: DS-CNN keyword spotting, int8 quantized via CMSIS-NN, 52.6 KB diff --git a/examples/arduino/build_arduino_library.sh b/examples/arduino/build_arduino_library.sh new file mode 100755 index 00000000000..9bad7d7aa8a --- /dev/null +++ b/examples/arduino/build_arduino_library.sh @@ -0,0 +1,230 @@ +#!/bin/bash +# build_arduino_library.sh — Assemble the ExecuTorch Arduino library +# from sources in this repository. +# +# Usage: +# ./build_arduino_library.sh # build the library +# ./build_arduino_library.sh --clean # remove generated output +# ./build_arduino_library.sh --bump patch # 0.1.0 → 0.1.1 +# ./build_arduino_library.sh --bump minor # 0.1.0 → 0.2.0 +# ./build_arduino_library.sh --bump major # 0.1.0 → 1.0.0 +# +# Output: arduino_lib/ExecuTorchArduino/ (self-contained, installable) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +ET_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +OUT_DIR="$SCRIPT_DIR/arduino_lib/ExecuTorchArduino" +PROPS="$SCRIPT_DIR/library.properties" + +if [ "${1:-}" = "--clean" ]; then + echo "Cleaning generated library..." + rm -rf "$SCRIPT_DIR/arduino_lib" + echo "Done." + exit 0 +fi + +if [ "${1:-}" = "--bump" ]; then + PART="${2:-patch}" + CURRENT=$(grep "^version=" "$PROPS" | cut -d= -f2) + IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT" + case "$PART" in + major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;; + minor) MINOR=$((MINOR + 1)); PATCH=0 ;; + patch) PATCH=$((PATCH + 1)) ;; + *) echo "Usage: $0 --bump [major|minor|patch]"; exit 1 ;; + esac + NEW="$MAJOR.$MINOR.$PATCH" + sed -i '' "s/^version=.*/version=$NEW/" "$PROPS" 2>/dev/null || \ + sed -i "s/^version=.*/version=$NEW/" "$PROPS" + echo "Version: $CURRENT → $NEW" + exit 0 +fi + +echo "=== Building ExecuTorch Arduino Library ===" +echo " ET repo: $ET_ROOT" +echo " Output: $OUT_DIR" + +rm -rf "$OUT_DIR" +mkdir -p "$OUT_DIR/src" "$OUT_DIR/examples" + +# ───────────────────────────────────────────────────────── +# 1. Copy library metadata, wrapper header, and stubs +# ───────────────────────────────────────────────────────── +cp "$SCRIPT_DIR/library.properties" "$OUT_DIR/" +cp "$SCRIPT_DIR/ExecuTorchArduino.h" "$OUT_DIR/src/" +cp "$SCRIPT_DIR/platform_stubs.c" "$OUT_DIR/src/" +cp -r "$SCRIPT_DIR/examples/"* "$OUT_DIR/examples/" + +echo "[1/7] Metadata and examples copied" + +# ───────────────────────────────────────────────────────── +# 2. 
Vendor ET runtime sources +# ───────────────────────────────────────────────────────── +ET_SRC="$OUT_DIR/src/executorch" +mkdir -p "$ET_SRC" + +for dir in runtime/core runtime/executor runtime/kernel \ + runtime/platform runtime/backend; do + mkdir -p "$ET_SRC/$dir" + find "$ET_ROOT/$dir" -maxdepth 1 -name "*.h" -exec cp {} "$ET_SRC/$dir/" \; + find "$ET_ROOT/$dir" -maxdepth 1 -name "*.cpp" -exec cp {} "$ET_SRC/$dir/" \; +done + +# Subdirectories with headers +for dir in runtime/core/exec_aten runtime/core/exec_aten/util \ + runtime/core/portable_type runtime/platform/default; do + mkdir -p "$ET_SRC/$dir" + find "$ET_ROOT/$dir" -maxdepth 1 -name "*.h" -exec cp {} "$ET_SRC/$dir/" \; + find "$ET_ROOT/$dir" -maxdepth 1 -name "*.cpp" -exec cp {} "$ET_SRC/$dir/" \; +done + +# Extension data loader +mkdir -p "$ET_SRC/extension/data_loader" +cp "$ET_ROOT/extension/data_loader/"*.h "$ET_SRC/extension/data_loader/" +cp "$ET_ROOT/extension/data_loader/buffer_data_loader.cpp" \ + "$ET_SRC/extension/data_loader/" 2>/dev/null || true + +# Runner util headers +mkdir -p "$ET_SRC/extension/runner_util" +cp "$ET_ROOT/extension/runner_util/"*.h "$ET_SRC/extension/runner_util/" 2>/dev/null || true + +# Schema headers (generated — need a prior cmake build) +mkdir -p "$ET_SRC/schema" +cp "$ET_ROOT/schema/"*.h "$ET_SRC/schema/" 2>/dev/null || true +# Look for generated headers in common build dirs +for build_dir in "$ET_ROOT/cmake-out" "$ET_ROOT/cmake-out-mac" \ + "$ET_ROOT/outputs/build_uno_q"; do + if [ -d "$build_dir/schema/include/executorch/schema" ]; then + cp "$build_dir/schema/include/executorch/schema/"*.h "$ET_SRC/schema/" + break + fi +done + +echo "[2/7] ET runtime sources copied" + +# ───────────────────────────────────────────────────────── +# 3. 
Vendor portable kernels +# ───────────────────────────────────────────────────────── +mkdir -p "$ET_SRC/kernels/portable/cpu/util" \ + "$ET_SRC/kernels/portable/cpu/pattern" + +# Copy all portable op sources and headers +find "$ET_ROOT/kernels/portable/cpu" -maxdepth 1 \( -name "*.cpp" -o -name "*.h" \) \ + -exec cp {} "$ET_SRC/kernels/portable/cpu/" \; +cp "$ET_ROOT/kernels/portable/cpu/util/"*.h "$ET_SRC/kernels/portable/cpu/util/" +cp "$ET_ROOT/kernels/portable/cpu/util/"*.cpp "$ET_SRC/kernels/portable/cpu/util/" 2>/dev/null || true +cp "$ET_ROOT/kernels/portable/cpu/pattern/"*.h "$ET_SRC/kernels/portable/cpu/pattern/" +cp "$ET_ROOT/kernels/portable/cpu/pattern/"*.cpp "$ET_SRC/kernels/portable/cpu/pattern/" 2>/dev/null || true + +echo "[3/7] Portable kernels copied" + +# ───────────────────────────────────────────────────────── +# 4. Vendor Cortex-M backend ops +# ───────────────────────────────────────────────────────── +mkdir -p "$ET_SRC/backends/cortex_m/ops" +cp "$ET_ROOT/backends/cortex_m/ops/"*.cpp "$ET_SRC/backends/cortex_m/ops/" +cp "$ET_ROOT/backends/cortex_m/ops/"*.h "$ET_SRC/backends/cortex_m/ops/" + +echo "[4/7] Cortex-M ops copied" + +# ───────────────────────────────────────────────────────── +# 5. 
Vendor third-party dependencies +# ───────────────────────────────────────────────────────── + +# c10 / torch headers +cp -r "$ET_ROOT/runtime/core/portable_type/c10/c10" "$OUT_DIR/src/c10" +cp -r "$ET_ROOT/runtime/core/portable_type/c10/torch" "$OUT_DIR/src/torch" + +# cmake_macros.h stub +mkdir -p "$OUT_DIR/src/torch/headeronly/macros" +cat > "$OUT_DIR/src/torch/headeronly/macros/cmake_macros.h" << 'STUB' +#pragma once +#define C10_BUILD_SHARED_LIBS +#define C10_USE_GLOG 0 +#define C10_USE_MINIMAL_GLOG 0 +#define C10_USE_GFLAGS 0 +STUB + +# flatcc runtime +mkdir -p "$OUT_DIR/src/flatcc" +cp -r "$ET_ROOT/third-party/flatcc/include/flatcc" "$OUT_DIR/src/flatcc/" +mkdir -p "$OUT_DIR/src/flatcc/runtime" +cp "$ET_ROOT/third-party/flatcc/src/runtime/"*.c "$OUT_DIR/src/flatcc/runtime/" + +# flatbuffers headers +cp -r "$ET_ROOT/third-party/flatbuffers/include/flatbuffers" "$OUT_DIR/src/flatbuffers" + +# CMSIS-NN (from Zephyr workspace or cmake fetchcontent) +CMSIS_NN="" +for candidate in \ + "$ET_ROOT/outputs/zephyrproject/modules/lib/cmsis-nn" \ + "$ET_ROOT/third-party/cmsis-nn" \ + "$ET_ROOT/backends/arm/third-party/cmsis-nn/CMSIS-NN"; do + if [ -d "$candidate/Source" ]; then + CMSIS_NN="$candidate" + break + fi +done + +if [ -n "$CMSIS_NN" ]; then + mkdir -p "$OUT_DIR/src/cmsis-nn" + cp -r "$CMSIS_NN/Source" "$OUT_DIR/src/cmsis-nn/" + cp "$CMSIS_NN/Include/"*.h "$OUT_DIR/src/" 2>/dev/null || true + if [ -d "$CMSIS_NN/Include/Internal" ]; then + mkdir -p "$OUT_DIR/src/Internal" + cp "$CMSIS_NN/Include/Internal/"*.h "$OUT_DIR/src/Internal/" + fi + echo "[5/7] CMSIS-NN copied from $CMSIS_NN" +else + echo "[5/7] WARNING: CMSIS-NN not found. Cortex-M ops will not link." 
+fi + +# CMSIS Core headers (for arm_math_types.h) +for candidate in \ + "$ET_ROOT/outputs/zephyrproject/modules/hal/cmsis_6/CMSIS/Core/Include" \ + "$ET_ROOT/third-party/cmsis/CMSIS/Core/Include"; do + if [ -d "$candidate" ]; then + cp "$candidate/"*.h "$OUT_DIR/src/" 2>/dev/null || true + break + fi +done + +echo "[6/7] Third-party dependencies copied" + +# ───────────────────────────────────────────────────────── +# 6. Apply Arduino-specific patches +# ───────────────────────────────────────────────────────── + +# Fix: #include before in all ET headers +find "$OUT_DIR/src/executorch" -name "*.h" -exec \ + grep -l '#include ' {} \; | while read f; do + sed -i '' 's|#include |#include \n#include |' "$f" 2>/dev/null || \ + sed -i 's|#include |#include \n#include |' "$f" +done + +# Remove test files, ATen-specific files +find "$OUT_DIR" -path "*testing*" -delete 2>/dev/null || true +find "$OUT_DIR" -name "*_aten.cpp" -delete 2>/dev/null || true +find "$OUT_DIR" -path "*test*" -name "*.cpp" -delete 2>/dev/null || true + +echo "[7/7] Arduino patches applied" + +# ───────────────────────────────────────────────────────── +# Summary +# ───────────────────────────────────────────────────────── +NSRC=$(find "$OUT_DIR/src" -name "*.cpp" -o -name "*.c" | wc -l | tr -d ' ') +NHDR=$(find "$OUT_DIR/src" -name "*.h" | wc -l | tr -d ' ') + +echo "" +echo "=== Library built ===" +echo " Location: $OUT_DIR" +echo " Sources: $NSRC" +echo " Headers: $NHDR" +echo "" +echo "Install:" +echo " cp -r $OUT_DIR ~/Arduino/libraries/" +echo "" +echo "Or clean up:" +echo " $0 --clean" diff --git a/examples/arduino/examples/HelloExecuTorch/HelloExecuTorch.ino b/examples/arduino/examples/HelloExecuTorch/HelloExecuTorch.ino new file mode 100644 index 00000000000..51c6c61f8e4 --- /dev/null +++ b/examples/arduino/examples/HelloExecuTorch/HelloExecuTorch.ino @@ -0,0 +1,22 @@ +// HelloExecuTorch — Minimal ExecuTorch sketch +// +// Initializes the ExecuTorch runtime and prints confirmation over 
Serial. +// Use this to verify the library compiles and runs on your board before +// loading a model. + +#include + +void setup() { + Serial.begin(115200); + delay(2000); + + Serial.println("=== ExecuTorch Arduino ==="); + + executorch::runtime::runtime_init(); + Serial.println("Runtime initialized."); +} + +void loop() { + Serial.println("ExecuTorch ready"); + delay(5000); +} diff --git a/examples/arduino/examples/KeywordSpotting/KeywordSpotting.ino b/examples/arduino/examples/KeywordSpotting/KeywordSpotting.ino new file mode 100644 index 00000000000..5547628437a --- /dev/null +++ b/examples/arduino/examples/KeywordSpotting/KeywordSpotting.ino @@ -0,0 +1,121 @@ +// KeywordSpotting — DS-CNN keyword spotting with ExecuTorch +// +// Runs a quantized DS-CNN model (MLPerf Tiny KWS benchmark) on hardcoded +// MFCC test inputs and prints the detected keyword. The model classifies +// 12 keywords: silence, unknown, yes, no, up, down, left, right, on, off, +// stop, go. +// +// The model uses CMSIS-NN accelerated int8 ops on Cortex-M boards. 
+// +// Prerequisites: +// - model.h : DS-CNN .pte exported with CortexMQuantizer, as a C array +// - test_inputs.h : MFCC features from real audio, as float arrays + +#include +#include "model.h" +#include "test_inputs.h" + +using executorch::extension::BufferDataLoader; +using executorch::runtime::Error; +using executorch::runtime::EValue; +using executorch::runtime::HierarchicalAllocator; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::MemoryManager; +using executorch::runtime::Method; +using executorch::runtime::MethodMeta; +using executorch::runtime::Program; +using executorch::runtime::Result; +using executorch::runtime::Span; + +static const char* kLabels[] = { + "silence", "unknown", "yes", "no", "up", "down", + "left", "right", "on", "off", "stop", "go"}; +static constexpr int kNumClasses = 12; + +alignas(16) static uint8_t method_pool[64 * 1024]; +alignas(16) static uint8_t temp_pool[8 * 1024]; + +void setup() { + Serial.begin(115200); + delay(2000); + + Serial.println("=== ExecuTorch Keyword Spotting ==="); + Serial.print("Model size: "); + Serial.print(model_pte_size); + Serial.println(" bytes"); + + executorch::runtime::runtime_init(); + Serial.println("Runtime initialized."); +} + +void loop() { + auto loader = BufferDataLoader(model_pte, model_pte_size); + Result program = Program::load(&loader); + if (!program.ok()) { + Serial.println("ERROR: Program::load failed"); + delay(5000); + return; + } + + const char* method_name = nullptr; + { + auto r = program->get_method_name(0); + if (!r.ok()) { Serial.println("ERROR: no methods"); delay(5000); return; } + method_name = *r; + } + + auto meta = program->method_meta(method_name); + if (!meta.ok()) { Serial.println("ERROR: method_meta"); delay(5000); return; } + + MemoryAllocator method_allocator(sizeof(method_pool), method_pool); + + // Allocate planned buffers + size_t n_planned = meta->num_memory_planned_buffers(); + Span* spans = static_cast*>( + 
method_allocator.allocate(n_planned * sizeof(Span))); + for (size_t i = 0; i < n_planned; i++) { + size_t sz = static_cast(meta->memory_planned_buffer_size(i).get()); + uint8_t* buf = static_cast(method_allocator.allocate(sz)); + spans[i] = {buf, sz}; + } + + HierarchicalAllocator planned({spans, n_planned}); + MemoryAllocator temp_allocator(sizeof(temp_pool), temp_pool); + MemoryManager mm(&method_allocator, &planned, &temp_allocator); + + auto method = program->load_method(method_name, &mm); + if (!method.ok()) { + Serial.print("ERROR: load_method failed: 0x"); + Serial.println((int)method.error(), HEX); + delay(5000); + return; + } + + // Run inference + Error status = method->execute(); + if (status != Error::Ok) { + Serial.print("ERROR: execute failed: 0x"); + Serial.println((int)status, HEX); + delay(5000); + return; + } + + // Read output + EValue output; + method->get_outputs(&output, 1); + if (output.isTensor()) { + auto tensor = output.toTensor(); + int best = 0; + float best_val = -1e9f; + for (int i = 0; i < kNumClasses && i < tensor.numel(); i++) { + float val = (tensor.scalar_type() == executorch::aten::ScalarType::Float) + ? tensor.const_data_ptr()[i] + : static_cast(tensor.const_data_ptr()[i]); + if (val > best_val) { best_val = val; best = i; } + } + Serial.print("Detected: "); + Serial.println(kLabels[best]); + } + + delay(5000); +} diff --git a/examples/arduino/library.properties b/examples/arduino/library.properties new file mode 100644 index 00000000000..e9787b1daec --- /dev/null +++ b/examples/arduino/library.properties @@ -0,0 +1,10 @@ +name=ExecuTorchArduino +version=0.1.0 +author=Meta Platforms +maintainer=ExecuTorch Team +sentence=Run PyTorch models on Arduino microcontrollers with ExecuTorch. +paragraph=ExecuTorch is a PyTorch runtime optimized for on-device inference. This library packages the ExecuTorch runtime with CMSIS-NN acceleration for ARM Cortex-M boards, enabling quantized model inference directly from Arduino sketches. 
+category=Data Processing +url=https://github.com/pytorch/executorch +architectures=zephyr,mbed_nano,mbed_giga,mbed_portenta +includes=ExecuTorchArduino.h diff --git a/examples/arduino/platform_stubs.c b/examples/arduino/platform_stubs.c new file mode 100644 index 00000000000..bc7b0629316 --- /dev/null +++ b/examples/arduino/platform_stubs.c @@ -0,0 +1,18 @@ +// Platform stubs for Arduino LLEXT environment. +// These C library functions are not exported by the Zephyr kernel's +// LLEXT symbol table but are referenced by the ExecuTorch runtime +// and CMSIS-NN. + +#include +#include + +void _Exit(int status) { + (void)status; + while (1) {} +} + +int fprintf(FILE* stream, const char* fmt, ...) { + (void)stream; + (void)fmt; + return 0; +} diff --git a/zephyr/samples/hello-executorch/boards/arduino_uno_q.conf b/zephyr/samples/hello-executorch/boards/arduino_uno_q.conf new file mode 100644 index 00000000000..22e56ccd035 --- /dev/null +++ b/zephyr/samples/hello-executorch/boards/arduino_uno_q.conf @@ -0,0 +1,14 @@ +# Arduino Uno Q — STM32U585 (Cortex-M33, DSP, no MVE, no Ethos-U) +# 2 MB flash, 786 KB SRAM — plenty of room for DS-CNN int8 (50 KB .pte). +# +# CMSIS-NN uses the DSP path for M33 (no MVE on U5 series). 
+ +# Memory tuning for DS-CNN on STM32U585: +# - method allocator: 64 KB (DS-CNN metadata + planned buffers) +# - temp allocator: 8 KB (scratch for CMSIS-NN kernels) +# - main stack: 8 KB +# - heap: 16 KB +CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE=65536 +CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE=8192 +CONFIG_MAIN_STACK_SIZE=8192 +CONFIG_HEAP_MEM_POOL_SIZE=16384 From 2b3744426a6d9d1efd76eeee83dcf08cfffe862c Mon Sep 17 00:00:00 2001 From: Siddartha Pothapragada Date: Thu, 11 Jun 2026 12:07:19 -0400 Subject: [PATCH 2/2] Add copyrights and improve example sketches - Add Meta copyright headers to all files - HelloExecuTorch: uses core ET runtime only (portable ops, no backend) - KeywordSpotting: uses Cortex-M backend with CMSIS-NN accelerated ops - Clarify the distinction in sketch comments --- examples/arduino/ExecuTorchArduino.h | 16 ++--- examples/arduino/README.md | 8 +++ examples/arduino/build_arduino_library.sh | 6 ++ .../HelloExecuTorch/HelloExecuTorch.ino | 37 +++++++++-- .../KeywordSpotting/KeywordSpotting.ino | 61 ++++++++++++------- examples/arduino/library.properties | 6 ++ examples/arduino/platform_stubs.c | 11 +++- .../boards/arduino_uno_q.conf | 6 ++ 8 files changed, 116 insertions(+), 35 deletions(-) diff --git a/examples/arduino/ExecuTorchArduino.h b/examples/arduino/ExecuTorchArduino.h index 240b99c0555..dc7e87706fd 100644 --- a/examples/arduino/ExecuTorchArduino.h +++ b/examples/arduino/ExecuTorchArduino.h @@ -1,21 +1,21 @@ -#pragma once +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ -// ExecuTorchArduino — ExecuTorch runtime for Arduino boards -// -// This header configures ExecuTorch for the Arduino build environment -// and exposes the native ET C++ API. There is no wrapper — use -// Program::load(), Method::execute(), etc. directly. 
+#pragma once // Arduino's custom header omits , which breaks // std::bad_variant_access in . Include it first. #include -// ExecuTorch build configuration for Arduino #define C10_USING_CUSTOM_GENERATED_MACROS #define ET_ENABLE_DEPRECATED_CONSTANT_BUFFER 0 #define FLATBUFFERS_MAX_ALIGNMENT 1024 -// Core runtime headers #include #include #include diff --git a/examples/arduino/README.md b/examples/arduino/README.md index efa7f251a18..bd3a8d02f44 100644 --- a/examples/arduino/README.md +++ b/examples/arduino/README.md @@ -1,3 +1,11 @@ + + # ExecuTorch Arduino Library Run PyTorch models on Arduino microcontrollers using ExecuTorch. diff --git a/examples/arduino/build_arduino_library.sh b/examples/arduino/build_arduino_library.sh index 9bad7d7aa8a..505e121cf7b 100755 --- a/examples/arduino/build_arduino_library.sh +++ b/examples/arduino/build_arduino_library.sh @@ -1,4 +1,10 @@ #!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + # build_arduino_library.sh — Assemble the ExecuTorch Arduino library # from sources in this repository. # diff --git a/examples/arduino/examples/HelloExecuTorch/HelloExecuTorch.ino b/examples/arduino/examples/HelloExecuTorch/HelloExecuTorch.ino index 51c6c61f8e4..b2502152505 100644 --- a/examples/arduino/examples/HelloExecuTorch/HelloExecuTorch.ino +++ b/examples/arduino/examples/HelloExecuTorch/HelloExecuTorch.ino @@ -1,19 +1,48 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + // HelloExecuTorch — Minimal ExecuTorch sketch // -// Initializes the ExecuTorch runtime and prints confirmation over Serial. -// Use this to verify the library compiles and runs on your board before -// loading a model. 
+// Initializes the ExecuTorch runtime and loads a model using the core +// ET library (portable ops only, no hardware-specific backends). +// Use this to verify the library works on your board. #include +#include "model.h" + +using executorch::extension::BufferDataLoader; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::Program; +using executorch::runtime::Result; + +alignas(16) static uint8_t arena[32 * 1024]; void setup() { Serial.begin(115200); delay(2000); - Serial.println("=== ExecuTorch Arduino ==="); + Serial.println("=== HelloExecuTorch ==="); executorch::runtime::runtime_init(); Serial.println("Runtime initialized."); + + auto loader = BufferDataLoader(model_pte, model_pte_size); + Result program = Program::load(&loader); + if (program.ok()) { + Serial.println("Model loaded OK!"); + Serial.print(" Size: "); + Serial.print(model_pte_size); + Serial.println(" bytes"); + Serial.print(" Methods: "); + Serial.println(program->num_methods()); + } else { + Serial.println("ERROR: Model load failed"); + } } void loop() { diff --git a/examples/arduino/examples/KeywordSpotting/KeywordSpotting.ino b/examples/arduino/examples/KeywordSpotting/KeywordSpotting.ino index 5547628437a..3e8f48c9c32 100644 --- a/examples/arduino/examples/KeywordSpotting/KeywordSpotting.ino +++ b/examples/arduino/examples/KeywordSpotting/KeywordSpotting.ino @@ -1,20 +1,31 @@ -// KeywordSpotting — DS-CNN keyword spotting with ExecuTorch +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +// KeywordSpotting — DS-CNN inference with ExecuTorch + CMSIS-NN // -// Runs a quantized DS-CNN model (MLPerf Tiny KWS benchmark) on hardcoded -// MFCC test inputs and prints the detected keyword. The model classifies -// 12 keywords: silence, unknown, yes, no, up, down, left, right, on, off, -// stop, go. 
+// Runs a quantized DS-CNN model (MLPerf Tiny KWS benchmark) using the +// Cortex-M backend with CMSIS-NN accelerated int8 ops. The model +// classifies 12 keywords: silence, unknown, yes, no, up, down, left, +// right, on, off, stop, go. // -// The model uses CMSIS-NN accelerated int8 ops on Cortex-M boards. +// This example demonstrates: +// - Loading a .pte model with cortex_m::* ops (CMSIS-NN backend) +// - Setting up memory allocators for constrained devices +// - Running inference and reading output tensors // -// Prerequisites: -// - model.h : DS-CNN .pte exported with CortexMQuantizer, as a C array -// - test_inputs.h : MFCC features from real audio, as float arrays +// Required files (not included, generate with export_model.py): +// - model.h : DS-CNN .pte exported with CortexMQuantizer +// - test_inputs.h : MFCC features from real audio samples #include #include "model.h" -#include "test_inputs.h" +using executorch::aten::ScalarType; using executorch::extension::BufferDataLoader; using executorch::runtime::Error; using executorch::runtime::EValue; @@ -40,9 +51,9 @@ void setup() { delay(2000); Serial.println("=== ExecuTorch Keyword Spotting ==="); - Serial.print("Model size: "); + Serial.print("Model: "); Serial.print(model_pte_size); - Serial.println(" bytes"); + Serial.println(" bytes (DS-CNN int8, CMSIS-NN)"); executorch::runtime::runtime_init(); Serial.println("Runtime initialized."); @@ -60,16 +71,23 @@ void loop() { const char* method_name = nullptr; { auto r = program->get_method_name(0); - if (!r.ok()) { Serial.println("ERROR: no methods"); delay(5000); return; } + if (!r.ok()) { + Serial.println("ERROR: no methods"); + delay(5000); + return; + } method_name = *r; } auto meta = program->method_meta(method_name); - if (!meta.ok()) { Serial.println("ERROR: method_meta"); delay(5000); return; } + if (!meta.ok()) { + Serial.println("ERROR: method_meta failed"); + delay(5000); + return; + } MemoryAllocator method_allocator(sizeof(method_pool), 
method_pool); - // Allocate planned buffers size_t n_planned = meta->num_memory_planned_buffers(); Span* spans = static_cast*>( method_allocator.allocate(n_planned * sizeof(Span))); @@ -85,22 +103,20 @@ void loop() { auto method = program->load_method(method_name, &mm); if (!method.ok()) { - Serial.print("ERROR: load_method failed: 0x"); + Serial.print("ERROR: load_method: 0x"); Serial.println((int)method.error(), HEX); delay(5000); return; } - // Run inference Error status = method->execute(); if (status != Error::Ok) { - Serial.print("ERROR: execute failed: 0x"); + Serial.print("ERROR: execute: 0x"); Serial.println((int)status, HEX); delay(5000); return; } - // Read output EValue output; method->get_outputs(&output, 1); if (output.isTensor()) { @@ -108,10 +124,13 @@ void loop() { int best = 0; float best_val = -1e9f; for (int i = 0; i < kNumClasses && i < tensor.numel(); i++) { - float val = (tensor.scalar_type() == executorch::aten::ScalarType::Float) + float val = (tensor.scalar_type() == ScalarType::Float) ? tensor.const_data_ptr()[i] : static_cast(tensor.const_data_ptr()[i]); - if (val > best_val) { best_val = val; best = i; } + if (val > best_val) { + best_val = val; + best = i; + } } Serial.print("Detected: "); Serial.println(kLabels[best]); diff --git a/examples/arduino/library.properties b/examples/arduino/library.properties index e9787b1daec..5288800f27a 100644 --- a/examples/arduino/library.properties +++ b/examples/arduino/library.properties @@ -1,3 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ name=ExecuTorchArduino version=0.1.0 author=Meta Platforms diff --git a/examples/arduino/platform_stubs.c b/examples/arduino/platform_stubs.c index bc7b0629316..6d50ed3290c 100644 --- a/examples/arduino/platform_stubs.c +++ b/examples/arduino/platform_stubs.c @@ -1,7 +1,14 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + // Platform stubs for Arduino LLEXT environment. // These C library functions are not exported by the Zephyr kernel's -// LLEXT symbol table but are referenced by the ExecuTorch runtime -// and CMSIS-NN. +// LLEXT symbol table but are referenced by the ExecuTorch runtime. #include #include diff --git a/zephyr/samples/hello-executorch/boards/arduino_uno_q.conf b/zephyr/samples/hello-executorch/boards/arduino_uno_q.conf index 22e56ccd035..d3bb1935979 100644 --- a/zephyr/samples/hello-executorch/boards/arduino_uno_q.conf +++ b/zephyr/samples/hello-executorch/boards/arduino_uno_q.conf @@ -1,3 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + # Arduino Uno Q — STM32U585 (Cortex-M33, DSP, no MVE, no Ethos-U) # 2 MB flash, 786 KB SRAM — plenty of room for DS-CNN int8 (50 KB .pte). #