From 1d9bf15f1fafb3baae26e257848b81a0529bdacb Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Wed, 20 May 2026 19:26:33 +0200
Subject: [PATCH] fix(opt): optimized path declines f32/f64, backend falls back
 to direct selection (#120)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OptimizerBridge::wasm_to_ir had zero handlers for scalar f32/f64 ops —
every float op fell through the catch-all `_ => Opcode::Nop`. A
value-producing float op (e.g. f32.div) that emits Nop produces no
vreg, so any downstream consumer references an unmapped vreg and trips
the PR #101 defensive get_arm_reg panic in ir_to_arm. This is the #93
silent-drop class, for floats — the customer-reported crash on
compiler_builtins float::div and gale_compute_ipi_mask.

The IR Opcode enum (synth-opt) has no float opcodes at all, so the
optimized path cannot lower floats without a large feature addition.
Fix (Option A): optimize_full now detects scalar f32/f64 ops up front
and returns a typed Err(Error::UnsupportedInstruction). The ARM
backend's compile_wasm_to_arm catches that Err and falls back to the
non-optimized InstructionSelector::select_with_stack path, which
already handles f32 via VFP/FPU. Honest degradation: float-containing
functions still compile correctly, just without IR-level optimization.

The defensive panic at get_arm_reg is intentionally kept — the fix
makes float ops never reach it, rather than removing the bug-finder.

f64 remains unsupported in both paths; the optimized path now declines
it cleanly instead of panicking.

Tests:
- crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs:
  exercises optimize_full + ir_to_arm (the path that panicked) for
  f32.div and assorted f32/f64 ops; asserts a clean Err, not a panic.
- arm_backend.rs: end-to-end backend tests compiling f32 div/add/mul/sub
  on Cortex-M4F via the default (optimized) config, asserting non-empty
  machine code; plus a no-FPU rejection test.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 crates/synth-backend/src/arm_backend.rs       | 146 ++++++++++++++--
 .../synth-synthesis/src/optimizer_bridge.rs   | 115 +++++++++++-
 .../regression_issue_120_f32_optimized.rs     | 163 ++++++++++++++++++
 3 files changed, 408 insertions(+), 16 deletions(-)
 create mode 100644 crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs
diff --git a/crates/synth-backend/src/arm_backend.rs b/crates/synth-backend/src/arm_backend.rs
index 9ad5070..d77ca7c 100644
--- a/crates/synth-backend/src/arm_backend.rs
+++ b/crates/synth-backend/src/arm_backend.rs
@@ -150,8 +150,10 @@ fn compile_wasm_to_arm(
         BoundsCheckConfig::None
     };
 
-    // Instruction selection: optimized or direct
-    let arm_instrs = if config.no_optimize {
+    // The non-optimized (direct) instruction-selection path. Handles f32 via
+    // VFP/FPU. Used directly when `--no-optimize` is set, and as the fallback
+    // when the optimized path declines a module (see issue #120 below).
+    let select_direct = || -> Result<Vec<ArmInstruction>, String> {
         let db = RuleDatabase::with_standard_rules();
         let mut selector =
             InstructionSelector::with_bounds_check(db.rules().to_vec(), bounds_config);
@@ -161,7 +163,12 @@ fn compile_wasm_to_arm(
         }
         selector
             .select_with_stack(wasm_ops, num_params)
-            .map_err(|e| format!("instruction selection failed: {}", e))?
+            .map_err(|e| format!("instruction selection failed: {}", e))
+    };
+
+    // Instruction selection: optimized or direct
+    let arm_instrs = if config.no_optimize {
+        select_direct()?
     } else {
         let opt_config = if config.loom_compat {
             OptimizationConfig::loom_compat()
@@ -170,18 +177,24 @@ fn compile_wasm_to_arm(
         };
 
         let bridge = OptimizerBridge::with_config(opt_config);
-        let (opt_ir, _cfg, _stats) = bridge
-            .optimize_full(wasm_ops)
-            .map_err(|e| format!("optimization failed: {}", e))?;
-
-        let arm_ops = bridge.ir_to_arm(&opt_ir, num_params as usize);
-        arm_ops
-            .into_iter()
-            .map(|op| ArmInstruction {
-                op,
-                source_line: None,
-            })
-            .collect()
+        match bridge.optimize_full(wasm_ops) {
+            Ok((opt_ir, _cfg, _stats)) => {
+                let arm_ops = bridge.ir_to_arm(&opt_ir, num_params as usize);
+                arm_ops
+                    .into_iter()
+                    .map(|op| ArmInstruction {
+                        op,
+                        source_line: None,
+                    })
+                    .collect()
+            }
+            // Issue #120: the optimized path declines modules it cannot lower
+            // (notably scalar f32/f64 ops — the IR has no float opcodes). Fall
+            // back to the direct instruction selector, which handles f32 via
+            // VFP/FPU. This is honest degradation: the function still compiles
+            // correctly, just without IR-level optimization.
+            Err(_) => select_direct()?,
+        }
     };
 
     // ISA feature gate: validate that all generated instructions are supported
@@ -405,6 +418,109 @@ mod tests {
         );
     }
 
+    // ========================================================================
+    // Issue #120 — f32 ops in the optimized lowering path
+    //
+    // `OptimizerBridge::wasm_to_ir` has no handlers for f32/f64 ops, so a
+    // value-producing float op fell through to `Opcode::Nop`, leaving a
+    // downstream consumer with an unmapped vreg and tripping the PR #101
+    // defensive panic in `ir_to_arm`. Customer reproducer: `compiler_builtins
+    // float::div` and `gale_compute_ipi_mask` in the `falcon-rate-component`
+    // module.
+    //
+    // Fix: `optimize_full` declines float modules with a typed `Err`;
+    // `compile_wasm_to_arm` falls back to the non-optimized `select_with_stack`
+    // path, which handles f32 via VFP/FPU. These tests use the *default*
+    // (optimized) config — `no_optimize` is NOT set — which is the exact
+    // configuration that panicked pre-fix.
+    // ========================================================================
+
+    /// Pre-fix: this panicked with "vreg vN has no assigned ARM register and
+    /// no spill slot" inside `ir_to_arm`. Post-fix: the optimized path declines
+    /// the module and the backend falls back to direct selection, producing a
+    /// non-empty f32.div lowering on a Cortex-M4F.
+    #[test]
+    fn test_issue120_f32_div_compiles_via_optimized_default() {
+        let backend = ArmBackend::new();
+        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div];
+        let config = CompileConfig {
+            target: TargetSpec::cortex_m4f(),
+            // no_optimize NOT set — this exercises the optimized path that
+            // panicked in issue #120, then the fallback to direct selection.
+            ..CompileConfig::default()
+        };
+
+        let result = backend.compile_function("fdiv", &ops, &config);
+        assert!(
+            result.is_ok(),
+            "f32.div must compile on Cortex-M4F via the optimized->direct \
+             fallback (issue #120), got: {:?}",
+            result.as_ref().err()
+        );
+        assert!(
+            !result.unwrap().code.is_empty(),
+            "f32.div must produce non-empty machine code"
+        );
+    }
+
+    /// A spread of f32 ops, all through the optimized (default) config, must
+    /// compile via the fallback on an FPU target without panicking.
+    #[test]
+    fn test_issue120_assorted_f32_ops_compile_via_optimized_default() {
+        let backend = ArmBackend::new();
+        let config = CompileConfig {
+            target: TargetSpec::cortex_m4f(),
+            ..CompileConfig::default()
+        };
+
+        let cases: Vec<(&str, Vec<WasmOp>)> = vec![
+            (
+                "fadd",
+                vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Add],
+            ),
+            (
+                "fmul",
+                vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Mul],
+            ),
+            (
+                "fsub",
+                vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Sub],
+            ),
+        ];
+
+        for (name, ops) in cases {
+            let result = backend.compile_function(name, &ops, &config);
+            assert!(
+                result.is_ok(),
+                "{name} must compile via the optimized->direct fallback \
+                 (issue #120), got: {:?}",
+                result.as_ref().err()
+            );
+            assert!(
+                !result.unwrap().code.is_empty(),
+                "{name} must produce non-empty machine code"
+            );
+        }
+    }
+
+    /// The fallback must still honor the ISA feature gate: f32 on a no-FPU
+    /// target must fail cleanly (not panic) even on the optimized path.
+    #[test]
+    fn test_issue120_f32_div_rejected_on_no_fpu_via_optimized() {
+        let backend = ArmBackend::new();
+        let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div];
+        let config = CompileConfig {
+            target: TargetSpec::cortex_m3(),
+            ..CompileConfig::default()
+        };
+
+        let result = backend.compile_function("fdiv", &ops, &config);
+        assert!(
+            result.is_err(),
+            "f32.div must be rejected on Cortex-M3 (no FPU), not panic"
+        );
+    }
+
     /// Issue #94: end-to-end byte-size check for the canonical u64-packed
     /// FFI-return hi32 extract pattern. Compiles two near-identical
     /// functions — one with the optimized shift-by-32, one with a generic
diff --git a/crates/synth-synthesis/src/optimizer_bridge.rs b/crates/synth-synthesis/src/optimizer_bridge.rs
index 304923a..328e242 100644
--- a/crates/synth-synthesis/src/optimizer_bridge.rs
+++ b/crates/synth-synthesis/src/optimizer_bridge.rs
@@ -14,8 +14,8 @@
 use crate::Condition;
 use crate::rules::ArmOp;
 use synth_cfg::{Cfg, CfgBuilder};
-use synth_core::Result;
 use synth_core::WasmOp;
+use synth_core::{Error, Result};
 use synth_opt::{
     AlgebraicSimplification, CommonSubexpressionElimination, ConstantFolding, DeadCodeElimination,
     Instruction, Opcode, OptResult, PassManager, PeepholeOptimization, Reg as OptReg,
@@ -364,6 +364,105 @@ impl OptimizerBridge {
         )
     }
 
+    /// Returns true if `op` is a scalar f32/f64 operation that the optimized
+    /// lowering path (`wasm_to_ir` → `ir_to_arm`) cannot handle.
+    ///
+    /// Issue #120: `wasm_to_ir` has zero handlers for f32/f64 ops, so every
+    /// float op falls through the catch-all `_ => Opcode::Nop`. A value-producing
+    /// float op that emits `Nop` produces no vreg; any downstream consumer then
+    /// references an unmapped vreg and trips the defensive `get_arm_reg` panic
+    /// added by PR #101 (the #93 silent-drop class, for floats).
+    ///
+    /// The IR `Opcode` enum (`synth-opt`) has no float opcodes at all, so the
+    /// optimized path cannot lower floats without a large feature addition.
+    /// Instead, `optimize_full` detects these ops up front and returns a typed
+    /// `Err`, letting the backend fall back to the non-optimized
+    /// `InstructionSelector::select_with_stack` path, which *does* handle f32
+    /// (VFP/FPU). This includes float→int / int→float conversion ops and float
+    /// loads/stores, since they too produce or consume float-typed values the
+    /// optimized path can neither map to a vreg nor lower to VFP.
+    fn is_unsupported_float_op(op: &WasmOp) -> bool {
+        matches!(
+            op,
+            // f32 arithmetic
+            WasmOp::F32Add
+                | WasmOp::F32Sub
+                | WasmOp::F32Mul
+                | WasmOp::F32Div
+                // f32 comparisons
+                | WasmOp::F32Eq
+                | WasmOp::F32Ne
+                | WasmOp::F32Lt
+                | WasmOp::F32Le
+                | WasmOp::F32Gt
+                | WasmOp::F32Ge
+                // f32 math functions
+                | WasmOp::F32Abs
+                | WasmOp::F32Neg
+                | WasmOp::F32Ceil
+                | WasmOp::F32Floor
+                | WasmOp::F32Trunc
+                | WasmOp::F32Nearest
+                | WasmOp::F32Sqrt
+                | WasmOp::F32Min
+                | WasmOp::F32Max
+                | WasmOp::F32Copysign
+                // f32 constants and memory
+                | WasmOp::F32Const(_)
+                | WasmOp::F32Load { .. }
+                | WasmOp::F32Store { .. }
+                // f32 conversions
+                | WasmOp::F32ConvertI32S
+                | WasmOp::F32ConvertI32U
+                | WasmOp::F32ConvertI64S
+                | WasmOp::F32ConvertI64U
+                | WasmOp::F32DemoteF64
+                | WasmOp::F32ReinterpretI32
+                | WasmOp::I32ReinterpretF32
+                | WasmOp::I32TruncF32S
+                | WasmOp::I32TruncF32U
+                // f64 arithmetic
+                | WasmOp::F64Add
+                | WasmOp::F64Sub
+                | WasmOp::F64Mul
+                | WasmOp::F64Div
+                // f64 comparisons
+                | WasmOp::F64Eq
+                | WasmOp::F64Ne
+                | WasmOp::F64Lt
+                | WasmOp::F64Le
+                | WasmOp::F64Gt
+                | WasmOp::F64Ge
+                // f64 math functions
+                | WasmOp::F64Abs
+                | WasmOp::F64Neg
+                | WasmOp::F64Ceil
+                | WasmOp::F64Floor
+                | WasmOp::F64Trunc
+                | WasmOp::F64Nearest
+                | WasmOp::F64Sqrt
+                | WasmOp::F64Min
+                | WasmOp::F64Max
+                | WasmOp::F64Copysign
+                // f64 constants and memory
+                | WasmOp::F64Const(_)
+                | WasmOp::F64Load { .. }
+                | WasmOp::F64Store { .. }
+                // f64 conversions
+                | WasmOp::F64ConvertI32S
+                | WasmOp::F64ConvertI32U
+                | WasmOp::F64ConvertI64S
+                | WasmOp::F64ConvertI64U
+                | WasmOp::F64PromoteF32
+                | WasmOp::F64ReinterpretI64
+                | WasmOp::I64ReinterpretF64
+                | WasmOp::I64TruncF64S
+                | WasmOp::I64TruncF64U
+                | WasmOp::I32TruncF64S
+                | WasmOp::I32TruncF64U
+        )
+    }
+
     /// Returns how many i64 values a WASM op consumes (0 if not an i64-consuming op)
     fn i64_operand_count(op: &WasmOp) -> usize {
         match op {
@@ -1389,6 +1488,20 @@ impl OptimizerBridge {
             ));
         }
 
+        // Issue #120: the optimized path (`wasm_to_ir` → `ir_to_arm`) has no
+        // handlers for scalar f32/f64 ops — the IR `Opcode` enum has no float
+        // opcodes. Rather than silently emit `Opcode::Nop` for a value-producing
+        // float op (which leaves a downstream consumer with an unmapped vreg and
+        // trips the PR #101 defensive panic), decline the whole module here with
+        // a typed error. The backend falls back to `select_with_stack`, the
+        // non-optimized path, which handles f32 via VFP/FPU.
+        if let Some(float_op) = wasm_ops.iter().find(|op| Self::is_unsupported_float_op(op)) {
+            return Err(Error::UnsupportedInstruction(format!(
+                "optimized lowering path does not support float ops ({float_op:?}); \
+                 the non-optimized instruction selector handles f32 — issue #120"
+            )));
+        }
+
         // Preprocess: convert if-else patterns to select
         let preprocessed = self.preprocess_wasm_ops(wasm_ops);
 
diff --git a/crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs b/crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs
new file mode 100644
index 0000000..7f0cb96
--- /dev/null
+++ b/crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs
@@ -0,0 +1,163 @@
+//! Regression test for issue #120 — unmapped-vreg panic on f32 ops in the
+//! optimized lowering path.
+//!
+//! Symptom (pre-fix, with the PR #101 defensive panic active):
+//!
+//! ```text
+//! INFO Compiling function '_ZN17compiler_builtins5float3div3div...E' via backend 'arm'...
+//! thread 'main' panicked at crates/synth-synthesis/src/optimizer_bridge.rs:...:
+//! synth internal compiler error: vreg v721 has no assigned ARM register and no spill slot.
+//! This is a wasm_to_ir bug — likely a wasm op whose result is unmapped (see issue #93).
+//! ```
+//!
+//! Root cause: `OptimizerBridge::wasm_to_ir`'s `match wasm_op` had zero arms
+//! for f32/f64 ops, so every float op fell through `_ => Opcode::Nop`. A
+//! value-producing float op (e.g. `f32.div`) that emits `Nop` produces no
+//! vreg; any downstream consumer of the float result then resolves an
+//! unmapped vreg in `ir_to_arm`'s `get_arm_reg` and trips the defensive panic
+//! (the #93 silent-drop class, for floats).
+//!
+//! Fix (issue #120, Option A): the IR `Opcode` enum has no float opcodes, so
+//! the optimized path cannot lower floats without a large feature addition.
+//! Instead `optimize_full` now detects scalar f32/f64 ops up front and returns
+//! a typed `Err(Error::UnsupportedInstruction(...))`. The ARM backend falls
+//! back to the non-optimized `InstructionSelector::select_with_stack` path,
+//! which handles f32 via VFP/FPU.
+//!
+//! This test verifies the optimizer-bridge layer (where the bug lived):
+//! `optimize_full` declines float modules with a clean `Err` instead of
+//! panicking. The end-to-end backend fallback is covered by tests in
+//! `crates/synth-backend/src/arm_backend.rs` (synth-synthesis sits below
+//! synth-backend in the dep graph, so `ArmBackend` cannot be used here).
+
+use synth_synthesis::{OptimizerBridge, WasmOp};
+
+/// Run a wasm-op sequence through the path that panicked pre-fix:
+/// `optimize_full` followed (on success) by `ir_to_arm`. Returns `Ok` if the
+/// optimized path accepted the module, `Err` if it cleanly declined it.
+/// Must never panic.
+fn try_optimized(wasm_ops: &[WasmOp], num_params: usize) -> Result<usize, String> {
+    let bridge = OptimizerBridge::new();
+    match bridge.optimize_full(wasm_ops) {
+        Ok((ir, _cfg, _stats)) => {
+            // ir_to_arm is the function that hosts the defensive get_arm_reg
+            // panic. Exercising it here proves no unmapped vreg is reached.
+            let arm = bridge.ir_to_arm(&ir, num_params);
+            Ok(arm.len())
+        }
+        Err(e) => Err(e.to_string()),
+    }
+}
+
+/// `f32.div` whose result is consumed by an `f32.store` — the
+/// `compiler_builtins float::div` shape from the issue reproducer.
+///
+/// Pre-fix: `F32Div` and `F32Store` both fell through to `Opcode::Nop`; the
+/// store's source vreg was unmapped and `ir_to_arm` panicked.
+///
+/// Post-fix: `optimize_full` returns `Err` — no panic.
+#[test]
+fn f32_div_does_not_panic_in_optimized_path() {
+    let ops = vec![
+        WasmOp::LocalGet(0),
+        WasmOp::LocalGet(1),
+        WasmOp::F32Div,
+        WasmOp::F32Store {
+            offset: 0,
+            align: 2,
+        },
+    ];
+
+    let result = try_optimized(&ops, /* num_params = */ 2);
+    assert!(
+        result.is_err(),
+        "optimized path must cleanly decline f32.div (issue #120), got Ok: {result:?}"
+    );
+    let msg = result.unwrap_err();
+    assert!(
+        msg.contains("float") && msg.contains("120"),
+        "error should explain the float limitation and reference issue #120, got: {msg}"
+    );
+}
+
+/// `f32.div` whose result is the function return value — exercises the
+/// value-on-stack-at-end shape.
+#[test]
+fn f32_div_returned_does_not_panic() {
+    let ops = vec![
+        WasmOp::LocalGet(0),
+        WasmOp::LocalGet(1),
+        WasmOp::F32Div,
+        WasmOp::End,
+    ];
+
+    let result = try_optimized(&ops, 2);
+    assert!(
+        result.is_err(),
+        "optimized path must cleanly decline f32.div return (issue #120), got Ok: {result:?}"
+    );
+}
+
+/// A representative spread of f32 ops — arithmetic, comparison, math
+/// functions, conversions, loads — must each be declined, not panic.
+#[test]
+fn assorted_f32_ops_are_declined_cleanly() {
+    let cases: Vec<Vec<WasmOp>> = vec![
+        vec![WasmOp::F32Const(1.5), WasmOp::F32Const(2.5), WasmOp::F32Add],
+        vec![WasmOp::F32Const(1.5), WasmOp::F32Const(2.5), WasmOp::F32Mul],
+        vec![WasmOp::F32Const(1.5), WasmOp::F32Const(2.5), WasmOp::F32Sub],
+        vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Lt],
+        vec![WasmOp::LocalGet(0), WasmOp::F32Sqrt],
+        vec![WasmOp::LocalGet(0), WasmOp::F32Abs],
+        vec![WasmOp::LocalGet(0), WasmOp::F32ConvertI32S],
+        vec![WasmOp::LocalGet(0), WasmOp::I32TruncF32S],
+        vec![WasmOp::LocalGet(0), WasmOp::I32ReinterpretF32],
+        vec![
+            WasmOp::LocalGet(0),
+            WasmOp::F32Load {
+                offset: 0,
+                align: 2,
+            },
+        ],
+    ];
+
+    for ops in cases {
+        let result = try_optimized(&ops, 2);
+        assert!(
+            result.is_err(),
+            "optimized path must decline f32 ops without panicking, ops={ops:?} got Ok"
+        );
+    }
+}
+
+/// f64 ops — still unsupported in *both* paths, but the optimized path must at
+/// least decline them with a typed error rather than panic.
+#[test]
+fn f64_ops_are_declined_cleanly() {
+    let cases: Vec<Vec<WasmOp>> = vec![
+        vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F64Div],
+        vec![WasmOp::F64Const(1.0), WasmOp::F64Const(2.0), WasmOp::F64Add],
+        vec![WasmOp::LocalGet(0), WasmOp::F64Sqrt],
+        vec![WasmOp::LocalGet(0), WasmOp::F64PromoteF32],
+    ];
+
+    for ops in cases {
+        let result = try_optimized(&ops, 2);
+        assert!(
+            result.is_err(),
+            "optimized path must decline f64 ops without panicking, ops={ops:?} got Ok"
+        );
+    }
+}
+
+/// Sanity guard: a pure integer module must still go through the optimized
+/// path unchanged — the float guard must not over-trigger and reject i32 code.
+#[test]
+fn integer_ops_still_use_optimized_path() {
+    let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add];
+    let result = try_optimized(&ops, 2);
+    assert!(
+        result.is_ok(),
+        "pure i32 module must still optimize, got Err: {result:?}"
+    );
+}