From 1d9bf15f1fafb3baae26e257848b81a0529bdacb Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Wed, 20 May 2026 19:26:33 +0200 Subject: [PATCH] fix(opt): optimized path declines f32/f64, backend falls back to direct selection (#120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OptimizerBridge::wasm_to_ir had zero handlers for scalar f32/f64 ops — every float op fell through the catch-all `_ => Opcode::Nop`. A value-producing float op (e.g. f32.div) that emits Nop produces no vreg, so any downstream consumer references an unmapped vreg and trips the PR #101 defensive get_arm_reg panic in ir_to_arm. This is the #93 silent-drop class, for floats — the customer-reported crash on compiler_builtins float::div and gale_compute_ipi_mask. The IR Opcode enum (synth-opt) has no float opcodes at all, so the optimized path cannot lower floats without a large feature addition. Fix (Option A): optimize_full now detects scalar f32/f64 ops up front and returns a typed Err(Error::UnsupportedInstruction). The ARM backend's compile_wasm_to_arm catches that Err and falls back to the non-optimized InstructionSelector::select_with_stack path, which already handles f32 via VFP/FPU. Honest degradation: float-containing functions still compile correctly, just without IR-level optimization. The defensive panic at get_arm_reg is intentionally kept — the fix makes float ops never reach it, rather than removing the bug-finder. f64 remains unsupported in both paths; the optimized path now declines it cleanly instead of panicking. Tests: - crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs: exercises optimize_full + ir_to_arm (the path that panicked) for f32.div and assorted f32/f64 ops; asserts a clean Err, not a panic. - arm_backend.rs: end-to-end backend tests compiling f32 div/add/mul/sub on Cortex-M4F via the default (optimized) config, asserting non-empty machine code; plus a no-FPU rejection test. Co-Authored-By: Claude Opus 4.7 --- crates/synth-backend/src/arm_backend.rs | 146 ++++++++++++++-- .../synth-synthesis/src/optimizer_bridge.rs | 115 +++++++++++- .../regression_issue_120_f32_optimized.rs | 163 ++++++++++++++++++ 3 files changed, 408 insertions(+), 16 deletions(-) create mode 100644 crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs diff --git a/crates/synth-backend/src/arm_backend.rs b/crates/synth-backend/src/arm_backend.rs index 9ad5070..d77ca7c 100644 --- a/crates/synth-backend/src/arm_backend.rs +++ b/crates/synth-backend/src/arm_backend.rs @@ -150,8 +150,10 @@ fn compile_wasm_to_arm( BoundsCheckConfig::None }; - // Instruction selection: optimized or direct - let arm_instrs = if config.no_optimize { + // The non-optimized (direct) instruction-selection path. Handles f32 via + // VFP/FPU. Used directly when `--no-optimize` is set, and as the fallback + // when the optimized path declines a module (see issue #120 below). + let select_direct = || -> Result, String> { let db = RuleDatabase::with_standard_rules(); let mut selector = InstructionSelector::with_bounds_check(db.rules().to_vec(), bounds_config); @@ -161,7 +163,12 @@ fn compile_wasm_to_arm( } selector .select_with_stack(wasm_ops, num_params) - .map_err(|e| format!("instruction selection failed: {}", e))? + .map_err(|e| format!("instruction selection failed: {}", e)) + }; + + // Instruction selection: optimized or direct + let arm_instrs = if config.no_optimize { + select_direct()? } else { let opt_config = if config.loom_compat { OptimizationConfig::loom_compat() @@ -170,18 +177,24 @@ fn compile_wasm_to_arm( }; let bridge = OptimizerBridge::with_config(opt_config); - let (opt_ir, _cfg, _stats) = bridge - .optimize_full(wasm_ops) - .map_err(|e| format!("optimization failed: {}", e))?; - - let arm_ops = bridge.ir_to_arm(&opt_ir, num_params as usize); - arm_ops - .into_iter() - .map(|op| ArmInstruction { - op, - source_line: None, - }) - .collect() + match bridge.optimize_full(wasm_ops) { + Ok((opt_ir, _cfg, _stats)) => { + let arm_ops = bridge.ir_to_arm(&opt_ir, num_params as usize); + arm_ops + .into_iter() + .map(|op| ArmInstruction { + op, + source_line: None, + }) + .collect() + } + // Issue #120: the optimized path declines modules it cannot lower + // (notably scalar f32/f64 ops — the IR has no float opcodes). Fall + // back to the direct instruction selector, which handles f32 via + // VFP/FPU. This is honest degradation: the function still compiles + // correctly, just without IR-level optimization. + Err(_) => select_direct()?, + } }; // ISA feature gate: validate that all generated instructions are supported @@ -405,6 +418,109 @@ mod tests { ); } + // ======================================================================== + // Issue #120 — f32 ops in the optimized lowering path + // + // `OptimizerBridge::wasm_to_ir` has no handlers for f32/f64 ops, so a + // value-producing float op fell through to `Opcode::Nop`, leaving a + // downstream consumer with an unmapped vreg and tripping the PR #101 + // defensive panic in `ir_to_arm`. Customer reproducer: `compiler_builtins + // float::div` and `gale_compute_ipi_mask` in the `falcon-rate-component` + // module. + // + // Fix: `optimize_full` declines float modules with a typed `Err`; + // `compile_wasm_to_arm` falls back to the non-optimized `select_with_stack` + // path, which handles f32 via VFP/FPU. These tests use the *default* + // (optimized) config — `no_optimize` is NOT set — which is the exact + // configuration that panicked pre-fix. + // ======================================================================== + + /// Pre-fix: this panicked with "vreg vN has no assigned ARM register and + /// no spill slot" inside `ir_to_arm`. Post-fix: the optimized path declines + /// the module and the backend falls back to direct selection, producing a + /// non-empty f32.div lowering on a Cortex-M4F. + #[test] + fn test_issue120_f32_div_compiles_via_optimized_default() { + let backend = ArmBackend::new(); + let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div]; + let config = CompileConfig { + target: TargetSpec::cortex_m4f(), + // no_optimize NOT set — this exercises the optimized path that + // panicked in issue #120, then the fallback to direct selection. + ..CompileConfig::default() + }; + + let result = backend.compile_function("fdiv", &ops, &config); + assert!( + result.is_ok(), + "f32.div must compile on Cortex-M4F via the optimized->direct \ + fallback (issue #120), got: {:?}", + result.as_ref().err() + ); + assert!( + !result.unwrap().code.is_empty(), + "f32.div must produce non-empty machine code" + ); + } + + /// A spread of f32 ops, all through the optimized (default) config, must + /// compile via the fallback on an FPU target without panicking. + #[test] + fn test_issue120_assorted_f32_ops_compile_via_optimized_default() { + let backend = ArmBackend::new(); + let config = CompileConfig { + target: TargetSpec::cortex_m4f(), + ..CompileConfig::default() + }; + + let cases: Vec<(&str, Vec)> = vec![ + ( + "fadd", + vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Add], + ), + ( + "fmul", + vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Mul], + ), + ( + "fsub", + vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Sub], + ), + ]; + + for (name, ops) in cases { + let result = backend.compile_function(name, &ops, &config); + assert!( + result.is_ok(), + "{name} must compile via the optimized->direct fallback \ + (issue #120), got: {:?}", + result.as_ref().err() + ); + assert!( + !result.unwrap().code.is_empty(), + "{name} must produce non-empty machine code" + ); + } + } + + /// The fallback must still honor the ISA feature gate: f32 on a no-FPU + /// target must fail cleanly (not panic) even on the optimized path. + #[test] + fn test_issue120_f32_div_rejected_on_no_fpu_via_optimized() { + let backend = ArmBackend::new(); + let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Div]; + let config = CompileConfig { + target: TargetSpec::cortex_m3(), + ..CompileConfig::default() + }; + + let result = backend.compile_function("fdiv", &ops, &config); + assert!( + result.is_err(), + "f32.div must be rejected on Cortex-M3 (no FPU), not panic" + ); + } + /// Issue #94: end-to-end byte-size check for the canonical u64-packed /// FFI-return hi32 extract pattern. Compiles two near-identical /// functions — one with the optimized shift-by-32, one with a generic diff --git a/crates/synth-synthesis/src/optimizer_bridge.rs b/crates/synth-synthesis/src/optimizer_bridge.rs index 304923a..328e242 100644 --- a/crates/synth-synthesis/src/optimizer_bridge.rs +++ b/crates/synth-synthesis/src/optimizer_bridge.rs @@ -14,8 +14,8 @@ use crate::Condition; use crate::rules::ArmOp; use synth_cfg::{Cfg, CfgBuilder}; -use synth_core::Result; use synth_core::WasmOp; +use synth_core::{Error, Result}; use synth_opt::{ AlgebraicSimplification, CommonSubexpressionElimination, ConstantFolding, DeadCodeElimination, Instruction, Opcode, OptResult, PassManager, PeepholeOptimization, Reg as OptReg, @@ -364,6 +364,105 @@ impl OptimizerBridge { ) } + /// Returns true if `op` is a scalar f32/f64 operation that the optimized + /// lowering path (`wasm_to_ir` → `ir_to_arm`) cannot handle. + /// + /// Issue #120: `wasm_to_ir` has zero handlers for f32/f64 ops, so every + /// float op falls through the catch-all `_ => Opcode::Nop`. A value-producing + /// float op that emits `Nop` produces no vreg; any downstream consumer then + /// references an unmapped vreg and trips the defensive `get_arm_reg` panic + /// added by PR #101 (the #93 silent-drop class, for floats). + /// + /// The IR `Opcode` enum (`synth-opt`) has no float opcodes at all, so the + /// optimized path cannot lower floats without a large feature addition. + /// Instead, `optimize_full` detects these ops up front and returns a typed + /// `Err`, letting the backend fall back to the non-optimized + /// `InstructionSelector::select_with_stack` path, which *does* handle f32 + /// (VFP/FPU). This includes float→int / int→float conversion ops and float + /// loads/stores, since they too produce or consume float-typed values the + /// optimized path can neither map to a vreg nor lower to VFP. + fn is_unsupported_float_op(op: &WasmOp) -> bool { + matches!( + op, + // f32 arithmetic + WasmOp::F32Add + | WasmOp::F32Sub + | WasmOp::F32Mul + | WasmOp::F32Div + // f32 comparisons + | WasmOp::F32Eq + | WasmOp::F32Ne + | WasmOp::F32Lt + | WasmOp::F32Le + | WasmOp::F32Gt + | WasmOp::F32Ge + // f32 math functions + | WasmOp::F32Abs + | WasmOp::F32Neg + | WasmOp::F32Ceil + | WasmOp::F32Floor + | WasmOp::F32Trunc + | WasmOp::F32Nearest + | WasmOp::F32Sqrt + | WasmOp::F32Min + | WasmOp::F32Max + | WasmOp::F32Copysign + // f32 constants and memory + | WasmOp::F32Const(_) + | WasmOp::F32Load { .. } + | WasmOp::F32Store { .. } + // f32 conversions + | WasmOp::F32ConvertI32S + | WasmOp::F32ConvertI32U + | WasmOp::F32ConvertI64S + | WasmOp::F32ConvertI64U + | WasmOp::F32DemoteF64 + | WasmOp::F32ReinterpretI32 + | WasmOp::I32ReinterpretF32 + | WasmOp::I32TruncF32S + | WasmOp::I32TruncF32U + // f64 arithmetic + | WasmOp::F64Add + | WasmOp::F64Sub + | WasmOp::F64Mul + | WasmOp::F64Div + // f64 comparisons + | WasmOp::F64Eq + | WasmOp::F64Ne + | WasmOp::F64Lt + | WasmOp::F64Le + | WasmOp::F64Gt + | WasmOp::F64Ge + // f64 math functions + | WasmOp::F64Abs + | WasmOp::F64Neg + | WasmOp::F64Ceil + | WasmOp::F64Floor + | WasmOp::F64Trunc + | WasmOp::F64Nearest + | WasmOp::F64Sqrt + | WasmOp::F64Min + | WasmOp::F64Max + | WasmOp::F64Copysign + // f64 constants and memory + | WasmOp::F64Const(_) + | WasmOp::F64Load { .. } + | WasmOp::F64Store { .. } + // f64 conversions + | WasmOp::F64ConvertI32S + | WasmOp::F64ConvertI32U + | WasmOp::F64ConvertI64S + | WasmOp::F64ConvertI64U + | WasmOp::F64PromoteF32 + | WasmOp::F64ReinterpretI64 + | WasmOp::I64ReinterpretF64 + | WasmOp::I64TruncF64S + | WasmOp::I64TruncF64U + | WasmOp::I32TruncF64S + | WasmOp::I32TruncF64U + ) + } + /// Returns how many i64 values a WASM op consumes (0 if not an i64-consuming op) fn i64_operand_count(op: &WasmOp) -> usize { match op { @@ -1389,6 +1488,20 @@ impl OptimizerBridge { )); } + // Issue #120: the optimized path (`wasm_to_ir` → `ir_to_arm`) has no + // handlers for scalar f32/f64 ops — the IR `Opcode` enum has no float + // opcodes. Rather than silently emit `Opcode::Nop` for a value-producing + // float op (which leaves a downstream consumer with an unmapped vreg and + // trips the PR #101 defensive panic), decline the whole module here with + // a typed error. The backend falls back to `select_with_stack`, the + // non-optimized path, which handles f32 via VFP/FPU. + if let Some(float_op) = wasm_ops.iter().find(|op| Self::is_unsupported_float_op(op)) { + return Err(Error::UnsupportedInstruction(format!( + "optimized lowering path does not support float ops ({float_op:?}); \ + the non-optimized instruction selector handles f32 — issue #120" + ))); + } + // Preprocess: convert if-else patterns to select let preprocessed = self.preprocess_wasm_ops(wasm_ops); diff --git a/crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs b/crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs new file mode 100644 index 0000000..7f0cb96 --- /dev/null +++ b/crates/synth-synthesis/tests/regression_issue_120_f32_optimized.rs @@ -0,0 +1,163 @@ +//! Regression test for issue #120 — unmapped-vreg panic on f32 ops in the +//! optimized lowering path. +//! +//! Symptom (pre-fix, with the PR #101 defensive panic active): +//! +//! ```text +//! INFO Compiling function '_ZN17compiler_builtins5float3div3div...E' via backend 'arm'... +//! thread 'main' panicked at crates/synth-synthesis/src/optimizer_bridge.rs:...: +//! synth internal compiler error: vreg v721 has no assigned ARM register and no spill slot. +//! This is a wasm_to_ir bug — likely a wasm op whose result is unmapped (see issue #93). +//! ``` +//! +//! Root cause: `OptimizerBridge::wasm_to_ir`'s `match wasm_op` had zero arms +//! for f32/f64 ops, so every float op fell through `_ => Opcode::Nop`. A +//! value-producing float op (e.g. `f32.div`) that emits `Nop` produces no +//! vreg; any downstream consumer of the float result then resolves an +//! unmapped vreg in `ir_to_arm`'s `get_arm_reg` and trips the defensive panic +//! (the #93 silent-drop class, for floats). +//! +//! Fix (issue #120, Option A): the IR `Opcode` enum has no float opcodes, so +//! the optimized path cannot lower floats without a large feature addition. +//! Instead `optimize_full` now detects scalar f32/f64 ops up front and returns +//! a typed `Err(Error::UnsupportedInstruction(...))`. The ARM backend falls +//! back to the non-optimized `InstructionSelector::select_with_stack` path, +//! which handles f32 via VFP/FPU. +//! +//! This test verifies the optimizer-bridge layer (where the bug lived): +//! `optimize_full` declines float modules with a clean `Err` instead of +//! panicking. The end-to-end backend fallback is covered by tests in +//! `crates/synth-backend/src/arm_backend.rs` (synth-synthesis sits below +//! synth-backend in the dep graph, so `ArmBackend` cannot be used here). + +use synth_synthesis::{OptimizerBridge, WasmOp}; + +/// Run a wasm-op sequence through the path that panicked pre-fix: +/// `optimize_full` followed (on success) by `ir_to_arm`. Returns `Ok` if the +/// optimized path accepted the module, `Err` if it cleanly declined it. +/// Must never panic. +fn try_optimized(wasm_ops: &[WasmOp], num_params: usize) -> Result { + let bridge = OptimizerBridge::new(); + match bridge.optimize_full(wasm_ops) { + Ok((ir, _cfg, _stats)) => { + // ir_to_arm is the function that hosts the defensive get_arm_reg + // panic. Exercising it here proves no unmapped vreg is reached. + let arm = bridge.ir_to_arm(&ir, num_params); + Ok(arm.len()) + } + Err(e) => Err(e.to_string()), + } +} + +/// `f32.div` whose result is consumed by an `f32.store` — the +/// `compiler_builtins float::div` shape from the issue reproducer. +/// +/// Pre-fix: `F32Div` and `F32Store` both fell through to `Opcode::Nop`; the +/// store's source vreg was unmapped and `ir_to_arm` panicked. +/// +/// Post-fix: `optimize_full` returns `Err` — no panic. +#[test] +fn f32_div_does_not_panic_in_optimized_path() { + let ops = vec![ + WasmOp::LocalGet(0), + WasmOp::LocalGet(1), + WasmOp::F32Div, + WasmOp::F32Store { + offset: 0, + align: 2, + }, + ]; + + let result = try_optimized(&ops, /* num_params = */ 2); + assert!( + result.is_err(), + "optimized path must cleanly decline f32.div (issue #120), got Ok: {result:?}" + ); + let msg = result.unwrap_err(); + assert!( + msg.contains("float") && msg.contains("120"), + "error should explain the float limitation and reference issue #120, got: {msg}" + ); +} + +/// `f32.div` whose result is the function return value — exercises the +/// value-on-stack-at-end shape. +#[test] +fn f32_div_returned_does_not_panic() { + let ops = vec![ + WasmOp::LocalGet(0), + WasmOp::LocalGet(1), + WasmOp::F32Div, + WasmOp::End, + ]; + + let result = try_optimized(&ops, 2); + assert!( + result.is_err(), + "optimized path must cleanly decline f32.div return (issue #120), got Ok: {result:?}" + ); +} + +/// A representative spread of f32 ops — arithmetic, comparison, math +/// functions, conversions, loads — must each be declined, not panic. +#[test] +fn assorted_f32_ops_are_declined_cleanly() { + let cases: Vec> = vec![ + vec![WasmOp::F32Const(1.5), WasmOp::F32Const(2.5), WasmOp::F32Add], + vec![WasmOp::F32Const(1.5), WasmOp::F32Const(2.5), WasmOp::F32Mul], + vec![WasmOp::F32Const(1.5), WasmOp::F32Const(2.5), WasmOp::F32Sub], + vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F32Lt], + vec![WasmOp::LocalGet(0), WasmOp::F32Sqrt], + vec![WasmOp::LocalGet(0), WasmOp::F32Abs], + vec![WasmOp::LocalGet(0), WasmOp::F32ConvertI32S], + vec![WasmOp::LocalGet(0), WasmOp::I32TruncF32S], + vec![WasmOp::LocalGet(0), WasmOp::I32ReinterpretF32], + vec![ + WasmOp::LocalGet(0), + WasmOp::F32Load { + offset: 0, + align: 2, + }, + ], + ]; + + for ops in cases { + let result = try_optimized(&ops, 2); + assert!( + result.is_err(), + "optimized path must decline f32 ops without panicking, ops={ops:?} got Ok" + ); + } +} + +/// f64 ops — still unsupported in *both* paths, but the optimized path must at +/// least decline them with a typed error rather than panic. +#[test] +fn f64_ops_are_declined_cleanly() { + let cases: Vec> = vec![ + vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::F64Div], + vec![WasmOp::F64Const(1.0), WasmOp::F64Const(2.0), WasmOp::F64Add], + vec![WasmOp::LocalGet(0), WasmOp::F64Sqrt], + vec![WasmOp::LocalGet(0), WasmOp::F64PromoteF32], + ]; + + for ops in cases { + let result = try_optimized(&ops, 2); + assert!( + result.is_err(), + "optimized path must decline f64 ops without panicking, ops={ops:?} got Ok" + ); + } +} + +/// Sanity guard: a pure integer module must still go through the optimized +/// path unchanged — the float guard must not over-trigger and reject i32 code. +#[test] +fn integer_ops_still_use_optimized_path() { + let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add]; + let result = try_optimized(&ops, 2); + assert!( + result.is_ok(), + "pure i32 module must still optimize, got Err: {result:?}" + ); +}