From 94855b19c669e65fc3aecfe2cdf11290928bf6e6 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Fri, 20 Mar 2026 12:06:47 -0400 Subject: [PATCH 01/28] Add SpcInlinedFrame --- src/engine/compiler/SinglePassCompiler.v3 | 1 + src/engine/x86-64/X86_64SinglePassCompiler.v3 | 15 +++++++++++++++ src/engine/x86-64/X86_64Stack.v3 | 3 +++ 3 files changed, 19 insertions(+) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 774443b17..a10ff3b60 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2082,6 +2082,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl trap_labels.put((reason, label, frames)); return label; } + def getSpcInlinedFrameIp() -> long; def unsupported() { success = false; // XXX: add opcode } diff --git a/src/engine/x86-64/X86_64SinglePassCompiler.v3 b/src/engine/x86-64/X86_64SinglePassCompiler.v3 index 88f62602a..23c5fae6f 100644 --- a/src/engine/x86-64/X86_64SinglePassCompiler.v3 +++ b/src/engine/x86-64/X86_64SinglePassCompiler.v3 @@ -1130,6 +1130,9 @@ class X86_64SinglePassCompiler extends SinglePassCompiler { state.push(b.kindFlagsMatching(kind, IN_REG), b.reg, 0); return true; } + def getSpcInlinedFrameIp() -> long { + return INLINED_FRAME_STUB.start - Pointer.NULL; + } } def ucontext_rip_offset = 168; @@ -1323,6 +1326,12 @@ class X86_64SpcTrapsStub extends X86_64SpcCode { } } +// Marker for reconstructed inlined frames in stack traces. +// Tells the stack walker that it should look inside the frame to find the function's pc. +class X86_64SpcInlinedFrame extends X86_64SpcCode { + new() super("inlined-frame", Pointer.NULL, Pointer.NULL) { } +} + // The lazy-compile stub needs special handling in the Virgil runtime because it has // a frame that stores the function being compiled. 
class X86_64SpcCompileStub extends RiUserCode { @@ -1364,6 +1373,8 @@ def LAZY_COMPILE_STUB = X86_64PreGenStub.new("spc-lazy-compile", X86_64SpcCompil def TIERUP_COMPILE_STUB = X86_64PreGenStub.new("spc-tierup-compile", X86_64SpcCompileStub.new("tierup"), genTierUpCompileStub); def TRAPS_STUB = X86_64SpcTrapsStub.new(); def TRAPS_PREGEN = X86_64PreGenStub.new("spc-trap", TRAPS_STUB, genTrapsStub); +def INLINED_FRAME_STUB = X86_64SpcInlinedFrame.new(); +def INLINED_FRAME_PREGEN = X86_64PreGenStub.new("spc-inlined-frame", INLINED_FRAME_STUB, genSpcInlinedFrame); def genSpcEntryFunc(ic: X86_64InterpreterCode, w: DataWriter) { if (SpcTuning.disable) return; @@ -1468,6 +1479,10 @@ def genTrapsStub(ic: X86_64InterpreterCode, w: DataWriter) { w.skipN(skip); } } +def genSpcInlinedFrame(ic: X86_64InterpreterCode, w: DataWriter) { + var masm = X86_64MacroAssembler.new(w, X86_64MasmRegs.CONFIG); + masm.emit_intentional_crash(); // do not execute this +} def codePointer(f: P -> R) -> Pointer { return CiRuntime.unpackClosure(f).0; } diff --git a/src/engine/x86-64/X86_64Stack.v3 b/src/engine/x86-64/X86_64Stack.v3 index 5849cc754..cb3e94c93 100644 --- a/src/engine/x86-64/X86_64Stack.v3 +++ b/src/engine/x86-64/X86_64Stack.v3 @@ -144,6 +144,7 @@ class X86_64Stack extends WasmStack { null => break; x: X86_64InterpreterCode => if (f != null && !f(retip, code, pos, param)) return (true, pos); x: X86_64SpcModuleCode => if (f != null && !f(retip, code, pos, param)) return (true, pos); + x: X86_64SpcInlinedFrame => if (f != null && !f(retip, code, pos, param)) return (true, pos); x: X86_64SpcTrapsStub => if (f != null && !f(retip, code, pos, param)) return (true, pos); x: X86_64ReturnParentStub => { if (stack.parent == null || !continue_to_parent) { @@ -955,6 +956,7 @@ class X86_64FrameAccessor(stack: X86_64Stack, sp: Pointer, decl: FuncDecl) exten var code = RiRuntime.findUserCode(ip); match (code) { x: X86_64SpcModuleCode => cached_pc = x.lookupTopPc(ip, true); + x: 
X86_64SpcInlinedFrame => cached_pc = (sp + X86_64InterpreterFrame.curpc.offset).load(); x: X86_64InterpreterCode => cached_pc = X86_64Interpreter.computePCFromFrame(sp); x: X86_64SpcTrapsStub => cached_pc = (sp + X86_64InterpreterFrame.curpc.offset).load(); _ => cached_pc = -1; @@ -982,6 +984,7 @@ class X86_64FrameAccessor(stack: X86_64Stack, sp: Pointer, decl: FuncDecl) exten match (code) { x: X86_64InterpreterCode => ; x: X86_64SpcCode => ; + x: X86_64SpcInlinedFrame => ; // in the future, we could indicate inlining depth in the frame _ => return depth; } depth++; From 9a38ea7e595a27eb33950baae14e742175631120 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Mon, 23 Mar 2026 03:01:59 -0400 Subject: [PATCH 02/28] Add frame reconstructions methods and guard reconstruction points --- src/engine/compiler/SinglePassCompiler.v3 | 164 +++++++++++++++--- src/engine/x86-64/X86_64SinglePassCompiler.v3 | 30 +++- 2 files changed, 166 insertions(+), 28 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index a10ff3b60..b1efa797a 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -182,7 +182,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Emit function entry probe, if any. 
if (!FeatureDisable.entryProbes && func.entry_probed) { var probe = Instrumentation.getLocalProbe(module, func.func_index, 0); - emitProbe0(0, probe); + withReconstructedInlinedFrames(fun => + emitProbe0(0, probe)); } masm.current_fid = func.func_index; @@ -214,8 +215,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl masm.bindLabel(label); if (frames.length > 1) { - // no inlining yet: this should never happen - System.error("SpcError", "attempt to emit trap in inlined context"); + unrefRegs(); + emitReconstructStackFrames(frames); } else { masm.emit_mov_m_i(xenv.pc_slot, label.create_pos); } @@ -395,11 +396,24 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl last_probe = it.pc; if (orig_op != Opcode.LOOP && orig_op != Opcode.END) emitProbe(); } + // Guards compiler code with frame reconstruction (if necessary). + def withReconstructedInlinedFrames(emit: void -> void) { + if (isInlined()) { + unrefRegs(); + def space = emitReconstructStackFrames(snapshotFrames()); + emit(); + if (space > 0) masm.emit_addw_r_i(regs.sp, space); + } else { + emit(); + } + + } def emitProbe() { if (last_probe == 0) return; var probe = Instrumentation.getLocalProbe(module, func.func_index, last_probe); last_probe = 0; - emitProbe0(it.pc, probe); + withReconstructedInlinedFrames(fun => + emitProbe0(it.pc, probe)); if (Trace.compiler) traceOpcodeAndStack(true); } def emitProbe0(pc: int, probe: Probe) { @@ -859,18 +873,21 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } def visitCallDirect(op: Opcode, index: u31, tailCall: bool) { var func = module.functions[index]; - var retpt = masm.newLabel(it.pc), wasmcall_label = masm.newLabel(it.pc); - // Load the instance (which must happen before frame is unwound). 
- var vsp_reg = allocTmpFixed(ValueKind.REF, regs.vsp); - var func_reg = allocTmpFixed(ValueKind.REF, regs.func_arg); - var tmp = allocTmp(ValueKind.REF); - emit_load_instance(tmp); - // Load the function, XXX: skip and compute function from instance + code on stack? - masm.emit_v3_Instance_functions_r_r(func_reg, tmp); - masm.emit_v3_Array_elem_r_ri(ValueKind.REF, func_reg, func_reg, func.func_index); + withReconstructedInlinedFrames(fun { + var retpt = masm.newLabel(it.pc), wasmcall_label = masm.newLabel(it.pc); + // Load the instance (which must happen before frame is unwound). + var vsp_reg = allocTmpFixed(ValueKind.REF, regs.vsp); + var func_reg = allocTmpFixed(ValueKind.REF, regs.func_arg); + var tmp = allocTmp(ValueKind.REF); + emit_load_instance(tmp); + + // Load the function, XXX: skip and compute function from instance + code on stack? + masm.emit_v3_Instance_functions_r_r(func_reg, tmp); + masm.emit_v3_Array_elem_r_ri(ValueKind.REF, func_reg, func_reg, func.func_index); - emitCallToReg(func.sig, func_reg, vsp_reg, tmp, func.imp != null, tailCall); + emitCallToReg(func.sig, func_reg, vsp_reg, tmp, func.imp != null, tailCall); + }); } def emitCallToReg(sig: SigDecl, func_reg: Reg, vsp_reg: Reg, tmp: Reg, checkHostCall: bool, tailCall: bool) { var retpt = masm.newLabel(it.pc), wasmcall_label = masm.newLabel(it.pc); @@ -1935,12 +1952,20 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl state.emitSaveAll(resolver, runtimeSpillMode); emit_compute_vsp(regs.vsp, state.sp); masm.emit_store_curstack_vsp(regs.vsp); - masm.emit_get_curstack(regs.runtime_arg0); - masm.emit_v3_set_X86_64Stack_rsp_r_r(regs.runtime_arg0, regs.sp); - masm.emit_push_X86_64Stack_rsp_r_r(regs.runtime_arg0); - emit_load_instance(regs.runtime_arg1); - masm.emit_mov_r_i(regs.runtime_arg2, arg1); - masm.emit_call_runtime_op(op); + + def emit = fun { + masm.emit_get_curstack(regs.runtime_arg0); + masm.emit_v3_set_X86_64Stack_rsp_r_r(regs.runtime_arg0, regs.sp); 
+ masm.emit_push_X86_64Stack_rsp_r_r(regs.runtime_arg0); + emit_load_instance(regs.runtime_arg1); + masm.emit_mov_r_i(regs.runtime_arg2, arg1); + masm.emit_call_runtime_op(op); + }; + // Reconstruct stack frames across runtime calls that might (Wasm-level) trap. + if (canTrap) + withReconstructedInlinedFrames(emit); + else + emit(); masm.emit_get_curstack(regs.scratch); masm.emit_pop_X86_64Stack_rsp_r_r(regs.scratch); dropN(args); @@ -1952,13 +1977,21 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl state.emitSaveAll(resolver, runtimeSpillMode); emit_compute_vsp(regs.vsp, state.sp); masm.emit_store_curstack_vsp(regs.vsp); - masm.emit_get_curstack(regs.runtime_arg0); - masm.emit_v3_set_X86_64Stack_rsp_r_r(regs.runtime_arg0, regs.sp); - masm.emit_push_X86_64Stack_rsp_r_r(regs.runtime_arg0); - emit_load_instance(regs.runtime_arg1); - masm.emit_mov_r_i(regs.runtime_arg2, arg1); - masm.emit_mov_r_i(regs.runtime_arg3, arg2); - masm.emit_call_runtime_op(op); + + def emit = fun { + masm.emit_get_curstack(regs.runtime_arg0); + masm.emit_v3_set_X86_64Stack_rsp_r_r(regs.runtime_arg0, regs.sp); + masm.emit_push_X86_64Stack_rsp_r_r(regs.runtime_arg0); + emit_load_instance(regs.runtime_arg1); + masm.emit_mov_r_i(regs.runtime_arg2, arg1); + masm.emit_mov_r_i(regs.runtime_arg3, arg2); + masm.emit_call_runtime_op(op); + }; + // Reconstruct stack frames across runtime calls that might (Wasm-level) trap. + if (canTrap) + withReconstructedInlinedFrames(emit); + else + emit(); masm.emit_get_curstack(regs.scratch); masm.emit_pop_X86_64Stack_rsp_r_r(regs.scratch); dropN(args); @@ -2083,6 +2116,83 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl return label; } def getSpcInlinedFrameIp() -> long; + // Emit code to materialize stack frames for each inlined function. 
+ def emitReconstructStackFrames(frames: Array) -> int { + // Metrics.spc_static_reconst.val++; + // masm.emit_inc_metric(Metrics.spc_dynamic_reconst); + def real_frame = frames[0]; + masm.emit_mov_m_i(xenv.pc_slot, real_frame.pc); + + // Use inlined frame stub IP as return address for all reconstructed frames + var return_addr = getSpcInlinedFrameIp(); + var total_space = 0; + + // load instance + var inst_reg = allocTmp(ValueKind.REF); + //emit_load_instance(inst_reg); + masm.emit_mov_r_m(ValueKind.REF, inst_reg, frame.instance_slot); + var mem_reg = allocTmp(ValueKind.REF); + masm.emit_mov_r_m(ValueKind.REF, mem_reg, frame.mem0_base_slot); + // Load instance.functions + def func_reg = allocTmp(ValueKind.REF); + masm.emit_v3_Instance_functions_r_r(func_reg, inst_reg); + // use same vfp for all frames + def vfp_reg = allocTmp(ValueKind.REF); + masm.emit_mov_r_m(ValueKind.REF, vfp_reg, frame.vfp_slot); + var wasm_func_reg = allocTmp(ValueKind.REF); + + var inl_inst_reg: Reg, inl_mem0_reg: Reg; + if (is_inlined) { + inl_inst_reg = allocTmp(ValueKind.REF); + inl_mem0_reg = allocTmp(ValueKind.REF); + masm.emit_mov_r_m(ValueKind.REF, inl_inst_reg, frame.inlined_instance_slot); + masm.emit_mov_r_m(ValueKind.REF, inl_mem0_reg, frame.inlined_mem0_base_slot); + } + + // Process the inlined frames (skip the outermost which already exists on native stack) + for (i = 1; i < frames.length; i++) { + var frame_info = frames[i]; + + // Push inlined frame stub IP as return address + masm.emit_subw_r_i(regs.sp, 8); + masm.emit_mov_m_l(MasmAddr(regs.sp, 0), return_addr); + total_space += 8; + + // Allocate concrete stack frame for inlined function + masm.emit_subw_r_i(regs.sp, frame.frameSize); + total_space += frame.frameSize; + + // get functions[func_index] and save into frame + masm.emit_v3_Array_elem_r_ri(ValueKind.REF, wasm_func_reg, func_reg, frame_info.func.func_index); + masm.emit_mov_m_r(ValueKind.REF, frame.wasm_func_slot, wasm_func_reg); + + // Save instance to 
frame.instance_slot + masm.emit_mov_m_r(ValueKind.REF, frame.instance_slot, inst_reg); + + // Save mem0 base + masm.emit_mov_m_r(ValueKind.REF, frame.mem0_base_slot, mem_reg); + + // use same vfp for all frames + masm.emit_mov_m_r(ValueKind.REF, frame.vfp_slot, vfp_reg); + + // Save PC into frame.pc_slot + masm.emit_mov_m_i(frame.pc_slot, frame_info.pc); + + // Clear FrameAccessor + masm.emit_mov_m_l(frame.accessor_slot, 0); + + // if an inlined whamm probe, also grab inlined slots + if (is_inlined) { + masm.emit_mov_m_r(ValueKind.REF, frame.inlined_instance_slot, inl_inst_reg); + masm.emit_mov_m_r(ValueKind.REF, frame.inlined_mem0_base_slot, inl_mem0_reg); + } else { + masm.emit_mov_m_l(frame.inlined_instance_slot, 0); + masm.emit_mov_m_l(frame.inlined_mem0_base_slot, 0); + } + } + + return total_space; + } def unsupported() { success = false; // XXX: add opcode } diff --git a/src/engine/x86-64/X86_64SinglePassCompiler.v3 b/src/engine/x86-64/X86_64SinglePassCompiler.v3 index 23c5fae6f..b9d7728c3 100644 --- a/src/engine/x86-64/X86_64SinglePassCompiler.v3 +++ b/src/engine/x86-64/X86_64SinglePassCompiler.v3 @@ -1256,7 +1256,35 @@ class X86_64SpcModuleCode extends X86_64SpcCode { } // Reconstructs inlined interpreter frames for an inlined hardware trap context. // Returns the new rsp to write into the ucontext (top of stack). - private def reconstructInlinedFramesForTrap(r_rsp: Pointer, inline_ctx: List) -> Pointer; + private def reconstructInlinedFramesForTrap(r_rsp: Pointer, inline_ctx: List) -> Pointer { + def frames: Array = Lists.toArray(inline_ctx); + def outer = frames[frames.length - 1]; + def inlined = frames[0 ... 
(frames.length - 1)]; + def count = inlined.length; + + // set outermost pc in the real frame + (r_rsp + X86_64InterpreterFrame.curpc.offset).store(outer.pc); + + // Read instance from the real outer frame (shared across all inlined frames) + var instance = (r_rsp + X86_64InterpreterFrame.instance.offset).load(); + + // Push inlined frames + for (i = count - 1; i >= 0; i--) { + var fid = inlined[i].func_index; + var pc = inlined[i].pc; + + r_rsp += -8; + r_rsp.store(INLINED_FRAME_STUB.start); + + r_rsp += -X86_64InterpreterFrame.size; // move rsp? + // write func, pc, frame accessor + var wasm_func = WasmFunction.!(instance.functions[fid]); + (r_rsp + X86_64InterpreterFrame.wasm_func.offset).store(wasm_func); + (r_rsp + X86_64InterpreterFrame.curpc.offset).store(pc); + (r_rsp + X86_64InterpreterFrame.accessor.offset).store(null); + } + return r_rsp; + } // Look up the source {pc} of a location {i} in this code. Returns {-1} if no exact entry is found. // Return addresses are treated differently than other addresses in the code. def lookupPc(ip: Pointer, isRetAddr: bool) -> List { From 731c9e7c8187c1945ea7a4cfe47de513ac60e804 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Mon, 23 Mar 2026 04:03:12 -0400 Subject: [PATCH 03/28] Move function to a more sensible spot --- src/engine/compiler/SinglePassCompiler.v3 | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 8b5792612..83c5def7a 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -396,18 +396,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl last_probe = it.pc; if (orig_op != Opcode.LOOP && orig_op != Opcode.END) emitProbe(); } - // Guards compiler code with frame reconstruction (if necessary). 
- def withReconstructedInlinedFrames(emit: void -> void) { - if (isInlined()) { - unrefRegs(); - def space = emitReconstructStackFrames(snapshotFrames()); - emit(); - if (space > 0) masm.emit_addw_r_i(regs.sp, space); - } else { - emit(); - } - - } def emitProbe() { if (last_probe == 0) return; var probe = Instrumentation.getLocalProbe(module, func.func_index, last_probe); @@ -2197,6 +2185,18 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl return total_space; } + // Guards compiler code with frame reconstruction (if necessary). + def withReconstructedInlinedFrames(emit: void -> void) { + if (isInlined()) { + unrefRegs(); + def space = emitReconstructStackFrames(snapshotFrames()); + emit(); + if (space > 0) masm.emit_addw_r_i(regs.sp, space); + } else { + emit(); + } + + } def unsupported() { success = false; // XXX: add opcode } From 194f26d9e02b88f13c460faf419a379c60c5410c Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 25 Mar 2026 11:20:47 -0400 Subject: [PATCH 04/28] Make the if statement one line --- src/engine/compiler/SinglePassCompiler.v3 | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 83c5def7a..5a39a058d 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -1954,10 +1954,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl masm.emit_call_runtime_op(op); }; // Reconstruct stack frames across runtime calls that might (Wasm-level) trap. 
- if (canTrap) - withReconstructedInlinedFrames(emit); - else - emit(); + if (canTrap) withReconstructedInlinedFrames(emit); else emit(); masm.emit_get_curstack(regs.scratch); masm.emit_pop_X86_64Stack_rsp_r_r(regs.scratch); dropN(args); @@ -1980,10 +1977,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl masm.emit_call_runtime_op(op); }; // Reconstruct stack frames across runtime calls that might (Wasm-level) trap. - if (canTrap) - withReconstructedInlinedFrames(emit); - else - emit(); + if (canTrap) withReconstructedInlinedFrames(emit); else emit(); masm.emit_get_curstack(regs.scratch); masm.emit_pop_X86_64Stack_rsp_r_r(regs.scratch); dropN(args); From f021600c7f639efb9f66f13fd830c7012f618cba Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 25 Mar 2026 23:44:13 -0400 Subject: [PATCH 05/28] Compute vfp for every inlined frame --- src/engine/compiler/SinglePassCompiler.v3 | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 5a39a058d..ed0e6a1ea 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2122,9 +2122,9 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Load instance.functions def func_reg = allocTmp(ValueKind.REF); masm.emit_v3_Instance_functions_r_r(func_reg, inst_reg); - // use same vfp for all frames - def vfp_reg = allocTmp(ValueKind.REF); - masm.emit_mov_r_m(ValueKind.REF, vfp_reg, frame.vfp_slot); + // base vfp of the outermost frame + def base_vfp_reg = allocTmp(ValueKind.REF); + masm.emit_mov_r_m(ValueKind.REF, base_vfp_reg, frame.vfp_slot); var wasm_func_reg = allocTmp(ValueKind.REF); var inl_inst_reg: Reg, inl_mem0_reg: Reg; @@ -2158,8 +2158,11 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Save mem0 base masm.emit_mov_m_r(ValueKind.REF, 
frame.mem0_base_slot, mem_reg); - // use same vfp for all frames - masm.emit_mov_m_r(ValueKind.REF, frame.vfp_slot, vfp_reg); + // Save vfp for every inlined frame = base_vfp + local_base_sp * slot_size + masm.emit_mov_r_r(ValueKind.REF, wasm_func_reg, base_vfp_reg); // reusing wasm_func_reg as scratch + var offset = int.view(frame_info.local_base_sp) * masm.valuerep.slot_size; + if (offset != 0) masm.emit_addw_r_i(wasm_func_reg, offset); + masm.emit_mov_m_r(ValueKind.REF, frame.vfp_slot, wasm_func_reg); // Save PC into frame.pc_slot masm.emit_mov_m_i(frame.pc_slot, frame_info.pc); From 9bf2aebc8f5f1a43befa7486d3f753b3e244406f Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 25 Mar 2026 23:44:59 -0400 Subject: [PATCH 06/28] Uncomment stack reconstruction metrics --- src/engine/compiler/SinglePassCompiler.v3 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index ed0e6a1ea..5556b1302 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2104,8 +2104,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl def getSpcInlinedFrameIp() -> long; // Emit code to materialize stack frames for each inlined function. 
def emitReconstructStackFrames(frames: Array) -> int { - // Metrics.spc_static_reconst.val++; - // masm.emit_inc_metric(Metrics.spc_dynamic_reconst); + Metrics.spc_static_reconst.val++; + masm.emit_inc_metric(Metrics.spc_dynamic_reconst); def real_frame = frames[0]; masm.emit_mov_m_i(xenv.pc_slot, real_frame.pc); From e4b2d4a6dffb89b298d3bea6d8014e0fe40c7d75 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Thu, 26 Mar 2026 01:51:06 -0400 Subject: [PATCH 07/28] Use optimized vfp calculation --- src/engine/compiler/SinglePassCompiler.v3 | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 5556b1302..c8874cfb6 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2122,9 +2122,9 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Load instance.functions def func_reg = allocTmp(ValueKind.REF); masm.emit_v3_Instance_functions_r_r(func_reg, inst_reg); - // base vfp of the outermost frame - def base_vfp_reg = allocTmp(ValueKind.REF); - masm.emit_mov_r_m(ValueKind.REF, base_vfp_reg, frame.vfp_slot); + def vfp_reg = allocTmp(ValueKind.REF); + masm.emit_mov_r_m(ValueKind.REF, vfp_reg, frame.vfp_slot); + var prev_base_sp = int.view(frames[0].local_base_sp); var wasm_func_reg = allocTmp(ValueKind.REF); var inl_inst_reg: Reg, inl_mem0_reg: Reg; @@ -2158,11 +2158,12 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Save mem0 base masm.emit_mov_m_r(ValueKind.REF, frame.mem0_base_slot, mem_reg); - // Save vfp for every inlined frame = base_vfp + local_base_sp * slot_size - masm.emit_mov_r_r(ValueKind.REF, wasm_func_reg, base_vfp_reg); // reusing wasm_func_reg as scratch - var offset = int.view(frame_info.local_base_sp) * masm.valuerep.slot_size; - if (offset != 0) masm.emit_addw_r_i(wasm_func_reg, offset); - 
masm.emit_mov_m_r(ValueKind.REF, frame.vfp_slot, wasm_func_reg); + // Step vfp_reg by change in local_base_sp from previous frame + def cur_base_sp = int.view(frame_info.local_base_sp); + var delta = (cur_base_sp - prev_base_sp) * masm.valuerep.slot_size; + if (delta != 0) masm.emit_addw_r_i(vfp_reg, delta); + masm.emit_mov_m_r(ValueKind.REF, frame.vfp_slot, vfp_reg); + prev_base_sp = cur_base_sp; // Save PC into frame.pc_slot masm.emit_mov_m_i(frame.pc_slot, frame_info.pc); From a7044bbd852e21263758be0c35936ba5f36056c5 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Tue, 31 Mar 2026 10:22:27 -0400 Subject: [PATCH 08/28] Use a single `rsp` adjustment in reconstructing stack frames --- src/engine/compiler/SinglePassCompiler.v3 | 53 +++++++++++++---------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index c8874cfb6..ee0808866 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2111,7 +2111,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Use inlined frame stub IP as return address for all reconstructed frames var return_addr = getSpcInlinedFrameIp(); - var total_space = 0; // load instance var inst_reg = allocTmp(ValueKind.REF); @@ -2135,49 +2134,59 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl masm.emit_mov_r_m(ValueKind.REF, inl_mem0_reg, frame.inlined_mem0_base_slot); } + // Pre-allocate stack space for all reconstructed frames at once. 
+ def total_space = (frames.length - 1) * (frame.frameSize + 8); + masm.emit_subw_r_i(regs.sp, total_space); + // Process the inlined frames (skip the outermost which already exists on native stack) for (i = 1; i < frames.length; i++) { var frame_info = frames[i]; + def frame_offset = (frames.length - i) * (frame.frameSize + 8); - // Push inlined frame stub IP as return address - masm.emit_subw_r_i(regs.sp, 8); - masm.emit_mov_m_l(MasmAddr(regs.sp, 0), return_addr); - total_space += 8; - - // Allocate concrete stack frame for inlined function - masm.emit_subw_r_i(regs.sp, frame.frameSize); - total_space += frame.frameSize; + // Write inlined frame stub IP as return address + def retaddr_slot = MasmAddr(regs.sp, frame_offset + frame.frameSize); + masm.emit_mov_m_l(retaddr_slot, return_addr); // get functions[func_index] and save into frame + def wasm_func_slot = frame.wasm_func_slot.plus(frame_offset); masm.emit_v3_Array_elem_r_ri(ValueKind.REF, wasm_func_reg, func_reg, frame_info.func.func_index); - masm.emit_mov_m_r(ValueKind.REF, frame.wasm_func_slot, wasm_func_reg); + masm.emit_mov_m_r(ValueKind.REF, wasm_func_slot, wasm_func_reg); - // Save instance to frame.instance_slot - masm.emit_mov_m_r(ValueKind.REF, frame.instance_slot, inst_reg); + // Save instance + def instance_slot = frame.instance_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, instance_slot, inst_reg); // Save mem0 base - masm.emit_mov_m_r(ValueKind.REF, frame.mem0_base_slot, mem_reg); + def mem0_base_slot = frame.mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, mem0_base_slot, mem_reg); - // Step vfp_reg by change in local_base_sp from previous frame + // Step vfp_reg by change in local_base_sp from previous frame and save + def vfp_slot = frame.vfp_slot.plus(frame_offset); def cur_base_sp = int.view(frame_info.local_base_sp); var delta = (cur_base_sp - prev_base_sp) * masm.valuerep.slot_size; if (delta != 0) masm.emit_addw_r_i(vfp_reg, delta); - 
masm.emit_mov_m_r(ValueKind.REF, frame.vfp_slot, vfp_reg); + masm.emit_mov_m_r(ValueKind.REF, vfp_slot, vfp_reg); prev_base_sp = cur_base_sp; - // Save PC into frame.pc_slot - masm.emit_mov_m_i(frame.pc_slot, frame_info.pc); + // Save PC + def pc_slot = frame.pc_slot.plus(frame_offset); + masm.emit_mov_m_i(pc_slot, frame_info.pc); // Clear FrameAccessor - masm.emit_mov_m_l(frame.accessor_slot, 0); + def accessor_slot = frame.accessor_slot.plus(frame_offset); + masm.emit_mov_m_l(accessor_slot, 0); // if an inlined whamm probe, also grab inlined slots if (is_inlined) { - masm.emit_mov_m_r(ValueKind.REF, frame.inlined_instance_slot, inl_inst_reg); - masm.emit_mov_m_r(ValueKind.REF, frame.inlined_mem0_base_slot, inl_mem0_reg); + def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, inl_instance_slot, inl_inst_reg); + def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, inl_mem0_base_slot, inl_mem0_reg); } else { - masm.emit_mov_m_l(frame.inlined_instance_slot, 0); - masm.emit_mov_m_l(frame.inlined_mem0_base_slot, 0); + def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); + masm.emit_mov_m_l(inl_instance_slot, 0); + def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_l(inl_mem0_base_slot, 0); } } From 7b8ae6d24dff35a0648d57cd274cdffa9bef2be2 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Tue, 31 Mar 2026 10:58:14 -0400 Subject: [PATCH 09/28] Separate single-frame reconstruction into its own method --- src/engine/compiler/SinglePassCompiler.v3 | 95 ++++++++++++----------- 1 file changed, 49 insertions(+), 46 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index ee0808866..c9a9a904d 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2109,12 +2109,8 @@ class SinglePassCompiler(xenv: 
SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl def real_frame = frames[0]; masm.emit_mov_m_i(xenv.pc_slot, real_frame.pc); - // Use inlined frame stub IP as return address for all reconstructed frames - var return_addr = getSpcInlinedFrameIp(); - // load instance var inst_reg = allocTmp(ValueKind.REF); - //emit_load_instance(inst_reg); masm.emit_mov_r_m(ValueKind.REF, inst_reg, frame.instance_slot); var mem_reg = allocTmp(ValueKind.REF); masm.emit_mov_r_m(ValueKind.REF, mem_reg, frame.mem0_base_slot); @@ -2140,57 +2136,64 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Process the inlined frames (skip the outermost which already exists on native stack) for (i = 1; i < frames.length; i++) { - var frame_info = frames[i]; - def frame_offset = (frames.length - i) * (frame.frameSize + 8); + def frame_info = frames[i]; + def cur_base_sp = int.view(frame_info.local_base_sp); + def delta = (cur_base_sp - prev_base_sp) * masm.valuerep.slot_size; + emitReconstructStackFrame(frame_info, frames.length - 1, delta, + wasm_func_reg, func_reg, inst_reg, mem_reg, vfp_reg, inl_inst_reg, inl_mem0_reg); + prev_base_sp = cur_base_sp; + } - // Write inlined frame stub IP as return address - def retaddr_slot = MasmAddr(regs.sp, frame_offset + frame.frameSize); - masm.emit_mov_m_l(retaddr_slot, return_addr); + return total_space; + } + def emitReconstructStackFrame(spcFrame: SpcFrame, offset: int, vfp_delta: int, + wasm_func_reg: Reg, func_reg: Reg, inst_reg: Reg, mem_reg: Reg, vfp_reg: Reg, inl_inst_reg: Reg, inl_mem0_reg: Reg) { + // Use inlined frame stub IP as return address for all reconstructed frames + def return_addr = getSpcInlinedFrameIp(); - // get functions[func_index] and save into frame - def wasm_func_slot = frame.wasm_func_slot.plus(frame_offset); - masm.emit_v3_Array_elem_r_ri(ValueKind.REF, wasm_func_reg, func_reg, frame_info.func.func_index); - masm.emit_mov_m_r(ValueKind.REF, wasm_func_slot, wasm_func_reg); + def frame_offset = 
offset * (frame.frameSize + 8); + // Write inlined frame stub IP as return address + def retaddr_slot = MasmAddr(regs.sp, frame_offset + frame.frameSize); + masm.emit_mov_m_l(retaddr_slot, return_addr); - // Save instance - def instance_slot = frame.instance_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, instance_slot, inst_reg); + // get functions[func_index] and save into frame + def wasm_func_slot = frame.wasm_func_slot.plus(frame_offset); + masm.emit_v3_Array_elem_r_ri(ValueKind.REF, wasm_func_reg, func_reg, spcFrame.func.func_index); + masm.emit_mov_m_r(ValueKind.REF, wasm_func_slot, wasm_func_reg); - // Save mem0 base - def mem0_base_slot = frame.mem0_base_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, mem0_base_slot, mem_reg); + // Save instance + def instance_slot = frame.instance_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, instance_slot, inst_reg); - // Step vfp_reg by change in local_base_sp from previous frame and save - def vfp_slot = frame.vfp_slot.plus(frame_offset); - def cur_base_sp = int.view(frame_info.local_base_sp); - var delta = (cur_base_sp - prev_base_sp) * masm.valuerep.slot_size; - if (delta != 0) masm.emit_addw_r_i(vfp_reg, delta); - masm.emit_mov_m_r(ValueKind.REF, vfp_slot, vfp_reg); - prev_base_sp = cur_base_sp; + // Save mem0 base + def mem0_base_slot = frame.mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, mem0_base_slot, mem_reg); - // Save PC - def pc_slot = frame.pc_slot.plus(frame_offset); - masm.emit_mov_m_i(pc_slot, frame_info.pc); + // Step vfp_reg by change in local_base_sp from previous frame and save + if (vfp_delta != 0) masm.emit_addw_r_i(vfp_reg, vfp_delta); + def vfp_slot = frame.vfp_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, vfp_slot, vfp_reg); - // Clear FrameAccessor - def accessor_slot = frame.accessor_slot.plus(frame_offset); - masm.emit_mov_m_l(accessor_slot, 0); + // Save PC + def pc_slot = frame.pc_slot.plus(frame_offset); + 
masm.emit_mov_m_i(pc_slot, spcFrame.pc); - // if an inlined whamm probe, also grab inlined slots - if (is_inlined) { - def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, inl_instance_slot, inl_inst_reg); - def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, inl_mem0_base_slot, inl_mem0_reg); - } else { - def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); - masm.emit_mov_m_l(inl_instance_slot, 0); - def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); - masm.emit_mov_m_l(inl_mem0_base_slot, 0); - } - } + // Clear FrameAccessor + def accessor_slot = frame.accessor_slot.plus(frame_offset); + masm.emit_mov_m_l(accessor_slot, 0); - return total_space; + // if an inlined whamm probe, also grab inlined slots + if (is_inlined) { + def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, inl_instance_slot, inl_inst_reg); + def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, inl_mem0_base_slot, inl_mem0_reg); + } else { + def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); + masm.emit_mov_m_l(inl_instance_slot, 0); + def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_l(inl_mem0_base_slot, 0); + } } // Guards compiler code with frame reconstruction (if necessary). def withReconstructedInlinedFrames(emit: void -> void) { From 036753d8a37c19e04c2c0f06e14df1cb3b96f9ce Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Tue, 31 Mar 2026 11:20:35 -0400 Subject: [PATCH 10/28] Revert "Separate single-frame reconstruction into its own method" This reverts commit 7b8ae6d24dff35a0648d57cd274cdffa9bef2be2. 
--- src/engine/compiler/SinglePassCompiler.v3 | 95 +++++++++++------------ 1 file changed, 46 insertions(+), 49 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index c9a9a904d..ee0808866 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2109,8 +2109,12 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl def real_frame = frames[0]; masm.emit_mov_m_i(xenv.pc_slot, real_frame.pc); + // Use inlined frame stub IP as return address for all reconstructed frames + var return_addr = getSpcInlinedFrameIp(); + // load instance var inst_reg = allocTmp(ValueKind.REF); + //emit_load_instance(inst_reg); masm.emit_mov_r_m(ValueKind.REF, inst_reg, frame.instance_slot); var mem_reg = allocTmp(ValueKind.REF); masm.emit_mov_r_m(ValueKind.REF, mem_reg, frame.mem0_base_slot); @@ -2136,64 +2140,57 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Process the inlined frames (skip the outermost which already exists on native stack) for (i = 1; i < frames.length; i++) { - def frame_info = frames[i]; - def cur_base_sp = int.view(frame_info.local_base_sp); - def delta = (cur_base_sp - prev_base_sp) * masm.valuerep.slot_size; - emitReconstructStackFrame(frame_info, frames.length - 1, delta, - wasm_func_reg, func_reg, inst_reg, mem_reg, vfp_reg, inl_inst_reg, inl_mem0_reg); - prev_base_sp = cur_base_sp; - } + var frame_info = frames[i]; + def frame_offset = (frames.length - i) * (frame.frameSize + 8); - return total_space; - } - def emitReconstructStackFrame(spcFrame: SpcFrame, offset: int, vfp_delta: int, - wasm_func_reg: Reg, func_reg: Reg, inst_reg: Reg, mem_reg: Reg, vfp_reg: Reg, inl_inst_reg: Reg, inl_mem0_reg: Reg) { - // Use inlined frame stub IP as return address for all reconstructed frames - def return_addr = getSpcInlinedFrameIp(); + // Write inlined frame stub IP as return address + def 
retaddr_slot = MasmAddr(regs.sp, frame_offset + frame.frameSize); + masm.emit_mov_m_l(retaddr_slot, return_addr); - def frame_offset = offset * (frame.frameSize + 8); - // Write inlined frame stub IP as return address - def retaddr_slot = MasmAddr(regs.sp, frame_offset + frame.frameSize); - masm.emit_mov_m_l(retaddr_slot, return_addr); + // get functions[func_index] and save into frame + def wasm_func_slot = frame.wasm_func_slot.plus(frame_offset); + masm.emit_v3_Array_elem_r_ri(ValueKind.REF, wasm_func_reg, func_reg, frame_info.func.func_index); + masm.emit_mov_m_r(ValueKind.REF, wasm_func_slot, wasm_func_reg); - // get functions[func_index] and save into frame - def wasm_func_slot = frame.wasm_func_slot.plus(frame_offset); - masm.emit_v3_Array_elem_r_ri(ValueKind.REF, wasm_func_reg, func_reg, spcFrame.func.func_index); - masm.emit_mov_m_r(ValueKind.REF, wasm_func_slot, wasm_func_reg); + // Save instance + def instance_slot = frame.instance_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, instance_slot, inst_reg); - // Save instance - def instance_slot = frame.instance_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, instance_slot, inst_reg); + // Save mem0 base + def mem0_base_slot = frame.mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, mem0_base_slot, mem_reg); - // Save mem0 base - def mem0_base_slot = frame.mem0_base_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, mem0_base_slot, mem_reg); - - // Step vfp_reg by change in local_base_sp from previous frame and save - if (vfp_delta != 0) masm.emit_addw_r_i(vfp_reg, vfp_delta); - def vfp_slot = frame.vfp_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, vfp_slot, vfp_reg); + // Step vfp_reg by change in local_base_sp from previous frame and save + def vfp_slot = frame.vfp_slot.plus(frame_offset); + def cur_base_sp = int.view(frame_info.local_base_sp); + var delta = (cur_base_sp - prev_base_sp) * masm.valuerep.slot_size; + if (delta != 0) 
masm.emit_addw_r_i(vfp_reg, delta); + masm.emit_mov_m_r(ValueKind.REF, vfp_slot, vfp_reg); + prev_base_sp = cur_base_sp; - // Save PC - def pc_slot = frame.pc_slot.plus(frame_offset); - masm.emit_mov_m_i(pc_slot, spcFrame.pc); + // Save PC + def pc_slot = frame.pc_slot.plus(frame_offset); + masm.emit_mov_m_i(pc_slot, frame_info.pc); - // Clear FrameAccessor - def accessor_slot = frame.accessor_slot.plus(frame_offset); - masm.emit_mov_m_l(accessor_slot, 0); + // Clear FrameAccessor + def accessor_slot = frame.accessor_slot.plus(frame_offset); + masm.emit_mov_m_l(accessor_slot, 0); - // if an inlined whamm probe, also grab inlined slots - if (is_inlined) { - def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, inl_instance_slot, inl_inst_reg); - def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, inl_mem0_base_slot, inl_mem0_reg); - } else { - def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); - masm.emit_mov_m_l(inl_instance_slot, 0); - def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); - masm.emit_mov_m_l(inl_mem0_base_slot, 0); + // if an inlined whamm probe, also grab inlined slots + if (is_inlined) { + def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, inl_instance_slot, inl_inst_reg); + def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, inl_mem0_base_slot, inl_mem0_reg); + } else { + def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); + masm.emit_mov_m_l(inl_instance_slot, 0); + def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_l(inl_mem0_base_slot, 0); + } } + + return total_space; } // Guards compiler code with frame reconstruction (if necessary). 
def withReconstructedInlinedFrames(emit: void -> void) { From 296ea0f394388e9ee57942a67733f1fc21f154c7 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Tue, 31 Mar 2026 11:26:01 -0400 Subject: [PATCH 11/28] Fix wrong way indexing --- src/engine/compiler/SinglePassCompiler.v3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index ee0808866..457b25cd0 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2141,7 +2141,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Process the inlined frames (skip the outermost which already exists on native stack) for (i = 1; i < frames.length; i++) { var frame_info = frames[i]; - def frame_offset = (frames.length - i) * (frame.frameSize + 8); + def frame_offset = (frames.length - 1 - i) * (frame.frameSize + 8); // Write inlined frame stub IP as return address def retaddr_slot = MasmAddr(regs.sp, frame_offset + frame.frameSize); From 6b269728201cb5a294181b8a446709fca0e259df Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Tue, 31 Mar 2026 11:27:51 -0400 Subject: [PATCH 12/28] Reapply "Separate single-frame reconstruction into its own method" This reverts commit 036753d8a37c19e04c2c0f06e14df1cb3b96f9ce. 
--- src/engine/compiler/SinglePassCompiler.v3 | 95 ++++++++++++----------- 1 file changed, 49 insertions(+), 46 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 457b25cd0..6373e3964 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2109,12 +2109,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl def real_frame = frames[0]; masm.emit_mov_m_i(xenv.pc_slot, real_frame.pc); - // Use inlined frame stub IP as return address for all reconstructed frames - var return_addr = getSpcInlinedFrameIp(); - // load instance var inst_reg = allocTmp(ValueKind.REF); - //emit_load_instance(inst_reg); masm.emit_mov_r_m(ValueKind.REF, inst_reg, frame.instance_slot); var mem_reg = allocTmp(ValueKind.REF); masm.emit_mov_r_m(ValueKind.REF, mem_reg, frame.mem0_base_slot); @@ -2140,57 +2136,64 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Process the inlined frames (skip the outermost which already exists on native stack) for (i = 1; i < frames.length; i++) { - var frame_info = frames[i]; - def frame_offset = (frames.length - 1 - i) * (frame.frameSize + 8); + def frame_info = frames[i]; + def cur_base_sp = int.view(frame_info.local_base_sp); + def delta = (cur_base_sp - prev_base_sp) * masm.valuerep.slot_size; + emitReconstructStackFrame(frame_info, frames.length - i - 1, delta, + wasm_func_reg, func_reg, inst_reg, mem_reg, vfp_reg, inl_inst_reg, inl_mem0_reg); + prev_base_sp = cur_base_sp; + } - // Write inlined frame stub IP as return address - def retaddr_slot = MasmAddr(regs.sp, frame_offset + frame.frameSize); - masm.emit_mov_m_l(retaddr_slot, return_addr); + return total_space; + } + def emitReconstructStackFrame(spcFrame: SpcFrame, offset: int, vfp_delta: int, + wasm_func_reg: Reg, func_reg: Reg, inst_reg: Reg, mem_reg: Reg, vfp_reg: Reg, inl_inst_reg: Reg, inl_mem0_reg: Reg) { + // Use 
inlined frame stub IP as return address for all reconstructed frames + def return_addr = getSpcInlinedFrameIp(); - // get functions[func_index] and save into frame - def wasm_func_slot = frame.wasm_func_slot.plus(frame_offset); - masm.emit_v3_Array_elem_r_ri(ValueKind.REF, wasm_func_reg, func_reg, frame_info.func.func_index); - masm.emit_mov_m_r(ValueKind.REF, wasm_func_slot, wasm_func_reg); + def frame_offset = offset * (frame.frameSize + 8); + // Write inlined frame stub IP as return address + def retaddr_slot = MasmAddr(regs.sp, frame_offset + frame.frameSize); + masm.emit_mov_m_l(retaddr_slot, return_addr); - // Save instance - def instance_slot = frame.instance_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, instance_slot, inst_reg); + // get functions[func_index] and save into frame + def wasm_func_slot = frame.wasm_func_slot.plus(frame_offset); + masm.emit_v3_Array_elem_r_ri(ValueKind.REF, wasm_func_reg, func_reg, spcFrame.func.func_index); + masm.emit_mov_m_r(ValueKind.REF, wasm_func_slot, wasm_func_reg); - // Save mem0 base - def mem0_base_slot = frame.mem0_base_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, mem0_base_slot, mem_reg); + // Save instance + def instance_slot = frame.instance_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, instance_slot, inst_reg); - // Step vfp_reg by change in local_base_sp from previous frame and save - def vfp_slot = frame.vfp_slot.plus(frame_offset); - def cur_base_sp = int.view(frame_info.local_base_sp); - var delta = (cur_base_sp - prev_base_sp) * masm.valuerep.slot_size; - if (delta != 0) masm.emit_addw_r_i(vfp_reg, delta); - masm.emit_mov_m_r(ValueKind.REF, vfp_slot, vfp_reg); - prev_base_sp = cur_base_sp; + // Save mem0 base + def mem0_base_slot = frame.mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, mem0_base_slot, mem_reg); - // Save PC - def pc_slot = frame.pc_slot.plus(frame_offset); - masm.emit_mov_m_i(pc_slot, frame_info.pc); + // Step vfp_reg by change 
in local_base_sp from previous frame and save + if (vfp_delta != 0) masm.emit_addw_r_i(vfp_reg, vfp_delta); + def vfp_slot = frame.vfp_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, vfp_slot, vfp_reg); - // Clear FrameAccessor - def accessor_slot = frame.accessor_slot.plus(frame_offset); - masm.emit_mov_m_l(accessor_slot, 0); + // Save PC + def pc_slot = frame.pc_slot.plus(frame_offset); + masm.emit_mov_m_i(pc_slot, spcFrame.pc); - // if an inlined whamm probe, also grab inlined slots - if (is_inlined) { - def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, inl_instance_slot, inl_inst_reg); - def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); - masm.emit_mov_m_r(ValueKind.REF, inl_mem0_base_slot, inl_mem0_reg); - } else { - def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); - masm.emit_mov_m_l(inl_instance_slot, 0); - def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); - masm.emit_mov_m_l(inl_mem0_base_slot, 0); - } - } + // Clear FrameAccessor + def accessor_slot = frame.accessor_slot.plus(frame_offset); + masm.emit_mov_m_l(accessor_slot, 0); - return total_space; + // if an inlined whamm probe, also grab inlined slots + if (is_inlined) { + def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, inl_instance_slot, inl_inst_reg); + def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_r(ValueKind.REF, inl_mem0_base_slot, inl_mem0_reg); + } else { + def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); + masm.emit_mov_m_l(inl_instance_slot, 0); + def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); + masm.emit_mov_m_l(inl_mem0_base_slot, 0); + } } // Guards compiler code with frame reconstruction (if necessary). 
def withReconstructedInlinedFrames(emit: void -> void) { From a96f168ebb80067ac5d206575479b5b53af4257e Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Tue, 31 Mar 2026 12:37:56 -0400 Subject: [PATCH 13/28] Restore vfp after out call --- src/engine/compiler/SinglePassCompiler.v3 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 6373e3964..d9c81072e 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -2201,7 +2201,10 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl unrefRegs(); def space = emitReconstructStackFrames(snapshotFrames()); emit(); - if (space > 0) masm.emit_addw_r_i(regs.sp, space); + if (space > 0) { + masm.emit_addw_r_i(regs.sp, space); + masm.emit_mov_r_m(ValueKind.REF, regs.vfp, frame.vfp_slot); + } } else { emit(); } From 3fb2fd1cec1b0e4c901c5f7d14f2713ea775d350 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Mon, 23 Mar 2026 03:37:07 -0400 Subject: [PATCH 14/28] The first of the final inlinings --- src/engine/compiler/SinglePassCompiler.v3 | 417 ++++++++++++++-------- src/util/Whamm.v3 | 32 +- test/inline/failures.x86-64-linux | 3 - 3 files changed, 298 insertions(+), 154 deletions(-) delete mode 100644 test/inline/failures.x86-64-linux diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index d9c81072e..cd131af15 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -112,9 +112,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var ret_label: MasmLabel; var last_probe = 0; var skip_to_end: bool; - // this is Whamm probe inlining, not arbitrary function inlining (yet) - var is_inlined = false; - var whamm_probe_ctl_base: u31; // ctl_stack.top when Whamm probe compilation started + var whamm_config: 
WhammInlineConfig; // XXX: hack var handler_dest_info = Vector.new(); @@ -486,40 +484,33 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // saves the overhead of using a runtime call by directly invoking the wasm function associated with the whamm probe def emitWhammProbe(probe: WhammProbe) { + if (Trace.compiler) Trace.OUT.puts("emitting whamm probe\n"); // set up args and push to frame slots. var whamm_sig = probe.sig; - var inline_config = InlineConfig(false, false, false); - var new_local_base_sp = 0; var orig_sp = state.sp; var callee_func = WasmFunction.!(probe.func); + def inline_decision = shouldInline(callee_func.decl) && SpcTuning.inlineWhammProbes; // TODO move to shouldInline + var swap_instance = false; + var swap_membase = false; - if (SpcTuning.inlineWhammProbes) { - inline_config = InlineConfig(probe.spc_swap_membase, probe.spc_swap_instance, probe.spc_inline_func); - if (!probe.inline_heuristic_checked) { - inline_config = funcCanInline(callee_func.decl); - probe.inline_heuristic_checked = true; - probe.spc_swap_instance = inline_config.swap_instance; - probe.spc_swap_membase = inline_config.swap_membase; - probe.spc_inline_func = inline_config.can_inline; - } + if (inline_decision) { + probe.checkSwap(); + swap_instance = probe.swap_instance; + swap_membase = probe.swap_membase; - if (inline_config.swap_instance) { // push whamm instance onto abstract stack directly + if (swap_instance) { masm.emit_mov_r_Instance(regs.scratch, callee_func.instance); masm.emit_mov_m_r(ValueKind.REF, frame.inlined_instance_slot, regs.scratch); } - - // overwrite mem0_base with whamm instance's memory base, restore from frame slot later - if (inline_config.swap_membase) { - var membase = callee_func.instance.memories[0].getMemBase64(); - masm.emit_mov_r_l(regs.mem0_base, i64.view(membase)); + if (swap_membase) { + if (callee_func.instance.memories.length > 0) { + var membase = callee_func.instance.memories[0].getMemBase64(); + 
masm.emit_mov_r_l(regs.mem0_base, i64.view(membase)); + } masm.emit_mov_m_r(ValueKind.REF, frame.inlined_mem0_base_slot, regs.mem0_base); } - } - - if (!inline_config.can_inline) { - state.emitSaveAll(resolver, probeSpillMode); } else { - new_local_base_sp = int.view(state.sp); + state.emitSaveAll(resolver, probeSpillMode); } for (i < whamm_sig.length) { @@ -528,13 +519,13 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var kind: byte; match(whamm_sig[i]) { FrameAccessor => { - if (inline_config.can_inline) state.emitSaveAll(resolver, probeSpillMode); // spill entire value stack. + if (inline_decision) state.emitSaveAll(resolver, probeSpillMode); // spill entire value stack. masm.emit_call_runtime_getFrameAccessorMetaRef(); emit_reload_regs(); - if (inline_config.can_inline && !probeSpillMode.free_regs) state.emitRestoreAll(resolver); + if (inline_decision && !probeSpillMode.free_regs) state.emitRestoreAll(resolver); // move result to mem slot or reg, depending on inlining - if (inline_config.can_inline) { + if (inline_decision) { var reg = allocRegTos(ValueKind.REF); masm.emit_mov_r_r(ValueKind.REF, reg, xenv.runtime_ret0); state.push(KIND_REF | IN_REG, reg, 0); @@ -546,7 +537,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl Val(val) => { match (val) { I31(v) => { - if (inline_config.can_inline) { + if (inline_decision) { var reg = allocRegTos(ValueKind.REF); masm.emit_mov_r_i(reg, i32.view(v) << 1); state.push(KIND_REF | IN_REG, reg, 0); @@ -556,7 +547,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl kind = ValueKind.REF.code; } I32(v) => { - if (inline_config.can_inline) { + if (inline_decision) { state.push(KIND_I32 | IS_CONST, NO_REG, i32.view(v)); } else { masm.emit_mov_m_d(slot_addr, v); @@ -564,7 +555,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl kind = ValueKind.I32.code; } I64(v) => { - if 
(inline_config.can_inline) { + if (inline_decision) { var reg = allocRegTos(ValueKind.I64); masm.emit_mov_r_l(reg, i64.view(v)); state.push(KIND_I64 | IN_REG, reg, 0); @@ -574,7 +565,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl kind = ValueKind.I64.code; } F32(v) => { - if (inline_config.can_inline) { + if (inline_decision) { var reg = allocRegTos(ValueKind.F32); masm.emit_mov_r_f32(reg, v); state.push(KIND_F32 | IN_REG, reg, 0); @@ -584,7 +575,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl kind = ValueKind.F32.code; } F64(v) => { - if (inline_config.can_inline) { + if (inline_decision) { var reg = allocRegTos(ValueKind.F64); masm.emit_mov_r_d64(reg, v); state.push(KIND_F64 | IN_REG, reg, 0); @@ -594,7 +585,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl kind = ValueKind.F64.code; } V128(l, h) => { - if (inline_config.can_inline) { + if (inline_decision) { var reg = allocRegTos(ValueKind.V128); masm.emit_mov_r_q(reg, l, h); state.push(KIND_V128 | IN_REG, reg, 0); @@ -605,7 +596,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl kind = ValueKind.V128.code; } Ref(v) => { - if (inline_config.can_inline) { + if (inline_decision) { var reg = allocRegTos(ValueKind.REF); masm.emit_mov_r_Object(reg, v); state.push(KIND_REF | IN_REG, reg, 0); @@ -616,7 +607,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl kind = ValueKind.REF.code; } Cont(v) => { - if (inline_config.can_inline) { + if (inline_decision) { var reg = allocRegTos(ValueKind.REF_U64); masm.emit_mov_r_Cont(reg, v); state.push(KIND_REF_U64 | IN_REG, reg, 0); @@ -631,15 +622,15 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } Operand(_, i) => { var index = orig_sp + u32.view(i) - 1; - if (inline_config.can_inline) { - visit_LOCAL_GET(u31.view(index)); + if (inline_decision) { + 
visit_LOCAL_GET(u31.view(index - local_base_sp)); } else { masm.emit_mov_m_m(state.state[index].kind(), slot_addr, masm.slotAddr(index)); } kind = state.state[index].kind().code; } Local(_, i) => { - if (inline_config.can_inline) { + if (inline_decision) { visit_LOCAL_GET(u31.view(i)); } else { masm.emit_mov_m_m(state.state[u31.view(i)].kind(), slot_addr, masm.slotAddr(u32.view(i))); @@ -648,7 +639,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } Null => System.error("whamm", "null whamm arg!"); } - if (!inline_config.can_inline) { + if (!inline_decision) { masm.emit_mov_m_i(slot_tag_addr, kind); } } @@ -656,49 +647,14 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var func_id = callee_func.decl.func_index; var whamm_module = whamm_instance.module; var whamm_func_decl = callee_func.decl; - if (inline_config.can_inline) { - var prev_it = it; - it = BytecodeIterator.new().reset(whamm_func_decl); - var orig_module = module; - - // prepare spc for inlining - this.local_base_sp = u31.view(new_local_base_sp); - this.module = whamm_module; - this.func = whamm_func_decl; - this.sig = whamm_func_decl.sig; - - // inline codegen - it.dispatchLocalDecls(this); - this.is_inlined = true; - if (Trace.compiler) Trace.OUT.puts("Start compiling inlined whamm probe").ln(); - while (it.more() && success) { - if (Trace.compiler) traceOpcodeAndStack(false); - last_probe = 0; - masm.source_loc = it.pc; - it.dispatch(this); - if (Trace.compiler && Trace.asm) { - OUT.puts("JIT code: "); - masm.printCodeBytes(OUT, codegen_offset, masm.curCodeBytes()); - codegen_offset = masm.curCodeBytes(); - OUT.ln(); - } - unrefRegs(); - if (Debug.compiler) checkRegAlloc(); - it.next(); + if (inline_decision) { + whamm_config = WhammInlineConfig(swap_membase, swap_instance, true); + emitInlinedCall(whamm_func_decl, probe); + whamm_config = WhammInlineConfig(false, false, false); + // Restore mem0_base after probe + if 
(module.memories.length > 0) { + masm.emit_mov_r_m(ValueKind.REF, regs.mem0_base, frame.mem0_base_slot); } - if (Trace.compiler) Trace.OUT.puts("Finished compiling inlined whamm probe").ln(); - - // restore spc after inlining - it = prev_it; - this.local_base_sp = 0; - this.is_inlined = false; - this.module = orig_module; - this.func = it.func; - this.sig = it.func.sig; - masm.emit_mov_r_m(ValueKind.REF, regs.mem0_base, frame.mem0_base_slot); - - // clear callee params/locals from abstract state - dropN(state.sp - orig_sp); } else { var vsp_reg = allocTmpFixed(ValueKind.REF, regs.vsp); var func_reg = allocTmpFixed(ValueKind.REF, regs.func_arg); @@ -794,37 +750,38 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl setUnreachable(); } def visit_END() { - if (!this.is_inlined) { - var ctl_top = state.ctl_stack.peek(); - if (ctl_top.opcode == Opcode.LOOP.code) { - state.ctl_stack.pop(); - if (!ctl_top.reachable) setUnreachable(); - } else if (ctl_top.opcode == Opcode.IF.code) { - // simulate empty if-true block - state.emitFallthru(resolver); - masm.emit_br(ctl_top.label); - masm.bindLabel(ctl_top.else_label); - state.doElse(); - ctl_top.opcode = Opcode.ELSE.code; - state.emitFallthru(resolver); - masm.bindLabel(ctl_top.label); - state.resetToMerge(ctl_top); - state.ctl_stack.pop(); - } else if (ctl_top.opcode == Opcode.BLOCK.code || ctl_top.opcode == Opcode.ELSE.code) { - state.emitFallthru(resolver); - masm.bindLabel(ctl_top.label); - state.resetToMerge(ctl_top); - state.ctl_stack.pop(); - } else if (ctl_top.opcode == Opcode.RETURN.code) { - state.emitFallthru(resolver); - masm.bindLabel(ctl_top.label); - state.resetToMerge(ctl_top); - emitProbe(); - if (ctl_top.merge_count > 1) emitReturn(ctl_top); - state.ctl_stack.pop(); - } + var frame = state.frame_stack.peek(); + var is_implicit_function_block = isInlined() && state.ctl_stack.top == frame.ctl_base_sp + 1; + + var ctl_top = state.ctl_stack.peek(); + if (ctl_top.opcode == 
Opcode.LOOP.code) { + state.ctl_stack.pop(); + if (!ctl_top.reachable) setUnreachable(); + } else if (ctl_top.opcode == Opcode.IF.code) { + // simulate empty if-true block + state.emitFallthru(resolver); + masm.emit_br(ctl_top.label); + masm.bindLabel(ctl_top.else_label); + state.doElse(); + ctl_top.opcode = Opcode.ELSE.code; + state.emitFallthru(resolver); + masm.bindLabel(ctl_top.label); + state.resetToMerge(ctl_top); + state.ctl_stack.pop(); + } else if (ctl_top.opcode == Opcode.BLOCK.code || ctl_top.opcode == Opcode.ELSE.code) { + state.emitFallthru(resolver); + masm.bindLabel(ctl_top.label); + state.resetToMerge(ctl_top); + state.ctl_stack.pop(); + } else if (ctl_top.opcode == Opcode.RETURN.code) { + state.emitFallthru(resolver); + masm.bindLabel(ctl_top.label); + state.resetToMerge(ctl_top); emitProbe(); + if (ctl_top.merge_count > 1) emitReturn(ctl_top); + state.ctl_stack.pop(); } + emitProbe(); } def visit_BR(depth: u31) { var target = state.getControl(depth); @@ -866,6 +823,18 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } var func = module.functions[index]; + // Try inlining for intra-module, non-tail calls + if (!tailCall && shouldInline(func)) { + if (Trace.compiler) Trace.OUT.put2("Inlining call to func #%d (%d bytes)", index, func.orig_bytecode.length).ln(); + if (op == Opcode.CALL) { + Metrics.spc_static_inlined_calls.val++; + masm.emit_inc_metric(Metrics.spc_dynamic_inlined_calls); + masm.emit_inc_metric(Metrics.spc_dynamic_calls); + } + emitInlinedCall(func, null); + return; + } + withReconstructedInlinedFrames(fun { var retpt = masm.newLabel(it.pc), wasmcall_label = masm.newLabel(it.pc); // Load the instance (which must happen before frame is unwound). 
@@ -881,6 +850,160 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl emitCallToReg(func.sig, func_reg, vsp_reg, tmp, func.imp != null, tailCall); }); } + def emitInlinedCall(callee_func: FuncDecl, whamm: WhammProbe) { + var sig = callee_func.sig; + var params_count = u32.view(sig.params.length); + var results_count = u32.view(sig.results.length); + var orig_sp = state.sp; + + // Arguments are already on stack + // Stack: [..., arg0, arg1, ..., argN] <- sp + // We want callee's local 0 = arg0, so: + var new_local_base_sp: u31 = u31.view(orig_sp - params_count); + var new_ctl_base_sp = u31.view(state.ctl_stack.top); + + var num_locals = callee_func.num_slots(); + + // Push an implicit block for the head of the function + var end_label = masm.newLabel(callee_func.cur_bytecode.length); + state.pushBlock(sig.params, sig.results, end_label); + + var m: Module = module; + + // Whamm probe configuration + if (whamm != null) { + def whamm_sig = whamm.sig; + def whamm_wf = WasmFunction.!(whamm.func); + def whamm_instance = whamm_wf.instance; + def whamm_func_decl = whamm_wf.decl; + + m = whamm_instance.module; + new_local_base_sp = u31.view(state.sp) - u31.view(whamm_sig.length); // XXX + } + + // Create and push frame for inlined function + var callee_frame = SpcFrame.new(callee_func, + m, new_local_base_sp, new_ctl_base_sp, num_locals, 0); + + pushSpcFrame(callee_frame); + + // Emit function entry probe, if any. 
+ // XXX expensive because frame materialization required + if (whamm == null && !FeatureDisable.entryProbes && func.entry_probed) { + var probe = Instrumentation.getLocalProbe(module, callee_func.func_index, 0); + + // Reconstruct inlined frames before emitting probe + var reconstructed_space = 0; + if (isInlined()) { + var frames = snapshotFrames(); + unrefRegs(); + reconstructed_space = emitReconstructStackFrames(frames); + } + emitProbe0(0, probe); + // Clean up reconstructed frames after the call returns + if (reconstructed_space > 0) { + masm.emit_addw_r_i(regs.sp, reconstructed_space); + } + } + + // Allocate callee's non-parameter locals + it.dispatchLocalDecls(this); + + // Compile callee's bytecode + if (Trace.compiler) Trace.OUT.puts(" Start inlined function body").ln(); + while (it.more() && success) { + if (Trace.compiler) traceOpcodeAndStack(false); + last_probe = 0; + masm.source_loc = it.pc; + masm.current_fid = func.func_index; + it.dispatch(this); + if (Trace.compiler && Trace.asm) { + OUT.puts("JIT code: "); + masm.printCodeBytes(OUT, codegen_offset, masm.curCodeBytes()); + codegen_offset = masm.curCodeBytes(); + OUT.ln(); + } + unrefRegs(); + if (Debug.compiler) checkRegAlloc(); + it.next(); + if (skip_to_end) doSkipToEndOfBlock(); + } + if (Trace.compiler) Trace.OUT.puts(" End inlined function body").ln(); + + // Check if the inlined function is unreachable (e.g., ended with UNREACHABLE, RETURN, THROW) + var inlined_reachable = state.ctl_stack.peek().reachable; + + // Restore caller context by popping frame + popSpcFrame(); // Automatically restores cached fields + + // Note: Control stack cleanup (popping implicit BLOCK) is handled by visit_END + + // If inlined function is unreachable, no results to clean up + if (!inlined_reachable) { + if (Trace.compiler) { + Trace.OUT.puts(" Inlined function unreachable, skipping result cleanup").ln(); + Trace.OUT.put3(" state.sp=%d, new_local_base_sp=%d, callee_slots=%d", + state.sp, new_local_base_sp, 
state.sp - new_local_base_sp).ln(); + } + // Drop all callee state (params + locals, no results) + var callee_slots = state.sp - new_local_base_sp; + if (callee_slots > 0) dropN(u32.view(callee_slots)); + if (Trace.compiler) Trace.OUT.put1(" After dropN: state.sp=%d", state.sp).ln(); + setUnreachable(); + return; + } + + // Clean up stack: + // Before: [..., arg0, arg1, ..., argN, local0, local1, ..., localM, result0, ..., resultK] + // After: [..., result0, ..., resultK] + + var total_callee_slots = state.sp - new_local_base_sp; // All callee state + var slots_to_drop = total_callee_slots - results_count; + + // for whamm probes, results_count SHOULD be zero + if (slots_to_drop > 0 && results_count > 0) { + // Need to move results down over parameters and locals + for (i < results_count) { + var result_slot = state.sp - results_count + u32.view(i); + var target_slot = new_local_base_sp + u32.view(i); + if (Trace.compiler) { + Trace.OUT.put3(" Moving result %d: slot %d -> slot %d", i, result_slot, target_slot).ln(); + } + if (result_slot != target_slot) { + var rv = state.state[result_slot]; + if (Trace.compiler) { + Trace.OUT.put2(" rv: flags=%x, const=%d", rv.flags, rv.const).ln(); + } + if (rv.inReg()) { + regAlloc.reassign(rv.reg, int.!(result_slot), int.!(target_slot)); + } else { + // Move in memory (rarely needed if results are in regs) + resolver.addMove((target_slot, rv), (result_slot, rv)); + } + state.state[target_slot] = rv; + } else { + // Result already in the right place + if (Trace.compiler) Trace.OUT.puts(" (already in place)").ln(); + } + } + resolver.emitMoves(); + + // Drop everything above results + for (slot = new_local_base_sp + results_count; slot < state.sp; slot++) { + unrefSlot(slot); + } + state.sp = new_local_base_sp + results_count; + } else if (slots_to_drop > 0) { + // No results, just drop everything + if (Trace.compiler) Trace.OUT.put1("dropping %d slots\n", slots_to_drop); + dropN(u32.view(slots_to_drop)); + } + // If 
slots_to_drop <= 0, results are already in the right place + + if (Trace.compiler) { + Trace.OUT.put1(" Inlined call complete, sp=%d", state.sp).ln(); + } + } def emitCallToReg(sig: SigDecl, func_reg: Reg, vsp_reg: Reg, tmp: Reg, checkHostCall: bool, tailCall: bool) { var retpt = masm.newLabel(it.pc), wasmcall_label = masm.newLabel(it.pc); // Handle the current stack state. @@ -2123,7 +2246,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var wasm_func_reg = allocTmp(ValueKind.REF); var inl_inst_reg: Reg, inl_mem0_reg: Reg; - if (is_inlined) { + if (whamm_config.is_inlined) { // TODO investigate, check individual configs? inl_inst_reg = allocTmp(ValueKind.REF); inl_mem0_reg = allocTmp(ValueKind.REF); masm.emit_mov_r_m(ValueKind.REF, inl_inst_reg, frame.inlined_instance_slot); @@ -2183,7 +2306,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl masm.emit_mov_m_l(accessor_slot, 0); // if an inlined whamm probe, also grab inlined slots - if (is_inlined) { + if (whamm_config.is_inlined) { def inl_instance_slot = frame.inlined_instance_slot.plus(frame_offset); masm.emit_mov_m_r(ValueKind.REF, inl_instance_slot, inl_inst_reg); def inl_mem0_base_slot = frame.inlined_mem0_base_slot.plus(frame_offset); @@ -2304,7 +2427,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // XXX: recompute VFP from VSP - #slots? 
masm.emit_mov_r_m(ValueKind.REF, regs.vfp, frame.vfp_slot); if (module.memories.length > 0) { - if (is_inlined) { + if (whamm_config.is_inlined) { masm.emit_mov_r_m(ValueKind.REF, regs.mem0_base, frame.inlined_mem0_base_slot); } else { masm.emit_mov_r_m(ValueKind.REF, regs.mem0_base, frame.mem0_base_slot); @@ -2312,7 +2435,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } } def emit_load_instance(reg: Reg) { - if (is_inlined) { // inline compilation + if (whamm_config.is_inlined) { // inline compilation masm.emit_mov_r_m(ValueKind.REF, reg, frame.inlined_instance_slot); } else { masm.emit_mov_r_m(ValueKind.REF, reg, frame.instance_slot); @@ -2680,6 +2803,37 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } return frames; } + // Determine if a regular function call should be inlined + def shouldInline(func: FuncDecl) -> bool { + if (Trace.compiler) OUT.put1("deciding on inlining call to func #%d: ", func.func_index); + + if (func.imp != null) return no("imported"); + if (inlineDepth() >= SpcTuning.maxInlineDepth) return no("max inline depth exceeded"); + if (func.orig_bytecode.length > SpcTuning.maxInlineBytecodeSize) return no("func too large"); + if (func.sig.params.length > SpcTuning.maxInlineParams) return no("too many parameters"); + + // Scan bytecode for unsupported instructions + var bi = BytecodeIterator.new().reset(func); + while (bi.more()) { + match (bi.current()) { + RETURN, RETURN_CALL, RETURN_CALL_INDIRECT, RETURN_CALL_REF => + return no("uses return instruction"); + TRY, CATCH, THROW, RETHROW, THROW_REF, DELEGATE, CATCH_ALL, TRY_TABLE => + return no("uses exception handling instruction"); + CONT_NEW, CONT_BIND, SUSPEND, RESUME, RESUME_THROW, RESUME_THROW_REF, SWITCH => + return no("uses stack switching instruction"); + _ => ; + } + bi.next(); + } + + if (Trace.compiler) OUT.puts("YES\n"); + return true; + } + private def no(reason: string) -> bool { + if (Trace.compiler) 
OUT.puts("NO (").puts(reason).putc(')').ln(); + return false; + } } // Different branch instructions have different repush enum BrRepush(taken: bool, not_taken: bool) { @@ -3386,38 +3540,7 @@ class MoveNode { var dstNext: MoveNode; // next in a list of successors } -// checks function bytecode to see if it can be inlined based on -// simple heuristics: length <= maxInlineBytecodeSize and straightline code. -def funcCanInline(decl: FuncDecl) -> InlineConfig { - var default = InlineConfig(false, false, false); - if (decl.orig_bytecode.length > SpcTuning.maxInlineBytecodeSize || decl.sig.params.length > SpcTuning.maxInlineParams) return default; - var bi = BytecodeIterator.new().reset(decl); - var swap_instance = false; - var swap_membase = false; - while (bi.more()) { - var op = bi.current(); - match (op) { - // Cannot handle control flow yet. - IF, BR, BR_IF, BR_TABLE, BR_ON_NULL, BR_ON_NON_NULL, BR_ON_CAST, BR_ON_CAST_FAIL, RETURN => return default; - // These opcodes require swapping the instance. - THROW, CALL, CALL_INDIRECT, MEMORY_INIT, MEMORY_SIZE, MEMORY_GROW, MEMORY_COPY, MEMORY_FILL, REF_FUNC, DATA_DROP, - ELEM_DROP, TABLE_INIT, TABLE_SIZE, TABLE_COPY, TABLE_GROW, GLOBAL_SET, GLOBAL_GET, TABLE_SET, TABLE_GET => swap_instance = true; - // Load/store opcodes require either the memory base or the instance. 
- I32_STORE, I64_STORE, F32_STORE, F64_STORE, I32_STORE8, I32_STORE16, I64_STORE8, I64_STORE16, I64_STORE32, - V128_STORE, I32_LOAD, I64_LOAD, F32_LOAD, F64_LOAD, I32_LOAD8_S, I32_LOAD8_U, I32_LOAD16_S, I32_LOAD16_U, - I64_LOAD8_S, I64_LOAD8_U, I64_LOAD16_S, I64_LOAD16_U, I64_LOAD32_S, I64_LOAD32_U, V128_LOAD => { - var memarg = bi.immptr().read_MemArg(); - if (memarg.memory_index == 0) swap_membase = true; - else swap_instance = true; - } - _ => ; - } - bi.next(); - } - return InlineConfig(swap_membase, swap_instance, true); -} - -type InlineConfig(swap_membase: bool, swap_instance: bool, can_inline: bool); +type WhammInlineConfig(swap_membase: bool, swap_instance: bool, is_inlined: bool); // Used to record the entry point of exception/suspension handlers. Jumping to {stub_label} allows // control transfer to its corresponding handler without falling back to fast-int. diff --git a/src/util/Whamm.v3 b/src/util/Whamm.v3 index 9b93b746d..ae1649d8b 100644 --- a/src/util/Whamm.v3 +++ b/src/util/Whamm.v3 @@ -175,10 +175,9 @@ component Whamm { class WhammProbe(func: Function, sig: Array) extends Probe { var trampoline: TargetCode; // properties set by the spc to make inlining optimization decisions. - var inline_heuristic_checked = false; - var spc_inline_func = false; - var spc_swap_instance = false; - var spc_swap_membase = false; + var swap_checked = false; + var swap_instance = false; + var swap_membase = false; private def args = if(sig.length == 0, Values.NONE, Array.new(sig.length)); @@ -203,6 +202,31 @@ class WhammProbe(func: Function, sig: Array) extends Probe { } return ProbeAction.Continue; } + + // If function is to be inlined, check to see if instance or mem0_base need to be swapped. + def checkSwap() { + if (swap_checked) return; + var bi = BytecodeIterator.new().reset(WasmFunction.!(func).decl); + while (bi.more()) { + var op = bi.current(); + match (op) { + // These opcodes require swapping the instance. 
+ THROW, CALL, CALL_INDIRECT, MEMORY_INIT, MEMORY_SIZE, MEMORY_GROW, MEMORY_COPY, MEMORY_FILL, REF_FUNC, DATA_DROP, + ELEM_DROP, TABLE_INIT, TABLE_SIZE, TABLE_COPY, TABLE_GROW, GLOBAL_SET, GLOBAL_GET, TABLE_SET, TABLE_GET => swap_instance = true; + // Load/store opcodes require either the memory base or the instance. + I32_STORE, I64_STORE, F32_STORE, F64_STORE, I32_STORE8, I32_STORE16, I64_STORE8, I64_STORE16, I64_STORE32, + V128_STORE, I32_LOAD, I64_LOAD, F32_LOAD, F64_LOAD, I32_LOAD8_S, I32_LOAD8_U, I32_LOAD16_S, I32_LOAD16_U, + I64_LOAD8_S, I64_LOAD8_U, I64_LOAD16_S, I64_LOAD16_U, I64_LOAD32_S, I64_LOAD32_U, V128_LOAD => { + var memarg = bi.immptr().read_MemArg(); + if (memarg.memory_index == 0) swap_membase = true; + else swap_instance = true; + } + _ => ; + } + bi.next(); + } + swap_checked = true; + } } def parseParam0(r: TextReader) -> WhammParam { diff --git a/test/inline/failures.x86-64-linux b/test/inline/failures.x86-64-linux deleted file mode 100644 index 925e70891..000000000 --- a/test/inline/failures.x86-64-linux +++ /dev/null @@ -1,3 +0,0 @@ -inline_test_arithmetic.wasm -inline_test_locals_control.wasm -inline_test_nesting.wasm From 4ad0cbd2c63cbf4ef4019294c3b175a4436262ef Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Mon, 23 Mar 2026 03:38:34 -0400 Subject: [PATCH 15/28] Remove dead lines --- src/engine/compiler/SinglePassCompiler.v3 | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index cd131af15..45da336e0 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -750,9 +750,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl setUnreachable(); } def visit_END() { - var frame = state.frame_stack.peek(); - var is_implicit_function_block = isInlined() && state.ctl_stack.top == frame.ctl_base_sp + 1; - var ctl_top = state.ctl_stack.peek(); if (ctl_top.opcode == 
Opcode.LOOP.code) { state.ctl_stack.pop(); From 3f9b030cc5086154dac9ccbc4f0e028d8b3e9eb7 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Mon, 23 Mar 2026 03:43:24 -0400 Subject: [PATCH 16/28] Remove extra metric increment --- src/engine/compiler/SinglePassCompiler.v3 | 1 - 1 file changed, 1 deletion(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 45da336e0..e4bf4f6a2 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -826,7 +826,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl if (op == Opcode.CALL) { Metrics.spc_static_inlined_calls.val++; masm.emit_inc_metric(Metrics.spc_dynamic_inlined_calls); - masm.emit_inc_metric(Metrics.spc_dynamic_calls); } emitInlinedCall(func, null); return; From c0ce246425b35408caa448e893b86d3be95fb526 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Mon, 23 Mar 2026 03:49:25 -0400 Subject: [PATCH 17/28] Fix osr bug on dyn --- src/engine/compiler/SinglePassCompiler.v3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index e4bf4f6a2..7bd635556 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -697,7 +697,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl state.prepareLoop(resolver); masm.bindLabel(ctl_top.label); emitProbe(); - if (it.pc == osr_pc) { + if (it.pc == osr_pc && !isInlined()) { osr_state = state.ctl_stack.peek().copyMerge(); osr_loop_label = masm.newLabel(it.pc); masm.bindLabel(osr_loop_label); From 364664a1840fa20db4cb20f0240ff75f7a45496b Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 25 Mar 2026 01:35:09 -0400 Subject: [PATCH 18/28] Support RETURN instruction * we now use RETURN for SpcControl for inlined functions instead of BLOCK * RETURN instructions are 
now supported * ret_labels are now eagerly instantiated and added to SpcState * added pushFuncBody to abstract control push of RETURN --- src/engine/compiler/SinglePassCompiler.v3 | 47 +++++++---- test/inline/inline_test_return.wasm | Bin 0 -> 182 bytes test/inline/inline_test_return.wasm.exit | 1 + test/inline/inline_test_return.wasm.flags | 1 + test/inline/inline_test_return.wasm.out | 4 + test/inline/inline_test_return.wat | 97 ++++++++++++++++++++++ 6 files changed, 134 insertions(+), 16 deletions(-) create mode 100644 test/inline/inline_test_return.wasm create mode 100644 test/inline/inline_test_return.wasm.exit create mode 100644 test/inline/inline_test_return.wasm.flags create mode 100644 test/inline/inline_test_return.wasm.out create mode 100644 test/inline/inline_test_return.wat diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 7bd635556..a840a7b98 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -102,6 +102,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var sig: SigDecl; var num_locals: int; var local_base_sp: u31; // can use a Range for 0-indexing instead of from offset + var ctl_base_sp: u31; // index of the RETURN control in ctl_stack for the current frame var success = true; var osr_pc: int; @@ -164,7 +165,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Push initial frame for top-level function state.frame_stack.clear(); - var initial_frame = SpcFrame.new(func, module, 0, 0, func.num_slots(), 0); + var initial_frame = SpcFrame.new(func, module, 0, 0, func.num_slots(), 0, masm.newLabel(func.cur_bytecode.length)); pushSpcFrame(initial_frame); // Emit prologue, which allocates the frame and initializes various registers. 
@@ -777,6 +778,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl emitProbe(); if (ctl_top.merge_count > 1) emitReturn(ctl_top); state.ctl_stack.pop(); + return; } emitProbe(); } @@ -807,9 +809,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl setUnreachable(); } def visit_RETURN() { - var target = state.ctl_stack.elems[0]; + var target = state.ctl_stack.elems[ctl_base_sp]; state.emitTransfer(target, resolver); - if (ret_label == null) ret_label = masm.newLabel(func.cur_bytecode.length); masm.emit_br(ret_label); setUnreachable(); } @@ -860,9 +861,11 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var num_locals = callee_func.num_slots(); - // Push an implicit block for the head of the function + // Push a RETURN control for the inlined callee's function body. var end_label = masm.newLabel(callee_func.cur_bytecode.length); - state.pushBlock(sig.params, sig.results, end_label); + var func_body_ctl = state.pushFuncBody(sig.params, sig.results, end_label); + func_body_ctl.merge_state = state.getInMemoryMergeWithArgs(int.view(func_body_ctl.val_stack_top), sig.results); // preserve outer frame state below callee's results + func_body_ctl.merge_count = 1; var m: Module = module; @@ -878,8 +881,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } // Create and push frame for inlined function - var callee_frame = SpcFrame.new(callee_func, - m, new_local_base_sp, new_ctl_base_sp, num_locals, 0); + var callee_frame = SpcFrame.new(callee_func, + m, new_local_base_sp, new_ctl_base_sp, num_locals, 0, masm.newLabel(callee_func.cur_bytecode.length)); pushSpcFrame(callee_frame); @@ -2170,10 +2173,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl } def emitReturn(ctl: SpcControl) { // All explicit RETURN instructions branch here. 
- if (ret_label != null) { - masm.bindLabel(ret_label); - ret_label = null; - } + masm.bindLabel(ret_label); + var results = sig.results; if (masm.valuerep.tagged) { // update mismatched value tags @@ -2184,6 +2185,9 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl masm.emit_mov_m_i(masm.tagAddr(state.sp - u32.view(results.length) + u32.view(i)), rtag.code); } } + + if (isInlined()) return; + // Compute VSP = VFP + state.sp emit_compute_vsp(regs.vsp, state.sp); // Return to caller @@ -2759,7 +2763,10 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl if (func != null) masm.pushInlineContext(func.func_index); def current = state.frame_stack.peek(); - if (current != null) current.pc = it.pc; + if (current != null) { + current.pc = it.pc; + current.ret_label = ret_label; + } state.frame_stack.push(frame); // Update cached copies from new top frame it.reset(frame.func).at(frame.pc, -1); @@ -2768,6 +2775,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl sig = func.sig; num_locals = frame.num_locals; local_base_sp = frame.local_base_sp; + ctl_base_sp = frame.ctl_base_sp; + ret_label = frame.ret_label; } def popSpcFrame() -> SpcFrame { masm.popInlineContext(); @@ -2781,6 +2790,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl sig = func.sig; num_locals = current.num_locals; local_base_sp = current.local_base_sp; + ctl_base_sp = current.ctl_base_sp; + ret_label = current.ret_label; return frame; } @@ -2795,7 +2806,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl for (i < state.frame_stack.top) { var f = state.frame_stack.elems[i]; var pc = if(i == state.frame_stack.top - 1, it.pc, f.pc); - frames[i] = SpcFrame.new(f.func, f.module, f.local_base_sp, f.ctl_base_sp, f.num_locals, pc); + frames[i] = SpcFrame.new(f.func, f.module, f.local_base_sp, f.ctl_base_sp, f.num_locals, pc, null); } return frames; 
} @@ -2812,7 +2823,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var bi = BytecodeIterator.new().reset(func); while (bi.more()) { match (bi.current()) { - RETURN, RETURN_CALL, RETURN_CALL_INDIRECT, RETURN_CALL_REF => + RETURN_CALL, RETURN_CALL_INDIRECT, RETURN_CALL_REF => return no("uses return instruction"); TRY, CATCH, THROW, RETHROW, THROW_REF, DELEGATE, CATCH_ALL, TRY_TABLE => return no("uses exception handling instruction"); @@ -2983,8 +2994,9 @@ class SpcFrame { var ctl_base_sp: u31; // Base index into SpcState.ctl_stack var num_locals: int; var pc: int; + var ret_label: MasmLabel; - new(func, module, local_base_sp, ctl_base_sp, num_locals, pc) {} + new(func, module, local_base_sp, ctl_base_sp, num_locals, pc, ret_label) {} } class SpcState(regAlloc: RegAlloc) { @@ -3001,7 +3013,7 @@ class SpcState(regAlloc: RegAlloc) { ctl_stack.clear(); // manually set up first control entry and return merge state var results = sig.results; - var ctl = pushControl(Opcode.RETURN.code, ValueTypes.NONE, results, ret_label); + var ctl = pushFuncBody(ValueTypes.NONE, results, ret_label); var merge_state = Array.new(results.length); for (i < results.length) { // request the merged values be stored to the stack, but don't require tags @@ -3033,6 +3045,9 @@ class SpcState(regAlloc: RegAlloc) { def pushBlock(params: Array, results: Array, end_label: MasmLabel) -> SpcControl { return pushControl(Opcode.BLOCK.code, params, results, end_label); } + def pushFuncBody(params: Array, results: Array, end_label: MasmLabel) -> SpcControl { + return pushControl(Opcode.RETURN.code, params, results, end_label); + } def pushLoop(params: Array, results: Array, start_label: MasmLabel) -> SpcControl { var ctl = pushControl(Opcode.LOOP.code, params, results, start_label); return ctl; diff --git a/test/inline/inline_test_return.wasm b/test/inline/inline_test_return.wasm new file mode 100644 index 
0000000000000000000000000000000000000000..d7bcbbaa0b658f86faa93be8c09ba06cbf0b59e7 GIT binary patch literal 182 zcmXZSF%H5o429umCsk2Aq+(>s#5rm)1aXRX1W|@EBad7gr*!(iJO}f8Apmp?6PuAu z!M1=b!~o*{KyjJxFL3%&ID@S~`N^XPw>TF12ZbJ4L_8uV6n|gaHH(wmM{bl0G-w>4 beRY_ltE<{Jv8z*P8faTTyWxA@o$4w-Cl(w8 literal 0 HcmV?d00001 diff --git a/test/inline/inline_test_return.wasm.exit b/test/inline/inline_test_return.wasm.exit new file mode 100644 index 000000000..573541ac9 --- /dev/null +++ b/test/inline/inline_test_return.wasm.exit @@ -0,0 +1 @@ +0 diff --git a/test/inline/inline_test_return.wasm.flags b/test/inline/inline_test_return.wasm.flags new file mode 100644 index 000000000..0c2fe67af --- /dev/null +++ b/test/inline/inline_test_return.wasm.flags @@ -0,0 +1 @@ +--metrics=spc*calls --inline-max-depth=1 diff --git a/test/inline/inline_test_return.wasm.out b/test/inline/inline_test_return.wasm.out new file mode 100644 index 000000000..79d1497bf --- /dev/null +++ b/test/inline/inline_test_return.wasm.out @@ -0,0 +1,4 @@ +spc:static_calls : 6 calls +spc:static_inlined_calls : 6 calls +spc:dynamic_calls : 6 calls +spc:dynamic_inlined_calls : 6 calls diff --git a/test/inline/inline_test_return.wat b/test/inline/inline_test_return.wat new file mode 100644 index 000000000..c1dd8b196 --- /dev/null +++ b/test/inline/inline_test_return.wat @@ -0,0 +1,97 @@ +;; Test inlined functions with explicit RETURN, including nested control flow +;; and paths where extra values are on the stack at the time of return. +(module + ;; Two levels of nested ifs; in the early-return path, 2*a is an extra value + ;; on the value stack below the returned a+b. 
+ (func $weighted (param i32) (param i32) (result i32) + block (result i32) + local.get 0 + i32.const 2 + i32.mul ;; [2a] -- extra below when early return fires + block + local.get 0 + i32.const 0 + i32.gt_s + if + local.get 1 + i32.const 0 + i32.gt_s + if + ;; both positive: return a+b; 2a is extra on stack + local.get 0 + local.get 1 + i32.add + return + end + end + end + local.get 1 + i32.add ;; fallthrough: 2a+b + end + ) + + ;; Clamp x to [lo, hi]; two levels of nesting, returns on multiple paths. + (func $clamp (param i32) (param i32) (param i32) (result i32) + local.get 0 + local.get 1 + i32.lt_s + if + local.get 1 + return + end + local.get 0 + local.get 2 + i32.gt_s + if + local.get 2 + return + end + local.get 0 + ) + + (func (export "main") (result i32) + i32.const 3 + i32.const 4 + call $weighted + i32.const 7 ;; both positive: 3+4=7 + i32.ne + + i32.const 3 + i32.const -1 + call $weighted + i32.const 5 ;; b<=0: 2*3+(-1)=5 + i32.ne + i32.or + + i32.const -1 + i32.const 4 + call $weighted + i32.const 2 ;; a<=0: 2*(-1)+4=2 + i32.ne + i32.or + + i32.const 5 + i32.const 0 + i32.const 10 + call $clamp + i32.const 5 + i32.ne + i32.or + + i32.const -3 + i32.const 0 + i32.const 10 + call $clamp + i32.const 0 + i32.ne + i32.or + + i32.const 15 + i32.const 0 + i32.const 10 + call $clamp + i32.const 10 + i32.ne + i32.or + ) +) From 437da05222311b17a9efd4e77297ba3c93198a5b Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 25 Mar 2026 01:39:31 -0400 Subject: [PATCH 19/28] Remove manual result slot cleanup (using merge state transfer instead) q --- src/engine/compiler/SinglePassCompiler.v3 | 47 ----------------------- 1 file changed, 47 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index a840a7b98..d52c43af0 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -952,53 +952,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: 
MacroAssembler, regAlloc: RegAl return; } - // Clean up stack: - // Before: [..., arg0, arg1, ..., argN, local0, local1, ..., localM, result0, ..., resultK] - // After: [..., result0, ..., resultK] - - var total_callee_slots = state.sp - new_local_base_sp; // All callee state - var slots_to_drop = total_callee_slots - results_count; - - // for whamm probes, results_count SHOULD be zero - if (slots_to_drop > 0 && results_count > 0) { - // Need to move results down over parameters and locals - for (i < results_count) { - var result_slot = state.sp - results_count + u32.view(i); - var target_slot = new_local_base_sp + u32.view(i); - if (Trace.compiler) { - Trace.OUT.put3(" Moving result %d: slot %d -> slot %d", i, result_slot, target_slot).ln(); - } - if (result_slot != target_slot) { - var rv = state.state[result_slot]; - if (Trace.compiler) { - Trace.OUT.put2(" rv: flags=%x, const=%d", rv.flags, rv.const).ln(); - } - if (rv.inReg()) { - regAlloc.reassign(rv.reg, int.!(result_slot), int.!(target_slot)); - } else { - // Move in memory (rarely needed if results are in regs) - resolver.addMove((target_slot, rv), (result_slot, rv)); - } - state.state[target_slot] = rv; - } else { - // Result already in the right place - if (Trace.compiler) Trace.OUT.puts(" (already in place)").ln(); - } - } - resolver.emitMoves(); - - // Drop everything above results - for (slot = new_local_base_sp + results_count; slot < state.sp; slot++) { - unrefSlot(slot); - } - state.sp = new_local_base_sp + results_count; - } else if (slots_to_drop > 0) { - // No results, just drop everything - if (Trace.compiler) Trace.OUT.put1("dropping %d slots\n", slots_to_drop); - dropN(u32.view(slots_to_drop)); - } - // If slots_to_drop <= 0, results are already in the right place - if (Trace.compiler) { Trace.OUT.put1(" Inlined call complete, sp=%d", state.sp).ln(); } From 644ba206d54f49978274336b02647a9e3818758b Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 25 Mar 2026 02:06:12 -0400 Subject: 
[PATCH 20/28] Revert "Remove manual result slot cleanup (using merge state transfer instead)" This reverts commit 112716e1942feda187004249639502131730f592. --- src/engine/compiler/SinglePassCompiler.v3 | 47 +++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index d52c43af0..a840a7b98 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -952,6 +952,53 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl return; } + // Clean up stack: + // Before: [..., arg0, arg1, ..., argN, local0, local1, ..., localM, result0, ..., resultK] + // After: [..., result0, ..., resultK] + + var total_callee_slots = state.sp - new_local_base_sp; // All callee state + var slots_to_drop = total_callee_slots - results_count; + + // for whamm probes, results_count SHOULD be zero + if (slots_to_drop > 0 && results_count > 0) { + // Need to move results down over parameters and locals + for (i < results_count) { + var result_slot = state.sp - results_count + u32.view(i); + var target_slot = new_local_base_sp + u32.view(i); + if (Trace.compiler) { + Trace.OUT.put3(" Moving result %d: slot %d -> slot %d", i, result_slot, target_slot).ln(); + } + if (result_slot != target_slot) { + var rv = state.state[result_slot]; + if (Trace.compiler) { + Trace.OUT.put2(" rv: flags=%x, const=%d", rv.flags, rv.const).ln(); + } + if (rv.inReg()) { + regAlloc.reassign(rv.reg, int.!(result_slot), int.!(target_slot)); + } else { + // Move in memory (rarely needed if results are in regs) + resolver.addMove((target_slot, rv), (result_slot, rv)); + } + state.state[target_slot] = rv; + } else { + // Result already in the right place + if (Trace.compiler) Trace.OUT.puts(" (already in place)").ln(); + } + } + resolver.emitMoves(); + + // Drop everything above results + for (slot = new_local_base_sp + results_count; slot < state.sp; 
slot++) { + unrefSlot(slot); + } + state.sp = new_local_base_sp + results_count; + } else if (slots_to_drop > 0) { + // No results, just drop everything + if (Trace.compiler) Trace.OUT.put1("dropping %d slots\n", slots_to_drop); + dropN(u32.view(slots_to_drop)); + } + // If slots_to_drop <= 0, results are already in the right place + if (Trace.compiler) { Trace.OUT.put1(" Inlined call complete, sp=%d", state.sp).ln(); } From 9a1af469a0e9b13a2971f35ba5ceef2f85abc873 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 25 Mar 2026 02:21:30 -0400 Subject: [PATCH 21/28] Fix whamm arg for merge state --- src/engine/compiler/SinglePassCompiler.v3 | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index a840a7b98..5c58f883c 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -864,8 +864,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Push a RETURN control for the inlined callee's function body. 
var end_label = masm.newLabel(callee_func.cur_bytecode.length); var func_body_ctl = state.pushFuncBody(sig.params, sig.results, end_label); - func_body_ctl.merge_state = state.getInMemoryMergeWithArgs(int.view(func_body_ctl.val_stack_top), sig.results); // preserve outer frame state below callee's results - func_body_ctl.merge_count = 1; var m: Module = module; @@ -878,8 +876,13 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl m = whamm_instance.module; new_local_base_sp = u31.view(state.sp) - u31.view(whamm_sig.length); // XXX + func_body_ctl.val_stack_top = new_local_base_sp; // correct val_stack_top for whamm arg count } + // create merge state based on outer function's base sp given inlined function's results + func_body_ctl.merge_state = state.getInMemoryMergeWithArgs(int.view(new_local_base_sp), sig.results); + func_body_ctl.merge_count = 1; + // Create and push frame for inlined function var callee_frame = SpcFrame.new(callee_func, m, new_local_base_sp, new_ctl_base_sp, num_locals, 0, masm.newLabel(callee_func.cur_bytecode.length)); From e1eb4cddb7d63f64c8edf0928d2c0e10e38edad0 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 25 Mar 2026 02:27:55 -0400 Subject: [PATCH 22/28] Reapply "Remove manual result slot cleanup (using merge state transfer instead)" This reverts commit 7c09bfc5ea27573da05b85a996cffd36a82170bd. 
--- src/engine/compiler/SinglePassCompiler.v3 | 47 ----------------------- 1 file changed, 47 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 5c58f883c..e2cad3243 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -955,53 +955,6 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl return; } - // Clean up stack: - // Before: [..., arg0, arg1, ..., argN, local0, local1, ..., localM, result0, ..., resultK] - // After: [..., result0, ..., resultK] - - var total_callee_slots = state.sp - new_local_base_sp; // All callee state - var slots_to_drop = total_callee_slots - results_count; - - // for whamm probes, results_count SHOULD be zero - if (slots_to_drop > 0 && results_count > 0) { - // Need to move results down over parameters and locals - for (i < results_count) { - var result_slot = state.sp - results_count + u32.view(i); - var target_slot = new_local_base_sp + u32.view(i); - if (Trace.compiler) { - Trace.OUT.put3(" Moving result %d: slot %d -> slot %d", i, result_slot, target_slot).ln(); - } - if (result_slot != target_slot) { - var rv = state.state[result_slot]; - if (Trace.compiler) { - Trace.OUT.put2(" rv: flags=%x, const=%d", rv.flags, rv.const).ln(); - } - if (rv.inReg()) { - regAlloc.reassign(rv.reg, int.!(result_slot), int.!(target_slot)); - } else { - // Move in memory (rarely needed if results are in regs) - resolver.addMove((target_slot, rv), (result_slot, rv)); - } - state.state[target_slot] = rv; - } else { - // Result already in the right place - if (Trace.compiler) Trace.OUT.puts(" (already in place)").ln(); - } - } - resolver.emitMoves(); - - // Drop everything above results - for (slot = new_local_base_sp + results_count; slot < state.sp; slot++) { - unrefSlot(slot); - } - state.sp = new_local_base_sp + results_count; - } else if (slots_to_drop > 0) { - // No results, just drop everything - if 
(Trace.compiler) Trace.OUT.put1("dropping %d slots\n", slots_to_drop); - dropN(u32.view(slots_to_drop)); - } - // If slots_to_drop <= 0, results are already in the right place - if (Trace.compiler) { Trace.OUT.put1(" Inlined call complete, sp=%d", state.sp).ln(); } From 072d51cbee1657f578df420183ad4ab99a937d37 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 25 Mar 2026 11:58:02 -0400 Subject: [PATCH 23/28] Add return test to failures on dyn --- test/inline/failures.x86-64-linux.dyn | 1 + 1 file changed, 1 insertion(+) diff --git a/test/inline/failures.x86-64-linux.dyn b/test/inline/failures.x86-64-linux.dyn index da02fa079..50325688b 100644 --- a/test/inline/failures.x86-64-linux.dyn +++ b/test/inline/failures.x86-64-linux.dyn @@ -1,4 +1,5 @@ inline_test_arithmetic.wasm inline_test_locals_control.wasm inline_test_nesting.wasm +inline_test_return.wasm From 8df10deb9c29a83e1c9639edf8cf617637e591d5 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Tue, 31 Mar 2026 14:38:13 -0400 Subject: [PATCH 24/28] Use withReconstruct on inlined function entry probes --- src/engine/compiler/SinglePassCompiler.v3 | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index e2cad3243..956a81fa4 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -893,19 +893,8 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // XXX expensive because frame materialization required if (whamm == null && !FeatureDisable.entryProbes && func.entry_probed) { var probe = Instrumentation.getLocalProbe(module, callee_func.func_index, 0); - - // Reconstruct inlined frames before emitting probe - var reconstructed_space = 0; - if (isInlined()) { - var frames = snapshotFrames(); - unrefRegs(); - reconstructed_space = emitReconstructStackFrames(frames); - } - emitProbe0(0, probe); - // Clean up 
reconstructed frames after the call returns - if (reconstructed_space > 0) { - masm.emit_addw_r_i(regs.sp, reconstructed_space); - } + withReconstructedInlinedFrames(fun => + emitProbe0(0, probe)); } // Allocate callee's non-parameter locals From 6fa3289fb16292f05dbc64429300ecbe40f2cb63 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Tue, 31 Mar 2026 23:30:47 -0400 Subject: [PATCH 25/28] Prohibit nested frame reconstruction (Whamm probe hackfix when nesting depth > 1) --- src/engine/compiler/SinglePassCompiler.v3 | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index 956a81fa4..a2b50ce84 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -114,6 +114,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var last_probe = 0; var skip_to_end: bool; var whamm_config: WhammInlineConfig; + var frames_reconstructed = false; // XXX: hack var handler_dest_info = Vector.new(); @@ -2266,9 +2267,20 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl // Guards compiler code with frame reconstruction (if necessary). def withReconstructedInlinedFrames(emit: void -> void) { if (isInlined()) { + if (frames_reconstructed) { + // XXX this should not happen (but does), in the case of deep nesting + // when one layer is a Whamm probe + if (Trace.compiler) Trace.OUT.puts(" nested frame reconstruction inhibited\n"); + // need to save vfp into the frame (because Whamm probe doesn't?) 
+ masm.emit_mov_m_r(ValueKind.REF, frame.vfp_slot, regs.vfp); + emit(); + return; + } unrefRegs(); + frames_reconstructed = true; def space = emitReconstructStackFrames(snapshotFrames()); emit(); + frames_reconstructed = false; if (space > 0) { masm.emit_addw_r_i(regs.sp, space); masm.emit_mov_r_m(ValueKind.REF, regs.vfp, frame.vfp_slot); From 44e8babe69bae3fb3e0c123b6a2948f2dfd54214 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Tue, 31 Mar 2026 23:40:47 -0400 Subject: [PATCH 26/28] Increase code size estimate when inlining is enabled --- src/engine/x86-64/X86_64SinglePassCompiler.v3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/x86-64/X86_64SinglePassCompiler.v3 b/src/engine/x86-64/X86_64SinglePassCompiler.v3 index c277b9b98..20fbc427b 100644 --- a/src/engine/x86-64/X86_64SinglePassCompiler.v3 +++ b/src/engine/x86-64/X86_64SinglePassCompiler.v3 @@ -1535,7 +1535,7 @@ component X86_64Spc { return addr; } def estimateCodeSizeFor(decl: FuncDecl) -> int { - return 60 + decl.orig_bytecode.length * 20; // TODO: huge overestimate + return 60 + decl.orig_bytecode.length * 20 * (2 << byte.view(SpcTuning.maxInlineDepth)); // TODO: huge overestimate } private def lazyCompile(wf: WasmFunction) -> (WasmFunction, Pointer, Throwable) { // The global stub simply consults the execution strategy. 
From 884b8cc928472088eab37c2ed6931ec571dffa49 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Wed, 1 Apr 2026 11:18:04 -0400 Subject: [PATCH 27/28] Move vfp fixing to emitWhammProbe --- src/engine/compiler/SinglePassCompiler.v3 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/engine/compiler/SinglePassCompiler.v3 b/src/engine/compiler/SinglePassCompiler.v3 index a2b50ce84..280b86e60 100644 --- a/src/engine/compiler/SinglePassCompiler.v3 +++ b/src/engine/compiler/SinglePassCompiler.v3 @@ -651,6 +651,7 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl var whamm_func_decl = callee_func.decl; if (inline_decision) { whamm_config = WhammInlineConfig(swap_membase, swap_instance, true); + masm.emit_mov_m_r(ValueKind.REF, frame.vfp_slot, regs.vfp); emitInlinedCall(whamm_func_decl, probe); whamm_config = WhammInlineConfig(false, false, false); // Restore mem0_base after probe @@ -2268,11 +2269,10 @@ class SinglePassCompiler(xenv: SpcExecEnv, masm: MacroAssembler, regAlloc: RegAl def withReconstructedInlinedFrames(emit: void -> void) { if (isInlined()) { if (frames_reconstructed) { - // XXX this should not happen (but does), in the case of deep nesting - // when one layer is a Whamm probe + // FIXME this should not happen (but does): + // - in the case of deep nesting when one layer is a Whamm probe + // - when refactoring to avoid `with` clause, GC test fails (inlining depth 2) if (Trace.compiler) Trace.OUT.puts(" nested frame reconstruction inhibited\n"); - // need to save vfp into the frame (because Whamm probe doesn't?) - masm.emit_mov_m_r(ValueKind.REF, frame.vfp_slot, regs.vfp); emit(); return; } From a2016c288e179f437f76a732f888dc7545e4d3f9 Mon Sep 17 00:00:00 2001 From: Matthew Schneider Date: Thu, 2 Apr 2026 17:57:23 -0400 Subject: [PATCH 28/28] Empty commit