Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
94855b1
Add SpcInlinedFrame
matthew-mojira Mar 20, 2026
9a38ea7
Add frame reconstructions methods and guard reconstruction points
matthew-mojira Mar 23, 2026
e5e4f05
Merge branch 'master' into pr/reconstruction
matthew-mojira Mar 23, 2026
731c9e7
Move function to a more sensible spot
matthew-mojira Mar 23, 2026
194f26d
Make the if statement one line
matthew-mojira Mar 25, 2026
f021600
Compute vfp for every inlined frame
matthew-mojira Mar 26, 2026
9bf2aeb
Uncomment stack reconstruction metrics
matthew-mojira Mar 26, 2026
e4b2d4a
Use optimized vfp calculation
matthew-mojira Mar 26, 2026
a7044bb
Use a single `rsp` adjustment in reconstructing stack frames
matthew-mojira Mar 31, 2026
7b8ae6d
Separate single-frame reconstruction into its own method
matthew-mojira Mar 31, 2026
036753d
Revert "Separate single-frame reconstruction into its own method"
matthew-mojira Mar 31, 2026
296ea0f
Fix wrong way indexing
matthew-mojira Mar 31, 2026
6b26972
Reapply "Separate single-frame reconstruction into its own method"
matthew-mojira Mar 31, 2026
a96f168
Restore vfp after out call
matthew-mojira Mar 31, 2026
3fb2fd1
The first of the final inlinings
matthew-mojira Mar 23, 2026
4ad0cbd
Remove dead lines
matthew-mojira Mar 23, 2026
3f9b030
Remove extra metric increment
matthew-mojira Mar 23, 2026
c0ce246
Fix osr bug on dyn
matthew-mojira Mar 23, 2026
364664a
Support RETURN instruction
matthew-mojira Mar 25, 2026
437da05
Remove manual result slot cleanup (using merge state transfer instead)
matthew-mojira Mar 25, 2026
644ba20
Revert "Remove manual result slot cleanup (using merge state transfer…
matthew-mojira Mar 25, 2026
9a1af46
Fix whamm arg for merge state
matthew-mojira Mar 25, 2026
e1eb4cd
Reapply "Remove manual result slot cleanup (using merge state transfe…
matthew-mojira Mar 25, 2026
072d51c
Add return test to failures on dyn
matthew-mojira Mar 25, 2026
8df10de
Use withReconstruct on inlined function entry probes
matthew-mojira Mar 31, 2026
6fa3289
Prohibit nested frame reconstruction (Whamm probe hackfix when nestin…
matthew-mojira Apr 1, 2026
44e8bab
Increase code size estimate when inlining is enabled
matthew-mojira Apr 1, 2026
884b8cc
Move vfp fixing to emitWhammProbe
matthew-mojira Apr 1, 2026
a2016c2
Empty commit
matthew-mojira Apr 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
582 changes: 398 additions & 184 deletions src/engine/compiler/SinglePassCompiler.v3

Large diffs are not rendered by default.

32 changes: 30 additions & 2 deletions src/engine/x86-64/X86_64SinglePassCompiler.v3
Original file line number Diff line number Diff line change
Expand Up @@ -1256,7 +1256,35 @@ class X86_64SpcModuleCode extends X86_64SpcCode {
}
// Reconstructs inlined interpreter frames for an inlined hardware trap context.
// Returns the new rsp to write into the ucontext (top of stack).
private def reconstructInlinedFramesForTrap(r_rsp: Pointer, inline_ctx: List<FuncLoc>) -> Pointer;
private def reconstructInlinedFramesForTrap(r_rsp: Pointer, inline_ctx: List<FuncLoc>) -> Pointer {
// {inline_ctx} is flattened to an array. The LAST element is treated as the outermost (real) frame;
// every element before it describes a function that was inlined into that frame.
// NOTE(review): an empty {inline_ctx} would make {frames.length - 1} negative -- presumably callers
// only pass a context with at least one entry; confirm.
def frames: Array<FuncLoc> = Lists.toArray(inline_ctx);
def outer = frames[frames.length - 1];
def inlined = frames[0 ... (frames.length - 1)];
def count = inlined.length;

// set outermost pc in the real frame
(r_rsp + X86_64InterpreterFrame.curpc.offset).store<int>(outer.pc);

// Read instance from the real outer frame (shared across all inlined frames)
var instance = (r_rsp + X86_64InterpreterFrame.instance.offset).load<Instance>();

// Push inlined frames
// Iterates from the entry adjacent to {outer} down to index 0. Each iteration lowers {r_rsp},
// so the frame for {inlined[0]} ends up at the lowest address, which is the value returned.
for (i = count - 1; i >= 0; i--) {
var fid = inlined[i].func_index;
var pc = inlined[i].pc;

// Reserve one 8-byte slot and fill it with the start address of INLINED_FRAME_STUB.
// NOTE(review): presumably this slot acts as the return address of the reconstructed frame -- confirm.
r_rsp += -8;
r_rsp.store<Pointer>(INLINED_FRAME_STUB.start);

// Reserve space for one interpreter-layout frame below the stub slot.
r_rsp += -X86_64InterpreterFrame.size; // move rsp?
// write func, pc, frame accessor
// Only these three slots are written here; all other slots of the new frame are left untouched.
var wasm_func = WasmFunction.!(instance.functions[fid]);
(r_rsp + X86_64InterpreterFrame.wasm_func.offset).store<WasmFunction>(wasm_func);
(r_rsp + X86_64InterpreterFrame.curpc.offset).store<int>(pc);
(r_rsp + X86_64InterpreterFrame.accessor.offset).store<X86_64FrameAccessor>(null);
}
// Lowest address reached after pushing all reconstructed frames.
return r_rsp;
}
// Look up the source {pc} of a location {i} in this code. Returns {-1} if no exact entry is found.
// Return addresses are treated differently than other addresses in the code.
def lookupPc(ip: Pointer, isRetAddr: bool) -> List<FuncLoc> {
Expand Down Expand Up @@ -1507,7 +1535,7 @@ component X86_64Spc {
return addr;
}
def estimateCodeSizeFor(decl: FuncDecl) -> int {
// Estimates the size of the compiled code for {decl} from the length of its original bytecode.
// NOTE(review): this hunk is a rendered diff with its +/- markers lost; the first return below
// appears to be the pre-change line and the second its replacement -- confirm against the real file.
return 60 + decl.orig_bytecode.length * 20; // TODO: huge overestimate
// Replacement form: scales the per-bytecode-byte term by (2 << maxInlineDepth).
// NOTE(review): the factor is 2 (not 1) when maxInlineDepth is 0 -- confirm this is intended.
return 60 + decl.orig_bytecode.length * 20 * (2 << byte.view(SpcTuning.maxInlineDepth)); // TODO: huge overestimate
}
private def lazyCompile(wf: WasmFunction) -> (WasmFunction, Pointer, Throwable) {
// The global stub simply consults the execution strategy.
Expand Down
32 changes: 28 additions & 4 deletions src/util/Whamm.v3
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,9 @@ component Whamm {
class WhammProbe(func: Function, sig: Array<WhammArg>) extends Probe {
var trampoline: TargetCode;
// properties set by the spc to make inlining optimization decisions.
var inline_heuristic_checked = false;
var spc_inline_func = false;
var spc_swap_instance = false;
var spc_swap_membase = false;
var swap_checked = false;
var swap_instance = false;
var swap_membase = false;

private def args = if(sig.length == 0, Values.NONE, Array<Value>.new(sig.length));

Expand All @@ -203,6 +202,31 @@ class WhammProbe(func: Function, sig: Array<WhammArg>) extends Probe {
}
return ProbeAction.Continue;
}

// If function is to be inlined, check to see if instance or mem0_base need to be swapped.
def checkSwap() {
// Scans the bytecode of the probe's function once and records, in {swap_instance} and
// {swap_membase}, which pieces of state its opcodes depend on. The result is cached:
// {swap_checked} makes every later call return immediately.
// The flags are only ever set to true here, never cleared.
if (swap_checked) return;
// NOTE(review): assumes {func} is a WasmFunction; the cast below presumably fails otherwise -- confirm.
var bi = BytecodeIterator.new().reset(WasmFunction.!(func).decl);
while (bi.more()) {
var op = bi.current();
match (op) {
// These opcodes require swapping the instance.
THROW, CALL, CALL_INDIRECT, MEMORY_INIT, MEMORY_SIZE, MEMORY_GROW, MEMORY_COPY, MEMORY_FILL, REF_FUNC, DATA_DROP,
ELEM_DROP, TABLE_INIT, TABLE_SIZE, TABLE_COPY, TABLE_GROW, GLOBAL_SET, GLOBAL_GET, TABLE_SET, TABLE_GET => swap_instance = true;
// Load/store opcodes require either the memory base or the instance.
I32_STORE, I64_STORE, F32_STORE, F64_STORE, I32_STORE8, I32_STORE16, I64_STORE8, I64_STORE16, I64_STORE32,
V128_STORE, I32_LOAD, I64_LOAD, F32_LOAD, F64_LOAD, I32_LOAD8_S, I32_LOAD8_U, I32_LOAD16_S, I32_LOAD16_U,
I64_LOAD8_S, I64_LOAD8_U, I64_LOAD16_S, I64_LOAD16_U, I64_LOAD32_S, I64_LOAD32_U, V128_LOAD => {
// Decode the memory immediate: an access to memory index 0 needs the memory base,
// an access to any other memory index needs the instance instead.
var memarg = bi.immptr().read_MemArg();
if (memarg.memory_index == 0) swap_membase = true;
else swap_instance = true;
}
// All other opcodes leave both flags unchanged.
// NOTE(review): opcodes not listed above (e.g. other memory-accessing or call-like ones) are
// treated as needing no swap -- confirm the lists are complete for the supported extensions.
_ => ;
}
bi.next();
}
swap_checked = true;
}
}

def parseParam0(r: TextReader) -> WhammParam {
Expand Down
3 changes: 0 additions & 3 deletions test/inline/failures.x86-64-linux

This file was deleted.

1 change: 1 addition & 0 deletions test/inline/failures.x86-64-linux.dyn
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
inline_test_arithmetic.wasm
inline_test_locals_control.wasm
inline_test_nesting.wasm
inline_test_return.wasm

Binary file added test/inline/inline_test_return.wasm
Binary file not shown.
1 change: 1 addition & 0 deletions test/inline/inline_test_return.wasm.exit
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
1 change: 1 addition & 0 deletions test/inline/inline_test_return.wasm.flags
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--metrics=spc*calls --inline-max-depth=1
4 changes: 4 additions & 0 deletions test/inline/inline_test_return.wasm.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
spc:static_calls : 6 calls
spc:static_inlined_calls : 6 calls
spc:dynamic_calls : 6 calls
spc:dynamic_inlined_calls : 6 calls
97 changes: 97 additions & 0 deletions test/inline/inline_test_return.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
;; Test inlined functions with explicit RETURN, including nested control flow
;; and paths where extra values are on the stack at the time of return.
(module
;; Two levels of nested ifs; in the early-return path, 2*a is an extra value
;; on the value stack below the returned a+b.
(func $weighted (param i32) (param i32) (result i32)
;; Parameters: local 0 = a, local 1 = b.
;; Result: a+b when both a > 0 and b > 0 (via the explicit return), otherwise 2a+b (fallthrough).
block (result i32)
local.get 0
i32.const 2
i32.mul ;; [2a] -- extra below when early return fires
block
local.get 0
i32.const 0
i32.gt_s
;; outer if: a > 0
if
local.get 1
i32.const 0
i32.gt_s
;; inner if: b > 0
if
;; both positive: return a+b; 2a is extra on stack
local.get 0
local.get 1
i32.add
return
end
end
end
;; reached whenever the early return did not fire; 2a is still on the stack
local.get 1
i32.add ;; fallthrough: 2a+b
end
)

;; Clamp x to [lo, hi]; two sequential (non-nested) ifs, each with an early return, plus a fallthrough path.
(func $clamp (param i32) (param i32) (param i32) (result i32)
;; Parameters: local 0 = x, local 1 = lo, local 2 = hi.
;; x < lo (signed): return lo
local.get 0
local.get 1
i32.lt_s
if
local.get 1
return
end
;; x > hi (signed): return hi
local.get 0
local.get 2
i32.gt_s
if
local.get 2
return
end
;; otherwise x is returned unchanged by falling off the end of the function
local.get 0
)

(func (export "main") (result i32)
;; Calls $weighted three times and $clamp three times. Each result is compared to its
;; expected value with i32.ne (1 on mismatch) and the six comparisons are OR-ed together,
;; so the function returns 0 only when every call produced the expected value.
i32.const 3
i32.const 4
call $weighted
i32.const 7 ;; both positive: 3+4=7
i32.ne

i32.const 3
i32.const -1
call $weighted
i32.const 5 ;; b<=0: 2*3+(-1)=5
i32.ne
i32.or

i32.const -1
i32.const 4
call $weighted
i32.const 2 ;; a<=0: 2*(-1)+4=2
i32.ne
i32.or

;; clamp(5, 0, 10): already in range, expect 5
i32.const 5
i32.const 0
i32.const 10
call $clamp
i32.const 5
i32.ne
i32.or

;; clamp(-3, 0, 10): below lo, expect 0
i32.const -3
i32.const 0
i32.const 10
call $clamp
i32.const 0
i32.ne
i32.or

;; clamp(15, 0, 10): above hi, expect 10
i32.const 15
i32.const 0
i32.const 10
call $clamp
i32.const 10
i32.ne
i32.or
)
)
Loading