Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
59944a8
Configurable native stack size for Stylus with auto-retry on overflow
bragaigor Mar 20, 2026
e52f4ac
add validation check and avoid infinite loop
bragaigor Mar 20, 2026
7ecbee1
temporarily bump global stack size
bragaigor Mar 20, 2026
9a8988b
add per thread stack size override
bragaigor Mar 20, 2026
5c01282
cleanup and update comments
bragaigor Mar 20, 2026
5f23b03
Merge branch 'master' into nm004-config-stylus-stack-size
bragaigor Mar 23, 2026
f02adf1
update wasmer pin
bragaigor Mar 23, 2026
c5d7598
Merge branch 'master' into nm004-config-stylus-stack-size
bragaigor Mar 30, 2026
c08d636
replace set_thread_stack_size with drain_stack_pool
bragaigor Mar 30, 2026
0ca8979
update comments and add tests
bragaigor Mar 30, 2026
35473be
add retries count log and update tests
bragaigor Mar 30, 2026
b60a623
add test for give-up path
bragaigor Mar 30, 2026
81800a5
Merge branch 'master' into nm004-config-stylus-stack-size
bragaigor Mar 30, 2026
fd2ae52
update wasmer pin
bragaigor Mar 31, 2026
3be8c15
apply new fallback stack overflow logic
bragaigor Mar 31, 2026
7e2f334
fix race condition and update tests
bragaigor Mar 31, 2026
065d30b
make lint happy and fix snapshot
bragaigor Mar 31, 2026
11db344
fix test race
bragaigor Mar 31, 2026
93fca7d
improve tests
bragaigor Apr 1, 2026
4fd0576
return error if we hit native stack overflow
bragaigor Apr 1, 2026
043e63f
Consolidate cranelift targets with non-cranelift targets
bragaigor Apr 1, 2026
72d0cba
fix tests and save UsedMultiGas
bragaigor Apr 2, 2026
988d29c
Merge branch 'master' into nm004-config-stylus-stack-size
bragaigor Apr 2, 2026
730c7ce
make lint happy
bragaigor Apr 2, 2026
3ecad5e
only call stylus with cranelift if not compiled again
bragaigor Apr 2, 2026
c5e37cc
cleanup and fix program stack overflow tests
bragaigor Apr 2, 2026
16682ac
update comments and add error logs
bragaigor Apr 2, 2026
f0610d4
update go-ethereum pin
bragaigor Apr 2, 2026
f166a5c
always run with cranelift on retry first unconditionally
bragaigor Apr 2, 2026
619e251
make getCraneliftAsm also return an error and add test for it
bragaigor Apr 2, 2026
030edf9
return ErrNativeStackOverflow for userNativeStackOverflow
bragaigor Apr 2, 2026
e928d36
typo
bragaigor Apr 2, 2026
6de4c20
fix comments
bragaigor Apr 3, 2026
e8d60b3
also save state for openPages and everPages
bragaigor Apr 3, 2026
746454b
add consistency tests and bring GetAllowFallback() earlier
bragaigor Apr 6, 2026
db48a58
Merge branch 'master' into nm004-config-stylus-stack-size
bragaigor Apr 6, 2026
3edb1d9
Merge branch 'master' into nm004-config-stylus-stack-size
bragaigor Apr 7, 2026
ee1898f
allow fallback to be applied for both all targets
bragaigor Apr 7, 2026
b4cd4e8
update retryOnStackOverflow to handleNativeStackOverflow
bragaigor Apr 8, 2026
d32d8ef
update comments to reflect new logic
bragaigor Apr 8, 2026
5b3f15d
introduce doubleNativeStackSize() and try cranelift first
bragaigor Apr 8, 2026
41e87c6
update comments and add restoreState helper
bragaigor Apr 8, 2026
d9ec033
Merge branch 'master' into nm004-config-stylus-stack-size
bragaigor Apr 8, 2026
68673e9
on first overflow, double stack and try again
bragaigor Apr 8, 2026
bcc3cdc
update comment and improve test
bragaigor Apr 8, 2026
20aea22
move cranelift target registration to PopulateStylusTargetCache
bragaigor Apr 8, 2026
9dc0493
encapsulate savedState and more compareAndSwap to the end
bragaigor Apr 9, 2026
56e26a6
Merge branch 'master' into nm004-config-stylus-stack-size
eljobe Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions arbos/programs/cgo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,11 @@ func TestCompileArch(t *testing.T) {
}
}
}

// TestNativeStackSize runs the native-stack-size scenario implemented by
// testNativeStackSize and fails the test if that scenario reports an error.
func TestNativeStackSize(t *testing.T) {
	// Put the native stack size back to 1 MB on every exit path, panics included.
	defer SetNativeStackSize(1024 * 1024)

	if failure := testNativeStackSize(); failure != nil {
		t.Fatal(failure)
	}
}
6 changes: 6 additions & 0 deletions arbos/programs/native.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ func SetAllowFallback(enabled bool) {
log.Info("Compiler fallback for Stylus compilation configured", "enabled", enabled)
}

// SetNativeStackSize configures the Wasmer coroutine stack size for Stylus execution.
// If size is 0, the existing default (1 MB) is kept.
func SetNativeStackSize(size uint64) {
C.stylus_set_native_stack_size(u64(size))
}

var (
stylusLRUCacheSizeBytesGauge = metrics.NewRegisteredGauge("arb/arbos/stylus/cache/lru/size_bytes", nil)
stylusLRUCacheCountGauge = metrics.NewRegisteredGauge("arb/arbos/stylus/cache/lru/count", nil)
Expand Down
6 changes: 6 additions & 0 deletions arbos/programs/programs.go
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,7 @@ const (
userFailure
userOutOfInk
userOutOfStack
userNativeStackOverflow
)

func (status userStatus) toResult(data []byte, debug bool) ([]byte, string, error) {
Expand All @@ -772,6 +773,11 @@ func (status userStatus) toResult(data []byte, debug bool) ([]byte, string, erro
return nil, "", vm.ErrOutOfGas
case userOutOfStack:
return nil, "", vm.ErrDepth
case userNativeStackOverflow:
// Should not reach Go — the Rust retry loop handles this.
// If it does, treat as depth error.
log.Error("native stack overflow reached Go side (retry loop failed)")
return nil, "", vm.ErrDepth
default:
log.Error("program errored with unknown status", "status", status, "data", msg)
return nil, msg, vm.ErrExecutionReverted
Expand Down
102 changes: 102 additions & 0 deletions arbos/programs/testcompile.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,108 @@ func testCompileLoadFor(filePath string) error {
return err
}

// testNativeStackSize exercises the native stack overflow handling of
// stylus_call end to end:
//  1. SetNativeStackSize shrinks the Wasmer coroutine stack to 32 KB.
//  2. A self-recursive WAT program with nested multi-value loops is compiled
//     and run on that tiny stack, so it is expected to overflow it and be
//     retried with a larger stack by the Rust retry loop.
//
// The call must come back with an out-of-ink or out-of-stack status; any other
// status, including success, is reported as an error. The native stack size is
// not restored here, so callers are expected to reset it themselves.
func testNativeStackSize() error {
	target := rawdb.LocalTarget()
	if err := SetTarget(target, "", true); err != nil {
		return fmt.Errorf("failed setting target: %w", err)
	}

	// The program re-enters its own entrypoint until it runs out of either
	// gas or stack. The nested loops with multi-value signatures consume
	// native stack quickly in Singlepass.
	recursiveWat := []byte(`(module
(memory 0 0)
(export "memory" (memory 0))
(type $mv (func (param i32 i32) (result i32 i32)))
(func $main (export "user_entrypoint") (param $args_len i32) (result i32)
;; Push initial values for the loop params
i32.const 0
i32.const 0
(loop $outer (param i32 i32) (result i32 i32)
(loop $inner (param i32 i32) (result i32 i32)
;; just pass through
)
)
drop
drop

;; Recurse to consume more native stack
i32.const 0
call $main
)
)`)

	wasm, err := Wat2Wasm(recursiveWat)
	if err != nil {
		return fmt.Errorf("failed compiling WAT: %w", err)
	}

	nativeAsm, err := compileNative(wasm, 1, true, target, false, time.Minute)
	if err != nil {
		return fmt.Errorf("failed compiling native: %w", err)
	}

	// Start from a deliberately tiny 32 KB native stack so the overflow
	// happens quickly.
	SetNativeStackSize(32 * 1024)

	var (
		input   = []byte{}
		evmData = EvmData{}
		params  = ProgParams{
			MaxDepth:  10000,
			InkPrice:  1,
			DebugMode: true,
		}
		handler = C.NativeRequestHandler{
			handle_request_fptr: (*[0]byte)(C.handleReqWrap),
			id:                  0,
		}
		gasLeft = u64(0xfffffffffffffff)
		result  = &rustBytes{}
	)

	// Expected flow: 32KB overflows, the Rust side doubles the stack (64KB,
	// 128KB, ...) and retries until the stack is large enough or the program
	// runs out of gas. Either terminal condition is acceptable.
	status := userStatus(C.stylus_call(
		goSlice(nativeAsm),
		goSlice(input),
		params.encode(),
		handler,
		evmData.encode(),
		cbool(true),
		result,
		&gasLeft,
		u32(0),
	))

	rustBytesIntoBytes(result)

	// What matters is that the call returned at all instead of crashing with
	// SIGSEGV, and that it ended in one of the two expected failure statuses.
	switch status {
	case userOutOfInk, userOutOfStack:
		// Expected: the recursion was stopped by ink or by the DepthChecker.
	case userSuccess:
		return fmt.Errorf("expected recursive program to eventually fail, got success")
	default:
		return fmt.Errorf("expected out-of-ink or out-of-stack, got status %d", status)
	}

	_, err = fmt.Printf("testNativeStackSize: passed (status=%d), stack auto-grew from 32KB\n", status)
	return err
}

func testCompileLoad() error {
filePathStart := "../../target/testdata/host"
localTarget := rawdb.LocalTarget()
Expand Down
6 changes: 6 additions & 0 deletions changelog/braga-configurable-native-stack-size.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
### Configuration
- Add `--stylus-target.native-stack-size` config to set the initial Wasmer coroutine stack size for Stylus execution.

### Fixed
- Fix Wasmer stack pool reusing stale smaller stacks after a stack size change.
- Automatically detect native stack overflow during Stylus execution, grow the stack size (doubling each retry, capped at 100 MB), and retry.
8 changes: 8 additions & 0 deletions crates/arbutil/src/evm/user.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ pub enum UserOutcome {
Failure(ErrReport),
OutOfInk,
OutOfStack,
/// The Wasmer native coroutine stack overflowed (SIGSEGV caught by signal handler).
/// Unlike OutOfStack (which is the deterministic DepthChecker limit), this indicates
/// the physical stack was exhausted and the call should be retried with a larger stack.
NativeStackOverflow,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)]
Expand All @@ -22,6 +26,7 @@ pub enum UserOutcomeKind {
Failure,
OutOfInk,
OutOfStack,
NativeStackOverflow,
}

impl UserOutcome {
Expand Down Expand Up @@ -50,6 +55,7 @@ impl From<&UserOutcome> for UserOutcomeKind {
Failure(_) => Self::Failure,
OutOfInk => Self::OutOfInk,
OutOfStack => Self::OutOfStack,
NativeStackOverflow => Self::NativeStackOverflow,
}
}
}
Expand All @@ -68,6 +74,7 @@ impl Display for UserOutcome {
Failure(err) => write!(f, "failure {err:?}"),
OutOfInk => write!(f, "out of ink"),
OutOfStack => write!(f, "out of stack"),
NativeStackOverflow => write!(f, "native stack overflow"),
Revert(data) => {
let text = String::from_utf8(data.clone()).unwrap_or_else(|_| hex::encode(data));
write!(f, "revert {text}")
Expand All @@ -86,6 +93,7 @@ impl Display for UserOutcomeKind {
Failure => write!(f, "failure ({as_u8})"),
OutOfInk => write!(f, "out of ink ({as_u8})"),
OutOfStack => write!(f, "out of stack ({as_u8})"),
NativeStackOverflow => write!(f, "native stack overflow ({as_u8})"),
}
}
}
1 change: 1 addition & 0 deletions crates/stylus/src/evm_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use arbutil::evm::{
};
use prover::RustSlice;

#[derive(Clone, Copy)]
#[repr(C)]
pub struct NativeRequestHandler {
pub handle_request_fptr: unsafe extern "C" fn(
Expand Down
139 changes: 111 additions & 28 deletions crates/stylus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,29 @@ pub unsafe extern "C" fn stylus_target_set(
UserOutcomeKind::Success
}

/// Sets the default native stack size for Wasmer coroutines.
/// If `size` is 0, the existing default (1MB) is kept.
///
/// # Safety
///
/// Must be called before any Stylus execution begins (typically at startup).
#[no_mangle]
pub extern "C" fn stylus_set_native_stack_size(size: u64) {
if size > 0 {
wasmer_vm::set_stack_size(size as usize);
eprintln!(
"stylus: native stack size set to {} bytes",
wasmer_vm::get_stack_size()
);
}
}

/// Calls an activated user program.
///
/// If the call fails due to native stack overflow, the native stack size is
/// doubled and the call is retried. This repeats until the call succeeds,
/// fails for another reason, or the 100 MB Wasmer stack limit is reached.
///
/// # Safety
///
/// `module` must represent a valid module produced from `stylus_activate`.
Expand All @@ -243,37 +264,99 @@ pub unsafe extern "C" fn stylus_call(
) -> UserOutcomeKind {
let module = module.slice();
let calldata = calldata.slice().to_vec();
let evm_api = EvmApiRequestor::new(req_handler);
let pricing = config.pricing;
let output = &mut *output;
let ink = pricing.gas_to_ink(Gas(*gas));

// Safety: module came from compile_user_wasm and we've paid for memory expansion
let instance = unsafe {
NativeInstance::deserialize_cached(
module,
config.version,
evm_api,
evm_data,
long_term_tag,
debug_chain,
)
};
let mut instance = match instance {
Ok(instance) => instance,
Err(error) => util::panic_with_wasm(module, error.wrap_err("init failed")),
};
let original_gas = *gas;

// Native stack overflow retry loop.
//
// The DepthChecker middleware tracks an abstract "stack depth" to enforce
// deterministic stack limits across compilers, but its model can undercount
// the real native stack usage of the Singlepass compiler (e.g. multi-value
// loop PHI slots are not accounted for). When the mismatch is large enough,
// the Wasmer coroutine stack overflows with SIGSEGV before the DepthChecker
// limit is reached.
//
// Wasmer's signal handler catches this and returns TrapCode::StackOverflow,
// which run_main surfaces as UserOutcome::NativeStackOverflow. When we see
// that outcome we:
// 1. Double the global native stack size (capped at MAX_STACK_SIZE = 100 MB).
// set_stack_size drains the cached stack pool so the next call gets a
// fresh, correctly-sized stack.
// 2. Restore gas to its pre-call value (the failed attempt may have
// consumed some via host calls, but the Go-side EVM reverts sub-call
// state on failure, so replaying is safe).
// 3. Re-create the instance and retry.
//
// If the stack is already at MAX_STACK_SIZE and still overflows, we give up
// and return OutOfStack (taking all gas) instead of crashing the node.
loop {
let evm_api = EvmApiRequestor::new(req_handler);
let ink = pricing.gas_to_ink(Gas(*gas));

// Safety: module came from compile_user_wasm and we've paid for memory expansion
let instance = unsafe {
NativeInstance::deserialize_cached(
module,
config.version,
evm_api,
evm_data,
long_term_tag,
debug_chain,
)
};
let mut instance = match instance {
Ok(instance) => instance,
Err(error) => util::panic_with_wasm(module, error.wrap_err("init failed")),
};

let outcome = instance.run_main(&calldata, config, ink);

if matches!(&outcome, Ok(UserOutcome::NativeStackOverflow)) {
let current = wasmer_vm::get_stack_size();
if current >= wasmer_vm::MAX_STACK_SIZE {
eprintln!(
"stylus: native stack overflow at maximum stack size ({} bytes), \
giving up",
current,
);
let status = write_outcome(output, UserOutcome::OutOfStack);
*gas = 0;
return status;
}
let new_size = current.saturating_mul(2).min(wasmer_vm::MAX_STACK_SIZE);
Comment thread
bragaigor marked this conversation as resolved.
Outdated
eprintln!(
"stylus: WARNING: native stack overflow detected, \
growing stack size from {} to {} bytes and retrying",
current, new_size,
);
wasmer_vm::set_stack_size(new_size);
let actual = wasmer_vm::get_stack_size();
if actual <= current {
eprintln!(
"stylus: CRITICAL: failed to grow native stack \
(requested {}, got {}), giving up",
new_size, actual,
);
let status = write_outcome(output, UserOutcome::OutOfStack);
*gas = 0;
return status;
}
*gas = original_gas;
continue;
}

let status = match instance.run_main(&calldata, config, ink) {
Err(e) | Ok(UserOutcome::Failure(e)) => write_err(output, e.wrap_err("call failed")),
Ok(outcome) => write_outcome(output, outcome),
};
let ink_left = match status {
UserOutcomeKind::OutOfStack => Ink(0), // take all gas when out of stack
_ => instance.ink_left().into(),
};
*gas = pricing.ink_to_gas(ink_left).0;
status
let status = match outcome {
Err(e) | Ok(UserOutcome::Failure(e)) => write_err(output, e.wrap_err("call failed")),
Ok(outcome) => write_outcome(output, outcome),
};
let ink_left = match status {
UserOutcomeKind::OutOfStack => Ink(0), // take all gas when out of stack
_ => instance.ink_left().into(),
};
*gas = pricing.ink_to_gas(ink_left).0;
return status;
}
}

/// set lru cache capacity
Expand Down
8 changes: 8 additions & 0 deletions crates/stylus/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use arbutil::evm::user::UserOutcome;
use eyre::{eyre, Result};
use prover::machine::Machine;
use prover::programs::{prelude::*, STYLUS_ENTRY_POINT};
use wasmer_types::TrapCode;

pub trait RunProgram {
fn run_main(&mut self, args: &[u8], config: StylusConfig, ink: Ink) -> Result<UserOutcome>;
Expand Down Expand Up @@ -88,6 +89,13 @@ impl<D: DataReader, E: EvmApi<D>> RunProgram for NativeInstance<D, E> {
let status = match main.call(store, args.len() as u32) {
Ok(status) => status,
Err(outcome) => {
// Detect native stack overflow FIRST — it takes priority because
// the DepthChecker counter may also be at zero when SIGSEGV fires,
// and we need the retry loop in stylus_call to see NativeStackOverflow.
if outcome.clone().to_trap() == Some(TrapCode::StackOverflow) {
return Ok(NativeStackOverflow);
}

if self.stack_left() == 0 {
return Ok(OutOfStack);
}
Expand Down
2 changes: 1 addition & 1 deletion crates/tools/wasmer
Loading
Loading