From 36205f4fd117c8b9ed9dac10b22841f47c0a991d Mon Sep 17 00:00:00 2001 From: Paul Nodet <5941125+pnodet@users.noreply.github.com> Date: Sat, 11 Apr 2026 11:43:41 +0200 Subject: [PATCH 1/2] perf(aarch64): use lr-only linkage frames for simple regular calls --- cranelift/codegen/src/isa/aarch64/abi.rs | 129 ++++++++++++++++------- 1 file changed, 92 insertions(+), 37 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 84f5df6d1644..ea6333f9741d 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -578,6 +578,13 @@ impl ABIMachineSpec for AArch64MachineDeps { frame_layout: &FrameLayout, ) -> SmallInstVec { let setup_frame = frame_layout.setup_area_size > 0; + let lr_only_setup = setup_frame + && AArch64MachineDeps::use_lr_only_linkage_frame( + call_conv, + flags, + isa_flags, + frame_layout, + ); let mut insts = SmallVec::new(); match Self::select_api_key(isa_flags, call_conv, setup_frame) { @@ -610,36 +617,47 @@ impl ABIMachineSpec for AArch64MachineDeps { } if setup_frame { - // stp fp (x29), lr (x30), [sp, #-16]! - insts.push(Inst::StoreP64 { - rt: fp_reg(), - rt2: link_reg(), - mem: PairAMode::SPPreIndexed { - simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), - }, - flags: MemFlags::trusted(), - }); + if lr_only_setup { + // str lr (x30), [sp, #-16]! + insts.push(Inst::Store64 { + rd: link_reg(), + mem: AMode::SPPreIndexed { + simm9: SImm9::maybe_from_i64(-16).unwrap(), + }, + flags: MemFlags::trusted(), + }); + } else { + // stp fp (x29), lr (x30), [sp, #-16]! + insts.push(Inst::StoreP64 { + rt: fp_reg(), + rt2: link_reg(), + mem: PairAMode::SPPreIndexed { + simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), + }, + flags: MemFlags::trusted(), + }); - if flags.unwind_info() { - insts.push(Inst::Unwind { - inst: UnwindInst::PushFrameRegs { - offset_upward_to_caller_sp: frame_layout.setup_area_size, + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::PushFrameRegs { + offset_upward_to_caller_sp: frame_layout.setup_area_size, + }, + }); + } + + // mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because + // the usual encoding (`ORR`) does not work with SP. + insts.push(Inst::AluRRImm12 { + alu_op: ALUOp::Add, + size: OperandSize::Size64, + rd: writable_fp_reg(), + rn: stack_reg(), + imm12: Imm12 { + bits: 0, + shift12: false, }, }); } - - // mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because - // the usual encoding (`ORR`) does not work with SP. - insts.push(Inst::AluRRImm12 { - alu_op: ALUOp::Add, - size: OperandSize::Size64, - rd: writable_fp_reg(), - rn: stack_reg(), - imm12: Imm12 { - bits: 0, - shift12: false, - }, - }); } insts @@ -647,11 +665,18 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_epilogue_frame_restore( call_conv: isa::CallConv, - _flags: &settings::Flags, - _isa_flags: &aarch64_settings::Flags, + flags: &settings::Flags, + isa_flags: &aarch64_settings::Flags, frame_layout: &FrameLayout, ) -> SmallInstVec { let setup_frame = frame_layout.setup_area_size > 0; + let lr_only_setup = setup_frame + && AArch64MachineDeps::use_lr_only_linkage_frame( + call_conv, + flags, + isa_flags, + frame_layout, + ); let mut insts = SmallVec::new(); if setup_frame { @@ -659,15 +684,26 @@ impl ABIMachineSpec for AArch64MachineDeps { // clobber-restore code (which also frees the fixed frame). Hence, there // is no need for the usual `mov sp, fp` here. - // `ldp fp, lr, [sp], #16` - insts.push(Inst::LoadP64 { - rt: writable_fp_reg(), - rt2: writable_link_reg(), - mem: PairAMode::SPPostIndexed { - simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(), - }, - flags: MemFlags::trusted(), - }); + if lr_only_setup { + // `ldr lr, [sp], #16` + insts.push(Inst::ULoad64 { + rd: writable_link_reg(), + mem: AMode::SPPostIndexed { + simm9: SImm9::maybe_from_i64(16).unwrap(), + }, + flags: MemFlags::trusted(), + }); + } else { + // `ldp fp, lr, [sp], #16` + insts.push(Inst::LoadP64 { + rt: writable_fp_reg(), + rt2: writable_link_reg(), + mem: PairAMode::SPPostIndexed { + simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(), + }, + flags: MemFlags::trusted(), + }); + } } if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 { @@ -1248,6 +1284,25 @@ impl ABIMachineSpec for AArch64MachineDeps { } impl AArch64MachineDeps { + fn use_lr_only_linkage_frame( + call_conv: isa::CallConv, + flags: &settings::Flags, + isa_flags: &aarch64_settings::Flags, + frame_layout: &FrameLayout, + ) -> bool { + call_conv != isa::CallConv::Tail + && frame_layout.function_calls == FunctionCalls::Regular + && frame_layout.setup_area_size == 16 + && !flags.preserve_frame_pointers() + && !flags.unwind_info() + && !isa_flags.sign_return_address() + && frame_layout.incoming_args_size == 0 + && frame_layout.tail_args_size == frame_layout.incoming_args_size + && frame_layout.clobber_size == 0 + && frame_layout.fixed_frame_storage_size == 0 + && frame_layout.outgoing_args_size == 0 + } + fn gen_probestack_unroll(insts: &mut SmallInstVec, guard_size: u32, probe_count: u32) { // When manually unrolling adjust the stack pointer and then write a zero // to the stack at that offset. This generates something like From f846f2ef585590149f72bde825dc980416cd7772 Mon Sep 17 00:00:00 2001 From: Paul Nodet <5941125+pnodet@users.noreply.github.com> Date: Sat, 11 Apr 2026 11:43:41 +0200 Subject: [PATCH 2/2] test(aarch64): update filetests for lr-only linkage frame mode --- .../filetests/filetests/isa/aarch64/bti.clif | 16 ++- .../filetests/isa/aarch64/bti_with_csdb.clif | 17 ++- .../filetests/isa/aarch64/call-indirect.clif | 12 +- .../filetests/filetests/isa/aarch64/call.clif | 112 ++++++++---------- .../isa/aarch64/leaf_function_detection.clif | 26 ++-- .../aarch64/no_spill_floats_on_try_call.clif | 34 +++--- .../filetests/isa/aarch64/patchable-call.clif | 14 +-- .../filetests/isa/aarch64/preserve-all.clif | 26 ++-- .../aarch64/regular-call-lr-only-frame.clif | 68 +++++++++++ .../filetests/isa/aarch64/stack-limit.clif | 18 ++- .../aarch64/tail-call-frame-optimization.clif | 14 +-- .../filetests/isa/aarch64/tls-elf-gd.clif | 12 +- .../isa/aarch64/uext-sext-handling.clif | 6 +- 13 files changed, 201 insertions(+), 174 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/aarch64/regular-call-lr-only-frame.clif diff --git a/cranelift/filetests/filetests/isa/aarch64/bti.clif b/cranelift/filetests/filetests/isa/aarch64/bti.clif index 72c7e648c394..ec02fdc86781 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bti.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bti.clif @@ -149,25 +149,23 @@ block0(v0: i64): ; VCode: ; bti c -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; load_ext_name_far x3, TestCase(%g)+0 ; blr x3 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; hint #0x22 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0xc -; ldr x3, #0x14 -; b #0x1c +; str x30, [sp, #-0x10]! +; block1: ; offset 0x8 +; ldr x3, #0x10 +; b #0x18 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x3 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/bti_with_csdb.clif b/cranelift/filetests/filetests/isa/aarch64/bti_with_csdb.clif index 3c00aee53480..7a467db37f12 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bti_with_csdb.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bti_with_csdb.clif @@ -151,24 +151,23 @@ block0(v0: i64): ; VCode: ; bti c -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; load_ext_name_far x3, TestCase(%g)+0 ; blr x3 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; hint #0x22 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0xc -; ldr x3, #0x14 -; b #0x1c +; str x30, [sp, #-0x10]! +; block1: ; offset 0x8 +; ldr x3, #0x10 +; b #0x18 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x3 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif b/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif index 8dc330de96f6..3adf29255f56 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif @@ -10,19 +10,17 @@ block0(v0: i64, v1: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; blr x1 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 ; blr x1 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif index eb1eb7d5fd5a..135d2854387a 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -14,25 +14,23 @@ block0(v0: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; load_ext_name_far x3, TestCase(%g)+0 ; blr x3 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 -; ldr x3, #0x10 -; b #0x18 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; ldr x3, #0xc +; b #0x14 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x3 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret function %f2(i32) -> i64 { @@ -44,25 +42,23 @@ block0(v0: i32): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; load_ext_name_far x3, TestCase(%g)+0 ; blr x3 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 -; ldr x3, #0x10 -; b #0x18 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; ldr x3, #0xc +; b #0x14 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x3 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret function %f3(i32) -> i32 uext { @@ -87,25 +83,23 @@ block0(v0: i32): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; load_ext_name_far x3, TestCase(%g)+0 ; blr x3 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 -; ldr x3, #0x10 -; b #0x18 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; ldr x3, #0xc +; b #0x14 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x3 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret function %f5(i32) -> i32 sext { @@ -527,31 +521,29 @@ block0(v0: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; movz x2, #42 ; load_ext_name_far x4, TestCase(%f11)+0 ; mov x1, x0 ; mov x0, x2 ; blr x4 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 ; mov x2, #0x2a -; ldr x4, #0x14 -; b #0x1c +; ldr x4, #0x10 +; b #0x18 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %f11 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; mov x1, x0 ; mov x0, x2 ; blr x4 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret function %f12(i64, i128) -> i64 { @@ -581,31 +573,29 @@ block0(v0: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; movz x3, #42 ; load_ext_name_far x4, TestCase(%f12)+0 ; mov x2, x0 ; mov x0, x3 ; blr x4 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 ; mov x3, #0x2a -; ldr x4, #0x14 -; b #0x1c +; ldr x4, #0x10 +; b #0x18 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %f12 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; mov x2, x0 ; mov x0, x3 ; blr x4 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret function %f13(i64, i128) -> i64 apple_aarch64 { @@ -635,31 +625,29 @@ block0(v0: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; movz x2, #42 ; load_ext_name_far x4, TestCase(%f13)+0 ; mov x1, x0 ; mov x0, x2 ; blr x4 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 ; mov x2, #0x2a -; ldr x4, #0x14 -; b #0x1c +; ldr x4, #0x10 +; b #0x18 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %f13 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; mov x1, x0 ; mov x0, x2 ; blr x4 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret function %f14(i128, i128, i128, i64, i128) -> i128 { @@ -912,25 +900,23 @@ block0: } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; load_ext_name_far x0, TestCase(%g)+0 ; blr x0 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 -; ldr x0, #0x10 -; b #0x18 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; ldr x0, #0xc +; b #0x14 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x0 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret function %second_f16(f16, f16) -> f16 system_v { diff --git a/cranelift/filetests/filetests/isa/aarch64/leaf_function_detection.clif b/cranelift/filetests/filetests/isa/aarch64/leaf_function_detection.clif index a31b13315fe9..a90e7138c2c8 100644 --- a/cranelift/filetests/filetests/isa/aarch64/leaf_function_detection.clif +++ b/cranelift/filetests/filetests/isa/aarch64/leaf_function_detection.clif @@ -88,20 +88,18 @@ block0(v0: i32): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; bl 0 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 -; bl #8 ; reloc_external Call %simple_leaf 0 -; ldp x29, x30, [sp], #0x10 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; bl #4 ; reloc_external Call %simple_leaf 0 +; ldr x30, [sp], #0x10 ; ret ;; Test 4: Non-leaf function with indirect call @@ -114,20 +112,18 @@ block0(v0: i32, v1: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; blr x1 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 ; blr x1 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret ;; Test 5: Leaf function with memory operations (should still be leaf) diff --git a/cranelift/filetests/filetests/isa/aarch64/no_spill_floats_on_try_call.clif b/cranelift/filetests/filetests/isa/aarch64/no_spill_floats_on_try_call.clif index 961db90cbc62..c8a188c9f03e 100644 --- a/cranelift/filetests/filetests/isa/aarch64/no_spill_floats_on_try_call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/no_spill_floats_on_try_call.clif @@ -13,22 +13,20 @@ block1: } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; bl 0; b MachLabel(1); catch [] ; block1: -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 -; bl #8 ; reloc_external Call u0:1 0 -; block2: ; offset 0xc -; ldp x29, x30, [sp], #0x10 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; bl #4 ; reloc_external Call u0:1 0 +; block2: ; offset 0x8 +; ldr x30, [sp], #0x10 ; ret function u0:1() system_v { @@ -46,25 +44,23 @@ block2(v0: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; bl 0; b MachLabel(1); catch [tag0: MachLabel(2)] ; block1: -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; block2: ; udf #0xc11f ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 -; bl #8 ; reloc_external Call u0:1 0 -; block2: ; offset 0xc -; ldp x29, x30, [sp], #0x10 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; bl #4 ; reloc_external Call u0:1 0 +; block2: ; offset 0x8 +; ldr x30, [sp], #0x10 ; ret -; block3: ; offset 0x14 +; block3: ; offset 0x10 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: user1 diff --git a/cranelift/filetests/filetests/isa/aarch64/patchable-call.clif b/cranelift/filetests/filetests/isa/aarch64/patchable-call.clif index fb5afc0197c6..6bc29af4201e 100644 --- a/cranelift/filetests/filetests/isa/aarch64/patchable-call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/patchable-call.clif @@ -10,28 +10,26 @@ block0(v0: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; mov x3, x0 ; mov x1, x3 ; mov x2, x3 ; bl 0 ; bl 0 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 ; mov x3, x0 ; mov x1, x3 ; mov x2, x3 +; bl #0x10 ; reloc_external Call %f 0 ; patchable call: NOP out last 4 bytes ; bl #0x14 ; reloc_external Call %f 0 ; patchable call: NOP out last 4 bytes -; bl #0x18 ; reloc_external Call %f 0 ; patchable call: NOP out last 4 bytes -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret function %patchable_call_stack_args(i64) system_v { diff --git a/cranelift/filetests/filetests/isa/aarch64/preserve-all.clif b/cranelift/filetests/filetests/isa/aarch64/preserve-all.clif index 2f3672baf409..34c04e57f5fc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/preserve-all.clif +++ b/cranelift/filetests/filetests/isa/aarch64/preserve-all.clif @@ -10,28 +10,26 @@ block0(v0: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; mov x3, x0 ; mov x1, x3 ; mov x2, x3 ; blr x3 ; blr x3 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 ; mov x3, x0 ; mov x1, x3 ; mov x2, x3 ; blr x3 ; blr x3 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret function %preserve_all_abi_trampoline(i64) preserve_all { @@ -235,19 +233,17 @@ block0(v0: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; bl 0 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 -; bl #8 ; reloc_external Call %f 0 -; ldp x29, x30, [sp], #0x10 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; bl #4 ; reloc_external Call %f 0 +; ldr x30, [sp], #0x10 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/regular-call-lr-only-frame.clif b/cranelift/filetests/filetests/isa/aarch64/regular-call-lr-only-frame.clif new file mode 100644 index 000000000000..dbed6d9a121f --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/regular-call-lr-only-frame.clif @@ -0,0 +1,68 @@ +test compile precise-output +set unwind_info=false +set preserve_frame_pointers=false +target aarch64 + +;; Eligible regular-call function should avoid establishing fp. +function %regular_call_lr_only() -> i64 system_v { + fn0 = colocated %leaf() -> i64 system_v +block0: + v0 = call fn0() + return v0 +} + +; VCode: +; str lr, [sp, #-16]! +; block0: +; bl 0 +; ldr lr, [sp], #16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; bl #4 ; reloc_external Call %leaf 0 +; ldr x30, [sp], #0x10 +; ret + +;; Incoming stack args force fp/lr frame setup. +function %regular_call_needs_fp(i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 system_v { + fn0 = colocated %leaf() -> i64 system_v +block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64): + v9 = call fn0() + return v9 +} + +; VCode: +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; bl 0 +; ldp fp, lr, [sp], #16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; block1: ; offset 0x8 +; bl #8 ; reloc_external Call %leaf 0 +; ldp x29, x30, [sp], #0x10 +; ret + +function %leaf() -> i64 { +block0: + v0 = iconst.i64 42 + return v0 +} + +; VCode: +; block0: +; movz x0, #42 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; mov x0, #0x2a +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif index 26081172cf2d..6b630930faa4 100644 --- a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif @@ -44,8 +44,7 @@ block0(v0: i64): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; ldr x16, [x0] ; ldr x16, [x16, #4] ; add x16, x16, #16 @@ -54,25 +53,24 @@ block0(v0: i64): ; block0: ; load_ext_name_far x0, TestCase(%foo)+0 ; blr x0 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp +; str x30, [sp, #-0x10]! ; ldur x16, [x0] ; ldur x16, [x16, #4] ; add x16, x16, #0x10 ; cmp sp, x16 -; b.lo #0x38 -; block1: ; offset 0x1c -; ldr x0, #0x24 -; b #0x2c +; b.lo #0x34 +; block1: ; offset 0x18 +; ldr x0, #0x20 +; b #0x28 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x0 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf diff --git a/cranelift/filetests/filetests/isa/aarch64/tail-call-frame-optimization.clif b/cranelift/filetests/filetests/isa/aarch64/tail-call-frame-optimization.clif index 4b29cf2a8019..90098904fec7 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tail-call-frame-optimization.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tail-call-frame-optimization.clif @@ -31,20 +31,18 @@ block0: } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; bl 0 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 -; bl #8 ; reloc_external Call %target_func 0 -; ldp x29, x30, [sp], #0x10 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 +; bl #4 ; reloc_external Call %target_func 0 +; ldr x30, [sp], #0x10 ; ret ;; Test 3: Tail calling convention with conditional tail calls diff --git a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif index 7f185e4b2e81..f64853f84ccc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif @@ -11,21 +11,19 @@ block0(v0: i32): } ; VCode: -; stp fp, lr, [sp, #-16]! -; mov fp, sp +; str lr, [sp, #-16]! ; block0: ; mov x6, x0 ; elf_tls_get_addr x0, x3, userextname0 ; mov x1, x0 ; mov x0, x6 -; ldp fp, lr, [sp], #16 +; ldr lr, [sp], #16 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; stp x29, x30, [sp, #-0x10]! -; mov x29, sp -; block1: ; offset 0x8 +; str x30, [sp, #-0x10]! +; block1: ; offset 0x4 ; mov x6, x0 ; adrp x0, #0 ; reloc_external Aarch64TlsDescAdrPage21 u1:0 0 ; ldr x3, [x0] ; reloc_external Aarch64TlsDescLd64Lo12 u1:0 0 @@ -35,6 +33,6 @@ block0(v0: i32): ; add x0, x0, x3 ; mov x1, x0 ; mov x0, x6 -; ldp x29, x30, [sp], #0x10 +; ldr x30, [sp], #0x10 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/uext-sext-handling.clif b/cranelift/filetests/filetests/isa/aarch64/uext-sext-handling.clif index 7b081a469715..401cb7579aee 100644 --- a/cranelift/filetests/filetests/isa/aarch64/uext-sext-handling.clif +++ b/cranelift/filetests/filetests/isa/aarch64/uext-sext-handling.clif @@ -11,8 +11,7 @@ block0(v0: i8): return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp +; check: str lr, [sp, #-16]! ; nextln: block0: ; check-not: uxtb w0, w0 ; nextln: load_ext_name_far x2, User(userextname0)+0 @@ -28,8 +27,7 @@ block0(v0: i8): return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp +; check: str lr, [sp, #-16]! ; nextln: block0: ; nextln: uxtb w0, w0 ; nextln: load_ext_name_far x4, User(userextname0)+0