Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions .github/workflows/build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -266,17 +266,26 @@ jobs:
arch: "armhf"
library-arch: arm-linux-gnueabihf

# JIT armv6m build (Thumb-1 only JIT code)
- cc: "arm-linux-gnueabihf-gcc"
cxx: "arm-linux-gnueabihf-g++"
# -D_FILE_OFFSET_BITS=64 is required for making atomvm:posix_readdir/1 test work
# otherwise readdir will fail due to 64 bits inode numbers with 32 bit ino_t
cflags: "-mcpu=cortex-a7 -mfloat-abi=hard -O3 -mthumb -mthumb-interwork -D_FILE_OFFSET_BITS=64"
cmake_opts_other: "-DAVM_DISABLE_JIT=OFF -DAVM_JIT_TARGET_ARCH=armv6m -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/armhf_toolchain.cmake"
compiler_pkgs: "crossbuild-essential-armhf libc6-dbg:armhf zlib1g-dev:armhf libmbedtls-dev:armhf qemu-user qemu-user-binfmt binfmt-support"
arch: "armhf"
library-arch: arm-linux-gnueabihf
jit_target_arch: "armv6m"

# JIT armv6m+thumb2 build (Thumb-2 JIT code)
- cc: "arm-linux-gnueabihf-gcc"
cxx: "arm-linux-gnueabihf-g++"
cflags: "-mcpu=cortex-a7 -mfloat-abi=hard -O3 -mthumb -mthumb-interwork -D_FILE_OFFSET_BITS=64"
cmake_opts_other: "-DAVM_DISABLE_JIT=OFF -DAVM_JIT_TARGET_ARCH=armv6m+thumb2 -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/armhf_toolchain.cmake"
compiler_pkgs: "crossbuild-essential-armhf libc6-dbg:armhf zlib1g-dev:armhf libmbedtls-dev:armhf qemu-user qemu-user-binfmt binfmt-support"
arch: "armhf"
library-arch: arm-linux-gnueabihf
jit_target_arch: "armv6m+thumb2"

# JIT ARM32 (ARM mode) build
- cc: "arm-linux-gnueabihf-gcc"
cxx: "arm-linux-gnueabihf-g++"
Expand All @@ -301,6 +310,20 @@ jobs:
library-arch: arm-linux-gnueabihf
jit_target_arch: "armv6m"

# JIT + DWARF build (armv6m+thumb2)
- os: "ubuntu-24.04"
cc: "arm-linux-gnueabihf-gcc"
cxx: "arm-linux-gnueabihf-g++"
cflags: "-mcpu=cortex-a7 -mfloat-abi=hard -O2 -mthumb -mthumb-interwork -D_FILE_OFFSET_BITS=64"
otp: "28"
elixir_version: "1.17"
rebar3_version: "3.24.0"
cmake_opts_other: "-DAVM_DISABLE_JIT=OFF -DAVM_DISABLE_JIT_DWARF=OFF -DAVM_JIT_TARGET_ARCH=armv6m+thumb2 -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/armhf_toolchain.cmake"
compiler_pkgs: "crossbuild-essential-armhf libc6-dbg:armhf zlib1g-dev:armhf libmbedtls-dev:armhf qemu-user qemu-user-binfmt binfmt-support"
arch: "armhf"
library-arch: arm-linux-gnueabihf
jit_target_arch: "armv6m+thumb2"

# JIT + DWARF build (arm32)
- os: "ubuntu-24.04"
cc: "arm-linux-gnueabihf-gcc"
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added DWARF debug information support for JIT-compiled code
- Added I2C and SPI APIs to rp2 platform
- Added `code:get_object_code/1`
- Added Thumb-2 support to armv6m JIT backend, optimizing code for ARMv7-M and later cores

### Changed
- ~10% binary size reduction by rewriting module loading logic
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH)
endif()
endif()

set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;arm32;armv6m;armv6m+float32;riscv32;riscv64" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON")
set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;arm32;armv6m;armv6m+float32;armv6m+thumb2;riscv32;riscv64" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON")

if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR
(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR
Expand Down
5 changes: 5 additions & 0 deletions CMakeModules/BuildErlang.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ macro(pack_precompiled_archive avm_name)
${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${jit_target_arch}.beam
${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${jit_target_arch}_asm.beam
)
# The Thumb-2 variant of the armv6m backend calls into jit_armv7m_asm
# (b.w / movw / movt encoders), so its beam is appended to
# jit_compiler_modules in addition to the per-architecture beams above.
if("${jit_target_arch_variant}" MATCHES "thumb2")
list(APPEND jit_compiler_modules
${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_armv7m_asm.beam
)
endif()

if (NOT AVM_DISABLE_JIT_DWARF)
set(jit_precompile_dwarf_flag "dwarf")
Expand Down
6 changes: 3 additions & 3 deletions doc/src/atomvm-internals.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ Following BEAM, there are two flavors of the emulator: jit and emu, but eventual
- Native: the VM only runs native code and all code must be precompiled on the desktop using the JIT compiler (which is effectively an AOT, or Ahead-of-Time, compiler). In this mode, it is not necessary to bundle the jit compiler on the embedded target.
- Hybrid: the VM can run native code as well as emulated BEAM code and some code is precompiled on the desktop.

JIT is available on some platforms (currently x86_64, aarch64, arm32, armv6m, riscv32 and riscv64) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted.
JIT is available on some platforms (currently x86_64, aarch64, arm32, armv6m, armv6m+thumb2, riscv32 and riscv64) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted.

Modules can include precompiled code in a dedicated beam chunk with name 'avmN'. The chunk can contain native code for several architectures; however, it may only contain native code for a given version of the native interface. The current version is 1. This native code is executed by the jit flavor of the emulator, as well as by the emu flavor if execution of precompiled code is enabled.

Expand All @@ -158,7 +158,7 @@ A backend implementation is required for each architecture. The backend is calle
- `jit_x86_64` for System V X86 64 ABI
- `jit_aarch64` for AArch64 ABI
- `jit_arm32` for ARM32 (AArch32 ARM mode) ABI
- `jit_armv6m` for ARMv6-M (AArch32 Thumb mode) ABI
- `jit_armv6m` for ARMv6-M (AArch32 Thumb mode) ABI, with an ARMv7-M or later variant using Thumb-2 32-bit encodings for Cortex-M3+ targets (Raspberry Pi Pico 2, STM32 with Cortex-M3/M4/M7/M33)
- `jit_riscv32` for rv32imc ilp32 ABI
- `jit_riscv64` for rv64gc lp64 ABI.

Expand All @@ -169,7 +169,7 @@ A stream implementation is responsible for streaming the machine code, especiall

### Embedded JIT and Native

On embedded devices, Native mode means the code is precompiled on the desktop and executed natively on the device. This currently works on all ARMv6M devices (Pico and STM32).
On embedded devices, Native mode means the code is precompiled on the desktop and executed natively on the device. This currently works on all ARMv6-M devices (Pico and STM32 with Cortex-M0/M0+) as well as ARMv7-M devices using the Thumb-2 variant (Pico 2 and STM32 with Cortex-M3/M4/M7/M33).

The default partition scheme on all platforms is optimized for the Emulated VM which is larger than the JIT or Native VM, and for the Emulated atomvmlib (with no native code for estdlib and no jit library) which is smaller than the JIT atomvmlib (that includes native code for estdlib and jit library).

Expand Down
5 changes: 3 additions & 2 deletions doc/src/jit.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ The JIT compiler supports the following target architectures:
* `x86_64` — 64-bit x86 (Linux, macOS, FreeBSD)
* `aarch64` — 64-bit ARM (Linux, macOS)
* `arm32` — 32-bit ARM (Linux)
* `armv6m` — ARM Cortex-M0+ (Raspberry Pi Pico, STM32)
* `armv6m` — ARM Cortex-M0+ (Raspberry Pi Pico, STM32 with Cortex-M0/M0+)
* `armv6m+thumb2` — ARM Cortex-M3+ with Thumb-2 support, ARMv7-M or later (Raspberry Pi Pico 2, STM32 with Cortex-M3/M4/M7/M33)
* `riscv32` — 32-bit RISC-V
* `riscv64` — 64-bit RISC-V

Expand Down Expand Up @@ -176,5 +177,5 @@ $ riscv64-elf-objdump -d module.elf
|--------|---------|-------------|
| `AVM_DISABLE_JIT` | `ON` | Disable JIT compilation |
| `AVM_DISABLE_JIT_DWARF` | `ON` | Disable DWARF debug information in JIT |
| `AVM_JIT_TARGET_ARCH` | auto-detected | Target architecture (`x86_64`, `aarch64`, `arm32`, `armv6m`, `riscv32`, `riscv64`) |
| `AVM_JIT_TARGET_ARCH` | auto-detected | Target architecture (`x86_64`, `aarch64`, `arm32`, `armv6m`, `armv6m+thumb2`, `riscv32`, `riscv64`) |
| `AVM_DISABLE_SMP` | `OFF` | Disable SMP support |
1 change: 1 addition & 0 deletions libs/jit/include/jit.hrl
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@

-define(JIT_VARIANT_PIC, 1).
-define(JIT_VARIANT_FLOAT32, 2).
-define(JIT_VARIANT_THUMB2, 4).

-define(MAX_REG, 16).
1 change: 1 addition & 0 deletions libs/jit/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ set(ERLANG_MODULES
jit_arm32_asm
jit_armv6m
jit_armv6m_asm
jit_armv7m_asm
jit_riscv32
jit_riscv32_asm
jit_riscv64
Expand Down
130 changes: 103 additions & 27 deletions libs/jit/src/jit_armv6m.erl
Original file line number Diff line number Diff line change
Expand Up @@ -146,19 +146,22 @@
).

-type stream() :: any().
-type branch_type() ::
{adr, armv6m_register()} | b_w | {far_branch, non_neg_integer(), armv6m_register()}.

-record(state, {
stream_module :: module(),
stream :: stream(),
offset :: non_neg_integer(),
branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}],
branches :: [{non_neg_integer(), non_neg_integer(), branch_type()}],
jump_table_start :: non_neg_integer(),
available_regs :: non_neg_integer(),
used_regs :: non_neg_integer(),
labels :: [{integer() | reference(), integer()}],
variant :: non_neg_integer(),
literal_pool :: [{non_neg_integer(), armv6m_register(), non_neg_integer()}],
regs :: jit_regs:regs()
regs :: jit_regs:regs(),
thumb2 :: boolean()
}).

-type state() :: #state{}.
Expand Down Expand Up @@ -203,8 +206,8 @@
-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}).

-define(JUMP_TABLE_ENTRY_SIZE, 12).
-define(JUMP_TABLE_ENTRY_SIZE_THUMB2, 6).

% aarch64 ABI specific
%% ARMv6-M register mappings

%% IP can be used as an additional scratch register
Expand Down Expand Up @@ -300,7 +303,8 @@ new(Variant, StreamModule, Stream) ->
labels = [],
variant = Variant,
literal_pool = [],
regs = jit_regs:new()
regs = jit_regs:new(),
thumb2 = (Variant band ?JIT_VARIANT_THUMB2) =/= 0
}.

%%-----------------------------------------------------------------------------
Expand Down Expand Up @@ -415,7 +419,7 @@ assert_all_native_free(State) ->
%% 0 (special entry for lines and labels information) to LabelsCount included
%% (special entry for OP_INT_CALL_END).
%%
%% On this platform, each jump table entry is 12 bytes.
%% On ARMv6-M (Thumb-1), each jump table entry is 12 bytes:
%% ```
%% ldr r3, pc+4
%% push {r1, r4, r5, r6, r7, lr}
Expand All @@ -424,6 +428,12 @@ assert_all_native_free(State) ->
%% offset_to_label0
%% ```
%%
%% On ARMv7-M/ARMv8-M (Thumb-2 variant), each jump table entry is 6 bytes:
%% ```
%% push {r1, r4, r5, r6, r7, lr}
%% b.w offset_to_label0
%% ```
%%
%% @end
%% @param State current backend state
%% @param LabelsCount number of labels in the module.
Expand All @@ -436,12 +446,26 @@ jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, Label

jump_table0(State, N, LabelsCount) when N > LabelsCount ->
State;
% Thumb-2 clause (selected when the state's thumb2 flag is true): appends one
% 6-byte jump table entry for index N to the stream, then recurses on N + 1.
jump_table0(
#state{stream_module = StreamModule, stream = Stream0, thumb2 = true} = State,
N,
LabelsCount
) ->
% Thumb-2 jump table entry: push + b.w (6 bytes)
I1 = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]),
% Placeholder for the 4-byte b.w. add_label/3 overwrites it (at entry
% offset + 2) with the real branch once the label offset is known.
I2 = <<16#FFFF:16, 16#FFFF:16>>,

JumpEntry = <<I1/binary, I2/binary>>,
Stream1 = StreamModule:append(Stream0, JumpEntry),

jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount);
jump_table0(
#state{stream_module = StreamModule, stream = Stream0} = State,
N,
LabelsCount
) ->
% Create jump table entry with calculated offsets - all at emit time
% ARMv6-M jump table entry: ldr + push + add pc + nop + literal (12 bytes)
I1 = jit_armv6m_asm:ldr(r3, {pc, 4}),
I2 = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]),
I3 = jit_armv6m_asm:add(pc, r3),
Expand Down Expand Up @@ -469,6 +493,8 @@ patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) ->
case Type of
{adr, Reg} when Rel rem 4 =:= 0 -> jit_armv6m_asm:adr(Reg, Rel);
{adr, Reg} when Rel rem 4 =:= 2 -> jit_armv6m_asm:adr(Reg, Rel + 2);
b_w ->
jit_armv7m_asm:b_w(Rel - 4);
{far_branch, Size, TempReg} ->
% Check if branch can now be optimized to near branch
if
Expand Down Expand Up @@ -917,6 +943,11 @@ jump_to_continuation(
State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0},
flush_literal_pool(State2).

% Thumb-2 clause: because it is matched first, a thumb2 state always gets a
% single b.w and never reaches the range-checked clauses that follow.
% Returns whatever jit_armv7m_asm:b_w/1 produces (presumably the 4-byte
% encoded instruction -- confirm in jit_armv7m_asm).
branch_to_offset_code(#state{thumb2 = true}, Offset, TargetOffset) ->
% Thumb-2: b.w has +/-16 MB range, always sufficient
% b.w offset is relative to PC (instruction address + 4)
Rel = TargetOffset - (Offset + 4),
jit_armv7m_asm:b_w(Rel);
branch_to_offset_code(_State, Offset, TargetOffset) when
TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044
->
Expand Down Expand Up @@ -952,6 +983,13 @@ branch_to_offset_code(
branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
CodeBlock = branch_to_offset_code(State, Offset, LabelOffset),
{State, CodeBlock};
% Thumb-2 clause for a label whose offset is not known yet (fourth argument is
% false). No scratch register is needed: a 4-byte placeholder is emitted and a
% {Label, Offset, b_w} relocation is pushed onto the branches list, which
% patch_branch/5 later resolves through its b_w case.
branch_to_label_code(
#state{branches = Branches, thumb2 = true} = State0, Offset, Label, false
) ->
% Same 0xFFFF 0xFFFF filler as the Thumb-2 jump table placeholder
CodeBlock = <<16#FFFF:16, 16#FFFF:16>>,
Reloc = {Label, Offset, b_w},
State1 = State0#state{branches = [Reloc | Branches]},
{State1, CodeBlock};
branch_to_label_code(
#state{available_regs = Available, branches = Branches} = State0, Offset, Label, false
) when Available =/= 0 ->
Expand Down Expand Up @@ -3170,7 +3208,12 @@ set_continuation_to_label(
Temp1 = first_avail(Avail),
Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))),
% Calculate jump table entry offset
JumpTableEntryOffset = (Label * ?JUMP_TABLE_ENTRY_SIZE) + JumpTableOffset,
EntrySize =
case State#state.thumb2 of
true -> ?JUMP_TABLE_ENTRY_SIZE_THUMB2;
false -> ?JUMP_TABLE_ENTRY_SIZE
end,
JumpTableEntryOffset = (Label * EntrySize) + JumpTableOffset,

AdrOffset = StreamModule:offset(Stream0),
% ADR Temp, +.4 means we're storing PC value in Temp1.
Expand Down Expand Up @@ -3494,6 +3537,26 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re
I = jit_armv6m_asm:movs(Reg, Val),
Stream1 = StreamModule:append(Stream0, I),
State#state{stream = Stream1};
% Thumb-2 clause: an immediate in 256..65535 is loaded with a single movw.
% (Values up to 255 are presumably taken by the preceding movs clause.)
mov_immediate(
#state{stream_module = StreamModule, stream = Stream0, thumb2 = true} = State, Reg, Val
) when
Val > 255 andalso Val =< 65535
->
I = jit_armv7m_asm:movw(Reg, Val),
Stream1 = StreamModule:append(Stream0, I),
State#state{stream = Stream1};
% Thumb-2 clause: any other value accepted by ?IS_SIGNED_OR_UNSIGNED_INT32_T is
% loaded as a movw/movt pair instead of going through the clauses below.
% NOTE(review): since this clause comes before the "-255 =< Val < 0" clause,
% small negative values also take the movw + movt path on thumb2 (assuming the
% guard macro accepts them) -- confirm this is intended.
mov_immediate(
#state{stream_module = StreamModule, stream = Stream0, thumb2 = true} = State, Reg, Val
) when
?IS_SIGNED_OR_UNSIGNED_INT32_T(Val)
->
% Reduce to the unsigned 32-bit pattern so negative values split cleanly
UVal = Val band 16#FFFFFFFF,
Lo16 = UVal band 16#FFFF,
Hi16 = (UVal bsr 16) band 16#FFFF,
% movw is given the low halfword, movt the high halfword
I1 = jit_armv7m_asm:movw(Reg, Lo16),
I2 = jit_armv7m_asm:movt(Reg, Hi16),
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
State#state{stream = Stream1};
mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
Val >= -255 andalso Val < 0
->
Expand Down Expand Up @@ -4192,30 +4255,43 @@ add_label(
stream = Stream0,
jump_table_start = JumpTableStart,
branches = Branches,
labels = Labels
labels = Labels,
thumb2 = Thumb2
} = State,
Label,
LabelOffset
) when is_integer(Label) ->
% Patch the jump table entry immediately
% Each jump table entry is 12 bytes:
% - ldr r3, [pc, 4] (2 bytes) at offset 0
% - push {...} (2 bytes) at offset 2
% - add pc, r3 (2 bytes) at offset 4
% - nop (2 bytes) at offset 6
% - data (4 bytes) at offset 8
JumpTableEntryStart = JumpTableStart + Label * 12,
DataOffset = JumpTableEntryStart + 8,
AddInstrOffset = JumpTableEntryStart + 4,

% Calculate offset from 'add pc, r3' instruction to target label
% When 'add pc, r3' executes, PC reads as AddInstrOffset + 4
% Result goes through BXWritePC, so bit 0 must be 1 for Thumb mode
AddPC = AddInstrOffset + 4,
RelativeOffset = LabelOffset - AddPC + 1,
DataBytes = <<RelativeOffset:32/little>>,

Stream1 = StreamModule:replace(Stream0, DataOffset, DataBytes),
Stream1 =
case Thumb2 of
true ->
% Thumb-2 jump table entry is 6 bytes:
% - push {...} (2 bytes) at offset 0
% - b.w <offset> (4 bytes) at offset 2
JumpTableEntryStart = JumpTableStart + Label * ?JUMP_TABLE_ENTRY_SIZE_THUMB2,
BranchInstrOffset = JumpTableEntryStart + 2,
% b.w offset is relative to instruction address + 4
BranchPC = BranchInstrOffset + 4,
RelativeOffset = LabelOffset - BranchPC,
BranchBytes = jit_armv7m_asm:b_w(RelativeOffset),
StreamModule:replace(Stream0, BranchInstrOffset, BranchBytes);
false ->
% ARMv6-M jump table entry is 12 bytes:
% - ldr r3, [pc, 4] (2 bytes) at offset 0
% - push {...} (2 bytes) at offset 2
% - add pc, r3 (2 bytes) at offset 4
% - nop (2 bytes) at offset 6
% - data (4 bytes) at offset 8
JumpTableEntryStart = JumpTableStart + Label * ?JUMP_TABLE_ENTRY_SIZE,
DataOffset = JumpTableEntryStart + 8,
AddInstrOffset = JumpTableEntryStart + 4,
% Calculate offset from 'add pc, r3' instruction to target label
% When 'add pc, r3' executes, PC reads as AddInstrOffset + 4
% Result goes through BXWritePC, so bit 0 must be 1 for Thumb mode
AddPC = AddInstrOffset + 4,
RelativeOffset = LabelOffset - AddPC + 1,
DataBytes = <<RelativeOffset:32/little>>,
StreamModule:replace(Stream0, DataOffset, DataBytes)
end,

% Eagerly patch any branches targeting this label
{Stream2, RemainingBranches} = patch_branches_for_label(
Expand Down
1 change: 1 addition & 0 deletions libs/jit/src/jit_armv6m_asm.erl
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
]).

-export_type([
arm_gpr_register/0,
cc/0
]).

Expand Down
Loading
Loading