From 886acf3f08bf3ac288de32e05123f50db298bacf Mon Sep 17 00:00:00 2001 From: Mateusz Front Date: Wed, 19 Nov 2025 10:42:34 +0100 Subject: [PATCH 1/5] Support matching bitstrings that are in fact binaries (size divisible by 8) Signed-off-by: Mateusz Front --- libs/jit/src/jit.erl | 49 +++++++++++++++++++++++++--------- src/libAtomVM/opcodesswitch.h | 20 +++++++------- tests/erlang_tests/test_bs.erl | 19 +++++++++++++ 3 files changed, 66 insertions(+), 22 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 0e370cbf50..37e920d6a9 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -1333,10 +1333,6 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt5, BSOffsetReg0} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2), MSt6 = if - Unit =/= 8 -> - MMod:call_primitive_last(MSt5, ?PRIM_RAISE_ERROR, [ - ctx, jit_state, offset, ?UNSUPPORTED_ATOM - ]); FlagsValue =/= 0 -> MMod:call_primitive_last(MSt5, ?PRIM_RAISE_ERROR, [ ctx, jit_state, offset, ?UNSUPPORTED_ATOM @@ -1357,12 +1353,17 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt11, SizeReg}; is_integer(Size) -> % SizeReg is binary size - % Size is a tagged integer: (N bsl 4) bor 0xF - % SizeBytes is the raw byte count - SizeBytes = Size bsr 4, - MSt11 = MMod:sub(MSt10, SizeReg, SizeBytes), + MSt11 = + if + (Size * Unit) rem 8 =/= 0 -> + MMod:call_primitive_last(MSt10, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, offset, ?UNSUPPORTED_ATOM + ]); + true -> + MMod:sub(MSt10, SizeReg, (Size * Unit) div 8) + end, MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg1}, Fail, MMod, MSt11), - {MSt12, SizeBytes}; + {MSt12, (Size * Unit) div 8}; true -> {MSt11, SizeValReg} = MMod:move_to_native_register(MSt10, Size), MSt12 = MMod:if_else_block( @@ -1374,10 +1375,32 @@ first_pass(<>, MMod, MSt0, State0) -> end, fun(BSt0) -> {BSt1, SizeValReg} = term_to_int(SizeValReg, 0, MMod, BSt0), - BSt2 = MMod:sub(BSt1, SizeReg, SizeValReg), - BSt3 = cond_jump_to_label({SizeReg, '<', BSOffsetReg1}, Fail, MMod, BSt2), - BSt4 = MMod:move_to_native_register(BSt3, SizeValReg, SizeReg), - MMod:free_native_registers(BSt4, [SizeValReg]) + {BSt2, SizeValReg2} = + if + is_integer(SizeValReg) -> + if + (SizeValReg * Unit) rem 8 =/= 0 -> + MMod:call_primitive_last(BSt1, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, offset, ?UNSUPPORTED_ATOM + ]); + true -> + {BSt1, (SizeValReg * Unit) div 8} + end; + true -> + BBSt1 = MMod:mul(BSt1, SizeValReg, Unit), + BBSt2 = MMod:if_block( + BBSt1, {SizeValReg, '&', 16#7, '!=', 0}, fun(BlockSt) -> + MMod:call_primitive_last(BlockSt, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, offset, ?UNSUPPORTED_ATOM + ]) + end + ), + MMod:shift_right(BBSt2, SizeValReg, 3) + end, + BSt3 = MMod:sub(BSt2, SizeReg, SizeValReg2), + BSt4 = cond_jump_to_label({SizeReg, '<', BSOffsetReg1}, Fail, MMod, BSt3), + BSt5 = MMod:move_to_native_register(BSt4, SizeValReg2, SizeReg), + MMod:free_native_registers(BSt5, [SizeValReg]) end ), {MSt12, SizeReg} diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index 2473e00b58..e56ccde6d9 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -5570,20 +5570,21 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) term bs_bin = term_get_match_state_binary(src); avm_int_t bs_offset = term_get_match_state_offset(src); - if (unit != 8) { - TRACE("bs_get_binary2: Unsupported: unit must be 8.\n"); - RAISE_ERROR(UNSUPPORTED_ATOM); - } avm_int_t size_val = 0; if (term_is_integer(size)) { - size_val = term_to_int(size); + size_val = term_to_int(size) * unit; + if (size_val % 8) { + TRACE("bs_get_binary2: Unsupported: size must be divisible by 8, got: %ld\n", size_val); + RAISE_ERROR(UNSUPPORTED_ATOM); + } + size_val = size_val / 8; } else if (size == ALL_ATOM) { size_val = term_binary_size(bs_bin) - bs_offset / 8; } else { TRACE("bs_get_binary2: size is neither an integer nor the atom `all`\n"); RAISE_ERROR(BADARG_ATOM); } - if (bs_offset % unit != 0) { + if (bs_offset % 8 != 0) { TRACE("bs_get_binary2: Unsupported. Offset on binary read must be aligned on byte boundaries.\n"); RAISE_ERROR(BADARG_ATOM); } @@ -5594,11 +5595,11 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) TRACE("bs_get_binary2/7, fail=%u src=%p live=%u unit=%u\n", (unsigned) fail, (void *) bs_bin, (unsigned) live, (unsigned) unit); - if ((unsigned int) (bs_offset / unit + size_val) > term_binary_size(bs_bin)) { + if ((unsigned int) (bs_offset / 8 + size_val) > term_binary_size(bs_bin)) { TRACE("bs_get_binary2: insufficient capacity -- bs_offset = %d, size_val = %d\n", (int) bs_offset, (int) size_val); JUMP_TO_ADDRESS(mod->labels[fail]); } else { - term_set_match_state_offset(src, bs_offset + size_val * unit); + term_set_match_state_offset(src, bs_offset + size_val * 8); TRIM_LIVE_REGS(live); // there is always room for a MAX_REG + 1 register, used as working register @@ -7109,7 +7110,8 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) JUMP_TO_LABEL(mod, fail); } } - segment_size = signed_size_value; + segment_size = size_in_bytes; + segment_unit = 8; } break; } diff --git a/tests/erlang_tests/test_bs.erl b/tests/erlang_tests/test_bs.erl index 55b9abcbbc..420332ad2f 100644 --- a/tests/erlang_tests/test_bs.erl +++ b/tests/erlang_tests/test_bs.erl @@ -105,6 +105,7 @@ start() -> ok = test_bs_variable_size_bitstring(), ok = test_float(), + ok = test_bs_match_bitstring_modifier(), 0. @@ -701,6 +702,24 @@ test_integer_outside_float_limits() -> create_float_binary(Value, Size) -> <>. +test_bs_match_bitstring_modifier() -> + ok = + try + bitstring_match(id(<<123, 234, 245>>), id(15)), + case erlang:system_info(machine) of + "BEAM" -> ok; + "ATOM" -> unexpected + end + catch + error:unsupported -> ok + end, + + {<<123, 234>>, <<245>>} = bitstring_match(id(<<123, 234, 245>>), id(16)), + ok. + +bitstring_match(BS, Size) -> + <> = BS, + {Matched, Rest}. check_x86_64_jt(<<>>) -> ok; check_x86_64_jt(<<16#e9, _Offset:32/little, Tail/binary>>) -> check_x86_64_jt(Tail); From 9a1b28ea65d81d309b1ae8940eea0e2fe02b6165 Mon Sep 17 00:00:00 2001 From: Mateusz Front Date: Mon, 2 Mar 2026 17:35:14 +0100 Subject: [PATCH 2/5] CR Signed-off-by: Mateusz Front --- libs/jit/src/jit.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 37e920d6a9..0746a9d42d 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -1352,7 +1352,7 @@ first_pass(<>, MMod, MSt0, State0) -> MSt11 = MMod:sub(MSt10, SizeReg, BSOffsetReg1), {MSt11, SizeReg}; is_integer(Size) -> - % SizeReg is binary size + % SizeReg contains binary size in bytes as a term MSt11 = if (Size * Unit) rem 8 =/= 0 -> @@ -1360,7 +1360,8 @@ first_pass(<>, MMod, MSt0, State0) -> ctx, jit_state, offset, ?UNSUPPORTED_ATOM ]); true -> - MMod:sub(MSt10, SizeReg, (Size * Unit) div 8) + % Equivalent of SizeReg - (((Size * Unit) div 8) bsl 4) + MMod:sub(MSt10, SizeReg, (Size * Unit) bsl 1) end, MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg1}, Fail, MMod, MSt11), {MSt12, (Size * Unit) div 8}; From 72ba6a4a2dbdfcd9832fa064333521dbc65e22c0 Mon Sep 17 00:00:00 2001 From: Mateusz Front Date: Tue, 3 Mar 2026 15:08:24 +0100 Subject: [PATCH 3/5] CR Signed-off-by: Mateusz Front --- libs/jit/src/jit.erl | 9 ++++++--- src/libAtomVM/opcodesswitch.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 0746a9d42d..2159049505 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -1353,6 +1353,9 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt11, SizeReg}; is_integer(Size) -> % SizeReg contains binary size in bytes as a term + % Size is a tagged integer: (N bsl 4) bor 0xF + % SizeBytes is the raw byte count + SizeBytes = Size bsr 4, MSt11 = if (Size * Unit) rem 8 =/= 0 -> @@ -1360,11 +1363,11 @@ first_pass(<>, MMod, MSt0, State0) -> ctx, jit_state, offset, ?UNSUPPORTED_ATOM ]); true -> - % Equivalent of SizeReg - (((Size * Unit) div 8) bsl 4) - MMod:sub(MSt10, SizeReg, (Size * Unit) bsl 1) + % Equivalent of SizeReg - (((SizeBytes * Unit) div 8) bsl 4) + MMod:sub(MSt10, SizeReg, (SizeBytes * Unit) bsl 1) end, MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg1}, Fail, MMod, MSt11), - {MSt12, (Size * Unit) div 8}; + {MSt12, (SizeBytes * Unit) div 8}; true -> {MSt11, SizeValReg} = MMod:move_to_native_register(MSt10, Size), MSt12 = MMod:if_else_block( diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index e56ccde6d9..c7ed098812 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -5616,7 +5616,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) #ifdef IMPL_EXECUTE_LOOP bs_bin = x_regs[live]; - term t = term_maybe_create_sub_binary(bs_bin, bs_offset / unit, size_val, &ctx->heap, ctx->global); + term t = term_maybe_create_sub_binary(bs_bin, bs_offset / 8, size_val, &ctx->heap, ctx->global); WRITE_REGISTER(dreg, t); } #endif From a8dffe39b5684c9d846e390e89a9eb16743a5d9a Mon Sep 17 00:00:00 2001 From: Peter M Date: Tue, 3 Mar 2026 22:02:30 +0100 Subject: [PATCH 4/5] Fix JIT bs_get_binary2 unit/size arithmetic Fix several issues in the JIT OP_BS_GET_BINARY2 code generation introduced by the bitstring-as-binary matching support. - Rename `SizeBytes` to `SizeInUnits` to reflect that `Size bsr 4` gives the count in units, not bytes. The actual byte count is `(SizeInUnits * Unit) div 8`. - Use `SizeInUnits * Unit` (not `Size * Unit`) in the alignment check, since `Size` is a tagged integer and must be untagged first. - Subtract raw bytes `(SizeInUnits * Unit) div 8` from `SizeReg` instead of using `bsl 1`, since `SizeReg` holds a raw byte count (not a tagged term). - Add fast-path for `Unit =:= 8` that skips the `mul`/`if_block`/ `shift_right` sequence entirely, since size-in-units already equals size-in-bytes. This avoids unnecessary codegen on ARM32 Thumb where register pressure is high. - Fix register leak for `Unit =/= 8`: pass `{free, SizeValReg}` to `shift_right/3` so it reuses the register in-place instead of allocating a new one. Without this, `SizeValReg` (holding bits) remains allocated while the new result register is never freed, which can exhaust available GPRs on ARM32 and cause incorrect code generation. Fix tagged/raw integer confusion in JIT OP_BS_GET_BINARY2 constant-size path. - SizeReg holds raw bytes, not a tagged term. Subtract raw bytes `(SizeBytes * Unit) div 8` instead of `(SizeBytes * Unit) bsl 1`. - Use untagged `SizeBytes` in the alignment check instead of tagged `Size`, since `Size * Unit` on a tagged integer gives a wrong result. - Fix comment: SizeReg is "raw bytes (not a term)". Signed-off-by: Peter M Signed-off-by: Mateusz Front --- libs/jit/src/jit.erl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 2159049505..76710a511c 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -1352,22 +1352,21 @@ first_pass(<>, MMod, MSt0, State0) -> MSt11 = MMod:sub(MSt10, SizeReg, BSOffsetReg1), {MSt11, SizeReg}; is_integer(Size) -> - % SizeReg contains binary size in bytes as a term + % SizeReg contains binary size in raw bytes (not a term) % Size is a tagged integer: (N bsl 4) bor 0xF - % SizeBytes is the raw byte count - SizeBytes = Size bsr 4, + % SizeInUnits is the raw count in units + SizeInUnits = Size bsr 4, MSt11 = if - (Size * Unit) rem 8 =/= 0 -> + (SizeInUnits * Unit) rem 8 =/= 0 -> MMod:call_primitive_last(MSt10, ?PRIM_RAISE_ERROR, [ ctx, jit_state, offset, ?UNSUPPORTED_ATOM ]); true -> - % Equivalent of SizeReg - (((SizeBytes * Unit) div 8) bsl 4) - MMod:sub(MSt10, SizeReg, (SizeBytes * Unit) bsl 1) + MMod:sub(MSt10, SizeReg, (SizeInUnits * Unit) div 8) end, MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg1}, Fail, MMod, MSt11), - {MSt12, (SizeBytes * Unit) div 8}; + {MSt12, (SizeInUnits * Unit) div 8}; true -> {MSt11, SizeValReg} = MMod:move_to_native_register(MSt10, Size), MSt12 = MMod:if_else_block( From 66450147e868ea341d8cfd0e4ae964b57929fa0e Mon Sep 17 00:00:00 2001 From: Peter M Date: Wed, 4 Mar 2026 10:56:30 +0100 Subject: [PATCH 5/5] Add tests for bitstring modifier matching Expand test_bs_match_bitstring_modifier with additional coverage for edge cases identified during code review: - Non-zero offset with dynamic size - Size=0 (empty binary match) - Negative dynamic size (should error, not crash) - Constant size 16/bitstring (compile-time path) - Non-zero offset with constant size These exercise both the JIT constant-size (is_integer) and dynamic-size (runtime variable) code paths, as well as the interpreter's bs_offset byte-alignment logic. Signed-off-by: Peter M Signed-off-by: Mateusz Front --- tests/erlang_tests/test_bs.erl | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/erlang_tests/test_bs.erl b/tests/erlang_tests/test_bs.erl index 420332ad2f..2d6c7cc84a 100644 --- a/tests/erlang_tests/test_bs.erl +++ b/tests/erlang_tests/test_bs.erl @@ -715,12 +715,48 @@ test_bs_match_bitstring_modifier() -> end, {<<123, 234>>, <<245>>} = bitstring_match(id(<<123, 234, 245>>), id(16)), + + %% Non-zero offset + dynamic size + {<<234, 245>>, <<>>} = bitstring_match_offset(id(<<123, 234, 245>>), id(16)), + + %% Size=0 — matched should be empty binary + {<<>>, <<1, 2, 3>>} = bitstring_match(id(<<1, 2, 3>>), id(0)), + + %% Negative dynamic size — should raise an error + ok = + try + bitstring_match(id(<<1, 2, 3>>), id(-1)), + unexpected + catch + error:{badmatch, _} -> ok; + error:badarg -> ok; + error:unsupported -> ok + end, + + %% Constant size with unit=1, byte-aligned (compile-time) + {<<1, 2>>, <<3>>} = bitstring_match_const_16(id(<<1, 2, 3>>)), + + %% Non-zero offset + constant size + {<<2, 3>>, <<4>>} = bitstring_match_offset_const(id(<<1, 2, 3, 4>>)), + ok. bitstring_match(BS, Size) -> <> = BS, {Matched, Rest}. +bitstring_match_offset(BS, Size) -> + <<_Skip:8, Matched:Size/bitstring, Rest/bits>> = BS, + {Matched, Rest}. + +bitstring_match_const_16(BS) -> + <> = BS, + {Matched, Rest}. + +bitstring_match_offset_const(BS) -> + <<_Skip:8, Matched:16/bitstring, Rest/bits>> = BS, + {Matched, Rest}. + check_x86_64_jt(<<>>) -> ok; check_x86_64_jt(<<16#e9, _Offset:32/little, Tail/binary>>) -> check_x86_64_jt(Tail); check_x86_64_jt(Bin) -> {unexpected, Bin}.