From fbff0c936cc8b1a1c5f6fe51ea6e892721c200f1 Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Thu, 29 Jan 2026 13:00:13 -0500 Subject: [PATCH 1/2] ZJIT: Handle `nil` case for `getblockparamproxy` (#15986) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves https://github.com/Shopify/ruby/issues/772 Adds profiling for the `getblockparamproxy` YARV instruction and handles the `nil` block case by pushing `nil` instead of the block proxy object. This improves `ratio_in_zjit` a tiny bit (0.1%). Profiling data for `getblockparamproxy` on Lobsters: ``` Top-6 getblockparamproxy handler (100.0% of total 3,353,291): polymorphic: 2,337,372 (69.7%) nil: 552,629 (16.5%) iseq: 259,636 ( 7.7%) no_profiles: 156,734 ( 4.7%) proc: 40,223 ( 1.2%) megamorphic: 6,697 ( 0.2%) ``` Lobsters benchmark stats:
Stats before (master):

``` ❯ ./run_benchmarks.rb --chruby 'ruby-zjit --zjit-stats' lobsters ***ZJIT: Printing ZJIT statistics on exit*** ... Top-20 side exit reasons (100.0% of total 15,338,024): guard_type_failure: 6,889,050 (44.9%) guard_shape_failure: 6,848,898 (44.7%) block_param_proxy_not_iseq_or_ifunc: 1,008,525 ( 6.6%) unhandled_hir_insn: 236,977 ( 1.5%) compile_error: 191,763 ( 1.3%) fixnum_mult_overflow: 50,739 ( 0.3%) block_param_proxy_modified: 28,119 ( 0.2%) patchpoint_stable_constant_names: 18,229 ( 0.1%) unhandled_newarray_send_pack: 14,481 ( 0.1%) unhandled_block_arg: 13,782 ( 0.1%) fixnum_lshift_overflow: 10,085 ( 0.1%) patchpoint_no_ep_escape: 7,815 ( 0.1%) unhandled_yarv_insn: 7,540 ( 0.0%) expandarray_failure: 4,533 ( 0.0%) guard_super_method_entry: 4,475 ( 0.0%) patchpoint_method_redefined: 1,207 ( 0.0%) patchpoint_no_singleton_class: 1,130 ( 0.0%) obj_to_string_fallback: 412 ( 0.0%) guard_less_failure: 163 ( 0.0%) interrupt: 82 ( 0.0%) ... ratio_in_zjit: 82.1% ```

Stats after:

``` ❯ ./run_benchmarks.rb --chruby 'ruby-zjit --zjit-stats' lobsters ***ZJIT: Printing ZJIT statistics on exit*** ... Top-20 side exit reasons (100.0% of total 15,061,422): guard_type_failure: 6,892,934 (45.8%) guard_shape_failure: 6,850,512 (45.5%) block_param_proxy_not_iseq_or_ifunc: 549,823 ( 3.7%) unhandled_hir_insn: 236,979 ( 1.6%) compile_error: 191,782 ( 1.3%) unhandled_yarv_insn: 128,695 ( 0.9%) block_param_proxy_not_nil: 68,623 ( 0.5%) fixnum_mult_overflow: 50,739 ( 0.3%) patchpoint_stable_constant_names: 18,568 ( 0.1%) unhandled_newarray_send_pack: 14,481 ( 0.1%) block_param_proxy_modified: 13,819 ( 0.1%) unhandled_block_arg: 13,798 ( 0.1%) fixnum_lshift_overflow: 10,085 ( 0.1%) patchpoint_no_ep_escape: 7,815 ( 0.1%) expandarray_failure: 4,533 ( 0.0%) guard_super_method_entry: 4,475 ( 0.0%) patchpoint_method_redefined: 1,207 ( 0.0%) obj_to_string_fallback: 1,140 ( 0.0%) patchpoint_no_singleton_class: 1,130 ( 0.0%) guard_less_failure: 163 ( 0.0%) ... ratio_in_zjit: 82.2% ```

--- insns.def | 1 + vm_insnhelper.c | 2 +- yjit/src/cruby_bindings.inc.rs | 65 +++++++++--------- zjit.c | 1 + zjit.rb | 1 + zjit/bindgen/src/main.rs | 1 + zjit/src/codegen.rs | 50 ++++++++------ zjit/src/cruby.rs | 2 + zjit/src/cruby_bindings.inc.rs | 66 +++++++++--------- zjit/src/hir.rs | 121 +++++++++++++++++++++++++++++---- zjit/src/hir/opt_tests.rs | 93 ++++++++++++++++++++++--- zjit/src/hir/tests.rs | 40 ++++++----- zjit/src/profile.rs | 17 +++++ zjit/src/stats.rs | 11 +++ 14 files changed, 344 insertions(+), 127 deletions(-) diff --git a/insns.def b/insns.def index ceeaf4128e9abf..f9a334d824b31a 100644 --- a/insns.def +++ b/insns.def @@ -145,6 +145,7 @@ getblockparamproxy (lindex_t idx, rb_num_t level) () (VALUE val) +// attr bool zjit_profile = true; { const VALUE *ep = vm_get_ep(GET_EP(), level); VM_ASSERT(VM_ENV_LOCAL_P(ep)); diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 9cb163e97f2d36..a27bf5f49be69c 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -6050,7 +6050,7 @@ vm_define_method(const rb_execution_context_t *ec, VALUE obj, ID id, VALUE iseqv // * If it's VM_BLOCK_HANDLER_NONE, return nil // * If it's an ISEQ or an IFUNC, fetch it from its rb_captured_block // * If it's a PROC or SYMBOL, return it as is -static VALUE +VALUE rb_vm_untag_block_handler(VALUE block_handler) { if (VM_BLOCK_HANDLER_NONE == block_handler) return Qnil; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 56994388a3a4a1..9216802a3c1919 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -917,38 +917,39 @@ pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214; pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; -pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 
219; -pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; +pub const YARVINSN_zjit_getblockparamproxy: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 
220; +pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 248; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 249; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 250; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), diff --git a/zjit.c b/zjit.c index 
9560d88130b03f..0c463334cde42a 100644 --- a/zjit.c +++ b/zjit.c @@ -300,6 +300,7 @@ rb_zjit_class_has_default_allocator(VALUE klass) } +VALUE rb_vm_untag_block_handler(VALUE block_handler); VALUE rb_vm_get_untagged_block_handler(rb_control_frame_t *reg_cfp); void diff --git a/zjit.rb b/zjit.rb index 0bd6c1b96d36d0..0cc9ca8261a274 100644 --- a/zjit.rb +++ b/zjit.rb @@ -191,6 +191,7 @@ def stats_string print_counters_with_prefix(prefix: 'getivar_fallback_', prompt: 'getivar fallback reasons', buf:, stats:, limit: 5) print_counters_with_prefix(prefix: 'definedivar_fallback_', prompt: 'definedivar fallback reasons', buf:, stats:, limit: 5) print_counters_with_prefix(prefix: 'invokeblock_handler_', prompt: 'invokeblock handler', buf:, stats:, limit: 10) + print_counters_with_prefix(prefix: 'getblockparamproxy_handler_', prompt: 'getblockparamproxy handler', buf:, stats:, limit: 10) # Show most popular unsupported call features. Because each call can # use multiple complex features, a decrease in this number does not diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 794293d1d321c7..d71e75c444af69 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -409,6 +409,7 @@ fn main() { .allowlist_function("rb_str_neq_internal") .allowlist_function("rb_yarv_ary_entry_internal") .allowlist_function("rb_vm_get_untagged_block_handler") + .allowlist_function("rb_vm_untag_block_handler") .allowlist_function("rb_FL_TEST") .allowlist_function("rb_FL_TEST_RAW") .allowlist_function("rb_RB_TYPE_P") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 5d6060dd49d3a3..9276d0af6b81e7 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -529,7 +529,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, 
opnd!(val), *guard_type, &function.frame_state(*state)), &Insn::GuardBitEquals { val, expected, reason, state } => gen_guard_bit_equals(jit, asm, opnd!(val), expected, reason, &function.frame_state(state)), - &Insn::GuardBlockParamProxy { level, state } => no_output!(gen_guard_block_param_proxy(jit, asm, level, &function.frame_state(state))), + &Insn::GuardAnyBitSet { val, mask, reason, state } => gen_guard_any_bit_set(jit, asm, opnd!(val), mask, reason, &function.frame_state(state)), + &Insn::GuardNoBitsSet { val, mask, reason, state } => gen_guard_no_bits_set(jit, asm, opnd!(val), mask, reason, &function.frame_state(state)), Insn::GuardNotFrozen { recv, state } => gen_guard_not_frozen(jit, asm, opnd!(recv), &function.frame_state(*state)), Insn::GuardNotShared { recv, state } => gen_guard_not_shared(jit, asm, opnd!(recv), &function.frame_state(*state)), &Insn::GuardLess { left, right, state } => gen_guard_less(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), @@ -580,6 +581,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::GuardShape { val, shape, state } => gen_guard_shape(jit, asm, opnd!(val), shape, &function.frame_state(state)), Insn::LoadPC => gen_load_pc(asm), Insn::LoadEC => gen_load_ec(), + &Insn::GetEP { level } => gen_get_ep(asm, level), Insn::GetLEP => gen_get_lep(jit, asm), Insn::LoadSelf => gen_load_self(), &Insn::LoadField { recv, id, offset, return_type } => gen_load_field(asm, opnd!(recv), id, offset, return_type), @@ -786,26 +788,6 @@ fn gen_getblockparam(jit: &mut JITState, asm: &mut Assembler, ep_offset: u32, le asm.load(Opnd::mem(VALUE_BITS, ep, offset)) } -fn gen_guard_block_param_proxy(jit: &JITState, asm: &mut Assembler, level: u32, state: &FrameState) { - // Bail out if the `&block` local variable has been modified - let ep = gen_get_ep(asm, level); - let flags = Opnd::mem(64, ep, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); - asm.test(flags, 
VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); - asm.jnz(side_exit(jit, state, SideExitReason::BlockParamProxyModified)); - - // This handles two cases which are nearly identical - // Block handler is a tagged pointer. Look at the tag. - // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 - // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 - // So to check for either of those cases we can use: val & 0x1 == 0x1 - const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); - - // Bail ouf if the block handler is neither ISEQ nor ifunc - let block_handler = asm.load(Opnd::mem(64, ep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); - asm.test(block_handler, 0x1.into()); - asm.jz(side_exit(jit, state, SideExitReason::BlockParamProxyNotIseqOrIfunc)); -} - fn gen_guard_not_frozen(jit: &JITState, asm: &mut Assembler, recv: Opnd, state: &FrameState) -> Opnd { let recv = asm.load(recv); // It's a heap object, so check the frozen flag @@ -2338,6 +2320,32 @@ fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, val } +fn mask_to_opnd(mask: crate::hir::Const) -> Option<Opnd> { + match mask { + crate::hir::Const::CUInt8(v) => Some(Opnd::UImm(v as u64)), + crate::hir::Const::CUInt16(v) => Some(Opnd::UImm(v as u64)), + crate::hir::Const::CUInt32(v) => Some(Opnd::UImm(v as u64)), + crate::hir::Const::CUInt64(v) => Some(Opnd::UImm(v)), + _ => None + } +} + +/// Compile a bitmask check with a side exit if none of the masked bits are set +fn gen_guard_any_bit_set(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, mask: crate::hir::Const, reason: SideExitReason, state: &FrameState) -> lir::Opnd { + let mask_opnd = mask_to_opnd(mask).unwrap_or_else(|| panic!("gen_guard_any_bit_set: unexpected hir::Const {mask:?}")); + asm.test(val, mask_opnd); + asm.jz(side_exit(jit, state, reason)); + val +} + +/// Compile a bitmask check with a side exit if any of the masked bits are set +fn gen_guard_no_bits_set(jit: &mut JITState, asm: 
&mut Assembler, val: lir::Opnd, mask: crate::hir::Const, reason: SideExitReason, state: &FrameState) -> lir::Opnd { + let mask_opnd = mask_to_opnd(mask).unwrap_or_else(|| panic!("gen_guard_no_bits_set: unexpected hir::Const {mask:?}")); + asm.test(val, mask_opnd); + asm.jnz(side_exit(jit, state, reason)); + val +} + /// Generate code that records unoptimized C functions if --zjit-stats is enabled fn gen_incr_counter_ptr(asm: &mut Assembler, counter_ptr: *mut u64) { if get_option!(stats) { diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 51faaab9c24658..94b2a443c8b043 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -1396,6 +1396,8 @@ pub(crate) mod ids { name: self_ content: b"self" name: rb_ivar_get_at_no_ractor_check name: _shape_id + name: _env_data_index_flags + name: _env_data_index_specval } /// Get an CRuby `ID` to an interned string, e.g. a particular method name. diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 15533180dad72f..969c5a4c693a73 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -1706,38 +1706,39 @@ pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214; pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; -pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 219; -pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; -pub const 
YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; +pub const YARVINSN_zjit_getblockparamproxy: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 227; +pub const 
YARVINSN_zjit_opt_plus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 248; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 249; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 250; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), @@ -2081,6 +2082,7 @@ unsafe extern "C" { pub fn rb_zjit_class_initialized_p(klass: VALUE) -> bool; pub fn rb_zjit_class_get_alloc_func(klass: VALUE) -> rb_alloc_func_t; pub fn rb_zjit_class_has_default_allocator(klass: VALUE) -> bool; + pub fn rb_vm_untag_block_handler(block_handler: VALUE) -> VALUE; pub fn rb_vm_get_untagged_block_handler(reg_cfp: *mut rb_control_frame_t) -> VALUE; pub fn rb_zjit_writebarrier_check_immediate(recv: VALUE, val: VALUE); pub fn rb_iseq_encoded_size(iseq: 
*const rb_iseq_t) -> ::std::os::raw::c_uint; diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index b523d8430f3e5e..32519a5b978847 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -506,6 +506,7 @@ pub enum SideExitReason { Interrupt, BlockParamProxyModified, BlockParamProxyNotIseqOrIfunc, + BlockParamProxyNotNil, BlockParamWbRequired, StackOverflow, FixnumModByZero, @@ -855,6 +856,9 @@ pub enum Insn { /// Set a class variable `id` to `val` SetClassVar { id: ID, val: InsnId, ic: *const iseq_inline_cvar_cache_entry, state: InsnId }, + /// Get the EP at the given level from the current CFP. + GetEP { level: u32 }, + /// Get the EP of the ISeq of the containing method, or "local level", skipping over block-level EPs. /// Equivalent of GET_LEP() macro. GetLEP, @@ -1018,11 +1022,12 @@ pub enum Insn { GuardTypeNot { val: InsnId, guard_type: Type, state: InsnId }, /// Side-exit if val is not the expected Const. GuardBitEquals { val: InsnId, expected: Const, reason: SideExitReason, state: InsnId }, + /// Side-exit if (val & mask) == 0 + GuardAnyBitSet { val: InsnId, mask: Const, reason: SideExitReason, state: InsnId }, + /// Side-exit if (val & mask) != 0 + GuardNoBitsSet { val: InsnId, mask: Const, reason: SideExitReason, state: InsnId }, /// Side-exit if val doesn't have the expected shape. GuardShape { val: InsnId, shape: ShapeId, state: InsnId }, - /// Side-exit if the block param has been modified or the block handler for the frame - /// is neither ISEQ nor ifunc, which makes it incompatible with rb_block_param_proxy. - GuardBlockParamProxy { level: u32, state: InsnId }, /// Side-exit if val is frozen. Does *not* check if the val is an immediate; assumes that it is /// a heap object. GuardNotFrozen { recv: InsnId, state: InsnId }, @@ -1066,7 +1071,7 @@ impl Insn { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } | Insn::SetLocal { .. 
} | Insn::Throw { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } + | Insn::CheckInterrupts { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => false, _ => true, @@ -1154,6 +1159,7 @@ impl Insn { Insn::DefinedIvar { .. } => effects::Any, Insn::LoadPC { .. } => Effect::read_write(abstract_heaps::PC, abstract_heaps::Empty), Insn::LoadEC { .. } => effects::Empty, + Insn::GetEP { .. } => effects::Empty, Insn::GetLEP { .. } => effects::Empty, Insn::LoadSelf { .. } => Effect::read_write(abstract_heaps::Frame, abstract_heaps::Empty), Insn::LoadField { .. } => Effect::read_write(abstract_heaps::Other, abstract_heaps::Empty), @@ -1220,8 +1226,9 @@ impl Insn { Insn::GuardType { .. } => effects::Any, Insn::GuardTypeNot { .. } => effects::Any, Insn::GuardBitEquals { .. } => effects::Any, + Insn::GuardAnyBitSet { .. } => effects::Any, + Insn::GuardNoBitsSet { .. } => effects::Any, Insn::GuardShape { .. } => effects::Any, - Insn::GuardBlockParamProxy { .. } => effects::Any, Insn::GuardNotFrozen { .. } => effects::Any, Insn::GuardNotShared { .. } => effects::Any, Insn::GuardGreaterEq { .. } => effects::Any, @@ -1541,8 +1548,9 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::RefineType { val, new_type, .. } => { write!(f, "RefineType {val}, {}", new_type.print(self.ptr_map)) }, Insn::GuardTypeNot { val, guard_type, .. } => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, + Insn::GuardAnyBitSet { val, mask, .. } => { write!(f, "GuardBitSet {val}, {}", mask.print(self.ptr_map)) }, + Insn::GuardNoBitsSet { val, mask, .. 
} => { write!(f, "GuardBitNotSet {val}, {}", mask.print(self.ptr_map)) }, &Insn::GuardShape { val, shape, .. } => { write!(f, "GuardShape {val}, {:p}", self.ptr_map.map_shape(shape)) }, - Insn::GuardBlockParamProxy { level, .. } => write!(f, "GuardBlockParamProxy l{level}"), Insn::GuardNotFrozen { recv, .. } => write!(f, "GuardNotFrozen {recv}"), Insn::GuardNotShared { recv, .. } => write!(f, "GuardNotShared {recv}"), Insn::GuardLess { left, right, .. } => write!(f, "GuardLess {left}, {right}"), @@ -1604,6 +1612,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy()), Insn::LoadPC => write!(f, "LoadPC"), Insn::LoadEC => write!(f, "LoadEC"), + &Insn::GetEP { level } => write!(f, "GetEP {level}"), Insn::GetLEP => write!(f, "GetLEP"), Insn::LoadSelf => write!(f, "LoadSelf"), &Insn::LoadField { recv, id, offset, return_type: _ } => write!(f, "LoadField {recv}, :{}@{:p}", id.contents_lossy(), self.ptr_map.map_offset(offset)), @@ -2187,6 +2196,7 @@ impl Function { | EntryPoint {..} | LoadPC | LoadEC + | GetEP {..} | GetLEP | LoadSelf | IncrCounterPtr {..} @@ -2228,8 +2238,9 @@ impl Function { &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state }, &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state }, &GuardBitEquals { val, expected, reason, state } => GuardBitEquals { val: find!(val), expected, reason, state }, + &GuardAnyBitSet { val, mask, reason, state } => GuardAnyBitSet { val: find!(val), mask, reason, state }, + &GuardNoBitsSet { val, mask, reason, state } => GuardNoBitsSet { val: find!(val), mask, reason, state }, &GuardShape { val, shape, state } => GuardShape { val: find!(val), shape, state }, - &GuardBlockParamProxy { level, state } => GuardBlockParamProxy { level, state: find!(state) }, &GuardNotFrozen { recv, state } => GuardNotFrozen { recv: find!(recv), state }, &GuardNotShared { recv, 
state } => GuardNotShared { recv: find!(recv), state }, &GuardGreaterEq { left, right, state } => GuardGreaterEq { left: find!(left), right: find!(right), state }, @@ -2429,7 +2440,7 @@ impl Function { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } + | Insn::CheckInterrupts { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => panic!("Cannot infer type of instruction with no output: {}. See Insn::has_output().", self.insns[insn.0]), Insn::Const { val: Const::Value(val) } => Type::from_value(*val), @@ -2488,6 +2499,8 @@ impl Function { Insn::RefineType { val, new_type, .. } => self.type_of(*val).intersection(*new_type), Insn::GuardTypeNot { .. } => types::BasicObject, Insn::GuardBitEquals { val, expected, .. } => self.type_of(*val).intersection(Type::from_const(*expected)), + Insn::GuardAnyBitSet { val, .. } => self.type_of(*val), + Insn::GuardNoBitsSet { val, .. } => self.type_of(*val), Insn::GuardShape { val, .. } => self.type_of(*val), Insn::GuardNotFrozen { recv, .. } | Insn::GuardNotShared { recv, .. } => self.type_of(*recv), Insn::GuardLess { left, .. } => self.type_of(*left), @@ -2532,6 +2545,7 @@ impl Function { Insn::GetIvar { .. } => types::BasicObject, Insn::LoadPC => types::CPtr, Insn::LoadEC => types::CPtr, + Insn::GetEP { .. } => types::CPtr, Insn::GetLEP => types::CPtr, Insn::LoadSelf => types::BasicObject, &Insn::LoadField { return_type, .. } => return_type, @@ -2659,7 +2673,9 @@ impl Function { Insn::GuardType { val, .. } | Insn::GuardTypeNot { val, .. } | Insn::GuardShape { val, .. } - | Insn::GuardBitEquals { val, .. 
} => self.chase_insn(val), + | Insn::GuardBitEquals { val, .. } + | Insn::GuardAnyBitSet { val, .. } + | Insn::GuardNoBitsSet { val, .. } => self.chase_insn(val), | Insn::RefineType { val, .. } => self.chase_insn(val), _ => id, } @@ -4517,6 +4533,7 @@ impl Function { | &Insn::EntryPoint { .. } | &Insn::LoadPC | &Insn::LoadEC + | &Insn::GetEP { .. } | &Insn::GetLEP | &Insn::LoadSelf | &Insn::GetLocal { .. } @@ -4605,6 +4622,8 @@ impl Function { | &Insn::GuardType { val, state, .. } | &Insn::GuardTypeNot { val, state, .. } | &Insn::GuardBitEquals { val, state, .. } + | &Insn::GuardAnyBitSet { val, state, .. } + | &Insn::GuardNoBitsSet { val, state, .. } | &Insn::GuardShape { val, state, .. } | &Insn::GuardNotFrozen { recv: val, state } | &Insn::GuardNotShared { recv: val, state } @@ -4755,7 +4774,6 @@ impl Function { worklist.push_back(recv); worklist.push_back(val); } - &Insn::GuardBlockParamProxy { state, .. } | &Insn::GetGlobal { state, .. } | &Insn::GetSpecialSymbol { state, .. } | &Insn::GetSpecialNumber { state, .. } | @@ -5301,12 +5319,12 @@ impl Function { | Insn::GetGlobal { .. } | Insn::LoadPC | Insn::LoadEC + | Insn::GetEP { .. } | Insn::GetLEP | Insn::LoadSelf | Insn::Snapshot { .. } | Insn::Jump { .. } | Insn::EntryPoint { .. } - | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::GetBlockHandler { .. } | Insn::PatchPoint { .. } @@ -5530,6 +5548,18 @@ impl Function { Const::CPtr(_) => self.assert_subtype(insn_id, val, types::CPtr), } } + Insn::GuardAnyBitSet { val, mask, .. } + | Insn::GuardNoBitsSet { val, mask, .. } => { + match mask { + Const::CUInt8(_) | Const::CUInt16(_) | Const::CUInt32(_) | Const::CUInt64(_) + if self.is_a(val, types::CInt) || self.is_a(val, types::RubyValue) => { + Ok(()) + } + _ => { + Err(ValidationError::MiscValidationError(insn_id, "GuardAnyBitSet/GuardNoBitsSet can only compare RubyValue/CUInt or CInt/CUInt".to_string())) + } + } + } Insn::GuardLess { left, right, .. 
} | Insn::GuardGreaterEq { left, right, .. } => { self.assert_subtype(insn_id, left, types::CInt64)?; @@ -6198,7 +6228,38 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } } } - } else { + } else if opcode == YARVINSN_getblockparamproxy || opcode == YARVINSN_trace_getblockparamproxy { + if get_option!(stats) { + let iseq_insn_idx = exit_state.insn_idx; + if let Some([block_handler_distribution]) = profiles.payload.profile.get_operand_types(iseq_insn_idx) { + let summary = TypeDistributionSummary::new(block_handler_distribution); + + if summary.is_monomorphic() { + let obj = summary.bucket(0).class(); + if unsafe { rb_IMEMO_TYPE_P(obj, imemo_iseq) == 1} { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_iseq)); + } else if unsafe { rb_IMEMO_TYPE_P(obj, imemo_ifunc) == 1} { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_ifunc)); + } + else if obj.nil_p() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_nil)); + } + else if obj.symbol_p() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_symbol)); + } else if unsafe { rb_obj_is_proc(obj).test() } { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_proc)); + } + } else if summary.is_polymorphic() || summary.is_skewed_polymorphic() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_polymorphic)); + } else if summary.is_megamorphic() || summary.is_skewed_megamorphic() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_megamorphic)); + } + } else { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_no_profiles)); + } + } + } + else { profiles.profile_stack(&exit_state); } @@ -6593,9 +6654,39 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } YARVINSN_getblockparamproxy => { let level = get_arg(pc, 1).as_u32(); - fun.push_insn(block, Insn::GuardBlockParamProxy { level, state: 
exit_id }); - // TODO(Shopify/ruby#753): GC root, so we should be able to avoid unnecessary GC tracing - state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(unsafe { rb_block_param_proxy }) })); + + let profiled_block_type = if let Some([block_handler_distribution]) = profiles.payload.profile.get_operand_types(exit_state.insn_idx) { + let summary = TypeDistributionSummary::new(block_handler_distribution); + summary.is_monomorphic().then_some(summary.bucket(0).class()) + } else { + None + }; + + let ep = fun.push_insn(block, Insn::GetEP { level }); + let flags = fun.push_insn(block, Insn::LoadField { recv: ep, id: ID!(_env_data_index_flags), offset: SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32), return_type: types::CInt64 }); + fun.push_insn(block, Insn::GuardNoBitsSet { val: flags, mask: Const::CUInt64(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()), reason: SideExitReason::BlockParamProxyModified, state: exit_id }); + + let block_handler = fun.push_insn(block, Insn::LoadField { recv: ep, id: ID!(_env_data_index_specval), offset: SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL, return_type: types::CInt64 }); + + match profiled_block_type { + Some(ty) if ty.nil_p() => { + fun.push_insn(block, Insn::GuardBitEquals { val: block_handler, expected: Const::CInt64(VM_BLOCK_HANDLER_NONE.into()), reason: SideExitReason::BlockParamProxyNotNil, state: exit_id }); + state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(Qnil) })); + } + _ => { + // This handles two cases which are nearly identical + // Block handler is a tagged pointer. Look at the tag. 
+ // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 + // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 + // So to check for either of those cases we can use: val & 0x1 == 0x1 + const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); + + // Bail out if the block handler is neither ISEQ nor ifunc + fun.push_insn(block, Insn::GuardAnyBitSet { val: block_handler, mask: Const::CUInt64(0x1), reason: SideExitReason::BlockParamProxyNotIseqOrIfunc, state: exit_id }); + // TODO(Shopify/ruby#753): GC root, so we should be able to avoid unnecessary GC tracing + state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(unsafe { rb_block_param_proxy }) })); + } + } } YARVINSN_getblockparam => { fn new_branch_block( diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 70afd54022e40e..c1059094ac531a 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -3875,7 +3875,7 @@ mod hir_opt_tests { eval(" def test(&block) = tap(&block) "); - assert_snapshot!(hir_string("test"), @r" + assert_snapshot!(hir_string("test"), @" fn test@:2: bb0(): EntryPoint interpreter @@ -3886,11 +3886,15 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): - GuardBlockParamProxy l0 - v15:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) - v17:BasicObject = Send v8, 0x1008, :tap, v15 # SendFallbackReason: Uncategorized(send) + v14:CPtr = GetEP 0 + v15:CInt64 = LoadField v14, :_env_data_index_flags@0x1000 + v16:CInt64 = GuardBitNotSet v15, CUInt64(512) + v17:CInt64 = LoadField v14, :_env_data_index_specval@0x1001 + v18:CInt64 = GuardBitSet v17, CUInt64(1) + v19:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + v21:BasicObject = Send v8, 0x1010, :tap, v19 # SendFallbackReason: Uncategorized(send) CheckInterrupts - Return v17 + Return v21 "); } @@ -6518,11 +6522,42 @@ mod hir_opt_tests { #[test] fn test_do_not_optimize_send_with_block_forwarding() { + eval(r#" + 
def test(&block) = [].map(&block) + test { |x| x }; test { |x| x } + "#); + assert_snapshot!(hir_string("test"), @" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :block, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:ArrayExact = NewArray + v15:CPtr = GetEP 0 + v16:CInt64 = LoadField v15, :_env_data_index_flags@0x1000 + v17:CInt64 = GuardBitNotSet v16, CUInt64(512) + v18:CInt64 = LoadField v15, :_env_data_index_specval@0x1001 + v19:CInt64 = GuardBitSet v18, CUInt64(1) + v20:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + IncrCounter complex_arg_pass_caller_blockarg + v22:BasicObject = Send v13, 0x1010, :map, v20 # SendFallbackReason: Complex argument passing + CheckInterrupts + Return v22 + "); + } + + #[test] + fn test_replace_block_param_proxy_with_nil() { eval(r#" def test(&block) = [].map(&block) test; test "#); - assert_snapshot!(hir_string("test"), @r" + assert_snapshot!(hir_string("test"), @" fn test@:2: bb0(): EntryPoint interpreter @@ -6534,12 +6569,50 @@ mod hir_opt_tests { Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): v13:ArrayExact = NewArray - GuardBlockParamProxy l0 - v16:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v15:CPtr = GetEP 0 + v16:CInt64 = LoadField v15, :_env_data_index_flags@0x1000 + v17:CInt64 = GuardBitNotSet v16, CUInt64(512) + v18:CInt64 = LoadField v15, :_env_data_index_specval@0x1001 + v19:CInt64[0] = GuardBitEquals v18, CInt64(0) + v20:NilClass = Const Value(nil) IncrCounter complex_arg_pass_caller_blockarg - v18:BasicObject = Send v13, 0x1008, :map, v16 # SendFallbackReason: Complex argument passing + v22:BasicObject = Send v13, 0x1008, :map, v20 # SendFallbackReason: Complex argument passing CheckInterrupts - Return v18 + Return v22 + "); + } + + #[test] + fn test_replace_block_param_proxy_with_nil_nested() { + eval(r#" + def test(&block) 
+ proc do + [].map(&block) + end + end + test; test + "#); + assert_snapshot!(hir_string_proc("test"), @" + fn block in test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:ArrayExact = NewArray + v12:CPtr = GetEP 1 + v13:CInt64 = LoadField v12, :_env_data_index_flags@0x1000 + v14:CInt64 = GuardBitNotSet v13, CUInt64(512) + v15:CInt64 = LoadField v12, :_env_data_index_specval@0x1001 + v16:CInt64 = GuardBitSet v15, CUInt64(1) + v17:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + IncrCounter complex_arg_pass_caller_blockarg + v19:BasicObject = Send v10, 0x1010, :map, v17 # SendFallbackReason: Complex argument passing + CheckInterrupts + Return v19 "); } diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index e0b0129ea1ce5a..dbab964976f91a 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -2038,7 +2038,7 @@ pub mod hir_build_tests { eval(" def test(a, ...) = foo(a, ...) 
"); - assert_snapshot!(hir_string("test"), @r" + assert_snapshot!(hir_string("test"), @" fn test@:2: bb0(): EntryPoint interpreter @@ -2056,8 +2056,12 @@ pub mod hir_build_tests { bb2(v16:BasicObject, v17:BasicObject, v18:ArrayExact, v19:BasicObject, v20:BasicObject, v21:NilClass): v28:ArrayExact = ToArray v18 PatchPoint NoEPEscape(test) - GuardBlockParamProxy l0 - v34:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v33:CPtr = GetEP 0 + v34:CInt64 = LoadField v33, :_env_data_index_flags@0x1000 + v35:CInt64 = GuardBitNotSet v34, CUInt64(512) + v36:CInt64 = LoadField v33, :_env_data_index_specval@0x1001 + v37:CInt64 = GuardBitSet v36, CUInt64(1) + v38:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) SideExit UnhandledYARVInsn(splatkw) "); } @@ -3409,7 +3413,7 @@ pub mod hir_build_tests { let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("Dir", "open")); assert!(iseq_contains_opcode(iseq, YARVINSN_opt_invokebuiltin_delegate), "iseq Dir.open does not contain invokebuiltin"); let function = iseq_to_hir(iseq).unwrap(); - assert_snapshot!(hir_string_function(&function), @r" + assert_snapshot!(hir_string_function(&function), @" fn open@: bb0(): EntryPoint interpreter @@ -3428,20 +3432,24 @@ pub mod hir_build_tests { bb2(v16:BasicObject, v17:BasicObject, v18:BasicObject, v19:BasicObject, v20:BasicObject, v21:NilClass): v25:BasicObject = InvokeBuiltin dir_s_open, v16, v17, v18 PatchPoint NoEPEscape(open) - GuardBlockParamProxy l0 - v32:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v31:CPtr = GetEP 0 + v32:CInt64 = LoadField v31, :_env_data_index_flags@0x1000 + v33:CInt64 = GuardBitNotSet v32, CUInt64(512) + v34:CInt64 = LoadField v31, :_env_data_index_specval@0x1001 + v35:CInt64 = GuardBitSet v34, CUInt64(1) + v36:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + CheckInterrupts + v39:CBool[true] = Test v36 + v40 = RefineType v36, Falsy + IfFalse v39, bb3(v16, v17, v18, v19, v20, v25) + v42:HeapObject[BlockParamProxy] = 
RefineType v36, Truthy + v46:BasicObject = InvokeBlock, v25 # SendFallbackReason: Uncategorized(invokeblock) + v49:BasicObject = InvokeBuiltin dir_s_close, v16, v25 CheckInterrupts - v35:CBool[true] = Test v32 - v36 = RefineType v32, Falsy - IfFalse v35, bb3(v16, v17, v18, v19, v20, v25) - v38:HeapObject[BlockParamProxy] = RefineType v32, Truthy - v42:BasicObject = InvokeBlock, v25 # SendFallbackReason: Uncategorized(invokeblock) - v45:BasicObject = InvokeBuiltin dir_s_close, v16, v25 - CheckInterrupts - Return v42 - bb3(v51, v52, v53, v54, v55, v56): + Return v46 + bb3(v55, v56, v57, v58, v59, v60): CheckInterrupts - Return v56 + Return v60 "); } diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index c1feb759529e15..ad6da06c71e91b 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -91,6 +91,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_succ => profile_operands(profiler, profile, 1), YARVINSN_invokeblock => profile_block_handler(profiler, profile), + YARVINSN_getblockparamproxy => profile_getblockparamproxy(profiler, profile), YARVINSN_invokesuper => profile_invokesuper(profiler, profile), YARVINSN_opt_send_without_block | YARVINSN_send => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); @@ -155,6 +156,22 @@ fn profile_block_handler(profiler: &mut Profiler, profile: &mut IseqProfile) { types[0].observe(ty); } +fn profile_getblockparamproxy(profiler: &mut Profiler, profile: &mut IseqProfile) { + let types = &mut profile.opnd_types[profiler.insn_idx]; + if types.is_empty() { + types.resize(1, TypeDistribution::new()); + } + + let level = profiler.insn_opnd(1).as_u32(); + let ep = unsafe { get_cfp_ep_level(profiler.cfp, level) }; + let block_handler = unsafe { *ep.offset(VM_ENV_DATA_INDEX_SPECVAL as isize) }; + let untagged = unsafe { rb_vm_untag_block_handler(block_handler) }; + + let ty = ProfiledType::object(untagged); + 
VALUE::from(profiler.iseq).write_barrier(ty.class()); + types[0].observe(ty); +} + fn profile_invokesuper(profiler: &mut Profiler, profile: &mut IseqProfile) { let cme = unsafe { rb_vm_frame_method_entry(profiler.cfp) }; let cme_value = VALUE(cme as usize); // CME is a T_IMEMO, which is a VALUE diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index bb11b96dd9a403..367a19fc32fc67 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -210,6 +210,7 @@ make_counters! { exit_stackoverflow, exit_block_param_proxy_modified, exit_block_param_proxy_not_iseq_or_ifunc, + exit_block_param_proxy_not_nil, exit_block_param_wb_required, exit_too_many_keyword_parameters, } @@ -422,6 +423,15 @@ make_counters! { invokeblock_handler_polymorphic, invokeblock_handler_megamorphic, invokeblock_handler_no_profiles, + + getblockparamproxy_handler_iseq, + getblockparamproxy_handler_ifunc, + getblockparamproxy_handler_symbol, + getblockparamproxy_handler_proc, + getblockparamproxy_handler_nil, + getblockparamproxy_handler_polymorphic, + getblockparamproxy_handler_megamorphic, + getblockparamproxy_handler_no_profiles, } /// Increase a counter by a specified amount @@ -558,6 +568,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { StackOverflow => exit_stackoverflow, BlockParamProxyModified => exit_block_param_proxy_modified, BlockParamProxyNotIseqOrIfunc => exit_block_param_proxy_not_iseq_or_ifunc, + BlockParamProxyNotNil => exit_block_param_proxy_not_nil, BlockParamWbRequired => exit_block_param_wb_required, TooManyKeywordParameters => exit_too_many_keyword_parameters, PatchPoint(Invariant::BOPRedefined { .. }) From 33d828470bc86b494fe9b8f6b684d7e8153f3b95 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Thu, 29 Jan 2026 13:02:33 -0500 Subject: [PATCH 2/2] ZJIT: Support polymorphic send without block (#15949) Break out the different cases into different blocks in the bytecode to HIR parser. 
Use a `RefineType` to plumb the case's type through so the type specialization can see it. Then join the logic back to the rest of the current block after each case's send. lobsters before
``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (58.7% of total 4,476,259): Hash#fetch: 849,219 (19.0%) String#start_with?: 328,017 ( 7.3%) Regexp#match?: 148,149 ( 3.3%) Hash#key?: 135,034 ( 3.0%) Kernel#is_a?: 110,030 ( 2.5%) Set#include?: 97,934 ( 2.2%) Integer#===: 96,952 ( 2.2%) Process.clock_gettime: 92,795 ( 2.1%) String#sub!: 84,940 ( 1.9%) String.new: 80,730 ( 1.8%) SQLite3::Statement#done?: 73,532 ( 1.6%) SQLite3::Statement#step: 73,532 ( 1.6%) Time#plus_without_duration: 66,724 ( 1.5%) String#<<: 63,954 ( 1.4%) Time#to_i: 60,817 ( 1.4%) Hash#delete: 60,664 ( 1.4%) Time#subsec: 60,363 ( 1.3%) String#hash: 51,261 ( 1.1%) IO#read: 47,753 ( 1.1%) String#to_sym: 43,915 ( 1.0%) Top-20 calls to C functions from JIT code (83.7% of total 35,570,418): rb_vm_opt_send_without_block: 10,516,746 (29.6%) rb_vm_env_write: 2,382,117 ( 6.7%) rb_zjit_writebarrier_check_immediate: 2,241,285 ( 6.3%) rb_hash_aref: 2,189,588 ( 6.2%) rb_vm_getinstancevariable: 1,762,596 ( 5.0%) rb_ivar_get_at_no_ractor_check: 1,702,246 ( 4.8%) rb_vm_send: 1,460,754 ( 4.1%) rb_hash_aset: 1,151,302 ( 3.2%) rb_vm_setinstancevariable: 1,029,286 ( 2.9%) rb_obj_is_kind_of: 1,000,979 ( 2.8%) rb_vm_opt_getconstant_path: 623,490 ( 1.8%) rb_vm_invokesuper: 595,831 ( 1.7%) Hash#fetch: 562,212 ( 1.6%) rb_vm_invokeblock: 545,744 ( 1.5%) rb_class_allocate_instance: 422,454 ( 1.2%) rb_ec_ary_new_from_values: 388,035 ( 1.1%) String#start_with?: 328,017 ( 0.9%) rb_hash_new_with_size: 289,130 ( 0.8%) fetch: 287,007 ( 0.8%) rb_vm_sendforward: 284,183 ( 0.8%) Top-1 not optimized method types for send (100.0% of total 428): null: 428 (100.0%) Top-3 not optimized method types for send_without_block (100.0% of total 102,413): optimized_send: 92,837 (90.6%) null: 8,595 ( 8.4%) optimized_block_call: 981 ( 1.0%) Top-3 not optimized method types for super (100.0% of total 517,931): cfunc: 489,746 (94.6%) alias: 26,398 ( 5.1%) attrset: 1,787 ( 0.3%) Top-4 instructions with uncategorized 
fallback reason (100.0% of total 868,223): invokeblock: 545,744 (62.9%) sendforward: 284,183 (32.7%) invokesuperforward: 29,713 ( 3.4%) opt_send_without_block: 8,583 ( 1.0%) Top-20 send fallback reasons (100.0% of total 13,432,971): send_without_block_polymorphic: 4,825,641 (35.9%) singleton_class_seen: 3,257,447 (24.2%) send_without_block_no_profiles: 1,906,060 (14.2%) uncategorized: 868,223 ( 6.5%) send_no_profiles: 806,168 ( 6.0%) one_or_more_complex_arg_pass: 537,965 ( 4.0%) super_not_optimized_method_type: 517,931 ( 3.9%) send_without_block_megamorphic: 158,893 ( 1.2%) too_many_args_for_lir: 127,160 ( 0.9%) send_polymorphic: 112,628 ( 0.8%) send_without_block_not_optimized_need_permission: 100,041 ( 0.7%) send_without_block_not_optimized_method_type_optimized: 93,818 ( 0.7%) super_complex_args_pass: 34,022 ( 0.3%) super_target_complex_args_pass: 25,536 ( 0.2%) super_polymorphic: 16,853 ( 0.1%) obj_to_string_not_string: 13,794 ( 0.1%) argc_param_mismatch: 9,927 ( 0.1%) send_without_block_not_optimized_method_type: 8,595 ( 0.1%) send_without_block_direct_keyword_mismatch: 5,568 ( 0.0%) send_megamorphic: 4,525 ( 0.0%) Top-4 setivar fallback reasons (100.0% of total 1,029,286): not_monomorphic: 992,723 (96.4%) not_t_object: 21,354 ( 2.1%) too_complex: 15,188 ( 1.5%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 1,790,794): not_monomorphic: 1,750,108 (97.7%) too_complex: 40,686 ( 2.3%) Top-3 definedivar fallback reasons (100.0% of total 81,713): not_monomorphic: 80,197 (98.1%) too_complex: 796 ( 1.0%) not_t_object: 720 ( 0.9%) Top-6 invokeblock handler (100.0% of total 545,744): monomorphic_iseq: 249,809 (45.8%) polymorphic: 217,915 (39.9%) monomorphic_ifunc: 46,244 ( 8.5%) monomorphic_other: 27,938 ( 5.1%) megamorphic: 2,943 ( 0.5%) no_profiles: 895 ( 0.2%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 652,565): param_forwardable: 246,421 (37.8%) param_block: 198,808 (30.5%) param_rest: 
101,529 (15.6%) param_kwrest: 44,809 ( 6.9%) caller_blockarg: 24,596 ( 3.8%) caller_splat: 15,969 ( 2.4%) caller_kw_splat: 14,227 ( 2.2%) caller_kwarg: 6,206 ( 1.0%) Top-1 compile error reasons (100.0% of total 38,981): exception_handler: 38,981 (100.0%) Top-5 unhandled YARV insns (100.0% of total 4,154): getconstant: 2,566 (61.8%) checkmatch: 929 (22.4%) setblockparam: 443 (10.7%) once: 171 ( 4.1%) expandarray: 45 ( 1.1%) Top-3 unhandled HIR insns (100.0% of total 75,904): throw: 39,721 (52.3%) invokebuiltin: 35,772 (47.1%) array_max: 411 ( 0.5%) Top-20 side exit reasons (100.0% of total 3,770,125): guard_shape_failure: 1,927,218 (51.1%) guard_type_failure: 1,395,315 (37.0%) block_param_proxy_not_iseq_or_ifunc: 257,894 ( 6.8%) unhandled_hir_insn: 75,904 ( 2.0%) compile_error: 38,981 ( 1.0%) patchpoint_stable_constant_names: 25,375 ( 0.7%) block_param_proxy_modified: 13,713 ( 0.4%) fixnum_lshift_overflow: 10,085 ( 0.3%) fixnum_mult_overflow: 8,550 ( 0.2%) unhandled_yarv_insn: 4,154 ( 0.1%) unhandled_block_arg: 2,548 ( 0.1%) unhandled_newarray_send_pack: 2,322 ( 0.1%) patchpoint_no_singleton_class: 2,008 ( 0.1%) patchpoint_no_ep_escape: 1,683 ( 0.0%) obj_to_string_fallback: 1,358 ( 0.0%) patchpoint_method_redefined: 1,212 ( 0.0%) expandarray_failure: 837 ( 0.0%) guard_super_method_entry: 737 ( 0.0%) guard_less_failure: 163 ( 0.0%) interrupt: 49 ( 0.0%) send_count: 46,003,239 dynamic_send_count: 13,432,971 (29.2%) optimized_send_count: 32,570,268 (70.8%) dynamic_setivar_count: 1,029,286 ( 2.2%) dynamic_getivar_count: 1,790,794 ( 3.9%) dynamic_definedivar_count: 81,713 ( 0.2%) iseq_optimized_send_count: 15,117,301 (32.9%) inline_cfunc_optimized_send_count: 11,837,918 (25.7%) inline_iseq_optimized_send_count: 884,606 ( 1.9%) non_variadic_cfunc_optimized_send_count: 2,597,998 ( 5.6%) variadic_cfunc_optimized_send_count: 2,132,445 ( 4.6%) compiled_iseq_count: 5,259 failed_iseq_count: 0 compile_time: 1,409ms profile_time: 10ms gc_time: 11ms invalidation_time: 77ms 
vm_write_pc_count: 40,924,587 vm_write_sp_count: 40,924,587 vm_write_locals_count: 39,740,467 vm_write_stack_count: 39,740,467 vm_write_to_parent_iseq_local_count: 306,481 vm_read_from_parent_iseq_local_count: 4,841,855 guard_type_count: 48,810,089 guard_type_exit_ratio: 2.9% guard_shape_count: 19,485,073 guard_shape_exit_ratio: 9.9% code_region_bytes: 27,262,976 zjit_alloc_bytes: 34,517,324 total_mem_bytes: 61,780,300 side_exit_count: 3,770,125 total_insn_count: 273,152,243 vm_insn_count: 43,926,931 zjit_insn_count: 229,225,312 ratio_in_zjit: 83.9% ```
lobsters after
``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (61.7% of total 5,220,252): Hash#fetch: 1,274,409 (24.4%) String#start_with?: 328,017 ( 6.3%) Regexp#match?: 147,525 ( 2.8%) Hash#key?: 139,198 ( 2.7%) Kernel#is_a?: 110,178 ( 2.1%) Class#allocate: 107,143 ( 2.1%) Hash#delete: 106,307 ( 2.0%) Class#superclass: 98,165 ( 1.9%) Set#include?: 97,934 ( 1.9%) Integer#===: 95,874 ( 1.8%) Process.clock_gettime: 92,795 ( 1.8%) String#sub!: 80,732 ( 1.5%) String.new: 80,730 ( 1.5%) SQLite3::Statement#done?: 73,532 ( 1.4%) SQLite3::Statement#step: 73,532 ( 1.4%) Time#plus_without_duration: 66,724 ( 1.3%) String#<<: 63,954 ( 1.2%) Kernel#dup: 62,590 ( 1.2%) Time#to_i: 60,814 ( 1.2%) Time#subsec: 60,363 ( 1.2%) Top-20 calls to C functions from JIT code (80.8% of total 33,681,248): rb_vm_opt_send_without_block: 6,869,559 (20.4%) rb_hash_aref: 2,487,056 ( 7.4%) rb_vm_env_write: 2,372,693 ( 7.0%) rb_zjit_writebarrier_check_immediate: 2,238,890 ( 6.6%) rb_vm_getinstancevariable: 1,861,700 ( 5.5%) rb_ivar_get_at_no_ractor_check: 1,702,246 ( 5.1%) rb_vm_send: 1,468,202 ( 4.4%) rb_hash_aset: 1,267,469 ( 3.8%) rb_obj_is_kind_of: 1,126,363 ( 3.3%) rb_vm_setinstancevariable: 1,055,131 ( 3.1%) Hash#fetch: 987,402 ( 2.9%) rb_vm_opt_getconstant_path: 641,779 ( 1.9%) rb_vm_invokesuper: 603,416 ( 1.8%) rb_vm_invokeblock: 545,743 ( 1.6%) rb_class_allocate_instance: 415,748 ( 1.2%) rb_ec_ary_new_from_values: 380,080 ( 1.1%) String#start_with?: 328,017 ( 1.0%) rb_hash_new_with_size: 289,172 ( 0.9%) fetch: 287,007 ( 0.9%) rb_vm_sendforward: 283,885 ( 0.8%) Top-1 not optimized method types for send (100.0% of total 428): null: 428 (100.0%) Top-3 not optimized method types for send_without_block (100.0% of total 202,329): optimized_send: 190,504 (94.2%) null: 10,844 ( 5.4%) optimized_block_call: 981 ( 0.5%) Top-3 not optimized method types for super (100.0% of total 517,421): cfunc: 489,236 (94.6%) alias: 26,398 ( 5.1%) attrset: 1,787 ( 0.3%) Top-4 instructions with 
uncategorized fallback reason (100.0% of total 867,452): invokeblock: 545,743 (62.9%) sendforward: 283,885 (32.7%) invokesuperforward: 29,713 ( 3.4%) opt_send_without_block: 8,111 ( 0.9%) Top-20 send fallback reasons (100.0% of total 9,800,518): singleton_class_seen: 3,293,078 (33.6%) send_without_block_no_profiles: 2,142,301 (21.9%) uncategorized: 867,452 ( 8.9%) send_no_profiles: 820,538 ( 8.4%) send_without_block_polymorphic: 780,065 ( 8.0%) one_or_more_complex_arg_pass: 556,514 ( 5.7%) super_not_optimized_method_type: 517,421 ( 5.3%) send_without_block_not_optimized_method_type_optimized: 191,485 ( 2.0%) send_without_block_megamorphic: 161,550 ( 1.6%) too_many_args_for_lir: 127,190 ( 1.3%) send_polymorphic: 111,290 ( 1.1%) send_without_block_not_optimized_need_permission: 99,526 ( 1.0%) super_polymorphic: 45,651 ( 0.5%) super_complex_args_pass: 33,748 ( 0.3%) obj_to_string_not_string: 13,794 ( 0.1%) send_without_block_not_optimized_method_type: 10,844 ( 0.1%) argc_param_mismatch: 9,927 ( 0.1%) send_without_block_direct_keyword_mismatch: 6,336 ( 0.1%) super_target_complex_args_pass: 5,108 ( 0.1%) send_megamorphic: 4,525 ( 0.0%) Top-4 setivar fallback reasons (100.0% of total 1,123,837): not_monomorphic: 1,087,274 (96.7%) not_t_object: 21,354 ( 1.9%) too_complex: 15,188 ( 1.4%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 2,132,203): not_monomorphic: 2,092,243 (98.1%) too_complex: 39,960 ( 1.9%) Top-3 definedivar fallback reasons (100.0% of total 107,264): not_monomorphic: 105,748 (98.6%) too_complex: 796 ( 0.7%) not_t_object: 720 ( 0.7%) Top-6 invokeblock handler (100.0% of total 545,743): monomorphic_iseq: 249,809 (45.8%) polymorphic: 217,914 (39.9%) monomorphic_ifunc: 46,244 ( 8.5%) monomorphic_other: 27,938 ( 5.1%) megamorphic: 2,943 ( 0.5%) no_profiles: 895 ( 0.2%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 651,185): param_forwardable: 233,989 (35.9%) param_block: 205,158 (31.5%) 
param_rest: 100,319 (15.4%) param_kwrest: 44,596 ( 6.8%) caller_blockarg: 21,863 ( 3.4%) caller_kw_splat: 20,970 ( 3.2%) caller_splat: 18,106 ( 2.8%) caller_kwarg: 6,184 ( 0.9%) Top-1 compile error reasons (100.0% of total 38,980): exception_handler: 38,980 (100.0%) Top-5 unhandled YARV insns (100.0% of total 4,154): getconstant: 2,566 (61.8%) checkmatch: 929 (22.4%) setblockparam: 443 (10.7%) once: 171 ( 4.1%) expandarray: 45 ( 1.1%) Top-3 unhandled HIR insns (100.0% of total 75,633): throw: 39,447 (52.2%) invokebuiltin: 35,775 (47.3%) array_max: 411 ( 0.5%) Top-20 side exit reasons (100.0% of total 3,734,975): guard_shape_failure: 1,908,302 (51.1%) guard_type_failure: 1,391,624 (37.3%) block_param_proxy_not_iseq_or_ifunc: 246,820 ( 6.6%) unhandled_hir_insn: 75,633 ( 2.0%) compile_error: 38,980 ( 1.0%) patchpoint_stable_constant_names: 25,375 ( 0.7%) block_param_proxy_modified: 13,713 ( 0.4%) fixnum_lshift_overflow: 10,085 ( 0.3%) fixnum_mult_overflow: 8,550 ( 0.2%) unhandled_yarv_insn: 4,154 ( 0.1%) unhandled_block_arg: 2,548 ( 0.1%) unhandled_newarray_send_pack: 2,322 ( 0.1%) patchpoint_no_singleton_class: 2,008 ( 0.1%) patchpoint_no_ep_escape: 1,683 ( 0.0%) obj_to_string_fallback: 1,358 ( 0.0%) expandarray_failure: 837 ( 0.0%) patchpoint_method_redefined: 710 ( 0.0%) guard_less_failure: 163 ( 0.0%) guard_super_method_entry: 53 ( 0.0%) interrupt: 38 ( 0.0%) send_count: 45,128,693 dynamic_send_count: 9,800,518 (21.7%) optimized_send_count: 35,328,175 (78.3%) dynamic_setivar_count: 1,123,837 ( 2.5%) dynamic_getivar_count: 2,132,203 ( 4.7%) dynamic_definedivar_count: 107,264 ( 0.2%) iseq_optimized_send_count: 15,891,453 (35.2%) inline_cfunc_optimized_send_count: 12,866,297 (28.5%) inline_iseq_optimized_send_count: 1,102,971 ( 2.4%) non_variadic_cfunc_optimized_send_count: 2,857,775 ( 6.3%) variadic_cfunc_optimized_send_count: 2,609,679 ( 5.8%) compiled_iseq_count: 5,268 failed_iseq_count: 0 compile_time: 1,558ms profile_time: 10ms gc_time: 13ms invalidation_time: 
84ms vm_write_pc_count: 39,300,901 vm_write_sp_count: 39,300,901 vm_write_locals_count: 38,133,357 vm_write_stack_count: 38,133,357 vm_write_to_parent_iseq_local_count: 305,249 vm_read_from_parent_iseq_local_count: 4,818,083 guard_type_count: 48,036,224 guard_type_exit_ratio: 2.9% guard_shape_count: 19,302,903 guard_shape_exit_ratio: 9.9% code_region_bytes: 29,491,200 zjit_alloc_bytes: 34,932,040 total_mem_bytes: 64,423,240 side_exit_count: 3,734,975 total_insn_count: 272,964,960 vm_insn_count: 46,583,034 zjit_insn_count: 226,381,926 ratio_in_zjit: 82.9% ```
--- zjit/src/codegen.rs | 64 +++++++++++++++++++ zjit/src/distribution.rs | 6 +- zjit/src/hir.rs | 91 +++++++++++++++++++++++++++ zjit/src/hir/opt_tests.rs | 128 +++++++++++++++++++++++++++++++++++++- zjit/src/stats.rs | 2 + 5 files changed, 288 insertions(+), 3 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 9276d0af6b81e7..41da154c1ae937 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -526,6 +526,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::UnboxFixnum { val } => gen_unbox_fixnum(asm, opnd!(val)), Insn::Test { val } => gen_test(asm, opnd!(val)), Insn::RefineType { val, .. } => opnd!(val), + Insn::HasType { val, expected } => gen_has_type(asm, opnd!(val), *expected), Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), &Insn::GuardBitEquals { val, expected, reason, state } => gen_guard_bit_equals(jit, asm, opnd!(val), expected, reason, &function.frame_state(state)), @@ -2187,6 +2188,69 @@ fn gen_test(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { asm.csel_e(0.into(), 1.into()) } +fn gen_has_type(asm: &mut Assembler, val: lir::Opnd, ty: Type) -> lir::Opnd { + if ty.is_subtype(types::Fixnum) { + asm.test(val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + asm.csel_nz(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::Flonum) { + // Flonum: (val & RUBY_FLONUM_MASK) == RUBY_FLONUM_FLAG + let masked = asm.and(val, Opnd::UImm(RUBY_FLONUM_MASK as u64)); + asm.cmp(masked, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::StaticSymbol) { + // Static symbols have (val & 0xff) == RUBY_SYMBOL_FLAG + // Use 8-bit comparison like YJIT does. 
GuardType should not be used + // for a known VALUE, which with_num_bits() does not support. + asm.cmp(val.with_num_bits(8), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::NilClass) { + asm.cmp(val, Qnil.into()); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::TrueClass) { + asm.cmp(val, Qtrue.into()); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::FalseClass) { + asm.cmp(val, Qfalse.into()); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_immediate() { + // All immediate types' guard should have been handled above + panic!("unexpected immediate guard type: {ty}"); + } else if let Some(expected_class) = ty.runtime_exact_ruby_class() { + // If val isn't in a register, load it to use it as the base of Opnd::mem later. + // TODO: Max thinks codegen should not care about the shapes of the operands except to create them. (Shopify/ruby#685) + let val = match val { + Opnd::Reg(_) | Opnd::VReg { .. 
} => val, + _ => asm.load(val), + }; + + let ret_label = asm.new_label("true"); + let false_label = asm.new_label("false"); + + // Check if it's a special constant + asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(false_label.clone()); + + // Check if it's false + asm.cmp(val, Qfalse.into()); + asm.je(false_label.clone()); + + // Load the class from the object's klass field + let klass = asm.load(Opnd::mem(64, val, RUBY_OFFSET_RBASIC_KLASS)); + asm.cmp(klass, Opnd::Value(expected_class)); + asm.jmp(ret_label.clone()); + + // If we get here then the value was false, unset the Z flag + // so that csel_e will select false instead of true + asm.write_label(false_label); + asm.test(Opnd::UImm(1), Opnd::UImm(1)); + + asm.write_label(ret_label); + asm.csel_e(Opnd::UImm(1), Opnd::Imm(0)) + } else { + unimplemented!("unsupported type: {ty}"); + } +} + /// Compile a type check with a side exit fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> lir::Opnd { gen_incr_counter(asm, Counter::guard_type_count); diff --git a/zjit/src/distribution.rs b/zjit/src/distribution.rs index 2c6ffb3ae6fff0..9b3920396a13b9 100644 --- a/zjit/src/distribution.rs +++ b/zjit/src/distribution.rs @@ -69,7 +69,7 @@ enum DistributionKind { SkewedMegamorphic, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct DistributionSummary { kind: DistributionKind, buckets: [T; N], @@ -134,6 +134,10 @@ impl Distributi assert!(idx < N, "index {idx} out of bounds for buckets[{N}]"); self.buckets[idx] } + + pub fn buckets(&self) -> &[T] { + &self.buckets + } } #[cfg(test)] diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 32519a5b978847..24d04f59663291 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -626,6 +626,7 @@ pub enum SendFallbackReason { SendWithoutBlockNotOptimizedNeedPermission, SendWithoutBlockBopRedefined, SendWithoutBlockOperandsNotFixnum, + SendWithoutBlockPolymorphicFallback, SendDirectKeywordMismatch, 
SendDirectKeywordCountMismatch, SendDirectMissingKeyword, @@ -687,6 +688,7 @@ impl Display for SendFallbackReason { SendNotOptimizedNeedPermission => write!(f, "Send: method private or protected and no FCALL"), SendWithoutBlockBopRedefined => write!(f, "SendWithoutBlock: basic operation was redefined"), SendWithoutBlockOperandsNotFixnum => write!(f, "SendWithoutBlock: operands are not fixnums"), + SendWithoutBlockPolymorphicFallback => write!(f, "SendWithoutBlock: polymorphic fallback"), SendDirectKeywordMismatch => write!(f, "SendDirect: keyword mismatch"), SendDirectKeywordCountMismatch => write!(f, "SendDirect: keyword count mismatch"), SendDirectMissingKeyword => write!(f, "SendDirect: missing keyword"), @@ -1016,6 +1018,8 @@ pub enum Insn { /// Refine the known type information of with additional type information. /// Computes the intersection of the existing type and the new type. RefineType { val: InsnId, new_type: Type }, + /// Return CBool[true] if val has type Type and CBool[false] otherwise. + HasType { val: InsnId, expected: Type }, /// Side-exit if val doesn't have the expected type. GuardType { val: InsnId, guard_type: Type, state: InsnId }, @@ -1242,6 +1246,7 @@ impl Insn { Insn::CheckInterrupts { .. } => effects::Any, Insn::InvokeProc { .. } => effects::Any, Insn::RefineType { .. } => effects::Empty, + Insn::HasType { .. } => effects::Empty, } } @@ -1546,6 +1551,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::FixnumRShift { left, right, .. } => { write!(f, "FixnumRShift {left}, {right}") }, Insn::GuardType { val, guard_type, .. } => { write!(f, "GuardType {val}, {}", guard_type.print(self.ptr_map)) }, Insn::RefineType { val, new_type, .. } => { write!(f, "RefineType {val}, {}", new_type.print(self.ptr_map)) }, + Insn::HasType { val, expected, .. } => { write!(f, "HasType {val}, {}", expected.print(self.ptr_map)) }, Insn::GuardTypeNot { val, guard_type, .. 
} => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, Insn::GuardAnyBitSet { val, mask, .. } => { write!(f, "GuardBitSet {val}, {}", mask.print(self.ptr_map)) }, @@ -2235,6 +2241,7 @@ impl Function { &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) }, &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, &RefineType { val, new_type } => RefineType { val: find!(val), new_type }, + &HasType { val, expected } => HasType { val: find!(val), expected }, &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state }, &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state }, &GuardBitEquals { val, expected, reason, state } => GuardBitEquals { val: find!(val), expected, reason, state }, @@ -2497,6 +2504,7 @@ impl Function { &Insn::CCallVariadic { return_type, .. } => return_type, Insn::GuardType { val, guard_type, .. } => self.type_of(*val).intersection(*guard_type), Insn::RefineType { val, new_type, .. } => self.type_of(*val).intersection(*new_type), + Insn::HasType { .. } => types::CBool, Insn::GuardTypeNot { .. } => types::BasicObject, Insn::GuardBitEquals { val, expected, .. } => self.type_of(*val).intersection(Type::from_const(*expected)), Insn::GuardAnyBitSet { val, .. 
} => self.type_of(*val), @@ -2860,6 +2868,22 @@ impl Function { self.resolve_receiver_type_from_profile(recv, insn_idx) } + fn polymorphic_summary(&self, profiles: &ProfileOracle, recv: InsnId, insn_idx: usize) -> Option { + let Some(entries) = profiles.types.get(&insn_idx) else { + return None; + }; + let recv = self.chase_insn(recv); + for (entry_insn, entry_type_summary) in entries { + if self.union_find.borrow().find_const(*entry_insn) == recv { + if entry_type_summary.is_polymorphic() { + return Some(entry_type_summary.clone()); + } + return None; + } + } + None + } + /// Resolve the receiver type for method dispatch optimization from profile data. /// /// Returns: @@ -4608,6 +4632,7 @@ impl Function { worklist.push_back(state); } | &Insn::RefineType { val, .. } + | &Insn::HasType { val, .. } | &Insn::Return { val } | &Insn::Test { val } | &Insn::SetLocal { val, .. } @@ -5579,6 +5604,7 @@ impl Function { self.assert_subtype(insn_id, class, types::Class) } Insn::RefineType { .. } => Ok(()), + Insn::HasType { val, .. 
} => self.assert_subtype(insn_id, val, types::BasicObject), } } @@ -6939,6 +6965,71 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; + { + fn new_branch_block( + fun: &mut Function, + cd: *const rb_call_data, + argc: usize, + opcode: u32, + new_type: Type, + insn_idx: u32, + exit_state: &FrameState, + locals_count: usize, + stack_count: usize, + join_block: BlockId, + ) -> BlockId { + let block = fun.new_block(insn_idx); + let self_param = fun.push_insn(block, Insn::Param); + let mut state = exit_state.clone(); + state.locals.clear(); + state.stack.clear(); + state.locals.extend((0..locals_count).map(|_| fun.push_insn(block, Insn::Param))); + state.stack.extend((0..stack_count).map(|_| fun.push_insn(block, Insn::Param))); + let snapshot = fun.push_insn(block, Insn::Snapshot { state: state.clone() }); + let args = state.stack_pop_n(argc).unwrap(); + let recv = state.stack_pop().unwrap(); + let refined_recv = fun.push_insn(block, Insn::RefineType { val: recv, new_type }); + state.replace(recv, refined_recv); + let send = fun.push_insn(block, Insn::SendWithoutBlock { recv: refined_recv, cd, args, state: snapshot, reason: Uncategorized(opcode) }); + state.stack_push(send); + fun.push_insn(block, Insn::Jump(BranchEdge { target: join_block, args: state.as_args(self_param) })); + block + } + let branch_insn_idx = exit_state.insn_idx as u32; + let locals_count = state.locals.len(); + let stack_count = state.stack.len(); + let recv = state.stack_topn(argc as usize)?; // args are on top + let entry_args = state.as_args(self_param); + if let Some(summary) = fun.polymorphic_summary(&profiles, recv, exit_state.insn_idx) { + let join_block = insn_idx_to_block.get(&insn_idx).copied().unwrap_or_else(|| fun.new_block(insn_idx)); + // TODO(max): Only iterate over unique classes, not unique (class, shape) pairs. 
+ for &profiled_type in summary.buckets() { + if profiled_type.is_empty() { break; } + let expected = Type::from_profiled_type(profiled_type); + let has_type = fun.push_insn(block, Insn::HasType { val: recv, expected }); + let iftrue_block = + new_branch_block(&mut fun, cd, argc as usize, opcode, expected, branch_insn_idx, &exit_state, locals_count, stack_count, join_block); + let target = BranchEdge { target: iftrue_block, args: entry_args.clone() }; + fun.push_insn(block, Insn::IfTrue { val: has_type, target }); + } + // Continue compilation from the join block at the next instruction. + // Make a copy of the current state with only the args popped: the + // receiver's slot stands in for the send result, because we just use + // the locals/stack sizes to make the right number of Params + let mut join_state = state.clone(); + join_state.stack_pop_n(argc as usize)?; + queue.push_back((join_state, join_block, insn_idx, local_inval)); + // In the fallthrough case, do a generic interpreter send and then join. + let args = state.stack_pop_n(argc as usize)?; + let recv = state.stack_pop()?; + let reason = SendWithoutBlockPolymorphicFallback; + let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id, reason }); + state.stack_push(send); + fun.push_insn(block, Insn::Jump(BranchEdge { target: join_block, args: state.as_args(self_param) })); + break; // End the block + } + } + let args = state.stack_pop_n(argc as usize)?; let recv = state.stack_pop()?; let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id, reason: Uncategorized(opcode) }); diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index c1059094ac531a..06bf561d166d62 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -6407,9 +6407,29 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): - v14:BasicObject = SendWithoutBlock v9, :foo # SendFallbackReason: Uncategorized(opt_send_without_block) + 
v14:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v14, bb4(v8, v9, v9) + v23:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v23, bb5(v8, v9, v9) + v32:BasicObject = SendWithoutBlock v9, :foo # SendFallbackReason: SendWithoutBlock: polymorphic fallback + Jump bb3(v8, v9, v32) + bb4(v15:BasicObject, v16:BasicObject, v17:BasicObject): + v19:HeapObject[class_exact:C] = RefineType v17, HeapObject[class_exact:C] + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + IncrCounter getivar_fallback_not_monomorphic + v44:BasicObject = GetIvar v19, :@foo + Jump bb3(v15, v16, v44) + bb5(v24:BasicObject, v25:BasicObject, v26:BasicObject): + v28:HeapObject[class_exact:C] = RefineType v26, HeapObject[class_exact:C] + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + IncrCounter getivar_fallback_not_monomorphic + v47:BasicObject = GetIvar v28, :@foo + Jump bb3(v24, v25, v47) + bb3(v34:BasicObject, v35:BasicObject, v36:BasicObject): CheckInterrupts - Return v14 + Return v36 "); } @@ -11487,4 +11507,108 @@ mod hir_opt_tests { Return v47 "); } + + #[test] + fn specialize_polymorphic_send_iseq() { + set_call_threshold(4); + eval(" + class C + def foo = 3 + end + + class D + def foo = 4 + end + + def test o + o.foo + 2 + end + + test C.new; test D.new; test C.new; test D.new + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:11: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :o, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v14, bb4(v8, v9, v9) + v23:CBool = HasType v9, HeapObject[class_exact:D] + IfTrue v23, bb5(v8, v9, v9) + v32:BasicObject = SendWithoutBlock v9, :foo # SendFallbackReason: SendWithoutBlock: polymorphic fallback + Jump bb3(v8, v9, v32) + 
bb4(v15:BasicObject, v16:BasicObject, v17:BasicObject): + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + IncrCounter inline_iseq_optimized_send_count + v54:Fixnum[3] = Const Value(3) + Jump bb3(v15, v16, v54) + bb5(v24:BasicObject, v25:BasicObject, v26:BasicObject): + PatchPoint NoSingletonClass(D@0x1038) + PatchPoint MethodRedefined(D@0x1038, foo@0x1008, cme:0x1040) + IncrCounter inline_iseq_optimized_send_count + v56:Fixnum[4] = Const Value(4) + Jump bb3(v24, v25, v56) + bb3(v34:BasicObject, v35:BasicObject, v36:BasicObject): + v39:Fixnum[2] = Const Value(2) + PatchPoint MethodRedefined(Integer@0x1068, +@0x1070, cme:0x1078) + v59:Fixnum = GuardType v36, Fixnum + v60:Fixnum = FixnumAdd v59, v39 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v60 + "); + } + + #[test] + fn specialize_polymorphic_send_with_immediate() { + set_call_threshold(4); + eval(" + class C; end + + def test o + o.itself + end + + test C.new; test 3; test C.new; test 4 + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:5: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :o, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v14, bb4(v8, v9, v9) + v23:CBool = HasType v9, Fixnum + IfTrue v23, bb5(v8, v9, v9) + v32:BasicObject = SendWithoutBlock v9, :itself # SendFallbackReason: SendWithoutBlock: polymorphic fallback + Jump bb3(v8, v9, v32) + bb4(v15:BasicObject, v16:BasicObject, v17:BasicObject): + v19:HeapObject[class_exact:C] = RefineType v17, HeapObject[class_exact:C] + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, itself@0x1008, cme:0x1010) + IncrCounter inline_cfunc_optimized_send_count + Jump bb3(v15, v16, v19) + bb5(v24:BasicObject, v25:BasicObject, v26:BasicObject): + 
v28:Fixnum = RefineType v26, Fixnum + PatchPoint MethodRedefined(Integer@0x1038, itself@0x1008, cme:0x1010) + IncrCounter inline_cfunc_optimized_send_count + Jump bb3(v24, v25, v28) + bb3(v34:BasicObject, v35:BasicObject, v36:BasicObject): + CheckInterrupts + Return v36 + "); + } } diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 367a19fc32fc67..6fc754007f6a82 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -229,6 +229,7 @@ make_counters! { send_fallback_too_many_args_for_lir, send_fallback_send_without_block_bop_redefined, send_fallback_send_without_block_operands_not_fixnum, + send_fallback_send_without_block_polymorphic_fallback, send_fallback_send_without_block_direct_keyword_mismatch, send_fallback_send_without_block_direct_keyword_count_mismatch, send_fallback_send_without_block_direct_missing_keyword, @@ -610,6 +611,7 @@ pub fn send_fallback_counter(reason: crate::hir::SendFallbackReason) -> Counter TooManyArgsForLir => send_fallback_too_many_args_for_lir, SendWithoutBlockBopRedefined => send_fallback_send_without_block_bop_redefined, SendWithoutBlockOperandsNotFixnum => send_fallback_send_without_block_operands_not_fixnum, + SendWithoutBlockPolymorphicFallback => send_fallback_send_without_block_polymorphic_fallback, SendDirectKeywordMismatch => send_fallback_send_without_block_direct_keyword_mismatch, SendDirectKeywordCountMismatch => send_fallback_send_without_block_direct_keyword_count_mismatch, SendDirectMissingKeyword => send_fallback_send_without_block_direct_missing_keyword,