Skip to content

Commit 33d8284

Browse files
authored
ZJIT: Support polymorphic send without block (ruby#15949)
Break out the different cases into different blocks in the bytecode to HIR parser. Use a `RefineType` to plumb the case's type through so the type specialization can see it. Then join the logic back to the rest of the current block after each case's send. lobsters before <details> ``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (58.7% of total 4,476,259): Hash#fetch: 849,219 (19.0%) String#start_with?: 328,017 ( 7.3%) Regexp#match?: 148,149 ( 3.3%) Hash#key?: 135,034 ( 3.0%) Kernel#is_a?: 110,030 ( 2.5%) Set#include?: 97,934 ( 2.2%) Integer#===: 96,952 ( 2.2%) Process.clock_gettime: 92,795 ( 2.1%) String#sub!: 84,940 ( 1.9%) String.new: 80,730 ( 1.8%) SQLite3::Statement#done?: 73,532 ( 1.6%) SQLite3::Statement#step: 73,532 ( 1.6%) Time#plus_without_duration: 66,724 ( 1.5%) String#<<: 63,954 ( 1.4%) Time#to_i: 60,817 ( 1.4%) Hash#delete: 60,664 ( 1.4%) Time#subsec: 60,363 ( 1.3%) String#hash: 51,261 ( 1.1%) IO#read: 47,753 ( 1.1%) String#to_sym: 43,915 ( 1.0%) Top-20 calls to C functions from JIT code (83.7% of total 35,570,418): rb_vm_opt_send_without_block: 10,516,746 (29.6%) rb_vm_env_write: 2,382,117 ( 6.7%) rb_zjit_writebarrier_check_immediate: 2,241,285 ( 6.3%) rb_hash_aref: 2,189,588 ( 6.2%) rb_vm_getinstancevariable: 1,762,596 ( 5.0%) rb_ivar_get_at_no_ractor_check: 1,702,246 ( 4.8%) rb_vm_send: 1,460,754 ( 4.1%) rb_hash_aset: 1,151,302 ( 3.2%) rb_vm_setinstancevariable: 1,029,286 ( 2.9%) rb_obj_is_kind_of: 1,000,979 ( 2.8%) rb_vm_opt_getconstant_path: 623,490 ( 1.8%) rb_vm_invokesuper: 595,831 ( 1.7%) Hash#fetch: 562,212 ( 1.6%) rb_vm_invokeblock: 545,744 ( 1.5%) rb_class_allocate_instance: 422,454 ( 1.2%) rb_ec_ary_new_from_values: 388,035 ( 1.1%) String#start_with?: 328,017 ( 0.9%) rb_hash_new_with_size: 289,130 ( 0.8%) fetch: 287,007 ( 0.8%) rb_vm_sendforward: 284,183 ( 0.8%) Top-1 not optimized method types for send (100.0% of total 428): null: 428 (100.0%) Top-3 not optimized method types for send_without_block (100.0% of 
total 102,413): optimized_send: 92,837 (90.6%) null: 8,595 ( 8.4%) optimized_block_call: 981 ( 1.0%) Top-3 not optimized method types for super (100.0% of total 517,931): cfunc: 489,746 (94.6%) alias: 26,398 ( 5.1%) attrset: 1,787 ( 0.3%) Top-4 instructions with uncategorized fallback reason (100.0% of total 868,223): invokeblock: 545,744 (62.9%) sendforward: 284,183 (32.7%) invokesuperforward: 29,713 ( 3.4%) opt_send_without_block: 8,583 ( 1.0%) Top-20 send fallback reasons (100.0% of total 13,432,971): send_without_block_polymorphic: 4,825,641 (35.9%) singleton_class_seen: 3,257,447 (24.2%) send_without_block_no_profiles: 1,906,060 (14.2%) uncategorized: 868,223 ( 6.5%) send_no_profiles: 806,168 ( 6.0%) one_or_more_complex_arg_pass: 537,965 ( 4.0%) super_not_optimized_method_type: 517,931 ( 3.9%) send_without_block_megamorphic: 158,893 ( 1.2%) too_many_args_for_lir: 127,160 ( 0.9%) send_polymorphic: 112,628 ( 0.8%) send_without_block_not_optimized_need_permission: 100,041 ( 0.7%) send_without_block_not_optimized_method_type_optimized: 93,818 ( 0.7%) super_complex_args_pass: 34,022 ( 0.3%) super_target_complex_args_pass: 25,536 ( 0.2%) super_polymorphic: 16,853 ( 0.1%) obj_to_string_not_string: 13,794 ( 0.1%) argc_param_mismatch: 9,927 ( 0.1%) send_without_block_not_optimized_method_type: 8,595 ( 0.1%) send_without_block_direct_keyword_mismatch: 5,568 ( 0.0%) send_megamorphic: 4,525 ( 0.0%) Top-4 setivar fallback reasons (100.0% of total 1,029,286): not_monomorphic: 992,723 (96.4%) not_t_object: 21,354 ( 2.1%) too_complex: 15,188 ( 1.5%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 1,790,794): not_monomorphic: 1,750,108 (97.7%) too_complex: 40,686 ( 2.3%) Top-3 definedivar fallback reasons (100.0% of total 81,713): not_monomorphic: 80,197 (98.1%) too_complex: 796 ( 1.0%) not_t_object: 720 ( 0.9%) Top-6 invokeblock handler (100.0% of total 545,744): monomorphic_iseq: 249,809 (45.8%) polymorphic: 217,915 (39.9%) 
monomorphic_ifunc: 46,244 ( 8.5%) monomorphic_other: 27,938 ( 5.1%) megamorphic: 2,943 ( 0.5%) no_profiles: 895 ( 0.2%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 652,565): param_forwardable: 246,421 (37.8%) param_block: 198,808 (30.5%) param_rest: 101,529 (15.6%) param_kwrest: 44,809 ( 6.9%) caller_blockarg: 24,596 ( 3.8%) caller_splat: 15,969 ( 2.4%) caller_kw_splat: 14,227 ( 2.2%) caller_kwarg: 6,206 ( 1.0%) Top-1 compile error reasons (100.0% of total 38,981): exception_handler: 38,981 (100.0%) Top-5 unhandled YARV insns (100.0% of total 4,154): getconstant: 2,566 (61.8%) checkmatch: 929 (22.4%) setblockparam: 443 (10.7%) once: 171 ( 4.1%) expandarray: 45 ( 1.1%) Top-3 unhandled HIR insns (100.0% of total 75,904): throw: 39,721 (52.3%) invokebuiltin: 35,772 (47.1%) array_max: 411 ( 0.5%) Top-20 side exit reasons (100.0% of total 3,770,125): guard_shape_failure: 1,927,218 (51.1%) guard_type_failure: 1,395,315 (37.0%) block_param_proxy_not_iseq_or_ifunc: 257,894 ( 6.8%) unhandled_hir_insn: 75,904 ( 2.0%) compile_error: 38,981 ( 1.0%) patchpoint_stable_constant_names: 25,375 ( 0.7%) block_param_proxy_modified: 13,713 ( 0.4%) fixnum_lshift_overflow: 10,085 ( 0.3%) fixnum_mult_overflow: 8,550 ( 0.2%) unhandled_yarv_insn: 4,154 ( 0.1%) unhandled_block_arg: 2,548 ( 0.1%) unhandled_newarray_send_pack: 2,322 ( 0.1%) patchpoint_no_singleton_class: 2,008 ( 0.1%) patchpoint_no_ep_escape: 1,683 ( 0.0%) obj_to_string_fallback: 1,358 ( 0.0%) patchpoint_method_redefined: 1,212 ( 0.0%) expandarray_failure: 837 ( 0.0%) guard_super_method_entry: 737 ( 0.0%) guard_less_failure: 163 ( 0.0%) interrupt: 49 ( 0.0%) send_count: 46,003,239 dynamic_send_count: 13,432,971 (29.2%) optimized_send_count: 32,570,268 (70.8%) dynamic_setivar_count: 1,029,286 ( 2.2%) dynamic_getivar_count: 1,790,794 ( 3.9%) dynamic_definedivar_count: 81,713 ( 0.2%) iseq_optimized_send_count: 15,117,301 (32.9%) inline_cfunc_optimized_send_count: 11,837,918 (25.7%) 
inline_iseq_optimized_send_count: 884,606 ( 1.9%) non_variadic_cfunc_optimized_send_count: 2,597,998 ( 5.6%) variadic_cfunc_optimized_send_count: 2,132,445 ( 4.6%) compiled_iseq_count: 5,259 failed_iseq_count: 0 compile_time: 1,409ms profile_time: 10ms gc_time: 11ms invalidation_time: 77ms vm_write_pc_count: 40,924,587 vm_write_sp_count: 40,924,587 vm_write_locals_count: 39,740,467 vm_write_stack_count: 39,740,467 vm_write_to_parent_iseq_local_count: 306,481 vm_read_from_parent_iseq_local_count: 4,841,855 guard_type_count: 48,810,089 guard_type_exit_ratio: 2.9% guard_shape_count: 19,485,073 guard_shape_exit_ratio: 9.9% code_region_bytes: 27,262,976 zjit_alloc_bytes: 34,517,324 total_mem_bytes: 61,780,300 side_exit_count: 3,770,125 total_insn_count: 273,152,243 vm_insn_count: 43,926,931 zjit_insn_count: 229,225,312 ratio_in_zjit: 83.9% ``` </details> lobsters after <details> ``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (61.7% of total 5,220,252): Hash#fetch: 1,274,409 (24.4%) String#start_with?: 328,017 ( 6.3%) Regexp#match?: 147,525 ( 2.8%) Hash#key?: 139,198 ( 2.7%) Kernel#is_a?: 110,178 ( 2.1%) Class#allocate: 107,143 ( 2.1%) Hash#delete: 106,307 ( 2.0%) Class#superclass: 98,165 ( 1.9%) Set#include?: 97,934 ( 1.9%) Integer#===: 95,874 ( 1.8%) Process.clock_gettime: 92,795 ( 1.8%) String#sub!: 80,732 ( 1.5%) String.new: 80,730 ( 1.5%) SQLite3::Statement#done?: 73,532 ( 1.4%) SQLite3::Statement#step: 73,532 ( 1.4%) Time#plus_without_duration: 66,724 ( 1.3%) String#<<: 63,954 ( 1.2%) Kernel#dup: 62,590 ( 1.2%) Time#to_i: 60,814 ( 1.2%) Time#subsec: 60,363 ( 1.2%) Top-20 calls to C functions from JIT code (80.8% of total 33,681,248): rb_vm_opt_send_without_block: 6,869,559 (20.4%) rb_hash_aref: 2,487,056 ( 7.4%) rb_vm_env_write: 2,372,693 ( 7.0%) rb_zjit_writebarrier_check_immediate: 2,238,890 ( 6.6%) rb_vm_getinstancevariable: 1,861,700 ( 5.5%) rb_ivar_get_at_no_ractor_check: 1,702,246 ( 5.1%) rb_vm_send: 1,468,202 ( 4.4%) 
rb_hash_aset: 1,267,469 ( 3.8%) rb_obj_is_kind_of: 1,126,363 ( 3.3%) rb_vm_setinstancevariable: 1,055,131 ( 3.1%) Hash#fetch: 987,402 ( 2.9%) rb_vm_opt_getconstant_path: 641,779 ( 1.9%) rb_vm_invokesuper: 603,416 ( 1.8%) rb_vm_invokeblock: 545,743 ( 1.6%) rb_class_allocate_instance: 415,748 ( 1.2%) rb_ec_ary_new_from_values: 380,080 ( 1.1%) String#start_with?: 328,017 ( 1.0%) rb_hash_new_with_size: 289,172 ( 0.9%) fetch: 287,007 ( 0.9%) rb_vm_sendforward: 283,885 ( 0.8%) Top-1 not optimized method types for send (100.0% of total 428): null: 428 (100.0%) Top-3 not optimized method types for send_without_block (100.0% of total 202,329): optimized_send: 190,504 (94.2%) null: 10,844 ( 5.4%) optimized_block_call: 981 ( 0.5%) Top-3 not optimized method types for super (100.0% of total 517,421): cfunc: 489,236 (94.6%) alias: 26,398 ( 5.1%) attrset: 1,787 ( 0.3%) Top-4 instructions with uncategorized fallback reason (100.0% of total 867,452): invokeblock: 545,743 (62.9%) sendforward: 283,885 (32.7%) invokesuperforward: 29,713 ( 3.4%) opt_send_without_block: 8,111 ( 0.9%) Top-20 send fallback reasons (100.0% of total 9,800,518): singleton_class_seen: 3,293,078 (33.6%) send_without_block_no_profiles: 2,142,301 (21.9%) uncategorized: 867,452 ( 8.9%) send_no_profiles: 820,538 ( 8.4%) send_without_block_polymorphic: 780,065 ( 8.0%) one_or_more_complex_arg_pass: 556,514 ( 5.7%) super_not_optimized_method_type: 517,421 ( 5.3%) send_without_block_not_optimized_method_type_optimized: 191,485 ( 2.0%) send_without_block_megamorphic: 161,550 ( 1.6%) too_many_args_for_lir: 127,190 ( 1.3%) send_polymorphic: 111,290 ( 1.1%) send_without_block_not_optimized_need_permission: 99,526 ( 1.0%) super_polymorphic: 45,651 ( 0.5%) super_complex_args_pass: 33,748 ( 0.3%) obj_to_string_not_string: 13,794 ( 0.1%) send_without_block_not_optimized_method_type: 10,844 ( 0.1%) argc_param_mismatch: 9,927 ( 0.1%) send_without_block_direct_keyword_mismatch: 6,336 ( 0.1%) super_target_complex_args_pass: 
5,108 ( 0.1%) send_megamorphic: 4,525 ( 0.0%) Top-4 setivar fallback reasons (100.0% of total 1,123,837): not_monomorphic: 1,087,274 (96.7%) not_t_object: 21,354 ( 1.9%) too_complex: 15,188 ( 1.4%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 2,132,203): not_monomorphic: 2,092,243 (98.1%) too_complex: 39,960 ( 1.9%) Top-3 definedivar fallback reasons (100.0% of total 107,264): not_monomorphic: 105,748 (98.6%) too_complex: 796 ( 0.7%) not_t_object: 720 ( 0.7%) Top-6 invokeblock handler (100.0% of total 545,743): monomorphic_iseq: 249,809 (45.8%) polymorphic: 217,914 (39.9%) monomorphic_ifunc: 46,244 ( 8.5%) monomorphic_other: 27,938 ( 5.1%) megamorphic: 2,943 ( 0.5%) no_profiles: 895 ( 0.2%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 651,185): param_forwardable: 233,989 (35.9%) param_block: 205,158 (31.5%) param_rest: 100,319 (15.4%) param_kwrest: 44,596 ( 6.8%) caller_blockarg: 21,863 ( 3.4%) caller_kw_splat: 20,970 ( 3.2%) caller_splat: 18,106 ( 2.8%) caller_kwarg: 6,184 ( 0.9%) Top-1 compile error reasons (100.0% of total 38,980): exception_handler: 38,980 (100.0%) Top-5 unhandled YARV insns (100.0% of total 4,154): getconstant: 2,566 (61.8%) checkmatch: 929 (22.4%) setblockparam: 443 (10.7%) once: 171 ( 4.1%) expandarray: 45 ( 1.1%) Top-3 unhandled HIR insns (100.0% of total 75,633): throw: 39,447 (52.2%) invokebuiltin: 35,775 (47.3%) array_max: 411 ( 0.5%) Top-20 side exit reasons (100.0% of total 3,734,975): guard_shape_failure: 1,908,302 (51.1%) guard_type_failure: 1,391,624 (37.3%) block_param_proxy_not_iseq_or_ifunc: 246,820 ( 6.6%) unhandled_hir_insn: 75,633 ( 2.0%) compile_error: 38,980 ( 1.0%) patchpoint_stable_constant_names: 25,375 ( 0.7%) block_param_proxy_modified: 13,713 ( 0.4%) fixnum_lshift_overflow: 10,085 ( 0.3%) fixnum_mult_overflow: 8,550 ( 0.2%) unhandled_yarv_insn: 4,154 ( 0.1%) unhandled_block_arg: 2,548 ( 0.1%) unhandled_newarray_send_pack: 2,322 ( 0.1%) 
patchpoint_no_singleton_class: 2,008 ( 0.1%) patchpoint_no_ep_escape: 1,683 ( 0.0%) obj_to_string_fallback: 1,358 ( 0.0%) expandarray_failure: 837 ( 0.0%) patchpoint_method_redefined: 710 ( 0.0%) guard_less_failure: 163 ( 0.0%) guard_super_method_entry: 53 ( 0.0%) interrupt: 38 ( 0.0%) send_count: 45,128,693 dynamic_send_count: 9,800,518 (21.7%) optimized_send_count: 35,328,175 (78.3%) dynamic_setivar_count: 1,123,837 ( 2.5%) dynamic_getivar_count: 2,132,203 ( 4.7%) dynamic_definedivar_count: 107,264 ( 0.2%) iseq_optimized_send_count: 15,891,453 (35.2%) inline_cfunc_optimized_send_count: 12,866,297 (28.5%) inline_iseq_optimized_send_count: 1,102,971 ( 2.4%) non_variadic_cfunc_optimized_send_count: 2,857,775 ( 6.3%) variadic_cfunc_optimized_send_count: 2,609,679 ( 5.8%) compiled_iseq_count: 5,268 failed_iseq_count: 0 compile_time: 1,558ms profile_time: 10ms gc_time: 13ms invalidation_time: 84ms vm_write_pc_count: 39,300,901 vm_write_sp_count: 39,300,901 vm_write_locals_count: 38,133,357 vm_write_stack_count: 38,133,357 vm_write_to_parent_iseq_local_count: 305,249 vm_read_from_parent_iseq_local_count: 4,818,083 guard_type_count: 48,036,224 guard_type_exit_ratio: 2.9% guard_shape_count: 19,302,903 guard_shape_exit_ratio: 9.9% code_region_bytes: 29,491,200 zjit_alloc_bytes: 34,932,040 total_mem_bytes: 64,423,240 side_exit_count: 3,734,975 total_insn_count: 272,964,960 vm_insn_count: 46,583,034 zjit_insn_count: 226,381,926 ratio_in_zjit: 82.9% ``` </details>
1 parent fbff0c9 commit 33d8284

5 files changed

Lines changed: 288 additions & 3 deletions

File tree

zjit/src/codegen.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
526526
&Insn::UnboxFixnum { val } => gen_unbox_fixnum(asm, opnd!(val)),
527527
Insn::Test { val } => gen_test(asm, opnd!(val)),
528528
Insn::RefineType { val, .. } => opnd!(val),
529+
Insn::HasType { val, expected } => gen_has_type(asm, opnd!(val), *expected),
529530
Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)),
530531
Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)),
531532
&Insn::GuardBitEquals { val, expected, reason, state } => gen_guard_bit_equals(jit, asm, opnd!(val), expected, reason, &function.frame_state(state)),
@@ -2187,6 +2188,69 @@ fn gen_test(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd {
21872188
asm.csel_e(0.into(), 1.into())
21882189
}
21892190

2191+
fn gen_has_type(asm: &mut Assembler, val: lir::Opnd, ty: Type) -> lir::Opnd {
2192+
if ty.is_subtype(types::Fixnum) {
2193+
asm.test(val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
2194+
asm.csel_nz(Opnd::Imm(1), Opnd::Imm(0))
2195+
} else if ty.is_subtype(types::Flonum) {
2196+
// Flonum: (val & RUBY_FLONUM_MASK) == RUBY_FLONUM_FLAG
2197+
let masked = asm.and(val, Opnd::UImm(RUBY_FLONUM_MASK as u64));
2198+
asm.cmp(masked, Opnd::UImm(RUBY_FLONUM_FLAG as u64));
2199+
asm.csel_e(Opnd::Imm(1), Opnd::Imm(0))
2200+
} else if ty.is_subtype(types::StaticSymbol) {
2201+
// Static symbols have (val & 0xff) == RUBY_SYMBOL_FLAG
2202+
// Use 8-bit comparison like YJIT does. GuardType should not be used
2203+
// for a known VALUE, which with_num_bits() does not support.
2204+
asm.cmp(val.with_num_bits(8), Opnd::UImm(RUBY_SYMBOL_FLAG as u64));
2205+
asm.csel_e(Opnd::Imm(1), Opnd::Imm(0))
2206+
} else if ty.is_subtype(types::NilClass) {
2207+
asm.cmp(val, Qnil.into());
2208+
asm.csel_e(Opnd::Imm(1), Opnd::Imm(0))
2209+
} else if ty.is_subtype(types::TrueClass) {
2210+
asm.cmp(val, Qtrue.into());
2211+
asm.csel_e(Opnd::Imm(1), Opnd::Imm(0))
2212+
} else if ty.is_subtype(types::FalseClass) {
2213+
asm.cmp(val, Qfalse.into());
2214+
asm.csel_e(Opnd::Imm(1), Opnd::Imm(0))
2215+
} else if ty.is_immediate() {
2216+
// All immediate types' guard should have been handled above
2217+
panic!("unexpected immediate guard type: {ty}");
2218+
} else if let Some(expected_class) = ty.runtime_exact_ruby_class() {
2219+
// If val isn't in a register, load it to use it as the base of Opnd::mem later.
2220+
// TODO: Max thinks codegen should not care about the shapes of the operands except to create them. (Shopify/ruby#685)
2221+
let val = match val {
2222+
Opnd::Reg(_) | Opnd::VReg { .. } => val,
2223+
_ => asm.load(val),
2224+
};
2225+
2226+
let ret_label = asm.new_label("true");
2227+
let false_label = asm.new_label("false");
2228+
2229+
// Check if it's a special constant
2230+
asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into());
2231+
asm.jnz(false_label.clone());
2232+
2233+
// Check if it's false
2234+
asm.cmp(val, Qfalse.into());
2235+
asm.je(false_label.clone());
2236+
2237+
// Load the class from the object's klass field
2238+
let klass = asm.load(Opnd::mem(64, val, RUBY_OFFSET_RBASIC_KLASS));
2239+
asm.cmp(klass, Opnd::Value(expected_class));
2240+
asm.jmp(ret_label.clone());
2241+
2242+
// If we get here then the value was false, unset the Z flag
2243+
// so that csel_e will select false instead of true
2244+
asm.write_label(false_label);
2245+
asm.test(Opnd::UImm(1), Opnd::UImm(1));
2246+
2247+
asm.write_label(ret_label);
2248+
asm.csel_e(Opnd::UImm(1), Opnd::Imm(0))
2249+
} else {
2250+
unimplemented!("unsupported type: {ty}");
2251+
}
2252+
}
2253+
21902254
/// Compile a type check with a side exit
21912255
fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> lir::Opnd {
21922256
gen_incr_counter(asm, Counter::guard_type_count);

zjit/src/distribution.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ enum DistributionKind {
6969
SkewedMegamorphic,
7070
}
7171

72-
#[derive(Debug)]
72+
#[derive(Debug, Clone)]
7373
pub struct DistributionSummary<T: Copy + PartialEq + Default + std::fmt::Debug, const N: usize> {
7474
kind: DistributionKind,
7575
buckets: [T; N],
@@ -134,6 +134,10 @@ impl<T: Copy + PartialEq + Default + std::fmt::Debug, const N: usize> Distributi
134134
assert!(idx < N, "index {idx} out of bounds for buckets[{N}]");
135135
self.buckets[idx]
136136
}
137+
138+
/// Borrow all of this summary's buckets as a slice.
pub fn buckets(&self) -> &[T] {
    &self.buckets[..]
}
137141
}
138142

139143
#[cfg(test)]

zjit/src/hir.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,7 @@ pub enum SendFallbackReason {
626626
SendWithoutBlockNotOptimizedNeedPermission,
627627
SendWithoutBlockBopRedefined,
628628
SendWithoutBlockOperandsNotFixnum,
629+
SendWithoutBlockPolymorphicFallback,
629630
SendDirectKeywordMismatch,
630631
SendDirectKeywordCountMismatch,
631632
SendDirectMissingKeyword,
@@ -687,6 +688,7 @@ impl Display for SendFallbackReason {
687688
SendNotOptimizedNeedPermission => write!(f, "Send: method private or protected and no FCALL"),
688689
SendWithoutBlockBopRedefined => write!(f, "SendWithoutBlock: basic operation was redefined"),
689690
SendWithoutBlockOperandsNotFixnum => write!(f, "SendWithoutBlock: operands are not fixnums"),
691+
SendWithoutBlockPolymorphicFallback => write!(f, "SendWithoutBlock: polymorphic fallback"),
690692
SendDirectKeywordMismatch => write!(f, "SendDirect: keyword mismatch"),
691693
SendDirectKeywordCountMismatch => write!(f, "SendDirect: keyword count mismatch"),
692694
SendDirectMissingKeyword => write!(f, "SendDirect: missing keyword"),
@@ -1016,6 +1018,8 @@ pub enum Insn {
10161018
/// Refine the known type information of val with additional type information.
10171019
/// Computes the intersection of the existing type and the new type.
10181020
RefineType { val: InsnId, new_type: Type },
1021+
/// Return CBool[true] if val has the type `expected` and CBool[false] otherwise.
1022+
HasType { val: InsnId, expected: Type },
10191023

10201024
/// Side-exit if val doesn't have the expected type.
10211025
GuardType { val: InsnId, guard_type: Type, state: InsnId },
@@ -1242,6 +1246,7 @@ impl Insn {
12421246
Insn::CheckInterrupts { .. } => effects::Any,
12431247
Insn::InvokeProc { .. } => effects::Any,
12441248
Insn::RefineType { .. } => effects::Empty,
1249+
Insn::HasType { .. } => effects::Empty,
12451250
}
12461251
}
12471252

@@ -1546,6 +1551,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
15461551
Insn::FixnumRShift { left, right, .. } => { write!(f, "FixnumRShift {left}, {right}") },
15471552
Insn::GuardType { val, guard_type, .. } => { write!(f, "GuardType {val}, {}", guard_type.print(self.ptr_map)) },
15481553
Insn::RefineType { val, new_type, .. } => { write!(f, "RefineType {val}, {}", new_type.print(self.ptr_map)) },
1554+
Insn::HasType { val, expected, .. } => { write!(f, "HasType {val}, {}", expected.print(self.ptr_map)) },
15491555
Insn::GuardTypeNot { val, guard_type, .. } => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) },
15501556
Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) },
15511557
Insn::GuardAnyBitSet { val, mask, .. } => { write!(f, "GuardBitSet {val}, {}", mask.print(self.ptr_map)) },
@@ -2235,6 +2241,7 @@ impl Function {
22352241
&IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) },
22362242
&IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) },
22372243
&RefineType { val, new_type } => RefineType { val: find!(val), new_type },
2244+
&HasType { val, expected } => HasType { val: find!(val), expected },
22382245
&GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state },
22392246
&GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state },
22402247
&GuardBitEquals { val, expected, reason, state } => GuardBitEquals { val: find!(val), expected, reason, state },
@@ -2497,6 +2504,7 @@ impl Function {
24972504
&Insn::CCallVariadic { return_type, .. } => return_type,
24982505
Insn::GuardType { val, guard_type, .. } => self.type_of(*val).intersection(*guard_type),
24992506
Insn::RefineType { val, new_type, .. } => self.type_of(*val).intersection(*new_type),
2507+
Insn::HasType { .. } => types::CBool,
25002508
Insn::GuardTypeNot { .. } => types::BasicObject,
25012509
Insn::GuardBitEquals { val, expected, .. } => self.type_of(*val).intersection(Type::from_const(*expected)),
25022510
Insn::GuardAnyBitSet { val, .. } => self.type_of(*val),
@@ -2860,6 +2868,22 @@ impl Function {
28602868
self.resolve_receiver_type_from_profile(recv, insn_idx)
28612869
}
28622870

2871+
/// Return the profiled type distribution for `recv` at `insn_idx` when that
/// distribution is polymorphic.
///
/// Looks up the profile entries recorded for `insn_idx` and picks the first entry
/// whose instruction resolves (through the union-find) to the same instruction as
/// the chased `recv`.
///
/// Returns `None` when there are no profile entries for `insn_idx`, when no entry
/// matches `recv`, or when the matching entry's summary is not polymorphic.
/// Otherwise returns a clone of the matching summary.
fn polymorphic_summary(&self, profiles: &ProfileOracle, recv: InsnId, insn_idx: usize) -> Option<TypeDistributionSummary> {
    let entries = profiles.types.get(&insn_idx)?;
    let recv = self.chase_insn(recv);
    for (entry_insn, entry_type_summary) in entries {
        if self.union_find.borrow().find_const(*entry_insn) == recv {
            // Only the first matching entry is consulted; a non-polymorphic
            // match ends the search with `None`.
            return entry_type_summary.is_polymorphic().then(|| entry_type_summary.clone());
        }
    }
    None
}
2886+
28632887
/// Resolve the receiver type for method dispatch optimization from profile data.
28642888
///
28652889
/// Returns:
@@ -4608,6 +4632,7 @@ impl Function {
46084632
worklist.push_back(state);
46094633
}
46104634
| &Insn::RefineType { val, .. }
4635+
| &Insn::HasType { val, .. }
46114636
| &Insn::Return { val }
46124637
| &Insn::Test { val }
46134638
| &Insn::SetLocal { val, .. }
@@ -5579,6 +5604,7 @@ impl Function {
55795604
self.assert_subtype(insn_id, class, types::Class)
55805605
}
55815606
Insn::RefineType { .. } => Ok(()),
5607+
Insn::HasType { val, .. } => self.assert_subtype(insn_id, val, types::BasicObject),
55825608
}
55835609
}
55845610

@@ -6939,6 +6965,71 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
69396965
}
69406966
let argc = unsafe { vm_ci_argc((*cd).ci) };
69416967

6968+
{
6969+
fn new_branch_block(
6970+
fun: &mut Function,
6971+
cd: *const rb_call_data,
6972+
argc: usize,
6973+
opcode: u32,
6974+
new_type: Type,
6975+
insn_idx: u32,
6976+
exit_state: &FrameState,
6977+
locals_count: usize,
6978+
stack_count: usize,
6979+
join_block: BlockId,
6980+
) -> BlockId {
6981+
let block = fun.new_block(insn_idx);
6982+
let self_param = fun.push_insn(block, Insn::Param);
6983+
let mut state = exit_state.clone();
6984+
state.locals.clear();
6985+
state.stack.clear();
6986+
state.locals.extend((0..locals_count).map(|_| fun.push_insn(block, Insn::Param)));
6987+
state.stack.extend((0..stack_count).map(|_| fun.push_insn(block, Insn::Param)));
6988+
let snapshot = fun.push_insn(block, Insn::Snapshot { state: state.clone() });
6989+
let args = state.stack_pop_n(argc).unwrap();
6990+
let recv = state.stack_pop().unwrap();
6991+
let refined_recv = fun.push_insn(block, Insn::RefineType { val: recv, new_type });
6992+
state.replace(recv, refined_recv);
6993+
let send = fun.push_insn(block, Insn::SendWithoutBlock { recv: refined_recv, cd, args, state: snapshot, reason: Uncategorized(opcode) });
6994+
state.stack_push(send);
6995+
fun.push_insn(block, Insn::Jump(BranchEdge { target: join_block, args: state.as_args(self_param) }));
6996+
block
6997+
}
6998+
let branch_insn_idx = exit_state.insn_idx as u32;
6999+
let locals_count = state.locals.len();
7000+
let stack_count = state.stack.len();
7001+
let recv = state.stack_topn(argc as usize)?; // args are on top
7002+
let entry_args = state.as_args(self_param);
7003+
if let Some(summary) = fun.polymorphic_summary(&profiles, recv, exit_state.insn_idx) {
7004+
let join_block = insn_idx_to_block.get(&insn_idx).copied().unwrap_or_else(|| fun.new_block(insn_idx));
7005+
// TODO(max): Only iterate over unique classes, not unique (class, shape) pairs.
7006+
for &profiled_type in summary.buckets() {
7007+
if profiled_type.is_empty() { break; }
7008+
let expected = Type::from_profiled_type(profiled_type);
7009+
let has_type = fun.push_insn(block, Insn::HasType { val: recv, expected });
7010+
let iftrue_block =
7011+
new_branch_block(&mut fun, cd, argc as usize, opcode, expected, branch_insn_idx, &exit_state, locals_count, stack_count, join_block);
7012+
let target = BranchEdge { target: iftrue_block, args: entry_args.clone() };
7013+
fun.push_insn(block, Insn::IfTrue { val: has_type, target });
7014+
}
7015+
// Continue compilation from the join block at the next instruction.
7016+
// Make a copy of the current state without the args (pop the receiver
7017+
// and push the result) because we just use the locals/stack sizes to
7018+
// make the right number of Params
7019+
let mut join_state = state.clone();
7020+
join_state.stack_pop_n(argc as usize)?;
7021+
queue.push_back((join_state, join_block, insn_idx, local_inval));
7022+
// In the fallthrough case, do a generic interpreter send and then join.
7023+
let args = state.stack_pop_n(argc as usize)?;
7024+
let recv = state.stack_pop()?;
7025+
let reason = SendWithoutBlockPolymorphicFallback;
7026+
let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id, reason });
7027+
state.stack_push(send);
7028+
fun.push_insn(block, Insn::Jump(BranchEdge { target: join_block, args: state.as_args(self_param) }));
7029+
break; // End the block
7030+
}
7031+
}
7032+
69427033
let args = state.stack_pop_n(argc as usize)?;
69437034
let recv = state.stack_pop()?;
69447035
let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id, reason: Uncategorized(opcode) });

0 commit comments

Comments
 (0)