diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index a552eee7acc6..3d09b9931d73 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -2332,6 +2332,41 @@
 (rule 0 (do_ctz ty orig_ty src)
       (bsf_or_else ty src (imm $I64 (ty_bits_u64 orig_ty))))
 
+;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule 2 (lower (has_type (ty_32_or_64 ty) (cls src))) ;; 32- and 64-bit inputs go straight to `do_cls`.
+      (do_cls ty src))
+
+(rule 1 (lower (has_type (ty_8_or_16 ty) (cls src))) ;; 8- and 16-bit inputs are widened to 32 bits first.
+      (let ((extended Gpr (extend_to_gpr src $I32 (ExtendKind.Sign))) ;; Sign-extend so the new high bits are copies of the narrow value's sign bit.
+            (cls Gpr (do_cls $I32 extended)))
+        (x64_sub $I32 cls (RegMemImm.Imm (u32_wrapping_sub 32 (ty_bits ty)))))) ;; Remove the (32 - bits) sign copies that the extension added to the count.
+
+(rule 0 (lower ;; i128 inputs: combine a count taken on each 64-bit half.
+         (has_type $I128
+                   (cls src)))
+      (let ((upper Gpr (do_cls $I64 (value_regs_get_gpr src 1))) ;; cls of register 1 of `src`, used here as the upper half.
+            (sign_fill Gpr (x64_sarq_mi (value_regs_get_gpr src 1) 63)) ;; sarq by 63: every bit becomes a copy of register 1's sign bit.
+            (xored Gpr (x64_xor $I64 (value_regs_get_gpr src 0) sign_fill)) ;; Register 0 XOR sign_fill: a set bit marks a position that differs from the sign.
+            (lower Gpr (x64_add $I64
+                         (do_clz $I64 $I64 xored) ;; Number of leading bits of register 0 that equal the sign ...
+                         (RegMemImm.Imm 63))) ;; ... plus the 63 redundant sign bits of an upper half made only of sign bits.
+            (result_lo Gpr
+                       (with_flags_reg
+                        (x64_cmpq_mi_sxb upper 63) ;; upper == 63 means the whole upper half consists of sign bits.
+                        (cmove $I64 (CC.NZ) upper lower)))) ;; NOTE(review): presumably yields `upper` when upper != 63 and `lower` otherwise -- confirm cmove operand order.
+        (value_regs result_lo (imm $I64 0)))) ;; Result pair: `result_lo` in register 0, constant 0 in register 1.
+
+;; Implementation helper for cls; operates on 32 or 64-bit units. Takes the operand type and a Gpr, and returns the count in a Gpr.
+(decl do_cls (Type Gpr) Gpr)
+
+;; cls is implemented via clz using the identity: cls(x) = clz(x ^ (x >> 1)) - 1 (the shift is emitted with x64_sar).
+(rule (do_cls ty src)
+      (let ((shifted Gpr (x64_sar ty src (Imm8Gpr.Imm8 1))) ;; src shifted right by one with x64_sar.
+            (xored Gpr (x64_xor ty src (RegMemImm.Reg shifted))) ;; Bit i is set where bits i and i+1 of src differ; with a sign-preserving shift the top bit is always clear.
+            (clz Gpr (do_clz ty ty xored))) ;; Leading zeros of `xored`: the always-clear top bit plus the run of redundant sign bits.
+        (x64_sub ty clz (RegMemImm.Imm 1)))) ;; Subtract 1 so the always-clear top bit is not counted.
+
 ;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule 4 (lower (has_type (ty_32_or_64 ty) (popcnt src)))
diff --git a/cranelift/filetests/filetests/runtests/cls.clif b/cranelift/filetests/filetests/runtests/cls.clif
index 5df34dfd0c9d..5ee8b97356ab 100644
--- a/cranelift/filetests/filetests/runtests/cls.clif
+++ b/cranelift/filetests/filetests/runtests/cls.clif
@@ -1,11 +1,13 @@
 test interpret
 test run
+set enable_llvm_abi_extensions=true
 target aarch64
+target x86_64
 target riscv64
 target riscv64 has_zbb
 target riscv64 has_c has_zcb
+set enable_multi_ret_implicit_sret
 target s390x
-; not implemented on `x86_64`
 
 function %cls_i8(i8) -> i8 {
 block0(v0: i8):
@@ -46,3 +48,22 @@ block0(v0: i64):
 ; run: %cls_i64(0x4000000000000000) == 0
 ; run: %cls_i64(-1) == 63
 ; run: %cls_i64(0) == 63
+
+function %cls_i128(i128) -> i128 {
+block0(v0: i128):
+    v1 = cls v0
+    return v1
+}
+; run: %cls_i128(1) == 126
+; run: %cls_i128(0x40000000_00000000_00000000_00000000) == 0
+; run: %cls_i128(0) == 127
+; run: %cls_i128(0x00000000_00000000_00000000_00000000) == 127
+; run: %cls_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 63
+; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 63
+; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 127
+; run: %cls_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0
+; run: %cls_i128(0x3FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 1
+; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_7FFFFFFF_FFFFFFFF) == 63
+; run: %cls_i128(0xC0000000_00000000_80000000_00000000) == 1
+; run: %cls_i128(0xC0000000_00000000_00000000_00000000) == 1
+; run: %cls_i128(0x80000000_00000000_80000000_00000000) == 0
diff --git 
a/cranelift/filetests/filetests/runtests/i128-cls.clif b/cranelift/filetests/filetests/runtests/i128-cls.clif deleted file mode 100644 index e7a7f4672241..000000000000 --- a/cranelift/filetests/filetests/runtests/i128-cls.clif +++ /dev/null @@ -1,23 +0,0 @@ -test run -target aarch64 -target riscv64 -target riscv64 has_zbb -target riscv64 has_c has_zcb -set enable_multi_ret_implicit_sret -target s390x - -function %cls_i128(i128) -> i128 { -block0(v0: i128): - v1 = cls v0 - return v1 -} -; run: %cls_i128(0x00000000_00000000_00000000_00000000) == 127 -; run: %cls_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 63 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 63 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 127 -; run: %cls_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0 -; run: %cls_i128(0x3FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 1 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_7FFFFFFF_FFFFFFFF) == 63 -; run: %cls_i128(0xC0000000_00000000_80000000_00000000) == 1 -; run: %cls_i128(0xC0000000_00000000_00000000_00000000) == 1 -; run: %cls_i128(0x80000000_00000000_80000000_00000000) == 0