From c3c1b115f30aef84296062cb86017619627f01ae Mon Sep 17 00:00:00 2001 From: Shreyas Ravi Date: Mon, 23 Feb 2026 19:29:47 -0800 Subject: [PATCH 1/3] cranelift/x64: implement cls for all integer types --- cranelift/codegen/src/isa/x64/lower.isle | 41 ++++++++++++++ .../filetests/filetests/isa/x64/cls.clif | 53 +++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 cranelift/filetests/filetests/isa/x64/cls.clif diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index a552eee7acc6..7cc4393128b5 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2332,6 +2332,47 @@ (rule 0 (do_ctz ty orig_ty src) (bsf_or_else ty src (imm $I64 (ty_bits_u64 orig_ty)))) +;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 2 (lower (has_type (ty_32_or_64 ty) (cls src))) + (do_cls ty src)) + +(rule 1 (lower (has_type (ty_8_or_16 ty) (cls src))) + (let ((extended Gpr (extend_to_gpr src $I32 (ExtendKind.Sign))) + (cls Gpr (do_cls $I32 extended))) + (x64_sub $I32 cls (RegMemImm.Imm (u32_wrapping_sub 32 (ty_bits ty)))))) + +(rule 0 (lower + (has_type $I128 + (cls src))) + (let ((upper Gpr (do_cls $I64 (value_regs_get_gpr src 1))) + (sign_fill Gpr (x64_sarq_mi (value_regs_get_gpr src 1) 63)) + (xored Gpr (x64_xor $I64 (value_regs_get_gpr src 0) sign_fill)) + (lower Gpr (x64_add $I64 + (do_clz $I64 $I64 xored) + (RegMemImm.Imm 63))) + (result_lo Gpr + (with_flags_reg + (x64_cmpq_mi_sxb upper 63) + (cmove $I64 (CC.NZ) upper lower)))) + (value_regs result_lo (imm $I64 0)))) + +;; Implementation helper for cls; operates on 32 or 64-bit units. 
+(decl do_cls (Type Gpr) Gpr) + +;; cls is implemented via clz using the identity: cls(x) = clz(x ^ (x >> 1)) - 1, where `>>` is an arithmetic (sign-propagating) right shift +(rule 1 (do_cls $I64 src) + (let ((shifted Gpr (x64_sarq_mi src 1)) + (xored Gpr (x64_xor $I64 src (RegMemImm.Reg shifted))) + (clz Gpr (do_clz $I64 $I64 xored))) + (x64_sub $I64 clz (RegMemImm.Imm 1)))) + +(rule 0 (do_cls $I32 src) + (let ((shifted Gpr (x64_sarl_mi src 1)) + (xored Gpr (x64_xor $I32 src (RegMemImm.Reg shifted))) + (clz Gpr (do_clz $I32 $I32 xored))) + (x64_sub $I32 clz (RegMemImm.Imm 1)))) + ;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 4 (lower (has_type (ty_32_or_64 ty) (popcnt src))) diff --git a/cranelift/filetests/filetests/isa/x64/cls.clif b/cranelift/filetests/filetests/isa/x64/cls.clif new file mode 100644 index 000000000000..6ae757ea2b74 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/cls.clif @@ -0,0 +1,53 @@ +test interpret +test run +set enable_llvm_abi_extensions=true +target x86_64 + +function %a(i64) -> i64 { +block0(v0: i64): + v1 = cls.i64 v0 + return v1 +} + +; run: %a(1) == 62 +; run: %a(0) == 63 ;; all zeros, 63 matching sign bits +; run: %a(-1) == 63 ;; all ones, 63 matching sign bits +; run: %a(1) == 62 ;; already in the issue +; run: %a(-2) == 62 ;; 1111...1110, one bit differs + +function %b(i32) -> i32 { +block0(v0: i32): + v1 = cls.i32 v0 + return v1 +} +; run: %b(1) == 30 +; run: %b(0) == 31 +; run: %b(-1) == 31 +; run: %b(-2) == 30 + +function %c(i16) -> i16 { +block0(v0: i16): + v1 = cls.i16 v0 + return v1 +} +; run: %c(1) == 14 +; run: %c(0) == 15 +; run: %c(-1) == 15 + +function %d(i8) -> i8 { +block0(v0: i8): + v1 = cls.i8 v0 + return v1 +} +; run: %d(1) == 6 +; run: %d(0) == 7 +; run: %d(-1) == 7 + +function %e(i128) -> i128 { +block0(v0: i128): + v1 = cls.i128 v0 + return v1 +} +; run: %e(1) == 126 +; run: %e(0) == 127 +; run: %e(-1) == 127 From 98c927f726d13da5d6858f543794906178c73b91 Mon Sep 17 00:00:00 2001 From: Shreyas Ravi Date: Tue, 24 Feb 2026 
15:20:03 -0800 Subject: [PATCH 2/3] refactored to use x64_sar; moved tests to runtests --- cranelift/codegen/src/isa/x64/lower.isle | 16 ++---- .../filetests/filetests/isa/x64/cls.clif | 53 ------------------- .../filetests/filetests/runtests/cls.clif | 13 ++++- 3 files changed, 17 insertions(+), 65 deletions(-) delete mode 100644 cranelift/filetests/filetests/isa/x64/cls.clif diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 7cc4393128b5..3d09b9931d73 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2361,17 +2361,11 @@ (decl do_cls (Type Gpr) Gpr) ;; cls is implemented via clz using the identity: cls(x) = clz(x ^ (x >> 1)) - 1, where `>>` is an arithmetic (sign-propagating) right shift -(rule 1 (do_cls $I64 src) - (let ((shifted Gpr (x64_sarq_mi src 1)) - (xored Gpr (x64_xor $I64 src (RegMemImm.Reg shifted))) - (clz Gpr (do_clz $I64 $I64 xored))) - (x64_sub $I64 clz (RegMemImm.Imm 1)))) - -(rule 0 (do_cls $I32 src) - (let ((shifted Gpr (x64_sarl_mi src 1)) - (xored Gpr (x64_xor $I32 src (RegMemImm.Reg shifted))) - (clz Gpr (do_clz $I32 $I32 xored))) - (x64_sub $I32 clz (RegMemImm.Imm 1)))) +(rule (do_cls ty src) + (let ((shifted Gpr (x64_sar ty src (Imm8Gpr.Imm8 1))) + (xored Gpr (x64_xor ty src (RegMemImm.Reg shifted))) + (clz Gpr (do_clz ty ty xored))) + (x64_sub ty clz (RegMemImm.Imm 1)))) ;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/filetests/filetests/isa/x64/cls.clif b/cranelift/filetests/filetests/isa/x64/cls.clif deleted file mode 100644 index 6ae757ea2b74..000000000000 --- a/cranelift/filetests/filetests/isa/x64/cls.clif +++ /dev/null @@ -1,53 +0,0 @@ -test interpret -test run -set enable_llvm_abi_extensions=true -target x86_64 - -function %a(i64) -> i64 { -block0(v0: i64): - v1 = cls.i64 v0 - return v1 -} - -; run: %a(1) == 62 -; run: %a(0) == 63 ;; all zeros, 63 matching sign bits -; run: %a(-1) == 63 ;; all ones, 63 matching sign bits -; run: 
%a(1) == 62 ;; already in the issue -; run: %a(-2) == 62 ;; 1111...1110, one bit differs - -function %b(i32) -> i32 { -block0(v0: i32): - v1 = cls.i32 v0 - return v1 -} -; run: %b(1) == 30 -; run: %b(0) == 31 -; run: %b(-1) == 31 -; run: %b(-2) == 30 - -function %c(i16) -> i16 { -block0(v0: i16): - v1 = cls.i16 v0 - return v1 -} -; run: %c(1) == 14 -; run: %c(0) == 15 -; run: %c(-1) == 15 - -function %d(i8) -> i8 { -block0(v0: i8): - v1 = cls.i8 v0 - return v1 -} -; run: %d(1) == 6 -; run: %d(0) == 7 -; run: %d(-1) == 7 - -function %e(i128) -> i128 { -block0(v0: i128): - v1 = cls.i128 v0 - return v1 -} -; run: %e(1) == 126 -; run: %e(0) == 127 -; run: %e(-1) == 127 diff --git a/cranelift/filetests/filetests/runtests/cls.clif b/cranelift/filetests/filetests/runtests/cls.clif index 5df34dfd0c9d..bd601d085120 100644 --- a/cranelift/filetests/filetests/runtests/cls.clif +++ b/cranelift/filetests/filetests/runtests/cls.clif @@ -1,11 +1,12 @@ test interpret test run +set enable_llvm_abi_extensions=true target aarch64 +target x86_64 target riscv64 target riscv64 has_zbb target riscv64 has_c has_zcb target s390x -; not implemented on `x86_64` function %cls_i8(i8) -> i8 { block0(v0: i8): @@ -46,3 +47,13 @@ block0(v0: i64): ; run: %cls_i64(0x4000000000000000) == 0 ; run: %cls_i64(-1) == 63 ; run: %cls_i64(0) == 63 + +function %cls_i128(i128) -> i128 { +block0(v0: i128): + v1 = cls.i128 v0 + return v1 +} +; run: %cls_i128(1) == 126 +; run: %cls_i128(0x40000000000000000000000000000000) == 0 +; run: %cls_i128(0) == 127 +; run: %cls_i128(-1) == 127 From e696e4eebc01fac35dc2337cd65488bcd4d247e4 Mon Sep 17 00:00:00 2001 From: Shreyas Ravi Date: Wed, 25 Feb 2026 08:51:23 -0800 Subject: [PATCH 3/3] move i128-cls tests into cls tests --- .../filetests/filetests/runtests/cls.clif | 16 ++++++++++--- .../filetests/runtests/i128-cls.clif | 23 ------------------- 2 files changed, 13 insertions(+), 26 deletions(-) delete mode 100644 cranelift/filetests/filetests/runtests/i128-cls.clif diff 
--git a/cranelift/filetests/filetests/runtests/cls.clif b/cranelift/filetests/filetests/runtests/cls.clif index bd601d085120..5ee8b97356ab 100644 --- a/cranelift/filetests/filetests/runtests/cls.clif +++ b/cranelift/filetests/filetests/runtests/cls.clif @@ -6,6 +6,7 @@ target x86_64 target riscv64 target riscv64 has_zbb target riscv64 has_c has_zcb +set enable_multi_ret_implicit_sret target s390x function %cls_i8(i8) -> i8 { @@ -50,10 +51,19 @@ block0(v0: i64): function %cls_i128(i128) -> i128 { block0(v0: i128): - v1 = cls.i128 v0 + v1 = cls v0 return v1 } ; run: %cls_i128(1) == 126 -; run: %cls_i128(0x40000000000000000000000000000000) == 0 +; run: %cls_i128(0x40000000_00000000_00000000_00000000) == 0 ; run: %cls_i128(0) == 127 -; run: %cls_i128(-1) == 127 +; run: %cls_i128(0x00000000_00000000_00000000_00000000) == 127 +; run: %cls_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 63 +; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 63 +; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 127 +; run: %cls_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0 +; run: %cls_i128(0x3FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 1 +; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_7FFFFFFF_FFFFFFFF) == 63 +; run: %cls_i128(0xC0000000_00000000_80000000_00000000) == 1 +; run: %cls_i128(0xC0000000_00000000_00000000_00000000) == 1 +; run: %cls_i128(0x80000000_00000000_80000000_00000000) == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-cls.clif b/cranelift/filetests/filetests/runtests/i128-cls.clif deleted file mode 100644 index e7a7f4672241..000000000000 --- a/cranelift/filetests/filetests/runtests/i128-cls.clif +++ /dev/null @@ -1,23 +0,0 @@ -test run -target aarch64 -target riscv64 -target riscv64 has_zbb -target riscv64 has_c has_zcb -set enable_multi_ret_implicit_sret -target s390x - -function %cls_i128(i128) -> i128 { -block0(v0: i128): - v1 = cls v0 - return v1 -} -; run: %cls_i128(0x00000000_00000000_00000000_00000000) == 127 -; run: 
%cls_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 63 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 63 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 127 -; run: %cls_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0 -; run: %cls_i128(0x3FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 1 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_7FFFFFFF_FFFFFFFF) == 63 -; run: %cls_i128(0xC0000000_00000000_80000000_00000000) == 1 -; run: %cls_i128(0xC0000000_00000000_00000000_00000000) == 1 -; run: %cls_i128(0x80000000_00000000_80000000_00000000) == 0