diff --git a/cranelift/codegen/src/opts/icmp.isle b/cranelift/codegen/src/opts/icmp.isle index e2d128b2dcb0..db6f2510b2d7 100644 --- a/cranelift/codegen/src/opts/icmp.isle +++ b/cranelift/codegen/src/opts/icmp.isle @@ -465,3 +465,12 @@ ;; `brif (clz X) bt be` branches when `clz(X) != 0`, i.e. MSB(X) == 0. (rule (simplify_skeleton (brif (clz x_ty X) _ _)) (replace_branch_cond (sge $I8 X (iconst_u x_ty 0)))) + +;; The same rewrites apply when an `ireduce` (truncation) sits between the +;; count and the brif: the count is at most the bit width (<= 128, which fits even in i8), so truncation preserves whether it is zero. +(rule (simplify_skeleton (brif (ireduce _ (ctz x_ty x)) _ _)) + (replace_branch_cond + (eq $I8 (band x_ty x (iconst_u x_ty 1)) (iconst_u x_ty 0)))) + +(rule (simplify_skeleton (brif (ireduce _ (clz x_ty x)) _ _)) + (replace_branch_cond (sge $I8 x (iconst_u x_ty 0)))) diff --git a/cranelift/filetests/filetests/egraph/brif-cnt-cond.clif b/cranelift/filetests/filetests/egraph/brif-cnt-cond.clif index c2fc4b11ce5c..a08a998b84fb 100644 --- a/cranelift/filetests/filetests/egraph/brif-cnt-cond.clif +++ b/cranelift/filetests/filetests/egraph/brif-cnt-cond.clif @@ -130,3 +130,67 @@ block2: ; v3 = iconst.i32 200 ; return v3 ; v3 = 200 ; } + +;; Same with a truncating `ireduce` between ctz and brif. +function %brif_ireduce_ctz_i64(i64) -> i32 { +block0(v0: i64): + v1 = ctz v0 + v2 = ireduce.i32 v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i32 100 + return v3 + +block2: + v4 = iconst.i32 200 + return v4 +} + +; function %brif_ireduce_ctz_i64(i64) -> i32 fast { +; block0(v0: i64): +; v5 = iconst.i64 1 +; v6 = band v0, v5 ; v5 = 1 +; v7 = iconst.i64 0 +; v8 = icmp eq v6, v7 ; v7 = 0 +; brif v8, block1, block2 +; +; block1: +; v3 = iconst.i32 100 +; return v3 ; v3 = 100 +; +; block2: +; v4 = iconst.i32 200 +; return v4 ; v4 = 200 +; } + +;; Same shape with `clz` instead of `ctz`. 
+function %brif_ireduce_clz_i64(i64) -> i32 { +block0(v0: i64): + v1 = clz v0 + v2 = ireduce.i32 v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i32 100 + return v3 + +block2: + v4 = iconst.i32 200 + return v4 +} + +; function %brif_ireduce_clz_i64(i64) -> i32 fast { +; block0(v0: i64): +; v5 = iconst.i64 0 +; v6 = icmp sge v0, v5 ; v5 = 0 +; brif v6, block1, block2 +; +; block1: +; v3 = iconst.i32 100 +; return v3 ; v3 = 100 +; +; block2: +; v4 = iconst.i32 200 +; return v4 ; v4 = 200 +; } diff --git a/tests/disas/ctz-clz-bool-condition.wat b/tests/disas/ctz-clz-bool-condition.wat index 78b79355b86d..67e17de4ca23 100644 --- a/tests/disas/ctz-clz-bool-condition.wat +++ b/tests/disas/ctz-clz-bool-condition.wat @@ -73,6 +73,9 @@ (func $if_clz_ne0_i64 (param i64) (result i32) (i64.ne (i64.clz (local.get 0)) (i64.const 0)) if (result i32) i32.const 100 else i32.const 200 end) + (func $if_clz_bare_i64 (param i64) (result i32) + (i64.clz (local.get 0)) i32.wrap_i64 + if (result i32) i32.const 100 else i32.const 200 end) ;; ----- negative test: numeric comparison must NOT collapse ------------ ;; `ctz(x) == 4` is an arithmetic test on the count, not a boolean @@ -164,14 +167,11 @@ ;; wasm[0]::function[7]::if_ctz_bare_i64: ;; pushq %rbp ;; movq %rsp, %rbp -;; movl $0x40, %esi -;; bsfq %rdx, %r9 -;; cmoveq %rsi, %r9 -;; testl %r9d, %r9d -;; jne 0x1a4 -;; 19a: movl $0xc8, %eax -;; jmp 0x1a9 -;; 1a4: movl $0x64, %eax +;; testq $1, %rdx +;; je 0x19b +;; 191: movl $0xc8, %eax +;; jmp 0x1a0 +;; 19b: movl $0x64, %eax ;; movq %rbp, %rsp ;; popq %rbp ;; retq @@ -256,17 +256,29 @@ ;; popq %rbp ;; retq ;; -;; wasm[0]::function[15]::if_ctz_eq4_i32: +;; wasm[0]::function[15]::if_clz_bare_i64: +;; pushq %rbp +;; movq %rsp, %rbp +;; testq %rdx, %rdx +;; jge 0x2f7 +;; 2ed: movl $0xc8, %eax +;; jmp 0x2fc +;; 2f7: movl $0x64, %eax +;; movq %rbp, %rsp +;; popq %rbp +;; retq +;; +;; wasm[0]::function[16]::if_ctz_eq4_i32: ;; pushq %rbp ;; movq %rsp, %rbp ;; movl $0x20, %esi ;; bsfl %edx, 
%r9d ;; cmovel %esi, %r9d ;; cmpl $4, %r9d -;; je 0x305 -;; 2fb: movl $0xc8, %eax -;; jmp 0x30a -;; 305: movl $0x64, %eax +;; je 0x345 +;; 33b: movl $0xc8, %eax +;; jmp 0x34a +;; 345: movl $0x64, %eax ;; movq %rbp, %rsp ;; popq %rbp ;; retq