Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions cranelift/codegen/src/opts/icmp.isle
Original file line number Diff line number Diff line change
Expand Up @@ -414,3 +414,39 @@
(iconst_u _ k2)))
(if-let false (u64_eq k1 k2))
(ne select_ty inner_cond (iconst_u inner_ty 0)))

;;;;; Boolean-context simplifications for `ctz` and `clz` ;;;;;;;;;;;;;;;;;;;;;;
;;
;; When a count-trailing/leading-zeros instruction's result is fed into a
;; comparison against zero (the consumer cares whether the count is zero,
;; not its numeric value), rewrite to test the corresponding bit of X
;; directly:
;;
;; ctz(X) == 0 iff LSB of X is set iff (X & 1) != 0
;; clz(X) == 0 iff MSB of X is set iff X is signed-negative
;;
;; LZCNT/TZCNT/BSF/BSR each have ~3 cycles of latency on many Intel cores and
;; write a GPR (which on some microarchitectures also carries a false
;; dependency on the destination register's previous value); the rewritten
;; forms can instead lower to a single-cycle `test` whose result lives only in
;; flags. JIT-less interpreters benefit even more — their bit-counting paths
;; are typically loops.
;;
;; The matching wasm-side fold is in WebAssembly/binaryen#8562 (LSB→ctz under
;; `-Os`). With these mid-end rules in place, that fold becomes cycle-neutral
;; on cranelift JITs even when produced unconditionally.

;; A trailing-zero count of zero means bit 0 of `x` is set, so the comparison
;; `ctz(x) == 0` is replaced by the mask test `(x & 1) != 0`.
(rule (simplify (eq cmp_ty
                    (ctz ty x)
                    (iconst_u _ 0)))
      (ne cmp_ty
          (band ty x (iconst_u ty 1))
          (iconst_u ty 0)))

;; A non-zero trailing-zero count means bit 0 of `x` is clear, so the
;; comparison `ctz(x) != 0` is replaced by the mask test `(x & 1) == 0`.
(rule (simplify (ne cmp_ty
                    (ctz ty x)
                    (iconst_u _ 0)))
      (eq cmp_ty
          (band ty x (iconst_u ty 1))
          (iconst_u ty 0)))

;; A leading-zero count of zero means the top bit of `x` is set, which is the
;; same as `x` being negative when read as a signed integer: `x <s 0`.
;; The intent is a single sign-bit test on x86_64 (`test x, x; js`);
;; NOTE(review): confirm the actual lowering with a compile test.
(rule (simplify (eq cmp_ty
                    (clz ty x)
                    (iconst_u _ 0)))
      (slt cmp_ty
           x
           (iconst_u ty 0)))

;; A non-zero leading-zero count means the top bit of `x` is clear, which is
;; the same as `x` being non-negative when read as a signed integer:
;; `x >=s 0`.
(rule (simplify (ne cmp_ty
                    (clz ty x)
                    (iconst_u _ 0)))
      (sge cmp_ty
           x
           (iconst_u ty 0)))
98 changes: 98 additions & 0 deletions cranelift/filetests/filetests/egraph/cnt-bool-context.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
test optimize precise-output
set opt_level=speed
target x86_64

;; Boolean-context simplifications for ctz / clz: the result of the
;; bit-counting instruction is only used to test "is it zero?", which
;; reduces to a direct bit test on X.
;;
;; ctz(X) == 0 iff LSB of X set iff (X & 1) != 0
;; clz(X) == 0 iff MSB of X set iff X is signed-negative

;; ctz(X) == 0 → (X & 1) != 0
;;
;; i32 case. The input compares `ctz v0` for equality with an `iconst` 0; the
;; expected output that follows shows the `ctz` gone and the comparison
;; rewritten as `icmp ne (band v0, 1), 0`, reusing the original zero constant.
function %ctz_eq_zero_i32(i32) -> i8 {
block0(v0: i32):
;; Count of trailing zeros of the argument.
v1 = ctz v0
v2 = iconst.i32 0
;; "Is the count zero?" — the pattern the rewrite targets.
v3 = icmp eq v1, v2
return v3
}

; function %ctz_eq_zero_i32(i32) -> i8 fast {
; block0(v0: i32):
; v4 = iconst.i32 1
; v5 = band v0, v4 ; v4 = 1
; v2 = iconst.i32 0
; v6 = icmp ne v5, v2 ; v2 = 0
; return v6
; }

;; ctz(X) != 0 → (X & 1) == 0
;;
;; i64 case with the inverted condition. The input compares `ctz v0` for
;; inequality with an `iconst` 0; the expected output that follows shows the
;; comparison rewritten as `icmp eq (band v0, 1), 0`.
function %ctz_ne_zero_i64(i64) -> i8 {
block0(v0: i64):
;; Count of trailing zeros of the argument.
v1 = ctz v0
v2 = iconst.i64 0
;; "Is the count non-zero?" — the pattern the rewrite targets.
v3 = icmp ne v1, v2
return v3
}

; function %ctz_ne_zero_i64(i64) -> i8 fast {
; block0(v0: i64):
; v4 = iconst.i64 1
; v5 = band v0, v4 ; v4 = 1
; v2 = iconst.i64 0
; v6 = icmp eq v5, v2 ; v2 = 0
; return v6
; }

;; clz(X) == 0 → X <signed 0 (sign-bit test)
;;
;; i32 case. The input compares `clz v0` for equality with an `iconst` 0; the
;; expected output that follows shows the `clz` gone and a single
;; `icmp slt v0, 0` against the original zero constant.
function %clz_eq_zero_i32(i32) -> i8 {
block0(v0: i32):
;; Count of leading zeros of the argument.
v1 = clz v0
v2 = iconst.i32 0
;; "Is the count zero?" — the pattern the rewrite targets.
v3 = icmp eq v1, v2
return v3
}

; function %clz_eq_zero_i32(i32) -> i8 fast {
; block0(v0: i32):
; v2 = iconst.i32 0
; v4 = icmp slt v0, v2 ; v2 = 0
; return v4
; }

;; clz(X) != 0 → X >=signed 0 (i64 case)
;;
;; The input compares `clz v0` for inequality with an `iconst` 0; the expected
;; output that follows shows the `clz` gone and a single `icmp sge v0, 0`
;; against the original zero constant.
function %clz_ne_zero_i64(i64) -> i8 {
block0(v0: i64):
;; Count of leading zeros of the argument.
v1 = clz v0
v2 = iconst.i64 0
;; "Is the count non-zero?" — the pattern the rewrite targets.
v3 = icmp ne v1, v2
return v3
}

; function %clz_ne_zero_i64(i64) -> i8 fast {
; block0(v0: i64):
; v2 = iconst.i64 0
; v4 = icmp sge v0, v2 ; v2 = 0
; return v4
; }

;; Negative test: only the comparison-against-zero pattern fires.
;; `ctz(X) == 4` is a numeric-value test on the count, not a boolean,
;; and must be left alone.
;;
;; The expected output that follows is the input function unchanged (same
;; `ctz`, `iconst 4`, and `icmp eq`).
;; NOTE(review): this file has no analogous negative test for `clz`, and no
;; positive tests for i8/i16 operands — consider adding them.
function %ctz_eq_nonzero_i32(i32) -> i8 {
block0(v0: i32):
v1 = ctz v0
;; Non-zero comparison constant: the rewrite must not match this.
v2 = iconst.i32 4
v3 = icmp eq v1, v2
return v3
}

; function %ctz_eq_nonzero_i32(i32) -> i8 fast {
; block0(v0: i32):
; v1 = ctz v0
; v2 = iconst.i32 4
; v3 = icmp eq v1, v2 ; v2 = 4
; return v3
; }

Loading
Loading