Skip to content

Commit 830b4ee

Browse files
authored
Rollup merge of rust-lang#66881 - krishna-veerareddy:issue-66780-bool-ord-optimization, r=sfackler
Optimize Ord trait implementation for bool

Casting the booleans to `i8`s and converting their difference into `Ordering` generates better assembly than casting them to `u8`s and comparing them.

Fixes rust-lang#66780

#### Comparison ([Godbolt link](https://rust.godbolt.org/z/PjBpvF))

##### Old assembly:

```asm
example::boolean_cmp:
        mov     ecx, edi
        xor     ecx, esi
        test    esi, esi
        mov     eax, 255
        cmove   eax, ecx
        test    edi, edi
        cmovne  eax, ecx
        ret
```

##### New assembly:

```asm
example::boolean_cmp:
        mov     eax, edi
        sub     al, sil
        ret
```

##### Old LLVM-MCA statistics:

```
Iterations:        100
Instructions:      800
Total Cycles:      234
Total uOps:        1000

Dispatch Width:    6
uOps Per Cycle:    4.27
IPC:               3.42
Block RThroughput: 1.7
```

##### New LLVM-MCA statistics:

```
Iterations:        100
Instructions:      300
Total Cycles:      110
Total uOps:        500

Dispatch Width:    6
uOps Per Cycle:    4.55
IPC:               2.73
Block RThroughput: 1.0
```
2 parents ddca1e0 + 1f07aa5 commit 830b4ee

File tree

3 files changed

+36
-1
lines changed

3 files changed

+36
-1
lines changed

src/libcore/cmp.rs

+11-1
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,7 @@ pub fn max_by_key<T, F: FnMut(&T) -> K, K: Ord>(v1: T, v2: T, mut f: F) -> T {
10051005

10061006
// Implementation of PartialEq, Eq, PartialOrd and Ord for primitive types
10071007
mod impls {
1008+
use crate::hint::unreachable_unchecked;
10081009
use crate::cmp::Ordering::{self, Less, Greater, Equal};
10091010

10101011
macro_rules! partial_eq_impl {
@@ -1125,7 +1126,16 @@ mod impls {
11251126
impl Ord for bool {
11261127
#[inline]
11271128
fn cmp(&self, other: &bool) -> Ordering {
1128-
(*self as u8).cmp(&(*other as u8))
1129+
// Casting to i8's and converting the difference to an Ordering generates
1130+
// more optimal assembly.
1131+
// See <https://github.com/rust-lang/rust/issues/66780> for more info.
1132+
match (*self as i8) - (*other as i8) {
1133+
-1 => Less,
1134+
0 => Equal,
1135+
1 => Greater,
1136+
// SAFETY: bool as i8 returns 0 or 1, so the difference can't be anything else
1137+
_ => unsafe { unreachable_unchecked() },
1138+
}
11291139
}
11301140
}
11311141

src/libcore/tests/cmp.rs

+8
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@ fn test_int_totalord() {
99
assert_eq!(12.cmp(&-5), Greater);
1010
}
1111

12+
#[test]
fn test_bool_totalord() {
    // Every (lhs, rhs) pair of booleans together with the ordering that
    // `lhs.cmp(&rhs)` must produce.
    let cases = [
        (true, false, Greater),
        (false, true, Less),
        (true, true, Equal),
        (false, false, Equal),
    ];

    for &(lhs, rhs, expected) in cases.iter() {
        assert_eq!(lhs.cmp(&rhs), expected, "{}.cmp(&{})", lhs, rhs);
    }
}
19+
1220
#[test]
1321
fn test_mut_int_totalord() {
1422
assert_eq!((&mut 5).cmp(&&mut 10), Less);

src/test/codegen/bool-cmp.rs

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// This is a test for optimal Ord trait implementation for bool.
2+
// See <https://github.com/rust-lang/rust/issues/66780> for more info.
3+
4+
// compile-flags: -C opt-level=3
5+
6+
#![crate_type = "lib"]
7+
8+
use std::cmp::Ordering;
9+
10+
// CHECK-LABEL: @cmp_bool
11+
// Compares two booleans through `Ord::cmp` so the IR generated for that call
// can be inspected. `#[no_mangle]` keeps the symbol name as `cmp_bool`, which
// is the name the label directive above refers to.
#[no_mangle]
12+
pub fn cmp_bool(a: bool, b: bool) -> Ordering {
13+
// The directives below expect two zero-extensions of an `i1` followed by a
// `sub nsw` in the generated IR.
// CHECK: zext i1
14+
// CHECK: zext i1
15+
// CHECK: sub nsw
16+
a.cmp(&b)
17+
}

0 commit comments

Comments
 (0)