Skip to content

Commit 932c736

Browse files
committed
Auto merge of #48057 - scottmcm:less-match-more-compare, r=dtolnay
Simplify RangeInclusive::next[_back] `match`ing on an `Option<Ordering>` seems cause some confusion for LLVM; switching to just using comparison operators removes a few jumps from the simple `for` loops I was trying. cc #45222 #28237 (comment) Example: ```rust #[no_mangle] pub fn coresum(x: std::ops::RangeInclusive<u64>) -> u64 { let mut sum = 0; for i in x { sum += i ^ (i-1); } sum } ``` Today: ```asm coresum: xor r8d, r8d mov r9, -1 xor eax, eax jmp .LBB0_1 .LBB0_4: lea rcx, [rdi - 1] xor rcx, rdi add rax, rcx mov rsi, rdx mov rdi, r10 .LBB0_1: cmp rdi, rsi mov ecx, 1 cmovb rcx, r9 cmove rcx, r8 test rcx, rcx mov edx, 0 mov r10d, 1 je .LBB0_4 // 1 cmp rcx, -1 jne .LBB0_5 // 2 lea r10, [rdi + 1] mov rdx, rsi jmp .LBB0_4 // 3 .LBB0_5: ret ``` With this PR: ```asm coresum: cmp rcx, rdx jbe .LBB0_2 xor eax, eax ret .LBB0_2: xor r8d, r8d mov r9d, 1 xor eax, eax .p2align 4, 0x90 .LBB0_3: lea r10, [rcx + 1] cmp rcx, rdx cmovae rdx, r8 cmovae r10, r9 lea r11, [rcx - 1] xor r11, rcx add rax, r11 mov rcx, r10 cmp r10, rdx jbe .LBB0_3 // Just this ret ``` <details><summary>Though using internal iteration (`.map(|i| i ^ (i-1)).sum()`) is still shorter to type, and lets the compiler unroll it</summary> ```asm coresum_inner: .Lcfi0: .seh_proc coresum_inner sub rsp, 168 .Lcfi1: .seh_stackalloc 168 vmovdqa xmmword ptr [rsp + 144], xmm15 .Lcfi2: .seh_savexmm 15, 144 vmovdqa xmmword ptr [rsp + 128], xmm14 .Lcfi3: .seh_savexmm 14, 128 vmovdqa xmmword ptr [rsp + 112], xmm13 .Lcfi4: .seh_savexmm 13, 112 vmovdqa xmmword ptr [rsp + 96], xmm12 .Lcfi5: .seh_savexmm 12, 96 vmovdqa xmmword ptr [rsp + 80], xmm11 .Lcfi6: .seh_savexmm 11, 80 vmovdqa xmmword ptr [rsp + 64], xmm10 .Lcfi7: .seh_savexmm 10, 64 vmovdqa xmmword ptr [rsp + 48], xmm9 .Lcfi8: .seh_savexmm 9, 48 vmovdqa xmmword ptr [rsp + 32], xmm8 .Lcfi9: .seh_savexmm 8, 32 vmovdqa xmmword ptr [rsp + 16], xmm7 .Lcfi10: .seh_savexmm 7, 16 vmovdqa xmmword ptr [rsp], xmm6 .Lcfi11: .seh_savexmm 6, 0 .Lcfi12: .seh_endprologue cmp rdx, rcx jae .LBB1_2 xor eax, eax jmp .LBB1_13 .LBB1_2: mov r8, rdx sub r8, rcx jbe .LBB1_3 cmp r8, 7 jbe .LBB1_5 mov rax, r8 and rax, -8 mov r9, r8 and r9, -8 je .LBB1_5 add rax, rcx vmovq xmm0, rcx vpshufd xmm0, xmm0, 68 mov ecx, 1 vmovq xmm1, rcx vpslldq xmm1, xmm1, 8 vpaddq xmm1, xmm0, xmm1 vpxor xmm0, xmm0, xmm0 vpcmpeqd xmm11, xmm11, xmm11 vmovdqa xmm12, xmmword ptr [rip + __xmm@00000000000000010000000000000001] vmovdqa xmm13, xmmword ptr [rip + __xmm@00000000000000030000000000000003] vmovdqa xmm14, xmmword ptr [rip + __xmm@00000000000000050000000000000005] vmovdqa xmm15, xmmword ptr [rip + __xmm@00000000000000080000000000000008] mov rcx, r9 vpxor xmm4, xmm4, xmm4 vpxor xmm5, xmm5, xmm5 vpxor xmm6, xmm6, xmm6 .p2align 4, 0x90 .LBB1_9: vpaddq xmm7, xmm1, xmmword ptr [rip + __xmm@00000000000000020000000000000002] vpaddq xmm9, xmm1, xmmword ptr [rip + __xmm@00000000000000040000000000000004] vpaddq xmm10, xmm1, xmmword ptr [rip + __xmm@00000000000000060000000000000006] vpaddq xmm8, xmm1, xmm12 vpxor xmm7, xmm8, xmm7 vpaddq xmm2, xmm1, xmm13 vpxor xmm8, xmm2, xmm9 vpaddq xmm3, xmm1, xmm14 vpxor xmm3, xmm3, xmm10 vpaddq xmm2, xmm1, xmm11 vpxor xmm2, xmm2, xmm1 vpaddq xmm0, xmm2, xmm0 vpaddq xmm4, xmm7, xmm4 vpaddq xmm5, xmm8, xmm5 vpaddq xmm6, xmm3, xmm6 vpaddq xmm1, xmm1, xmm15 add rcx, -8 jne .LBB1_9 vpaddq xmm0, xmm4, xmm0 vpaddq xmm0, xmm5, xmm0 vpaddq xmm0, xmm6, xmm0 vpshufd xmm1, xmm0, 78 vpaddq xmm0, xmm0, xmm1 vmovq r10, xmm0 cmp r8, r9 jne .LBB1_6 jmp .LBB1_11 .LBB1_3: xor r10d, r10d jmp .LBB1_12 .LBB1_5: xor r10d, r10d mov rax, rcx .p2align 4, 0x90 .LBB1_6: lea rcx, [rax - 1] xor rcx, rax inc rax add r10, rcx cmp rdx, rax jne .LBB1_6 .LBB1_11: mov rcx, rdx .LBB1_12: lea rax, [rcx - 1] xor rax, rcx add rax, r10 .LBB1_13: vmovaps xmm6, xmmword ptr [rsp] vmovaps xmm7, xmmword ptr [rsp + 16] vmovaps xmm8, xmmword ptr [rsp + 32] vmovaps xmm9, xmmword ptr [rsp + 48] vmovaps xmm10, xmmword ptr [rsp + 64] vmovaps xmm11, xmmword ptr [rsp + 80] vmovaps xmm12, xmmword ptr [rsp + 96] vmovaps xmm13, xmmword ptr [rsp + 112] vmovaps xmm14, xmmword ptr [rsp + 128] vmovaps xmm15, xmmword ptr [rsp + 144] add rsp, 168 ret .seh_handlerdata .section .text,"xr",one_only,coresum_inner .Lcfi13: .seh_endproc ``` </details>
2 parents 29c8276 + 27d4d51 commit 932c736

File tree

2 files changed

+31
-16
lines changed

2 files changed

+31
-16
lines changed

src/libcore/iter/range.rs

+12-16
Original file line numberDiff line numberDiff line change
@@ -334,19 +334,17 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
334334

335335
#[inline]
336336
fn next(&mut self) -> Option<A> {
337-
use cmp::Ordering::*;
338-
339-
match self.start.partial_cmp(&self.end) {
340-
Some(Less) => {
337+
if self.start <= self.end {
338+
if self.start < self.end {
341339
let n = self.start.add_one();
342340
Some(mem::replace(&mut self.start, n))
343-
},
344-
Some(Equal) => {
341+
} else {
345342
let last = self.start.replace_one();
346343
self.end.replace_zero();
347344
Some(last)
348-
},
349-
_ => None,
345+
}
346+
} else {
347+
None
350348
}
351349
}
352350

@@ -428,19 +426,17 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
428426
impl<A: Step> DoubleEndedIterator for ops::RangeInclusive<A> {
429427
#[inline]
430428
fn next_back(&mut self) -> Option<A> {
431-
use cmp::Ordering::*;
432-
433-
match self.start.partial_cmp(&self.end) {
434-
Some(Less) => {
429+
if self.start <= self.end {
430+
if self.start < self.end {
435431
let n = self.end.sub_one();
436432
Some(mem::replace(&mut self.end, n))
437-
},
438-
Some(Equal) => {
433+
} else {
439434
let last = self.end.replace_zero();
440435
self.start.replace_one();
441436
Some(last)
442-
},
443-
_ => None,
437+
}
438+
} else {
439+
None
444440
}
445441
}
446442

src/libcore/tests/iter.rs

+19
Original file line numberDiff line numberDiff line change
@@ -1332,6 +1332,18 @@ fn test_range_inclusive_exhaustion() {
13321332
assert_eq!(r.next_back(), Some(10));
13331333
assert_eq!(r, 1..=0);
13341334

1335+
let mut r = 10..=12;
1336+
assert_eq!(r.next(), Some(10));
1337+
assert_eq!(r.next(), Some(11));
1338+
assert_eq!(r.next(), Some(12));
1339+
assert_eq!(r, 1..=0);
1340+
1341+
let mut r = 10..=12;
1342+
assert_eq!(r.next_back(), Some(12));
1343+
assert_eq!(r.next_back(), Some(11));
1344+
assert_eq!(r.next_back(), Some(10));
1345+
assert_eq!(r, 1..=0);
1346+
13351347
let mut r = 10..=12;
13361348
assert_eq!(r.nth(2), Some(12));
13371349
assert_eq!(r, 1..=0);
@@ -1340,6 +1352,13 @@ fn test_range_inclusive_exhaustion() {
13401352
assert_eq!(r.nth(5), None);
13411353
assert_eq!(r, 1..=0);
13421354

1355+
let mut r = 100..=10;
1356+
assert_eq!(r.next(), None);
1357+
assert_eq!(r, 100..=10);
1358+
1359+
let mut r = 100..=10;
1360+
assert_eq!(r.next_back(), None);
1361+
assert_eq!(r, 100..=10);
13431362
}
13441363

13451364
#[test]

0 commit comments

Comments
 (0)