Skip to content

Commit 6468333

Browse files
authored
Rollup merge of rust-lang#81837 - gilescope:to_ascii_speedups, r=dtolnay
Slight perf improvement on char::to_ascii_lowercase

`char::to_ascii_lowercase()` was checking whether the char was ASCII and then whether it was in the right range. This change proposes checking only once instead (I think this removes a compare and a shift in the process: [godbolt](https://godbolt.org/z/e5Tora)).

Before:

```
test char::methods::bench_to_ascii_lowercase ... bench: 11,196 ns/iter (+/- 632)
test char::methods::bench_to_ascii_uppercase ... bench: 11,656 ns/iter (+/- 671)
```

After:

```
test char::methods::bench_to_ascii_lowercase ... bench: 9,612 ns/iter (+/- 979)
test char::methods::bench_to_ascii_uppercase ... bench: 8,241 ns/iter (+/- 701)
```

(Calling `u8::to_ascii_lowercase` and letting that flip the 5th bit is also an option, but it takes more instructions. For things around ASCII and `char`, I think we want to be as efficient as possible.)
2 parents e37adf5 + 33d8b04 commit 6468333

File tree

4 files changed

+35
-6
lines changed

4 files changed

+35
-6
lines changed

library/core/benches/ascii.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ macro_rules! benches {
6666
use test::black_box;
6767
use test::Bencher;
6868

69+
const ASCII_CASE_MASK: u8 = 0b0010_0000;
70+
6971
benches! {
7072
fn case00_alloc_only(_bytes: &mut [u8]) {}
7173

@@ -204,7 +206,7 @@ benches! {
204206
}
205207
}
206208
for byte in bytes {
207-
*byte &= !((is_ascii_lowercase(*byte) as u8) << 5)
209+
*byte &= !((is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK)
208210
}
209211
}
210212

@@ -216,7 +218,7 @@ benches! {
216218
}
217219
}
218220
for byte in bytes {
219-
*byte -= (is_ascii_lowercase(*byte) as u8) << 5
221+
*byte -= (is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK
220222
}
221223
}
222224

library/core/benches/char/methods.rs

+10
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,13 @@ fn bench_to_digit_radix_var(b: &mut Bencher) {
3535
.min()
3636
})
3737
}
38+
39+
#[bench]
40+
fn bench_to_ascii_uppercase(b: &mut Bencher) {
41+
b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_uppercase()).min())
42+
}
43+
44+
#[bench]
45+
fn bench_to_ascii_lowercase(b: &mut Bencher) {
46+
b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_lowercase()).min())
47+
}

library/core/src/char/methods.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -1088,7 +1088,11 @@ impl char {
10881088
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
10891089
#[inline]
10901090
pub fn to_ascii_uppercase(&self) -> char {
1091-
if self.is_ascii() { (*self as u8).to_ascii_uppercase() as char } else { *self }
1091+
if self.is_ascii_lowercase() {
1092+
(*self as u8).ascii_change_case_unchecked() as char
1093+
} else {
1094+
*self
1095+
}
10921096
}
10931097

10941098
/// Makes a copy of the value in its ASCII lower case equivalent.
@@ -1116,7 +1120,11 @@ impl char {
11161120
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
11171121
#[inline]
11181122
pub fn to_ascii_lowercase(&self) -> char {
1119-
if self.is_ascii() { (*self as u8).to_ascii_lowercase() as char } else { *self }
1123+
if self.is_ascii_uppercase() {
1124+
(*self as u8).ascii_change_case_unchecked() as char
1125+
} else {
1126+
*self
1127+
}
11201128
}
11211129

11221130
/// Checks that two values are an ASCII case-insensitive match.

library/core/src/num/mod.rs

+11-2
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ impl isize {
152152
usize_isize_to_xe_bytes_doc!(), usize_isize_from_xe_bytes_doc!() }
153153
}
154154

155+
/// If the 6th bit (`0b0010_0000`) is set, an ASCII letter is lower case; if it is clear, the letter is upper case.
156+
const ASCII_CASE_MASK: u8 = 0b0010_0000;
157+
155158
#[lang = "u8"]
156159
impl u8 {
157160
uint_impl! { u8, u8, 8, 255, 2, "0x82", "0xa", "0x12", "0x12", "0x48", "[0x12]",
@@ -195,7 +198,7 @@ impl u8 {
195198
#[inline]
196199
pub fn to_ascii_uppercase(&self) -> u8 {
197200
// Unset the fifth bit if this is a lowercase letter
198-
*self & !((self.is_ascii_lowercase() as u8) << 5)
201+
*self & !((self.is_ascii_lowercase() as u8) * ASCII_CASE_MASK)
199202
}
200203

201204
/// Makes a copy of the value in its ASCII lower case equivalent.
@@ -218,7 +221,13 @@ impl u8 {
218221
#[inline]
219222
pub fn to_ascii_lowercase(&self) -> u8 {
220223
// Set the fifth bit if this is an uppercase letter
221-
*self | ((self.is_ascii_uppercase() as u8) << 5)
224+
*self | (self.is_ascii_uppercase() as u8 * ASCII_CASE_MASK)
225+
}
226+
227+
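/// Flips the ASCII case bit unconditionally. Assumes self is an ASCII letter;
/// any other byte would have that bit toggled as well.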
228+
#[inline]
229+
pub(crate) fn ascii_change_case_unchecked(&self) -> u8 {
230+
*self ^ ASCII_CASE_MASK
222231
}
223232

224233
/// Checks that two values are an ASCII case-insensitive match.

0 commit comments

Comments
 (0)