From f165f49d228d2582d2dbfd588c2729cfc9585eb0 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sat, 6 Feb 2021 19:14:13 +0000 Subject: [PATCH 1/5] Slight perf improvement on char::to_ascii_lowercase --- library/core/benches/char/methods.rs | 10 ++++++++++ library/core/src/char/methods.rs | 6 ++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/library/core/benches/char/methods.rs b/library/core/benches/char/methods.rs index a9a08a4d76200..de4b63030fa7c 100644 --- a/library/core/benches/char/methods.rs +++ b/library/core/benches/char/methods.rs @@ -35,3 +35,13 @@ fn bench_to_digit_radix_var(b: &mut Bencher) { .min() }) } + +#[bench] +fn bench_to_ascii_uppercase(b: &mut Bencher) { + b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_uppercase()).min()) +} + +#[bench] +fn bench_to_ascii_lowercase(b: &mut Bencher) { + b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_lowercase()).min()) +} diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 2baea7842a796..4c28d9cd673af 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1090,7 +1090,8 @@ impl char { #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] #[inline] pub fn to_ascii_uppercase(&self) -> char { - if self.is_ascii() { (*self as u8).to_ascii_uppercase() as char } else { *self } + // 6th bit dictates ascii case. + if self.is_ascii_lowercase() { ((*self as u8) & !0b10_0000u8) as char } else { *self } } /// Makes a copy of the value in its ASCII lower case equivalent. @@ -1118,7 +1119,8 @@ impl char { #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] #[inline] pub fn to_ascii_lowercase(&self) -> char { - if self.is_ascii() { (*self as u8).to_ascii_lowercase() as char } else { *self } + // 6th bit dictates ascii case. + if self.is_ascii_uppercase() { ((*self as u8) | 0b10_0000u8) as char } else { *self } } /// Checks that two values are an ASCII case-insensitive match. 
From f30c51abe8ce62756f86abbbd6623a9131d3954c Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sat, 6 Feb 2021 20:35:21 +0000 Subject: [PATCH 2/5] Pulling out constant. --- library/core/src/char/methods.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 4c28d9cd673af..4032e7770772e 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -7,6 +7,9 @@ use crate::unicode::{self, conversions}; use super::*; +/// If 6th bit set ascii is upper case. +const ASCII_CASE_MASK: u8 = 0b10_0000u8; + #[lang = "char"] impl char { /// The highest valid code point a `char` can have. @@ -1090,8 +1093,7 @@ impl char { #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] #[inline] pub fn to_ascii_uppercase(&self) -> char { - // 6th bit dictates ascii case. - if self.is_ascii_lowercase() { ((*self as u8) & !0b10_0000u8) as char } else { *self } + if self.is_ascii_lowercase() { ((*self as u8) & !ASCII_CASE_MASK) as char } else { *self } } /// Makes a copy of the value in its ASCII lower case equivalent. @@ -1119,8 +1121,7 @@ impl char { #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] #[inline] pub fn to_ascii_lowercase(&self) -> char { - // 6th bit dictates ascii case. - if self.is_ascii_uppercase() { ((*self as u8) | 0b10_0000u8) as char } else { *self } + if self.is_ascii_uppercase() { ((*self as u8) | ASCII_CASE_MASK) as char } else { *self } } /// Checks that two values are an ASCII case-insensitive match. From cadcf5ed990dc02ad86cbb9f31423959a5517f50 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 8 Feb 2021 12:21:36 +0000 Subject: [PATCH 3/5] Unify way to flip 6th bit. 
(Same assembly generated) --- library/core/benches/ascii.rs | 6 ++++-- library/core/src/char/methods.rs | 5 +---- library/core/src/num/mod.rs | 5 +++-- library/core/src/unicode/mod.rs | 3 +++ 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/library/core/benches/ascii.rs b/library/core/benches/ascii.rs index bc59c378609f0..64938745a4a16 100644 --- a/library/core/benches/ascii.rs +++ b/library/core/benches/ascii.rs @@ -66,6 +66,8 @@ macro_rules! benches { use test::black_box; use test::Bencher; +const ASCII_CASE_MASK: u8 = 0b0010_0000; + benches! { fn case00_alloc_only(_bytes: &mut [u8]) {} @@ -204,7 +206,7 @@ benches! { } } for byte in bytes { - *byte &= !((is_ascii_lowercase(*byte) as u8) << 5) + *byte &= !((is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK) } } @@ -216,7 +218,7 @@ benches! { } } for byte in bytes { - *byte -= (is_ascii_lowercase(*byte) as u8) << 5 + *byte -= (is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK } } diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 4032e7770772e..bbdb2a5d41b99 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -3,13 +3,10 @@ use crate::slice; use crate::str::from_utf8_unchecked_mut; use crate::unicode::printable::is_printable; -use crate::unicode::{self, conversions}; +use crate::unicode::{self, conversions, ASCII_CASE_MASK}; use super::*; -/// If 6th bit set ascii is upper case. -const ASCII_CASE_MASK: u8 = 0b10_0000u8; - #[lang = "char"] impl char { /// The highest valid code point a `char` can have. diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 6bdfa18fa434c..7563a742b9a90 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -5,6 +5,7 @@ use crate::intrinsics; use crate::mem; use crate::str::FromStr; +use crate::unicode::ASCII_CASE_MASK; // Used because the `?` operator is not allowed in a const context. macro_rules! 
try_opt { @@ -195,7 +196,7 @@ impl u8 { #[inline] pub fn to_ascii_uppercase(&self) -> u8 { // Unset the fifth bit if this is a lowercase letter - *self & !((self.is_ascii_lowercase() as u8) << 5) + *self & !((self.is_ascii_lowercase() as u8) * ASCII_CASE_MASK) } /// Makes a copy of the value in its ASCII lower case equivalent. @@ -218,7 +219,7 @@ impl u8 { #[inline] pub fn to_ascii_lowercase(&self) -> u8 { // Set the fifth bit if this is an uppercase letter - *self | ((self.is_ascii_uppercase() as u8) << 5) + *self | (self.is_ascii_uppercase() as u8 * ASCII_CASE_MASK) } /// Checks that two values are an ASCII case-insensitive match. diff --git a/library/core/src/unicode/mod.rs b/library/core/src/unicode/mod.rs index 37ca0a0779b17..b333b46310532 100644 --- a/library/core/src/unicode/mod.rs +++ b/library/core/src/unicode/mod.rs @@ -17,6 +17,9 @@ mod unicode_data; #[stable(feature = "unicode_version", since = "1.45.0")] pub const UNICODE_VERSION: (u8, u8, u8) = unicode_data::UNICODE_VERSION; +/// If 6th bit set ascii is upper case. +pub(crate) const ASCII_CASE_MASK: u8 = 0b0010_0000; + // For use in liballoc, not re-exported in libstd. 
pub use unicode_data::{ case_ignorable::lookup as Case_Ignorable, cased::lookup as Cased, conversions, From daa55acdb0bd174bb0bca1f88d5920289808a8f1 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Fri, 12 Feb 2021 13:42:42 +0000 Subject: [PATCH 4/5] Slightly more explicit --- library/core/src/char/methods.rs | 14 +++++++++++--- library/core/src/num/mod.rs | 10 +++++++++- library/core/src/unicode/mod.rs | 3 --- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index bbdb2a5d41b99..3ddf0e638946c 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -3,7 +3,7 @@ use crate::slice; use crate::str::from_utf8_unchecked_mut; use crate::unicode::printable::is_printable; -use crate::unicode::{self, conversions, ASCII_CASE_MASK}; +use crate::unicode::{self, conversions}; use super::*; @@ -1090,7 +1090,11 @@ impl char { #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] #[inline] pub fn to_ascii_uppercase(&self) -> char { - if self.is_ascii_lowercase() { ((*self as u8) & !ASCII_CASE_MASK) as char } else { *self } + if self.is_ascii_lowercase() { + (*self as u8).ascii_change_case_unchecked() as char + } else { + *self + } } /// Makes a copy of the value in its ASCII lower case equivalent. @@ -1118,7 +1122,11 @@ impl char { #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] #[inline] pub fn to_ascii_lowercase(&self) -> char { - if self.is_ascii_uppercase() { ((*self as u8) | ASCII_CASE_MASK) as char } else { *self } + if self.is_ascii_uppercase() { + (*self as u8).ascii_change_case_unchecked() as char + } else { + *self + } } /// Checks that two values are an ASCII case-insensitive match. 
diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 7563a742b9a90..42ccdd00bbd6a 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -5,7 +5,9 @@ use crate::intrinsics; use crate::mem; use crate::str::FromStr; -use crate::unicode::ASCII_CASE_MASK; + +/// If 6th bit set ascii is upper case. +const ASCII_CASE_MASK: u8 = 0b0010_0000; // Used because the `?` operator is not allowed in a const context. macro_rules! try_opt { @@ -222,6 +224,12 @@ impl u8 { *self | (self.is_ascii_uppercase() as u8 * ASCII_CASE_MASK) } + /// Assumes self is ascii + #[inline] + pub(crate) fn ascii_change_case_unchecked(&self) -> u8 { + *self ^ ASCII_CASE_MASK + } + /// Checks that two values are an ASCII case-insensitive match. /// /// This is equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`. diff --git a/library/core/src/unicode/mod.rs b/library/core/src/unicode/mod.rs index b333b46310532..37ca0a0779b17 100644 --- a/library/core/src/unicode/mod.rs +++ b/library/core/src/unicode/mod.rs @@ -17,9 +17,6 @@ mod unicode_data; #[stable(feature = "unicode_version", since = "1.45.0")] pub const UNICODE_VERSION: (u8, u8, u8) = unicode_data::UNICODE_VERSION; -/// If 6th bit set ascii is upper case. -pub(crate) const ASCII_CASE_MASK: u8 = 0b0010_0000; - // For use in liballoc, not re-exported in libstd. pub use unicode_data::{ case_ignorable::lookup as Case_Ignorable, cased::lookup as Cased, conversions, From 33d8b0456876181883f8d97997a3a0a6e9ff652f Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sun, 14 Feb 2021 18:23:30 +0000 Subject: [PATCH 5/5] Move const def nearer usage. 
--- library/core/src/num/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 42ccdd00bbd6a..c13f000a73615 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -6,9 +6,6 @@ use crate::intrinsics; use crate::mem; use crate::str::FromStr; -/// If 6th bit set ascii is upper case. -const ASCII_CASE_MASK: u8 = 0b0010_0000; - // Used because the `?` operator is not allowed in a const context. macro_rules! try_opt { ($e:expr) => { @@ -155,6 +152,9 @@ impl isize { usize_isize_to_xe_bytes_doc!(), usize_isize_from_xe_bytes_doc!() } } +/// If the 6th bit is set, the ASCII letter is lower case. +const ASCII_CASE_MASK: u8 = 0b0010_0000; + #[lang = "u8"] impl u8 { uint_impl! { u8, u8, 8, 255, 2, "0x82", "0xa", "0x12", "0x12", "0x48", "[0x12]",