Skip to content

Commit b496974

Browse files
Rollup merge of #131520 - zachs18:const-str-split, r=Noratrieb
Mark `str::is_char_boundary` and `str::split_at*` unstably `const`. Tracking issues: #131516, #131518. The first commit implements `const_is_char_boundary`; the second commit implements `const_str_split_at` (which depends on `const_is_char_boundary`). ~~I used `const_eval_select` for `is_char_boundary` since there is a comment about optimizations that would theoretically not happen with the simple `const`-compatible version (since `slice::get` is not `const`ifiable); cc #84751. I have not checked if this code difference is still required for the optimization, so it might not be worth the code complication, but 🤷.~~ This changes `str::split_at_checked` to use a new private helper function `split_at_unchecked` (copied from `split_at_mut_unchecked`) that builds the two halves with raw-pointer operations instead of calling `get_unchecked`, since `get_unchecked` is not currently `const`ifiable due to using the `SliceIndex` trait.
2 parents 5d0f52e + aa493d0 commit b496974

File tree

2 files changed

+38
-12
lines changed

2 files changed

+38
-12
lines changed

library/core/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,9 @@
185185
#![feature(cfg_target_has_atomic_equal_alignment)]
186186
#![feature(cfg_ub_checks)]
187187
#![feature(const_for)]
188+
#![feature(const_is_char_boundary)]
188189
#![feature(const_precise_live_drops)]
190+
#![feature(const_str_split_at)]
189191
#![feature(decl_macro)]
190192
#![feature(deprecated_suggestion)]
191193
#![feature(doc_cfg)]

library/core/src/str/mod.rs

+36-12
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,9 @@ impl str {
185185
/// ```
186186
#[must_use]
187187
#[stable(feature = "is_char_boundary", since = "1.9.0")]
188+
#[rustc_const_unstable(feature = "const_is_char_boundary", issue = "131516")]
188189
#[inline]
189-
pub fn is_char_boundary(&self, index: usize) -> bool {
190+
pub const fn is_char_boundary(&self, index: usize) -> bool {
190191
// 0 is always ok.
191192
// Test for 0 explicitly so that it can optimize out the check
192193
// easily and skip reading string data for that case.
@@ -195,8 +196,8 @@ impl str {
195196
return true;
196197
}
197198

198-
match self.as_bytes().get(index) {
199-
// For `None` we have two options:
199+
if index >= self.len() {
200+
// For `true` we have two options:
200201
//
201202
// - index == self.len()
202203
// Empty strings are valid, so return true
@@ -205,9 +206,9 @@ impl str {
205206
//
206207
// The check is placed exactly here, because it improves generated
207208
// code on higher opt-levels. See PR #84751 for more details.
208-
None => index == self.len(),
209-
210-
Some(&b) => b.is_utf8_char_boundary(),
209+
index == self.len()
210+
} else {
211+
self.as_bytes()[index].is_utf8_char_boundary()
211212
}
212213
}
213214

@@ -637,7 +638,8 @@ impl str {
637638
#[inline]
638639
#[must_use]
639640
#[stable(feature = "str_split_at", since = "1.4.0")]
640-
pub fn split_at(&self, mid: usize) -> (&str, &str) {
641+
#[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")]
642+
pub const fn split_at(&self, mid: usize) -> (&str, &str) {
641643
match self.split_at_checked(mid) {
642644
None => slice_error_fail(self, 0, mid),
643645
Some(pair) => pair,
@@ -677,7 +679,8 @@ impl str {
677679
#[inline]
678680
#[must_use]
679681
#[stable(feature = "str_split_at", since = "1.4.0")]
680-
pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
682+
#[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")]
683+
pub const fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
681684
// is_char_boundary checks that the index is in [0, .len()]
682685
if self.is_char_boundary(mid) {
683686
// SAFETY: just checked that `mid` is on a char boundary.
@@ -716,11 +719,12 @@ impl str {
716719
#[inline]
717720
#[must_use]
718721
#[stable(feature = "split_at_checked", since = "1.80.0")]
719-
pub fn split_at_checked(&self, mid: usize) -> Option<(&str, &str)> {
722+
#[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")]
723+
pub const fn split_at_checked(&self, mid: usize) -> Option<(&str, &str)> {
720724
// is_char_boundary checks that the index is in [0, .len()]
721725
if self.is_char_boundary(mid) {
722726
// SAFETY: just checked that `mid` is on a char boundary.
723-
Some(unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) })
727+
Some(unsafe { self.split_at_unchecked(mid) })
724728
} else {
725729
None
726730
}
@@ -756,7 +760,9 @@ impl str {
756760
#[inline]
757761
#[must_use]
758762
#[stable(feature = "split_at_checked", since = "1.80.0")]
759-
pub fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut str, &mut str)> {
763+
#[rustc_const_unstable(feature = "const_str_split_at", issue = "131518")]
764+
#[rustc_allow_const_fn_unstable(const_is_char_boundary)]
765+
pub const fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut str, &mut str)> {
760766
// is_char_boundary checks that the index is in [0, .len()]
761767
if self.is_char_boundary(mid) {
762768
// SAFETY: just checked that `mid` is on a char boundary.
@@ -772,7 +778,25 @@ impl str {
772778
///
773779
/// The caller must ensure that `mid` is a valid byte offset from the start
774780
/// of the string and falls on the boundary of a UTF-8 code point.
775-
unsafe fn split_at_mut_unchecked(&mut self, mid: usize) -> (&mut str, &mut str) {
781+
const unsafe fn split_at_unchecked(&self, mid: usize) -> (&str, &str) {
782+
let len = self.len();
783+
let ptr = self.as_ptr();
784+
// SAFETY: caller guarantees `mid` is on a char boundary.
785+
unsafe {
786+
(
787+
from_utf8_unchecked(slice::from_raw_parts(ptr, mid)),
788+
from_utf8_unchecked(slice::from_raw_parts(ptr.add(mid), len - mid)),
789+
)
790+
}
791+
}
792+
793+
/// Divides one string slice into two at an index.
794+
///
795+
/// # Safety
796+
///
797+
/// The caller must ensure that `mid` is a valid byte offset from the start
798+
/// of the string and falls on the boundary of a UTF-8 code point.
799+
const unsafe fn split_at_mut_unchecked(&mut self, mid: usize) -> (&mut str, &mut str) {
776800
let len = self.len();
777801
let ptr = self.as_mut_ptr();
778802
// SAFETY: caller guarantees `mid` is on a char boundary.

0 commit comments

Comments
 (0)