Skip to content

Commit f9ae9f7

Browse files
committed
Auto merge of rust-lang#127143 - matthiaskrgr:rollup-72eqqik, r=matthiaskrgr
Rollup of 7 pull requests Successful merges: - rust-lang#123778 (Improve autovectorization of to_lowercase / to_uppercase functions) - rust-lang#126705 (Updated docs on `#[panic_handler]` in `library/core/src/lib.rs`) - rust-lang#126876 (Add `.ignore` file to make `config.toml` searchable in vscode) - rust-lang#126906 (Small fixme in core now that split_first has no codegen issues) - rust-lang#127127 (rustdoc: update to pulldown-cmark 0.11) - rust-lang#127131 (Remove unused `rustc_trait_selection` dependencies) - rust-lang#127134 (Print `TypeId` as a `u128` for `Debug`) r? `@ghost` `@rustbot` modify labels: rollup
2 parents 716752e + 865788e commit f9ae9f7

File tree

26 files changed

+526
-164
lines changed

26 files changed

+526
-164
lines changed

.ignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Make vscode *not* count `config.toml` as ignored, so it is included in search
2+
!/config.toml

.reuse/dep5

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Files: compiler/*
3636
.gitignore
3737
.gitmodules
3838
.mailmap
39+
.ignore
3940
Copyright: The Rust Project Developers (see https://thanks.rust-lang.org)
4041
License: MIT or Apache-2.0
4142

Cargo.lock

+21-6
Original file line numberDiff line numberDiff line change
@@ -3141,7 +3141,19 @@ dependencies = [
31413141
"bitflags 2.5.0",
31423142
"getopts",
31433143
"memchr",
3144-
"pulldown-cmark-escape",
3144+
"pulldown-cmark-escape 0.10.1",
3145+
"unicase",
3146+
]
3147+
3148+
[[package]]
3149+
name = "pulldown-cmark"
3150+
version = "0.11.0"
3151+
source = "registry+https://github.com/rust-lang/crates.io-index"
3152+
checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0"
3153+
dependencies = [
3154+
"bitflags 2.5.0",
3155+
"memchr",
3156+
"pulldown-cmark-escape 0.11.0",
31453157
"unicase",
31463158
]
31473159

@@ -3151,6 +3163,12 @@ version = "0.10.1"
31513163
source = "registry+https://github.com/rust-lang/crates.io-index"
31523164
checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3"
31533165

3166+
[[package]]
3167+
name = "pulldown-cmark-escape"
3168+
version = "0.11.0"
3169+
source = "registry+https://github.com/rust-lang/crates.io-index"
3170+
checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
3171+
31543172
[[package]]
31553173
name = "pulldown-cmark-to-cmark"
31563174
version = "13.0.0"
@@ -4604,7 +4622,7 @@ name = "rustc_resolve"
46044622
version = "0.0.0"
46054623
dependencies = [
46064624
"bitflags 2.5.0",
4607-
"pulldown-cmark 0.9.6",
4625+
"pulldown-cmark 0.11.0",
46084626
"rustc_arena",
46094627
"rustc_ast",
46104628
"rustc_ast_pretty",
@@ -4760,8 +4778,6 @@ checksum = "8ba09476327c4b70ccefb6180f046ef588c26a24cf5d269a9feba316eb4f029f"
47604778
name = "rustc_trait_selection"
47614779
version = "0.0.0"
47624780
dependencies = [
4763-
"bitflags 2.5.0",
4764-
"derivative",
47654781
"itertools",
47664782
"rustc_ast",
47674783
"rustc_ast_ir",
@@ -4770,7 +4786,6 @@ dependencies = [
47704786
"rustc_errors",
47714787
"rustc_fluent_macro",
47724788
"rustc_hir",
4773-
"rustc_index",
47744789
"rustc_infer",
47754790
"rustc_macros",
47764791
"rustc_middle",
@@ -4783,7 +4798,6 @@ dependencies = [
47834798
"rustc_target",
47844799
"rustc_transmute",
47854800
"rustc_type_ir",
4786-
"rustc_type_ir_macros",
47874801
"smallvec",
47884802
"tracing",
47894803
]
@@ -4887,6 +4901,7 @@ dependencies = [
48874901
"indexmap",
48884902
"itertools",
48894903
"minifier",
4904+
"pulldown-cmark 0.9.6",
48904905
"regex",
48914906
"rustdoc-json-types",
48924907
"serde",

compiler/rustc_resolve/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ edition = "2021"
66
[dependencies]
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
9-
pulldown-cmark = { version = "0.9.6", default-features = false }
9+
pulldown-cmark = { version = "0.11", features = ["html"], default-features = false }
1010
rustc_arena = { path = "../rustc_arena" }
1111
rustc_ast = { path = "../rustc_ast" }
1212
rustc_ast_pretty = { path = "../rustc_ast_pretty" }

compiler/rustc_resolve/src/rustdoc.rs

+9-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag};
1+
use pulldown_cmark::{
2+
BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
3+
};
24
use rustc_ast as ast;
35
use rustc_ast::util::comments::beautify_doc_string;
46
use rustc_data_structures::fx::FxHashMap;
@@ -427,7 +429,9 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
427429

428430
while let Some(event) = event_iter.next() {
429431
match event {
430-
Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
432+
Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ })
433+
if may_be_doc_link(link_type) =>
434+
{
431435
if matches!(
432436
link_type,
433437
LinkType::Inline
@@ -441,7 +445,7 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
441445
}
442446
}
443447

444-
links.push(preprocess_link(&dest));
448+
links.push(preprocess_link(&dest_url));
445449
}
446450
_ => {}
447451
}
@@ -451,8 +455,8 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
451455
}
452456

453457
/// Collects additional data of link.
454-
fn collect_link_data<'input, 'callback>(
455-
event_iter: &mut Parser<'input, 'callback>,
458+
fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
459+
event_iter: &mut Parser<'input, F>,
456460
) -> Option<Box<str>> {
457461
let mut display_text: Option<String> = None;
458462
let mut append_text = |text: CowStr<'_>| {

compiler/rustc_trait_selection/Cargo.toml

-4
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ edition = "2021"
55

66
[dependencies]
77
# tidy-alphabetical-start
8-
bitflags = "2.4.1"
9-
derivative = "2.2.0"
108
itertools = "0.12"
119
rustc_ast = { path = "../rustc_ast" }
1210
rustc_ast_ir = { path = "../rustc_ast_ir" }
@@ -15,7 +13,6 @@ rustc_data_structures = { path = "../rustc_data_structures" }
1513
rustc_errors = { path = "../rustc_errors" }
1614
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
1715
rustc_hir = { path = "../rustc_hir" }
18-
rustc_index = { path = "../rustc_index" }
1916
rustc_infer = { path = "../rustc_infer" }
2017
rustc_macros = { path = "../rustc_macros" }
2118
rustc_middle = { path = "../rustc_middle" }
@@ -28,7 +25,6 @@ rustc_span = { path = "../rustc_span" }
2825
rustc_target = { path = "../rustc_target" }
2926
rustc_transmute = { path = "../rustc_transmute", features = ["rustc"] }
3027
rustc_type_ir = { path = "../rustc_type_ir" }
31-
rustc_type_ir_macros = { path = "../rustc_type_ir_macros" }
3228
smallvec = { version = "1.8.1", features = ["union", "may_dangle"] }
3329
tracing = "0.1"
3430
# tidy-alphabetical-end

library/alloc/benches/str.rs

+2
Original file line numberDiff line numberDiff line change
@@ -347,3 +347,5 @@ make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count());
347347

348348
make_test!(split_space_str, s, s.split(" ").count());
349349
make_test!(split_ad_str, s, s.split("ad").count());
350+
351+
make_test!(to_lowercase, s, s.to_lowercase());

library/alloc/src/str.rs

+73-52
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
use core::borrow::{Borrow, BorrowMut};
1111
use core::iter::FusedIterator;
1212
use core::mem;
13+
use core::mem::MaybeUninit;
1314
use core::ptr;
1415
use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
1516
use core::unicode::conversions;
@@ -367,14 +368,9 @@ impl str {
367368
without modifying the original"]
368369
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
369370
pub fn to_lowercase(&self) -> String {
370-
let out = convert_while_ascii(self.as_bytes(), u8::to_ascii_lowercase);
371+
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase);
371372

372-
// Safety: we know this is a valid char boundary since
373-
// out.len() is only progressed if ascii bytes are found
374-
let rest = unsafe { self.get_unchecked(out.len()..) };
375-
376-
// Safety: We have written only valid ASCII to our vec
377-
let mut s = unsafe { String::from_utf8_unchecked(out) };
373+
let prefix_len = s.len();
378374

379375
for (i, c) in rest.char_indices() {
380376
if c == 'Σ' {
@@ -383,8 +379,7 @@ impl str {
383379
// in `SpecialCasing.txt`,
384380
// so hard-code it rather than have a generic "condition" mechanism.
385381
// See https://github.com/rust-lang/rust/issues/26035
386-
let out_len = self.len() - rest.len();
387-
let sigma_lowercase = map_uppercase_sigma(&self, i + out_len);
382+
let sigma_lowercase = map_uppercase_sigma(self, prefix_len + i);
388383
s.push(sigma_lowercase);
389384
} else {
390385
match conversions::to_lower(c) {
@@ -460,14 +455,7 @@ impl str {
460455
without modifying the original"]
461456
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
462457
pub fn to_uppercase(&self) -> String {
463-
let out = convert_while_ascii(self.as_bytes(), u8::to_ascii_uppercase);
464-
465-
// Safety: we know this is a valid char boundary since
466-
// out.len() is only progressed if ascii bytes are found
467-
let rest = unsafe { self.get_unchecked(out.len()..) };
468-
469-
// Safety: We have written only valid ASCII to our vec
470-
let mut s = unsafe { String::from_utf8_unchecked(out) };
458+
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_uppercase);
471459

472460
for c in rest.chars() {
473461
match conversions::to_upper(c) {
@@ -616,50 +604,83 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
616604
unsafe { Box::from_raw(Box::into_raw(v) as *mut str) }
617605
}
618606

619-
/// Converts the bytes while the bytes are still ascii.
607+
/// Converts leading ascii bytes in `s` by calling the `convert` function.
608+
///
620609
/// For better average performance, this happens in chunks of `2*size_of::<usize>()`.
621-
/// Returns a vec with the converted bytes.
610+
///
611+
/// Returns a tuple of the converted prefix and the remainder starting from
612+
/// the first non-ascii character.
622613
#[inline]
623614
#[cfg(not(test))]
624615
#[cfg(not(no_global_oom_handling))]
625-
fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> Vec<u8> {
626-
let mut out = Vec::with_capacity(b.len());
616+
fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
617+
// Process the input in chunks of 16 bytes to enable auto-vectorization.
618+
// Previously the chunk size depended on the size of `usize`,
619+
// but on 32-bit platforms with sse or neon a 16-byte chunk is also the better choice.
620+
// The only downside on other platforms would be a bit more loop-unrolling.
621+
const N: usize = 16;
622+
623+
let mut slice = s.as_bytes();
624+
let mut out = Vec::with_capacity(slice.len());
625+
let mut out_slice = out.spare_capacity_mut();
626+
627+
let mut ascii_prefix_len = 0_usize;
628+
let mut is_ascii = [false; N];
629+
630+
while slice.len() >= N {
631+
// Safety: checked in loop condition
632+
let chunk = unsafe { slice.get_unchecked(..N) };
633+
// Safety: out_slice has at least the same length as the input slice and gets sliced with the same offsets
634+
let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };
635+
636+
for j in 0..N {
637+
is_ascii[j] = chunk[j] <= 127;
638+
}
627639

628-
const USIZE_SIZE: usize = mem::size_of::<usize>();
629-
const MAGIC_UNROLL: usize = 2;
630-
const N: usize = USIZE_SIZE * MAGIC_UNROLL;
631-
const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; USIZE_SIZE]);
640+
// Auto-vectorization for this check is a bit fragile; summing and comparing against the chunk
641+
// size gives the best result, specifically a pmovmsk instruction on x86.
642+
// There is a codegen test in `issue-123712-str-to-lower-autovectorization.rs` which should
643+
// be updated when this method is changed.
644+
// See also https://github.com/llvm/llvm-project/issues/96395
645+
if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
646+
break;
647+
}
632648

633-
let mut i = 0;
634-
unsafe {
635-
while i + N <= b.len() {
636-
// Safety: we have checks the sizes `b` and `out` to know that our
637-
let in_chunk = b.get_unchecked(i..i + N);
638-
let out_chunk = out.spare_capacity_mut().get_unchecked_mut(i..i + N);
639-
640-
let mut bits = 0;
641-
for j in 0..MAGIC_UNROLL {
642-
// read the bytes 1 usize at a time (unaligned since we haven't checked the alignment)
643-
// safety: in_chunk is valid bytes in the range
644-
bits |= in_chunk.as_ptr().cast::<usize>().add(j).read_unaligned();
645-
}
646-
// if our chunks aren't ascii, then return only the prior bytes as init
647-
if bits & NONASCII_MASK != 0 {
648-
break;
649-
}
649+
for j in 0..N {
650+
out_chunk[j] = MaybeUninit::new(convert(&chunk[j]));
651+
}
650652

651-
// perform the case conversions on N bytes (gets heavily autovec'd)
652-
for j in 0..N {
653-
// safety: in_chunk and out_chunk is valid bytes in the range
654-
let out = out_chunk.get_unchecked_mut(j);
655-
out.write(convert(in_chunk.get_unchecked(j)));
656-
}
653+
ascii_prefix_len += N;
654+
slice = unsafe { slice.get_unchecked(N..) };
655+
out_slice = unsafe { out_slice.get_unchecked_mut(N..) };
656+
}
657657

658-
// mark these bytes as initialised
659-
i += N;
658+
// handle the remainder as individual bytes
659+
while slice.len() > 0 {
660+
let byte = slice[0];
661+
if byte > 127 {
662+
break;
660663
}
661-
out.set_len(i);
664+
// Safety: out_slice has same length as input slice and gets sliced with the same offsets
665+
unsafe {
666+
*out_slice.get_unchecked_mut(0) = MaybeUninit::new(convert(&byte));
667+
}
668+
ascii_prefix_len += 1;
669+
slice = unsafe { slice.get_unchecked(1..) };
670+
out_slice = unsafe { out_slice.get_unchecked_mut(1..) };
662671
}
663672

664-
out
673+
unsafe {
674+
// SAFETY: ascii_prefix_len bytes have been initialized above
675+
out.set_len(ascii_prefix_len);
676+
677+
// SAFETY: We have written only valid ascii to the output vec
678+
let ascii_string = String::from_utf8_unchecked(out);
679+
680+
// SAFETY: we know this is a valid char boundary
681+
// since we only skipped over leading ascii bytes
682+
let rest = core::str::from_utf8_unchecked(slice);
683+
684+
(ascii_string, rest)
685+
}
665686
}

library/alloc/tests/str.rs

+3
Original file line numberDiff line numberDiff line change
@@ -1849,7 +1849,10 @@ fn to_lowercase() {
18491849
assert_eq!("ΑΣ''Α".to_lowercase(), "ασ''α");
18501850

18511851
// https://github.com/rust-lang/rust/issues/124714
1852+
// input lengths around the boundary of the chunk size used by the ascii prefix optimization
1853+
assert_eq!("abcdefghijklmnoΣ".to_lowercase(), "abcdefghijklmnoς");
18521854
assert_eq!("abcdefghijklmnopΣ".to_lowercase(), "abcdefghijklmnopς");
1855+
assert_eq!("abcdefghijklmnopqΣ".to_lowercase(), "abcdefghijklmnopqς");
18531856

18541857
// a really long string that has its lowercase form
18551858
// even longer. this tests that implementations don't assume

library/core/src/any.rs

+12-1
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,7 @@ impl dyn Any + Send + Sync {
602602
/// While `TypeId` implements `Hash`, `PartialOrd`, and `Ord`, it is worth
603603
/// noting that the hashes and ordering will vary between Rust releases. Beware
604604
/// of relying on them inside of your code!
605-
#[derive(Clone, Copy, Debug, Eq, PartialOrd, Ord)]
605+
#[derive(Clone, Copy, Eq, PartialOrd, Ord)]
606606
#[stable(feature = "rust1", since = "1.0.0")]
607607
pub struct TypeId {
608608
// We avoid using `u128` because that imposes higher alignment requirements on many platforms.
@@ -644,6 +644,10 @@ impl TypeId {
644644
let t2 = t as u64;
645645
TypeId { t: (t1, t2) }
646646
}
647+
648+
fn as_u128(self) -> u128 {
649+
u128::from(self.t.0) << 64 | u128::from(self.t.1)
650+
}
647651
}
648652

649653
#[stable(feature = "rust1", since = "1.0.0")]
@@ -666,6 +670,13 @@ impl hash::Hash for TypeId {
666670
}
667671
}
668672

673+
#[stable(feature = "rust1", since = "1.0.0")]
674+
impl fmt::Debug for TypeId {
675+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
676+
f.debug_tuple("TypeId").field(&self.as_u128()).finish()
677+
}
678+
}
679+
669680
/// Returns the name of a type as a string slice.
670681
///
671682
/// # Note

library/core/src/lib.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,9 @@
3434
//! Rust user code is to call the functions provided by this library instead (such as
3535
//! `ptr::copy`).
3636
//!
37-
//! * `rust_begin_panic` - This function takes four arguments, a
38-
//! `fmt::Arguments`, a `&'static str`, and two `u32`'s. These four arguments
39-
//! dictate the panic message, the file at which panic was invoked, and the
40-
//! line and column inside the file. It is up to consumers of this core
37+
//! * Panic handler - This function takes one argument, a `&panic::PanicInfo`. It is up to consumers of this core
4138
//! library to define this panic function; it is only required to never
42-
//! return. This requires a `lang` attribute named `panic_impl`.
39+
//! return. You should mark your implementation using `#[panic_handler]`.
4340
//!
4441
//! * `rust_eh_personality` - is used by the failure mechanisms of the
4542
//! compiler. This is often mapped to GCC's personality function, but crates

0 commit comments

Comments
 (0)