Skip to content

Commit f6072ca

Browse files
committed
Auto merge of rust-lang#72813 - RalfJung:rollup-4ko6q8j, r=RalfJung
Rollup of 5 pull requests Successful merges: - rust-lang#72683 (from_u32_unchecked: check validity, and fix UB in Wtf8) - rust-lang#72715 (Account for trailing comma when suggesting `where` clauses) - rust-lang#72745 (generalize Borrow<[T]> for Interned<'tcx, List<T>>) - rust-lang#72749 (Update stdarch submodule to latest head) - rust-lang#72781 (Use `LocalDefId` instead of `NodeId` in `resolve_str_path_error`) Failed merges: r? @ghost
2 parents 4b1f86a + cbc73dc commit f6072ca

File tree

14 files changed

+137
-145
lines changed

14 files changed

+137
-145
lines changed

src/libcore/char/convert.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ pub fn from_u32(i: u32) -> Option<char> {
9999
#[inline]
100100
#[stable(feature = "char_from_unchecked", since = "1.5.0")]
101101
pub unsafe fn from_u32_unchecked(i: u32) -> char {
102-
transmute(i)
102+
if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { transmute(i) }
103103
}
104104

105105
#[stable(feature = "char_convert", since = "1.13.0")]
@@ -218,7 +218,7 @@ impl TryFrom<u32> for char {
218218
Err(CharTryFromError(()))
219219
} else {
220220
// SAFETY: checked that it's a legal unicode value
221-
Ok(unsafe { from_u32_unchecked(i) })
221+
Ok(unsafe { transmute(i) })
222222
}
223223
}
224224
}

src/libcore/char/methods.rs

+101-62
Original file line numberDiff line numberDiff line change
@@ -593,16 +593,7 @@ impl char {
593593
#[stable(feature = "rust1", since = "1.0.0")]
594594
#[inline]
595595
pub fn len_utf8(self) -> usize {
596-
let code = self as u32;
597-
if code < MAX_ONE_B {
598-
1
599-
} else if code < MAX_TWO_B {
600-
2
601-
} else if code < MAX_THREE_B {
602-
3
603-
} else {
604-
4
605-
}
596+
len_utf8(self as u32)
606597
}
607598

608599
/// Returns the number of 16-bit code units this `char` would need if
@@ -670,36 +661,8 @@ impl char {
670661
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
671662
#[inline]
672663
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
673-
let code = self as u32;
674-
let len = self.len_utf8();
675-
match (len, &mut dst[..]) {
676-
(1, [a, ..]) => {
677-
*a = code as u8;
678-
}
679-
(2, [a, b, ..]) => {
680-
*a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
681-
*b = (code & 0x3F) as u8 | TAG_CONT;
682-
}
683-
(3, [a, b, c, ..]) => {
684-
*a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
685-
*b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
686-
*c = (code & 0x3F) as u8 | TAG_CONT;
687-
}
688-
(4, [a, b, c, d, ..]) => {
689-
*a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
690-
*b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
691-
*c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
692-
*d = (code & 0x3F) as u8 | TAG_CONT;
693-
}
694-
_ => panic!(
695-
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
696-
len,
697-
code,
698-
dst.len(),
699-
),
700-
};
701-
// SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
702-
unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
664+
// SAFETY: `char` is not a surrogate, so this is valid UTF-8.
665+
unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
703666
}
704667

705668
/// Encodes this character as UTF-16 into the provided `u16` buffer,
@@ -739,28 +702,7 @@ impl char {
739702
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
740703
#[inline]
741704
pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
742-
let mut code = self as u32;
743-
// SAFETY: each arm checks whether there are enough bits to write into
744-
unsafe {
745-
if (code & 0xFFFF) == code && !dst.is_empty() {
746-
// The BMP falls through (assuming non-surrogate, as it should)
747-
*dst.get_unchecked_mut(0) = code as u16;
748-
slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
749-
} else if dst.len() >= 2 {
750-
// Supplementary planes break into surrogates.
751-
code -= 0x1_0000;
752-
*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
753-
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
754-
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
755-
} else {
756-
panic!(
757-
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
758-
from_u32_unchecked(code).len_utf16(),
759-
code,
760-
dst.len(),
761-
)
762-
}
763-
}
705+
encode_utf16_raw(self as u32, dst)
764706
}
765707

766708
/// Returns `true` if this `char` has the `Alphabetic` property.
@@ -1673,3 +1615,100 @@ impl char {
16731615
}
16741616
}
16751617
}
1618+
1619+
#[inline]
1620+
fn len_utf8(code: u32) -> usize {
1621+
if code < MAX_ONE_B {
1622+
1
1623+
} else if code < MAX_TWO_B {
1624+
2
1625+
} else if code < MAX_THREE_B {
1626+
3
1627+
} else {
1628+
4
1629+
}
1630+
}
1631+
1632+
/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
1633+
/// and then returns the subslice of the buffer that contains the encoded character.
1634+
///
1635+
/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1636+
/// (Creating a `char` in the surrogate range is UB.)
1637+
/// The result is valid [generalized UTF-8] but not valid UTF-8.
1638+
///
1639+
/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1640+
///
1641+
/// # Panics
1642+
///
1643+
/// Panics if the buffer is not large enough.
1644+
/// A buffer of length four is large enough to encode any `char`.
1645+
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1646+
#[doc(hidden)]
1647+
#[inline]
1648+
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1649+
let len = len_utf8(code);
1650+
match (len, &mut dst[..]) {
1651+
(1, [a, ..]) => {
1652+
*a = code as u8;
1653+
}
1654+
(2, [a, b, ..]) => {
1655+
*a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
1656+
*b = (code & 0x3F) as u8 | TAG_CONT;
1657+
}
1658+
(3, [a, b, c, ..]) => {
1659+
*a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
1660+
*b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1661+
*c = (code & 0x3F) as u8 | TAG_CONT;
1662+
}
1663+
(4, [a, b, c, d, ..]) => {
1664+
*a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
1665+
*b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
1666+
*c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1667+
*d = (code & 0x3F) as u8 | TAG_CONT;
1668+
}
1669+
_ => panic!(
1670+
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
1671+
len,
1672+
code,
1673+
dst.len(),
1674+
),
1675+
};
1676+
&mut dst[..len]
1677+
}
1678+
1679+
/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
1680+
/// and then returns the subslice of the buffer that contains the encoded character.
1681+
///
1682+
/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
1683+
/// (Creating a `char` in the surrogate range is UB.)
1684+
///
1685+
/// # Panics
1686+
///
1687+
/// Panics if the buffer is not large enough.
1688+
/// A buffer of length 2 is large enough to encode any `char`.
1689+
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1690+
#[doc(hidden)]
1691+
#[inline]
1692+
pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
    // Values that fit in 16 bits are written as a single unit (this includes
    // the surrogate range, which this raw encoder accepts); anything larger is
    // split into a surrogate pair.
    let is_single_unit = (code & 0xFFFF) == code;
    // SAFETY: each arm checks that `dst` has enough room before writing to it,
    // and the returned slice covers only the units that were just written.
    unsafe {
        if is_single_unit && !dst.is_empty() {
            // The BMP falls through
            *dst.get_unchecked_mut(0) = code as u16;
            slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
        } else if dst.len() >= 2 {
            // Supplementary planes break into surrogates.
            code -= 0x1_0000;
            *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
            *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
            slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
        } else {
            // Derive the required length from the raw value. Building a `char`
            // via `from_u32_unchecked(code)` just to ask for its length would
            // be undefined behavior when `code` is a surrogate, which is a
            // legitimate input here.
            panic!(
                "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
                if is_single_unit { 1 } else { 2 },
                code,
                dst.len(),
            )
        }
    }
}

src/libcore/char/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};
3737
#[stable(feature = "unicode_version", since = "1.45.0")]
3838
pub use crate::unicode::UNICODE_VERSION;
3939

40+
// perma-unstable re-exports
41+
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
42+
pub use self::methods::encode_utf16_raw;
43+
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
44+
pub use self::methods::encode_utf8_raw;
45+
4046
use crate::fmt::{self, Write};
4147
use crate::iter::FusedIterator;
4248

src/librustc_hir/hir.rs

+7
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,13 @@ impl WhereClause<'_> {
524524
pub fn span_for_predicates_or_empty_place(&self) -> Span {
525525
self.span
526526
}
527+
528+
/// `Span` where further predicates would be suggested, accounting for trailing commas, like
529+
/// in `fn foo<T>(t: T) where T: Foo,` so we don't suggest two trailing commas.
530+
pub fn tail_span_for_suggestion(&self) -> Span {
    // Zero-width span at the very end of the where-clause (or of its empty place).
    let end = self.span_for_predicates_or_empty_place().shrink_to_hi();
    // Start right after the last predicate when there is one; otherwise the
    // suggestion span collapses to `end`.
    let start = match self.predicates.last() {
        Some(pred) => pred.span(),
        None => end,
    };
    start.shrink_to_hi().to(end)
}
527534
}
528535

529536
/// A single predicate in a where-clause.

src/librustc_middle/ty/context.rs

+2-48
Original file line numberDiff line numberDiff line change
@@ -1971,32 +1971,8 @@ impl<'tcx, T: Hash> Hash for Interned<'tcx, List<T>> {
19711971
}
19721972
}
19731973

1974-
impl<'tcx> Borrow<[Ty<'tcx>]> for Interned<'tcx, List<Ty<'tcx>>> {
1975-
fn borrow<'a>(&'a self) -> &'a [Ty<'tcx>] {
1976-
&self.0[..]
1977-
}
1978-
}
1979-
1980-
impl<'tcx> Borrow<[CanonicalVarInfo]> for Interned<'tcx, List<CanonicalVarInfo>> {
1981-
fn borrow(&self) -> &[CanonicalVarInfo] {
1982-
&self.0[..]
1983-
}
1984-
}
1985-
1986-
impl<'tcx> Borrow<[GenericArg<'tcx>]> for Interned<'tcx, InternalSubsts<'tcx>> {
1987-
fn borrow<'a>(&'a self) -> &'a [GenericArg<'tcx>] {
1988-
&self.0[..]
1989-
}
1990-
}
1991-
1992-
impl<'tcx> Borrow<[ProjectionKind]> for Interned<'tcx, List<ProjectionKind>> {
1993-
fn borrow(&self) -> &[ProjectionKind] {
1994-
&self.0[..]
1995-
}
1996-
}
1997-
1998-
impl<'tcx> Borrow<[PlaceElem<'tcx>]> for Interned<'tcx, List<PlaceElem<'tcx>>> {
1999-
fn borrow(&self) -> &[PlaceElem<'tcx>] {
1974+
// One generic impl covers every `List<T>` element type: the interned list is
// borrowed as a plain slice of its elements.
impl<'tcx, T> Borrow<[T]> for Interned<'tcx, List<T>> {
    fn borrow(&self) -> &[T] {
        &self.0[..]
    }
}
@@ -2007,34 +1983,12 @@ impl<'tcx> Borrow<RegionKind> for Interned<'tcx, RegionKind> {
20071983
}
20081984
}
20091985

2010-
impl<'tcx> Borrow<[ExistentialPredicate<'tcx>]>
2011-
for Interned<'tcx, List<ExistentialPredicate<'tcx>>>
2012-
{
2013-
fn borrow<'a>(&'a self) -> &'a [ExistentialPredicate<'tcx>] {
2014-
&self.0[..]
2015-
}
2016-
}
2017-
2018-
impl<'tcx> Borrow<[Predicate<'tcx>]> for Interned<'tcx, List<Predicate<'tcx>>> {
2019-
fn borrow<'a>(&'a self) -> &'a [Predicate<'tcx>] {
2020-
&self.0[..]
2021-
}
2022-
}
2023-
20241986
// Borrows the interned constant as the `Const` value it wraps.
impl<'tcx> Borrow<Const<'tcx>> for Interned<'tcx, Const<'tcx>> {
    fn borrow(&self) -> &Const<'tcx> {
        &self.0
    }
}
20291991

2030-
impl<'tcx> Borrow<[traits::ChalkEnvironmentClause<'tcx>]>
2031-
for Interned<'tcx, List<traits::ChalkEnvironmentClause<'tcx>>>
2032-
{
2033-
fn borrow<'a>(&'a self) -> &'a [traits::ChalkEnvironmentClause<'tcx>] {
2034-
&self.0[..]
2035-
}
2036-
}
2037-
20381992
impl<'tcx> Borrow<PredicateKind<'tcx>> for Interned<'tcx, PredicateKind<'tcx>> {
20391993
fn borrow<'a>(&'a self) -> &'a PredicateKind<'tcx> {
20401994
&self.0

src/librustc_middle/ty/diagnostics.rs

+1-15
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use rustc_errors::{Applicability, DiagnosticBuilder};
77
use rustc_hir as hir;
88
use rustc_hir::def_id::DefId;
99
use rustc_hir::{QPath, TyKind, WhereBoundPredicate, WherePredicate};
10-
use rustc_span::{BytePos, Span};
1110

1211
impl<'tcx> TyS<'tcx> {
1312
/// Similar to `TyS::is_primitive`, but also considers inferred numeric values to be primitive.
@@ -221,24 +220,11 @@ pub fn suggest_constraining_type_param(
221220
}
222221
}
223222

224-
let where_clause_span = generics.where_clause.span_for_predicates_or_empty_place();
225-
// Account for `fn foo<T>(t: T) where T: Foo,` so we don't suggest two trailing commas.
226-
let mut trailing_comma = false;
227-
if let Ok(snippet) = tcx.sess.source_map().span_to_snippet(where_clause_span) {
228-
trailing_comma = snippet.ends_with(',');
229-
}
230-
let where_clause_span = if trailing_comma {
231-
let hi = where_clause_span.hi();
232-
Span::new(hi - BytePos(1), hi, where_clause_span.ctxt())
233-
} else {
234-
where_clause_span.shrink_to_hi()
235-
};
236-
237223
match &param_spans[..] {
238224
&[&param_span] => suggest_restrict(param_span.shrink_to_hi()),
239225
_ => {
240226
err.span_suggestion_verbose(
241-
where_clause_span,
227+
generics.where_clause.tail_span_for_suggestion(),
242228
&msg_restrict_type_further,
243229
format!(", {}: {}", param_name, constraint),
244230
Applicability::MachineApplicable,

src/librustc_resolve/lib.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -2902,7 +2902,7 @@ impl<'a> Resolver<'a> {
29022902
span: Span,
29032903
path_str: &str,
29042904
ns: Namespace,
2905-
module_id: NodeId,
2905+
module_id: LocalDefId,
29062906
) -> Result<(ast::Path, Res), ()> {
29072907
let path = if path_str.starts_with("::") {
29082908
ast::Path {
@@ -2922,10 +2922,7 @@ impl<'a> Resolver<'a> {
29222922
.collect(),
29232923
}
29242924
};
2925-
let module = self.block_map.get(&module_id).copied().unwrap_or_else(|| {
2926-
let def_id = self.definitions.local_def_id(module_id);
2927-
self.module_map.get(&def_id).copied().unwrap_or(self.graph_root)
2928-
});
2925+
let module = self.module_map.get(&module_id).copied().unwrap_or(self.graph_root);
29292926
let parent_scope = &ParentScope::module(module);
29302927
let res = self.resolve_ast_path(&path, ns, parent_scope).map_err(|_| ())?;
29312928
Ok((path, res))

src/librustc_trait_selection/traits/error_reporting/suggestions.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ pub trait InferCtxtExt<'tcx> {
170170

171171
fn predicate_constraint(generics: &hir::Generics<'_>, pred: String) -> (Span, String) {
172172
(
173-
generics.where_clause.span_for_predicates_or_empty_place().shrink_to_hi(),
173+
generics.where_clause.tail_span_for_suggestion(),
174174
format!(
175175
"{} {}",
176176
if !generics.where_clause.predicates.is_empty() { "," } else { " where" },

src/librustdoc/core.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use rustc_ast::ast::CRATE_NODE_ID;
21
use rustc_attr as attr;
32
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
43
use rustc_data_structures::sync::{self, Lrc};
@@ -7,7 +6,7 @@ use rustc_errors::emitter::{Emitter, EmitterWriter};
76
use rustc_errors::json::JsonEmitter;
87
use rustc_feature::UnstableFeatures;
98
use rustc_hir::def::Namespace::TypeNS;
10-
use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LOCAL_CRATE};
9+
use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_INDEX, LOCAL_CRATE};
1110
use rustc_hir::HirId;
1211
use rustc_interface::interface;
1312
use rustc_middle::middle::cstore::CrateStore;
@@ -390,7 +389,12 @@ pub fn run_core(options: RustdocOptions) -> (clean::Crate, RenderInfo, RenderOpt
390389
resolver.borrow_mut().access(|resolver| {
391390
for extern_name in &extern_names {
392391
resolver
393-
.resolve_str_path_error(DUMMY_SP, extern_name, TypeNS, CRATE_NODE_ID)
392+
.resolve_str_path_error(
393+
DUMMY_SP,
394+
extern_name,
395+
TypeNS,
396+
LocalDefId { local_def_index: CRATE_DEF_INDEX },
397+
)
394398
.unwrap_or_else(|()| {
395399
panic!("Unable to resolve external crate {}", extern_name)
396400
});

0 commit comments

Comments
 (0)