Skip to content

Commit 47fe1a3

Browse files
authored
Rollup merge of #111609 - LegionMammal978:internal-unsafe, r=thomcc
Mark internal functions and traits unsafe to reflect preconditions No semantics are changed in this PR; I only mark some functions and and a trait `unsafe` which already had implicit preconditions. Although it seems somewhat redundant for `numfmt::Part::Copy` to contain a `&[u8]` instead of a `&str`, given that all of its current consumers ultimately expect valid UTF-8. Is the type also intended to work for byte-slice formatting in the future?
2 parents df86200 + 7748109 commit 47fe1a3

File tree

5 files changed

+57
-32
lines changed

5 files changed

+57
-32
lines changed

library/alloc/src/vec/in_place_collect.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,8 @@ where
178178
)
179179
};
180180

181-
let len = SpecInPlaceCollect::collect_in_place(&mut iterator, dst_buf, dst_end);
181+
// SAFETY: `dst_buf` and `dst_end` are the start and end of the buffer.
182+
let len = unsafe { SpecInPlaceCollect::collect_in_place(&mut iterator, dst_buf, dst_end) };
182183

183184
let src = unsafe { iterator.as_inner().as_into_iter() };
184185
// check if SourceIter contract was upheld
@@ -239,15 +240,15 @@ trait SpecInPlaceCollect<T, I>: Iterator<Item = T> {
239240
/// `Iterator::__iterator_get_unchecked` calls with a `TrustedRandomAccessNoCoerce` bound
240241
/// on `I` which means the caller of this method must take the safety conditions
241242
/// of that trait into consideration.
242-
fn collect_in_place(&mut self, dst: *mut T, end: *const T) -> usize;
243+
unsafe fn collect_in_place(&mut self, dst: *mut T, end: *const T) -> usize;
243244
}
244245

245246
impl<T, I> SpecInPlaceCollect<T, I> for I
246247
where
247248
I: Iterator<Item = T>,
248249
{
249250
#[inline]
250-
default fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize {
251+
default unsafe fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize {
251252
// use try-fold since
252253
// - it vectorizes better for some iterator adapters
253254
// - unlike most internal iteration methods, it only takes a &mut self
@@ -265,7 +266,7 @@ where
265266
I: Iterator<Item = T> + TrustedRandomAccessNoCoerce,
266267
{
267268
#[inline]
268-
fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize {
269+
unsafe fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize {
269270
let len = self.size();
270271
let mut drop_guard = InPlaceDrop { inner: dst_buf, dst: dst_buf };
271272
for i in 0..len {

library/core/src/fmt/float.rs

+8-4
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ where
4545
&mut buf,
4646
&mut parts,
4747
);
48-
fmt.pad_formatted_parts(&formatted)
48+
// SAFETY: `to_exact_fixed_str` and `format_exact` produce only ASCII characters.
49+
unsafe { fmt.pad_formatted_parts(&formatted) }
4950
}
5051

5152
// Don't inline this so callers that call both this and the above won't wind
@@ -71,7 +72,8 @@ where
7172
&mut buf,
7273
&mut parts,
7374
);
74-
fmt.pad_formatted_parts(&formatted)
75+
// SAFETY: `to_shortest_str` and `format_shortest` produce only ASCII characters.
76+
unsafe { fmt.pad_formatted_parts(&formatted) }
7577
}
7678

7779
fn float_to_decimal_display<T>(fmt: &mut Formatter<'_>, num: &T) -> Result
@@ -116,7 +118,8 @@ where
116118
&mut buf,
117119
&mut parts,
118120
);
119-
fmt.pad_formatted_parts(&formatted)
121+
// SAFETY: `to_exact_exp_str` and `format_exact` produce only ASCII characters.
122+
unsafe { fmt.pad_formatted_parts(&formatted) }
120123
}
121124

122125
// Don't inline this so callers that call both this and the above won't wind
@@ -143,7 +146,8 @@ where
143146
&mut buf,
144147
&mut parts,
145148
);
146-
fmt.pad_formatted_parts(&formatted)
149+
// SAFETY: `to_shortest_exp_str` and `format_shortest` produce only ASCII characters.
150+
unsafe { fmt.pad_formatted_parts(&formatted) }
147151
}
148152

149153
// Common code of floating point LowerExp and UpperExp.

library/core/src/fmt/mod.rs

+27-15
Original file line numberDiff line numberDiff line change
@@ -1415,7 +1415,11 @@ impl<'a> Formatter<'a> {
14151415
/// Takes the formatted parts and applies the padding.
14161416
/// Assumes that the caller already has rendered the parts with required precision,
14171417
/// so that `self.precision` can be ignored.
1418-
fn pad_formatted_parts(&mut self, formatted: &numfmt::Formatted<'_>) -> Result {
1418+
///
1419+
/// # Safety
1420+
///
1421+
/// Any `numfmt::Part::Copy` parts in `formatted` must contain valid UTF-8.
1422+
unsafe fn pad_formatted_parts(&mut self, formatted: &numfmt::Formatted<'_>) -> Result {
14191423
if let Some(mut width) = self.width {
14201424
// for the sign-aware zero padding, we render the sign first and
14211425
// behave as if we had no sign from the beginning.
@@ -1438,31 +1442,35 @@ impl<'a> Formatter<'a> {
14381442
let len = formatted.len();
14391443
let ret = if width <= len {
14401444
// no padding
1441-
self.write_formatted_parts(&formatted)
1445+
// SAFETY: Per the precondition.
1446+
unsafe { self.write_formatted_parts(&formatted) }
14421447
} else {
14431448
let post_padding = self.padding(width - len, Alignment::Right)?;
1444-
self.write_formatted_parts(&formatted)?;
1449+
// SAFETY: Per the precondition.
1450+
unsafe {
1451+
self.write_formatted_parts(&formatted)?;
1452+
}
14451453
post_padding.write(self)
14461454
};
14471455
self.fill = old_fill;
14481456
self.align = old_align;
14491457
ret
14501458
} else {
14511459
// this is the common case and we take a shortcut
1452-
self.write_formatted_parts(formatted)
1460+
// SAFETY: Per the precondition.
1461+
unsafe { self.write_formatted_parts(formatted) }
14531462
}
14541463
}
14551464

1456-
fn write_formatted_parts(&mut self, formatted: &numfmt::Formatted<'_>) -> Result {
1457-
fn write_bytes(buf: &mut dyn Write, s: &[u8]) -> Result {
1465+
/// # Safety
1466+
///
1467+
/// Any `numfmt::Part::Copy` parts in `formatted` must contain valid UTF-8.
1468+
unsafe fn write_formatted_parts(&mut self, formatted: &numfmt::Formatted<'_>) -> Result {
1469+
unsafe fn write_bytes(buf: &mut dyn Write, s: &[u8]) -> Result {
14581470
// SAFETY: This is used for `numfmt::Part::Num` and `numfmt::Part::Copy`.
14591471
// It's safe to use for `numfmt::Part::Num` since every char `c` is between
1460-
// `b'0'` and `b'9'`, which means `s` is valid UTF-8.
1461-
// It's also probably safe in practice to use for `numfmt::Part::Copy(buf)`
1462-
// since `buf` should be plain ASCII, but it's possible for someone to pass
1463-
// in a bad value for `buf` into `numfmt::to_shortest_str` since it is a
1464-
// public function.
1465-
// FIXME: Determine whether this could result in UB.
1472+
// `b'0'` and `b'9'`, which means `s` is valid UTF-8. It's safe to use for
1473+
// `numfmt::Part::Copy` due to this function's precondition.
14661474
buf.write_str(unsafe { str::from_utf8_unchecked(s) })
14671475
}
14681476

@@ -1489,11 +1497,15 @@ impl<'a> Formatter<'a> {
14891497
*c = b'0' + (v % 10) as u8;
14901498
v /= 10;
14911499
}
1492-
write_bytes(self.buf, &s[..len])?;
1500+
// SAFETY: Per the precondition.
1501+
unsafe {
1502+
write_bytes(self.buf, &s[..len])?;
1503+
}
14931504
}
1494-
numfmt::Part::Copy(buf) => {
1505+
// SAFETY: Per the precondition.
1506+
numfmt::Part::Copy(buf) => unsafe {
14951507
write_bytes(self.buf, buf)?;
1496-
}
1508+
},
14971509
}
14981510
}
14991511
Ok(())

library/core/src/fmt/num.rs

+10-5
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,12 @@ impl_int! { i8 i16 i32 i64 i128 isize }
5252
impl_uint! { u8 u16 u32 u64 u128 usize }
5353

5454
/// A type that represents a specific radix
55+
///
56+
/// # Safety
57+
///
58+
/// `digit` must return an ASCII character.
5559
#[doc(hidden)]
56-
trait GenericRadix: Sized {
60+
unsafe trait GenericRadix: Sized {
5761
/// The number of digits.
5862
const BASE: u8;
5963

@@ -129,7 +133,7 @@ struct UpperHex;
129133

130134
macro_rules! radix {
131135
($T:ident, $base:expr, $prefix:expr, $($x:pat => $conv:expr),+) => {
132-
impl GenericRadix for $T {
136+
unsafe impl GenericRadix for $T {
133137
const BASE: u8 = $base;
134138
const PREFIX: &'static str = $prefix;
135139
fn digit(x: u8) -> u8 {
@@ -407,7 +411,7 @@ macro_rules! impl_Exp {
407411
let parts = &[
408412
numfmt::Part::Copy(buf_slice),
409413
numfmt::Part::Zero(added_precision),
410-
numfmt::Part::Copy(exp_slice)
414+
numfmt::Part::Copy(exp_slice),
411415
];
412416
let sign = if !is_nonnegative {
413417
"-"
@@ -416,8 +420,9 @@ macro_rules! impl_Exp {
416420
} else {
417421
""
418422
};
419-
let formatted = numfmt::Formatted{sign, parts};
420-
f.pad_formatted_parts(&formatted)
423+
let formatted = numfmt::Formatted { sign, parts };
424+
// SAFETY: `buf_slice` and `exp_slice` contain only ASCII characters.
425+
unsafe { f.pad_formatted_parts(&formatted) }
421426
}
422427

423428
$(

library/std/src/path.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -733,8 +733,9 @@ impl<'a> Components<'a> {
733733
}
734734
}
735735

736-
// parse a given byte sequence into the corresponding path component
737-
fn parse_single_component<'b>(&self, comp: &'b [u8]) -> Option<Component<'b>> {
736+
// parse a given byte sequence following the OsStr encoding into the
737+
// corresponding path component
738+
unsafe fn parse_single_component<'b>(&self, comp: &'b [u8]) -> Option<Component<'b>> {
738739
match comp {
739740
b"." if self.prefix_verbatim() => Some(Component::CurDir),
740741
b"." => None, // . components are normalized away, except at
@@ -754,7 +755,8 @@ impl<'a> Components<'a> {
754755
None => (0, self.path),
755756
Some(i) => (1, &self.path[..i]),
756757
};
757-
(comp.len() + extra, self.parse_single_component(comp))
758+
// SAFETY: `comp` is a valid substring, since it is split on a separator.
759+
(comp.len() + extra, unsafe { self.parse_single_component(comp) })
758760
}
759761

760762
// parse a component from the right, saying how many bytes to consume to
@@ -766,7 +768,8 @@ impl<'a> Components<'a> {
766768
None => (0, &self.path[start..]),
767769
Some(i) => (1, &self.path[start + i + 1..]),
768770
};
769-
(comp.len() + extra, self.parse_single_component(comp))
771+
// SAFETY: `comp` is a valid substring, since it is split on a separator.
772+
(comp.len() + extra, unsafe { self.parse_single_component(comp) })
770773
}
771774

772775
// trim away repeated separators (i.e., empty components) on the left

0 commit comments

Comments
 (0)