Skip to content

Commit 5702cfa

Browse files
authored
Rollup merge of #80764 - CAD97:weak-unsized-as-ptr-again, r=RalfJung
Re-stabilize Weak::as_ptr and friends for unsized T As per [T-lang consensus](https://hackmd.io/7r3_is6uTz-163fsOV8Vfg), this uses a branch to handle the dangling case. The discussed optimization of only doing the branch in the T: ?Sized case is left for a followup patch, as doing so is not trivial (as it requires specialization) and not _obviously_ better (as it requires using `wrapping_offset` rather than `offset` more). <details><summary>Basically said optimization</summary> Specialize on `T: Sized`: ```rust fn as_ptr(&self) -> *const T { if [ T is Sized ] || !is_dangling(ptr) { (ptr as *mut T).set_ptr_value( (ptr as *mut u8).wrapping_offset(data_offset) ) } else { ptr::null() } } fn from_raw(*const T) -> Self { if [ T is Sized ] || !ptr.is_null() { let ptr = (ptr as *mut RcBox).set_ptr_value( (ptr as *mut u8).wrapping_offset(-data_offset) ); Weak { ptr } } else { Weak::new() } } ``` (but with more `set_ptr_value` to avoid `Sized` restrictions and maintain metadata.) Written in this fashion, this is not a correctness-critical specialization (i.e. so long as `[ T is Sized ]` is false for unsized `T`, it can be `rand()` for sized `T` without breaking correctness), but it's still touchy, so I'd rather do it in another PR with separate review. --- </details> This effectively reverts #80422 and re-establishes #74160. T-libs [previously signed off](#74160 (comment)) on this stable API change in #74160.
2 parents 40d2506 + c14e919 commit 5702cfa

File tree

5 files changed

+153
-95
lines changed

5 files changed

+153
-95
lines changed

library/alloc/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@
120120
#![feature(receiver_trait)]
121121
#![cfg_attr(bootstrap, feature(min_const_generics))]
122122
#![feature(min_specialization)]
123+
#![feature(set_ptr_value)]
123124
#![feature(slice_ptr_get)]
124125
#![feature(slice_ptr_len)]
125126
#![feature(staged_api)]

library/alloc/src/rc.rs

+36-49
Original file line numberDiff line numberDiff line change
@@ -829,8 +829,8 @@ impl<T: ?Sized> Rc<T> {
829829
let offset = unsafe { data_offset(ptr) };
830830

831831
// Reverse the offset to find the original RcBox.
832-
let fake_ptr = ptr as *mut RcBox<T>;
833-
let rc_ptr = unsafe { set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset)) };
832+
let rc_ptr =
833+
unsafe { (ptr as *mut RcBox<T>).set_ptr_value((ptr as *mut u8).offset(-offset)) };
834834

835835
unsafe { Self::from_ptr(rc_ptr) }
836836
}
@@ -850,7 +850,7 @@ impl<T: ?Sized> Rc<T> {
850850
pub fn downgrade(this: &Self) -> Weak<T> {
851851
this.inner().inc_weak();
852852
// Make sure we do not create a dangling Weak
853-
debug_assert!(!is_dangling(this.ptr));
853+
debug_assert!(!is_dangling(this.ptr.as_ptr()));
854854
Weak { ptr: this.ptr }
855855
}
856856

@@ -1164,7 +1164,7 @@ impl<T: ?Sized> Rc<T> {
11641164
Self::allocate_for_layout(
11651165
Layout::for_value(&*ptr),
11661166
|layout| Global.allocate(layout),
1167-
|mem| set_data_ptr(ptr as *mut T, mem) as *mut RcBox<T>,
1167+
|mem| (ptr as *mut RcBox<T>).set_ptr_value(mem),
11681168
)
11691169
}
11701170
}
@@ -1203,20 +1203,7 @@ impl<T> Rc<[T]> {
12031203
)
12041204
}
12051205
}
1206-
}
1207-
1208-
/// Sets the data pointer of a `?Sized` raw pointer.
1209-
///
1210-
/// For a slice/trait object, this sets the `data` field and leaves the rest
1211-
/// unchanged. For a sized raw pointer, this simply sets the pointer.
1212-
unsafe fn set_data_ptr<T: ?Sized, U>(mut ptr: *mut T, data: *mut U) -> *mut T {
1213-
unsafe {
1214-
ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8);
1215-
}
1216-
ptr
1217-
}
12181206

1219-
impl<T> Rc<[T]> {
12201207
/// Copy elements from slice into newly allocated Rc<\[T\]>
12211208
///
12221209
/// Unsafe because the caller must either take ownership or bind `T: Copy`
@@ -1860,8 +1847,8 @@ impl<T> Weak<T> {
18601847
}
18611848
}
18621849

1863-
pub(crate) fn is_dangling<T: ?Sized>(ptr: NonNull<T>) -> bool {
1864-
let address = ptr.as_ptr() as *mut () as usize;
1850+
pub(crate) fn is_dangling<T: ?Sized>(ptr: *mut T) -> bool {
1851+
let address = ptr as *mut () as usize;
18651852
address == usize::MAX
18661853
}
18671854

@@ -1872,7 +1859,7 @@ struct WeakInner<'a> {
18721859
strong: &'a Cell<usize>,
18731860
}
18741861

1875-
impl<T> Weak<T> {
1862+
impl<T: ?Sized> Weak<T> {
18761863
/// Returns a raw pointer to the object `T` pointed to by this `Weak<T>`.
18771864
///
18781865
/// The pointer is valid only if there are some strong references. The pointer may be dangling,
@@ -1902,15 +1889,15 @@ impl<T> Weak<T> {
19021889
pub fn as_ptr(&self) -> *const T {
19031890
let ptr: *mut RcBox<T> = NonNull::as_ptr(self.ptr);
19041891

1905-
// SAFETY: we must offset the pointer manually, and said pointer may be
1906-
// a dangling weak (usize::MAX) if T is sized. data_offset is safe to call,
1907-
// because we know that a pointer to unsized T was derived from a real
1908-
// unsized T, as dangling weaks are only created for sized T. wrapping_offset
1909-
// is used so that we can use the same code path for the non-dangling
1910-
// unsized case and the potentially dangling sized case.
1911-
unsafe {
1912-
let offset = data_offset(ptr as *mut T);
1913-
set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset))
1892+
if is_dangling(ptr) {
1893+
// If the pointer is dangling, we return the sentinel directly. This cannot be
1894+
// a valid payload address, as the payload is at least as aligned as RcBox (usize).
1895+
ptr as *const T
1896+
} else {
1897+
// SAFETY: if is_dangling returns false, then the pointer is dereferencable.
1898+
// The payload may be dropped at this point, and we have to maintain provenance,
1899+
// so use raw pointer manipulation.
1900+
unsafe { &raw const (*ptr).value }
19141901
}
19151902
}
19161903

@@ -1992,22 +1979,24 @@ impl<T> Weak<T> {
19921979
/// [`new`]: Weak::new
19931980
#[stable(feature = "weak_into_raw", since = "1.45.0")]
19941981
pub unsafe fn from_raw(ptr: *const T) -> Self {
1995-
// SAFETY: data_offset is safe to call, because this pointer originates from a Weak.
19961982
// See Weak::as_ptr for context on how the input pointer is derived.
1997-
let offset = unsafe { data_offset(ptr) };
19981983

1999-
// Reverse the offset to find the original RcBox.
2000-
// SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized).
2001-
let ptr = unsafe {
2002-
set_data_ptr(ptr as *mut RcBox<T>, (ptr as *mut u8).wrapping_offset(-offset))
1984+
let ptr = if is_dangling(ptr as *mut T) {
1985+
// This is a dangling Weak.
1986+
ptr as *mut RcBox<T>
1987+
} else {
1988+
// Otherwise, we're guaranteed the pointer came from a nondangling Weak.
1989+
// SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T.
1990+
let offset = unsafe { data_offset(ptr) };
1991+
// Thus, we reverse the offset to get the whole RcBox.
1992+
// SAFETY: the pointer originated from a Weak, so this offset is safe.
1993+
unsafe { (ptr as *mut RcBox<T>).set_ptr_value((ptr as *mut u8).offset(-offset)) }
20031994
};
20041995

20051996
// SAFETY: we now have recovered the original Weak pointer, so can create the Weak.
20061997
Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } }
20071998
}
2008-
}
20091999

2010-
impl<T: ?Sized> Weak<T> {
20112000
/// Attempts to upgrade the `Weak` pointer to an [`Rc`], delaying
20122001
/// dropping of the inner value if successful.
20132002
///
@@ -2070,7 +2059,7 @@ impl<T: ?Sized> Weak<T> {
20702059
/// (i.e., when this `Weak` was created by `Weak::new`).
20712060
#[inline]
20722061
fn inner(&self) -> Option<WeakInner<'_>> {
2073-
if is_dangling(self.ptr) {
2062+
if is_dangling(self.ptr.as_ptr()) {
20742063
None
20752064
} else {
20762065
// We are careful to *not* create a reference covering the "data" field, as
@@ -2325,21 +2314,19 @@ impl<T: ?Sized> AsRef<T> for Rc<T> {
23252314
#[stable(feature = "pin", since = "1.33.0")]
23262315
impl<T: ?Sized> Unpin for Rc<T> {}
23272316

2328-
/// Get the offset within an `RcBox` for
2329-
/// a payload of type described by a pointer.
2317+
/// Get the offset within an `RcBox` for the payload behind a pointer.
23302318
///
23312319
/// # Safety
23322320
///
2333-
/// This has the same safety requirements as `align_of_val_raw`. In effect:
2334-
///
2335-
/// - This function is safe for any argument if `T` is sized, and
2336-
/// - if `T` is unsized, the pointer must have appropriate pointer metadata
2337-
/// acquired from the real instance that you are getting this offset for.
2321+
/// The pointer must point to (and have valid metadata for) a previously
2322+
/// valid instance of T, but the T is allowed to be dropped.
23382323
unsafe fn data_offset<T: ?Sized>(ptr: *const T) -> isize {
2339-
// Align the unsized value to the end of the `RcBox`.
2340-
// Because it is ?Sized, it will always be the last field in memory.
2341-
// Note: This is a detail of the current implementation of the compiler,
2342-
// and is not a guaranteed language detail. Do not rely on it outside of std.
2324+
// Align the unsized value to the end of the RcBox.
2325+
// Because RcBox is repr(C), it will always be the last field in memory.
2326+
// SAFETY: since the only unsized types possible are slices, trait objects,
2327+
// and extern types, the input safety requirement is currently enough to
2328+
// satisfy the requirements of align_of_val_raw; this is an implementation
2329+
// detail of the language that may not be relied upon outside of std.
23432330
unsafe { data_offset_align(align_of_val_raw(ptr)) }
23442331
}
23452332

library/alloc/src/rc/tests.rs

+41
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,30 @@ fn into_from_weak_raw() {
208208
}
209209
}
210210

211+
#[test]
212+
fn test_into_from_weak_raw_unsized() {
213+
use std::fmt::Display;
214+
use std::string::ToString;
215+
216+
let arc: Rc<str> = Rc::from("foo");
217+
let weak: Weak<str> = Rc::downgrade(&arc);
218+
219+
let ptr = Weak::into_raw(weak.clone());
220+
let weak2 = unsafe { Weak::from_raw(ptr) };
221+
222+
assert_eq!(unsafe { &*ptr }, "foo");
223+
assert!(weak.ptr_eq(&weak2));
224+
225+
let arc: Rc<dyn Display> = Rc::new(123);
226+
let weak: Weak<dyn Display> = Rc::downgrade(&arc);
227+
228+
let ptr = Weak::into_raw(weak.clone());
229+
let weak2 = unsafe { Weak::from_raw(ptr) };
230+
231+
assert_eq!(unsafe { &*ptr }.to_string(), "123");
232+
assert!(weak.ptr_eq(&weak2));
233+
}
234+
211235
#[test]
212236
fn get_mut() {
213237
let mut x = Rc::new(3);
@@ -294,6 +318,23 @@ fn test_unsized() {
294318
assert_eq!(foo, foo.clone());
295319
}
296320

321+
#[test]
322+
fn test_maybe_thin_unsized() {
323+
// If/when custom thin DSTs exist, this test should be updated to use one
324+
use std::ffi::{CStr, CString};
325+
326+
let x: Rc<CStr> = Rc::from(CString::new("swordfish").unwrap().into_boxed_c_str());
327+
assert_eq!(format!("{:?}", x), "\"swordfish\"");
328+
let y: Weak<CStr> = Rc::downgrade(&x);
329+
drop(x);
330+
331+
// At this point, the weak points to a dropped DST
332+
assert!(y.upgrade().is_none());
333+
// But we still need to be able to get the alloc layout to drop.
334+
// CStr has no drop glue, but custom DSTs might, and need to work.
335+
drop(y);
336+
}
337+
297338
#[test]
298339
fn test_from_owned() {
299340
let foo = 123;

library/alloc/src/sync.rs

+34-46
Original file line numberDiff line numberDiff line change
@@ -846,8 +846,7 @@ impl<T: ?Sized> Arc<T> {
846846
let offset = data_offset(ptr);
847847

848848
// Reverse the offset to find the original ArcInner.
849-
let fake_ptr = ptr as *mut ArcInner<T>;
850-
let arc_ptr = set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset));
849+
let arc_ptr = (ptr as *mut ArcInner<T>).set_ptr_value((ptr as *mut u8).offset(-offset));
851850

852851
Self::from_ptr(arc_ptr)
853852
}
@@ -888,7 +887,7 @@ impl<T: ?Sized> Arc<T> {
888887
match this.inner().weak.compare_exchange_weak(cur, cur + 1, Acquire, Relaxed) {
889888
Ok(_) => {
890889
// Make sure we do not create a dangling Weak
891-
debug_assert!(!is_dangling(this.ptr));
890+
debug_assert!(!is_dangling(this.ptr.as_ptr()));
892891
return Weak { ptr: this.ptr };
893892
}
894893
Err(old) => cur = old,
@@ -1131,7 +1130,7 @@ impl<T: ?Sized> Arc<T> {
11311130
Self::allocate_for_layout(
11321131
Layout::for_value(&*ptr),
11331132
|layout| Global.allocate(layout),
1134-
|mem| set_data_ptr(ptr as *mut T, mem) as *mut ArcInner<T>,
1133+
|mem| (ptr as *mut ArcInner<T>).set_ptr_value(mem) as *mut ArcInner<T>,
11351134
)
11361135
}
11371136
}
@@ -1170,20 +1169,7 @@ impl<T> Arc<[T]> {
11701169
)
11711170
}
11721171
}
1173-
}
1174-
1175-
/// Sets the data pointer of a `?Sized` raw pointer.
1176-
///
1177-
/// For a slice/trait object, this sets the `data` field and leaves the rest
1178-
/// unchanged. For a sized raw pointer, this simply sets the pointer.
1179-
unsafe fn set_data_ptr<T: ?Sized, U>(mut ptr: *mut T, data: *mut U) -> *mut T {
1180-
unsafe {
1181-
ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8);
1182-
}
1183-
ptr
1184-
}
11851172

1186-
impl<T> Arc<[T]> {
11871173
/// Copy elements from slice into newly allocated Arc<\[T\]>
11881174
///
11891175
/// Unsafe because the caller must either take ownership or bind `T: Copy`.
@@ -1653,7 +1639,7 @@ struct WeakInner<'a> {
16531639
strong: &'a atomic::AtomicUsize,
16541640
}
16551641

1656-
impl<T> Weak<T> {
1642+
impl<T: ?Sized> Weak<T> {
16571643
/// Returns a raw pointer to the object `T` pointed to by this `Weak<T>`.
16581644
///
16591645
/// The pointer is valid only if there are some strong references. The pointer may be dangling,
@@ -1683,15 +1669,15 @@ impl<T> Weak<T> {
16831669
pub fn as_ptr(&self) -> *const T {
16841670
let ptr: *mut ArcInner<T> = NonNull::as_ptr(self.ptr);
16851671

1686-
// SAFETY: we must offset the pointer manually, and said pointer may be
1687-
// a dangling weak (usize::MAX) if T is sized. data_offset is safe to call,
1688-
// because we know that a pointer to unsized T was derived from a real
1689-
// unsized T, as dangling weaks are only created for sized T. wrapping_offset
1690-
// is used so that we can use the same code path for the non-dangling
1691-
// unsized case and the potentially dangling sized case.
1692-
unsafe {
1693-
let offset = data_offset(ptr as *mut T);
1694-
set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset))
1672+
if is_dangling(ptr) {
1673+
// If the pointer is dangling, we return the sentinel directly. This cannot be
1674+
// a valid payload address, as the payload is at least as aligned as ArcInner (usize).
1675+
ptr as *const T
1676+
} else {
1677+
// SAFETY: if is_dangling returns false, then the pointer is dereferencable.
1678+
// The payload may be dropped at this point, and we have to maintain provenance,
1679+
// so use raw pointer manipulation.
1680+
unsafe { &raw mut (*ptr).data }
16951681
}
16961682
}
16971683

@@ -1773,18 +1759,22 @@ impl<T> Weak<T> {
17731759
/// [`forget`]: std::mem::forget
17741760
#[stable(feature = "weak_into_raw", since = "1.45.0")]
17751761
pub unsafe fn from_raw(ptr: *const T) -> Self {
1776-
// SAFETY: data_offset is safe to call, because this pointer originates from a Weak.
17771762
// See Weak::as_ptr for context on how the input pointer is derived.
1778-
let offset = unsafe { data_offset(ptr) };
17791763

1780-
// Reverse the offset to find the original ArcInner.
1781-
// SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized)
1782-
let ptr = unsafe {
1783-
set_data_ptr(ptr as *mut ArcInner<T>, (ptr as *mut u8).wrapping_offset(-offset))
1764+
let ptr = if is_dangling(ptr as *mut T) {
1765+
// This is a dangling Weak.
1766+
ptr as *mut ArcInner<T>
1767+
} else {
1768+
// Otherwise, we're guaranteed the pointer came from a nondangling Weak.
1769+
// SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T.
1770+
let offset = unsafe { data_offset(ptr) };
1771+
// Thus, we reverse the offset to get the whole RcBox.
1772+
// SAFETY: the pointer originated from a Weak, so this offset is safe.
1773+
unsafe { (ptr as *mut ArcInner<T>).set_ptr_value((ptr as *mut u8).offset(-offset)) }
17841774
};
17851775

17861776
// SAFETY: we now have recovered the original Weak pointer, so can create the Weak.
1787-
unsafe { Weak { ptr: NonNull::new_unchecked(ptr) } }
1777+
Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } }
17881778
}
17891779
}
17901780

@@ -1889,7 +1879,7 @@ impl<T: ?Sized> Weak<T> {
18891879
/// (i.e., when this `Weak` was created by `Weak::new`).
18901880
#[inline]
18911881
fn inner(&self) -> Option<WeakInner<'_>> {
1892-
if is_dangling(self.ptr) {
1882+
if is_dangling(self.ptr.as_ptr()) {
18931883
None
18941884
} else {
18951885
// We are careful to *not* create a reference covering the "data" field, as
@@ -2469,21 +2459,19 @@ impl<T: ?Sized> AsRef<T> for Arc<T> {
24692459
#[stable(feature = "pin", since = "1.33.0")]
24702460
impl<T: ?Sized> Unpin for Arc<T> {}
24712461

2472-
/// Get the offset within an `ArcInner` for
2473-
/// a payload of type described by a pointer.
2462+
/// Get the offset within an `ArcInner` for the payload behind a pointer.
24742463
///
24752464
/// # Safety
24762465
///
2477-
/// This has the same safety requirements as `align_of_val_raw`. In effect:
2478-
///
2479-
/// - This function is safe for any argument if `T` is sized, and
2480-
/// - if `T` is unsized, the pointer must have appropriate pointer metadata
2481-
/// acquired from the real instance that you are getting this offset for.
2466+
/// The pointer must point to (and have valid metadata for) a previously
2467+
/// valid instance of T, but the T is allowed to be dropped.
24822468
unsafe fn data_offset<T: ?Sized>(ptr: *const T) -> isize {
2483-
// Align the unsized value to the end of the `ArcInner`.
2484-
// Because it is `?Sized`, it will always be the last field in memory.
2485-
// Note: This is a detail of the current implementation of the compiler,
2486-
// and is not a guaranteed language detail. Do not rely on it outside of std.
2469+
// Align the unsized value to the end of the ArcInner.
2470+
// Because RcBox is repr(C), it will always be the last field in memory.
2471+
// SAFETY: since the only unsized types possible are slices, trait objects,
2472+
// and extern types, the input safety requirement is currently enough to
2473+
// satisfy the requirements of align_of_val_raw; this is an implementation
2474+
// detail of the language that may not be relied upon outside of std.
24872475
unsafe { data_offset_align(align_of_val_raw(ptr)) }
24882476
}
24892477

0 commit comments

Comments
 (0)