Skip to content

Commit 304b7f8

Browse files
committed
Auto merge of #129778 - RalfJung:interp-lossy-typed-copy, r=saethlin
interpret: make typed copies lossy wrt provenance and padding A "typed copy" in Rust can be a lossy process: when copying at type `usize` (or any other non-pointer type), if the original memory had any provenance, that provenance is lost. When copying at pointer type, if the original memory had partial provenance (i.e., not the same provenance for all bytes), that provenance is lost. When copying any type with padding, the contents of padding are lost. This PR equips our validity-checking pass with the ability to reset provenance and padding according to those rules. Can be reviewed commit-by-commit. The first three commits are just preparation without any functional change. Fixes rust-lang/miri#845 Fixes rust-lang/miri#2182
2 parents 712463d + 0a70924 commit 304b7f8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+1246
-310
lines changed

compiler/rustc_const_eval/src/const_eval/eval_queries.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ fn eval_body_using_ecx<'tcx, R: InterpretationResult<'tcx>>(
9494
let intern_result = intern_const_alloc_recursive(ecx, intern_kind, &ret);
9595

9696
// Since evaluation had no errors, validate the resulting constant.
97-
const_validate_mplace(&ecx, &ret, cid)?;
97+
const_validate_mplace(ecx, &ret, cid)?;
9898

9999
// Only report this after validation, as validaiton produces much better diagnostics.
100100
// FIXME: ensure validation always reports this and stop making interning care about it.
@@ -391,7 +391,7 @@ fn eval_in_interpreter<'tcx, R: InterpretationResult<'tcx>>(
391391

392392
#[inline(always)]
393393
fn const_validate_mplace<'tcx>(
394-
ecx: &InterpCx<'tcx, CompileTimeMachine<'tcx>>,
394+
ecx: &mut InterpCx<'tcx, CompileTimeMachine<'tcx>>,
395395
mplace: &MPlaceTy<'tcx>,
396396
cid: GlobalId<'tcx>,
397397
) -> Result<(), ErrorHandled> {

compiler/rustc_const_eval/src/const_eval/machine.rs

+22-5
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1-
use std::borrow::Borrow;
1+
use std::borrow::{Borrow, Cow};
22
use std::fmt;
33
use std::hash::Hash;
44
use std::ops::ControlFlow;
55

66
use rustc_ast::Mutability;
7-
use rustc_data_structures::fx::{FxIndexMap, IndexEntry};
7+
use rustc_data_structures::fx::{FxHashMap, FxIndexMap, IndexEntry};
88
use rustc_hir::def_id::{DefId, LocalDefId};
99
use rustc_hir::{self as hir, LangItem, CRATE_HIR_ID};
1010
use rustc_middle::mir::AssertMessage;
1111
use rustc_middle::query::TyCtxtAt;
1212
use rustc_middle::ty::layout::{FnAbiOf, TyAndLayout};
13-
use rustc_middle::ty::{self, TyCtxt};
13+
use rustc_middle::ty::{self, Ty, TyCtxt};
1414
use rustc_middle::{bug, mir};
1515
use rustc_span::symbol::{sym, Symbol};
1616
use rustc_span::Span;
@@ -24,8 +24,8 @@ use crate::fluent_generated as fluent;
2424
use crate::interpret::{
2525
self, compile_time_machine, err_ub, throw_exhaust, throw_inval, throw_ub_custom, throw_unsup,
2626
throw_unsup_format, AllocId, AllocRange, ConstAllocation, CtfeProvenance, FnArg, Frame,
27-
GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic, Scalar,
28-
StackPopCleanup,
27+
GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic,
28+
RangeSet, Scalar, StackPopCleanup,
2929
};
3030

3131
/// When hitting this many interpreted terminators we emit a deny by default lint
@@ -65,6 +65,9 @@ pub struct CompileTimeMachine<'tcx> {
6565
/// storing the result in the given `AllocId`.
6666
/// Used to prevent reads from a static's base allocation, as that may allow for self-initialization loops.
6767
pub(crate) static_root_ids: Option<(AllocId, LocalDefId)>,
68+
69+
/// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes).
70+
union_data_ranges: FxHashMap<Ty<'tcx>, RangeSet>,
6871
}
6972

7073
#[derive(Copy, Clone)]
@@ -99,6 +102,7 @@ impl<'tcx> CompileTimeMachine<'tcx> {
99102
can_access_mut_global,
100103
check_alignment,
101104
static_root_ids: None,
105+
union_data_ranges: FxHashMap::default(),
102106
}
103107
}
104108
}
@@ -766,6 +770,19 @@ impl<'tcx> interpret::Machine<'tcx> for CompileTimeMachine<'tcx> {
766770
}
767771
Ok(())
768772
}
773+
774+
fn cached_union_data_range<'e>(
775+
ecx: &'e mut InterpCx<'tcx, Self>,
776+
ty: Ty<'tcx>,
777+
compute_range: impl FnOnce() -> RangeSet,
778+
) -> Cow<'e, RangeSet> {
779+
if ecx.tcx.sess.opts.unstable_opts.extra_const_ub_checks {
780+
Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range))
781+
} else {
782+
// Don't bother caching, we're only doing one validation at the end anyway.
783+
Cow::Owned(compute_range())
784+
}
785+
}
769786
}
770787

771788
// Please do not add any code below the above `Machine` trait impl. I (oli-obk) plan more cleanups

compiler/rustc_const_eval/src/interpret/discriminant.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use rustc_target::abi::{self, TagEncoding, VariantIdx, Variants};
77
use tracing::{instrument, trace};
88

99
use super::{
10-
err_ub, throw_ub, ImmTy, InterpCx, InterpResult, Machine, Readable, Scalar, Writeable,
10+
err_ub, throw_ub, ImmTy, InterpCx, InterpResult, Machine, Projectable, Scalar, Writeable,
1111
};
1212

1313
impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
@@ -60,7 +60,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
6060
#[instrument(skip(self), level = "trace")]
6161
pub fn read_discriminant(
6262
&self,
63-
op: &impl Readable<'tcx, M::Provenance>,
63+
op: &impl Projectable<'tcx, M::Provenance>,
6464
) -> InterpResult<'tcx, VariantIdx> {
6565
let ty = op.layout().ty;
6666
trace!("read_discriminant_value {:#?}", op.layout());

compiler/rustc_const_eval/src/interpret/machine.rs

+11-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use rustc_apfloat::{Float, FloatConvert};
1010
use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
1111
use rustc_middle::query::TyCtxtAt;
1212
use rustc_middle::ty::layout::TyAndLayout;
13+
use rustc_middle::ty::Ty;
1314
use rustc_middle::{mir, ty};
1415
use rustc_span::def_id::DefId;
1516
use rustc_span::Span;
@@ -19,7 +20,7 @@ use rustc_target::spec::abi::Abi as CallAbi;
1920
use super::{
2021
throw_unsup, throw_unsup_format, AllocBytes, AllocId, AllocKind, AllocRange, Allocation,
2122
ConstAllocation, CtfeProvenance, FnArg, Frame, ImmTy, InterpCx, InterpResult, MPlaceTy,
22-
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, CTFE_ALLOC_SALT,
23+
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, RangeSet, CTFE_ALLOC_SALT,
2324
};
2425

2526
/// Data returned by [`Machine::after_stack_pop`], and consumed by
@@ -578,6 +579,15 @@ pub trait Machine<'tcx>: Sized {
578579
ecx: &InterpCx<'tcx, Self>,
579580
instance: Option<ty::Instance<'tcx>>,
580581
) -> usize;
582+
583+
fn cached_union_data_range<'e>(
584+
_ecx: &'e mut InterpCx<'tcx, Self>,
585+
_ty: Ty<'tcx>,
586+
compute_range: impl FnOnce() -> RangeSet,
587+
) -> Cow<'e, RangeSet> {
588+
// Default to no caching.
589+
Cow::Owned(compute_range())
590+
}
581591
}
582592

583593
/// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines

compiler/rustc_const_eval/src/interpret/memory.rs

+36-13
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@
88
99
use std::assert_matches::assert_matches;
1010
use std::borrow::Cow;
11-
use std::cell::Cell;
1211
use std::collections::VecDeque;
13-
use std::{fmt, ptr};
12+
use std::{fmt, mem, ptr};
1413

1514
use rustc_ast::Mutability;
1615
use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
@@ -118,7 +117,7 @@ pub struct Memory<'tcx, M: Machine<'tcx>> {
118117
/// This stores whether we are currently doing reads purely for the purpose of validation.
119118
/// Those reads do not trigger the machine's hooks for memory reads.
120119
/// Needless to say, this must only be set with great care!
121-
validation_in_progress: Cell<bool>,
120+
validation_in_progress: bool,
122121
}
123122

124123
/// A reference to some allocation that was already bounds-checked for the given region
@@ -145,7 +144,7 @@ impl<'tcx, M: Machine<'tcx>> Memory<'tcx, M> {
145144
alloc_map: M::MemoryMap::default(),
146145
extra_fn_ptr_map: FxIndexMap::default(),
147146
dead_alloc_map: FxIndexMap::default(),
148-
validation_in_progress: Cell::new(false),
147+
validation_in_progress: false,
149148
}
150149
}
151150

@@ -682,15 +681,15 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
682681
// We want to call the hook on *all* accesses that involve an AllocId, including zero-sized
683682
// accesses. That means we cannot rely on the closure above or the `Some` branch below. We
684683
// do this after `check_and_deref_ptr` to ensure some basic sanity has already been checked.
685-
if !self.memory.validation_in_progress.get() {
684+
if !self.memory.validation_in_progress {
686685
if let Ok((alloc_id, ..)) = self.ptr_try_get_alloc_id(ptr, size_i64) {
687686
M::before_alloc_read(self, alloc_id)?;
688687
}
689688
}
690689

691690
if let Some((alloc_id, offset, prov, alloc)) = ptr_and_alloc {
692691
let range = alloc_range(offset, size);
693-
if !self.memory.validation_in_progress.get() {
692+
if !self.memory.validation_in_progress {
694693
M::before_memory_read(
695694
self.tcx,
696695
&self.machine,
@@ -766,11 +765,14 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
766765
let parts = self.get_ptr_access(ptr, size)?;
767766
if let Some((alloc_id, offset, prov)) = parts {
768767
let tcx = self.tcx;
768+
let validation_in_progress = self.memory.validation_in_progress;
769769
// FIXME: can we somehow avoid looking up the allocation twice here?
770770
// We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`.
771771
let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?;
772772
let range = alloc_range(offset, size);
773-
M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?;
773+
if !validation_in_progress {
774+
M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?;
775+
}
774776
Ok(Some(AllocRefMut { alloc, range, tcx: *tcx, alloc_id }))
775777
} else {
776778
Ok(None)
@@ -1014,16 +1016,16 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
10141016
///
10151017
/// We do this so Miri's allocation access tracking does not show the validation
10161018
/// reads as spurious accesses.
1017-
pub fn run_for_validation<R>(&self, f: impl FnOnce() -> R) -> R {
1019+
pub fn run_for_validation<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
10181020
// This deliberately uses `==` on `bool` to follow the pattern
10191021
// `assert!(val.replace(new) == old)`.
10201022
assert!(
1021-
self.memory.validation_in_progress.replace(true) == false,
1023+
mem::replace(&mut self.memory.validation_in_progress, true) == false,
10221024
"`validation_in_progress` was already set"
10231025
);
1024-
let res = f();
1026+
let res = f(self);
10251027
assert!(
1026-
self.memory.validation_in_progress.replace(false) == true,
1028+
mem::replace(&mut self.memory.validation_in_progress, false) == true,
10271029
"`validation_in_progress` was unset by someone else"
10281030
);
10291031
res
@@ -1115,6 +1117,10 @@ impl<'a, 'tcx, M: Machine<'tcx>> std::fmt::Debug for DumpAllocs<'a, 'tcx, M> {
11151117
impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes>
11161118
AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
11171119
{
1120+
pub fn as_ref<'b>(&'b self) -> AllocRef<'b, 'tcx, Prov, Extra, Bytes> {
1121+
AllocRef { alloc: self.alloc, range: self.range, tcx: self.tcx, alloc_id: self.alloc_id }
1122+
}
1123+
11181124
/// `range` is relative to this allocation reference, not the base of the allocation.
11191125
pub fn write_scalar(&mut self, range: AllocRange, val: Scalar<Prov>) -> InterpResult<'tcx> {
11201126
let range = self.range.subrange(range);
@@ -1130,13 +1136,30 @@ impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes>
11301136
self.write_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size), val)
11311137
}
11321138

1139+
/// Mark the given sub-range (relative to this allocation reference) as uninitialized.
1140+
pub fn write_uninit(&mut self, range: AllocRange) -> InterpResult<'tcx> {
1141+
let range = self.range.subrange(range);
1142+
Ok(self
1143+
.alloc
1144+
.write_uninit(&self.tcx, range)
1145+
.map_err(|e| e.to_interp_error(self.alloc_id))?)
1146+
}
1147+
11331148
/// Mark the entire referenced range as uninitialized
1134-
pub fn write_uninit(&mut self) -> InterpResult<'tcx> {
1149+
pub fn write_uninit_full(&mut self) -> InterpResult<'tcx> {
11351150
Ok(self
11361151
.alloc
11371152
.write_uninit(&self.tcx, self.range)
11381153
.map_err(|e| e.to_interp_error(self.alloc_id))?)
11391154
}
1155+
1156+
/// Remove all provenance in the reference range.
1157+
pub fn clear_provenance(&mut self) -> InterpResult<'tcx> {
1158+
Ok(self
1159+
.alloc
1160+
.clear_provenance(&self.tcx, self.range)
1161+
.map_err(|e| e.to_interp_error(self.alloc_id))?)
1162+
}
11401163
}
11411164

11421165
impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> AllocRef<'a, 'tcx, Prov, Extra, Bytes> {
@@ -1278,7 +1301,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
12781301
};
12791302
let src_alloc = self.get_alloc_raw(src_alloc_id)?;
12801303
let src_range = alloc_range(src_offset, size);
1281-
assert!(!self.memory.validation_in_progress.get(), "we can't be copying during validation");
1304+
assert!(!self.memory.validation_in_progress, "we can't be copying during validation");
12821305
M::before_memory_read(
12831306
tcx,
12841307
&self.machine,

compiler/rustc_const_eval/src/interpret/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ pub(crate) use self::intrinsics::eval_nullary_intrinsic;
3333
pub use self::machine::{compile_time_machine, AllocMap, Machine, MayLeak, ReturnAction};
3434
pub use self::memory::{AllocKind, AllocRef, AllocRefMut, FnVal, Memory, MemoryKind};
3535
use self::operand::Operand;
36-
pub use self::operand::{ImmTy, Immediate, OpTy, Readable};
36+
pub use self::operand::{ImmTy, Immediate, OpTy};
3737
pub use self::place::{MPlaceTy, MemPlaceMeta, PlaceTy, Writeable};
3838
use self::place::{MemPlace, Place};
3939
pub use self::projection::{OffsetMode, Projectable};
4040
pub use self::stack::{Frame, FrameInfo, LocalState, StackPopCleanup, StackPopInfo};
4141
pub(crate) use self::util::create_static_alloc;
42-
pub use self::validity::{CtfeValidationMode, RefTracking};
42+
pub use self::validity::{CtfeValidationMode, RangeSet, RefTracking};
4343
pub use self::visitor::ValueVisitor;

0 commit comments

Comments
 (0)