
Commit 47d11a8

interpret: better control over whether we read data with provenance, and implicit provenance stripping where possible
Parent: 5e6bb83

22 files changed: +495 −404 lines
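The gist of the change, as a minimal standalone sketch (hypothetical model types, not rustc's actual `Allocation`/`Scalar` machinery; in the real code, implicit stripping is additionally gated on `Tag::OFFSET_IS_ADDR`): every scalar read now states up front whether it wants provenance, and reads that do not want it can strip provenance instead of erroring.

// Hypothetical standalone model of the new read semantics; the numeric
// "provenance" here is a stand-in for a real allocation id.
#[derive(Debug, PartialEq)]
enum ModelScalar {
    Int(u64),      // plain bits, no provenance
    Ptr(u64, u32), // bits plus provenance
}

fn model_read(bits: u64, provenance: Option<u32>, read_provenance: bool) -> ModelScalar {
    match (read_provenance, provenance) {
        // Pointer read hitting a relocation: keep bits and provenance together.
        (true, Some(alloc_id)) => ModelScalar::Ptr(bits, alloc_id),
        // Integer read: any provenance is implicitly stripped.
        (false, _) => ModelScalar::Int(bits),
        // Pointer read of provenance-free bytes: an address-valued integer.
        (true, None) => ModelScalar::Int(bits),
    }
}

fn main() {
    assert_eq!(model_read(0x1000, Some(7), true), ModelScalar::Ptr(0x1000, 7));
    // The same bytes read "as an integer" lose their provenance:
    assert_eq!(model_read(0x1000, Some(7), false), ModelScalar::Int(0x1000));
}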

compiler/rustc_const_eval/src/interpret/memory.rs

+19 −4
@@ -908,11 +908,15 @@ impl<'tcx, 'a, Tag: Provenance, Extra> AllocRefMut<'a, 'tcx, Tag, Extra> {
 }
 
 impl<'tcx, 'a, Tag: Provenance, Extra> AllocRef<'a, 'tcx, Tag, Extra> {
-    pub fn read_scalar(&self, range: AllocRange) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
+    pub fn read_scalar(
+        &self,
+        range: AllocRange,
+        read_provenance: bool,
+    ) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
         let range = self.range.subrange(range);
         let res = self
             .alloc
-            .read_scalar(&self.tcx, range)
+            .read_scalar(&self.tcx, range, read_provenance)
             .map_err(|e| e.to_interp_error(self.alloc_id))?;
         debug!(
             "read_scalar in {} at {:#x}, size {}: {:?}",
@@ -924,8 +928,19 @@ impl<'tcx, 'a, Tag: Provenance, Extra> AllocRef<'a, 'tcx, Tag, Extra> {
         Ok(res)
     }
 
-    pub fn read_ptr_sized(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
-        self.read_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size))
+    pub fn read_integer(
+        &self,
+        offset: Size,
+        size: Size,
+    ) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
+        self.read_scalar(alloc_range(offset, size), /*read_provenance*/ false)
+    }
+
+    pub fn read_pointer(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
+        self.read_scalar(
+            alloc_range(offset, self.tcx.data_layout().pointer_size),
+            /*read_provenance*/ true,
+        )
     }
 
     pub fn check_bytes(
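Illustrative call sites for the two new helpers (a sketch only: `alloc` is assumed to be an `AllocRef` obtained via `get_ptr_alloc`, and `offset` some `Size`; compare the real uses in traits.rs below):

// Hypothetical snippet inside interpreter code; `alloc` and `offset` are assumed.
// An integer field: provenance is not wanted and is (where possible) stripped.
let len = alloc.read_integer(offset, Size::from_bytes(8))?.check_init()?;
// A pointer field: provenance must be read along with the bits.
let first_ptr = alloc.read_pointer(Size::ZERO)?.check_init()?;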

compiler/rustc_const_eval/src/interpret/operand.rs

+16 −6
@@ -15,8 +15,8 @@ use rustc_target::abi::{VariantIdx, Variants};
 
 use super::{
     alloc_range, from_known_layout, mir_assign_valid_types, AllocId, ConstValue, GlobalId,
-    InterpCx, InterpResult, MPlaceTy, Machine, MemPlace, Place, PlaceTy, Pointer, Provenance,
-    Scalar, ScalarMaybeUninit,
+    InterpCx, InterpResult, MPlaceTy, Machine, MemPlace, Place, PlaceTy, Pointer,
+    PointerArithmetic, Provenance, Scalar, ScalarMaybeUninit,
 };
 
 /// An `Immediate` represents a single immediate self-contained Rust value.
@@ -284,11 +284,15 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             Abi::Scalar(s) if force => Some(s.primitive()),
             _ => None,
         };
-        if let Some(_s) = scalar_layout {
+        let number_may_have_provenance = !M::enforce_number_no_provenance(self);
+        if let Some(s) = scalar_layout {
             //FIXME(#96185): let size = s.size(self);
             //FIXME(#96185): assert_eq!(size, mplace.layout.size, "abi::Scalar size does not match layout size");
             let size = mplace.layout.size; //FIXME(#96185): remove this line
-            let scalar = alloc.read_scalar(alloc_range(Size::ZERO, size))?;
+            let scalar = alloc.read_scalar(
+                alloc_range(Size::ZERO, size),
+                s.is_ptr() || (number_may_have_provenance && size == self.pointer_size()),
+            )?;
             return Ok(Some(ImmTy { imm: scalar.into(), layout: mplace.layout }));
         }
         let scalar_pair_layout = match mplace.layout.abi {
@@ -306,8 +310,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             let (a_size, b_size) = (a.size(self), b.size(self));
             let b_offset = a_size.align_to(b.align(self).abi);
             assert!(b_offset.bytes() > 0); // in `operand_field` we use the offset to tell apart the fields
-            let a_val = alloc.read_scalar(alloc_range(Size::ZERO, a_size))?;
-            let b_val = alloc.read_scalar(alloc_range(b_offset, b_size))?;
+            let a_val = alloc.read_scalar(
+                alloc_range(Size::ZERO, a_size),
+                a.is_ptr() || (number_may_have_provenance && a_size == self.pointer_size()),
+            )?;
+            let b_val = alloc.read_scalar(
+                alloc_range(b_offset, b_size),
+                b.is_ptr() || (number_may_have_provenance && b_size == self.pointer_size()),
+            )?;
             return Ok(Some(ImmTy {
                 imm: Immediate::ScalarPair(a_val, b_val),
                 layout: mplace.layout,
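All three call sites pass the same second argument to `read_scalar`. Restated as a standalone predicate (the parameter names mirror the variables above; this function itself is only for illustration):

// Provenance is read iff the ABI says "pointer", or the machine still lets
// integers carry provenance and the value is exactly pointer-sized.
fn wants_provenance(abi_is_ptr: bool, number_may_have_provenance: bool, ptr_sized: bool) -> bool {
    abi_is_ptr || (number_may_have_provenance && ptr_sized)
}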

compiler/rustc_const_eval/src/interpret/traits.rs

+11 −8
@@ -50,7 +50,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         let vtable_slot = self
             .get_ptr_alloc(vtable_slot, ptr_size, self.tcx.data_layout.pointer_align.abi)?
             .expect("cannot be a ZST");
-        let fn_ptr = self.scalar_to_ptr(vtable_slot.read_ptr_sized(Size::ZERO)?.check_init()?)?;
+        let fn_ptr = self.scalar_to_ptr(vtable_slot.read_pointer(Size::ZERO)?.check_init()?)?;
         self.get_ptr_fn(fn_ptr)
     }
 
@@ -69,9 +69,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             )?
             .expect("cannot be a ZST");
         let drop_fn = vtable
-            .read_ptr_sized(
-                pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_DROPINPLACE).unwrap(),
-            )?
+            .read_pointer(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_DROPINPLACE).unwrap())?
             .check_init()?;
         // We *need* an instance here, no other kind of function value, to be able
         // to determine the type.
@@ -104,12 +102,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             )?
             .expect("cannot be a ZST");
         let size = vtable
-            .read_ptr_sized(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_SIZE).unwrap())?
+            .read_integer(
+                pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_SIZE).unwrap(),
+                pointer_size,
+            )?
             .check_init()?;
         let size = size.to_machine_usize(self)?;
         let size = Size::from_bytes(size);
         let align = vtable
-            .read_ptr_sized(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_ALIGN).unwrap())?
+            .read_integer(
+                pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_ALIGN).unwrap(),
+                pointer_size,
+            )?
             .check_init()?;
         let align = align.to_machine_usize(self)?;
         let align = Align::from_bytes(align).map_err(|e| err_ub!(InvalidVtableAlignment(e)))?;
@@ -132,8 +136,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             .get_ptr_alloc(vtable_slot, pointer_size, self.tcx.data_layout.pointer_align.abi)?
             .expect("cannot be a ZST");
 
-        let new_vtable =
-            self.scalar_to_ptr(new_vtable.read_ptr_sized(Size::ZERO)?.check_init()?)?;
+        let new_vtable = self.scalar_to_ptr(new_vtable.read_pointer(Size::ZERO)?.check_init()?)?;
 
         Ok(new_vtable)
     }
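For orientation, a sketch of how the two helpers map onto the vtable slots read in this file (each slot is one pointer wide; only mirrors the calls above, no new API):

// Illustrative, mirroring the code in this file:
//   COMMON_VTABLE_ENTRIES_DROPINPLACE -> read_pointer (function pointer, needs provenance)
//   COMMON_VTABLE_ENTRIES_SIZE        -> read_integer (usize, provenance stripped)
//   COMMON_VTABLE_ENTRIES_ALIGN       -> read_integer (usize, provenance stripped)
let offset = pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_SIZE).unwrap();
let size = vtable.read_integer(offset, pointer_size)?.check_init()?;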

compiler/rustc_middle/src/mir/interpret/allocation.rs

+53 −28
@@ -264,9 +264,18 @@ impl<Tag, Extra> Allocation<Tag, Extra> {
 
 /// Byte accessors.
 impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
-    /// The last argument controls whether we error out when there are uninitialized
-    /// or pointer bytes. You should never call this, call `get_bytes` or
-    /// `get_bytes_with_uninit_and_ptr` instead,
+    /// This is the entirely abstraction-violating way to just grab the raw bytes without
+    /// caring about relocations. It just deduplicates some code between `read_scalar`
+    /// and `get_bytes_internal`.
+    fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] {
+        &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
+    }
+
+    /// The last argument controls whether we error out when there are uninitialized or pointer
+    /// bytes. However, we *always* error when there are relocations overlapping the edges of the
+    /// range.
+    ///
+    /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead,
     ///
     /// This function also guarantees that the resulting pointer will remain stable
     /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
@@ -287,7 +296,7 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
             self.check_relocation_edges(cx, range)?;
         }
 
-        Ok(&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
+        Ok(self.get_bytes_even_more_internal(range))
     }
 
     /// Checks that these bytes are initialized and not pointer bytes, and then return them
@@ -373,6 +382,9 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
 
     /// Reads a *non-ZST* scalar.
     ///
+    /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
+    /// supports that) provenance is entirely ignored.
+    ///
     /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
     /// for ZSTness anyway due to integer pointers being valid for ZSTs.
     ///
@@ -382,35 +394,47 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
         &self,
         cx: &impl HasDataLayout,
         range: AllocRange,
+        read_provenance: bool,
     ) -> AllocResult<ScalarMaybeUninit<Tag>> {
-        // `get_bytes_with_uninit_and_ptr` tests relocation edges.
-        // We deliberately error when loading data that partially has provenance, or partially
-        // initialized data (that's the check below), into a scalar. The LLVM semantics of this are
-        // unclear so we are conservative. See <https://github.com/rust-lang/rust/issues/69488> for
-        // further discussion.
-        let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
-        // Uninit check happens *after* we established that the alignment is correct.
-        // We must not return `Ok()` for unaligned pointers!
+        if read_provenance {
+            assert_eq!(range.size, cx.data_layout().pointer_size);
+        }
+
+        // First and foremost, if anything is uninit, bail.
         if self.is_init(range).is_err() {
             // This inflates uninitialized bytes to the entire scalar, even if only a few
             // bytes are uninitialized.
             return Ok(ScalarMaybeUninit::Uninit);
        }
-        // Now we do the actual reading.
-        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
-        // See if we got a pointer.
-        if range.size != cx.data_layout().pointer_size {
-            // Not a pointer.
-            // *Now*, we better make sure that the inside is free of relocations too.
-            self.check_relocations(cx, range)?;
-        } else {
-            // Maybe a pointer.
-            if let Some(&prov) = self.relocations.get(&range.start) {
-                let ptr = Pointer::new(prov, Size::from_bytes(bits));
-                return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
-            }
+
+        // If we are doing a pointer read, and there is a relocation exactly where we
+        // are reading, then we can put data and relocation back together and return that.
+        if read_provenance && let Some(&prov) = self.relocations.get(&range.start) {
+            // We already checked init and relocations, so we can use this function.
+            let bytes = self.get_bytes_even_more_internal(range);
+            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+            let ptr = Pointer::new(prov, Size::from_bytes(bits));
+            return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
        }
-        // We don't. Just return the bits.
+
+        // If we are *not* reading a pointer, and we can just ignore relocations,
+        // then do exactly that.
+        if !read_provenance && Tag::OFFSET_IS_ADDR {
+            // We just strip provenance.
+            let bytes = self.get_bytes_even_more_internal(range);
+            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+            return Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)));
+        }
+
+        // It's complicated. Better make sure there is no provenance anywhere.
+        // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then
+        // `read_pointer` is true and we ideally would distinguish the following two cases:
+        // - The entire `range` is covered by 2 relocations for the same provenance.
+        //   Then we should return a pointer with that provenance.
+        // - The range has inhomogeneous provenance. Then we should return just the
+        //   underlying bits.
+        let bytes = self.get_bytes(cx, range)?;
+        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
         Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)))
     }
 
@@ -513,8 +537,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
         let start = range.start;
         let end = range.end();
 
-        // We need to handle clearing the relocations from parts of a pointer. See
-        // <https://github.com/rust-lang/rust/issues/87184> for details.
+        // We need to handle clearing the relocations from parts of a pointer.
+        // FIXME: Miri should preserve partial relocations; see
+        // https://github.com/rust-lang/miri/issues/2181.
         if first < start {
             if Tag::ERR_ON_PARTIAL_PTR_OVERWRITE {
                 return Err(AllocError::PartialPointerOverwrite(first));
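The new `read_scalar` body is easiest to read as a four-way decision. A standalone model of that branch order (hypothetical types, not the real `Allocation`; the booleans stand in for the checks named in the comments):

// Uninit wins; then an exact relocation satisfies a pointer read; then
// integer reads strip provenance when offsets are absolute
// (`Tag::OFFSET_IS_ADDR`); everything else must prove the range is
// relocation-free or error out.
enum ModelResult { Uninit, Ptr(u64, u32), Int(u64), ReadPointerAsBytes }

fn model_read_scalar(
    is_init: bool,
    reloc_at_start: Option<u32>, // relocation exactly covering the range
    any_reloc: bool,             // any relocation overlapping the range
    read_provenance: bool,
    offset_is_addr: bool,        // models `Tag::OFFSET_IS_ADDR`
    bits: u64,
) -> ModelResult {
    if !is_init {
        return ModelResult::Uninit;
    }
    if read_provenance {
        if let Some(prov) = reloc_at_start {
            return ModelResult::Ptr(bits, prov);
        }
    } else if offset_is_addr {
        return ModelResult::Int(bits); // implicit provenance stripping
    }
    // "It's complicated": only plain bytes may remain.
    if any_reloc { ModelResult::ReadPointerAsBytes } else { ModelResult::Int(bits) }
}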

compiler/rustc_middle/src/mir/interpret/error.rs

+1 −1
@@ -428,7 +428,7 @@ pub enum UnsupportedOpInfo {
     /// Encountered a pointer where we needed raw bytes.
     ReadPointerAsBytes,
     /// Overwriting parts of a pointer; the resulting state cannot be represented in our
-    /// `Allocation` data structure.
+    /// `Allocation` data structure. See <https://github.com/rust-lang/miri/issues/2181>.
     PartialPointerOverwrite(Pointer<AllocId>),
     //
     // The variants below are only reachable from CTFE/const prop, miri will never emit them.

compiler/rustc_target/src/abi/mod.rs

+5 −0
@@ -746,6 +746,11 @@ impl Primitive {
     pub fn is_int(self) -> bool {
         matches!(self, Int(..))
     }
+
+    #[inline]
+    pub fn is_ptr(self) -> bool {
+        matches!(self, Pointer)
+    }
 }
 
 /// Inclusive wrap-around range of valid values, that is, if
