Skip to content

Commit 48bed67

Browse files
committed
TypeId: use a (v0) mangled type to remain sound in the face of hash collisions.
1 parent bb34360 commit 48bed67

File tree

14 files changed

+305
-47
lines changed

14 files changed

+305
-47
lines changed

compiler/rustc_const_eval/src/const_eval/mod.rs

+87-6
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,16 @@
22

33
use std::convert::TryFrom;
44

5+
use rustc_hir::lang_items::LangItem;
56
use rustc_hir::Mutability;
6-
use rustc_middle::ty::{self, TyCtxt};
7-
use rustc_middle::{
8-
mir::{self, interpret::ConstAlloc},
9-
ty::ScalarInt,
10-
};
7+
use rustc_middle::mir::{self, interpret::ConstAlloc};
8+
use rustc_middle::ty::layout::LayoutOf;
9+
use rustc_middle::ty::{self, ScalarInt, Ty, TyCtxt};
1110
use rustc_span::{source_map::DUMMY_SP, symbol::Symbol};
11+
use rustc_target::abi::Size;
1212

1313
use crate::interpret::{
14-
intern_const_alloc_recursive, ConstValue, InternKind, InterpCx, InterpResult, MPlaceTy,
14+
self, intern_const_alloc_recursive, ConstValue, InternKind, InterpCx, InterpResult, MPlaceTy,
1515
MemPlaceMeta, Scalar,
1616
};
1717

@@ -39,6 +39,87 @@ pub(crate) fn const_caller_location(
3939
ConstValue::Scalar(Scalar::from_pointer(loc_place.ptr.into_pointer_or_addr().unwrap(), &tcx))
4040
}
4141

42+
pub(crate) fn const_type_id<'tcx>(
43+
tcx: TyCtxt<'tcx>,
44+
param_env: ty::ParamEnv<'tcx>,
45+
ty: Ty<'tcx>,
46+
) -> ConstValue<'tcx> {
47+
trace!("const_type_id: {}", ty);
48+
49+
// Compute (logical) `TypeId` field values, before trying to encode them.
50+
let hash = tcx.type_id_hash(ty);
51+
let mangling = tcx.type_id_mangling(param_env.and(ty)).name;
52+
53+
let mut ecx = mk_eval_cx(tcx, DUMMY_SP, param_env, false);
54+
55+
let type_id_ty = tcx.type_of(tcx.require_lang_item(LangItem::TypeId, None));
56+
let type_id_layout = ecx.layout_of(type_id_ty).unwrap();
57+
58+
// Encode `TypeId` field values, before putting together the allocation.
59+
let hash_val = Scalar::from_u64(hash);
60+
let mangling_val = {
61+
let mangling_len = u64::try_from(mangling.len()).unwrap();
62+
let mangling_len_val = Scalar::from_machine_usize(mangling_len, &ecx);
63+
64+
// The field is `mangling: &TypeManglingStr`, get `TypeManglingStr` from it.
65+
let mangling_field_ty = type_id_layout.field(&ecx, 1).ty;
66+
let type_mangling_str_ty = mangling_field_ty.builtin_deref(true).unwrap().ty;
67+
68+
// Allocate memory for `TypeManglingStr` struct.
69+
let type_mangling_str_layout = ecx.layout_of(type_mangling_str_ty).unwrap();
70+
let type_mangling_str_place = {
71+
// NOTE(eddyb) this is similar to the `ecx.allocate(...)` used below
72+
// for `type_id_place`, except with an additional size for the
73+
// string bytes (`mangling`) being added to the `TypeManglingStr`
74+
// (which is unsized, using an `extern { type }` tail).
75+
let layout = type_mangling_str_layout;
76+
let size = layout.size + Size::from_bytes(mangling_len);
77+
let ptr = ecx
78+
.allocate_ptr(size, layout.align.abi, interpret::MemoryKind::IntrinsicGlobal)
79+
.unwrap();
80+
MPlaceTy::from_aligned_ptr(ptr.into(), layout)
81+
};
82+
83+
// Initialize `TypeManglingStr` fields.
84+
ecx.write_scalar(
85+
mangling_len_val,
86+
&ecx.mplace_field(&type_mangling_str_place, 0).unwrap().into(),
87+
)
88+
.unwrap();
89+
ecx.write_bytes_ptr(
90+
ecx.mplace_field(&type_mangling_str_place, 1).unwrap().ptr,
91+
mangling.bytes(),
92+
)
93+
.unwrap();
94+
95+
// `&TypeManglingStr` has no metadata, thanks to the length being stored
96+
// behind the reference (in the first field of `TypeManglingStr`).
97+
type_mangling_str_place.to_ref(&ecx).to_scalar().unwrap()
98+
};
99+
100+
// FIXME(eddyb) everything below would be unnecessary if `ConstValue` could
101+
// hold a pair of `Scalar`s, or if we moved to valtrees.
102+
103+
// Allocate memory for `TypeId` struct.
104+
let type_id_place =
105+
ecx.allocate(type_id_layout, interpret::MemoryKind::IntrinsicGlobal).unwrap();
106+
107+
// Initialize `TypeId` fields.
108+
ecx.write_scalar(hash_val, &ecx.mplace_field(&type_id_place, 0).unwrap().into()).unwrap();
109+
ecx.write_scalar(mangling_val, &ecx.mplace_field(&type_id_place, 1).unwrap().into()).unwrap();
110+
111+
// Convert the `TypeId` allocation from being in `ecx`, to a global `ConstValue`.
112+
if intern_const_alloc_recursive(&mut ecx, InternKind::Constant, &type_id_place).is_err() {
113+
bug!("intern_const_alloc_recursive should not error in this case")
114+
}
115+
let (type_id_alloc_id, type_id_offset) =
116+
type_id_place.ptr.into_pointer_or_addr().unwrap().into_parts();
117+
ConstValue::ByRef {
118+
alloc: tcx.global_alloc(type_id_alloc_id).unwrap_memory(),
119+
offset: type_id_offset,
120+
}
121+
}
122+
42123
/// Convert an evaluated constant to a type level constant
43124
pub(crate) fn const_to_valtree<'tcx>(
44125
tcx: TyCtxt<'tcx>,

compiler/rustc_const_eval/src/interpret/intern.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ fn intern_shallow<'rt, 'mir, 'tcx, M: CompileTimeMachine<'mir, 'tcx, const_eval:
106106
match kind {
107107
MemoryKind::Stack
108108
| MemoryKind::Machine(const_eval::MemoryKind::Heap)
109-
| MemoryKind::CallerLocation => {}
109+
| MemoryKind::IntrinsicGlobal => {}
110110
}
111111
// Set allocation mutability as appropriate. This is used by LLVM to put things into
112112
// read-only memory, and also by Miri when evaluating other globals that

compiler/rustc_const_eval/src/interpret/intrinsics.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ crate fn eval_nullary_intrinsic<'tcx>(
7070
}
7171
sym::type_id => {
7272
ensure_monomorphic_enough(tcx, tp_ty)?;
73-
ConstValue::from_u64(tcx.type_id_hash(tp_ty))
73+
crate::const_eval::const_type_id(tcx, param_env, tp_ty)
7474
}
7575
sym::variant_count => match tp_ty.kind() {
7676
// Correctly handles non-monomorphic calls, so there is no need for ensure_monomorphic_enough.

compiler/rustc_const_eval/src/interpret/intrinsics/caller_location.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -82,25 +82,25 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
8282
) -> MPlaceTy<'tcx, M::PointerTag> {
8383
let loc_details = &self.tcx.sess.opts.debugging_opts.location_detail;
8484
let file = if loc_details.file {
85-
self.allocate_str(filename.as_str(), MemoryKind::CallerLocation, Mutability::Not)
85+
self.allocate_str(filename.as_str(), MemoryKind::IntrinsicGlobal, Mutability::Not)
8686
} else {
8787
// FIXME: This creates a new allocation each time. It might be preferable to
8888
// perform this allocation only once, and re-use the `MPlaceTy`.
8989
// See https://github.com/rust-lang/rust/pull/89920#discussion_r730012398
90-
self.allocate_str("<redacted>", MemoryKind::CallerLocation, Mutability::Not)
90+
self.allocate_str("<redacted>", MemoryKind::IntrinsicGlobal, Mutability::Not)
9191
};
9292
let line = if loc_details.line { Scalar::from_u32(line) } else { Scalar::from_u32(0) };
9393
let col = if loc_details.column { Scalar::from_u32(col) } else { Scalar::from_u32(0) };
9494

95-
// Allocate memory for `CallerLocation` struct.
95+
// Allocate memory for `panic::Location` struct.
9696
let loc_ty = self
9797
.tcx
9898
.type_of(self.tcx.require_lang_item(LangItem::PanicLocation, None))
9999
.subst(*self.tcx, self.tcx.mk_substs([self.tcx.lifetimes.re_erased.into()].iter()));
100100
let loc_layout = self.layout_of(loc_ty).unwrap();
101101
// This can fail if rustc runs out of memory right here. Trying to emit an error would be
102102
// pointless, since that would require allocating more memory than a Location.
103-
let location = self.allocate(loc_layout, MemoryKind::CallerLocation).unwrap();
103+
let location = self.allocate(loc_layout, MemoryKind::IntrinsicGlobal).unwrap();
104104

105105
// Initialize fields.
106106
self.write_immediate(file.to_ref(self), &self.mplace_field(&location, 0).unwrap().into())

compiler/rustc_const_eval/src/interpret/memory.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ use super::{
2929
pub enum MemoryKind<T> {
3030
/// Stack memory. Error if deallocated except during a stack pop.
3131
Stack,
32-
/// Memory allocated by `caller_location` intrinsic. Error if ever deallocated.
33-
CallerLocation,
32+
/// Global memory allocated by an intrinsic. Error if ever deallocated.
33+
IntrinsicGlobal,
3434
/// Additional memory kinds a machine wishes to distinguish from the builtin ones.
3535
Machine(T),
3636
}
@@ -40,7 +40,7 @@ impl<T: MayLeak> MayLeak for MemoryKind<T> {
4040
fn may_leak(self) -> bool {
4141
match self {
4242
MemoryKind::Stack => false,
43-
MemoryKind::CallerLocation => true,
43+
MemoryKind::IntrinsicGlobal => true,
4444
MemoryKind::Machine(k) => k.may_leak(),
4545
}
4646
}
@@ -50,7 +50,7 @@ impl<T: fmt::Display> fmt::Display for MemoryKind<T> {
5050
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5151
match self {
5252
MemoryKind::Stack => write!(f, "stack variable"),
53-
MemoryKind::CallerLocation => write!(f, "caller location"),
53+
MemoryKind::IntrinsicGlobal => write!(f, "global memory (from intrinsic)"),
5454
MemoryKind::Machine(m) => write!(f, "{}", m),
5555
}
5656
}

compiler/rustc_middle/src/query/mod.rs

+47-3
Original file line numberDiff line numberDiff line change
@@ -995,14 +995,58 @@ rustc_queries! {
995995
desc { |tcx| "generating MIR shim for `{}`", tcx.def_path_str(key.def_id()) }
996996
}
997997

998-
/// The `symbol_name` query provides the symbol name for calling a
999-
/// given instance from the local crate. In particular, it will also
1000-
/// look up the correct symbol name of instances from upstream crates.
998+
/// The `symbol_name` query provides the symbol name for the given instance.
999+
///
1000+
/// Both `static` and `fn` instances have symbol names, whether definitions
1001+
/// (on the Rust side, either from the local crate or an upstream one), or
1002+
/// imports in a "foreign block" (`extern {...}`).
1003+
///
1004+
/// This symbol name is the canonical one for that instance, and must be
1005+
/// used for both linker-level exports (definitions) and imports (uses),
1006+
/// of that instance (i.e. it's the sole connection the linker sees).
1007+
///
1008+
/// By default, Rust definitions have mangled symbols, to avoid conflicts,
1009+
/// and to allow for many instances ("monomorphizations") of generic `fn`s.
1010+
/// The exact choice of mangling can vary, and not all type information from
1011+
/// the instance may always be present in a form that allows demangling back
1012+
/// to a human-readable form. See also the `symbol_mangling_version` query
1013+
/// and the `rustc_symbol_mangling` crate.
1014+
///
1015+
/// Note however that `fn` lifetime parameters are erased (and so they never
1016+
/// participate in monomorphization), meaning mangled Rust symbol names will
1017+
/// never contain information about such lifetimes (mangled lifetimes only
1018+
/// occur for higher-ranked types, e.g. `foo::<for<'a> fn(&'a X)>`).
10011019
query symbol_name(key: ty::Instance<'tcx>) -> ty::SymbolName<'tcx> {
10021020
desc { "computing the symbol for `{}`", key }
10031021
cache_on_disk_if { true }
10041022
}
10051023

1024+
/// The `type_id_mangling` query provides the Rust mangling of the given type,
1025+
/// for use in `TypeId`, as a guard against `type_id_hash` collisions.
1026+
///
1027+
/// Unlike the `symbol_name` query, the mangling used for types doesn't vary
1028+
/// between crates, and encodes all the type information "structurally"
1029+
/// (i.e. lossy encodings such as hashing aren't allowed, as that would
1030+
/// effectively defeat the purpose of guarding against hash collisions).
1031+
///
1032+
/// If this is used outside of `TypeId`, some additional caveats apply:
1033+
/// * it's not a full symbol, so it could collide with unrelated exports,
1034+
/// if used directly as a linker symbol without a prefix and/or suffix
1035+
/// * mangling features such as compression (e.g. `v0` backrefs) mean that
1036+
/// it cannot be trivially embedded in a larger mangled Rust symbol - for
1037+
/// that use case, prefer using `symbol_name` with an instance of either
1038+
/// a custom `InstanceDef`, or at least a generic lang item (`fn`, though
1039+
/// associated `const` may work better for a type-dependent `static`)
1040+
/// * every Rust mangling erases most lifetimes, with the only exception
1041+
/// being those found in higher-ranked types (e.g. `for<'a> fn(&'a X)`)
1042+
//
1043+
// FIXME(eddyb) this shouldn't be using `ty::SymbolName`, but `&'tcx str`,
1044+
// or `ty::SymbolName` should be renamed to "tcx-interned string".
1045+
query type_id_mangling(key: ty::ParamEnvAnd<'tcx, Ty<'tcx>>) -> ty::SymbolName<'tcx> {
1046+
desc { "computing the type mangling of `{}`", key.value }
1047+
cache_on_disk_if { true }
1048+
}
1049+
10061050
query opt_def_kind(def_id: DefId) -> Option<DefKind> {
10071051
desc { |tcx| "looking up definition kind of `{}`", tcx.def_path_str(def_id) }
10081052
separate_provide_extern

compiler/rustc_symbol_mangling/src/lib.rs

+10-5
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,10 @@ pub fn symbol_name_for_instance_in_crate<'tcx>(
125125
}
126126

127127
pub fn provide(providers: &mut Providers) {
128-
*providers = Providers { symbol_name: symbol_name_provider, ..*providers };
128+
*providers = Providers { symbol_name, type_id_mangling, ..*providers };
129129
}
130130

131-
// The `symbol_name` query provides the symbol name for calling a given
132-
// instance from the local crate. In particular, it will also look up the
133-
// correct symbol name of instances from upstream crates.
134-
fn symbol_name_provider<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName<'tcx> {
131+
fn symbol_name<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName<'tcx> {
135132
let symbol_name = compute_symbol_name(tcx, instance, || {
136133
// This closure determines the instantiating crate for instances that
137134
// need an instantiating-crate-suffix for their symbol name, in order
@@ -150,6 +147,14 @@ fn symbol_name_provider<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty
150147
ty::SymbolName::new(tcx, &symbol_name)
151148
}
152149

150+
fn type_id_mangling<'tcx>(
151+
tcx: TyCtxt<'tcx>,
152+
query: ty::ParamEnvAnd<'tcx, Ty<'tcx>>,
153+
) -> ty::SymbolName<'tcx> {
154+
let (param_env, ty) = query.into_parts();
155+
ty::SymbolName::new(tcx, &v0::mangle_type(tcx, param_env, ty))
156+
}
157+
153158
/// This function computes the LLVM CFI typeid for the given `FnAbi`.
154159
pub fn llvm_cfi_typeid_for_fn_abi<'tcx>(
155160
_tcx: TyCtxt<'tcx>,

compiler/rustc_symbol_mangling/src/v0.rs

+30-10
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,11 @@ pub(super) fn mangle<'tcx>(
2727
// FIXME(eddyb) this should ideally not be needed.
2828
let substs = tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), instance.substs);
2929

30-
let prefix = "_R";
31-
let mut cx = &mut SymbolMangler {
32-
tcx,
33-
start_offset: prefix.len(),
34-
paths: FxHashMap::default(),
35-
types: FxHashMap::default(),
36-
consts: FxHashMap::default(),
37-
binders: vec![],
38-
out: String::from(prefix),
39-
};
30+
let mut cx = &mut SymbolMangler::new(tcx);
31+
32+
// The `_R` prefix indicates a Rust mangled symbol.
33+
cx.push("_R");
34+
cx.start_offset = cx.out.len();
4035

4136
// Append `::{shim:...#0}` to shims that can coexist with a non-shim instance.
4237
let shim_kind = match instance.def {
@@ -57,6 +52,19 @@ pub(super) fn mangle<'tcx>(
5752
std::mem::take(&mut cx.out)
5853
}
5954

55+
pub(super) fn mangle_type<'tcx>(
56+
tcx: TyCtxt<'tcx>,
57+
param_env: ty::ParamEnv<'tcx>,
58+
ty: Ty<'tcx>,
59+
) -> String {
60+
let param_env = param_env.with_reveal_all_normalized(tcx);
61+
let ty = tcx.normalize_erasing_regions(param_env, ty);
62+
63+
let mut cx = SymbolMangler::new(tcx);
64+
cx.print_type(ty).unwrap();
65+
cx.out
66+
}
67+
6068
struct BinderLevel {
6169
/// The range of distances from the root of what's
6270
/// being printed, to the lifetimes in a binder.
@@ -85,6 +93,18 @@ struct SymbolMangler<'tcx> {
8593
}
8694

8795
impl<'tcx> SymbolMangler<'tcx> {
96+
fn new(tcx: TyCtxt<'tcx>) -> Self {
97+
Self {
98+
tcx,
99+
start_offset: 0,
100+
paths: FxHashMap::default(),
101+
types: FxHashMap::default(),
102+
consts: FxHashMap::default(),
103+
binders: vec![],
104+
out: String::new(),
105+
}
106+
}
107+
88108
fn push(&mut self, s: &str) {
89109
self.out.push_str(s);
90110
}

0 commit comments

Comments
 (0)