Skip to content

Commit 691ab94

Browse files
author
Jonathan Turner
authored
Rollup merge of #37336 - michaelwoerister:debuginfo-type-ids, r=eddyb
debuginfo: Use TypeIdHasher for generating global debuginfo type IDs. The only requirement for debuginfo type IDs is that they are globally unique. The `TypeIdHasher` (which is used for `std::intrinsics::type_id()`) provides that, so we can get rid of some redundancy by re-using it for debuginfo. Values produced by the `TypeIdHasher` are also more stable than those the current `UniqueTypeId` generation algorithm produces -- the latter incorporate the `NodeId`s, which is not good for incremental compilation. @alexcrichton @eddyb : Could you take a look at the endianness adaptations that I made to the `TypeIdHasher`? Also, are we sure that a 64 bit hash is wide enough for something that is supposed to be globally unique? For debuginfo I'm using 160 bits to make sure that we don't run into conflicts there.
2 parents e7da619 + 025b27d commit 691ab94

File tree

2 files changed

+79
-226
lines changed

2 files changed

+79
-226
lines changed

src/librustc/ty/util.rs

+32-25
Original file line numberDiff line numberDiff line change
@@ -392,27 +392,30 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
392392
}
393393
}
394394

395-
// When hashing a type this ends up affecting properties like symbol names. We
396-
// want these symbol names to be calculated independent of other factors like
397-
// what architecture you're compiling *from*.
398-
//
399-
// The hashing just uses the standard `Hash` trait, but the implementations of
400-
// `Hash` for the `usize` and `isize` types are *not* architecture independent
401-
// (e.g. they has 4 or 8 bytes). As a result we want to avoid `usize` and
402-
// `isize` completely when hashing. To ensure that these don't leak in we use a
403-
// custom hasher implementation here which inflates the size of these to a `u64`
404-
// and `i64`.
405-
struct WidenUsizeHasher<H> {
395+
/// When hashing a type this ends up affecting properties like symbol names. We
396+
/// want these symbol names to be calculated independent of other factors like
397+
/// what architecture you're compiling *from*.
398+
///
399+
/// The hashing just uses the standard `Hash` trait, but the implementations of
400+
/// `Hash` for the `usize` and `isize` types are *not* architecture independent
401+
/// (e.g. they have 4 or 8 bytes). As a result we want to avoid `usize` and
402+
/// `isize` completely when hashing. To ensure that these don't leak in we use a
403+
/// custom hasher implementation here which inflates the size of these to a `u64`
404+
/// and `i64`.
405+
///
406+
/// The same goes for endianness: We always convert multi-byte integers to little
407+
/// endian before hashing.
408+
pub struct ArchIndependentHasher<H> {
406409
inner: H,
407410
}
408411

409-
impl<H> WidenUsizeHasher<H> {
410-
fn new(inner: H) -> WidenUsizeHasher<H> {
411-
WidenUsizeHasher { inner: inner }
412+
impl<H> ArchIndependentHasher<H> {
413+
pub fn new(inner: H) -> ArchIndependentHasher<H> {
414+
ArchIndependentHasher { inner: inner }
412415
}
413416
}
414417

415-
impl<H: Hasher> Hasher for WidenUsizeHasher<H> {
418+
impl<H: Hasher> Hasher for ArchIndependentHasher<H> {
416419
fn write(&mut self, bytes: &[u8]) {
417420
self.inner.write(bytes)
418421
}
@@ -425,44 +428,44 @@ impl<H: Hasher> Hasher for WidenUsizeHasher<H> {
425428
self.inner.write_u8(i)
426429
}
427430
fn write_u16(&mut self, i: u16) {
428-
self.inner.write_u16(i)
431+
self.inner.write_u16(i.to_le())
429432
}
430433
fn write_u32(&mut self, i: u32) {
431-
self.inner.write_u32(i)
434+
self.inner.write_u32(i.to_le())
432435
}
433436
fn write_u64(&mut self, i: u64) {
434-
self.inner.write_u64(i)
437+
self.inner.write_u64(i.to_le())
435438
}
436439
fn write_usize(&mut self, i: usize) {
437-
self.inner.write_u64(i as u64)
440+
self.inner.write_u64((i as u64).to_le())
438441
}
439442
fn write_i8(&mut self, i: i8) {
440443
self.inner.write_i8(i)
441444
}
442445
fn write_i16(&mut self, i: i16) {
443-
self.inner.write_i16(i)
446+
self.inner.write_i16(i.to_le())
444447
}
445448
fn write_i32(&mut self, i: i32) {
446-
self.inner.write_i32(i)
449+
self.inner.write_i32(i.to_le())
447450
}
448451
fn write_i64(&mut self, i: i64) {
449-
self.inner.write_i64(i)
452+
self.inner.write_i64(i.to_le())
450453
}
451454
fn write_isize(&mut self, i: isize) {
452-
self.inner.write_i64(i as i64)
455+
self.inner.write_i64((i as i64).to_le())
453456
}
454457
}
455458

456459
pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, H> {
457460
tcx: TyCtxt<'a, 'gcx, 'tcx>,
458-
state: WidenUsizeHasher<H>,
461+
state: ArchIndependentHasher<H>,
459462
}
460463

461464
impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
462465
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>, state: H) -> Self {
463466
TypeIdHasher {
464467
tcx: tcx,
465-
state: WidenUsizeHasher::new(state),
468+
state: ArchIndependentHasher::new(state),
466469
}
467470
}
468471

@@ -493,6 +496,10 @@ impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
493496
pub fn def_path(&mut self, def_path: &ast_map::DefPath) {
494497
def_path.deterministic_hash_to(self.tcx, &mut self.state);
495498
}
499+
500+
pub fn into_inner(self) -> H {
501+
self.state.inner
502+
}
496503
}
497504

498505
impl<'a, 'gcx, 'tcx, H: Hasher> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, H> {

0 commit comments

Comments
 (0)