Skip to content

Commit 76c7144

Browse files
committed
Properly handle Spans that reference imported SourceFiles
Previously, metadata encoding used DUMMY_SP to represent any spans that referenced an 'imported' SourceFile - e.g. a SourceFile from an upstream dependency. This leads to sub-optimal error messages in certain cases (see the included test). This PR changes how we encode and decode spans in crate metadata. We encode spans in one of two ways: * 'Local' spans, which reference non-imported SourceFiles, are encoded exactly as before. * 'Foreign' spans, which reference imported SourceFiles, are encoded with the CrateNum of their 'originating' crate. Additionally, their 'lo' and 'high' values are rebased on top of the 'originating' crate, which allows them to be used with the SourceMap data encoded for that crate. The `ExternalSource` enum is renamed to `ExternalSourceKind`. There is now a struct called `ExternalSource`, which holds an `ExternalSourceKind` along with the original line number information for the file. This is used during `Span` serialization to rebase spans onto their 'owning' crate.
1 parent d607231 commit 76c7144

File tree

11 files changed

+233
-50
lines changed

11 files changed

+233
-50
lines changed

src/librustc/hir/map/collector.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
1010
use rustc_data_structures::svh::Svh;
1111
use rustc_hir as hir;
1212
use rustc_hir::def_id::CRATE_DEF_INDEX;
13-
use rustc_hir::def_id::{CrateNum, DefIndex, LOCAL_CRATE};
13+
use rustc_hir::def_id::{DefIndex, LOCAL_CRATE};
1414
use rustc_hir::intravisit::{self, NestedVisitorMap, Visitor};
1515
use rustc_hir::*;
1616
use rustc_index::vec::IndexVec;
@@ -213,7 +213,7 @@ impl<'a, 'hir> NodeCollector<'a, 'hir> {
213213
.source_map
214214
.files()
215215
.iter()
216-
.filter(|source_file| CrateNum::from_u32(source_file.crate_of_origin) == LOCAL_CRATE)
216+
.filter(|source_file| source_file.cnum == LOCAL_CRATE)
217217
.map(|source_file| source_file.name_hash)
218218
.collect();
219219

src/librustc/ich/impls_syntax.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use crate::ich::StableHashingContext;
55

66
use rustc_ast::ast;
77
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
8-
use rustc_hir::def_id::{CrateNum, DefId, CRATE_DEF_INDEX};
98
use rustc_span::SourceFile;
109

1110
use smallvec::SmallVec;
@@ -59,7 +58,7 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
5958
name_hash,
6059
name_was_remapped,
6160
unmapped_path: _,
62-
crate_of_origin,
61+
cnum,
6362
// Do not hash the source as it is not encoded
6463
src: _,
6564
src_hash,
@@ -75,9 +74,6 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
7574
(name_hash as u64).hash_stable(hcx, hasher);
7675
name_was_remapped.hash_stable(hcx, hasher);
7776

78-
DefId { krate: CrateNum::from_u32(crate_of_origin), index: CRATE_DEF_INDEX }
79-
.hash_stable(hcx, hasher);
80-
8177
src_hash.hash_stable(hcx, hasher);
8278

8379
// We only hash the relative position within this source_file
@@ -101,6 +97,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
10197
for &char_pos in normalized_pos.iter() {
10298
stable_normalized_pos(char_pos, start_pos).hash_stable(hcx, hasher);
10399
}
100+
101+
cnum.hash_stable(hcx, hasher);
104102
}
105103
}
106104

src/librustc_metadata/rmeta/decoder.rs

+85-6
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
387387
return Ok(DUMMY_SP);
388388
}
389389

390-
debug_assert_eq!(tag, TAG_VALID_SPAN);
390+
debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
391391

392392
let lo = BytePos::decode(self)?;
393393
let len = BytePos::decode(self)?;
@@ -399,7 +399,68 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
399399
bug!("Cannot decode Span without Session.")
400400
};
401401

402-
let imported_source_files = self.cdata().imported_source_files(&sess.source_map());
402+
// There are two possibilities here:
403+
// 1. This is a 'local span', which is located inside a `SourceFile`
404+
// that came from this crate. In this case, we use the source map data
405+
// encoded in this crate. This branch should be taken nearly all of the time.
406+
// 2. This is a 'foreign span', which is located inside a `SourceFile`
407+
// that came from a *different* crate (some crate upstream of the one
408+
// whose metadata we're looking at). For example, consider this dependency graph:
409+
//
410+
// A -> B -> C
411+
//
412+
// Suppose that we're currently compiling crate A, and start deserializing
413+
// metadata from crate B. When we deserialize a Span from crate B's metadata,
414+
// there are two possibilities:
415+
//
416+
// 1. The span references a file from crate B. This makes it a 'local' span,
417+
// which means that we can use crate B's serialized source map information.
418+
// 2. The span references a file from crate C. This makes it a 'foreign' span,
419+
// which means we need to use Crate *C* (not crate B) to determine the source
420+
// map information. We only record source map information for a file in the
421+
// crate that 'owns' it, so deserializing a Span may require us to look at
422+
// a transitive dependency.
423+
//
424+
// When we encode a foreign span, we adjust its 'lo' and 'high' values
425+
// to be based on the *foreign* crate (e.g. crate C), not the crate
426+
// we are writing metadata for (e.g. crate B). This allows us to
427+
// treat the 'local' and 'foreign' cases almost identically during deserialization:
428+
// we can call `imported_source_files` for the proper crate, and binary search
429+
// through the returned slice using our span.
430+
let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL {
431+
self.cdata().imported_source_files(sess.source_map())
432+
} else {
433+
// FIXME: We don't decode dependencies of proc-macros.
434+
// Remove this once #69976 is merged
435+
if self.cdata().root.is_proc_macro_crate() {
436+
debug!(
437+
"SpecializedDecoder<Span>::specialized_decode: skipping span for proc-macro crate {:?}",
438+
self.cdata().cnum
439+
);
440+
// Decode `CrateNum` as u32 - using `CrateNum::decode` will ICE
441+
// since we don't have `cnum_map` populated.
442+
// This advances the decoder position so that we can continue
443+
// to read metadata.
444+
let _ = u32::decode(self)?;
445+
return Ok(DUMMY_SP);
446+
}
447+
// tag is TAG_VALID_SPAN_FOREIGN, checked by `debug_assert` above
448+
let cnum = CrateNum::decode(self)?;
449+
debug!(
450+
"SpecializedDecoder<Span>::specialized_decode: loading source files from cnum {:?}",
451+
cnum
452+
);
453+
454+
// Decoding 'foreign' spans should be rare enough that it's
455+
// not worth it to maintain a per-CrateNum cache for `last_source_file_index`.
456+
// We just set it to 0, to ensure that we don't try to access something out
457+
// of bounds for our initial 'guess'
458+
self.last_source_file_index = 0;
459+
460+
let foreign_data = self.cdata().cstore.get_crate_data(cnum);
461+
foreign_data.imported_source_files(sess.source_map())
462+
};
463+
403464
let source_file = {
404465
// Optimize for the case that most spans within a translated item
405466
// originate from the same source_file.
@@ -413,16 +474,32 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
413474
.binary_search_by_key(&lo, |source_file| source_file.original_start_pos)
414475
.unwrap_or_else(|index| index - 1);
415476

416-
self.last_source_file_index = index;
477+
// Don't try to cache the index for foreign spans,
478+
// as this would require a map from CrateNums to indices
479+
if tag == TAG_VALID_SPAN_LOCAL {
480+
self.last_source_file_index = index;
481+
}
417482
&imported_source_files[index]
418483
}
419484
};
420485

421486
// Make sure our binary search above is correct.
422-
debug_assert!(lo >= source_file.original_start_pos && lo <= source_file.original_end_pos);
487+
debug_assert!(
488+
lo >= source_file.original_start_pos && lo <= source_file.original_end_pos,
489+
"Bad binary search: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
490+
lo,
491+
source_file.original_start_pos,
492+
source_file.original_end_pos
493+
);
423494

424495
// Make sure we correctly filtered out invalid spans during encoding
425-
debug_assert!(hi >= source_file.original_start_pos && hi <= source_file.original_end_pos);
496+
debug_assert!(
497+
hi >= source_file.original_start_pos && hi <= source_file.original_end_pos,
498+
"Bad binary search: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
499+
hi,
500+
source_file.original_start_pos,
501+
source_file.original_end_pos
502+
);
426503

427504
let lo =
428505
(lo + source_file.translated_source_file.start_pos) - source_file.original_start_pos;
@@ -1424,14 +1501,16 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
14241501
let local_version = local_source_map.new_imported_source_file(
14251502
name,
14261503
name_was_remapped,
1427-
self.cnum.as_u32(),
14281504
src_hash,
14291505
name_hash,
14301506
source_length,
1507+
self.cnum,
14311508
lines,
14321509
multibyte_chars,
14331510
non_narrow_chars,
14341511
normalized_pos,
1512+
start_pos,
1513+
end_pos,
14351514
);
14361515
debug!(
14371516
"CrateMetaData::imported_source_files alloc \

src/librustc_metadata/rmeta/encoder.rs

+47-11
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use rustc_ast::ast;
2929
use rustc_ast::attr;
3030
use rustc_span::source_map::Spanned;
3131
use rustc_span::symbol::{kw, sym, Ident, Symbol};
32-
use rustc_span::{self, FileName, SourceFile, Span};
32+
use rustc_span::{self, ExternalSource, FileName, SourceFile, Span};
3333
use std::hash::Hash;
3434
use std::num::NonZeroUsize;
3535
use std::path::Path;
@@ -167,20 +167,56 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
167167
return TAG_INVALID_SPAN.encode(self);
168168
}
169169

170-
// HACK(eddyb) there's no way to indicate which crate a Span is coming
171-
// from right now, so decoding would fail to find the SourceFile if
172-
// it's not local to the crate the Span is found in.
173-
if self.source_file_cache.is_imported() {
174-
return TAG_INVALID_SPAN.encode(self);
175-
}
170+
// There are two possible cases here:
171+
// 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the
172+
// crate we are writing metadata for. When the metadata for *this* crate gets
173+
// deserialized, the deserializer will need to know which crate it originally came
174+
// from. We use `TAG_VALID_SPAN_FOREIGN` to indicate that a `CrateNum` should
175+
// be deserialized after the rest of the span data, which tells the deserializer
176+
// which crate contains the source map information.
177+
// 2. This span comes from our own crate. No special handling is needed - we just
178+
// write `TAG_VALID_SPAN_LOCAL` to let the deserializer know that it should use
179+
// our own source map information.
180+
let (tag, lo, hi) = if self.source_file_cache.is_imported() {
181+
// To simplify deserialization, we 'rebase' this span onto the crate it originally came from
182+
// (the crate that 'owns' the file it references). These rebased 'lo' and 'hi' values
183+
// are relative to the source map information for the 'foreign' crate whose CrateNum
184+
// we write into the metadata. This allows `imported_source_files` to binary
185+
// search through the 'foreign' crate's source map information, using the
186+
// deserialized 'lo' and 'hi' values directly.
187+
//
188+
// All of this logic ensures that the final result of deserialization is a 'normal'
189+
// Span that can be used without any additional trouble.
190+
let external_start_pos = {
191+
// Introduce a new scope so that we drop the 'lock()' temporary
192+
match &*self.source_file_cache.external_src.lock() {
193+
ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos,
194+
src => panic!("Unexpected external source {:?}", src),
195+
}
196+
};
197+
let lo = (span.lo - self.source_file_cache.start_pos) + external_start_pos;
198+
let hi = (span.hi - self.source_file_cache.start_pos) + external_start_pos;
199+
200+
(TAG_VALID_SPAN_FOREIGN, lo, hi)
201+
} else {
202+
(TAG_VALID_SPAN_LOCAL, span.lo, span.hi)
203+
};
176204

177-
TAG_VALID_SPAN.encode(self)?;
178-
span.lo.encode(self)?;
205+
tag.encode(self)?;
206+
lo.encode(self)?;
179207

180208
// Encode length which is usually less than span.hi and profits more
181209
// from the variable-length integer encoding that we use.
182-
let len = span.hi - span.lo;
183-
len.encode(self)
210+
let len = hi - lo;
211+
len.encode(self)?;
212+
213+
if tag == TAG_VALID_SPAN_FOREIGN {
214+
// This needs to be two lines to avoid holding a borrow of `self.source_file_cache`
215+
// while calling `cnum.encode(self)`
216+
let cnum = self.source_file_cache.cnum;
217+
cnum.encode(self)?;
218+
}
219+
Ok(())
184220

185221
// Don't encode the expansion context.
186222
}

src/librustc_metadata/rmeta/mod.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -411,5 +411,6 @@ struct GeneratorData<'tcx> {
411411
}
412412

413413
// Tags used for encoding Spans:
414-
const TAG_VALID_SPAN: u8 = 0;
415-
const TAG_INVALID_SPAN: u8 = 1;
414+
const TAG_VALID_SPAN_LOCAL: u8 = 0;
415+
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
416+
const TAG_INVALID_SPAN: u8 = 2;

0 commit comments

Comments
 (0)