Skip to content

Commit d462551

Browse files
committed
Auto merge of #73706 - Aaron1011:fix/proc-macro-foreign-span, r=petrochenkov
Serialize all foreign `SourceFile`s into proc-macro crate metadata. Normally, we encode a `Span` that references a foreign `SourceFile` by encoding information about the foreign crate. When we decode this `Span`, we lookup the foreign crate in order to decode the `SourceFile`. However, this approach does not work for proc-macro crates. When we load a proc-macro crate, we do not deserialize any of its dependencies (since a proc-macro crate can only export proc-macros). This means that we cannot serialize a reference to an upstream crate, since the associated metadata will not be available when we try to deserialize it. This commit modifies foreign span handling so that we treat all foreign `SourceFile`s as local `SourceFile`s when serializing a proc-macro. All `SourceFile`s will be stored into the metadata of a proc-macro crate, allowing us to continue to deserialize a proc-macro crate without needing to load any of its dependencies. Since the number of foreign `SourceFile`s that we load during a compilation session may be very large, we only serialize a `SourceFile` if we have also serialized a `Span` which requires it.
2 parents 16957bd + 37a48fa commit d462551

13 files changed

+226
-43
lines changed

src/librustc_metadata/rmeta/decoder.rs

+15-12
Original file line numberDiff line numberDiff line change
@@ -450,19 +450,17 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
450450
let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL {
451451
self.cdata().imported_source_files(sess)
452452
} else {
453-
// FIXME: We don't decode dependencies of proc-macros.
454-
// Remove this once #69976 is merged
453+
// When we encode a proc-macro crate, all `Span`s should be encoded
454+
// with `TAG_VALID_SPAN_LOCAL`
455455
if self.cdata().root.is_proc_macro_crate() {
456-
debug!(
457-
"SpecializedDecoder<Span>::specialized_decode: skipping span for proc-macro crate {:?}",
458-
self.cdata().cnum
459-
);
460456
// Decode `CrateNum` as u32 - using `CrateNum::decode` will ICE
461457
// since we don't have `cnum_map` populated.
462-
// This advances the decoder position so that we can continue
463-
// to read metadata.
464-
let _ = u32::decode(self)?;
465-
return Ok(DUMMY_SP);
458+
let cnum = u32::decode(self)?;
459+
panic!(
460+
"Decoding of crate {:?} tried to access proc-macro dep {:?}",
461+
self.cdata().root.name,
462+
cnum
463+
);
466464
}
467465
// tag is TAG_VALID_SPAN_FOREIGN, checked by `debug_assert` above
468466
let cnum = CrateNum::decode(self)?;
@@ -990,8 +988,13 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
990988
DefKind::Macro(macro_kind(raw_macro)),
991989
self.local_def_id(def_index),
992990
);
993-
let ident = Ident::from_str(raw_macro.name());
994-
callback(Export { ident, res, vis: ty::Visibility::Public, span: DUMMY_SP });
991+
let ident = self.item_ident(def_index, sess);
992+
callback(Export {
993+
ident,
994+
res,
995+
vis: ty::Visibility::Public,
996+
span: self.get_span(def_index, sess),
997+
});
995998
}
996999
}
9971000
return;

src/librustc_metadata/rmeta/encoder.rs

+78-24
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use rustc_hir::intravisit::{self, NestedVisitorMap, Visitor};
1616
use rustc_hir::itemlikevisit::{ItemLikeVisitor, ParItemLikeVisitor};
1717
use rustc_hir::lang_items;
1818
use rustc_hir::{AnonConst, GenericParamKind};
19+
use rustc_index::bit_set::GrowableBitSet;
1920
use rustc_index::vec::Idx;
2021
use rustc_middle::hir::map::Map;
2122
use rustc_middle::middle::cstore::{EncodedMetadata, ForeignModule, LinkagePreference, NativeLib};
@@ -51,7 +52,20 @@ struct EncodeContext<'tcx> {
5152
interpret_allocs_inverse: Vec<interpret::AllocId>,
5253

5354
// This is used to speed up Span encoding.
54-
source_file_cache: Lrc<SourceFile>,
55+
// The `usize` is an index into the `MonotonicVec`
56+
// that stores the `SourceFile`
57+
source_file_cache: (Lrc<SourceFile>, usize),
58+
// The indices (into the `SourceMap`'s `MonotonicVec`)
59+
// of all of the `SourceFiles` that we need to serialize.
60+
// When we serialize a `Span`, we insert the index of its
61+
// `SourceFile` into the `GrowableBitSet`.
62+
//
63+
// This needs to be a `GrowableBitSet` and not a
64+
// regular `BitSet` because we may actually import new `SourceFiles`
65+
// during metadata encoding, due to executing a query
66+
// with a result containing a foreign `Span`.
67+
required_source_files: Option<GrowableBitSet<usize>>,
68+
is_proc_macro: bool,
5569
}
5670

5771
macro_rules! encoder_methods {
@@ -154,18 +168,23 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
154168
// The Span infrastructure should make sure that this invariant holds:
155169
debug_assert!(span.lo <= span.hi);
156170

157-
if !self.source_file_cache.contains(span.lo) {
171+
if !self.source_file_cache.0.contains(span.lo) {
158172
let source_map = self.tcx.sess.source_map();
159173
let source_file_index = source_map.lookup_source_file_idx(span.lo);
160-
self.source_file_cache = source_map.files()[source_file_index].clone();
174+
self.source_file_cache =
175+
(source_map.files()[source_file_index].clone(), source_file_index);
161176
}
162177

163-
if !self.source_file_cache.contains(span.hi) {
178+
if !self.source_file_cache.0.contains(span.hi) {
164179
// Unfortunately, macro expansion still sometimes generates Spans
165180
// that are malformed in this way.
166181
return TAG_INVALID_SPAN.encode(self);
167182
}
168183

184+
let source_files = self.required_source_files.as_mut().expect("Already encoded SourceMap!");
185+
// Record the fact that we need to encode the data for this `SourceFile`
186+
source_files.insert(self.source_file_cache.1);
187+
169188
// There are two possible cases here:
170189
// 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the
171190
// crate we are writing metadata for. When the metadata for *this* crate gets
@@ -176,7 +195,13 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
176195
// 2. This span comes from our own crate. No special handling is needed - we just
177196
// write `TAG_VALID_SPAN_LOCAL` to let the deserializer know that it should use
178197
// our own source map information.
179-
let (tag, lo, hi) = if self.source_file_cache.is_imported() {
198+
//
199+
// If we're a proc-macro crate, we always treat this as a local `Span`.
200+
// In `encode_source_map`, we serialize foreign `SourceFile`s into our metadata
201+
// if we're a proc-macro crate.
202+
// This allows us to avoid loading the dependencies of proc-macro crates: all of
203+
// the information we need to decode `Span`s is stored in the proc-macro crate.
204+
let (tag, lo, hi) = if self.source_file_cache.0.is_imported() && !self.is_proc_macro {
180205
// To simplify deserialization, we 'rebase' this span onto the crate it originally came from
181206
// (the crate that 'owns' the file it references). These rebased 'lo' and 'hi' values
182207
// are relative to the source map information for the 'foreign' crate whose CrateNum
@@ -188,13 +213,13 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
188213
// Span that can be used without any additional trouble.
189214
let external_start_pos = {
190215
// Introduce a new scope so that we drop the 'lock()' temporary
191-
match &*self.source_file_cache.external_src.lock() {
216+
match &*self.source_file_cache.0.external_src.lock() {
192217
ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos,
193218
src => panic!("Unexpected external source {:?}", src),
194219
}
195220
};
196-
let lo = (span.lo - self.source_file_cache.start_pos) + external_start_pos;
197-
let hi = (span.hi - self.source_file_cache.start_pos) + external_start_pos;
221+
let lo = (span.lo - self.source_file_cache.0.start_pos) + external_start_pos;
222+
let hi = (span.hi - self.source_file_cache.0.start_pos) + external_start_pos;
198223

199224
(TAG_VALID_SPAN_FOREIGN, lo, hi)
200225
} else {
@@ -212,7 +237,7 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
212237
if tag == TAG_VALID_SPAN_FOREIGN {
213238
// This needs to be two lines to avoid holding the `self.source_file_cache`
214239
// while calling `cnum.encode(self)`
215-
let cnum = self.source_file_cache.cnum;
240+
let cnum = self.source_file_cache.0.cnum;
216241
cnum.encode(self)?;
217242
}
218243
Ok(())
@@ -386,17 +411,24 @@ impl<'tcx> EncodeContext<'tcx> {
386411
let all_source_files = source_map.files();
387412

388413
let (working_dir, _cwd_remapped) = self.tcx.sess.working_dir.clone();
414+
// By replacing the `Option` with `None`, we ensure that we can't
415+
// accidentally serialize any more `Span`s after the source map encoding
416+
// is done.
417+
let required_source_files = self.required_source_files.take().unwrap();
389418

390419
let adapted = all_source_files
391420
.iter()
392-
.filter(|source_file| {
393-
// No need to re-export imported source_files, as any downstream
394-
// crate will import them from their original source.
395-
// FIXME(eddyb) the `Span` encoding should take that into account.
396-
!source_file.is_imported()
421+
.enumerate()
422+
.filter(|(idx, source_file)| {
423+
// Only serialize `SourceFile`s that were used
424+
// during the encoding of a `Span`
425+
required_source_files.contains(*idx) &&
426+
// Don't serialize imported `SourceFile`s, unless
427+
// we're in a proc-macro crate.
428+
(!source_file.is_imported() || self.is_proc_macro)
397429
})
398-
.map(|source_file| {
399-
match source_file.name {
430+
.map(|(_, source_file)| {
431+
let mut adapted = match source_file.name {
400432
// This path of this SourceFile has been modified by
401433
// path-remapping, so we use it verbatim (and avoid
402434
// cloning the whole map in the process).
@@ -419,15 +451,30 @@ impl<'tcx> EncodeContext<'tcx> {
419451

420452
// expanded code, not from a file
421453
_ => source_file.clone(),
454+
};
455+
456+
// We're serializing this `SourceFile` into our crate metadata,
457+
// so mark it as coming from this crate.
458+
// This also ensures that we don't try to deserialize the
459+
// `CrateNum` for a proc-macro dependency - since proc macro
460+
// dependencies aren't loaded when we deserialize a proc-macro,
461+
// trying to remap the `CrateNum` would fail.
462+
if self.is_proc_macro {
463+
Lrc::make_mut(&mut adapted).cnum = LOCAL_CRATE;
422464
}
465+
adapted
423466
})
424467
.collect::<Vec<_>>();
425468

426469
self.lazy(adapted.iter().map(|rc| &**rc))
427470
}
428471

472+
fn is_proc_macro(&self) -> bool {
473+
self.tcx.sess.crate_types().contains(&CrateType::ProcMacro)
474+
}
475+
429476
fn encode_crate_root(&mut self) -> Lazy<CrateRoot<'tcx>> {
430-
let is_proc_macro = self.tcx.sess.crate_types().contains(&CrateType::ProcMacro);
477+
let is_proc_macro = self.is_proc_macro();
431478

432479
let mut i = self.position();
433480

@@ -458,11 +505,6 @@ impl<'tcx> EncodeContext<'tcx> {
458505

459506
let foreign_modules = self.encode_foreign_modules();
460507

461-
// Encode source_map
462-
i = self.position();
463-
let source_map = self.encode_source_map();
464-
let source_map_bytes = self.position() - i;
465-
466508
// Encode DefPathTable
467509
i = self.position();
468510
let def_path_table = self.encode_def_path_table();
@@ -514,12 +556,19 @@ impl<'tcx> EncodeContext<'tcx> {
514556
let proc_macro_data_bytes = self.position() - i;
515557

516558
// Encode exported symbols info. This is prefetched in `encode_metadata` so we encode
517-
// this last to give the prefetching as much time as possible to complete.
559+
// this late to give the prefetching as much time as possible to complete.
518560
i = self.position();
519561
let exported_symbols = self.tcx.exported_symbols(LOCAL_CRATE);
520562
let exported_symbols = self.encode_exported_symbols(&exported_symbols);
521563
let exported_symbols_bytes = self.position() - i;
522564

565+
// Encode source_map. This needs to be done last,
566+
// since encoding `Span`s tells us which `SourceFiles` we actually
567+
// need to encode.
568+
i = self.position();
569+
let source_map = self.encode_source_map();
570+
let source_map_bytes = self.position() - i;
571+
523572
let attrs = tcx.hir().krate_attrs();
524573
let has_default_lib_allocator = attr::contains_name(&attrs, sym::default_lib_allocator);
525574

@@ -1854,17 +1903,22 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
18541903
// Will be filled with the root position after encoding everything.
18551904
encoder.emit_raw_bytes(&[0, 0, 0, 0]);
18561905

1906+
let source_map_files = tcx.sess.source_map().files();
1907+
18571908
let mut ecx = EncodeContext {
18581909
opaque: encoder,
18591910
tcx,
18601911
tables: Default::default(),
18611912
lazy_state: LazyState::NoNode,
18621913
type_shorthands: Default::default(),
18631914
predicate_shorthands: Default::default(),
1864-
source_file_cache: tcx.sess.source_map().files()[0].clone(),
1915+
source_file_cache: (source_map_files[0].clone(), 0),
18651916
interpret_allocs: Default::default(),
18661917
interpret_allocs_inverse: Default::default(),
1918+
required_source_files: Some(GrowableBitSet::with_capacity(source_map_files.len())),
1919+
is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro),
18671920
};
1921+
drop(source_map_files);
18681922

18691923
// Encode the rustc version string in a predictable location.
18701924
rustc_version().encode(&mut ecx).unwrap();

src/librustc_metadata/rmeta/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,6 @@ crate struct CrateRoot<'tcx> {
192192
diagnostic_items: Lazy<[(Symbol, DefIndex)]>,
193193
native_libraries: Lazy<[NativeLib]>,
194194
foreign_modules: Lazy<[ForeignModule]>,
195-
source_map: Lazy<[rustc_span::SourceFile]>,
196195
def_path_table: Lazy<rustc_hir::definitions::DefPathTable>,
197196
impls: Lazy<[TraitImpls]>,
198197
interpret_alloc_index: Lazy<[u32]>,
@@ -203,6 +202,7 @@ crate struct CrateRoot<'tcx> {
203202
proc_macro_data: Option<Lazy<[DefIndex]>>,
204203

205204
exported_symbols: Lazy!([(ExportedSymbol<'tcx>, SymbolExportLevel)]),
205+
source_map: Lazy<[rustc_span::SourceFile]>,
206206

207207
compiler_builtins: bool,
208208
needs_allocator: bool,

src/librustc_span/hygiene.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -395,10 +395,11 @@ pub fn debug_hygiene_data(verbose: bool) -> String {
395395
data.expn_data.iter().enumerate().for_each(|(id, expn_info)| {
396396
let expn_info = expn_info.as_ref().expect("no expansion data for an expansion ID");
397397
s.push_str(&format!(
398-
"\n{}: parent: {:?}, call_site_ctxt: {:?}, kind: {:?}",
398+
"\n{}: parent: {:?}, call_site_ctxt: {:?}, def_site_ctxt: {:?}, kind: {:?}",
399399
id,
400400
expn_info.parent,
401401
expn_info.call_site.ctxt(),
402+
expn_info.def_site.ctxt(),
402403
expn_info.kind,
403404
));
404405
});

src/librustc_span/source_map.rs

+41-2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,41 @@ pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span {
4040
}
4141
}
4242

43+
pub mod monotonic {
44+
use std::ops::{Deref, DerefMut};
45+
46+
/// A `MonotonicVec` is a `Vec` which can only be grown.
47+
/// Once inserted, an element can never be removed or swapped,
48+
/// guaranteeing that any indices into a `MonotonicVec` are stable
49+
// This is declared in its own module to ensure that the private
50+
// field is inaccessible
51+
pub struct MonotonicVec<T>(Vec<T>);
52+
impl<T> MonotonicVec<T> {
53+
pub fn new(val: Vec<T>) -> MonotonicVec<T> {
54+
MonotonicVec(val)
55+
}
56+
57+
pub fn push(&mut self, val: T) {
58+
self.0.push(val);
59+
}
60+
}
61+
62+
impl<T> Default for MonotonicVec<T> {
63+
fn default() -> Self {
64+
MonotonicVec::new(vec![])
65+
}
66+
}
67+
68+
impl<T> Deref for MonotonicVec<T> {
69+
type Target = Vec<T>;
70+
fn deref(&self) -> &Self::Target {
71+
&self.0
72+
}
73+
}
74+
75+
impl<T> !DerefMut for MonotonicVec<T> {}
76+
}
77+
4378
#[derive(Clone, RustcEncodable, RustcDecodable, Debug, Copy, HashStable_Generic)]
4479
pub struct Spanned<T> {
4580
pub node: T,
@@ -125,7 +160,7 @@ impl StableSourceFileId {
125160

126161
#[derive(Default)]
127162
pub(super) struct SourceMapFiles {
128-
source_files: Vec<Lrc<SourceFile>>,
163+
source_files: monotonic::MonotonicVec<Lrc<SourceFile>>,
129164
stable_id_to_source_file: FxHashMap<StableSourceFileId, Lrc<SourceFile>>,
130165
}
131166

@@ -199,7 +234,9 @@ impl SourceMap {
199234
Ok(bytes)
200235
}
201236

202-
pub fn files(&self) -> MappedLockGuard<'_, Vec<Lrc<SourceFile>>> {
237+
// By returning a `MonotonicVec`, we ensure that consumers cannot invalidate
238+
// any existing indices pointing into `files`.
239+
pub fn files(&self) -> MappedLockGuard<'_, monotonic::MonotonicVec<Lrc<SourceFile>>> {
203240
LockGuard::map(self.files.borrow(), |files| &mut files.source_files)
204241
}
205242

@@ -912,6 +949,8 @@ impl SourceMap {
912949
}
913950

914951
// Returns the index of the `SourceFile` (in `self.files`) that contains `pos`.
952+
// This index is guaranteed to be valid for the lifetime of this `SourceMap`,
953+
// since `source_files` is a `MonotonicVec`
915954
pub fn lookup_source_file_idx(&self, pos: BytePos) -> usize {
916955
self.files
917956
.borrow()

src/test/ui/hygiene/unpretty-debug.stdout

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ fn y /* 0#0 */() { }
1616

1717
/*
1818
Expansions:
19-
0: parent: ExpnId(0), call_site_ctxt: #0, kind: Root
20-
1: parent: ExpnId(0), call_site_ctxt: #0, kind: Macro(Bang, "foo")
19+
0: parent: ExpnId(0), call_site_ctxt: #0, def_site_ctxt: #0, kind: Root
20+
1: parent: ExpnId(0), call_site_ctxt: #0, def_site_ctxt: #0, kind: Macro(Bang, "foo")
2121

2222
SyntaxContexts:
2323
#0: parent: #0, outer_mark: (ExpnId(0), Opaque)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// force-host
2+
3+
#[macro_export]
4+
macro_rules! make_it {
5+
($name:ident) => {
6+
#[proc_macro]
7+
pub fn $name(input: TokenStream) -> TokenStream {
8+
println!("Def site: {:?}", Span::def_site());
9+
input
10+
}
11+
};
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// force-host
2+
// no-prefer-dynamic
3+
// edition:2018
4+
5+
#![feature(proc_macro_def_site)]
6+
#![crate_type = "proc-macro"]
7+
8+
extern crate proc_macro;
9+
extern crate make_macro;
10+
use proc_macro::{TokenStream, Span};
11+
12+
make_macro::make_it!(print_def_site);

0 commit comments

Comments
 (0)