Skip to content

Commit e1be9ca

Browse files
committed
Introduce -Zsplit-metadata option
This will split the crate metadata out of library files. Instead, only the svh is preserved to allow for loading the right rmeta file. This significantly reduces library size. In addition, it allows for cheaper checks of whether different library files are the same crate.
1 parent 145f9cf commit e1be9ca

File tree

8 files changed

+97
-27
lines changed

8 files changed

+97
-27
lines changed

compiler/rustc_codegen_ssa/src/back/link.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ fn link_rlib<'a>(
309309
let (metadata, metadata_position) = create_wrapper_file(
310310
sess,
311311
".rmeta".to_string(),
312-
codegen_results.metadata.raw_data(),
312+
codegen_results.metadata.maybe_reference(),
313313
);
314314
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
315315
match metadata_position {

compiler/rustc_codegen_ssa/src/back/metadata.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -548,8 +548,8 @@ pub fn create_compressed_metadata_file(
548548
symbol_name: &str,
549549
) -> Vec<u8> {
550550
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
551-
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
552-
packed_metadata.extend(metadata.raw_data());
551+
packed_metadata.write_all(&(metadata.maybe_reference().len() as u64).to_le_bytes()).unwrap();
552+
packed_metadata.extend(metadata.maybe_reference());
553553

554554
let Some(mut file) = create_object_file(sess) else {
555555
if sess.target.is_like_wasm {

compiler/rustc_interface/src/tests.rs

+1
Original file line numberDiff line numberDiff line change
@@ -852,6 +852,7 @@ fn test_unstable_options_tracking_hash() {
852852
tracked!(simulate_remapped_rust_src_base, Some(PathBuf::from("/rustc/abc")));
853853
tracked!(small_data_threshold, Some(16));
854854
tracked!(split_lto_unit, Some(true));
855+
tracked!(split_metadata, true);
855856
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
856857
tracked!(stack_protector, StackProtector::All);
857858
tracked!(teach, true);

compiler/rustc_metadata/src/fs.rs

+10-5
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
5050
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
5151
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
5252
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
53-
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
53+
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
54+
let metadata_reference_filename = metadata_tmpdir.as_ref().join("ref.rmeta");
5455

5556
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
5657
// This simplifies the creation of the output `out_filename` when requested.
@@ -60,9 +61,12 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
6061
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
6162
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
6263
});
64+
std::fs::File::create(&metadata_reference_filename).unwrap_or_else(|err| {
65+
// Report the path of the reference file, since that is the file whose creation failed here.
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_reference_filename, err });
66+
});
6367
}
6468
MetadataKind::Uncompressed | MetadataKind::Compressed => {
65-
encode_metadata(tcx, &metadata_filename);
69+
encode_metadata(tcx, &metadata_filename, &metadata_reference_filename)
6670
}
6771
};
6872

@@ -100,9 +104,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
100104

101105
// Load metadata back to memory: codegen may need to include it in object files.
102106
let metadata =
103-
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
104-
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
105-
});
107+
EncodedMetadata::from_path(metadata_filename, metadata_reference_filename, metadata_tmpdir)
108+
.unwrap_or_else(|err| {
109+
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
110+
});
106111

107112
let need_metadata_module = metadata_kind == MetadataKind::Compressed;
108113

compiler/rustc_metadata/src/locator.rs

+5
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,11 @@ impl<'a> CrateLocator<'a> {
577577
) {
578578
Ok(blob) => {
579579
if let Some(h) = self.crate_matches(&blob, &lib) {
580+
if blob.get_header().is_reference {
581+
if slot.is_none() {
582+
todo!("return error");
583+
}
584+
}
580585
(h, blob)
581586
} else {
582587
info!("metadata mismatch");

compiler/rustc_metadata/src/rmeta/encoder.rs

+72-19
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
701701
triple: tcx.sess.opts.target_triple.clone(),
702702
hash: tcx.crate_hash(LOCAL_CRATE),
703703
is_proc_macro_crate: proc_macro_data.is_some(),
704+
is_reference: false,
704705
},
705706
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
706707
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
@@ -2198,42 +2199,61 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
21982199
// generated regardless of trailing bytes that end up in it.
21992200

22002201
pub struct EncodedMetadata {
2201-
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
2202-
mmap: Option<Mmap>,
2202+
// The declaration order matters because `full_mmap` should be dropped
2203+
// before `_temp_dir`.
2204+
full_mmap: Option<Mmap>,
2205+
reference: Option<Vec<u8>>,
22032206
// We need to carry MaybeTempDir to avoid deleting the temporary
22042207
// directory while accessing the Mmap.
22052208
_temp_dir: Option<MaybeTempDir>,
22062209
}
22072210

22082211
impl EncodedMetadata {
22092212
#[inline]
2210-
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
2213+
pub fn from_path(
2214+
path: PathBuf,
2215+
reference_path: PathBuf,
2216+
temp_dir: Option<MaybeTempDir>,
2217+
) -> std::io::Result<Self> {
22112218
let file = std::fs::File::open(&path)?;
22122219
let file_metadata = file.metadata()?;
22132220
if file_metadata.len() == 0 {
2214-
return Ok(Self { mmap: None, _temp_dir: None });
2221+
return Ok(Self { full_mmap: None, reference: None, _temp_dir: None });
22152222
}
2216-
let mmap = unsafe { Some(Mmap::map(file)?) };
2217-
Ok(Self { mmap, _temp_dir: temp_dir })
2223+
let full_mmap = unsafe { Some(Mmap::map(file)?) };
2224+
2225+
let reference = std::fs::read(reference_path)?;
2226+
let reference = if reference.is_empty() { None } else { Some(reference) };
2227+
2228+
Ok(Self { full_mmap, reference, _temp_dir: temp_dir })
2229+
}
2230+
2231+
#[inline]
2232+
pub fn full(&self) -> &[u8] {
2233+
&self.full_mmap.as_deref().unwrap_or_default()
22182234
}
22192235

22202236
#[inline]
2221-
pub fn raw_data(&self) -> &[u8] {
2222-
self.mmap.as_deref().unwrap_or_default()
2237+
pub fn maybe_reference(&self) -> &[u8] {
2238+
self.reference.as_deref().unwrap_or(self.full())
22232239
}
22242240
}
22252241

22262242
impl<S: Encoder> Encodable<S> for EncodedMetadata {
22272243
fn encode(&self, s: &mut S) {
2228-
let slice = self.raw_data();
2244+
self.reference.encode(s);
2245+
2246+
let slice = self.full();
22292247
slice.encode(s)
22302248
}
22312249
}
22322250

22332251
impl<D: Decoder> Decodable<D> for EncodedMetadata {
22342252
fn decode(d: &mut D) -> Self {
2253+
let reference = <Option<Vec<u8>>>::decode(d);
2254+
22352255
let len = d.read_usize();
2236-
let mmap = if len > 0 {
2256+
let full_mmap = if len > 0 {
22372257
let mut mmap = MmapMut::map_anon(len).unwrap();
22382258
for _ in 0..len {
22392259
(&mut mmap[..]).write_all(&[d.read_u8()]).unwrap();
@@ -2244,11 +2264,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
22442264
None
22452265
};
22462266

2247-
Self { mmap, _temp_dir: None }
2267+
Self { full_mmap, reference, _temp_dir: None }
22482268
}
22492269
}
22502270

2251-
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
2271+
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: &Path) {
22522272
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
22532273

22542274
// Since encoding metadata is not in a query, and nothing is cached,
@@ -2262,6 +2282,44 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22622282
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
22632283
}
22642284

2285+
with_encode_metadata_header(tcx, path, |ecx| {
2286+
// Encode all the entries and extra information in the crate,
2287+
// culminating in the `CrateRoot` which points to all of it.
2288+
let root = ecx.encode_crate_root();
2289+
2290+
// Flush buffer to ensure backing file has the correct size.
2291+
ecx.opaque.flush();
2292+
// Record metadata size for self-profiling
2293+
tcx.prof.artifact_size(
2294+
"crate_metadata",
2295+
"crate_metadata",
2296+
ecx.opaque.file().metadata().unwrap().len(),
2297+
);
2298+
2299+
root.position.get()
2300+
});
2301+
2302+
if tcx.sess.opts.unstable_opts.split_metadata
2303+
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
2304+
{
2305+
with_encode_metadata_header(tcx, ref_path, |ecx| {
2306+
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
2307+
name: tcx.crate_name(LOCAL_CRATE),
2308+
triple: tcx.sess.opts.target_triple.clone(),
2309+
hash: tcx.crate_hash(LOCAL_CRATE),
2310+
is_proc_macro_crate: false,
2311+
is_reference: true,
2312+
});
2313+
header.position.get()
2314+
});
2315+
}
2316+
}
2317+
2318+
fn with_encode_metadata_header(
2319+
tcx: TyCtxt<'_>,
2320+
path: &Path,
2321+
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
2322+
) {
22652323
let mut encoder = opaque::FileEncoder::new(path)
22662324
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
22672325
encoder.emit_raw_bytes(METADATA_HEADER);
@@ -2296,9 +2354,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22962354
// Encode the rustc version string in a predictable location.
22972355
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);
22982356

2299-
// Encode all the entries and extra information in the crate,
2300-
// culminating in the `CrateRoot` which points to all of it.
2301-
let root = ecx.encode_crate_root();
2357+
let root_position = f(&mut ecx);
23022358

23032359
// Make sure we report any errors from writing to the file.
23042360
// If we forget this, compilation can succeed with an incomplete rmeta file,
@@ -2308,12 +2364,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
23082364
}
23092365

23102366
let file = ecx.opaque.file();
2311-
if let Err(err) = encode_root_position(file, root.position.get()) {
2367+
if let Err(err) = encode_root_position(file, root_position) {
23122368
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
23132369
}
2314-
2315-
// Record metadata size for self-profiling
2316-
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
23172370
}
23182371

23192372
fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {

compiler/rustc_metadata/src/rmeta/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ pub(crate) struct CrateHeader {
221221
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
222222
/// time ProcMacroData changes.
223223
pub(crate) is_proc_macro_crate: bool,
224+
/// Whether this header is a reference to a separate rmeta file.
225+
///
226+
/// This is used inside rlibs and dylibs when using `-Zsplit-metadata`.
227+
pub(crate) is_reference: bool,
224228
}
225229

226230
/// Serialized `.rmeta` data for a crate.

compiler/rustc_session/src/options.rs

+2
Original file line numberDiff line numberDiff line change
@@ -2075,6 +2075,8 @@ written to standard error output)"),
20752075
by the linker"),
20762076
split_lto_unit: Option<bool> = (None, parse_opt_bool, [TRACKED],
20772077
"enable LTO unit splitting (default: no)"),
2078+
split_metadata: bool = (false, parse_bool, [TRACKED],
2079+
"split metadata out of libraries into .rmeta files"),
20782080
src_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_src_file_hash, [TRACKED],
20792081
"hash algorithm of source files in debug info (`md5`, `sha1`, or `sha256`)"),
20802082
#[rustc_lint_opt_deny_field_access("use `Session::stack_protector` instead of this field")]

0 commit comments

Comments
 (0)