Skip to content

Commit ddf099f

Browse files
authored
Rollup merge of #139397 - Zalathar:virtual, r=jieyouxu
coverage: Build the CGU's global file table as late as possible Embedding coverage metadata in the output binary is a delicate dance, because per-function records need to embed references to the per-CGU filename table, but we only want to include files in that table if they are successfully used by at least one function. The way that we build the file tables has changed a few times over the last few years. This particular change is motivated by experimental work on properly supporting macro-expansion regions, which adds some additional constraints that our previous implementation wasn't equipped to deal with. LLVM is very strict about not allowing unused entries in local file tables. Currently that's not much of an issue, because we assume one source file per function, but to support expansion regions we need the flexibility to avoid committing to the use of a file until we're completely sure that we are able and willing to produce at least one coverage mapping region for it. In particular, when preparing a function's covfun record, we need the flexibility to decide at a late stage that a particular file isn't needed/usable after all. (It's OK for the *global* file table to contain unused entries, but we would still prefer to avoid that if possible, and this implementation also achieves that.)
2 parents f4c429f + 4322b6e commit ddf099f

File tree

3 files changed

+110
-88
lines changed

3 files changed

+110
-88
lines changed

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

+83-69
Original file line numberDiff line numberDiff line change
@@ -53,31 +53,20 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
5353
None => return,
5454
};
5555

56-
// The order of entries in this global file table needs to be deterministic,
57-
// and ideally should also be independent of the details of stable-hashing,
58-
// because coverage tests snapshots (`.cov-map`) can observe the order and
59-
// would need to be re-blessed if it changes. As long as those requirements
60-
// are satisfied, the order can be arbitrary.
61-
let mut global_file_table = GlobalFileTable::new();
62-
6356
let mut covfun_records = instances_used
6457
.iter()
6558
.copied()
6659
// Sort by symbol name, so that the global file table is built in an
6760
// order that doesn't depend on the stable-hash-based order in which
6861
// instances were visited during codegen.
6962
.sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name)
70-
.filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true))
63+
.filter_map(|instance| prepare_covfun_record(tcx, instance, true))
7164
.collect::<Vec<_>>();
7265

7366
// In a single designated CGU, also prepare covfun records for functions
7467
// in this crate that were instrumented for coverage, but are unused.
7568
if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
76-
unused::prepare_covfun_records_for_unused_functions(
77-
cx,
78-
&mut global_file_table,
79-
&mut covfun_records,
80-
);
69+
unused::prepare_covfun_records_for_unused_functions(cx, &mut covfun_records);
8170
}
8271

8372
// If there are no covfun records for this CGU, don't generate a covmap record.
@@ -89,68 +78,88 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
8978
return;
9079
}
9180

92-
// Encode all filenames referenced by coverage mappings in this CGU.
93-
let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
94-
// The `llvm-cov` tool uses this hash to associate each covfun record with
95-
// its corresponding filenames table, since the final binary will typically
96-
// contain multiple covmap records from different compilation units.
97-
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
81+
// Prepare the global file table for this CGU, containing all paths needed
82+
// by one or more covfun records.
83+
let global_file_table =
84+
GlobalFileTable::build(tcx, covfun_records.iter().flat_map(|c| c.all_source_files()));
9885

9986
for covfun in &covfun_records {
100-
covfun::generate_covfun_record(cx, filenames_hash, covfun)
87+
covfun::generate_covfun_record(cx, &global_file_table, covfun)
10188
}
10289

10390
// Generate the coverage map header, which contains the filenames used by
10491
// this CGU's coverage mappings, and store it in a well-known global.
10592
// (This is skipped if we returned early due to having no covfun records.)
106-
generate_covmap_record(cx, covmap_version, &filenames_buffer);
93+
generate_covmap_record(cx, covmap_version, &global_file_table.filenames_buffer);
10794
}
10895

109-
/// Maps "global" (per-CGU) file ID numbers to their underlying source files.
96+
/// Maps "global" (per-CGU) file ID numbers to their underlying source file paths.
97+
#[derive(Debug)]
11098
struct GlobalFileTable {
11199
/// This "raw" table doesn't include the working dir, so a file's
112100
/// global ID is its index in this set **plus one**.
113-
raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>,
101+
raw_file_table: FxIndexMap<StableSourceFileId, String>,
102+
103+
/// The file table in encoded form (possibly compressed), which can be
104+
/// included directly in this CGU's `__llvm_covmap` record.
105+
filenames_buffer: Vec<u8>,
106+
107+
/// Truncated hash of the bytes in `filenames_buffer`.
108+
///
109+
/// The `llvm-cov` tool uses this hash to associate each covfun record with
110+
/// its corresponding filenames table, since the final binary will typically
111+
/// contain multiple covmap records from different compilation units.
112+
filenames_hash: u64,
114113
}
115114

116115
impl GlobalFileTable {
117-
fn new() -> Self {
118-
Self { raw_file_table: FxIndexMap::default() }
119-
}
116+
/// Builds a "global file table" for this CGU, mapping numeric IDs to
117+
/// path strings.
118+
fn build<'a>(tcx: TyCtxt<'_>, all_files: impl Iterator<Item = &'a SourceFile>) -> Self {
119+
let mut raw_file_table = FxIndexMap::default();
120+
121+
for file in all_files {
122+
raw_file_table.entry(file.stable_id).or_insert_with(|| {
123+
file.name
124+
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
125+
.to_string_lossy()
126+
.into_owned()
127+
});
128+
}
129+
130+
// FIXME(Zalathar): Consider sorting the file table here, but maybe
131+
// only after adding filename support to coverage-dump, so that the
132+
// table order isn't directly visible in `.coverage-map` snapshots.
133+
134+
let mut table = Vec::with_capacity(raw_file_table.len() + 1);
135+
136+
// Since version 6 of the LLVM coverage mapping format, the first entry
137+
// in the global file table is treated as a base directory, used to
138+
// resolve any other entries that are stored as relative paths.
139+
let base_dir = tcx
140+
.sess
141+
.opts
142+
.working_dir
143+
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
144+
.to_string_lossy();
145+
table.push(base_dir.as_ref());
120146

121-
fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId {
122-
// Ensure the given file has a table entry, and get its index.
123-
let entry = self.raw_file_table.entry(file.stable_id);
124-
let raw_id = entry.index();
125-
entry.or_insert_with(|| Arc::clone(file));
126-
127-
// The raw file table doesn't include an entry for the working dir
128-
// (which has ID 0), so add 1 to get the correct ID.
129-
GlobalFileId::from_usize(raw_id + 1)
130-
}
147+
// Add the regular entries after the base directory.
148+
table.extend(raw_file_table.values().map(|name| name.as_str()));
131149

132-
fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
133-
let mut table = Vec::with_capacity(self.raw_file_table.len() + 1);
134-
135-
// LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
136-
// requires setting the first filename to the compilation directory.
137-
// Since rustc generates coverage maps with relative paths, the
138-
// compilation directory can be combined with the relative paths
139-
// to get absolute paths, if needed.
140-
table.push(
141-
tcx.sess
142-
.opts
143-
.working_dir
144-
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
145-
.to_string_lossy(),
146-
);
150+
// Encode the file table into a buffer, and get the hash of its encoded
151+
// bytes, so that we can embed that hash in `__llvm_covfun` records.
152+
let filenames_buffer = llvm_cov::write_filenames_to_buffer(&table);
153+
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
147154

148-
// Add the regular entries after the base directory.
149-
table.extend(self.raw_file_table.values().map(|file| {
150-
file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
151-
}));
155+
Self { raw_file_table, filenames_buffer, filenames_hash }
156+
}
152157

153-
llvm_cov::write_filenames_to_buffer(&table)
158+
fn get_existing_id(&self, file: &SourceFile) -> Option<GlobalFileId> {
159+
let raw_id = self.raw_file_table.get_index_of(&file.stable_id)?;
160+
// The raw file table doesn't include an entry for the base dir
161+
// (which has ID 0), so add 1 to get the correct ID.
162+
Some(GlobalFileId::from_usize(raw_id + 1))
154163
}
155164
}
156165

@@ -166,26 +175,31 @@ rustc_index::newtype_index! {
166175
struct LocalFileId {}
167176
}
168177

169-
/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
170-
/// file IDs.
178+
/// Holds a mapping from "local" (per-function) file IDs to their corresponding
179+
/// source files.
171180
#[derive(Debug, Default)]
172181
struct VirtualFileMapping {
173-
local_to_global: IndexVec<LocalFileId, GlobalFileId>,
174-
global_to_local: FxIndexMap<GlobalFileId, LocalFileId>,
182+
local_file_table: IndexVec<LocalFileId, Arc<SourceFile>>,
175183
}
176184

177185
impl VirtualFileMapping {
178-
fn local_id_for_global(&mut self, global_file_id: GlobalFileId) -> LocalFileId {
179-
*self
180-
.global_to_local
181-
.entry(global_file_id)
182-
.or_insert_with(|| self.local_to_global.push(global_file_id))
186+
fn push_file(&mut self, source_file: &Arc<SourceFile>) -> LocalFileId {
187+
self.local_file_table.push(Arc::clone(source_file))
183188
}
184189

185-
fn to_vec(&self) -> Vec<u32> {
186-
// This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`,
187-
// but it isn't hot or expensive enough to justify the extra unsafety.
188-
self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect()
190+
/// Resolves all of the filenames in this local file mapping to a list of
191+
/// global file IDs in its CGU, for inclusion in this function's
192+
/// `__llvm_covfun` record.
193+
///
194+
/// The global file IDs are returned as `u32` to make FFI easier.
195+
fn resolve_all(&self, global_file_table: &GlobalFileTable) -> Option<Vec<u32>> {
196+
self.local_file_table
197+
.iter()
198+
.map(|file| try {
199+
let id = global_file_table.get_existing_id(file)?;
200+
GlobalFileId::as_u32(id)
201+
})
202+
.collect::<Option<Vec<_>>>()
189203
}
190204
}
191205

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs

+26-16
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
//! [^win]: On Windows the section name is `.lcovfun`.
66
77
use std::ffi::CString;
8+
use std::sync::Arc;
89

910
use rustc_abi::Align;
1011
use rustc_codegen_ssa::traits::{
@@ -15,7 +16,7 @@ use rustc_middle::mir::coverage::{
1516
MappingKind, Op,
1617
};
1718
use rustc_middle::ty::{Instance, TyCtxt};
18-
use rustc_span::Span;
19+
use rustc_span::{SourceFile, Span};
1920
use rustc_target::spec::HasTargetSpec;
2021
use tracing::debug;
2122

@@ -37,9 +38,16 @@ pub(crate) struct CovfunRecord<'tcx> {
3738
regions: ffi::Regions,
3839
}
3940

41+
impl<'tcx> CovfunRecord<'tcx> {
42+
/// Iterator that yields all source files referred to by this function's
43+
/// coverage mappings. Used to build the global file table for the CGU.
44+
pub(crate) fn all_source_files(&self) -> impl Iterator<Item = &SourceFile> {
45+
self.virtual_file_mapping.local_file_table.iter().map(Arc::as_ref)
46+
}
47+
}
48+
4049
pub(crate) fn prepare_covfun_record<'tcx>(
4150
tcx: TyCtxt<'tcx>,
42-
global_file_table: &mut GlobalFileTable,
4351
instance: Instance<'tcx>,
4452
is_used: bool,
4553
) -> Option<CovfunRecord<'tcx>> {
@@ -57,7 +65,7 @@ pub(crate) fn prepare_covfun_record<'tcx>(
5765
regions: ffi::Regions::default(),
5866
};
5967

60-
fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun);
68+
fill_region_tables(tcx, fn_cov_info, ids_info, &mut covfun);
6169

6270
if covfun.regions.has_no_regions() {
6371
debug!(?covfun, "function has no mappings to embed; skipping");
@@ -92,7 +100,6 @@ fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec<ffi::CounterExpression
92100
/// Populates the mapping region tables in the current function's covfun record.
93101
fn fill_region_tables<'tcx>(
94102
tcx: TyCtxt<'tcx>,
95-
global_file_table: &mut GlobalFileTable,
96103
fn_cov_info: &'tcx FunctionCoverageInfo,
97104
ids_info: &'tcx CoverageIdsInfo,
98105
covfun: &mut CovfunRecord<'tcx>,
@@ -106,11 +113,7 @@ fn fill_region_tables<'tcx>(
106113
};
107114
let source_file = source_map.lookup_source_file(first_span.lo());
108115

109-
// Look up the global file ID for that file.
110-
let global_file_id = global_file_table.global_file_id_for_file(&source_file);
111-
112-
// Associate that global file ID with a local file ID for this function.
113-
let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
116+
let local_file_id = covfun.virtual_file_mapping.push_file(&source_file);
114117

115118
// In rare cases, _all_ of a function's spans are discarded, and coverage
116119
// codegen needs to handle that gracefully to avoid #133606.
@@ -179,7 +182,7 @@ fn fill_region_tables<'tcx>(
179182
/// as a global variable in the `__llvm_covfun` section.
180183
pub(crate) fn generate_covfun_record<'tcx>(
181184
cx: &CodegenCx<'_, 'tcx>,
182-
filenames_hash: u64,
185+
global_file_table: &GlobalFileTable,
183186
covfun: &CovfunRecord<'tcx>,
184187
) {
185188
let &CovfunRecord {
@@ -191,12 +194,19 @@ pub(crate) fn generate_covfun_record<'tcx>(
191194
ref regions,
192195
} = covfun;
193196

197+
let Some(local_file_table) = virtual_file_mapping.resolve_all(global_file_table) else {
198+
debug_assert!(
199+
false,
200+
"all local files should be present in the global file table: \
201+
global_file_table = {global_file_table:?}, \
202+
virtual_file_mapping = {virtual_file_mapping:?}"
203+
);
204+
return;
205+
};
206+
194207
// Encode the function's coverage mappings into a buffer.
195-
let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer(
196-
&virtual_file_mapping.to_vec(),
197-
expressions,
198-
regions,
199-
);
208+
let coverage_mapping_buffer =
209+
llvm_cov::write_function_mappings_to_buffer(&local_file_table, expressions, regions);
200210

201211
// A covfun record consists of four target-endian integers, followed by the
202212
// encoded mapping data in bytes. Note that the length field is 32 bits.
@@ -209,7 +219,7 @@ pub(crate) fn generate_covfun_record<'tcx>(
209219
cx.const_u64(func_name_hash),
210220
cx.const_u32(coverage_mapping_buffer.len() as u32),
211221
cx.const_u64(source_hash),
212-
cx.const_u64(filenames_hash),
222+
cx.const_u64(global_file_table.filenames_hash),
213223
cx.const_bytes(&coverage_mapping_buffer),
214224
],
215225
// This struct needs to be packed, so that the 32-bit length field

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use rustc_middle::ty::{self, TyCtxt};
77
use rustc_span::def_id::DefIdSet;
88

99
use crate::common::CodegenCx;
10-
use crate::coverageinfo::mapgen::GlobalFileTable;
1110
use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record};
1211
use crate::llvm;
1312

@@ -21,7 +20,6 @@ use crate::llvm;
2120
/// its embedded coverage data.
2221
pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
2322
cx: &CodegenCx<'_, 'tcx>,
24-
global_file_table: &mut GlobalFileTable,
2523
covfun_records: &mut Vec<CovfunRecord<'tcx>>,
2624
) {
2725
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
@@ -33,7 +31,7 @@ pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
3331
// Try to create a covfun record for each unused function.
3432
let mut name_globals = Vec::with_capacity(unused_instances.len());
3533
covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try {
36-
let record = prepare_covfun_record(cx.tcx, global_file_table, unused.instance, false)?;
34+
let record = prepare_covfun_record(cx.tcx, unused.instance, false)?;
3735
// If successful, also store its symbol name in a global constant.
3836
name_globals.push(cx.const_str(unused.symbol_name.name).0);
3937
record

0 commit comments

Comments
 (0)