Skip to content

Commit 1a99ca8

Browse files
committed
The embedded bitcode should always be prepared for LTO/ThinLTO
1 parent 1805b33 commit 1a99ca8

File tree

13 files changed

+265
-69
lines changed

13 files changed

+265
-69
lines changed

compiler/rustc_codegen_cranelift/src/driver/aot.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ fn produce_final_output_artifacts(
210210
// to get rid of it.
211211
for output_type in crate_output.outputs.keys() {
212212
match *output_type {
213-
OutputType::Bitcode | OutputType::ThinLinkBitcode => {
213+
OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => {
214214
// Cranelift doesn't have bitcode
215215
// user_wants_bitcode = true;
216216
// // Copy to .bc, but always keep the .0.bc. There is a later

compiler/rustc_codegen_llvm/src/back/lto.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::collections::BTreeMap;
22
use std::ffi::{CStr, CString};
33
use std::fs::File;
44
use std::path::Path;
5+
use std::ptr::NonNull;
56
use std::sync::Arc;
67
use std::{io, iter, slice};
78

@@ -655,14 +656,14 @@ pub(crate) fn run_pass_manager(
655656
}
656657

657658
unsafe {
658-
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
659+
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
659660
}
660661

661662
if cfg!(llvm_enzyme) && enable_ad {
662663
let opt_stage = llvm::OptStage::FatLTO;
663664
let stage = write::AutodiffStage::PostAD;
664665
unsafe {
665-
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
666+
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
666667
}
667668

668669
// This is the final IR, so people should be able to inspect the optimized autodiff output.
@@ -729,6 +730,11 @@ impl ThinBuffer {
729730
ThinBuffer(buffer)
730731
}
731732
}
733+
734+
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
735+
let mut ptr = NonNull::new(ptr).unwrap();
736+
ThinBuffer(unsafe { ptr.as_mut() })
737+
}
732738
}
733739

734740
impl ThinBufferMethods for ThinBuffer {

compiler/rustc_codegen_llvm/src/back/write.rs

+91-42
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::ffi::{CStr, CString};
22
use std::io::{self, Write};
33
use std::path::{Path, PathBuf};
4+
use std::ptr::null_mut;
45
use std::sync::Arc;
56
use std::{fs, slice, str};
67

@@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
1516
TargetMachineFactoryFn,
1617
};
1718
use rustc_codegen_ssa::traits::*;
18-
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
19+
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
1920
use rustc_data_structures::profiling::SelfProfilerRef;
2021
use rustc_data_structures::small_c_str::SmallCStr;
2122
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@@ -551,6 +552,7 @@ pub(crate) unsafe fn llvm_optimize(
551552
cgcx: &CodegenContext<LlvmCodegenBackend>,
552553
dcx: DiagCtxtHandle<'_>,
553554
module: &ModuleCodegen<ModuleLlvm>,
555+
thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
554556
config: &ModuleConfig,
555557
opt_level: config::OptLevel,
556558
opt_stage: llvm::OptStage,
@@ -584,7 +586,17 @@ pub(crate) unsafe fn llvm_optimize(
584586
vectorize_loop = config.vectorize_loop;
585587
}
586588
trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
587-
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
589+
if thin_lto_buffer.is_some() {
590+
assert!(
591+
matches!(
592+
opt_stage,
593+
llvm::OptStage::PreLinkNoLTO
594+
| llvm::OptStage::PreLinkFatLTO
595+
| llvm::OptStage::PreLinkThinLTO
596+
),
597+
"the bitcode for LTO can only be obtained at the pre-link stage"
598+
);
599+
}
588600
let pgo_gen_path = get_pgo_gen_path(config);
589601
let pgo_use_path = get_pgo_use_path(config);
590602
let pgo_sample_use_path = get_pgo_sample_use_path(config);
@@ -644,7 +656,9 @@ pub(crate) unsafe fn llvm_optimize(
644656
config.no_prepopulate_passes,
645657
config.verify_llvm_ir,
646658
config.lint_llvm_ir,
647-
using_thin_buffers,
659+
thin_lto_buffer,
660+
config.emit_thin_lto,
661+
config.emit_thin_lto_summary,
648662
config.merge_functions,
649663
unroll_loops,
650664
vectorize_slp,
@@ -705,9 +719,56 @@ pub(crate) unsafe fn optimize(
705719
// Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
706720
let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
707721
let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
708-
return unsafe {
709-
llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
722+
// The embedded bitcode is used to run LTO/ThinLTO.
723+
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
724+
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
725+
// this point.
726+
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
727+
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
728+
|| config.emit_thin_lto_summary
729+
{
730+
Some(null_mut())
731+
} else {
732+
None
710733
};
734+
unsafe {
735+
llvm_optimize(
736+
cgcx,
737+
dcx,
738+
module,
739+
thin_lto_buffer.as_mut(),
740+
config,
741+
opt_level,
742+
opt_stage,
743+
autodiff_stage,
744+
)
745+
}?;
746+
if let Some(thin_lto_buffer) = thin_lto_buffer {
747+
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
748+
let thin_bc_out = cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
749+
if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) {
750+
dcx.emit_err(WriteBytecode { path: &thin_bc_out, err });
751+
}
752+
let bc_summary_out =
753+
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
754+
if config.emit_thin_lto_summary
755+
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
756+
{
757+
let summary_data = thin_lto_buffer.thin_link_data();
758+
cgcx.prof.artifact_size(
759+
"llvm_bitcode_summary",
760+
thin_link_bitcode_filename.to_string_lossy(),
761+
summary_data.len() as u64,
762+
);
763+
let _timer = cgcx.prof.generic_activity_with_arg(
764+
"LLVM_module_codegen_emit_bitcode_summary",
765+
&*module.name,
766+
);
767+
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
768+
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
769+
}
770+
}
771+
}
711772
}
712773
Ok(())
713774
}
@@ -760,59 +821,47 @@ pub(crate) unsafe fn codegen(
760821
// otherwise requested.
761822

762823
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
763-
let bc_summary_out =
764-
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
765824
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
766825

767826
if config.bitcode_needed() {
768-
let _timer = cgcx
769-
.prof
770-
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
771-
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
772-
let data = thin.data();
773-
774-
if let Some(bitcode_filename) = bc_out.file_name() {
775-
cgcx.prof.artifact_size(
776-
"llvm_bitcode",
777-
bitcode_filename.to_string_lossy(),
778-
data.len() as u64,
779-
);
780-
}
781-
782-
if config.emit_thin_lto_summary
783-
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
784-
{
785-
let summary_data = thin.thin_link_data();
786-
cgcx.prof.artifact_size(
787-
"llvm_bitcode_summary",
788-
thin_link_bitcode_filename.to_string_lossy(),
789-
summary_data.len() as u64,
790-
);
791-
792-
let _timer = cgcx.prof.generic_activity_with_arg(
793-
"LLVM_module_codegen_emit_bitcode_summary",
794-
&*module.name,
795-
);
796-
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
797-
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
798-
}
799-
}
800-
801827
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
828+
let thin = {
829+
let _timer = cgcx.prof.generic_activity_with_arg(
830+
"LLVM_module_codegen_make_bitcode",
831+
&*module.name,
832+
);
833+
ThinBuffer::new(llmod, config.emit_thin_lto, false)
834+
};
835+
let data = thin.data();
802836
let _timer = cgcx
803837
.prof
804838
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
839+
if let Some(bitcode_filename) = bc_out.file_name() {
840+
cgcx.prof.artifact_size(
841+
"llvm_bitcode",
842+
bitcode_filename.to_string_lossy(),
843+
data.len() as u64,
844+
);
845+
}
805846
if let Err(err) = fs::write(&bc_out, data) {
806847
dcx.emit_err(WriteBytecode { path: &bc_out, err });
807848
}
808849
}
809850

810-
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
851+
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
852+
&& module.kind == ModuleKind::Regular
853+
{
811854
let _timer = cgcx
812855
.prof
813856
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
857+
let thin_bc_out =
858+
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
859+
assert!(thin_bc_out.exists(), "cannot find {:?} as embedded bitcode", thin_bc_out);
860+
let data = fs::read(&thin_bc_out).unwrap();
861+
debug!("removing embed bitcode file {:?}", thin_bc_out);
862+
ensure_removed(dcx, &thin_bc_out);
814863
unsafe {
815-
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
864+
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data);
816865
}
817866
}
818867
}

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -2421,7 +2421,9 @@ unsafe extern "C" {
24212421
NoPrepopulatePasses: bool,
24222422
VerifyIR: bool,
24232423
LintIR: bool,
2424-
UseThinLTOBuffers: bool,
2424+
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
2425+
EmitThinLTO: bool,
2426+
EmitThinLTOSummary: bool,
24252427
MergeFunctions: bool,
24262428
UnrollLoops: bool,
24272429
SLPVectorize: bool,

compiler/rustc_codegen_ssa/src/back/write.rs

+3
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,9 @@ fn produce_final_output_artifacts(
626626
// them for making an rlib.
627627
copy_if_one_unit(OutputType::Bitcode, true);
628628
}
629+
OutputType::ThinBitcode => {
630+
copy_if_one_unit(OutputType::ThinBitcode, true);
631+
}
629632
OutputType::ThinLinkBitcode => {
630633
copy_if_one_unit(OutputType::ThinLinkBitcode, false);
631634
}

compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp

+50-16
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "llvm/Analysis/Lint.h"
88
#include "llvm/Analysis/TargetLibraryInfo.h"
99
#include "llvm/Bitcode/BitcodeWriter.h"
10+
#include "llvm/Bitcode/BitcodeWriterPass.h"
1011
#include "llvm/CodeGen/CommandFlags.h"
1112
#include "llvm/IR/AssemblyAnnotationWriter.h"
1213
#include "llvm/IR/AutoUpgrade.h"
@@ -37,6 +38,7 @@
3738
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
3839
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
3940
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
41+
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
4042
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
4143
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
4244
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
@@ -195,6 +197,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
195197
GEN_SUBTARGETS
196198
#undef SUBTARGET
197199

200+
// This struct and various functions are sort of a hack right now, but the
201+
// problem is that we've got in-memory LLVM modules after we generate and
202+
// optimize all codegen-units for one compilation in rustc. To be compatible
203+
// with the LTO support above we need to serialize the modules plus their
204+
// ThinLTO summary into memory.
205+
//
206+
// This structure is basically an owned version of a serialized module, with
207+
// a ThinLTO summary attached.
208+
struct LLVMRustThinLTOBuffer {
209+
std::string data;
210+
std::string thin_link_data;
211+
};
212+
198213
extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
199214
const char *Feature) {
200215
TargetMachine *Target = unwrap(TM);
@@ -704,7 +719,8 @@ extern "C" LLVMRustResult LLVMRustOptimize(
704719
LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
705720
LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
706721
bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
707-
bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops,
722+
bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
723+
bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
708724
bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
709725
bool EmitLifetimeMarkers, bool RunEnzyme,
710726
LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath,
@@ -952,7 +968,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
952968
}
953969

954970
ModulePassManager MPM;
955-
bool NeedThinLTOBufferPasses = UseThinLTOBuffers;
971+
bool NeedThinLTOBufferPasses = EmitThinLTO;
972+
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
973+
raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
974+
raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
956975
if (!NoPrepopulatePasses) {
957976
// The pre-link pipelines don't support O0 and require using
958977
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
@@ -976,7 +995,25 @@ extern "C" LLVMRustResult LLVMRustOptimize(
976995

977996
switch (OptStage) {
978997
case LLVMRustOptStage::PreLinkNoLTO:
979-
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
998+
if (ThinLTOBufferRef) {
999+
// This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
1000+
// bitcode for embedding is obtained after performing
1001+
// `ThinLTOPreLinkDefaultPipeline`.
1002+
MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
1003+
if (EmitThinLTO) {
1004+
MPM.addPass(ThinLTOBitcodeWriterPass(
1005+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
1006+
} else {
1007+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
1008+
}
1009+
*ThinLTOBufferRef = ThinLTOBuffer.release();
1010+
MPM.addPass(PB.buildModuleOptimizationPipeline(
1011+
OptLevel, ThinOrFullLTOPhase::None));
1012+
MPM.addPass(
1013+
createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
1014+
} else {
1015+
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
1016+
}
9801017
break;
9811018
case LLVMRustOptStage::PreLinkThinLTO:
9821019
MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
@@ -1022,6 +1059,16 @@ extern "C" LLVMRustResult LLVMRustOptimize(
10221059
MPM.addPass(CanonicalizeAliasesPass());
10231060
MPM.addPass(NameAnonGlobalPass());
10241061
}
1062+
// For `-Copt-level=0`, ThinLTO, or LTO.
1063+
if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
1064+
if (EmitThinLTO) {
1065+
MPM.addPass(ThinLTOBitcodeWriterPass(
1066+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
1067+
} else {
1068+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
1069+
}
1070+
*ThinLTOBufferRef = ThinLTOBuffer.release();
1071+
}
10251072

10261073
// now load "-enzyme" pass:
10271074
#ifdef ENZYME
@@ -1500,19 +1547,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
15001547
return true;
15011548
}
15021549

1503-
// This struct and various functions are sort of a hack right now, but the
1504-
// problem is that we've got in-memory LLVM modules after we generate and
1505-
// optimize all codegen-units for one compilation in rustc. To be compatible
1506-
// with the LTO support above we need to serialize the modules plus their
1507-
// ThinLTO summary into memory.
1508-
//
1509-
// This structure is basically an owned version of a serialized module, with
1510-
// a ThinLTO summary attached.
1511-
struct LLVMRustThinLTOBuffer {
1512-
std::string data;
1513-
std::string thin_link_data;
1514-
};
1515-
15161550
extern "C" LLVMRustThinLTOBuffer *
15171551
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
15181552
auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();

0 commit comments

Comments
 (0)