Skip to content

Commit 2d24ed1

Browse files
committed
[ThinLTO][Split] Split module for parallel compilation in backend
Add an interface for splitting a module along its call graph. The interface is invoked in the ThinLTO backend phase: the module is split into N partitions (MParts), and opt and codegen are run on the partitions in parallel, enabling parallel compilation within the ThinLTO backend.
1 parent a4cd17e commit 2d24ed1

4 files changed

Lines changed: 332 additions & 17 deletions

File tree

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
2+
#define LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
3+
4+
#include "llvm/Analysis/CallGraph.h"
5+
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
6+
#include "llvm/LTO/Config.h"
7+
#include "llvm/ADT/DenseMap.h"
8+
#include "llvm/ADT/DenseSet.h"
9+
10+
namespace llvm {
11+
/// Splits the module M into N linkable partitions. The function ModuleCallback
12+
/// is called N times passing each individual partition as the MPart argument.
13+
class SplitModuleCG {
14+
public:
15+
using ModuleCreationCallback =
16+
function_ref<void(std::unique_ptr<Module> MPart, unsigned PartitionId)>;
17+
SplitModuleCG(Module &M,
18+
const ModuleSummaryIndex &CombinedIndex,
19+
unsigned LimitPartition = 0);
20+
void SplitModule(ModuleCreationCallback ModuleCallback,
21+
const llvm::lto::Config &C);
22+
23+
unsigned getPartitionNum() { return N; }
24+
25+
private:
26+
unsigned N;
27+
Module &M;
28+
CallGraph CG;
29+
DenseSet<const Function *> EntryFuncs;
30+
};
31+
32+
} // end namespace llvm
33+
34+
#endif // LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H

llvm/lib/LTO/LTOBackend.cpp

Lines changed: 271 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@
3434
#include "llvm/Plugins/PassPlugin.h"
3535
#include "llvm/Support/Error.h"
3636
#include "llvm/Support/FileSystem.h"
37+
#include "llvm/Support/FileUtilities.h"
3738
#include "llvm/Support/MemoryBuffer.h"
3839
#include "llvm/Support/Path.h"
40+
#include "llvm/Support/Program.h"
3941
#include "llvm/Support/ThreadPool.h"
4042
#include "llvm/Support/ToolOutputFile.h"
4143
#include "llvm/Support/VirtualFileSystem.h"
@@ -45,6 +47,8 @@
4547
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
4648
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
4749
#include "llvm/Transforms/Utils/SplitModule.h"
50+
#include "llvm/Transforms/Utils/SplitModuleCG.h"
51+
#include <filesystem>
4852
#include <optional>
4953

5054
using namespace llvm;
@@ -80,6 +84,23 @@ static cl::list<std::string>
8084
"path matches this for -save-temps options"),
8185
cl::CommaSeparated, cl::Hidden);
8286

87+
// Modules whose size (as computed by calModuleSize, i.e. the number of IR
// instructions) is below this threshold are not split.
static cl::opt<unsigned> ThinLTOSplitModuleSizeThreshold(
    "thinlto-split-module-size-threshold", cl::Hidden, cl::init(500),
    cl::desc("Control whether to split in thinlto backend "
             "according to the size of a module."));

// Ratio threshold of (callgraph size)/(module size) for deciding whether a
// split is worthwhile.
static cl::opt<float> ThinLTOSplitModuleSizeRateThreshold(
    "thinlto-split-module-size-rate-threshold", cl::Hidden, cl::init(0.5),
    cl::desc("Whether to split in thinlto backend based on the ratio of "
             "(callgraph size)/(module size)"));

// Requested number of partitions; forwarded to SplitModuleCG as the partition
// limit.
static cl::opt<unsigned> ThinLTOSplitPartitions(
    "thinlto-split-partitions", cl::Hidden, cl::init(0),
    cl::desc("Control split to how many partitions in thinlto backend."));

// Master switch for module splitting in the ThinLTO backend (off by default).
static cl::opt<bool> ThinLTOSplit("thinlto-split", cl::init(false),
    cl::desc("Enable split module in thinlto backend."));
103+
83104
namespace llvm {
84105
extern cl::opt<bool> NoPGOWarnMismatch;
85106
}
@@ -124,12 +145,19 @@ Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath,
124145
if (LinkerHook && !LinkerHook(Task, M))
125146
return false;
126147

148+
auto extract_filename = [](const std::string &path) -> std::string {
149+
std::filesystem::path fs_path(path);
150+
return fs_path.filename().string();
151+
};
152+
127153
std::string PathPrefix;
128154
// If this is the combined module (not a ThinLTO backend compile) or the
129155
// user hasn't requested using the input module's path, emit to a file
130156
// named from the provided OutputFileName with the Task ID appended.
131157
if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
132158
PathPrefix = OutputFileName;
159+
if (ThinLTOSplit)
160+
PathPrefix += extract_filename(M.getSourceFileName()) + ".";
133161
if (Task != (unsigned)-1)
134162
PathPrefix += utostr(Task) + ".";
135163
} else
@@ -513,6 +541,208 @@ static void codegen(const Config &Conf, TargetMachine *TM,
513541
report_fatal_error(std::move(Err));
514542
}
515543

544+
/// Returns the number of instructions in \p F, summed over all of its basic
/// blocks. Declarations (no basic blocks) yield 0.
static unsigned calFunctionSize(const llvm::Function &F) {
  unsigned NumInsts = 0;
  for (auto BBIt = F.begin(), BBEnd = F.end(); BBIt != BBEnd; ++BBIt)
    NumInsts += BBIt->size();
  return NumInsts;
}
550+
551+
/// Returns the size of \p M, defined as the sum of calFunctionSize() over
/// every function in the module.
static unsigned calModuleSize(const llvm::Module &M) {
  unsigned Total = 0;
  for (auto FnIt = M.begin(), FnEnd = M.end(); FnIt != FnEnd; ++FnIt)
    Total += calFunctionSize(*FnIt);
  return Total;
}
557+
558+
/// Returns true when \p M is large enough to be split, i.e. its size is at
/// least ThinLTOSplitModuleSizeThreshold.
static bool canDoSplitModule(const llvm::Module &M) {
  const unsigned ModuleSize = calModuleSize(M);
  const unsigned MinSize = ThinLTOSplitModuleSizeThreshold;
  return ModuleSize >= MinSize;
}
563+
564+
/// Placeholder profitability check: currently reports every module as having
/// large call graphs.
///
/// TODO: Actually inspect the module for large call graphs. Parallel ThinLTO
/// compilation only pays off when several call graphs can be split apart.
static bool HasLargeCG(Module &Mod, const ModuleSummaryIndex &CombinedIndex) {
  // Both parameters are reserved for the future analysis.
  (void)Mod;
  (void)CombinedIndex;
  return true;
}
569+
570+
struct TaskIdAllocator {
571+
using TaskId = unsigned;
572+
573+
// Use the most significant bit (MSB) as a namespace tag.
574+
// - Original ThinLTO backend tasks are expected to have MSB == 0.
575+
// - Split partitions allocated by this allocator always have MSB == 1.
576+
// This guarantees the two ID spaces never overlap.
577+
static constexpr TaskId tag() {
578+
return TaskId{1} << (std::numeric_limits<TaskId>::digits - 1);
579+
}
580+
581+
// Monotonic sequence counter for split partitions (MSB must remain 0 here).
582+
std::atomic<TaskId> seq{0};
583+
584+
// Allocate a globally unique TaskId for a split partition.
585+
// The returned ID is `tag() | seq`, so it lives in the MSB==1 namespace.
586+
TaskId alloc() {
587+
TaskId v = seq.fetch_add(1, std::memory_order_relaxed);
588+
589+
// If the counter ever reaches the MSB, we'd overlap namespaces.
590+
// This indicates an overflow / too many partitions.
591+
if (v & tag())
592+
report_fatal_error("Partition TaskId overflow: seq reached the tag bit.");
593+
594+
return tag() | v;
595+
}
596+
597+
// Helper for sanity checks / debugging.
598+
static bool isPartition(TaskId id) { return (id & tag()) != 0; }
599+
};
600+
601+
// Global allocator shared by all split partitions.
602+
static TaskIdAllocator gSplitTaskIds;
603+
604+
static bool splitOptAndCodeGenThin(unsigned task, const Config &C,
605+
TargetMachine *TM, AddStreamFn AddStream,
606+
unsigned ParallelCodeGenParallelismLevel,
607+
Module &Mod,
608+
const ModuleSummaryIndex &CombinedIndex,
609+
const std::vector<uint8_t> &CmdArgs,
610+
bool DoOpt, AddStreamFn IRAddStream,
611+
ArrayRef<StringRef> &BitcodeLibFuncs) {
612+
unsigned ThreadCount = 0;
613+
const Target *T = &TM->getTarget();
614+
615+
static std::mutex PrintMutex;
616+
617+
SplitModuleCG SplitModuleCG(Mod, CombinedIndex, ParallelCodeGenParallelismLevel);
618+
ParallelCodeGenParallelismLevel = SplitModuleCG.getPartitionNum();
619+
620+
std::vector<std::string> TempObjectFiles(ParallelCodeGenParallelismLevel);
621+
std::vector<llvm::FileRemover> TempFileRemovers(ParallelCodeGenParallelismLevel);
622+
623+
const auto HandleModulePartition = [&](std::unique_ptr<Module> MPart,
624+
unsigned PartitionId) {
625+
unsigned CurrentThreadId, UniqueTaskId;
626+
{
627+
std::lock_guard<std::mutex> Lock(PrintMutex);
628+
CurrentThreadId = ThreadCount++;
629+
630+
// In distributed ThinLTO, `task` may be a sentinel (e.g. -1 cast to
631+
// unsigned), which becomes UINT_MAX and naturally has MSB==1. Treat it
632+
// as "no base task id" and don't enforce the namespace check on it.
633+
//
634+
// We do not rely on the incoming `task` for partition uniqueness: split
635+
// partitions get a dedicated UniqueTaskId allocated below.
636+
if (task != std::numeric_limits<unsigned>::max()) {
637+
assert(!TaskIdAllocator::isPartition(task) &&
638+
"Original ThinLTO TaskId unexpectedly overlaps the partition "
639+
"namespace");
640+
}
641+
UniqueTaskId = gSplitTaskIds.alloc();
642+
}
643+
644+
std::unique_ptr<TargetMachine> ThreadTM = createTargetMachine(C, T, *MPart);
645+
646+
if (DoOpt) {
647+
if (!opt(C, ThreadTM.get(), UniqueTaskId, *MPart, /*IsThinLTO=*/true,
648+
/*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
649+
CmdArgs, BitcodeLibFuncs)) {
650+
report_fatal_error("Failed to gen opt for split mod in thread.");
651+
}
652+
653+
// Save the current module before the first codegen round.
654+
// Note that the second codegen round runs only `codegen()` without
655+
// running `opt()`. We're not reaching here as it's bailed out earlier
656+
// with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
657+
if (IRAddStream)
658+
cgdata::saveModuleForTwoRounds(*MPart, task + CurrentThreadId,
659+
IRAddStream);
660+
}
661+
662+
auto splitStream = [&](unsigned task, const Twine &moduleName)
663+
-> Expected<std::unique_ptr<CachedFileStream>> {
664+
int FD;
665+
SmallString<128> TempFilename;
666+
if (std::error_code EC = sys::fs::createTemporaryFile(
667+
"thinlto-split", "o", FD, TempFilename))
668+
return errorCodeToError(EC);
669+
670+
TempObjectFiles[PartitionId] = std::string(TempFilename.str());
671+
TempFileRemovers[PartitionId].setFile(TempObjectFiles[PartitionId]);
672+
673+
auto OS = std::make_unique<raw_fd_ostream>(
674+
FD, true, /*CloseOnDestruct*/true);
675+
676+
auto Stream = std::make_unique<CachedFileStream>(
677+
std::move(OS), std::string(TempFilename.str()));
678+
679+
return std::move(Stream);
680+
};
681+
682+
codegen(C, ThreadTM.get(), splitStream, UniqueTaskId, *MPart,
683+
CombinedIndex);
684+
};
685+
686+
SplitModuleCG.SplitModule(HandleModulePartition, C);
687+
688+
// Use ld.lld to combine the partitions into a object.
689+
if (TempObjectFiles.empty()) {
690+
llvm::errs() << "TempObjectFiles.empty()\n";
691+
return true;
692+
}
693+
694+
auto FinalStream = AddStream(task, Mod.getModuleIdentifier());
695+
if (!FinalStream)
696+
report_fatal_error("Failed to open final output stream");
697+
698+
int MergedFD;
699+
SmallString<128> MergedFilename;
700+
if (sys::fs::createTemporaryFile("thinlto-merged", "o", MergedFD,
701+
MergedFilename))
702+
report_fatal_error("Failed to create merged temp file.");
703+
llvm::FileRemover MergedFileRemover(MergedFilename);
704+
sys::fs::closeFile(MergedFD);
705+
706+
std::vector<StringRef> Args;
707+
std::string LinkerPath = "";
708+
if (auto Path = sys::findProgramByName("ld.lld"))
709+
LinkerPath = *Path;
710+
else if (auto Path = sys::findProgramByName("ld"))
711+
LinkerPath = *Path;
712+
713+
if (LinkerPath.empty())
714+
report_fatal_error("Cannot find linkeer (ld or ld.lld) to merge partitions.");
715+
716+
Args.push_back(LinkerPath);
717+
Args.push_back("-r");
718+
Args.push_back("-o");
719+
Args.push_back(MergedFilename);
720+
721+
for (const auto &File : TempObjectFiles)
722+
Args.push_back(File);
723+
724+
std::string ErrMsg;
725+
int Result = sys::ExecuteAndWait(LinkerPath, Args, /*Env=*/std::nullopt,
726+
/*Redirects=*/{}, /*SecondsToWait=*/0,
727+
/*MemoryLimit=*/0, &ErrMsg);
728+
729+
if (Result != 0) {
730+
errs() << "Linker failed: " << ErrMsg << "\n";
731+
report_fatal_error("Failed to merge split objects.");
732+
}
733+
734+
{
735+
std::unique_ptr<CachedFileStream> &FinalFileStream = *FinalStream;
736+
auto BufferOrErr = MemoryBuffer::getFile(MergedFilename);
737+
if (!BufferOrErr)
738+
report_fatal_error("Failed to read merged object.");
739+
740+
FinalFileStream->OS->write(BufferOrErr.get()->getBufferStart(),
741+
BufferOrErr.get()->getBufferSize());
742+
}
743+
return true;
744+
}
745+
516746
static void splitCodeGen(const Config &C, TargetMachine *TM,
517747
AddStreamFn AddStream,
518748
unsigned ParallelCodeGenParallelismLevel, Module &Mod,
@@ -671,11 +901,28 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
671901
// the module, if applicable.
672902
Mod.setPartialSampleProfileRatio(CombinedIndex);
673903

904+
bool ProfitableToSplit = true;
905+
if (ThinLTOSplit) {
906+
if (!canDoSplitModule(Mod) || !HasLargeCG(Mod, CombinedIndex)) {
907+
ProfitableToSplit = false;
908+
LLVM_DEBUG(dbgs() << "warning: thinlto split not enable for module: "
909+
<< Mod.getName());
910+
} else {
911+
LLVM_DEBUG(dbgs() << "thinlto: split codegen for module: "
912+
<< Mod.getName());
913+
}
914+
}
915+
674916
LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
675917
if (CodeGenOnly) {
676-
// If CodeGenOnly is set, we only perform code generation and skip
677-
// optimization. This value may differ from Conf.CodeGenOnly.
678-
codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
918+
if (ThinLTOSplit && ProfitableToSplit)
919+
splitOptAndCodeGenThin(Task, Conf, TM.get(), AddStream,
920+
ThinLTOSplitPartitions, Mod, CombinedIndex,
921+
CmdArgs, false, IRAddStream, BitcodeLibFuncs);
922+
else
923+
// If CodeGenOnly is set, we only perform code generation and skip
924+
// optimization. This value may differ from Conf.CodeGenOnly.
925+
codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
679926
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
680927
}
681928

@@ -685,20 +932,27 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
685932
auto OptimizeAndCodegen =
686933
[&](Module &Mod, TargetMachine *TM,
687934
LLVMRemarkFileHandle DiagnosticOutputFile) {
688-
// Perform optimization and code generation for ThinLTO.
689-
if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
690-
/*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
691-
CmdArgs, BitcodeLibFuncs))
692-
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
693-
694-
// Save the current module before the first codegen round.
695-
// Note that the second codegen round runs only `codegen()` without
696-
// running `opt()`. We're not reaching here as it's bailed out earlier
697-
// with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
698-
if (IRAddStream)
699-
cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream);
700-
701-
codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
935+
if (ThinLTOSplit && ProfitableToSplit) {
936+
if (!splitOptAndCodeGenThin(
937+
Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod,
938+
CombinedIndex, CmdArgs, true, IRAddStream, BitcodeLibFuncs))
939+
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
940+
} else {
941+
// Perform optimization and code generation for ThinLTO.
942+
if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
943+
/*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
944+
CmdArgs, BitcodeLibFuncs))
945+
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
946+
947+
// Save the current module before the first codegen round.
948+
// Note that the second codegen round runs only `codegen()` without
949+
// running `opt()`. We're not reaching here as it's bailed out earlier
950+
// with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
951+
if (IRAddStream)
952+
cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream);
953+
954+
codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
955+
}
702956
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
703957
};
704958

llvm/lib/Transforms/Utils/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ add_llvm_component_library(LLVMTransformUtils
8888
SizeOpts.cpp
8989
SplitModule.cpp
9090
SplitModuleByCategory.cpp
91+
SplitModuleCG.cpp
9192
StripNonLineTableDebugInfo.cpp
9293
SymbolRewriter.cpp
9394
UnifyFunctionExitNodes.cpp

0 commit comments

Comments
 (0)