[WIP][ThinLTO][Split] Split module for parallel compilation in backend (1/N)#198702
[WIP][ThinLTO][Split] Split module for parallel compilation in backend (1/N)#198702mmjjpp wants to merge 1 commit into
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-lto Author: MaoJiaping (mmjjpp) ChangesPatch is 21.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/198702.diff 8 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
new file mode 100644
index 0000000000000..e60c4e931d40c
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
@@ -0,0 +1,34 @@
+#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
+#define LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/LTO/Config.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+
+namespace llvm {
+/// Splits the module M into N linkable partitions. The function ModuleCallback
+/// is called N times passing each individual partition as the MPart argument.
+class SplitModuleCG {
+public:
+  /// Callback that takes ownership of one partition (MPart) together with
+  /// the index of that partition.
+  using ModuleCreationCallback =
+      function_ref<void(std::unique_ptr<Module> MPart, unsigned PartitionId)>;
+
+  /// \param M module to split.
+  /// \param CombinedIndex combined summary index for the link.
+  /// \param LimitPartition upper bound on the number of partitions; 0 means
+  /// "no explicit limit". The effective count is available afterwards through
+  /// getPartitionNum().
+  SplitModuleCG(Module &M,
+                const ModuleSummaryIndex &CombinedIndex,
+                unsigned LimitPartition = 0);
+
+  /// Splits the module and hands each partition to \p ModuleCallback.
+  void SplitModule(ModuleCreationCallback ModuleCallback,
+                   const llvm::lto::Config &C);
+
+  /// Number of partitions chosen by the constructor. Read-only, so it is
+  /// usable on const objects.
+  unsigned getPartitionNum() const { return N; }
+
+private:
+  // NOTE: members are initialized in this declaration order (N, M, CG,
+  // EntryFuncs), independent of the order used in a constructor's
+  // initializer list.
+  unsigned N;
+  Module &M;
+  CallGraph CG;
+  DenseSet<const Function *> EntryFuncs;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 16a42e526eb8a..711ea9684bab0 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -34,8 +34,10 @@
#include "llvm/Plugins/PassPlugin.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/VirtualFileSystem.h"
@@ -45,6 +47,8 @@
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/Transforms/Utils/SplitModuleCG.h"
+#include <filesystem>
#include <optional>
using namespace llvm;
@@ -80,6 +84,23 @@ static cl::list<std::string>
"path matches this for -save-temps options"),
cl::CommaSeparated, cl::Hidden);
+// Modules whose size (as computed by calModuleSize) is below this threshold
+// are not split.
+static cl::opt<unsigned> ThinLTOSplitModuleSizeThreshold(
+    "thinlto-split-module-size-threshold", cl::Hidden, cl::init(500),
+    cl::desc("Control whether to split in thinlto backend "
+             "according to the size of a module."));
+
+static cl::opt<float> ThinLTOSplitModuleSizeRateThreshold(
+    "thinlto-split-module-size-rate-threshold", cl::Hidden, cl::init(0.5),
+    cl::desc("Whether to split in thinlto backend based on the ratio of "
+             "(callgraph size)/(module size)"));
+
+// Requested number of partitions; the default of 0 leaves the choice to the
+// splitter.
+static cl::opt<unsigned> ThinLTOSplitPartitions(
+    "thinlto-split-partitions", cl::Hidden, cl::init(0),
+    cl::desc("Control split to how many partitions in thinlto backend."));
+
+// Master switch for module splitting in the ThinLTO backend (off by default).
+static cl::opt<bool> ThinLTOSplit("thinlto-split", cl::init(false),
+                                  cl::desc("Enable split module in thinlto backend."));
+
namespace llvm {
extern cl::opt<bool> NoPGOWarnMismatch;
}
@@ -124,12 +145,19 @@ Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath,
if (LinkerHook && !LinkerHook(Task, M))
return false;
+ auto extract_filename = [](const std::string &path) -> std::string {
+ std::filesystem::path fs_path(path);
+ return fs_path.filename().string();
+ };
+
std::string PathPrefix;
// If this is the combined module (not a ThinLTO backend compile) or the
// user hasn't requested using the input module's path, emit to a file
// named from the provided OutputFileName with the Task ID appended.
if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
PathPrefix = OutputFileName;
+ if (ThinLTOSplit)
+ PathPrefix += extract_filename(M.getSourceFileName()) + ".";
if (Task != (unsigned)-1)
PathPrefix += utostr(Task) + ".";
} else
@@ -513,6 +541,208 @@ static void codegen(const Config &Conf, TargetMachine *TM,
report_fatal_error(std::move(Err));
}
+/// Returns the size of \p F, measured as the total number of instructions
+/// over all of its basic blocks.
+static unsigned calFunctionSize(const llvm::Function &F) {
+  unsigned InstCount = 0;
+  for (auto BBIt = F.begin(), BBEnd = F.end(); BBIt != BBEnd; ++BBIt)
+    InstCount += std::distance(BBIt->begin(), BBIt->end());
+  return InstCount;
+}
+
+/// Returns the size of \p M: the sum of calFunctionSize() over every function
+/// in the module.
+static unsigned calModuleSize(const llvm::Module &M) {
+  unsigned Total = 0;
+  for (auto FnIt = M.begin(), FnEnd = M.end(); FnIt != FnEnd; ++FnIt)
+    Total += calFunctionSize(*FnIt);
+  return Total;
+}
+
+/// Returns true unless the module size is below
+/// ThinLTOSplitModuleSizeThreshold.
+static bool canDoSplitModule(const llvm::Module &M) {
+  const bool TooSmall = calModuleSize(M) < ThinLTOSplitModuleSizeThreshold;
+  return !TooSmall;
+}
+
+/// Placeholder heuristic for whether \p Mod has call graphs large enough to be
+/// worth splitting. Both parameters are currently unused and the function
+/// unconditionally returns true.
+static bool HasLargeCG(Module &Mod, const ModuleSummaryIndex &CombinedIndex) {
+ // TODO: Check whether the module contains large call graphs. Splitting only
+ // pays off for ThinLTO parallel compilation when multiple call graphs can be
+ // separated.
+ return true;
+}
+
+/// Hands out task ids for split partitions from a namespace that is disjoint
+/// from the ids of the original ThinLTO backend tasks.
+struct TaskIdAllocator {
+  using TaskId = unsigned;
+
+  // The top bit of a TaskId marks the id as belonging to a split partition:
+  //   - original ThinLTO backend tasks are expected to have the top bit clear;
+  //   - ids produced by alloc() always have it set.
+  // The two id spaces therefore cannot collide.
+  static constexpr TaskId tag() {
+    return ~(std::numeric_limits<TaskId>::max() >> 1);
+  }
+
+  // Running sequence number of allocated partitions. Its top bit must stay
+  // clear; alloc() aborts once that is no longer the case.
+  std::atomic<TaskId> seq{0};
+
+  // Returns a fresh partition id of the form `tag() | sequence number`.
+  TaskId alloc() {
+    const TaskId Next = seq.fetch_add(1, std::memory_order_relaxed);
+
+    // A sequence number with the tag bit set means the counter has run into
+    // the namespace bit, i.e. too many partitions were allocated.
+    if ((Next & tag()) != 0)
+      report_fatal_error("Partition TaskId overflow: seq reached the tag bit.");
+
+    return Next | tag();
+  }
+
+  // Sanity-check/debugging helper: true iff `id` carries the partition tag.
+  static bool isPartition(TaskId id) { return (id & tag()) == tag(); }
+};
+
+// Single allocator instance used for every split partition in this file.
+static TaskIdAllocator gSplitTaskIds;
+
+/// Splits \p Mod into partitions, optionally optimizes and then code-generates
+/// every partition into its own temporary object file, merges those objects
+/// with an external linker (`-r`), and writes the merged object to the stream
+/// obtained from \p AddStream for \p task.
+///
+/// \param task task id of the module being compiled; may be the sentinel
+/// `(unsigned)-1`.
+/// \param ParallelCodeGenParallelismLevel requested partition limit; replaced
+/// by the partition count chosen by SplitModuleCG.
+/// \param DoOpt when true, opt() runs on each partition before codegen.
+/// \param IRAddStream if set (and \p DoOpt is true), each optimized partition
+/// is saved through cgdata::saveModuleForTwoRounds.
+/// \returns true; every failure path ends in report_fatal_error.
+static bool splitOptAndCodeGenThin(unsigned task, const Config &C,
+                                   TargetMachine *TM, AddStreamFn AddStream,
+                                   unsigned ParallelCodeGenParallelismLevel,
+                                   Module &Mod,
+                                   const ModuleSummaryIndex &CombinedIndex,
+                                   const std::vector<uint8_t> &CmdArgs,
+                                   bool DoOpt, AddStreamFn IRAddStream,
+                                   ArrayRef<StringRef> &BitcodeLibFuncs) {
+  unsigned ThreadCount = 0;
+  const Target *T = &TM->getTarget();
+
+  // Guards ThreadCount and the task-id bookkeeping inside the callback.
+  static std::mutex PrintMutex;
+
+  SplitModuleCG SplitModuleCG(Mod, CombinedIndex,
+                              ParallelCodeGenParallelismLevel);
+  ParallelCodeGenParallelismLevel = SplitModuleCG.getPartitionNum();
+
+  // One slot per partition. A slot stays empty until the partition's codegen
+  // opens its output stream.
+  std::vector<std::string> TempObjectFiles(ParallelCodeGenParallelismLevel);
+  std::vector<llvm::FileRemover> TempFileRemovers(
+      ParallelCodeGenParallelismLevel);
+
+  const auto HandleModulePartition = [&](std::unique_ptr<Module> MPart,
+                                         unsigned PartitionId) {
+    unsigned CurrentThreadId, UniqueTaskId;
+    {
+      std::lock_guard<std::mutex> Lock(PrintMutex);
+      CurrentThreadId = ThreadCount++;
+
+      // In distributed ThinLTO, `task` may be a sentinel (e.g. -1 cast to
+      // unsigned), which becomes UINT_MAX and naturally has MSB==1. Treat it
+      // as "no base task id" and don't enforce the namespace check on it.
+      //
+      // We do not rely on the incoming `task` for partition uniqueness: split
+      // partitions get a dedicated UniqueTaskId allocated below.
+      if (task != std::numeric_limits<unsigned>::max()) {
+        assert(!TaskIdAllocator::isPartition(task) &&
+               "Original ThinLTO TaskId unexpectedly overlaps the partition "
+               "namespace");
+      }
+      UniqueTaskId = gSplitTaskIds.alloc();
+    }
+
+    std::unique_ptr<TargetMachine> ThreadTM = createTargetMachine(C, T, *MPart);
+
+    if (DoOpt) {
+      if (!opt(C, ThreadTM.get(), UniqueTaskId, *MPart, /*IsThinLTO=*/true,
+               /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
+               CmdArgs, BitcodeLibFuncs)) {
+        report_fatal_error("Failed to gen opt for split mod in thread.");
+      }
+
+      // Save the current module before the first codegen round.
+      // Note that the second codegen round runs only `codegen()` without
+      // running `opt()`. We're not reaching here as it's bailed out earlier
+      // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
+      // NOTE(review): this keys the saved module on `task + CurrentThreadId`
+      // rather than UniqueTaskId; confirm this cannot collide with the ids
+      // of other backend tasks.
+      if (IRAddStream)
+        cgdata::saveModuleForTwoRounds(*MPart, task + CurrentThreadId,
+                                       IRAddStream);
+    }
+
+    // Stream factory handed to codegen(): creates a temporary object file
+    // for this partition and records it for the merge step and for cleanup.
+    auto splitStream = [&](unsigned task, const Twine &moduleName)
+        -> Expected<std::unique_ptr<CachedFileStream>> {
+      int FD;
+      SmallString<128> TempFilename;
+      if (std::error_code EC = sys::fs::createTemporaryFile(
+              "thinlto-split", "o", FD, TempFilename))
+        return errorCodeToError(EC);
+
+      TempObjectFiles[PartitionId] = std::string(TempFilename.str());
+      TempFileRemovers[PartitionId].setFile(TempObjectFiles[PartitionId]);
+
+      // The third raw_fd_ostream parameter is `unbuffered`, not a
+      // close-on-destruct flag; closing is governed by `shouldClose`. Leave
+      // the stream buffered.
+      auto OS = std::make_unique<raw_fd_ostream>(FD, /*shouldClose=*/true);
+
+      auto Stream = std::make_unique<CachedFileStream>(
+          std::move(OS), std::string(TempFilename.str()));
+
+      return std::move(Stream);
+    };
+
+    codegen(C, ThreadTM.get(), splitStream, UniqueTaskId, *MPart,
+            CombinedIndex);
+  };
+
+  SplitModuleCG.SplitModule(HandleModulePartition, C);
+
+  // TempObjectFiles is pre-sized, so the container itself is never empty;
+  // what matters is whether any partition actually produced an object.
+  bool AnyObjectProduced = false;
+  for (const auto &File : TempObjectFiles) {
+    if (!File.empty()) {
+      AnyObjectProduced = true;
+      break;
+    }
+  }
+  if (!AnyObjectProduced) {
+    llvm::errs() << "TempObjectFiles.empty()\n";
+    return true;
+  }
+
+  auto FinalStream = AddStream(task, Mod.getModuleIdentifier());
+  if (!FinalStream)
+    report_fatal_error("Failed to open final output stream");
+
+  int MergedFD;
+  SmallString<128> MergedFilename;
+  if (sys::fs::createTemporaryFile("thinlto-merged", "o", MergedFD,
+                                   MergedFilename))
+    report_fatal_error("Failed to create merged temp file.");
+  llvm::FileRemover MergedFileRemover(MergedFilename);
+  // Only the path is needed; the linker writes the file itself.
+  sys::fs::closeFile(MergedFD);
+
+  // Combine the partition objects into one relocatable object with an
+  // external linker: ld.lld if found, otherwise ld.
+  // NOTE(review): reviewers objected to LTO invoking a linker directly and
+  // suggested adding the split outputs through the AddStream callback.
+  std::vector<StringRef> Args;
+  std::string LinkerPath = "";
+  if (auto Path = sys::findProgramByName("ld.lld"))
+    LinkerPath = *Path;
+  else if (auto Path = sys::findProgramByName("ld"))
+    LinkerPath = *Path;
+
+  if (LinkerPath.empty())
+    report_fatal_error(
+        "Cannot find linker (ld or ld.lld) to merge partitions.");
+
+  Args.push_back(LinkerPath);
+  Args.push_back("-r");
+  Args.push_back("-o");
+  Args.push_back(MergedFilename);
+
+  // Skip slots of partitions that never opened an output stream; an empty
+  // path is not a valid linker input.
+  for (const auto &File : TempObjectFiles)
+    if (!File.empty())
+      Args.push_back(File);
+
+  std::string ErrMsg;
+  int Result = sys::ExecuteAndWait(LinkerPath, Args, /*Env=*/std::nullopt,
+                                   /*Redirects=*/{}, /*SecondsToWait=*/0,
+                                   /*MemoryLimit=*/0, &ErrMsg);
+
+  if (Result != 0) {
+    errs() << "Linker failed: " << ErrMsg << "\n";
+    report_fatal_error("Failed to merge split objects.");
+  }
+
+  {
+    // Copy the merged object into the caller-provided output stream.
+    std::unique_ptr<CachedFileStream> &FinalFileStream = *FinalStream;
+    auto BufferOrErr = MemoryBuffer::getFile(MergedFilename);
+    if (!BufferOrErr)
+      report_fatal_error("Failed to read merged object.");
+
+    FinalFileStream->OS->write(BufferOrErr.get()->getBufferStart(),
+                               BufferOrErr.get()->getBufferSize());
+  }
+  return true;
+}
+
static void splitCodeGen(const Config &C, TargetMachine *TM,
AddStreamFn AddStream,
unsigned ParallelCodeGenParallelismLevel, Module &Mod,
@@ -671,11 +901,28 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
// the module, if applicable.
Mod.setPartialSampleProfileRatio(CombinedIndex);
+ bool ProfitableToSplit = true;
+ if (ThinLTOSplit) {
+ if (!canDoSplitModule(Mod) || !HasLargeCG(Mod, CombinedIndex)) {
+ ProfitableToSplit = false;
+ LLVM_DEBUG(dbgs() << "warning: thinlto split not enable for module: "
+ << Mod.getName());
+ } else {
+ LLVM_DEBUG(dbgs() << "thinlto: split codegen for module: "
+ << Mod.getName());
+ }
+ }
+
LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
if (CodeGenOnly) {
- // If CodeGenOnly is set, we only perform code generation and skip
- // optimization. This value may differ from Conf.CodeGenOnly.
- codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
+ if (ThinLTOSplit && ProfitableToSplit)
+ splitOptAndCodeGenThin(Task, Conf, TM.get(), AddStream,
+ ThinLTOSplitPartitions, Mod, CombinedIndex,
+ CmdArgs, false, IRAddStream, BitcodeLibFuncs);
+ else
+ // If CodeGenOnly is set, we only perform code generation and skip
+ // optimization. This value may differ from Conf.CodeGenOnly.
+ codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
}
@@ -685,20 +932,27 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
auto OptimizeAndCodegen =
[&](Module &Mod, TargetMachine *TM,
LLVMRemarkFileHandle DiagnosticOutputFile) {
- // Perform optimization and code generation for ThinLTO.
- if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
- /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
- CmdArgs, BitcodeLibFuncs))
- return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
-
- // Save the current module before the first codegen round.
- // Note that the second codegen round runs only `codegen()` without
- // running `opt()`. We're not reaching here as it's bailed out earlier
- // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
- if (IRAddStream)
- cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream);
-
- codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
+ if (ThinLTOSplit && ProfitableToSplit) {
+ if (!splitOptAndCodeGenThin(
+ Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod,
+ CombinedIndex, CmdArgs, true, IRAddStream, BitcodeLibFuncs))
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ } else {
+ // Perform optimization and code generation for ThinLTO.
+ if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
+ /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
+ CmdArgs, BitcodeLibFuncs))
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+
+ // Save the current module before the first codegen round.
+ // Note that the second codegen round runs only `codegen()` without
+ // running `opt()`. We're not reaching here as it's bailed out earlier
+ // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
+ if (IRAddStream)
+ cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream);
+
+ codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
+ }
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
};
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 933e204081ad2..9b152136a40d6 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -88,6 +88,7 @@ add_llvm_component_library(LLVMTransformUtils
SizeOpts.cpp
SplitModule.cpp
SplitModuleByCategory.cpp
+ SplitModuleCG.cpp
StripNonLineTableDebugInfo.cpp
SymbolRewriter.cpp
UnifyFunctionExitNodes.cpp
diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp
index bb638f180bfbf..a2030bb376a84 100644
--- a/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -117,6 +117,18 @@ std::unique_ptr<Module> llvm::CloneModule(
for (const GlobalIFunc &I : M.ifuncs()) {
// Defer setting the resolver function until after functions are cloned.
+ if (!ShouldCloneDefinition(&I)) {
+ // An ifunc also cannot act as an external reference, so we need to create
+ // a function depending on the value type.
+ GlobalValue *GV;
+ assert(I.getValueType()->isFunctionTy() &&
+ "ValueType of ifunc must be function type!");
+ GV = Function::Create(cast<FunctionType>(I.getValueType()),
+ GlobalValue::ExternalLinkage, I.getAddressSpace(),
+ I.getName(), New.get());
+ VMap[&I] = GV;
+ continue;
+ }
auto *GI =
GlobalIFunc::create(I.getValueType(), I.getAddressSpace(),
I.getLinkage(), I.getName(), nullptr, New.get());
@@ -174,6 +186,9 @@ std::unique_ptr<Module> llvm::CloneModule(
}
for (const GlobalIFunc &I : M.ifuncs()) {
+ // We already dealt with undefined ifuncs above.
+ if (!ShouldCloneDefinition(&I))
+ continue;
GlobalIFunc *GI = cast<GlobalIFunc>(VMap[&I]);
if (const Constant *Resolver = I.getResolver())
GI->setResolver(MapValue(Resolver, VMap));
diff --git a/llvm/lib/Transforms/Utils/SplitModule.cpp b/llvm/lib/Transforms/Utils/SplitModule.cpp
index c39771733ee0d..64910b4d1ce99 100644
--- a/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -162,6 +162,7 @@ static void findPartitions(Module &M, ClusterIDMapType &ClusterIDMap,
llvm::for_each(M.functions(), recordGVSet);
llvm::for_each(M.globals(), recordGVSet);
llvm::for_each(M.aliases(), recordGVSet);
+ llvm::for_each(M.ifuncs(), recordGVSet);
// Assigned all GVs to merged clusters while balancing number of objects in
// each.
diff --git a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
new file mode 100644
index 0000000000000..a10a2a551f655
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
@@ -0,0 +1,26 @@
+#include "llvm/Transforms/Utils/SplitModuleCG.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "split-module-CG"
+
+/// Not implemented yet: the body is empty, so \p ModuleCallback is never
+/// invoked and \p C is unused.
+void SplitModuleCG::SplitModule(ModuleCreationCallback ModuleCallback,
+ const llvm::lto::Config &C) {
+ // TODO: 1. Process the linkage of the GlobalValue; 2. Allocate the callgraph
+ // to N partitions; 3. Invoke the cloneModule API to copy the N partitions to
+ // obtain MParts.
+
+}
+
+/// Builds the call graph of \p M and fixes the partition count N.
+///
+/// \p LimitPartition is the requested upper bound; 0 means "no limit". N is
+/// clamped to the number of entry functions and is never less than 1.
+/// \p CombinedIndex is currently unused.
+SplitModuleCG::SplitModuleCG(Module &M,
+                             const ModuleSummaryIndex &CombinedIndex,
+                             unsigned LimitPartition)
+    // Listed in member declaration order (N, M, CG): that is the order in
+    // which members are actually initialized, and a mismatch triggers
+    // -Wreorder.
+    : N(LimitPartition), M(M), CG(M) {
+  // TODO: The module is split based on the callgraph, and EntryFuncs stores
+  // the root function of each callgraph.
+
+  // No limit requested, or more partitions requested than there are entry
+  // functions: use one partition per entry function.
+  if (N == 0 || N > EntryFuncs.size())
+    N = EntryFuncs.size();
+
+  // Always keep at least one partition, even when EntryFuncs is empty.
+  if (N == 0)
+    N = 1;
+}
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-split/alias-to-ifunc.ll b/llvm/test/tools/llvm-split/alias-to-ifunc.ll
new file mode 100644
index 0000000000000..dbfa1a52bfab8
--- /dev/null
+++ b/llvm/test/tools/llvm-split/alias-to-ifunc.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-split -j2 -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0-DAG: @alias_foo = alias void (), ptr @foo_a.ifunc
+; CHECK0-DAG: @foo_a.ifunc = ifunc void (), ptr @foo_a.resolver
+; CHECK0-DAG: define hidden ptr @foo_a.resolver()
+; CHECK1-DAG: declare void @alias_foo()
+; CHECK1-DAG: declare void @foo_a.ifunc()
+; CHECK1-DAG: declare hidden ptr @foo_a.resolver()
+
+@alias_foo = alias void (), ptr @foo_a.ifunc
+@foo_a.ifunc = ifunc void (), ptr @foo_a.resolver
+
+define internal void @foo.impl(...
[truncated]
|
3d9040f to
2d24ed1
Compare
2d24ed1 to
01740e9
Compare
You can test this locally with the following command:git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/include/llvm/Transforms/Utils/SplitModuleCG.h llvm/lib/Transforms/Utils/SplitModuleCG.cpp llvm/lib/LTO/LTOBackend.cpp --diff_from_common_commit
View the diff from clang-format here.diff --git a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
index e60c4e931..4df527303 100644
--- a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
+++ b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
@@ -1,11 +1,11 @@
#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
#define LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/LTO/Config.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
namespace llvm {
/// Splits the module M into N linkable partitions. The function ModuleCallback
@@ -14,15 +14,14 @@ class SplitModuleCG {
public:
using ModuleCreationCallback =
function_ref<void(std::unique_ptr<Module> MPart, unsigned PartitionId)>;
- SplitModuleCG(Module &M,
- const ModuleSummaryIndex &CombinedIndex,
+ SplitModuleCG(Module &M, const ModuleSummaryIndex &CombinedIndex,
unsigned LimitPartition = 0);
void SplitModule(ModuleCreationCallback ModuleCallback,
const llvm::lto::Config &C);
unsigned getPartitionNum() { return N; }
- private:
+private:
unsigned N;
Module &M;
CallGraph CG;
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 711ea9684..a942e9fc3 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -98,8 +98,9 @@ static cl::opt<unsigned> ThinLTOSplitPartitions(
"thinlto-split-partitions", cl::Hidden, cl::init(0),
cl::desc("Control split to how many partitions in thinlto backend."));
-static cl::opt<bool> ThinLTOSplit("thinlto-split", cl::init(false),
- cl::desc("Enable split module in thinlto backend."));
+static cl::opt<bool>
+ ThinLTOSplit("thinlto-split", cl::init(false),
+ cl::desc("Enable split module in thinlto backend."));
namespace llvm {
extern cl::opt<bool> NoPGOWarnMismatch;
@@ -614,11 +615,13 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C,
static std::mutex PrintMutex;
- SplitModuleCG SplitModuleCG(Mod, CombinedIndex, ParallelCodeGenParallelismLevel);
+ SplitModuleCG SplitModuleCG(Mod, CombinedIndex,
+ ParallelCodeGenParallelismLevel);
ParallelCodeGenParallelismLevel = SplitModuleCG.getPartitionNum();
std::vector<std::string> TempObjectFiles(ParallelCodeGenParallelismLevel);
- std::vector<llvm::FileRemover> TempFileRemovers(ParallelCodeGenParallelismLevel);
+ std::vector<llvm::FileRemover> TempFileRemovers(
+ ParallelCodeGenParallelismLevel);
const auto HandleModulePartition = [&](std::unique_ptr<Module> MPart,
unsigned PartitionId) {
@@ -670,8 +673,8 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C,
TempObjectFiles[PartitionId] = std::string(TempFilename.str());
TempFileRemovers[PartitionId].setFile(TempObjectFiles[PartitionId]);
- auto OS = std::make_unique<raw_fd_ostream>(
- FD, true, /*CloseOnDestruct*/true);
+ auto OS =
+ std::make_unique<raw_fd_ostream>(FD, true, /*CloseOnDestruct*/ true);
auto Stream = std::make_unique<CachedFileStream>(
std::move(OS), std::string(TempFilename.str()));
@@ -711,7 +714,8 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C,
LinkerPath = *Path;
if (LinkerPath.empty())
- report_fatal_error("Cannot find linkeer (ld or ld.lld) to merge partitions.");
+ report_fatal_error(
+ "Cannot find linkeer (ld or ld.lld) to merge partitions.");
Args.push_back(LinkerPath);
Args.push_back("-r");
@@ -929,32 +933,31 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
- auto OptimizeAndCodegen =
- [&](Module &Mod, TargetMachine *TM,
- LLVMRemarkFileHandle DiagnosticOutputFile) {
- if (ThinLTOSplit && ProfitableToSplit) {
- if (!splitOptAndCodeGenThin(
- Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod,
- CombinedIndex, CmdArgs, true, IRAddStream, BitcodeLibFuncs))
- return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
- } else {
- // Perform optimization and code generation for ThinLTO.
- if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
- /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
- CmdArgs, BitcodeLibFuncs))
- return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
-
- // Save the current module before the first codegen round.
- // Note that the second codegen round runs only `codegen()` without
- // running `opt()`. We're not reaching here as it's bailed out earlier
- // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
- if (IRAddStream)
- cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream);
-
- codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
- }
+ auto OptimizeAndCodegen = [&](Module &Mod, TargetMachine *TM,
+ LLVMRemarkFileHandle DiagnosticOutputFile) {
+ if (ThinLTOSplit && ProfitableToSplit) {
+ if (!splitOptAndCodeGenThin(Task, Conf, TM, AddStream,
+ ThinLTOSplitPartitions, Mod, CombinedIndex,
+ CmdArgs, true, IRAddStream, BitcodeLibFuncs))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
- };
+ } else {
+ // Perform optimization and code generation for ThinLTO.
+ if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
+ /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
+ CmdArgs, BitcodeLibFuncs))
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+
+ // Save the current module before the first codegen round.
+ // Note that the second codegen round runs only `codegen()` without
+ // running `opt()`. We're not reaching here as it's bailed out earlier
+ // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
+ if (IRAddStream)
+ cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream);
+
+ codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
+ }
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ };
if (ThinLTOAssumeMerged)
return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile));
diff --git a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
index 9f57cb3ed..35470ae9b 100644
--- a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
+++ b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
@@ -9,11 +9,9 @@ void SplitModuleCG::SplitModule(ModuleCreationCallback ModuleCallback,
// TODO: 1. Process the linkage of the GlobalValue; 2. Allocate the callgraph
// to N partitions; 3.Invoke the cloneModule API to copy the N partitions to
// obtain MParts.
-
}
-SplitModuleCG::SplitModuleCG(Module &M,
- const ModuleSummaryIndex &CombinedIndex,
+SplitModuleCG::SplitModuleCG(Module &M, const ModuleSummaryIndex &CombinedIndex,
unsigned LimitPartition)
: M(M), CG(M), N(LimitPartition) {
// TODO: The module is split based on the callgraph, and EntryFuncs stores
|
🐧 Linux x64 Test ResultsThe build failed before running any tests. Click on a failure below to see the details. lib/Transforms/Utils/CMakeFiles/LLVMTransformUtils.dir/SplitModuleCG.cpp.oIf these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
🪟 Windows x64 Test Results
Failed Tests(click on a test name to see its output) MLIRMLIR.Target/LLVMIR/openmp-llvm.mlirIf these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
|
|
||
| std::vector<StringRef> Args; | ||
| std::string LinkerPath = ""; | ||
| if (auto Path = sys::findProgramByName("ld.lld")) |
There was a problem hiding this comment.
This appears to search for ld.lld in the environment. Seems better to prioritize clang's local directory first, and then check the system PATH?
| @@ -0,0 +1,34 @@ | |||
| #ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H | |||
There was a problem hiding this comment.
We already have more than one module-splitting pass. I don't see a reason for a new one.
teresajohnson
left a comment
There was a problem hiding this comment.
This change has no description and no tests - it needs both, and possibly an RFC for a change as big as this. At first glance, a couple of high level comments: LTO should not be directly calling lld. Also, regular LTO already has support for split codegen, can that be leveraged? In particular, look at how that adds the split modules to the link, which is via the AddStream callback.
This PR adds an interface for splitting a module by call graph, which is invoked in the ThinLTO backend phase. The module is split into N partitions (MParts), and opt and codegen are run on the MParts in parallel to implement parallel compilation in the ThinLTO backend.
01740e9 to
88bcb22
Compare
Stack Pr:
1/N: #198702 [WIP][ThinLTO][Split] Split module for parallel compilation in backend (1/N) ⬅
2/N: #199151 [WIP][Thinlto][Split] Add callgraph-based module splitting(SplitModuleCG) (2/N)
3/N: #199154 [WIP][ThinLTO][SplitModuleCG] Add comdat group handling (3/N)
There are more PRs that have not been submitted yet.