3434#include " llvm/Plugins/PassPlugin.h"
3535#include " llvm/Support/Error.h"
3636#include " llvm/Support/FileSystem.h"
37+ #include " llvm/Support/FileUtilities.h"
3738#include " llvm/Support/MemoryBuffer.h"
3839#include " llvm/Support/Path.h"
40+ #include " llvm/Support/Program.h"
3941#include " llvm/Support/ThreadPool.h"
4042#include " llvm/Support/ToolOutputFile.h"
4143#include " llvm/Support/VirtualFileSystem.h"
4547#include " llvm/Transforms/IPO/WholeProgramDevirt.h"
4648#include " llvm/Transforms/Utils/FunctionImportUtils.h"
4749#include " llvm/Transforms/Utils/SplitModule.h"
50+ #include " llvm/Transforms/Utils/SplitModuleCG.h"
51+ #include < filesystem>
4852#include < optional>
4953
5054using namespace llvm ;
@@ -80,6 +84,23 @@ static cl::list<std::string>
8084 " path matches this for -save-temps options" ),
8185 cl::CommaSeparated, cl::Hidden);
8286
87+ static cl::opt<unsigned > ThinLTOSplitModuleSizeThreshold (
88+ " thinlto-split-module-size-threshold" , cl::Hidden, cl::init(500 ),
89+ cl::desc(" Control the amount of whether split in thinlto backend"
90+ " accroding to the size of a module." ));
91+
92+ static cl::opt<float > ThinLTOSplitModuleSizeRateThreshold (
93+ " thinlto-split-module-size-rate-threshold" , cl::Hidden, cl::init(0.5 ),
94+ cl::desc(" Whether to split in thinlto backend based on the ratio of "
95+ " (callgraph size)/(module size)" ));
96+
97+ static cl::opt<unsigned > ThinLTOSplitPartitions (
98+ " thinlto-split-partitions" , cl::Hidden, cl::init(0 ),
99+ cl::desc(" Control split to how many partitions in thinlto backend." ));
100+
101+ static cl::opt<bool > ThinLTOSplit (" thinlto-split" , cl::init(false ),
102+ cl::desc(" Enable split module in thinlto backend." ));
103+
83104namespace llvm {
84105extern cl::opt<bool > NoPGOWarnMismatch;
85106}
@@ -124,12 +145,19 @@ Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath,
124145 if (LinkerHook && !LinkerHook (Task, M))
125146 return false ;
126147
148+ auto extract_filename = [](const std::string &path) -> std::string {
149+ std::filesystem::path fs_path (path);
150+ return fs_path.filename ().string ();
151+ };
152+
127153 std::string PathPrefix;
128154 // If this is the combined module (not a ThinLTO backend compile) or the
129155 // user hasn't requested using the input module's path, emit to a file
130156 // named from the provided OutputFileName with the Task ID appended.
131157 if (M.getModuleIdentifier () == " ld-temp.o" || !UseInputModulePath) {
132158 PathPrefix = OutputFileName;
159+ if (ThinLTOSplit)
160+ PathPrefix += extract_filename (M.getSourceFileName ()) + " ." ;
133161 if (Task != (unsigned )-1 )
134162 PathPrefix += utostr (Task) + " ." ;
135163 } else
@@ -513,6 +541,208 @@ static void codegen(const Config &Conf, TargetMachine *TM,
513541 report_fatal_error (std::move (Err));
514542}
515543
544+ static unsigned calFunctionSize (const llvm::Function &F) {
545+ unsigned size = 0 ;
546+ for (const auto &BB : F)
547+ size += std::distance (BB.begin (), BB.end ());
548+ return size;
549+ }
550+
551+ static unsigned calModuleSize (const llvm::Module &M) {
552+ unsigned size = 0 ;
553+ for (const auto &F : M)
554+ size += calFunctionSize (F);
555+ return size;
556+ }
557+
558+ static bool canDoSplitModule (const llvm::Module &M) {
559+ if (calModuleSize (M) < ThinLTOSplitModuleSizeThreshold)
560+ return false ;
561+ return true ;
562+ }
563+
564+ static bool HasLargeCG (Module &Mod, const ModuleSummaryIndex &CombinedIndex) {
565+ // TODO: Check whether there has large callgraphs. When multiple callgraphs
566+ // are split, thinlto parallel compilation can bring benefits.
567+ return true ;
568+ }
569+
570+ struct TaskIdAllocator {
571+ using TaskId = unsigned ;
572+
573+ // Use the most significant bit (MSB) as a namespace tag.
574+ // - Original ThinLTO backend tasks are expected to have MSB == 0.
575+ // - Split partitions allocated by this allocator always have MSB == 1.
576+ // This guarantees the two ID spaces never overlap.
577+ static constexpr TaskId tag () {
578+ return TaskId{1 } << (std::numeric_limits<TaskId>::digits - 1 );
579+ }
580+
581+ // Monotonic sequence counter for split partitions (MSB must remain 0 here).
582+ std::atomic<TaskId> seq{0 };
583+
584+ // Allocate a globally unique TaskId for a split partition.
585+ // The returned ID is `tag() | seq`, so it lives in the MSB==1 namespace.
586+ TaskId alloc () {
587+ TaskId v = seq.fetch_add (1 , std::memory_order_relaxed);
588+
589+ // If the counter ever reaches the MSB, we'd overlap namespaces.
590+ // This indicates an overflow / too many partitions.
591+ if (v & tag ())
592+ report_fatal_error (" Partition TaskId overflow: seq reached the tag bit." );
593+
594+ return tag () | v;
595+ }
596+
597+ // Helper for sanity checks / debugging.
598+ static bool isPartition (TaskId id) { return (id & tag ()) != 0 ; }
599+ };
600+
601+ // Global allocator shared by all split partitions.
602+ static TaskIdAllocator gSplitTaskIds ;
603+
604+ static bool splitOptAndCodeGenThin (unsigned task, const Config &C,
605+ TargetMachine *TM, AddStreamFn AddStream,
606+ unsigned ParallelCodeGenParallelismLevel,
607+ Module &Mod,
608+ const ModuleSummaryIndex &CombinedIndex,
609+ const std::vector<uint8_t > &CmdArgs,
610+ bool DoOpt, AddStreamFn IRAddStream,
611+ ArrayRef<StringRef> &BitcodeLibFuncs) {
612+ unsigned ThreadCount = 0 ;
613+ const Target *T = &TM->getTarget ();
614+
615+ static std::mutex PrintMutex;
616+
617+ SplitModuleCG SplitModuleCG (Mod, CombinedIndex, ParallelCodeGenParallelismLevel);
618+ ParallelCodeGenParallelismLevel = SplitModuleCG.getPartitionNum ();
619+
620+ std::vector<std::string> TempObjectFiles (ParallelCodeGenParallelismLevel);
621+ std::vector<llvm::FileRemover> TempFileRemovers (ParallelCodeGenParallelismLevel);
622+
623+ const auto HandleModulePartition = [&](std::unique_ptr<Module> MPart,
624+ unsigned PartitionId) {
625+ unsigned CurrentThreadId, UniqueTaskId;
626+ {
627+ std::lock_guard<std::mutex> Lock (PrintMutex);
628+ CurrentThreadId = ThreadCount++;
629+
630+ // In distributed ThinLTO, `task` may be a sentinel (e.g. -1 cast to
631+ // unsigned), which becomes UINT_MAX and naturally has MSB==1. Treat it
632+ // as "no base task id" and don't enforce the namespace check on it.
633+ //
634+ // We do not rely on the incoming `task` for partition uniqueness: split
635+ // partitions get a dedicated UniqueTaskId allocated below.
636+ if (task != std::numeric_limits<unsigned >::max ()) {
637+ assert (!TaskIdAllocator::isPartition (task) &&
638+ " Original ThinLTO TaskId unexpectedly overlaps the partition "
639+ " namespace" );
640+ }
641+ UniqueTaskId = gSplitTaskIds .alloc ();
642+ }
643+
644+ std::unique_ptr<TargetMachine> ThreadTM = createTargetMachine (C, T, *MPart);
645+
646+ if (DoOpt) {
647+ if (!opt (C, ThreadTM.get (), UniqueTaskId, *MPart, /* IsThinLTO=*/ true ,
648+ /* ExportSummary=*/ nullptr , /* ImportSummary=*/ &CombinedIndex,
649+ CmdArgs, BitcodeLibFuncs)) {
650+ report_fatal_error (" Failed to gen opt for split mod in thread." );
651+ }
652+
653+ // Save the current module before the first codegen round.
654+ // Note that the second codegen round runs only `codegen()` without
655+ // running `opt()`. We're not reaching here as it's bailed out earlier
656+ // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
657+ if (IRAddStream)
658+ cgdata::saveModuleForTwoRounds (*MPart, task + CurrentThreadId,
659+ IRAddStream);
660+ }
661+
662+ auto splitStream = [&](unsigned task, const Twine &moduleName)
663+ -> Expected<std::unique_ptr<CachedFileStream>> {
664+ int FD;
665+ SmallString<128 > TempFilename;
666+ if (std::error_code EC = sys::fs::createTemporaryFile (
667+ " thinlto-split" , " o" , FD, TempFilename))
668+ return errorCodeToError (EC);
669+
670+ TempObjectFiles[PartitionId] = std::string (TempFilename.str ());
671+ TempFileRemovers[PartitionId].setFile (TempObjectFiles[PartitionId]);
672+
673+ auto OS = std::make_unique<raw_fd_ostream>(
674+ FD, true , /* CloseOnDestruct*/ true );
675+
676+ auto Stream = std::make_unique<CachedFileStream>(
677+ std::move (OS), std::string (TempFilename.str ()));
678+
679+ return std::move (Stream);
680+ };
681+
682+ codegen (C, ThreadTM.get (), splitStream, UniqueTaskId, *MPart,
683+ CombinedIndex);
684+ };
685+
686+ SplitModuleCG.SplitModule (HandleModulePartition, C);
687+
688+ // Use ld.lld to combine the partitions into a object.
689+ if (TempObjectFiles.empty ()) {
690+ llvm::errs () << " TempObjectFiles.empty()\n " ;
691+ return true ;
692+ }
693+
694+ auto FinalStream = AddStream (task, Mod.getModuleIdentifier ());
695+ if (!FinalStream)
696+ report_fatal_error (" Failed to open final output stream" );
697+
698+ int MergedFD;
699+ SmallString<128 > MergedFilename;
700+ if (sys::fs::createTemporaryFile (" thinlto-merged" , " o" , MergedFD,
701+ MergedFilename))
702+ report_fatal_error (" Failed to create merged temp file." );
703+ llvm::FileRemover MergedFileRemover (MergedFilename);
704+ sys::fs::closeFile (MergedFD);
705+
706+ std::vector<StringRef> Args;
707+ std::string LinkerPath = " " ;
708+ if (auto Path = sys::findProgramByName (" ld.lld" ))
709+ LinkerPath = *Path;
710+ else if (auto Path = sys::findProgramByName (" ld" ))
711+ LinkerPath = *Path;
712+
713+ if (LinkerPath.empty ())
714+ report_fatal_error (" Cannot find linkeer (ld or ld.lld) to merge partitions." );
715+
716+ Args.push_back (LinkerPath);
717+ Args.push_back (" -r" );
718+ Args.push_back (" -o" );
719+ Args.push_back (MergedFilename);
720+
721+ for (const auto &File : TempObjectFiles)
722+ Args.push_back (File);
723+
724+ std::string ErrMsg;
725+ int Result = sys::ExecuteAndWait (LinkerPath, Args, /* Env=*/ std::nullopt ,
726+ /* Redirects=*/ {}, /* SecondsToWait=*/ 0 ,
727+ /* MemoryLimit=*/ 0 , &ErrMsg);
728+
729+ if (Result != 0 ) {
730+ errs () << " Linker failed: " << ErrMsg << " \n " ;
731+ report_fatal_error (" Failed to merge split objects." );
732+ }
733+
734+ {
735+ std::unique_ptr<CachedFileStream> &FinalFileStream = *FinalStream;
736+ auto BufferOrErr = MemoryBuffer::getFile (MergedFilename);
737+ if (!BufferOrErr)
738+ report_fatal_error (" Failed to read merged object." );
739+
740+ FinalFileStream->OS ->write (BufferOrErr.get ()->getBufferStart (),
741+ BufferOrErr.get ()->getBufferSize ());
742+ }
743+ return true ;
744+ }
745+
516746static void splitCodeGen (const Config &C, TargetMachine *TM,
517747 AddStreamFn AddStream,
518748 unsigned ParallelCodeGenParallelismLevel, Module &Mod,
@@ -671,11 +901,28 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
671901 // the module, if applicable.
672902 Mod.setPartialSampleProfileRatio (CombinedIndex);
673903
904+ bool ProfitableToSplit = true ;
905+ if (ThinLTOSplit) {
906+ if (!canDoSplitModule (Mod) || !HasLargeCG (Mod, CombinedIndex)) {
907+ ProfitableToSplit = false ;
908+ LLVM_DEBUG (dbgs () << " warning: thinlto split not enable for module: "
909+ << Mod.getName ());
910+ } else {
911+ LLVM_DEBUG (dbgs () << " thinlto: split codegen for module: "
912+ << Mod.getName ());
913+ }
914+ }
915+
674916 LLVM_DEBUG (dbgs () << " Running ThinLTO\n " );
675917 if (CodeGenOnly) {
676- // If CodeGenOnly is set, we only perform code generation and skip
677- // optimization. This value may differ from Conf.CodeGenOnly.
678- codegen (Conf, TM.get (), AddStream, Task, Mod, CombinedIndex);
918+ if (ThinLTOSplit && ProfitableToSplit)
919+ splitOptAndCodeGenThin (Task, Conf, TM.get (), AddStream,
920+ ThinLTOSplitPartitions, Mod, CombinedIndex,
921+ CmdArgs, false , IRAddStream, BitcodeLibFuncs);
922+ else
923+ // If CodeGenOnly is set, we only perform code generation and skip
924+ // optimization. This value may differ from Conf.CodeGenOnly.
925+ codegen (Conf, TM.get (), AddStream, Task, Mod, CombinedIndex);
679926 return finalizeOptimizationRemarks (std::move (DiagnosticOutputFile));
680927 }
681928
@@ -685,20 +932,27 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
685932 auto OptimizeAndCodegen =
686933 [&](Module &Mod, TargetMachine *TM,
687934 LLVMRemarkFileHandle DiagnosticOutputFile) {
688- // Perform optimization and code generation for ThinLTO.
689- if (!opt (Conf, TM, Task, Mod, /* IsThinLTO=*/ true ,
690- /* ExportSummary=*/ nullptr , /* ImportSummary=*/ &CombinedIndex,
691- CmdArgs, BitcodeLibFuncs))
692- return finalizeOptimizationRemarks (std::move (DiagnosticOutputFile));
693-
694- // Save the current module before the first codegen round.
695- // Note that the second codegen round runs only `codegen()` without
696- // running `opt()`. We're not reaching here as it's bailed out earlier
697- // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
698- if (IRAddStream)
699- cgdata::saveModuleForTwoRounds (Mod, Task, IRAddStream);
700-
701- codegen (Conf, TM, AddStream, Task, Mod, CombinedIndex);
935+ if (ThinLTOSplit && ProfitableToSplit) {
936+ if (!splitOptAndCodeGenThin (
937+ Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod,
938+ CombinedIndex, CmdArgs, true , IRAddStream, BitcodeLibFuncs))
939+ return finalizeOptimizationRemarks (std::move (DiagnosticOutputFile));
940+ } else {
941+ // Perform optimization and code generation for ThinLTO.
942+ if (!opt (Conf, TM, Task, Mod, /* IsThinLTO=*/ true ,
943+ /* ExportSummary=*/ nullptr , /* ImportSummary=*/ &CombinedIndex,
944+ CmdArgs, BitcodeLibFuncs))
945+ return finalizeOptimizationRemarks (std::move (DiagnosticOutputFile));
946+
947+ // Save the current module before the first codegen round.
948+ // Note that the second codegen round runs only `codegen()` without
949+ // running `opt()`. We're not reaching here as it's bailed out earlier
950+ // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
951+ if (IRAddStream)
952+ cgdata::saveModuleForTwoRounds (Mod, Task, IRAddStream);
953+
954+ codegen (Conf, TM, AddStream, Task, Mod, CombinedIndex);
955+ }
702956 return finalizeOptimizationRemarks (std::move (DiagnosticOutputFile));
703957 };
704958
0 commit comments