Skip to content

Commit fdf1f69

Browse files
authored
[CGData][GMF] Skip No Params (#116548)
This update follows up on change #112671 and is mostly NFC (no functional change), with the following exceptions:
- Introduced `-global-merging-skip-no-params` to skip merging when no parameters are required.
- The parameter count is now calculated from the number of unique hash values.
- Added `-global-merging-inst-overhead` to adjust the per-instruction overhead, reflecting the machine instruction size.
- Costs and benefits are now computed using the `double` data type. Since the finalization process occurs offline, this should not significantly impact build time.
- Moved a sorting operation outside of the loop.

This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608.
1 parent bb88fd1 commit fdf1f69

File tree

4 files changed

+86
-22
lines changed

4 files changed

+86
-22
lines changed

llvm/lib/CGData/StableFunctionMap.cpp

+37-16
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
//===----------------------------------------------------------------------===//
1515

1616
#include "llvm/CGData/StableFunctionMap.h"
17+
#include "llvm/ADT/SmallSet.h"
1718
#include "llvm/Support/CommandLine.h"
1819
#include "llvm/Support/Debug.h"
1920

@@ -35,21 +36,30 @@ static cl::opt<unsigned> GlobalMergingMaxParams(
3536
cl::desc(
3637
"The maximum number of parameters allowed when merging functions."),
3738
cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden);
38-
static cl::opt<unsigned> GlobalMergingParamOverhead(
39+
static cl::opt<bool> GlobalMergingSkipNoParams(
40+
"global-merging-skip-no-params",
41+
cl::desc("Skip merging functions with no parameters."), cl::init(true),
42+
cl::Hidden);
43+
static cl::opt<double> GlobalMergingInstOverhead(
44+
"global-merging-inst-overhead",
45+
cl::desc("The overhead cost associated with each instruction when lowering "
46+
"to machine instruction."),
47+
cl::init(1.2), cl::Hidden);
48+
static cl::opt<double> GlobalMergingParamOverhead(
3949
"global-merging-param-overhead",
4050
cl::desc("The overhead cost associated with each parameter when merging "
4151
"functions."),
42-
cl::init(2), cl::Hidden);
43-
static cl::opt<unsigned>
52+
cl::init(2.0), cl::Hidden);
53+
static cl::opt<double>
4454
GlobalMergingCallOverhead("global-merging-call-overhead",
4555
cl::desc("The overhead cost associated with each "
4656
"function call when merging functions."),
47-
cl::init(1), cl::Hidden);
48-
static cl::opt<unsigned> GlobalMergingExtraThreshold(
57+
cl::init(1.0), cl::Hidden);
58+
static cl::opt<double> GlobalMergingExtraThreshold(
4959
"global-merging-extra-threshold",
5060
cl::desc("An additional cost threshold that must be exceeded for merging "
5161
"to be considered beneficial."),
52-
cl::init(0), cl::Hidden);
62+
cl::init(0.0), cl::Hidden);
5363

5464
unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
5565
auto It = NameToId.find(Name);
@@ -160,21 +170,32 @@ static bool isProfitable(
160170
if (InstCount < GlobalMergingMinInstrs)
161171
return false;
162172

163-
unsigned ParamCount = SFS[0]->IndexOperandHashMap->size();
164-
if (ParamCount > GlobalMergingMaxParams)
165-
return false;
166-
167-
unsigned Benefit = InstCount * (StableFunctionCount - 1);
168-
unsigned Cost =
169-
(GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) *
170-
StableFunctionCount +
171-
GlobalMergingExtraThreshold;
173+
double Cost = 0.0;
174+
SmallSet<stable_hash, 8> UniqueHashVals;
175+
for (auto &SF : SFS) {
176+
UniqueHashVals.clear();
177+
for (auto &[IndexPair, Hash] : *SF->IndexOperandHashMap)
178+
UniqueHashVals.insert(Hash);
179+
unsigned ParamCount = UniqueHashVals.size();
180+
if (ParamCount > GlobalMergingMaxParams)
181+
return false;
182+
// Theoretically, if ParamCount is 0, it results in identical code folding
183+
// (ICF), which we can skip merging here since the linker already handles
184+
// ICF. This pass would otherwise introduce unnecessary thunks that are
185+
// merely direct jumps. However, enabling this could be beneficial depending
186+
// on downstream passes, so we provide an option for it.
187+
if (GlobalMergingSkipNoParams && ParamCount == 0)
188+
return false;
189+
Cost += ParamCount * GlobalMergingParamOverhead + GlobalMergingCallOverhead;
190+
}
191+
Cost += GlobalMergingExtraThreshold;
172192

193+
double Benefit =
194+
InstCount * (StableFunctionCount - 1) * GlobalMergingInstOverhead;
173195
bool Result = Benefit > Cost;
174196
LLVM_DEBUG(dbgs() << "isProfitable: Hash = " << SFS[0]->Hash << ", "
175197
<< "StableFunctionCount = " << StableFunctionCount
176198
<< ", InstCount = " << InstCount
177-
<< ", ParamCount = " << ParamCount
178199
<< ", Benefit = " << Benefit << ", Cost = " << Cost
179200
<< ", Result = " << (Result ? "true" : "false") << "\n");
180201
return Result;

llvm/lib/CodeGen/GlobalMergeFunctions.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -405,12 +405,13 @@ static ParamLocsVecTy computeParamInfo(
405405
}
406406

407407
ParamLocsVecTy ParamLocsVec;
408-
for (auto &[HashSeq, Locs] : HashSeqToLocs) {
408+
for (auto &[HashSeq, Locs] : HashSeqToLocs)
409409
ParamLocsVec.push_back(std::move(Locs));
410-
llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) {
411-
return L[0] < R[0];
412-
});
413-
}
410+
411+
llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) {
412+
return L[0] < R[0];
413+
});
414+
414415
return ParamLocsVec;
415416
}
416417

llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll renamed to llvm/test/CodeGen/Generic/cgdata-merge-local.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; while parameterizing a difference in their global variables, g1 and g2.
33
; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm,
44
; which are tail-called from thunks f1 and f2 respectively.
5-
; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (IFC).
5+
; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (ICF).
66

77
; RUN: opt -S --passes=global-merge-func %s | FileCheck %s
88

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; This test verifies whether two identical functions, f1 and f2, can be merged
2+
; locally using the global merge function.
3+
; The functions, f1.Tgm and f2.Tgm, will be folded by the linker through
4+
; Identical Code Folding (ICF).
5+
; While identical functions can already be folded by the linker, creating this
6+
; canonical form can be beneficial in downstream passes. This merging process
7+
; can be controlled by the -global-merging-skip-no-params option.
8+
9+
; RUN: llc -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s --check-prefix=MERGE
10+
; RUN: llc -enable-global-merge-func=true -global-merging-skip-no-params=true < %s | FileCheck %s --implicit-check-not=".Tgm"
11+
12+
; MERGE: _f1.Tgm
13+
; MERGE: _f2.Tgm
14+
15+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
16+
target triple = "arm64-unknown-ios12.0.0"
17+
18+
@g = external local_unnamed_addr global [0 x i32], align 4
19+
@g1 = external global i32, align 4
20+
@g2 = external global i32, align 4
21+
22+
define i32 @f1(i32 %a) {
23+
entry:
24+
%idxprom = sext i32 %a to i64
25+
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
26+
%0 = load i32, i32* %arrayidx, align 4
27+
%1 = load volatile i32, i32* @g1, align 4
28+
%mul = mul nsw i32 %1, %0
29+
%add = add nsw i32 %mul, 1
30+
ret i32 %add
31+
}
32+
33+
define i32 @f2(i32 %a) {
34+
entry:
35+
%idxprom = sext i32 %a to i64
36+
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
37+
%0 = load i32, i32* %arrayidx, align 4
38+
%1 = load volatile i32, i32* @g1, align 4
39+
%mul = mul nsw i32 %1, %0
40+
%add = add nsw i32 %mul, 1
41+
ret i32 %add
42+
}

0 commit comments

Comments
 (0)