Skip to content

Commit b4b2cc0

Browse files
committed
Merging r323915:
------------------------------------------------------------------------
r323915 | chandlerc | 2018-01-31 12:56:37 -0800 (Wed, 31 Jan 2018) | 17 lines

[x86] Make the retpoline thunk insertion a machine function pass.

Summary:
This removes the need for a machine module pass using some deeply
questionable hacks. This should address PR36123 which is a case where in
full LTO the memory usage of a machine module pass actually ended up
being significant.

We should revert this on trunk as soon as we understand and fix the
memory usage issue, but we should include this in any backports of
retpolines themselves.

Reviewers: echristo, MatzeB

Subscribers: sanjoy, mcrosier, mehdi_amini, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D42726
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@324009 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 54049c8 commit b4b2cc0

File tree

3 files changed

+87
-53
lines changed

3 files changed

+87
-53
lines changed

lib/Target/X86/X86.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ void initializeFixupBWInstPassPass(PassRegistry &);
100100
FunctionPass *createX86EvexToVexInsts();
101101

102102
/// This pass creates the thunks for the retpoline feature.
103-
ModulePass *createX86RetpolineThunksPass();
103+
FunctionPass *createX86RetpolineThunksPass();
104104

105105
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
106106
X86Subtarget &,

lib/Target/X86/X86RetpolineThunks.cpp

+85-50
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,27 @@ using namespace llvm;
3838

3939
#define DEBUG_TYPE "x86-retpoline-thunks"
4040

41+
static const char ThunkNamePrefix[] = "__llvm_retpoline_";
42+
static const char R11ThunkName[] = "__llvm_retpoline_r11";
43+
static const char EAXThunkName[] = "__llvm_retpoline_eax";
44+
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
45+
static const char EDXThunkName[] = "__llvm_retpoline_edx";
46+
static const char PushThunkName[] = "__llvm_retpoline_push";
47+
4148
namespace {
42-
class X86RetpolineThunks : public ModulePass {
49+
class X86RetpolineThunks : public MachineFunctionPass {
4350
public:
4451
static char ID;
4552

46-
X86RetpolineThunks() : ModulePass(ID) {}
53+
X86RetpolineThunks() : MachineFunctionPass(ID) {}
4754

4855
StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
4956

50-
bool runOnModule(Module &M) override;
57+
bool doInitialization(Module &M) override;
58+
bool runOnMachineFunction(MachineFunction &F) override;
5159

5260
void getAnalysisUsage(AnalysisUsage &AU) const override {
61+
MachineFunctionPass::getAnalysisUsage(AU);
5362
AU.addRequired<MachineModuleInfo>();
5463
AU.addPreserved<MachineModuleInfo>();
5564
}
@@ -61,51 +70,74 @@ class X86RetpolineThunks : public ModulePass {
6170
const X86Subtarget *STI;
6271
const X86InstrInfo *TII;
6372

64-
Function *createThunkFunction(Module &M, StringRef Name);
73+
bool InsertedThunks;
74+
75+
void createThunkFunction(Module &M, StringRef Name);
6576
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
6677
void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
67-
void createThunk(Module &M, StringRef NameSuffix,
68-
Optional<unsigned> Reg = None);
78+
void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
6979
};
7080

7181
} // end anonymous namespace
7282

73-
ModulePass *llvm::createX86RetpolineThunksPass() {
83+
FunctionPass *llvm::createX86RetpolineThunksPass() {
7484
return new X86RetpolineThunks();
7585
}
7686

7787
char X86RetpolineThunks::ID = 0;
7888

79-
bool X86RetpolineThunks::runOnModule(Module &M) {
80-
DEBUG(dbgs() << getPassName() << '\n');
89+
bool X86RetpolineThunks::doInitialization(Module &M) {
90+
InsertedThunks = false;
91+
return false;
92+
}
8193

82-
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
83-
assert(TPC && "X86-specific target pass should not be run without a target "
84-
"pass config!");
94+
bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
95+
DEBUG(dbgs() << getPassName() << '\n');
8596

86-
MMI = &getAnalysis<MachineModuleInfo>();
87-
TM = &TPC->getTM<TargetMachine>();
97+
TM = &MF.getTarget();;
98+
STI = &MF.getSubtarget<X86Subtarget>();
99+
TII = STI->getInstrInfo();
88100
Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
89101

90-
// Only add a thunk if we have at least one function that has the retpoline
91-
// feature enabled in its subtarget.
92-
// FIXME: Conditionalize on indirect calls so we don't emit a thunk when
93-
// nothing will end up calling it.
94-
// FIXME: It's a little silly to look at every function just to enumerate
95-
// the subtargets, but eventually we'll want to look at them for indirect
96-
// calls, so maybe this is OK.
97-
if (!llvm::any_of(M, [&](const Function &F) {
98-
// Save the subtarget we find for use in emitting the subsequent
99-
// thunk.
100-
STI = &TM->getSubtarget<X86Subtarget>(F);
101-
return STI->useRetpoline() && !STI->useRetpolineExternalThunk();
102-
}))
103-
return false;
104-
105-
// If we have a relevant subtarget, get the instr info as well.
106-
TII = STI->getInstrInfo();
102+
MMI = &getAnalysis<MachineModuleInfo>();
103+
Module &M = const_cast<Module &>(*MMI->getModule());
104+
105+
// If this function is not a thunk, check to see if we need to insert
106+
// a thunk.
107+
if (!MF.getName().startswith(ThunkNamePrefix)) {
108+
// If we've already inserted a thunk, nothing else to do.
109+
if (InsertedThunks)
110+
return false;
111+
112+
// Only add a thunk if one of the functions has the retpoline feature
113+
// enabled in its subtarget, and doesn't enable external thunks.
114+
// FIXME: Conditionalize on indirect calls so we don't emit a thunk when
115+
// nothing will end up calling it.
116+
// FIXME: It's a little silly to look at every function just to enumerate
117+
// the subtargets, but eventually we'll want to look at them for indirect
118+
// calls, so maybe this is OK.
119+
if (!STI->useRetpoline() || STI->useRetpolineExternalThunk())
120+
return false;
121+
122+
// Otherwise, we need to insert the thunk.
123+
// WARNING: This is not really a well behaving thing to do in a function
124+
// pass. We extract the module and insert a new function (and machine
125+
// function) directly into the module.
126+
if (Is64Bit)
127+
createThunkFunction(M, R11ThunkName);
128+
else
129+
for (StringRef Name :
130+
{EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
131+
createThunkFunction(M, Name);
132+
InsertedThunks = true;
133+
return true;
134+
}
107135

136+
// If this *is* a thunk function, we need to populate it with the correct MI.
108137
if (Is64Bit) {
138+
assert(MF.getName() == "__llvm_retpoline_r11" &&
139+
"Should only have an r11 thunk on 64-bit targets");
140+
109141
// __llvm_retpoline_r11:
110142
// callq .Lr11_call_target
111143
// .Lr11_capture_spec:
@@ -116,8 +148,7 @@ bool X86RetpolineThunks::runOnModule(Module &M) {
116148
// .Lr11_call_target:
117149
// movq %r11, (%rsp)
118150
// retq
119-
120-
createThunk(M, "r11", X86::R11);
151+
populateThunk(MF, X86::R11);
121152
} else {
122153
// For 32-bit targets we need to emit a collection of thunks for various
123154
// possible scratch registers as well as a fallback that is used when
@@ -161,16 +192,25 @@ bool X86RetpolineThunks::runOnModule(Module &M) {
161192
// popl 8(%esp) # Pop RA to final RA
162193
// popl (%esp) # Pop callee to next top of stack
163194
// retl # Ret to callee
164-
createThunk(M, "eax", X86::EAX);
165-
createThunk(M, "ecx", X86::ECX);
166-
createThunk(M, "edx", X86::EDX);
167-
createThunk(M, "push");
195+
if (MF.getName() == EAXThunkName)
196+
populateThunk(MF, X86::EAX);
197+
else if (MF.getName() == ECXThunkName)
198+
populateThunk(MF, X86::ECX);
199+
else if (MF.getName() == EDXThunkName)
200+
populateThunk(MF, X86::EDX);
201+
else if (MF.getName() == PushThunkName)
202+
populateThunk(MF);
203+
else
204+
llvm_unreachable("Invalid thunk name on x86-32!");
168205
}
169206

170207
return true;
171208
}
172209

173-
Function *X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
210+
void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
211+
assert(Name.startswith(ThunkNamePrefix) &&
212+
"Created a thunk with an unexpected prefix!");
213+
174214
LLVMContext &Ctx = M.getContext();
175215
auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
176216
Function *F =
@@ -190,7 +230,6 @@ Function *X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
190230
IRBuilder<> Builder(Entry);
191231

192232
Builder.CreateRetVoid();
193-
return F;
194233
}
195234

196235
void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
@@ -200,6 +239,7 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
200239
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
201240
.addReg(Reg);
202241
}
242+
203243
void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
204244
MachineBasicBlock &MBB) {
205245
// The instruction sequence we use to replace the return address without
@@ -225,21 +265,16 @@ void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
225265
false, 0);
226266
}
227267

228-
void X86RetpolineThunks::createThunk(Module &M, StringRef NameSuffix,
229-
Optional<unsigned> Reg) {
230-
Function &F =
231-
*createThunkFunction(M, (Twine("__llvm_retpoline_") + NameSuffix).str());
232-
MachineFunction &MF = MMI->getOrCreateMachineFunction(F);
233-
268+
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
269+
Optional<unsigned> Reg) {
234270
// Set MF properties. We never use vregs...
235271
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
236272

237-
BasicBlock &OrigEntryBB = F.getEntryBlock();
238-
MachineBasicBlock *Entry = MF.CreateMachineBasicBlock(&OrigEntryBB);
239-
MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(&OrigEntryBB);
240-
MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(&OrigEntryBB);
273+
MachineBasicBlock *Entry = &MF.front();
274+
Entry->clear();
241275

242-
MF.push_back(Entry);
276+
MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
277+
MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
243278
MF.push_back(CaptureSpec);
244279
MF.push_back(CallTarget);
245280

test/CodeGen/X86/O0-pipeline.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@
5656
; CHECK-NEXT: Machine Natural Loop Construction
5757
; CHECK-NEXT: Insert XRay ops
5858
; CHECK-NEXT: Implement the 'patchable-function' attribute
59-
; CHECK-NEXT: X86 Retpoline Thunks
60-
; CHECK-NEXT: FunctionPass Manager
59+
; CHECK-NEXT: X86 Retpoline Thunks
6160
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
6261
; CHECK-NEXT: Machine Optimization Remark Emitter
6362
; CHECK-NEXT: MachineDominator Tree Construction

0 commit comments

Comments
 (0)