Skip to content

Commit 475a93a

Browse files
committed
[BOLT] Calculate output values using BOLTLinker
BOLT uses `MCAsmLayout` to calculate the output values of functions and basic blocks. This means output values are calculated based on a pre-linking state, and any changes to symbol values during linking will cause incorrect values to be used.

This issue can be triggered by enabling linker relaxation on RISC-V. Since linker relaxation can remove instructions, symbol values may change. This causes, among other things, the symbol table created by BOLT in the output executable to be incorrect.

This patch solves this issue by using `BOLTLinker` to get symbol values instead of `MCAsmLayout`. This way, output values are calculated based on a post-linking state. To make sure the linker can update all necessary symbols, this patch also makes sure all these symbols are not marked as temporary so that they end up in the object file's symbol table.

Note that this patch only deals with symbols of binary functions (`BinaryFunction::updateOutputValues`). The technique described above turned out to be too expensive for basic block symbols, so those are handled differently in D155604.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D154604
1 parent a470df3 commit 475a93a

9 files changed

+56
-53
lines changed

bolt/include/bolt/Core/BinaryFunction.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,7 @@ class BinaryFunction {
11911191

11921192
if (!Islands->FunctionConstantIslandLabel) {
11931193
Islands->FunctionConstantIslandLabel =
1194-
BC.Ctx->createNamedTempSymbol("func_const_island");
1194+
BC.Ctx->getOrCreateSymbol("func_const_island@" + getOneName());
11951195
}
11961196
return Islands->FunctionConstantIslandLabel;
11971197
}
@@ -1201,7 +1201,7 @@ class BinaryFunction {
12011201

12021202
if (!Islands->FunctionColdConstantIslandLabel) {
12031203
Islands->FunctionColdConstantIslandLabel =
1204-
BC.Ctx->createNamedTempSymbol("func_cold_const_island");
1204+
BC.Ctx->getOrCreateSymbol("func_cold_const_island@" + getOneName());
12051205
}
12061206
return Islands->FunctionColdConstantIslandLabel;
12071207
}
@@ -1221,7 +1221,7 @@ class BinaryFunction {
12211221
}
12221222

12231223
/// Update output values of the function based on the post-link symbol values looked up through \p Linker.
1224-
void updateOutputValues(const MCAsmLayout &Layout);
1224+
void updateOutputValues(const BOLTLinker &Linker);
12251225

12261226
/// Register relocation type \p RelType at a given \p Address in the function
12271227
/// against \p Symbol.

bolt/include/bolt/Core/Linker.h

+14-1
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,28 @@ class BOLTLinker {
3131
std::function<void(const BinarySection &Section, uint64_t Address)>;
3232
using SectionsMapper = std::function<void(SectionMapper)>;
3333

34+
struct SymbolInfo {
35+
uint64_t Address;
36+
uint64_t Size;
37+
};
38+
3439
virtual ~BOLTLinker() = default;
3540

3641
/// Load and link \p Obj. \p MapSections will be called before the object is
3742
/// linked to allow section addresses to be remapped. When called, the address
3843
/// of a section can be changed by calling the passed SectionMapper.
3944
virtual void loadObject(MemoryBufferRef Obj, SectionsMapper MapSections) = 0;
4045

46+
/// Return the address and size of a symbol or std::nullopt if it cannot be
47+
/// found.
48+
virtual std::optional<SymbolInfo> lookupSymbolInfo(StringRef Name) const = 0;
49+
4150
/// Return the address of a symbol or std::nullopt if it cannot be found.
42-
virtual std::optional<uint64_t> lookupSymbol(StringRef Name) const = 0;
51+
std::optional<uint64_t> lookupSymbol(StringRef Name) const {
52+
if (const auto Info = lookupSymbolInfo(Name))
53+
return Info->Address;
54+
return std::nullopt;
55+
}
4356
};
4457

4558
} // namespace bolt

bolt/include/bolt/Rewrite/JITLinkLinker.h

+2-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include "bolt/Rewrite/ExecutableFileMemoryManager.h"
1818
#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
1919

20-
#include <map>
2120
#include <memory>
2221
#include <vector>
2322

@@ -35,15 +34,15 @@ class JITLinkLinker : public BOLTLinker {
3534
std::unique_ptr<ExecutableFileMemoryManager> MM;
3635
jitlink::JITLinkDylib Dylib{"main"};
3736
std::vector<ExecutableFileMemoryManager::FinalizedAlloc> Allocs;
38-
std::map<std::string, uint64_t> Symtab;
37+
StringMap<SymbolInfo> Symtab;
3938

4039
public:
4140
JITLinkLinker(BinaryContext &BC,
4241
std::unique_ptr<ExecutableFileMemoryManager> MM);
4342
~JITLinkLinker();
4443

4544
void loadObject(MemoryBufferRef Obj, SectionsMapper MapSections) override;
46-
std::optional<uint64_t> lookupSymbol(StringRef Name) const override;
45+
std::optional<SymbolInfo> lookupSymbolInfo(StringRef Name) const override;
4746

4847
static SmallVector<jitlink::Block *, 2>
4948
orderedBlocks(const jitlink::Section &Section);

bolt/include/bolt/Rewrite/RewriteInstance.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ class RewriteInstance {
190190
void mapAllocatableSections(BOLTLinker::SectionMapper MapSection);
191191

192192
/// Update output object's values based on the symbol values looked up through \p Linker.
193-
void updateOutputValues(const MCAsmLayout &Layout);
193+
void updateOutputValues(const BOLTLinker &Linker);
194194

195195
/// Rewrite back all functions (hopefully optimized) that fit in the original
196196
/// memory footprint for that function. If the function is now larger and does

bolt/lib/Core/BinaryBasicBlock.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include "bolt/Core/BinaryContext.h"
1515
#include "bolt/Core/BinaryFunction.h"
1616
#include "llvm/ADT/SmallPtrSet.h"
17-
#include "llvm/MC/MCAsmLayout.h"
1817
#include "llvm/MC/MCInst.h"
1918
#include "llvm/Support/Errc.h"
2019

bolt/lib/Core/BinaryFunction.cpp

+25-30
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include "llvm/ADT/StringRef.h"
2626
#include "llvm/Demangle/Demangle.h"
2727
#include "llvm/MC/MCAsmInfo.h"
28-
#include "llvm/MC/MCAsmLayout.h"
2928
#include "llvm/MC/MCContext.h"
3029
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
3130
#include "llvm/MC/MCExpr.h"
@@ -4030,33 +4029,37 @@ void BinaryFunction::calculateLoopInfo() {
40304029
}
40314030
}
40324031

4033-
void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
4032+
void BinaryFunction::updateOutputValues(const BOLTLinker &Linker) {
40344033
if (!isEmitted()) {
40354034
assert(!isInjected() && "injected function should be emitted");
40364035
setOutputAddress(getAddress());
40374036
setOutputSize(getSize());
40384037
return;
40394038
}
40404039

4041-
const uint64_t BaseAddress = getCodeSection()->getOutputAddress();
4040+
const auto SymbolInfo = Linker.lookupSymbolInfo(getSymbol()->getName());
4041+
assert(SymbolInfo && "Cannot find function entry symbol");
4042+
setOutputAddress(SymbolInfo->Address);
4043+
setOutputSize(SymbolInfo->Size);
4044+
40424045
if (BC.HasRelocations || isInjected()) {
4043-
const uint64_t StartOffset = Layout.getSymbolOffset(*getSymbol());
4044-
const uint64_t EndOffset = Layout.getSymbolOffset(*getFunctionEndLabel());
4045-
setOutputAddress(BaseAddress + StartOffset);
4046-
setOutputSize(EndOffset - StartOffset);
40474046
if (hasConstantIsland()) {
4048-
const uint64_t DataOffset =
4049-
Layout.getSymbolOffset(*getFunctionConstantIslandLabel());
4050-
setOutputDataAddress(BaseAddress + DataOffset);
4047+
const auto DataAddress =
4048+
Linker.lookupSymbol(getFunctionConstantIslandLabel()->getName());
4049+
assert(DataAddress && "Cannot find function CI symbol");
4050+
setOutputDataAddress(*DataAddress);
40514051
for (auto It : Islands->Offsets) {
40524052
const uint64_t OldOffset = It.first;
40534053
BinaryData *BD = BC.getBinaryDataAtAddress(getAddress() + OldOffset);
40544054
if (!BD)
40554055
continue;
40564056

40574057
MCSymbol *Symbol = It.second;
4058-
const uint64_t NewOffset = Layout.getSymbolOffset(*Symbol);
4059-
BD->setOutputLocation(*getCodeSection(), NewOffset);
4058+
const auto NewAddress = Linker.lookupSymbol(Symbol->getName());
4059+
assert(NewAddress && "Cannot find CI symbol");
4060+
auto &Section = *getCodeSection();
4061+
const auto NewOffset = *NewAddress - Section.getOutputAddress();
4062+
BD->setOutputLocation(Section, NewOffset);
40604063
}
40614064
}
40624065
if (isSplit()) {
@@ -4066,7 +4069,6 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
40664069
// If fragment is empty, cold section might not exist
40674070
if (FF.empty() && ColdSection.getError())
40684071
continue;
4069-
const uint64_t ColdBaseAddress = ColdSection->getOutputAddress();
40704072

40714073
const MCSymbol *ColdStartSymbol = getSymbol(FF.getFragmentNum());
40724074
// If fragment is empty, symbol might have not been emitted
@@ -4075,31 +4077,24 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
40754077
continue;
40764078
assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
40774079
"split function should have defined cold symbol");
4078-
const MCSymbol *ColdEndSymbol =
4079-
getFunctionEndLabel(FF.getFragmentNum());
4080-
assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
4081-
"split function should have defined cold end symbol");
4082-
const uint64_t ColdStartOffset =
4083-
Layout.getSymbolOffset(*ColdStartSymbol);
4084-
const uint64_t ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
4085-
FF.setAddress(ColdBaseAddress + ColdStartOffset);
4086-
FF.setImageSize(ColdEndOffset - ColdStartOffset);
4080+
const auto ColdStartSymbolInfo =
4081+
Linker.lookupSymbolInfo(ColdStartSymbol->getName());
4082+
assert(ColdStartSymbolInfo && "Cannot find cold start symbol");
4083+
FF.setAddress(ColdStartSymbolInfo->Address);
4084+
FF.setImageSize(ColdStartSymbolInfo->Size);
40874085
if (hasConstantIsland()) {
4088-
const uint64_t DataOffset =
4089-
Layout.getSymbolOffset(*getFunctionColdConstantIslandLabel());
4090-
setOutputColdDataAddress(ColdBaseAddress + DataOffset);
4086+
const auto DataAddress = Linker.lookupSymbol(
4087+
getFunctionColdConstantIslandLabel()->getName());
4088+
assert(DataAddress && "Cannot find cold CI symbol");
4089+
setOutputColdDataAddress(*DataAddress);
40914090
}
40924091
}
40934092
}
4094-
} else {
4095-
setOutputAddress(getAddress());
4096-
setOutputSize(Layout.getSymbolOffset(*getFunctionEndLabel()));
40974093
}
40984094

40994095
// Update basic block output ranges for the debug info, if we have
41004096
// secondary entry points in the symbol table to update or if writing BAT.
4101-
if (!opts::UpdateDebugSections && !isMultiEntry() &&
4102-
!requiresAddressTranslation())
4097+
if (!requiresAddressMap())
41034098
return;
41044099

41054100
// Output ranges should match the input if the body hasn't changed.

bolt/lib/Rewrite/JITLinkLinker.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,8 @@ struct JITLinkLinker::Context : jitlink::JITLinkContext {
142142
});
143143

144144
for (auto *Symbol : G.defined_symbols()) {
145-
Linker.Symtab.insert(
146-
{Symbol->getName().str(), Symbol->getAddress().getValue()});
145+
SymbolInfo Info{Symbol->getAddress().getValue(), Symbol->getSize()};
146+
Linker.Symtab.insert({Symbol->getName().str(), Info});
147147
}
148148

149149
return Error::success();
@@ -174,7 +174,8 @@ void JITLinkLinker::loadObject(MemoryBufferRef Obj,
174174
jitlink::link(std::move(*LG), std::move(Ctx));
175175
}
176176

177-
std::optional<uint64_t> JITLinkLinker::lookupSymbol(StringRef Name) const {
177+
std::optional<JITLinkLinker::SymbolInfo>
178+
JITLinkLinker::lookupSymbolInfo(StringRef Name) const {
178179
auto It = Symtab.find(Name.data());
179180
if (It == Symtab.end())
180181
return std::nullopt;

bolt/lib/Rewrite/MachORewriteInstance.cpp

-4
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "bolt/Rewrite/JITLinkLinker.h"
2121
#include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
2222
#include "bolt/Utils/Utils.h"
23-
#include "llvm/MC/MCAsmLayout.h"
2423
#include "llvm/MC/MCObjectStreamer.h"
2524
#include "llvm/Support/Errc.h"
2625
#include "llvm/Support/FileSystem.h"
@@ -476,9 +475,6 @@ void MachORewriteInstance::emitAndLink() {
476475
"error creating in-memory object");
477476
assert(Obj && "createObjectFile cannot return nullptr");
478477

479-
MCAsmLayout FinalLayout(
480-
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
481-
482478
auto EFMM = std::make_unique<ExecutableFileMemoryManager>(*BC);
483479
EFMM->setNewSecPrefix(getNewSecPrefix());
484480
EFMM->setOrgSecPrefix(getOrgSecPrefix());

bolt/lib/Rewrite/RewriteInstance.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -3241,15 +3241,15 @@ void RewriteInstance::emitAndLink() {
32413241
Linker->loadObject(ObjectMemBuffer->getMemBufferRef(),
32423242
[this](auto MapSection) { mapFileSections(MapSection); });
32433243

3244-
MCAsmLayout FinalLayout(
3245-
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
3246-
32473244
// Update output addresses based on the new section map and
32483245
// layout. Only do this for the object created by ourselves.
3249-
updateOutputValues(FinalLayout);
3246+
updateOutputValues(*Linker);
32503247

3251-
if (opts::UpdateDebugSections)
3248+
if (opts::UpdateDebugSections) {
3249+
MCAsmLayout FinalLayout(
3250+
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
32523251
DebugInfoRewriter->updateLineTableOffsets(FinalLayout);
3252+
}
32533253

32543254
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
32553255
RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) {
@@ -3644,15 +3644,15 @@ void RewriteInstance::mapAllocatableSections(
36443644
}
36453645
}
36463646

3647-
void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
3647+
void RewriteInstance::updateOutputValues(const BOLTLinker &Linker) {
36483648
if (auto MapSection = BC->getUniqueSectionByName(AddressMap::SectionName)) {
36493649
auto Map = AddressMap::parse(MapSection->getOutputContents(), *BC);
36503650
BC->setIOAddressMap(std::move(Map));
36513651
BC->deregisterSection(*MapSection);
36523652
}
36533653

36543654
for (BinaryFunction *Function : BC->getAllBinaryFunctions())
3655-
Function->updateOutputValues(Layout);
3655+
Function->updateOutputValues(Linker);
36563656
}
36573657

36583658
void RewriteInstance::patchELFPHDRTable() {

0 commit comments

Comments
 (0)