Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cpu-o3: add stats for BPU override bubbles and UFTB pred/update #263

Open
wants to merge 29 commits into
base: xs-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
eb877ea
cpu-o3: Add stats for BPU override bubbles and UFTB pred/update
Lawrence-ID Jan 8, 2025
1b97cec
cpu-o3: optimize and add more stats about bpu override
Lawrence-ID Jan 9, 2025
39251ec
cpu-o3: chores about code style
Lawrence-ID Jan 9, 2025
2b0a348
cpu-o3: add fine-grained statistics for related counters
Lawrence-ID Jan 14, 2025
918dbcc
cpu-o3: update UFTB ctr when UFTB hit but condTaken diff from s1 pred
Lawrence-ID Jan 14, 2025
73a96ec
cpu-o3: comment updateUftbWhenOverrideByL1
Lawrence-ID Jan 14, 2025
893f614
cpu-o3: set uftb tageBits to 16, the same as RTL
Lawrence-ID Jan 16, 2025
f4f7855
arch-riscv: add sv48 warning message (#262)
jueshiwenli Jan 8, 2025
c87985c
cpu-o3: ideal: fix NumBr = 8
jensen-yan Jan 7, 2025
d9da996
cpu-o3: ideal: use 14 tage table according to seznec
jensen-yan Jan 8, 2025
9b55b83
util: update README
jensen-yan Jan 7, 2025
a02dae9
mem-cache: if slice stall, request will retry
tastynoob Jan 10, 2025
2f157a4
cpu-o3: write request will block dcache one cycle
tastynoob Jan 13, 2025
b27fd85
cpu-o3: delete pending wake events if cancel (#266)
happy-lx Jan 14, 2025
ff7e932
cpu-o3: Transform the lsqunit
happy-lx Nov 26, 2024
41e31fb
arch-riscv: change fence's opType
happy-lx Dec 9, 2024
3cad995
cpu-o3: Split operations in the ldst pipeline
happy-lx Dec 9, 2024
7028e73
cpu-o3: replay cache missed load from replayQ
happy-lx Nov 21, 2024
0f03607
arch: use strictly order-preserving LRSC
happy-lx Dec 13, 2024
f237d7a
mem: let load have certain latency in ruby cache
happy-lx Dec 17, 2024
843a74e
cpu-o3: tune the behavior of the ldst pipeline
happy-lx Dec 17, 2024
8cdfeb3
cpu-o3: add params to control nuke and miss replay
happy-lx Dec 18, 2024
0b3e0bb
cpu-o3: make store wb stage configurable
happy-lx Dec 20, 2024
13fbc32
cpu-o3: refactor fullforward code
happy-lx Jan 2, 2025
95bdac0
mem: fix write packet latency calculation
happy-lx Jan 10, 2025
104a043
mem: add Load Custom Hint Wakeup
happy-lx Jan 10, 2025
2d232dd
cpu-o3: refactor lsu related code
happy-lx Jan 14, 2025
817c578
cpu-o3: branchPred: add ftq end reason stats (#232)
jensen-yan Jan 16, 2025
ae4d7aa
cpu-o3: resolve conflicts
Lawrence-ID Jan 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion configs/example/xiangshan.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,9 @@ def setKmhV3IdealParams(args, system):
cpu.branchPred.tage.enableSC = False # TODO(bug): When numBr changes, enabling SC will trigger an assert
cpu.branchPred.ftq_size = 256
cpu.branchPred.fsq_size = 256
cpu.branchPred.uftb.numEntries = 1024
uftb_size = 1024
cpu.branchPred.uftb.numEntries = uftb_size
cpu.branchPred.uftb.numWays = uftb_size
cpu.branchPred.ftb.numEntries = 16384
cpu.branchPred.tage.numPredictors = 9
cpu.branchPred.tage.baseTableSize = 4096
Expand Down
34 changes: 31 additions & 3 deletions src/cpu/pred/ftb/decoupled_bpred.cc
Original file line number Diff line number Diff line change
Expand Up @@ -449,8 +449,18 @@ DecoupledBPUWithFTB::DBPFTBStats::DBPFTBStats(statistics::Group* parent, unsigne
ADD_STAT(otherMiss, statistics::units::Count::get(), "the number of other branch misses"),
ADD_STAT(staticBranchNum, statistics::units::Count::get(), "the number of all (different) static branches"),
ADD_STAT(staticBranchNumEverTaken, statistics::units::Count::get(), "the number of all (different) static branches that are once taken"),
ADD_STAT(predsOfEachStage, statistics::units::Count::get(), "the number of preds of each stage that account for final pred"),
ADD_STAT(commitPredsFromEachStage, statistics::units::Count::get(), "the number of preds of each stage that account for a committed stream"),
ADD_STAT(overrideByL1, statistics::units::Count::get(), "the number of preds override by L1"),
ADD_STAT(overrideByL1WhenL0Hit, statistics::units::Count::get(),
"the number of preds override by L1, when L0 Hit and L1 Hit"),
ADD_STAT(overrideByL1WhenL0Miss, statistics::units::Count::get(),
"the number of preds override by L1, when L0 Miss and L1 Hit"),
ADD_STAT(overrideByL2, statistics::units::Count::get(), "the number of preds override by L2"),
ADD_STAT(squashWhenOverriding, statistics::units::Count::get(), "the number of squash when overriding"),
ADD_STAT(overrideBubbles, statistics::units::Count::get(), "number of bpu pred Override Bubbles"),
ADD_STAT(predsOfEachStage, statistics::units::Count::get(),
"the number of preds of each stage that account for final pred"),
ADD_STAT(commitPredsFromEachStage, statistics::units::Count::get(),
"the number of preds of each stage that account for a committed stream"),
ADD_STAT(fsqEntryDist, statistics::units::Count::get(), "the distribution of number of entries in fsq"),
ADD_STAT(fsqEntryEnqueued, statistics::units::Count::get(), "the number of fsq entries enqueued"),
ADD_STAT(fsqEntryCommitted, statistics::units::Count::get(), "the number of fsq entries committed at last"),
Expand Down Expand Up @@ -610,6 +620,10 @@ DecoupledBPUWithFTB::tick()
numOverrideBubbles = generateFinalPredAndCreateBubbles();
}

if (squashing && numOverrideBubbles > 0 && receivedPred && s0PC != MaxAddr && !sentPCHist) {
dbpFtbStats.squashWhenOverriding++;
}

if (!squashing) {
DPRINTF(DecoupleBP, "DecoupledBPUWithFTB::tick()\n");
DPRINTF(Override, "DecoupledBPUWithFTB::tick()\n");
Expand Down Expand Up @@ -650,7 +664,6 @@ DecoupledBPUWithFTB::tick()
sentPCHist = true;
}


// query loop buffer with start pc
if (enableLoopBuffer && !lb.isActive() &&
lb.streamBeforeLoop.getTakenTarget() == lb.streamBeforeLoop.startPC &&
Expand Down Expand Up @@ -800,6 +813,7 @@ DecoupledBPUWithFTB::generateFinalPredAndCreateBubbles()
}
first_hit_stage++;
}

// generate bubbles
bubblesToCreate = first_hit_stage;
// assign pred source
Expand Down Expand Up @@ -831,6 +845,19 @@ DecoupledBPUWithFTB::generateFinalPredAndCreateBubbles()

printFullFTBPrediction(*chosen);
dbpFtbStats.predsOfEachStage[first_hit_stage]++;
if (!squashing && s0PC != MaxAddr && receivedPred && bubblesToCreate > 0) {
if (first_hit_stage == 1) {
assert(predsOfEachStage[1].valid);
if (predsOfEachStage[0].valid) {
dbpFtbStats.overrideByL1WhenL0Hit++;
} else {
dbpFtbStats.overrideByL1WhenL0Miss++;
}
dbpFtbStats.overrideByL1++;
}else if (first_hit_stage == 2) {
dbpFtbStats.overrideByL2++;
}
}
} else {
bubblesToCreate = 0;
receivedPred = true;
Expand Down Expand Up @@ -2114,6 +2141,7 @@ DecoupledBPUWithFTB::tryEnqFetchStream()
}
// prediction valid, but not ready to enq because of bubbles
if (numOverrideBubbles > 0) {
dbpFtbStats.overrideBubbles++;
DPRINTF(DecoupleBP, "Waiting for bubble caused by overriding, bubbles rest: %u\n", numOverrideBubbles);
DPRINTF(Override, "Waiting for bubble caused by overriding, bubbles rest: %u\n", numOverrideBubbles);
return;
Expand Down
7 changes: 7 additions & 0 deletions src/cpu/pred/ftb/decoupled_bpred.hh
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,13 @@ class DecoupledBPUWithFTB : public BPredUnit
statistics::Scalar staticBranchNum;
statistics::Scalar staticBranchNumEverTaken;

// Override-related counters.
// The three "overrideByL1*" counters and overrideByL2 are bumped in
// generateFinalPredAndCreateBubbles() only when not squashing, s0PC is not
// MaxAddr, a prediction has been received and bubblesToCreate > 0.

// first_hit_stage == 1 (always equals WhenL0Hit + WhenL0Miss).
statistics::Scalar overrideByL1;
// first_hit_stage == 1 and predsOfEachStage[0].valid is true.
statistics::Scalar overrideByL1WhenL0Hit;
// first_hit_stage == 1 and predsOfEachStage[0].valid is false.
statistics::Scalar overrideByL1WhenL0Miss;
// first_hit_stage == 2.
statistics::Scalar overrideByL2;
// Bumped in tick() when squashing while numOverrideBubbles > 0, a prediction
// has been received, s0PC is not MaxAddr and the PC/history was not yet sent.
statistics::Scalar squashWhenOverriding;
// Bumped in tryEnqFetchStream() each time it returns early because
// numOverrideBubbles > 0.
statistics::Scalar overrideBubbles;

statistics::Vector predsOfEachStage;
statistics::Vector commitPredsFromEachStage;
statistics::Distribution fsqEntryDist;
Expand Down
44 changes: 44 additions & 0 deletions src/cpu/pred/ftb/ftb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,16 +221,40 @@ DefaultFTB::lookup(Addr inst_pc)
Addr ftb_tag = getTag(inst_pc);
DPRINTF(FTB, "FTB: Looking up FTB entry index %#lx tag %#lx\n", ftb_idx, ftb_tag);

if (isL0()) {
auto tag_it = predTagSet.find(ftb_tag);
if (tag_it == predTagSet.end()) {
predTagSet.insert(ftb_tag);
ftbStats.predTagSetSize++;
}
}

assert(ftb_idx < numSets);
// ignore false hit when lowest bit is 1
const auto &it = ftb[ftb_idx].find(ftb_tag);
if (it != ftb[ftb_idx].end()) {
assert(it->second.valid);
if (it->second.valid) {
it->second.tick = curTick();
std::make_heap(mruList[ftb_idx].begin(), mruList[ftb_idx].end(), older());
return it->second;
}
}

// ftb not hit
bool ftb_is_full = true;
for (auto it = ftb[ftb_idx].begin(); it != ftb[ftb_idx].end(); ++it) {
if (it->second.tick == 0) {
ftb_is_full = false;
ftbStats.predMissWhenNotFull++;
DPRINTF(FTB, "FTB: Looking up FTB entry index %#lx tag %#lx miss, ftb is not full\n", ftb_idx, ftb_tag);
break;
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bool ftb_is_full = (ftb[ftb_idx].size() >= numWays);
if (ftb_is_full) {
ftbStats.predMissWhenFull++;
DPRINTF(FTB, "FTB: Looking up FTB entry index %#lx tag %#lx miss, ftb is full\n", ftb_idx, ftb_tag);
} else {
ftbStats.predMissWhenNotFull++;
DPRINTF(FTB, "FTB: Looking up FTB entry index %#lx tag %#lx miss, ftb is not full\n", ftb_idx, ftb_tag);
}

I prefer this.

if (ftb_is_full){
ftbStats.predMissWhenFull++;
DPRINTF(FTB, "FTB: Looking up FTB entry index %#lx tag %#lx miss, ftb is full\n", ftb_idx, ftb_tag);
}
return TickedFTBEntry();
}

Expand Down Expand Up @@ -381,6 +405,14 @@ DefaultFTB::update(const FetchStream &stream)
Addr ftb_idx = getIndex(startPC);
Addr ftb_tag = getTag(startPC);

if (isL0()) {
auto tag_it = updateTagSet.find(ftb_tag);
if (tag_it == updateTagSet.end()) {
updateTagSet.insert(ftb_tag);
ftbStats.updateTagSetSize++;
}
}

DPRINTF(FTB, "FTB: Updating FTB entry index %#lx tag %#lx\n", ftb_idx, ftb_tag);

auto it = ftb[ftb_idx].find(ftb_tag);
Expand All @@ -391,6 +423,11 @@ DefaultFTB::update(const FetchStream &stream)
std::pop_heap(mruList[ftb_idx].begin(), mruList[ftb_idx].end(), older());
const auto& old_entry = mruList[ftb_idx].back();
DPRINTF(FTB, "FTB: Replacing entry with tag %#lx in set %#lx\n", old_entry->first, ftb_idx);
if (old_entry->second.tick == 0) {
ftbStats.updateUseEmptyEntry++;
} else {
ftbStats.updateUseOldEntry++;
}
ftb[ftb_idx].erase(old_entry->first);
}

Expand Down Expand Up @@ -447,6 +484,7 @@ DefaultFTB::update(const FetchStream &stream)
}
}

assert(entry_to_write.valid);
ftb[ftb_idx][ftb_tag] = TickedFTBEntry(entry_to_write, curTick());
ftb[ftb_idx][ftb_tag].tag = ftb_tag; // in case different ftb has different tags

Expand Down Expand Up @@ -580,9 +618,15 @@ DefaultFTB::FTBStats::FTBStats(statistics::Group* parent) :
ADD_STAT(oldEntryWithNewCond, statistics::units::Count::get(), "number of old ftb entries with new conditional branches"),
ADD_STAT(oldEntryWithNewUncond, statistics::units::Count::get(), "number of old ftb entries with new unconditional branches"),
ADD_STAT(predMiss, statistics::units::Count::get(), "misses encountered on prediction"),
ADD_STAT(predMissWhenFull, statistics::units::Count::get(), "misses encountered on pred when ftb full"),
ADD_STAT(predMissWhenNotFull, statistics::units::Count::get(), "misses encountered on pred when ftb not full"),
ADD_STAT(predHit, statistics::units::Count::get(), "hits encountered on prediction"),
ADD_STAT(updateMiss, statistics::units::Count::get(), "misses encountered on update"),
ADD_STAT(updateHit, statistics::units::Count::get(), "hits encountered on update"),
ADD_STAT(updateUseEmptyEntry, statistics::units::Count::get(), "use empty entry when update"),
ADD_STAT(updateUseOldEntry, statistics::units::Count::get(), "update old entry when update"),
ADD_STAT(predTagSetSize, statistics::units::Count::get(), "uftb pred tag set size"),
ADD_STAT(updateTagSetSize, statistics::units::Count::get(), "uftb update tag set size"),
ADD_STAT(eraseSlotBehindUncond, statistics::units::Count::get(), "erase slots behind unconditional slot"),
ADD_STAT(predUseL0OnL1Miss, statistics::units::Count::get(), "use l0 result on l1 miss when pred"),
ADD_STAT(updateUseL0OnL1Miss, statistics::units::Count::get(), "use l0 result on l1 miss when update"),
Expand Down
11 changes: 11 additions & 0 deletions src/cpu/pred/ftb/ftb.hh
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ class DefaultFTB : public TimedBaseFTBPredictor
/** The number of tag bits per entry. */
unsigned tagBits;

/**
 * Distinct tags observed so far, only used for stats.
 * predTagSet is filled in lookup() and updateTagSet in update(); both are
 * only touched when isL0() is true. Each first-time insertion also bumps
 * the matching predTagSetSize / updateTagSetSize stat.
 */
std::set<Addr> predTagSet;
std::set<Addr> updateTagSet;

/** The tag mask. */
Addr tagMask;

Expand Down Expand Up @@ -277,9 +281,16 @@ class DefaultFTB : public TimedBaseFTBPredictor
statistics::Scalar oldEntryWithNewUncond;

statistics::Scalar predMiss;
// lookup() miss where no entry in the indexed set has tick == 0.
statistics::Scalar predMissWhenFull;
// lookup() miss where at least one entry in the indexed set has tick == 0.
statistics::Scalar predMissWhenNotFull;
statistics::Scalar predHit;
statistics::Scalar updateMiss;
statistics::Scalar updateHit;
// update() replaced a victim entry whose tick == 0.
statistics::Scalar updateUseEmptyEntry;
// update() replaced a victim entry whose tick != 0.
statistics::Scalar updateUseOldEntry;

// Incremented each time a previously unseen tag is inserted into
// predTagSet (lookup) or updateTagSet (update); only when isL0() is true.
statistics::Scalar predTagSetSize;
statistics::Scalar updateTagSetSize;

statistics::Scalar eraseSlotBehindUncond;

Expand Down