Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

timing(TLB): add tlb query use fast adder comparator support #4165

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/main/scala/top/Configs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -167,23 +167,26 @@ class MinimalConfig(n: Int = 1) extends Config(
partialStaticPMP = true,
outsideRecvFlush = true,
outReplace = false,
lgMaxSize = 4
lgMaxSize = 4,
useFac = true
),
sttlbParameters = TLBParameters(
name = "sttlb",
NWays = 4,
partialStaticPMP = true,
outsideRecvFlush = true,
outReplace = false,
lgMaxSize = 4
lgMaxSize = 4,
useFac = true
),
hytlbParameters = TLBParameters(
name = "hytlb",
NWays = 4,
partialStaticPMP = true,
outsideRecvFlush = true,
outReplace = false,
lgMaxSize = 4
lgMaxSize = 4,
useFac = true
),
pftlbParameters = TLBParameters(
name = "pftlb",
Expand Down
9 changes: 6 additions & 3 deletions src/main/scala/xiangshan/Parameters.scala
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,8 @@ case class XSCoreParameters
partialStaticPMP = true,
outsideRecvFlush = true,
saveLevel = false,
lgMaxSize = 4
lgMaxSize = 4,
useFac = true
),
sttlbParameters: TLBParameters = TLBParameters(
name = "sttlb",
Expand All @@ -297,7 +298,8 @@ case class XSCoreParameters
partialStaticPMP = true,
outsideRecvFlush = true,
saveLevel = false,
lgMaxSize = 4
lgMaxSize = 4,
useFac = true
),
hytlbParameters: TLBParameters = TLBParameters(
name = "hytlb",
Expand All @@ -306,7 +308,8 @@ case class XSCoreParameters
partialStaticPMP = true,
outsideRecvFlush = true,
saveLevel = false,
lgMaxSize = 4
lgMaxSize = 4,
useFac = true
),
pftlbParameters: TLBParameters = TLBParameters(
name = "pftlb",
Expand Down
5 changes: 4 additions & 1 deletion src/main/scala/xiangshan/backend/MemBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,9 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
}
dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
dtlb_reqs(L2toL1DLBPortIndex).req.bits.facA := io.l2_tlb_req.req.bits.vaddr(VAddrBits-1, sectorvpnOffLen)
dtlb_reqs(L2toL1DLBPortIndex).req.bits.facB := 0.U
dtlb_reqs(L2toL1DLBPortIndex).req.bits.facCarry := false.B
dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp

Expand Down Expand Up @@ -2044,4 +2047,4 @@ class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
if (p(DebugOptionsKey).ResetGen) {
ResetGen(ResetGenNode(Seq(ModuleNode(wrapper.inner.module))), reset, sim = false)
}
}
}
50 changes: 46 additions & 4 deletions src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,35 @@ class TlbSectorEntry(pageNormal: Boolean, pageSuper: Boolean)(implicit p: Parame
* bits0 0: need low 9bits
* bits1 0: need mid 9bits
*/
/** Hit check that takes its tag comparison from the fast adder comparator (FAC).
  *
  * The tag is not compared against slices of `vpn`; instead
  * `FastAdderComparator.genFac(facA, facB, facCarry, tag)` supplies a bit vector and each
  * page-table level matches when every bit of its slice of that vector is set.
  * `vpn` is only used for its low three bits, which index `valididx` and `pteidx`.
  *
  * @param vpn        virtual page number; only bits (2, 0) are read here
  * @param facA       first operand handed to the comparator
  * @param facB       second operand handed to the comparator
  * @param facCarry   carry-in handed to the comparator
  * @param asid       compared with this entry's `asid` unless `ignoreAsid` is set
  * @param nSets      not read in this method
  * @param ignoreAsid elaboration-time switch that forces the ASID check to pass
  * @param vmid       compared with this entry's `vmid` when `hasS2xlate` is asserted
  * @param hasS2xlate enables the VMID check and (with further conditions) the pteidx check
  * @param onlyS2     together with `hasS2xlate`, bypasses the ASID check
  * @param onlyS1     when asserted, disables the pteidx check
  * @return true when the ASID, VMID, valid-index, pteidx and all per-level checks pass
  */
def fachit(vpn: UInt, facA: UInt, facB: UInt, facCarry: Bool, asid: UInt,
  nSets: Int = 1, ignoreAsid: Boolean = false, vmid: UInt, hasS2xlate: Bool, onlyS2: Bool = false.B, onlyS1: Bool = false.B): Bool = {
  val asid_hit = Mux(hasS2xlate && onlyS2, true.B, if (ignoreAsid) true.B else (this.asid === asid))
  val addr_low_hit = valididx(vpn(2, 0))
  val vmid_hit = Mux(hasS2xlate, this.vmid === vmid, true.B)
  val isPageSuper = !(level.getOrElse(0.U) === 0.U)
  // The pteidx check only applies to two-stage, non-super, non-onlyS1 lookups with n === 0.
  val pteidx_hit = Mux(hasS2xlate && !isPageSuper && !onlyS1 && n === 0.U, pteidx(vpn(2, 0)), true.B)

  val tmp_level = level.get
  // Use the Fast Adder Comparator for the tag comparison.
  // Presumably bit i is set when bit i of (facA + facB + facCarry) equals bit i of tag
  // -- confirm against FastAdderComparator.genFac.
  // NOTE(review): the slices below are reduced independently per level, and level_matchs
  // ignores the lower slices for larger pages (the n =/= 0 case also skips the low bits).
  // If genFac derives the carry into a bit from the assumption that the lower bits matched,
  // the upper slices may be wrong whenever the lower bits are ignored -- confirm.
  val tag_match_fac = FastAdderComparator.genFac(facA, facB, facCarry, tag)
  val tag_matchs = Wire(Vec(Level + 1, Bool()))
  // Level 0: when n =/= 0 the bits below pteNapotBits are excluded from the comparison.
  tag_matchs(0) := Mux(
    n === 0.U,
    tag_match_fac(vpnnLen - sectortlbwidth - 1, 0).andR,
    tag_match_fac(vpnnLen - sectortlbwidth - 1, pteNapotBits - sectortlbwidth).andR
  )
  for (i <- 1 until Level) {
    tag_matchs(i) := tag_match_fac(vpnnLen * (i + 1) - sectortlbwidth - 1, vpnnLen * i - sectortlbwidth).andR
  }
  tag_matchs(Level) := tag_match_fac(sectorvpnLen - 1, vpnnLen * Level - sectortlbwidth).andR
  // A level below the top also passes when the entry's level is greater than it.
  val level_matchs = Wire(Vec(Level + 1, Bool()))
  for (i <- 0 until Level) {
    level_matchs(i) := tag_matchs(i) || tmp_level >= (i + 1).U
  }
  level_matchs(Level) := tag_matchs(Level)

  asid_hit && level_matchs.asUInt.andR && addr_low_hit && vmid_hit && pteidx_hit
}

def hit(vpn: UInt, asid: UInt, nSets: Int = 1, ignoreAsid: Boolean = false, vmid: UInt, hasS2xlate: Bool, onlyS2: Bool = false.B, onlyS1: Bool = false.B): Bool = {
val asid_hit = Mux(hasS2xlate && onlyS2, true.B, if (ignoreAsid) true.B else (this.asid === asid))
Expand Down Expand Up @@ -409,6 +438,9 @@ class TlbStorageIO(nSets: Int, nWays: Int, ports: Int, nDups: Int = 1)(implicit
val req = Vec(ports, Flipped(DecoupledIO(new Bundle {
val vpn = Output(UInt(vpnLen.W))
val s2xlate = Output(UInt(2.W))
val facA = Output(UInt(sectorvpnLen.W))
val facB = Output(UInt(sectorvpnLen.W))
val facCarry = Output(Bool())
})))
val resp = Vec(ports, ValidIO(new Bundle{
val hit = Output(Bool())
Expand All @@ -426,11 +458,13 @@ class TlbStorageIO(nSets: Int, nWays: Int, ports: Int, nDups: Int = 1)(implicit
}))
val access = Vec(ports, new ReplaceAccessBundle(nSets, nWays))

def r_req_apply(valid: Bool, vpn: UInt, i: Int, s2xlate:UInt): Unit = {
/** Drives read-request port `i` of this storage IO.
  *
  * Connects the port's `valid` and, in its payload, `vpn`, `s2xlate` and the three
  * fast-adder-comparator operands (`facA`, `facB`, `facCarry`).
  */
def r_req_apply(valid: Bool, vpn: UInt, facA: UInt, facB: UInt, facCarry: Bool, i: Int, s2xlate:UInt): Unit = {
  val port = this.r.req(i)
  port.valid := valid
  // Lookup key and translation mode.
  port.bits.vpn := vpn
  port.bits.s2xlate := s2xlate
  // Operands forwarded for the comparator-based hit check.
  port.bits.facA := facA
  port.bits.facB := facB
  port.bits.facCarry := facCarry
}

def r_resp_apply(i: Int) = {
Expand All @@ -450,6 +484,9 @@ class TlbStorageWrapperIO(ports: Int, q: TLBParameters, nDups: Int = 1)(implicit
val req = Vec(ports, Flipped(DecoupledIO(new Bundle {
val vpn = Output(UInt(vpnLen.W))
val s2xlate = Output(UInt(2.W))
val facA = Output(UInt(sectorvpnLen.W))
val facB = Output(UInt(sectorvpnLen.W))
val facCarry = Output(Bool())
})))
val resp = Vec(ports, ValidIO(new Bundle{
val hit = Output(Bool())
Expand All @@ -466,10 +503,13 @@ class TlbStorageWrapperIO(ports: Int, q: TLBParameters, nDups: Int = 1)(implicit
}))
val replace = if (q.outReplace) Flipped(new TlbReplaceIO(ports, q)) else null

def r_req_apply(valid: Bool, vpn: UInt, i: Int, s2xlate: UInt): Unit = {
/** Drives read-request port `i` of the storage wrapper IO.
  *
  * Connects the port's `valid` and, in its payload, `vpn`, `s2xlate` and the three
  * fast-adder-comparator operands (`facA`, `facB`, `facCarry`).
  */
def r_req_apply(valid: Bool, vpn: UInt, facA: UInt, facB: UInt, facCarry: Bool, i: Int, s2xlate: UInt): Unit = {
  val port = this.r.req(i)
  port.valid := valid
  // Lookup key and translation mode.
  port.bits.vpn := vpn
  port.bits.s2xlate := s2xlate
  // Operands forwarded for the comparator-based hit check.
  port.bits.facA := facA
  port.bits.facB := facB
  port.bits.facCarry := facCarry
}

def r_resp_apply(i: Int) = {
Expand Down Expand Up @@ -537,7 +577,9 @@ class TlbReq(implicit p: Parameters) extends TlbBundle {
val robIdx = Output(new RobPtr)
val isFirstIssue = Output(Bool())
}

val facA = Output(UInt(sectorvpnLen.W))
val facB = Output(UInt(sectorvpnLen.W))
val facCarry = Output(Bool())
// Maybe Block req needs a kill: for itlb, itlb and icache may not sync, itlb should wait icache to go ahead
override def toPrintable: Printable = {
p"vaddr:0x${Hexadecimal(vaddr)} cmd:${cmd} kill:${kill} pc:0x${Hexadecimal(debug.pc)} robIdx:${debug.robIdx}"
Expand Down
4 changes: 3 additions & 1 deletion src/main/scala/xiangshan/cache/mmu/MMUConst.scala
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ case class TLBParameters
partialStaticPMP: Boolean = false, // partial static pmp result stored in entries
outsideRecvFlush: Boolean = false, // if outside moudle waiting for tlb recv flush pipe
saveLevel: Boolean = false,
lgMaxSize: Int = 3
lgMaxSize: Int = 3,
useFac: Boolean = false
)

case class L2TLBParameters
Expand Down Expand Up @@ -119,6 +120,7 @@ trait HasTlbConst extends HasXSParameter {
val sectorgvpnLen = gvpnLen - sectortlbwidth
val sectorvpnLen = vpnLen - sectortlbwidth
val sectorptePPNLen = ptePPNLen - sectortlbwidth
val sectorvpnOffLen = sectortlbwidth + offLen

val loadfiltersize = 16 // 4*3(LduCnt:2 + HyuCnt:1) + 4(prefetch:1)
val storefiltersize = if (StorePipelineWidth >= 3) 16 else 8
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/xiangshan/cache/mmu/TLB.scala
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
entries.io.base_connect(sfence, csr, satp)
if (q.outReplace) { io.replace <> entries.io.replace }
for (i <- 0 until Width) {
entries.io.r_req_apply(io.requestor(i).req.valid, get_pn(req_in(i).bits.vaddr), i, req_in_s2xlate(i))
entries.io.r_req_apply(io.requestor(i).req.valid, get_pn(req_in(i).bits.vaddr), req_in(i).bits.facA, req_in(i).bits.facB, req_in(i).bits.facCarry, i, req_in_s2xlate(i))
entries.io.w_apply(refill, ptw.resp.bits)
// TODO: RegNext enable:req.valid
resp(i).bits.debug.isFirstIssue := RegEnable(req(i).bits.debug.isFirstIssue, req(i).valid)
Expand Down
24 changes: 19 additions & 5 deletions src/main/scala/xiangshan/cache/mmu/TLBStorage.scala
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ class TLBFA(
nWays: Int,
saveLevel: Boolean = false,
normalPage: Boolean,
superPage: Boolean
superPage: Boolean,
useFac: Boolean
)(implicit p: Parameters) extends TlbModule with HasPerfEvents {

val io = IO(new TlbStorageIO(nSets, nWays, ports, nDups))
Expand All @@ -108,14 +109,22 @@ class TLBFA(

val vpn = req.bits.vpn
val vpn_reg = RegEnable(vpn, req.fire)
val facA = req.bits.facA
val facB = req.bits.facB
val facCarry = req.bits.facCarry
val hasS2xlate = req.bits.s2xlate =/= noS2xlate
val OnlyS2 = req.bits.s2xlate === onlyStage2
val OnlyS1 = req.bits.s2xlate === onlyStage1
val refill_mask = Mux(io.w.valid, UIntToOH(io.w.bits.wayIdx), 0.U(nWays.W))
val hitVec = VecInit((entries.zipWithIndex).zip(v zip refill_mask.asBools).map{
case (e, m) => {
val s2xlate_hit = e._1.s2xlate === req.bits.s2xlate
val hit = e._1.hit(vpn, Mux(hasS2xlate, io.csr.vsatp.asid, io.csr.satp.asid), vmid = io.csr.hgatp.vmid, hasS2xlate = hasS2xlate, onlyS2 = OnlyS2, onlyS1 = OnlyS1)
val hit =
if (useFac) {
e._1.fachit(vpn, facA, facB, facCarry, Mux(hasS2xlate, io.csr.vsatp.asid, io.csr.satp.asid), vmid = io.csr.hgatp.vmid, hasS2xlate = hasS2xlate, onlyS2 = OnlyS2, onlyS1 = OnlyS1)
} else {
e._1.hit(vpn, Mux(hasS2xlate, io.csr.vsatp.asid, io.csr.satp.asid), vmid = io.csr.hgatp.vmid, hasS2xlate = hasS2xlate, onlyS2 = OnlyS2, onlyS1 = OnlyS1)
}
s2xlate_hit && hit && m._1 && !m._2
}
})
Expand Down Expand Up @@ -333,14 +342,15 @@ object TlbStorage {
normalPage: Boolean,
superPage: Boolean,
useDmode: Boolean,
SoftTLB: Boolean
SoftTLB: Boolean,
useFac: Boolean
)(implicit p: Parameters) = {
if (SoftTLB) {
val storage = Module(new TLBFakeFA(ports, nDups, nSets, nWays, useDmode))
storage.suggestName(s"${parentName}_fake_fa")
storage.io
} else {
val storage = Module(new TLBFA(parentName, ports, nDups, nSets, nWays, saveLevel, normalPage, superPage))
val storage = Module(new TLBFA(parentName, ports, nDups, nSets, nWays, saveLevel, normalPage, superPage, useFac))
storage.suggestName(s"${parentName}_fa")
storage.io
}
Expand All @@ -360,13 +370,17 @@ class TlbStorageWrapper(ports: Int, q: TLBParameters, nDups: Int = 1)(implicit p
normalPage = true,
superPage = true,
useDmode = q.useDmode,
SoftTLB = coreParams.softTLB
SoftTLB = coreParams.softTLB,
useFac = q.useFac
)

for (i <- 0 until ports) {
page.r_req_apply(
valid = io.r.req(i).valid,
vpn = io.r.req(i).bits.vpn,
facA = io.r.req(i).bits.facA,
facB = io.r.req(i).bits.facB,
facCarry = io.r.req(i).bits.facCarry,
i = i,
s2xlate = io.r.req(i).bits.s2xlate
)
Expand Down
3 changes: 3 additions & 0 deletions src/main/scala/xiangshan/frontend/IFU.scala
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,9 @@ class NewIFU(implicit p: Parameters) extends XSModule
io.iTLBInter.req.bits.debug.robIdx := DontCare
io.iTLBInter.req.bits.debug.isFirstIssue := DontCare
io.iTLBInter.req.bits.pmp_addr := DontCare
io.iTLBInter.req.bits.facA := DontCare
io.iTLBInter.req.bits.facB := DontCare
io.iTLBInter.req.bits.facCarry := DontCare
// whats the difference between req_kill and req.bits.kill?
io.iTLBInter.req_kill := false.B
// wait for itlb response in m_tlbResp state
Expand Down
6 changes: 5 additions & 1 deletion src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ import xiangshan.backend.fu.FuType
import xiangshan.backend.Bundles.{MemExuInput, MemExuOutput}
import xiangshan.backend.fu.NewCSR.TriggerUtil
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.cache.mmu.Pbmt
import xiangshan.cache.mmu.{HasTlbConst, Pbmt}

class AtomicsUnit(implicit p: Parameters) extends XSModule
with MemoryOpConstants
with HasDCacheParameters
with HasTlbConst
with SdtrigExt{

val StdCnt = backendParams.StdCnt
Expand Down Expand Up @@ -454,6 +455,9 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule
io.dtlb.req.bits.vaddr := vaddr
io.dtlb.req.bits.fullva := vaddr
io.dtlb.req.bits.checkfullva := true.B
io.dtlb.req.bits.facA := vaddr(VAddrBits-1, sectorvpnOffLen)
io.dtlb.req.bits.facB := 0.U
io.dtlb.req.bits.facCarry := false.B
io.dtlb.resp.ready := true.B
io.dtlb.req.bits.cmd := Mux(isLr, TlbCmd.atom_read, TlbCmd.atom_write)
io.dtlb.req.bits.debug.pc := uop.pc
Expand Down
9 changes: 6 additions & 3 deletions src/main/scala/xiangshan/mem/pipeline/HybridUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule
val s0_rep_stall = io.lsin.valid && isAfter(io.ldu_io.replay.bits.uop.robIdx, io.lsin.bits.uop.robIdx)
private val SRC_NUM = 8
private val Seq(
super_rep_idx, fast_rep_idx, lsq_rep_idx, high_pf_idx,
super_rep_idx, fast_rep_idx, lsq_rep_idx, high_pf_idx,
int_iss_idx, vec_iss_idx, l2l_fwd_idx, low_pf_idx
) = (0 until SRC_NUM).toSeq
// load flow source valid
Expand Down Expand Up @@ -274,6 +274,9 @@ class HybridUnit(implicit p: Parameters) extends XSModule
io.tlb.req.bits.no_translate := s0_hw_prf_select // hw b.reqetch addr does not need to be translated
io.tlb.req.bits.debug.pc := s0_uop.pc
io.tlb.req.bits.debug.isFirstIssue := s0_isFirstIssue
// NOTE(review): the FAC operands are left unconnected (DontCare) on this request, while
// AtomicsUnit and the L2-to-L1 DTLB port drive facA from the vaddr bits above
// sectorvpnOffLen with facB = 0 and facCarry = false. This change also sets
// useFac = true for hytlbParameters; if this request is served by a TLB built with
// useFac = true, the hit check would compare tags against undefined operands.
// TODO confirm which TLB serves io.tlb and drive these like the other requestors if needed.
io.tlb.req.bits.facA := DontCare
io.tlb.req.bits.facB := DontCare
io.tlb.req.bits.facCarry := DontCare

// query DCache
// for load
Expand Down Expand Up @@ -984,7 +987,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule
// generate XLEN/8 Muxs
for (i <- 0 until VLEN / 8) {
s2_fwd_mask(i) := io.ldu_io.lsq.forward.forwardMask(i) || io.ldu_io.sbuffer.forwardMask(i) || io.ldu_io.vec_forward.forwardMask(i) || io.ldu_io.ubuffer.forwardMask(i)
s2_fwd_data(i) :=
s2_fwd_data(i) :=
Mux(io.ldu_io.lsq.forward.forwardMask(i), io.ldu_io.lsq.forward.forwardData(i),
Mux(io.ldu_io.vec_forward.forwardMask(i), io.ldu_io.vec_forward.forwardData(i),
Mux(io.ldu_io.ubuffer.forwardMask(i), io.ldu_io.ubuffer.forwardData(i),
Expand Down Expand Up @@ -1431,4 +1434,4 @@ class HybridUnit(implicit p: Parameters) extends XSModule
("load_s2_dcache_miss ", s2_fire && io.ldu_io.dcache.resp.bits.miss ),
)
generatePerfEvent()
}
}
Loading
Loading