diff --git a/src/main/scala/top/Configs.scala b/src/main/scala/top/Configs.scala
index 91330cb693d..7c10ee0b8a8 100644
--- a/src/main/scala/top/Configs.scala
+++ b/src/main/scala/top/Configs.scala
@@ -167,7 +167,8 @@ class MinimalConfig(n: Int = 1) extends Config(
         partialStaticPMP = true,
         outsideRecvFlush = true,
         outReplace = false,
-        lgMaxSize = 4
+        lgMaxSize = 4,
+        useFac = true
       ),
       sttlbParameters = TLBParameters(
         name = "sttlb",
@@ -175,7 +176,8 @@ class MinimalConfig(n: Int = 1) extends Config(
         partialStaticPMP = true,
         outsideRecvFlush = true,
         outReplace = false,
-        lgMaxSize = 4
+        lgMaxSize = 4,
+        useFac = true
       ),
       hytlbParameters = TLBParameters(
         name = "hytlb",
@@ -183,7 +185,8 @@ class MinimalConfig(n: Int = 1) extends Config(
         partialStaticPMP = true,
         outsideRecvFlush = true,
         outReplace = false,
-        lgMaxSize = 4
+        lgMaxSize = 4,
+        useFac = true
       ),
       pftlbParameters = TLBParameters(
         name = "pftlb",
diff --git a/src/main/scala/xiangshan/Parameters.scala b/src/main/scala/xiangshan/Parameters.scala
index 76e07ad747c..12a0f6e77e8 100644
--- a/src/main/scala/xiangshan/Parameters.scala
+++ b/src/main/scala/xiangshan/Parameters.scala
@@ -288,7 +288,8 @@ case class XSCoreParameters
     partialStaticPMP = true,
     outsideRecvFlush = true,
     saveLevel = false,
-    lgMaxSize = 4
+    lgMaxSize = 4,
+    useFac = true
   ),
   sttlbParameters: TLBParameters = TLBParameters(
     name = "sttlb",
@@ -297,7 +298,8 @@ case class XSCoreParameters
     partialStaticPMP = true,
     outsideRecvFlush = true,
     saveLevel = false,
-    lgMaxSize = 4
+    lgMaxSize = 4,
+    useFac = true
   ),
   hytlbParameters: TLBParameters = TLBParameters(
     name = "hytlb",
@@ -306,7 +308,8 @@ case class XSCoreParameters
     partialStaticPMP = true,
     outsideRecvFlush = true,
     saveLevel = false,
-    lgMaxSize = 4
+    lgMaxSize = 4,
+    useFac = true
   ),
   pftlbParameters: TLBParameters = TLBParameters(
     name = "pftlb",
diff --git a/src/main/scala/xiangshan/backend/MemBlock.scala b/src/main/scala/xiangshan/backend/MemBlock.scala
index b7472c93050..00f3b89b0ed 100644
--- a/src/main/scala/xiangshan/backend/MemBlock.scala
+++ b/src/main/scala/xiangshan/backend/MemBlock.scala
@@ -1158,6 +1158,9 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
     dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
   }
   dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
+  dtlb_reqs(L2toL1DLBPortIndex).req.bits.facA := io.l2_tlb_req.req.bits.vaddr(VAddrBits-1, sectorvpnOffLen)
+  dtlb_reqs(L2toL1DLBPortIndex).req.bits.facB := 0.U
+  dtlb_reqs(L2toL1DLBPortIndex).req.bits.facCarry := false.B
   dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
   io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp

@@ -2044,4 +2047,4 @@ class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
   if (p(DebugOptionsKey).ResetGen) {
     ResetGen(ResetGenNode(Seq(ModuleNode(wrapper.inner.module))), reset, sim = false)
   }
-}
\ No newline at end of file
+}
diff --git a/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala b/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
index 74d2fbc25f2..e363fae72fd 100644
--- a/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
+++ b/src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
@@ -213,6 +213,35 @@ class TlbSectorEntry(pageNormal: Boolean, pageSuper: Boolean)(implicit p: Parame
    * bits0  0: need low 9bits
    * bits1  0: need mid 9bits
    */
+  def fachit(vpn: UInt, facA: UInt, facB: UInt, facCarry: Bool, asid: UInt,
+             nSets: Int = 1, ignoreAsid: Boolean = false, vmid: UInt, hasS2xlate: Bool, onlyS2: Bool = false.B, onlyS1: Bool = false.B): Bool = {
+    val asid_hit = Mux(hasS2xlate && onlyS2, true.B, if (ignoreAsid) true.B else (this.asid === asid))
+    val addr_low_hit = valididx(vpn(2, 0))
+    val vmid_hit = Mux(hasS2xlate, this.vmid === vmid, true.B)
+    val isPageSuper = !(level.getOrElse(0.U) === 0.U)
+    val pteidx_hit = Mux(hasS2xlate && !isPageSuper && !onlyS1 && n === 0.U, pteidx(vpn(2, 0)), true.B)
+
+    val tmp_level = level.get
+    val align_vpn = vpn(vpn.getWidth-1, sectortlbwidth)
+    val tag_match_fac = FastAdderComparator.genFac(facA, facB, facCarry, tag) // fast adder comparator: check (facA + facB + facCarry) against tag without a carry-propagate add
+    val tag_matchs = Wire(Vec(Level + 1, Bool()))
+    tag_matchs(0) := Mux(
+      n === 0.U,
+      tag_match_fac(vpnnLen - sectortlbwidth - 1, 0).andR,
+      tag_match_fac(vpnnLen - sectortlbwidth - 1, pteNapotBits - sectortlbwidth).andR
+    )
+    for (i <- 1 until Level) {
+      tag_matchs(i) := tag_match_fac(vpnnLen * (i + 1) - sectortlbwidth - 1, vpnnLen * i - sectortlbwidth).andR
+    }
+    tag_matchs(Level) := tag_match_fac(sectorvpnLen - 1, vpnnLen * Level - sectortlbwidth).andR
+    val level_matchs = Wire(Vec(Level + 1, Bool()))
+    for (i <- 0 until Level) {
+      level_matchs(i) := tag_matchs(i) || tmp_level >= (i + 1).U
+    }
+    level_matchs(Level) := tag_matchs(Level)
+
+    asid_hit && level_matchs.asUInt.andR && addr_low_hit && vmid_hit && pteidx_hit
+  }

   def hit(vpn: UInt, asid: UInt, nSets: Int = 1, ignoreAsid: Boolean = false, vmid: UInt, hasS2xlate: Bool, onlyS2: Bool = false.B, onlyS1: Bool = false.B): Bool = {
     val asid_hit = Mux(hasS2xlate && onlyS2, true.B, if (ignoreAsid) true.B else (this.asid === asid))
@@ -409,6 +438,9 @@ class TlbStorageIO(nSets: Int, nWays: Int, ports: Int, nDups: Int = 1)(implicit
     val req = Vec(ports, Flipped(DecoupledIO(new Bundle {
       val vpn = Output(UInt(vpnLen.W))
       val s2xlate = Output(UInt(2.W))
+      val facA = Output(UInt(sectorvpnLen.W))
+      val facB = Output(UInt(sectorvpnLen.W))
+      val facCarry = Output(Bool())
     })))
     val resp = Vec(ports, ValidIO(new Bundle{
       val hit = Output(Bool())
@@ -426,11 +458,13 @@ class TlbStorageIO(nSets: Int, nWays: Int, ports: Int, nDups: Int = 1)(implicit
   }))
   val access = Vec(ports, new ReplaceAccessBundle(nSets, nWays))

-  def r_req_apply(valid: Bool, vpn: UInt, i: Int, s2xlate:UInt): Unit = {
+  def r_req_apply(valid: Bool, vpn: UInt, facA: UInt, facB: UInt, facCarry: Bool, i: Int, s2xlate:UInt): Unit = {
     this.r.req(i).valid := valid
     this.r.req(i).bits.vpn := vpn
     this.r.req(i).bits.s2xlate := s2xlate
-
+    this.r.req(i).bits.facA := facA
+    this.r.req(i).bits.facB := facB
+    this.r.req(i).bits.facCarry := facCarry
   }

   def r_resp_apply(i: Int) = {
@@ -450,6 +484,9 @@ class TlbStorageWrapperIO(ports: Int, q: TLBParameters, nDups: Int = 1)(implicit
     val req = Vec(ports, Flipped(DecoupledIO(new Bundle {
       val vpn = Output(UInt(vpnLen.W))
       val s2xlate = Output(UInt(2.W))
+      val facA = Output(UInt(sectorvpnLen.W))
+      val facB = Output(UInt(sectorvpnLen.W))
+      val facCarry = Output(Bool())
     })))
     val resp = Vec(ports, ValidIO(new Bundle{
       val hit = Output(Bool())
@@ -466,10 +503,13 @@ class TlbStorageWrapperIO(ports: Int, q: TLBParameters, nDups: Int = 1)(implicit
   }))
   val replace = if (q.outReplace) Flipped(new TlbReplaceIO(ports, q)) else null

-  def r_req_apply(valid: Bool, vpn: UInt, i: Int, s2xlate: UInt): Unit = {
+  def r_req_apply(valid: Bool, vpn: UInt, facA: UInt, facB: UInt, facCarry: Bool, i: Int, s2xlate: UInt): Unit = {
     this.r.req(i).valid := valid
     this.r.req(i).bits.vpn := vpn
     this.r.req(i).bits.s2xlate := s2xlate
+    this.r.req(i).bits.facA := facA
+    this.r.req(i).bits.facB := facB
+    this.r.req(i).bits.facCarry := facCarry
   }

   def r_resp_apply(i: Int) = {
@@ -537,7 +577,9 @@ class TlbReq(implicit p: Parameters) extends TlbBundle {
     val robIdx = Output(new RobPtr)
     val isFirstIssue = Output(Bool())
   }
-
+  val facA = Output(UInt(sectorvpnLen.W))
+  val facB = Output(UInt(sectorvpnLen.W))
+  val facCarry = Output(Bool())
   // Maybe Block req needs a kill: for itlb, itlb and icache may not sync, itlb should wait icache to go ahead
   override def toPrintable: Printable = {
     p"vaddr:0x${Hexadecimal(vaddr)} cmd:${cmd} kill:${kill} pc:0x${Hexadecimal(debug.pc)} robIdx:${debug.robIdx}"
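The new `fachit` above is the heart of the change: instead of comparing a completed virtual page number against the stored tag, it receives the two addends of the address calculation (`facA`, `facB`) together with the carry out of the bits below the sector vpn (`facCarry`), and asks `FastAdderComparator.genFac` for a per-bit vector saying whether `facA + facB + facCarry` matches `tag`; the per-level `.andR` slices then mirror the range compares of the existing `hit`. The comparator itself comes from the `utility` submodule bumped at the bottom of this diff, so only its call site is visible here. The sketch below is a plain-Scala model of the standard carry-free `A + B == K` identity it presumably implements; the name and the per-bit-match interpretation are assumptions inferred from the call site, not taken from the submodule.

```scala
// Software model of a carry-free "A + B + cin == K" check.
object FacCompareModel {
  // Bit i is true iff sum bit i of (a + b + cin) equals k(i), where the carry into bit i
  // is computed speculatively from bit i-1 *assuming* the lower sum bits already equal k.
  // All bits true  <=>  (a + b + cin) == k (mod 2^width), so no carry chain is needed.
  def genFac(a: BigInt, b: BigInt, cin: Boolean, k: BigInt, width: Int): Seq[Boolean] =
    (0 until width).map { i =>
      val ci =
        if (i == 0) cin
        else {
          val (a1, b1, k1) = (a.testBit(i - 1), b.testBit(i - 1), k.testBit(i - 1))
          (a1 && b1) || ((a1 || b1) && !k1) // carry-out of bit i-1 if sum(i-1) == k(i-1)
        }
      (a.testBit(i) ^ b.testBit(i) ^ ci) == k.testBit(i)
    }

  def main(args: Array[String]): Unit = {
    val width = 36 // roughly a sector-vpn sized compare; any width works
    val rnd = new scala.util.Random(0)
    val mask = (BigInt(1) << width) - 1
    for (_ <- 0 until 100000) {
      val a = BigInt(width, rnd)
      val b = BigInt(width, rnd)
      val cin = rnd.nextBoolean()
      val sum = (a + b + (if (cin) 1 else 0)) & mask
      val k = if (rnd.nextBoolean()) sum else BigInt(width, rnd) // half hits, half (likely) misses
      val hit = genFac(a, b, cin, k, width).forall(identity)
      assert(hit == (sum == k))
    }
    println("carry-free comparison agreed with a real add on all samples")
  }
}
```

Because each bit of the match vector only looks at two adjacent bit positions of `facA`, `facB` and `tag`, the TLB tag check no longer has to wait for the AGU's full carry-propagate add.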
diff --git a/src/main/scala/xiangshan/cache/mmu/MMUConst.scala b/src/main/scala/xiangshan/cache/mmu/MMUConst.scala
index 612778482a7..6630fb69561 100644
--- a/src/main/scala/xiangshan/cache/mmu/MMUConst.scala
+++ b/src/main/scala/xiangshan/cache/mmu/MMUConst.scala
@@ -42,7 +42,8 @@ case class TLBParameters
   partialStaticPMP: Boolean = false, // partial static pmp result stored in entries
   outsideRecvFlush: Boolean = false, // if outside moudle waiting for tlb recv flush pipe
   saveLevel: Boolean = false,
-  lgMaxSize: Int = 3
+  lgMaxSize: Int = 3,
+  useFac: Boolean = false
 )

 case class L2TLBParameters
@@ -119,6 +120,7 @@ trait HasTlbConst extends HasXSParameter {
   val sectorgvpnLen = gvpnLen - sectortlbwidth
   val sectorvpnLen = vpnLen - sectortlbwidth
   val sectorptePPNLen = ptePPNLen - sectortlbwidth
+  val sectorvpnOffLen = sectortlbwidth + offLen

   val loadfiltersize = 16 // 4*3(LduCnt:2 + HyuCnt:1) + 4(prefetch:1)
   val storefiltersize = if (StorePipelineWidth >= 3) 16 else 8
diff --git a/src/main/scala/xiangshan/cache/mmu/TLB.scala b/src/main/scala/xiangshan/cache/mmu/TLB.scala
index 46fce8ef770..aad45f79010 100644
--- a/src/main/scala/xiangshan/cache/mmu/TLB.scala
+++ b/src/main/scala/xiangshan/cache/mmu/TLB.scala
@@ -216,7 +216,7 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
   entries.io.base_connect(sfence, csr, satp)
   if (q.outReplace) { io.replace <> entries.io.replace }
   for (i <- 0 until Width) {
-    entries.io.r_req_apply(io.requestor(i).req.valid, get_pn(req_in(i).bits.vaddr), i, req_in_s2xlate(i))
+    entries.io.r_req_apply(io.requestor(i).req.valid, get_pn(req_in(i).bits.vaddr), req_in(i).bits.facA, req_in(i).bits.facB, req_in(i).bits.facCarry, i, req_in_s2xlate(i))
     entries.io.w_apply(refill, ptw.resp.bits)
     // TODO: RegNext enable:req.valid
     resp(i).bits.debug.isFirstIssue := RegEnable(req(i).bits.debug.isFirstIssue, req(i).valid)
diff --git a/src/main/scala/xiangshan/cache/mmu/TLBStorage.scala b/src/main/scala/xiangshan/cache/mmu/TLBStorage.scala
index 84bce40bb19..ddf7f847e67 100644
--- a/src/main/scala/xiangshan/cache/mmu/TLBStorage.scala
+++ b/src/main/scala/xiangshan/cache/mmu/TLBStorage.scala
@@ -91,7 +91,8 @@ class TLBFA(
   nWays: Int,
   saveLevel: Boolean = false,
   normalPage: Boolean,
-  superPage: Boolean
+  superPage: Boolean,
+  useFac: Boolean
 )(implicit p: Parameters) extends TlbModule with HasPerfEvents {

   val io = IO(new TlbStorageIO(nSets, nWays, ports, nDups))
@@ -108,6 +109,9 @@ class TLBFA(

     val vpn = req.bits.vpn
     val vpn_reg = RegEnable(vpn, req.fire)
+    val facA = req.bits.facA
+    val facB = req.bits.facB
+    val facCarry = req.bits.facCarry
     val hasS2xlate = req.bits.s2xlate =/= noS2xlate
     val OnlyS2 = req.bits.s2xlate === onlyStage2
     val OnlyS1 = req.bits.s2xlate === onlyStage1
@@ -115,7 +119,12 @@ class TLBFA(

     val hitVec = VecInit((entries.zipWithIndex).zip(v zip refill_mask.asBools).map{ case (e, m) => {
       val s2xlate_hit = e._1.s2xlate === req.bits.s2xlate
-      val hit = e._1.hit(vpn, Mux(hasS2xlate, io.csr.vsatp.asid, io.csr.satp.asid), vmid = io.csr.hgatp.vmid, hasS2xlate = hasS2xlate, onlyS2 = OnlyS2, onlyS1 = OnlyS1)
+      val hit =
+        if (useFac) {
+          e._1.fachit(vpn, facA, facB, facCarry, Mux(hasS2xlate, io.csr.vsatp.asid, io.csr.satp.asid), vmid = io.csr.hgatp.vmid, hasS2xlate = hasS2xlate, onlyS2 = OnlyS2, onlyS1 = OnlyS1)
+        } else {
+          e._1.hit(vpn, Mux(hasS2xlate, io.csr.vsatp.asid, io.csr.satp.asid), vmid = io.csr.hgatp.vmid, hasS2xlate = hasS2xlate, onlyS2 = OnlyS2, onlyS1 = OnlyS1)
+        }
       s2xlate_hit && hit && m._1 && !m._2
     }
     })
@@ -333,14 +342,15 @@ object TlbStorage {
     normalPage: Boolean,
     superPage: Boolean,
     useDmode: Boolean,
-    SoftTLB: Boolean
+    SoftTLB: Boolean,
+    useFac: Boolean
   )(implicit p: Parameters) = {
     if (SoftTLB) {
       val storage = Module(new TLBFakeFA(ports, nDups, nSets, nWays, useDmode))
       storage.suggestName(s"${parentName}_fake_fa")
       storage.io
     } else {
-      val storage = Module(new TLBFA(parentName, ports, nDups, nSets, nWays, saveLevel, normalPage, superPage))
+      val storage = Module(new TLBFA(parentName, ports, nDups, nSets, nWays, saveLevel, normalPage, superPage, useFac))
       storage.suggestName(s"${parentName}_fa")
       storage.io
     }
@@ -360,13 +370,17 @@ class TlbStorageWrapper(ports: Int, q: TLBParameters, nDups: Int = 1)(implicit p
     normalPage = true,
     superPage = true,
     useDmode = q.useDmode,
-    SoftTLB = coreParams.softTLB
+    SoftTLB = coreParams.softTLB,
+    useFac = q.useFac
   )

   for (i <- 0 until ports) {
     page.r_req_apply(
       valid = io.r.req(i).valid,
       vpn = io.r.req(i).bits.vpn,
+      facA = io.r.req(i).bits.facA,
+      facB = io.r.req(i).bits.facB,
+      facCarry = io.r.req(i).bits.facCarry,
       i = i,
       s2xlate = io.r.req(i).bits.s2xlate
     )
diff --git a/src/main/scala/xiangshan/frontend/IFU.scala b/src/main/scala/xiangshan/frontend/IFU.scala
index 4bb8baf855d..97d308ed8b2 100644
--- a/src/main/scala/xiangshan/frontend/IFU.scala
+++ b/src/main/scala/xiangshan/frontend/IFU.scala
@@ -851,6 +851,9 @@ class NewIFU(implicit p: Parameters) extends XSModule
   io.iTLBInter.req.bits.debug.robIdx := DontCare
   io.iTLBInter.req.bits.debug.isFirstIssue := DontCare
   io.iTLBInter.req.bits.pmp_addr := DontCare
+  io.iTLBInter.req.bits.facA := DontCare
+  io.iTLBInter.req.bits.facB := DontCare
+  io.iTLBInter.req.bits.facCarry := DontCare
   // whats the difference between req_kill and req.bits.kill?
   io.iTLBInter.req_kill := false.B
   // wait for itlb response in m_tlbResp state
diff --git a/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala b/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala
index e595292c730..a68d6d153bf 100644
--- a/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala
+++ b/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala
@@ -31,11 +31,12 @@ import xiangshan.backend.fu.FuType
 import xiangshan.backend.Bundles.{MemExuInput, MemExuOutput}
 import xiangshan.backend.fu.NewCSR.TriggerUtil
 import xiangshan.backend.fu.util.SdtrigExt
-import xiangshan.cache.mmu.Pbmt
+import xiangshan.cache.mmu.{HasTlbConst, Pbmt}

 class AtomicsUnit(implicit p: Parameters) extends XSModule
   with MemoryOpConstants
   with HasDCacheParameters
+  with HasTlbConst
   with SdtrigExt{

   val StdCnt = backendParams.StdCnt
@@ -454,6 +455,9 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule
     io.dtlb.req.bits.vaddr := vaddr
     io.dtlb.req.bits.fullva := vaddr
     io.dtlb.req.bits.checkfullva := true.B
+    io.dtlb.req.bits.facA := vaddr(VAddrBits-1, sectorvpnOffLen)
+    io.dtlb.req.bits.facB := 0.U
+    io.dtlb.req.bits.facCarry := false.B
     io.dtlb.resp.ready := true.B
     io.dtlb.req.bits.cmd := Mux(isLr, TlbCmd.atom_read, TlbCmd.atom_write)
     io.dtlb.req.bits.debug.pc := uop.pc
diff --git a/src/main/scala/xiangshan/mem/pipeline/HybridUnit.scala b/src/main/scala/xiangshan/mem/pipeline/HybridUnit.scala
index 0ca50ada2a7..20fec869917 100644
--- a/src/main/scala/xiangshan/mem/pipeline/HybridUnit.scala
+++ b/src/main/scala/xiangshan/mem/pipeline/HybridUnit.scala
@@ -204,7 +204,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule
   val s0_rep_stall = io.lsin.valid && isAfter(io.ldu_io.replay.bits.uop.robIdx, io.lsin.bits.uop.robIdx)
   private val SRC_NUM = 8
   private val Seq(
-    super_rep_idx, fast_rep_idx, lsq_rep_idx, high_pf_idx,
+    super_rep_idx, fast_rep_idx, lsq_rep_idx, high_pf_idx,
     int_iss_idx, vec_iss_idx, l2l_fwd_idx, low_pf_idx
   ) = (0 until SRC_NUM).toSeq
   // load flow source valid
@@ -274,6 +274,9 @@ class HybridUnit(implicit p: Parameters) extends XSModule
   io.tlb.req.bits.no_translate := s0_hw_prf_select // hw prefetch addr does not need to be translated
   io.tlb.req.bits.debug.pc := s0_uop.pc
   io.tlb.req.bits.debug.isFirstIssue := s0_isFirstIssue
+  io.tlb.req.bits.facA := DontCare
+  io.tlb.req.bits.facB := DontCare
+  io.tlb.req.bits.facCarry := DontCare

   // query DCache
   // for load
@@ -984,7 +987,7 @@ class HybridUnit(implicit p: Parameters) extends XSModule
   // generate XLEN/8 Muxs
   for (i <- 0 until VLEN / 8) {
     s2_fwd_mask(i) := io.ldu_io.lsq.forward.forwardMask(i) || io.ldu_io.sbuffer.forwardMask(i) || io.ldu_io.vec_forward.forwardMask(i) || io.ldu_io.ubuffer.forwardMask(i)
-    s2_fwd_data(i) :=
+    s2_fwd_data(i) :=
       Mux(io.ldu_io.lsq.forward.forwardMask(i), io.ldu_io.lsq.forward.forwardData(i),
       Mux(io.ldu_io.vec_forward.forwardMask(i), io.ldu_io.vec_forward.forwardData(i),
       Mux(io.ldu_io.ubuffer.forwardMask(i), io.ldu_io.ubuffer.forwardData(i),
@@ -1431,4 +1434,4 @@ class HybridUnit(implicit p: Parameters) extends XSModule
     ("load_s2_dcache_miss ", s2_fire && io.ldu_io.dcache.resp.bits.miss ),
   )
   generatePerfEvent()
-}
\ No newline at end of file
+}
diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
index 086fed9a0ff..fc9736ec2ba 100644
--- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
+++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
@@ -112,6 +112,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule
   with HasDCacheParameters
   with HasCircularQueuePtrHelper
   with HasVLSUParameters
+  with HasTlbConst
   with SdtrigExt
 {
   val io = IO(new Bundle() {
@@ -274,6 +275,9 @@ class LoadUnit(implicit p: Parameters) extends XSModule
     val data = UInt((VLEN+1).W)
   }
   val s0_sel_src = Wire(new FlowSource)
+  val s0_sel_facA = Wire(UInt(VAddrBits.W))
+  val s0_sel_facB = Wire(UInt(VAddrBits.W))
+  val s0_sel_facCarry = Wire(Bool())

   // load flow select/gen
   // src 0: misalignBuffer load (io.misalign_ldin)
@@ -387,6 +391,9 @@ class LoadUnit(implicit p: Parameters) extends XSModule
   io.tlb.req.bits.no_translate := s0_tlb_no_query // hardware prefetch and fast replay does not need to be translated, need this signal for pmp check
   io.tlb.req.bits.debug.pc := s0_sel_src.uop.pc
   io.tlb.req.bits.debug.isFirstIssue := s0_sel_src.isFirstIssue
+  io.tlb.req.bits.facA := s0_sel_facA(VAddrBits-1, sectorvpnOffLen)
+  io.tlb.req.bits.facB := s0_sel_facB(VAddrBits-1, sectorvpnOffLen)
+  io.tlb.req.bits.facCarry := s0_sel_facCarry

   // query DCache
   io.dcache.req.valid := s0_valid && !s0_sel_src.prf_i && !s0_nc_with_data
@@ -686,6 +693,33 @@ class LoadUnit(implicit p: Parameters) extends XSModule
       int_vec_vaddr
     )
   )
+
+  s0_sel_facA := Mux(
+    s0_src_valid_vec(mab_idx) || s0_src_valid_vec(super_rep_idx) || s0_src_valid_vec(lsq_rep_idx),
+    Mux(
+      s0_src_valid_vec(mab_idx),
+      io.misalign_ldin.bits.vaddr,
+      io.replay.bits.vaddr
+    ),
+    Mux(
+      s0_src_valid_vec(vec_iss_idx),
+      io.vecldin.bits.vaddr,
+      io.ldin.bits.src(0)
+    )
+  )
+  s0_sel_facB := Mux(
+    s0_src_valid_vec(mab_idx) || s0_src_valid_vec(super_rep_idx) || s0_src_valid_vec(lsq_rep_idx) || s0_src_valid_vec(vec_iss_idx),
+    0.U,
+    SignExt(io.ldin.bits.uop.imm(11, 0), VAddrBits)
+  )
+  val s0_facCarry = io.ldin.bits.src(0)(sectorvpnOffLen - 1, 0) +& SignExt(io.ldin.bits.uop.imm(11, 0), sectorvpnOffLen)
+  s0_sel_facCarry := Mux(
+    s0_src_valid_vec(mab_idx) || s0_src_valid_vec(super_rep_idx) || s0_src_valid_vec(lsq_rep_idx) || s0_src_valid_vec(vec_iss_idx),
+    false.B,
+    s0_facCarry(s0_facCarry.getWidth-1)
+  )
+
+
   s0_dcache_vaddr := Mux(
     s0_src_select_vec(fast_rep_idx), io.fast_rep_in.bits.vaddr,
     Mux(s0_hw_prf_select, io.prefetch_req.bits.getVaddr(),
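On the scalar issue path the LoadUnit now hands the TLB the raw pieces of the address add instead of its result: `facA` and `facB` are the upper bits of `rs1` and of the sign-extended 12-bit immediate, and `facCarry` is the carry out of their low `sectorvpnOffLen` bits, so `facA + facB + facCarry` equals the sector vpn of `rs1 + imm`. Sources that already carry a finished vaddr (misalign buffer, replays, vector loads) fall back to `facB = 0` and `facCarry = false`. A pure-Scala check of that split identity, with illustrative widths (`sectorvpnOffLen = offLen + sectortlbwidth`, e.g. 12 + 3; `VAddrBits = 48` is a stand-in):

```scala
// Check: the upper bits of (rs1 + SignExt(imm12)) equal
// upper(rs1) + upper(SignExt(imm12)) + carry-out of the lower bits,
// which is exactly the (facA, facB, facCarry) triple handed to the TLB.
object FacSplitCheck {
  val VAddrBits = 48        // illustrative
  val sectorvpnOffLen = 15  // offLen + sectortlbwidth, illustrative

  def main(args: Array[String]): Unit = {
    val rnd = new scala.util.Random(42)
    val vaMask = (BigInt(1) << VAddrBits) - 1
    val lowMask = (BigInt(1) << sectorvpnOffLen) - 1
    for (_ <- 0 until 100000) {
      val rs1 = BigInt(VAddrBits, rnd)
      val imm12 = rnd.nextInt(1 << 12)
      // SignExt(imm(11,0), VAddrBits), kept as an unsigned VAddrBits-wide value
      val imm = if ((imm12 & 0x800) != 0) (BigInt(imm12) | ~BigInt(0xfff)) & vaMask else BigInt(imm12)

      val vaddr = (rs1 + imm) & vaMask
      val facA = rs1 >> sectorvpnOffLen                                        // upper bits of the base register
      val facB = imm >> sectorvpnOffLen                                        // upper bits of the sign-extended imm
      val facCarry = ((rs1 & lowMask) + (imm & lowMask)) >> sectorvpnOffLen    // carry out of the low bits

      val fromFac = (facA + facB + facCarry) & (vaMask >> sectorvpnOffLen)
      assert(fromFac == (vaddr >> sectorvpnOffLen))
    }
    println("facA + facB + facCarry reproduces the sector vpn of rs1 + imm")
  }
}
```

StoreUnit applies the same split to `s0_stin.src(0)` and its immediate on the `s0_use_flow_rs` path, as the next file shows.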
diff --git a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
index 853b6efbf60..2ed7f6b0061 100644
--- a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
+++ b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
@@ -29,11 +29,12 @@ import xiangshan.backend.fu.FuConfig._
 import xiangshan.backend.fu.FuType._
 import xiangshan.backend.ctrlblock.DebugLsInfoBundle
 import xiangshan.backend.fu.NewCSR._
-import xiangshan.cache.mmu.{TlbCmd, TlbReq, TlbRequestIO, TlbResp, Pbmt}
+import xiangshan.cache.mmu.{TlbCmd, TlbReq, TlbRequestIO, TlbResp, Pbmt, HasTlbConst}
 import xiangshan.cache.{DcacheStoreRequestIO, DCacheStoreIO, MemoryOpConstants, HasDCacheParameters, StorePrefetchReq}

 class StoreUnit(implicit p: Parameters) extends XSModule
   with HasDCacheParameters
+  with HasTlbConst
   with HasVLSUParameters
 {
   val io = IO(new Bundle() {
@@ -127,6 +128,9 @@ class StoreUnit(implicit p: Parameters) extends XSModule
   val s0_mBIndex = s0_vecstin.mBIndex
   val s0_vecBaseVaddr = s0_vecstin.basevaddr
   val s0_isFinalSplit = io.misalign_stin.valid && io.misalign_stin.bits.isFinalSplit
+  val s0_sel_facA = Wire(UInt(VAddrBits.W))
+  val s0_sel_facB = Wire(UInt(VAddrBits.W))
+  val s0_sel_facCarry = Wire(Bool())

   // generate addr
   val s0_saddr = s0_stin.src(0) + SignExt(s0_stin.uop.imm(11,0), VAddrBits)
@@ -213,6 +217,34 @@ class StoreUnit(implicit p: Parameters) extends XSModule
   io.tlb.req.bits.hyperinst := LSUOpType.isHsv(s0_uop.fuOpType)
   io.tlb.req.bits.hlvx := false.B
   io.tlb.req.bits.pmp_addr := DontCare
+  io.tlb.req.bits.facA := s0_sel_facA(VAddrBits-1, sectorvpnOffLen)
+  io.tlb.req.bits.facB := s0_sel_facB(VAddrBits-1, sectorvpnOffLen)
+  io.tlb.req.bits.facCarry := s0_sel_facCarry
+
+  s0_sel_facA := Mux(
+    s0_use_flow_rs,
+    s0_stin.src(0),
+    Mux(
+      s0_use_flow_ma,
+      io.misalign_stin.bits.vaddr,
+      Mux(
+        s0_use_flow_vec,
+        s0_vecstin.vaddr,
+        io.prefetch_req.bits.vaddr
+      )
+    )
+  )
+  s0_sel_facB := Mux(
+    s0_use_flow_rs,
+    SignExt(s0_stin.uop.imm(11, 0), VAddrBits),
+    0.U
+  )
+  val s0_facCarry = s0_stin.src(0)(sectorvpnOffLen - 1, 0) +& SignExt(s0_stin.uop.imm(11, 0), sectorvpnOffLen)
+  s0_sel_facCarry := Mux(
+    s0_use_flow_rs,
+    s0_facCarry(s0_facCarry.getWidth-1),
+    false.B
+  )

   // Dcache access here: not **real** dcache write
   // just read meta and tag in dcache, to find out the store will hit or miss
@@ -257,7 +289,7 @@ class StoreUnit(implicit p: Parameters) extends XSModule
   io.st_mask_out.valid := s0_use_flow_rs || s0_use_flow_vec
   io.st_mask_out.bits.mask := s0_out.mask
   io.st_mask_out.bits.sqIdx := s0_out.uop.sqIdx
-
+
   io.stin.ready := s1_ready && s0_use_flow_rs
   io.vecstin.ready := s1_ready && s0_use_flow_vec
   io.prefetch_req.ready := s1_ready && io.dcache.req.ready && !s0_iss_valid && !s0_vec_valid && !s0_ma_st_valid
@@ -598,7 +630,7 @@ class StoreUnit(implicit p: Parameters) extends XSModule
       sx_in(i).isForVSnonLeafPTE := s3_in.isForVSnonLeafPTE
       sx_in(i).vecTriggerMask := s3_in.vecTriggerMask
       sx_in(i).hasException := s3_exception
-      sx_in_vec(i) := s3_in.isvec
+      sx_in_vec(i) := s3_in.isvec
       sx_ready(i) := !s3_valid(i) || sx_in(i).output.uop.robIdx.needFlush(io.redirect) || (if (TotalDelayCycles == 0) io.stout.ready else sx_ready(i+1))
     } else {
       val cur_kill = sx_in(i).output.uop.robIdx.needFlush(io.redirect)
diff --git a/src/main/scala/xiangshan/mem/prefetch/L1PrefetchComponent.scala b/src/main/scala/xiangshan/mem/prefetch/L1PrefetchComponent.scala
index 8f16a261130..3a4c69ebe55 100644
--- a/src/main/scala/xiangshan/mem/prefetch/L1PrefetchComponent.scala
+++ b/src/main/scala/xiangshan/mem/prefetch/L1PrefetchComponent.scala
@@ -362,7 +362,7 @@ class MLPReqFilterBundle(implicit p: Parameters) extends XSBundle with HasL1Pref
 // 3. actual l1 prefetch
 // 4. actual l2 prefetch
 // 5. actual l3 prefetch
-class MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper {
+class MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper with HasTlbConst {
   val io = IO(new XSBundle {
     val enable = Input(Bool())
     val flush = Input(Bool())
@@ -593,13 +593,15 @@ class MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL
   for(i <- 0 until MLP_SIZE) {
     val l1_evict = s1_l1_alloc && (s1_l1_index === i.U)
     val l2_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === i.U)
+    val tlb_req_vaddr = Wire(UInt(VAddrBits.W))
     if(i < MLP_L1_SIZE) {
       tlb_req_arb.io.in(i).valid := l1_valids(i) && l1_array(i).is_vaddr && !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !l1_evict
-      tlb_req_arb.io.in(i).bits.vaddr := l1_array(i).get_tlb_va()
+      tlb_req_vaddr := l1_array(i).get_tlb_va()
     }else {
       tlb_req_arb.io.in(i).valid := l2_valids(i - MLP_L1_SIZE) && l2_array(i - MLP_L1_SIZE).is_vaddr && !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !l2_evict
-      tlb_req_arb.io.in(i).bits.vaddr := l2_array(i - MLP_L1_SIZE).get_tlb_va()
+      tlb_req_vaddr := l2_array(i - MLP_L1_SIZE).get_tlb_va()
     }
+    tlb_req_arb.io.in(i).bits.vaddr := tlb_req_vaddr
     tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
     tlb_req_arb.io.in(i).bits.isPrefetch := true.B
     tlb_req_arb.io.in(i).bits.size := 3.U
@@ -612,6 +614,9 @@ class MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL
     tlb_req_arb.io.in(i).bits.hlvx := DontCare
     tlb_req_arb.io.in(i).bits.hyperinst := DontCare
     tlb_req_arb.io.in(i).bits.pmp_addr := DontCare
+    tlb_req_arb.io.in(i).bits.facA := tlb_req_vaddr(VAddrBits-1, sectorvpnOffLen)
+    tlb_req_arb.io.in(i).bits.facB := 0.U
+    tlb_req_arb.io.in(i).bits.facCarry := false.B
   }

   assert(PopCount(s0_tlb_fire_vec) <= 1.U, "s0_tlb_fire_vec should be one-hot or empty")
@@ -650,7 +655,7 @@ class MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL
     }.otherwise {
       val inner_index = s2_tlb_update_index - MLP_L1_SIZE.U
       l2_array(inner_index).is_vaddr := s2_tlb_resp.bits.miss
-
+
       when(!s2_tlb_resp.bits.miss) {
         l2_array(inner_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s2_tlb_resp.bits.paddr.head(s2_tlb_resp.bits.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
         when(s2_tlb_resp.bits.excp.head.pf.ld || s2_tlb_resp.bits.excp.head.gpf.ld || s2_tlb_resp.bits.excp.head.af.ld) {
@@ -808,7 +813,7 @@ class MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL
   XSPerfAccumulate("l2_prefetche_queue_busby", io.l2PfqBusy)

   XSPerfHistogram("filter_active", PopCount(VecInit(
-    l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v) } ++
+    l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v) } ++
     l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) }
   ).asUInt), true.B, 0, MLP_SIZE, 1)
   XSPerfHistogram("l1_filter_active", PopCount(VecInit(l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v)}).asUInt), true.B, 0, MLP_L1_SIZE, 1)
diff --git a/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala b/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala
index 96c85eb43da..ff71d8f3ead 100644
--- a/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala
+++ b/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala
@@ -264,7 +264,7 @@ class AGTEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelpe
   val access_cnt = UInt((REGION_BLKS-1).U.getWidth.W)
   val decr_mode = Bool()
   val single_update = Bool()//this is a signal update request
-  val has_been_signal_updated = Bool()
+  val has_been_signal_updated = Bool()
 }

 class PfGenReq()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
@@ -368,7 +368,7 @@ class ActiveGenerationTable()(implicit p: Parameters) extends XSModule with HasS
   s0_agt_entry.pht_tag := s0_lookup.pht_tag
   s0_agt_entry.region_bits := region_offset_to_bits(s0_lookup.region_offset)
   // update bits this time
-  s0_agt_entry.region_bit_single := region_offset_to_bits(s0_lookup.region_offset)
+  s0_agt_entry.region_bit_single := region_offset_to_bits(s0_lookup.region_offset)
   s0_agt_entry.region_tag := s0_lookup.region_tag
   s0_agt_entry.region_offset := s0_lookup.region_offset
   s0_agt_entry.access_cnt := 1.U
@@ -505,7 +505,7 @@ class ActiveGenerationTable()(implicit p: Parameters) extends XSModule with HasS
   s1_pht_lookup.region_vaddr := s1_region_vaddr
   s1_pht_lookup.region_paddr := s1_region_paddr
   s1_pht_lookup.region_offset := s1_region_offset
-  s1_pht_lookup.region_bit_single := s1_bit_region_signal
+  s1_pht_lookup.region_bit_single := s1_bit_region_signal

   io.s1_sel_stride := prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && !s1_in_active_page

@@ -664,7 +664,7 @@ class PatternHistoryTable()(implicit p: Parameters) extends XSModule with HasSMS
   val s1_region_offset = RegEnable(s0_region_offset, s1_reg_en)
   val s1_single_update = RegEnable(s0_single_update, s1_reg_en)
   val s1_has_been_single_update = RegEnable(s0_has_been_single_update, s1_reg_en)
-  val s1_region_bit_single = RegEnable(s0_region_bit_single, s1_reg_en)
+  val s1_region_bit_single = RegEnable(s0_region_bit_single, s1_reg_en)
   val s1_pht_valids = pht_valids_reg.map(way => Mux1H(
     (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
     way
@@ -745,13 +745,13 @@ class PatternHistoryTable()(implicit p: Parameters) extends XSModule with HasSMS


   // pipe s3: send addr/data to ram, gen pf_req
-  val s3_valid = GatedValidRegNext(s2_valid && signal_update_write, false.B)
+  val s3_valid = GatedValidRegNext(s2_valid && signal_update_write, false.B)
   val s3_evict = RegEnable(s2_evict, s2_valid)
   val s3_hist = RegEnable(s2_hist, s2_valid)
   val s3_hist_pf_gen = RegEnable(s2_hist_pf_gen, s2_valid)
   val s3_hist_update_mask = RegEnable(s2_hist_update_mask.asUInt, s2_valid)
-
+

   val s3_region_offset = RegEnable(s2_region_offset, s2_valid)
   val s3_region_offset_mask = RegEnable(s2_region_offset_mask, s2_valid)
   val s3_decr_mode = RegEnable(s2_decr_mode, s2_valid)
@@ -915,7 +915,7 @@ class PrefetchFilterEntry()(implicit p: Parameters) extends XSBundle with HasSMS
   val debug_source_type = UInt(log2Up(nSourceType).W)
 }

-class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
+class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper with HasTlbConst {
   val io = IO(new Bundle() {
     val gen_req = Flipped(ValidIO(new PfGenReq()))
     val tlb_req = new TlbRequestIO(2)
@@ -961,8 +961,9 @@ class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModul

   for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
     val is_evicted = s1_valid && s1_replace_vec(i)
+    val tlb_req_vaddr = Cat(ent.region_addr, 0.U(log2Up(REGION_SIZE).W))
     tlb_req_arb.io.in(i).valid := v && !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !ent.paddr_valid && !is_evicted
-    tlb_req_arb.io.in(i).bits.vaddr := Cat(ent.region_addr, 0.U(log2Up(REGION_SIZE).W))
+    tlb_req_arb.io.in(i).bits.vaddr := tlb_req_vaddr
     tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
     tlb_req_arb.io.in(i).bits.isPrefetch := true.B
     tlb_req_arb.io.in(i).bits.size := 3.U
@@ -975,6 +976,9 @@ class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModul
     tlb_req_arb.io.in(i).bits.hlvx := DontCare
     tlb_req_arb.io.in(i).bits.hyperinst := DontCare
     tlb_req_arb.io.in(i).bits.pmp_addr := DontCare
+    tlb_req_arb.io.in(i).bits.facA := tlb_req_vaddr(VAddrBits-1, sectorvpnOffLen)
+    tlb_req_arb.io.in(i).bits.facB := 0.U
+    tlb_req_arb.io.in(i).bits.facCarry := false.B

     val pending_req_vec = ent.region_bits & (~ent.filter_bits).asUInt
     val first_one_offset = PriorityMux(
diff --git a/src/main/scala/xiangshan/mem/vector/VSegmentUnit.scala b/src/main/scala/xiangshan/mem/vector/VSegmentUnit.scala
index 1ec9c985aad..ad9db59a8ca 100644
--- a/src/main/scala/xiangshan/mem/vector/VSegmentUnit.scala
+++ b/src/main/scala/xiangshan/mem/vector/VSegmentUnit.scala
@@ -73,6 +73,7 @@ class VSegmentUnit (implicit p: Parameters) extends VLSUModule
   with MemoryOpConstants
   with SdtrigExt
   with HasLoadHelper
+  with HasTlbConst
 {
   val io = IO(new VSegmentUnitIO)

@@ -415,14 +416,40 @@ class VSegmentUnit (implicit p: Parameters) extends VLSUModule


   val realSegmentOffset = Mux(isIndexed(issueInstType), indexStride, segmentOffset)
-  val vaddr = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset
+  val offset = (fieldIdx << alignedType).asUInt + realSegmentOffset
+  val vaddr = baseVaddr + offset
+  val tlb_req_facA = Wire(UInt(sectorvpnLen.W))
+  val tlb_req_facB = Wire(UInt(sectorvpnLen.W))
+  val tlb_req_facCarry = Wire(Bool())

   val misalignLowVaddr = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W))
-  val misalignHighVaddr = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W)) + 8.U
+  val misalignHighVaddr = Cat(latchVaddr(latchVaddr.getWidth - 1, 3) + 1.U, 0.U(3.W))
   val notCross16ByteVaddr = Cat(latchVaddr(latchVaddr.getWidth - 1, 4), 0.U(4.W))
 //  val misalignVaddr = Mux(notCross16ByteReg, notCross16ByteVaddr, Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr))
   val misalignVaddr = Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr)
   val tlbReqVaddr = Mux(isMisalignReg, misalignVaddr, vaddr)
+  tlb_req_facA := Mux(
+    isMisalignReg,
+    misalignLowVaddr(VAddrBits-1, sectorvpnOffLen),
+    baseVaddr(VAddrBits-1, sectorvpnOffLen)
+  )
+  tlb_req_facB := Mux(
+    isMisalignReg,
+    0.U,
+    offset(VAddrBits-1, sectorvpnOffLen)
+  )
+
+  val misalignCarry = misalignLowVaddr(sectorvpnOffLen-1, 0) +& 8.U
+  val facCarry = baseVaddr(sectorvpnOffLen-1, 0) +& offset(sectorvpnOffLen-1, 0)
+  tlb_req_facCarry := Mux(
+    isMisalignReg,
+    Mux(
+      isFirstSplit,
+      false.B,
+      misalignCarry(misalignCarry.getWidth-1)
+    ),
+    facCarry(facCarry.getWidth-1)
+  )

   //latch vaddr
   when(state === s_tlb_req && !isMisalignReg){
     latchVaddr := vaddr(VAddrBits - 1, 0)
@@ -446,6 +473,9 @@ class VSegmentUnit (implicit p: Parameters) extends VLSUModule
   io.dtlb.req.bits.no_translate := false.B
   io.dtlb.req.bits.debug.pc := instMicroOp.uop.pc
   io.dtlb.req.bits.debug.isFirstIssue := DontCare
+  io.dtlb.req.bits.facA := tlbReqVaddr(VAddrBits-1, sectorvpnOffLen)
+  io.dtlb.req.bits.facB := 0.U
+  io.dtlb.req.bits.facCarry := false.B
   io.dtlb.req_kill := false.B

   val canTriggerException = segmentIdx === 0.U || !instMicroOp.isFof // only elementIdx = 0 or is not fof can trigger
diff --git a/utility b/utility
index 74e73cbd244..7b691c42694 160000
--- a/utility
+++ b/utility
@@ -1 +1 @@
-Subproject commit 74e73cbd2444fb3c7decaff4aa167e79dc7380a7
+Subproject commit 7b691c426943d56e4c98113fa4dc7c33b9f2124a
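Requestors that only have a finished virtual address (AtomicsUnit, the L2-to-L1 TLB port in MemBlock, the L1 and SMS prefetch filters, VSegmentUnit) drive the new fields in the degenerate form `facA := vaddr(VAddrBits-1, sectorvpnOffLen)`, `facB := 0.U`, `facCarry := false.B`, while the ITLB request in IFU and the HybridUnit path tie them to `DontCare`. A hypothetical helper, not part of this diff, summarizing the two wiring patterns (widths are stand-ins for the `HasTlbConst` values):

```scala
import chisel3._

// Widths are illustrative stand-ins: facA/facB are sectorvpnLen wide and the
// split point is sectorvpnOffLen = offLen + sectortlbwidth.
class FacFields(val vaBits: Int, val offBits: Int) extends Bundle {
  val facA     = UInt((vaBits - offBits).W)
  val facB     = UInt((vaBits - offBits).W)
  val facCarry = Bool()
}

object FacFields {
  // Pattern 1: base and offset are still separate (LoadUnit/StoreUnit scalar issue path).
  def fromBaseAndOffset(base: UInt, offset: UInt, vaBits: Int = 48, offBits: Int = 15): FacFields = {
    val f = Wire(new FacFields(vaBits, offBits))
    val lowSum = base(offBits - 1, 0) +& offset(offBits - 1, 0) // widening add keeps the carry-out
    f.facA     := base(vaBits - 1, offBits)
    f.facB     := offset(vaBits - 1, offBits)
    f.facCarry := lowSum(offBits)
    f
  }

  // Pattern 2: the final vaddr is already known (atomics, prefetchers, the L2 TLB port):
  // with facB = 0 and no carry-in, facA + facB + facCarry is just facA, so the fast-adder
  // comparison asks the same question as the old vpn-to-tag equality.
  def fromVaddr(vaddr: UInt, vaBits: Int = 48, offBits: Int = 15): FacFields = {
    val f = Wire(new FacFields(vaBits, offBits))
    f.facA     := vaddr(vaBits - 1, offBits)
    f.facB     := 0.U
    f.facCarry := false.B
    f
  }
}
```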