From 041342d37f656f3177007b2a6bae510cc529f59b Mon Sep 17 00:00:00 2001 From: Richard Yan Date: Tue, 28 Jan 2025 14:23:16 -0800 Subject: [PATCH] update loop tracking --- src/main/scala/gemmini/Controller.scala | 8 +++++++- src/main/scala/gemmini/LoopMatmul.scala | 8 ++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/main/scala/gemmini/Controller.scala b/src/main/scala/gemmini/Controller.scala index 1c44ebd9..2c6677d5 100644 --- a/src/main/scala/gemmini/Controller.scala +++ b/src/main/scala/gemmini/Controller.scala @@ -261,7 +261,7 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data] new ComputeRs(mvin_rows_bits, mvin_cols_bits, local_addr_t), new ComputeRs(mvin_rows_bits, mvin_cols_bits, local_addr_t), has_training_convs, has_max_pool, has_first_layer_optimizations, has_dw_convs) } - val (loop_cmd, loop_matmul_unroller_busy) = withClock (gated_clock) { LoopMatmul(conv_cmd, reservation_station.io.matmul_ld_completed, reservation_station.io.matmul_st_completed, reservation_station.io.matmul_ex_completed, + val (loop_cmd, loop_matmul_unroller_busy, loop_completed) = withClock (gated_clock) { LoopMatmul(conv_cmd, reservation_station.io.matmul_ld_completed, reservation_station.io.matmul_st_completed, reservation_station.io.matmul_ex_completed, meshRows*tileRows, coreMaxAddrBits, reservation_station_entries, max_lds, max_exs, max_sts, sp_banks * sp_bank_entries, acc_banks * acc_bank_entries, inputType.getWidth, accType.getWidth, dma_maxbytes, new MvinRs2(mvin_rows_bits, mvin_cols_bits, local_addr_t), new PreloadRs(mvin_rows_bits, mvin_cols_bits, local_addr_t), new PreloadRs(mvout_rows_bits, mvout_cols_bits, local_addr_t), @@ -276,6 +276,12 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data] reservation_station.io.alloc.valid := false.B reservation_station.io.alloc.bits := unrolled_cmd.bits + val completion_io = IO(new Bundle { + val completed = Output(loop_completed.cloneType) + }) + + completion_io.completed := loop_completed + /* //------------------------------------------------------------------------- // finish muxing control signals to rob (risc) or tiler (cisc) diff --git a/src/main/scala/gemmini/LoopMatmul.scala b/src/main/scala/gemmini/LoopMatmul.scala index 79ab9e8b..158ec6be 100644 --- a/src/main/scala/gemmini/LoopMatmul.scala +++ b/src/main/scala/gemmini/LoopMatmul.scala @@ -905,6 +905,7 @@ class LoopMatmul(block_size: Int, coreMaxAddrBits: Int, reservation_station_size val st_completed = Input(UInt(log2Up(reservation_station_size+1).W)) val ex_completed = Input(UInt(log2Up(reservation_station_size+1).W)) val busy = Output(Bool()) + val completed = Output(Vec(2, Bool())) }) // Create states @@ -936,6 +937,8 @@ class LoopMatmul(block_size: Int, coreMaxAddrBits: Int, reservation_station_size io.busy := cmd.valid || loop_configured + io.completed := 0.U.asTypeOf(io.completed.cloneType) + // Create ld arbiters val ldab_arb = Module(new WeightedArbiter(new RoCCCommand(), maxWeightA=255, staticWeightAEnabled=true)) // TODO magic numbers ldab_arb.io.inA <> ldA.io.cmd @@ -1282,6 +1285,7 @@ class LoopMatmul(block_size: Int, coreMaxAddrBits: Int, reservation_station_size when (head_loop.running && head_loop.all_completed()) { head_loop.reset() + io.completed(head_loop_id) := true.B head_loop_id := ~head_loop_id } @@ -1302,7 +1306,7 @@ object LoopMatmul { max_addr: Int, max_acc_addr: Int, input_w: Int, acc_w: Int, dma_max_bytes: Int, mvin_rs2_t: MvinRs2, preload_rs1_t: PreloadRs, preload_rs2_t: PreloadRs, compute_rs1_t: ComputeRs, compute_rs2_t: ComputeRs, mvout_spad_rs1_t: MvoutSpadRs1, mvout_rs2_t: MvoutRs2) - (implicit p: Parameters): (DecoupledIO[GemminiCmd], Bool) = { + (implicit p: Parameters): (DecoupledIO[GemminiCmd], Bool, Vec[Bool]) = { val mod = Module(new LoopMatmul(block_size, coreMaxAddrBits, rob_size, max_lds, max_exs, max_sts, max_addr, max_acc_addr, input_w, acc_w, dma_max_bytes, mvin_rs2_t, preload_rs1_t, preload_rs2_t, compute_rs1_t, compute_rs2_t, mvout_spad_rs1_t, mvout_rs2_t)) @@ -1310,7 +1314,7 @@ object LoopMatmul { mod.io.ld_completed := ld_completed mod.io.st_completed := st_completed mod.io.ex_completed := ex_completed - (mod.io.out, mod.io.busy) + (mod.io.out, mod.io.busy, mod.io.completed) } def castDramOffset(dram_offset: UInt): UInt = {