Skip to content

Commit b537f52

Browse files
authored
Batch: rename BatchInterval to BatchStep, move to tail of StepInfo (#564)
Previously, we placed BatchInterval at the head of StepInfo and used it to update the index of the software buffer. This change renames BatchInterval to BatchStep and appends it to StepInfo in BatchCollector rather than in BatchAssembler, which simplifies the logic. In addition, putting BatchStep at the tail of StepInfo allows some operations to be performed after Batch parsing, which is needed by Incremental transmit.
1 parent 92e75e0 commit b537f52

File tree

2 files changed

+43
-36
lines changed

2 files changed

+43
-36
lines changed

src/main/scala/Batch.scala

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,12 @@ case class BatchParam(config: GatewayConfig, bundles: Seq[DifftestBundle]) {
3939
val StepGroupSize = bundles.distinctBy(_.desiredCppName).length
4040
val StepDataByteLen = bundles.map(_.getByteAlignWidth).map { w => w / 8 }.sum
4141
val StepDataBitLen = StepDataByteLen * 8
42-
val StepInfoByteLen = StepGroupSize * (infoWidth / 8)
42+
val StepInfoByteLen = (StepGroupSize + 1) * (infoWidth / 8) // Include BatchStep to update buffer index
4343
val StepInfoBitLen = StepInfoByteLen * 8
4444

4545
// Width of statistic for data/info byte length
4646
val StatsDataWidth = log2Ceil(math.max(MaxDataByteLen, StepDataByteLen))
47-
val StatsInfoWidth = log2Ceil(math.max(MaxInfoSize, StepGroupSize))
47+
val StatsInfoWidth = log2Ceil(math.max(MaxInfoSize, StepGroupSize + 1))
4848

4949
// Truncate width when shifting to reduce useless gates
5050
val TruncDataBitLen = math.min(MaxDataBitLen, StepDataBitLen)
@@ -123,7 +123,7 @@ class BatchCollector(bundles: Seq[Valid[DifftestBundle]], param: BatchParam) ext
123123
val step_enable = IO(Output(Bool()))
124124

125125
val sorted =
126-
in.groupBy(_.bits.desiredCppName).values.toSeq.sortBy(gens => gens.length * gens.head.bits.getByteAlignWidth)
126+
in.groupBy(_.bits.desiredCppName).values.toSeq.sortBy(gen => gen.length * gen.head.bits.getByteAlignWidth).reverse
127127
// Stage 1: concat bundles with same desiredCppName
128128
val group_bitlen = sorted.map(_.head.bits.getByteAlignWidth)
129129
val group_length = sorted.map(_.length)
@@ -175,23 +175,33 @@ class BatchCollector(bundles: Seq[Valid[DifftestBundle]], param: BatchParam) ext
175175
val info_num = delay_group_status.last.info_size
176176
step_enable := info_num =/= 0.U
177177
step_status := delay_group_status
178+
// append BatchStep to last step_status
179+
step_status.last.info_size := delay_group_status.last.info_size + 1.U
180+
// Use BatchStep to update index of software buffer
181+
val BatchStep = Wire(new BatchInfo)
182+
BatchStep.id := Batch.getTemplate.length.U
183+
BatchStep.num := info_num // unused, only for debugging
184+
// Collect from the tail: collect(i) includes the last (i + 1) groups, i.e. reversed groups 0~i
185+
val toCollect_data = delay_group_data.reverse
186+
val toCollect_info = delay_group_info.reverse
187+
val toCollect_vsize = delay_group_vsize.reverse
178188
val collect_data = Wire(MixedVec(Seq.tabulate(param.StepGroupSize) { idx =>
179-
UInt(delay_group_data.take(idx + 1).map(_.getWidth).sum.W)
189+
UInt(toCollect_data.take(idx + 1).map(_.getWidth).sum.W)
180190
}))
181191
val collect_info = Wire(MixedVec(Seq.tabulate(param.StepGroupSize) { idx =>
182-
UInt(((idx + 1) * param.infoWidth).W)
192+
UInt(((idx + 2) * param.infoWidth).W)
183193
}))
184-
// Collect from head, collect(i) include 0~i
185-
collect_data(0) := delay_group_data(0)
186-
collect_info(0) := delay_group_info(0)
194+
195+
collect_data(0) := toCollect_data(0)
196+
collect_info(0) := Mux(toCollect_vsize(0) =/= 0.U, Cat(BatchStep.asUInt, toCollect_info(0)), BatchStep.asUInt)
187197
(1 until param.StepGroupSize).foreach { idx =>
188-
val cat_map = Seq.tabulate(group_length(idx) + 1) { len =>
189-
(len.U, Cat(collect_data(idx - 1), delay_group_data(idx)(len * group_bitlen(idx) - 1, 0)))
198+
val cat_map = Seq.tabulate(group_length.reverse(idx) + 1) { len =>
199+
(len.U, Cat(collect_data(idx - 1), toCollect_data(idx)(len * group_bitlen.reverse(idx) - 1, 0)))
190200
}
191-
collect_data(idx) := LookupTree(delay_group_vsize(idx), cat_map)
201+
collect_data(idx) := LookupTree(toCollect_vsize(idx), cat_map)
192202
collect_info(idx) := Mux(
193-
delay_group_vsize(idx) =/= 0.U,
194-
Cat(collect_info(idx - 1), delay_group_info(idx)),
203+
toCollect_vsize(idx) =/= 0.U,
204+
Cat(collect_info(idx - 1), toCollect_info(idx)),
195205
collect_info(idx - 1),
196206
)
197207
}
@@ -227,8 +237,8 @@ class BatchAssembler(
227237
val delay_step_enable = RegNext(step_enable)
228238
val delay_step_trace_info = Option.when(config.hasReplay)(RegNext(step_trace_info.get))
229239
val data_bytes_avail = param.MaxDataByteLen.U -& state_status.data_bytes
230-
// Always leave space for BatchFinish and BatchInterval, use MaxInfoSize - 2
231-
val info_size_avail = (param.MaxInfoSize - 2).U -& state_status.info_size
240+
// Always leave space for BatchFinish, use MaxInfoSize - 1
241+
val info_size_avail = (param.MaxInfoSize - 1).U -& state_status.info_size
232242
val data_exceed = Wire(Bool())
233243
val info_exceed = Wire(Bool())
234244
val append_data = Wire(UInt(param.TruncDataBitLen.W))
@@ -239,10 +249,6 @@ class BatchAssembler(
239249
val next_state_info = Wire(UInt(param.MaxInfoBitLen.W))
240250
val next_state_stats = Wire(new BatchStats(param))
241251

242-
// Use BatchInterval to update index of software buffer
243-
val BatchInterval = Wire(new BatchInfo)
244-
BatchInterval.id := Batch.getTemplate.length.U
245-
BatchInterval.num := delay_step_status.last.info_size // unused, only for debugging
246252
val BatchFinish = Wire(new BatchInfo)
247253
BatchFinish.id := (Batch.getTemplate.length + 1).U
248254
BatchFinish.num := finish_step
@@ -274,13 +280,13 @@ class BatchAssembler(
274280
assert(remain_stats.data_bytes <= param.MaxDataByteLen.U)
275281
assert(remain_stats.info_size + 1.U <= param.MaxInfoSize.U)
276282

277-
val concat_data = (delay_step_data >> (remain_stats.data_bytes << 3).asUInt).asUInt
278-
val concat_info = (delay_step_info >> (remain_stats.info_size * param.infoWidth.U)).asUInt
279283
// Note we need only lowest bits to update state, truncate high bits to reduce gates
280-
val remain_data = (~(~0.U(param.TruncDataBitLen.W) <<
281-
(remain_stats.data_bytes << 3).asUInt)).asUInt & delay_step_data
282-
val remain_info = (~(~0.U(param.StepInfoBitLen.W) <<
283-
(remain_stats.info_size * param.infoWidth.U))).asUInt & delay_step_info
284+
val concat_data = (~(~0.U(param.TruncDataBitLen.W) <<
285+
(concat_stats.data_bytes << 3).asUInt)).asUInt & delay_step_data
286+
val concat_info = (~(~0.U(param.StepInfoBitLen.W) <<
287+
(concat_stats.info_size * param.infoWidth.U))).asUInt & delay_step_info
288+
val remain_data = (delay_step_data >> (concat_stats.data_bytes << 3).asUInt).asUInt
289+
val remain_info = (delay_step_info >> (concat_stats.info_size * param.infoWidth.U)).asUInt
284290

285291
// Delay step can be partly appended to output for making full use of transmission param
286292
// Avoid appending when step equals batchSize(delay_step_exceed), last appended data will overwrite first step data
@@ -290,20 +296,20 @@ class BatchAssembler(
290296
finish_step := state_step_cnt + Mux(append_whole, 1.U, 0.U)
291297

292298
append_data := Mux(has_append, concat_data(param.TruncDataBitLen - 1, 0), 0.U)
293-
val append_finish_map = Seq.tabulate(param.StepGroupSize) { g =>
299+
val append_finish_map = Seq.tabulate(param.StepGroupSize + 2) { g =>
294300
(g.U, (BatchFinish.asUInt << (g * param.infoWidth)).asUInt)
295301
}
296302
append_info := Mux(
297303
has_append,
298-
Cat(concat_info | LookupTree(concat_stats.info_size, append_finish_map), BatchInterval.asUInt),
304+
concat_info | LookupTree(concat_stats.info_size, append_finish_map),
299305
BatchFinish.asUInt,
300306
)
301307

302308
next_state_step_cnt := Mux(has_append && append_whole, 0.U, 1.U)
303309
next_state_data := Mux(has_append, remain_data, delay_step_data)
304-
next_state_info := Mux(has_append, remain_info, Cat(delay_step_info, BatchInterval.asUInt))
310+
next_state_info := Mux(has_append, remain_info, delay_step_info)
305311
next_state_stats.data_bytes := Mux(has_append, remain_stats.data_bytes, delay_step_status.last.data_bytes)
306-
next_state_stats.info_size := Mux(has_append, remain_stats.info_size, delay_step_status.last.info_size + 1.U)
312+
next_state_stats.info_size := Mux(has_append, remain_stats.info_size, delay_step_status.last.info_size)
307313
} else {
308314
data_exceed := delay_step_enable && delay_step_status.last.data_bytes > data_bytes_avail
309315
info_exceed := delay_step_enable && delay_step_status.last.info_size > info_size_avail
@@ -316,9 +322,9 @@ class BatchAssembler(
316322

317323
next_state_step_cnt := 1.U
318324
next_state_data := delay_step_data
319-
next_state_info := Cat(delay_step_info, BatchInterval.asUInt)
325+
next_state_info := delay_step_info
320326
next_state_stats.data_bytes := delay_step_status.last.data_bytes
321-
next_state_stats.info_size := delay_step_status.last.info_size + 1.U
327+
next_state_stats.info_size := delay_step_status.last.info_size
322328
}
323329

324330
// Stage 2:
@@ -352,6 +358,7 @@ class BatchAssembler(
352358
out.step := Mux(out.enable, finish_step, 0.U)
353359

354360
val state_update = delay_step_enable || state_flush || timeout
361+
355362
when(state_update) {
356363
when(delay_step_enable) {
357364
when(should_tick) {
@@ -365,9 +372,9 @@ class BatchAssembler(
365372
state_data := state_data |
366373
(delay_step_data(param.TruncDataBitLen - 1, 0) << (state_status.data_bytes << 3).asUInt).asUInt
367374
state_info := state_info |
368-
(Cat(delay_step_info, BatchInterval.asUInt) << (state_status.info_size * param.infoWidth.U)).asUInt
375+
(delay_step_info << (state_status.info_size * param.infoWidth.U)).asUInt
369376
state_status.data_bytes := state_status.data_bytes + delay_step_status.last.data_bytes
370-
state_status.info_size := state_status.info_size + delay_step_status.last.info_size + 1.U
377+
state_status.info_size := state_status.info_size + delay_step_status.last.info_size
371378
if (config.hasReplay) state_trace_size.get := state_trace_size.get + delay_step_trace_info.get.trace_size
372379
}
373380
}.otherwise { // state_flush without new-coming step

src/main/scala/DPIC.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ class DPICBatch(template: Seq[DifftestBundle], batchIO: BatchIO, config: Gateway
222222

223223
override def desiredName: String = "DifftestBatch"
224224
override def dpicFuncAssigns: Seq[String] = {
225-
val bundleEnum = template.map(_.desiredModuleName.replace("Difftest", "")) ++ Seq("BatchInterval", "BatchFinish")
225+
val bundleEnum = template.map(_.desiredModuleName.replace("Difftest", "")) ++ Seq("BatchStep", "BatchFinish")
226226
val bundleAssign = template.zipWithIndex.map { case (t, idx) =>
227227
val bundleName = bundleEnum(idx)
228228
val perfName = "perf_Batch_" + bundleName
@@ -265,7 +265,7 @@ class DPICBatch(template: Seq[DifftestBundle], batchIO: BatchIO, config: Gateway
265265
| ${bundleEnum.mkString(",\n ")}
266266
| };
267267
| extern void simv_nstep(uint8_t step);
268-
| static int dut_index = -1;
268+
| static int dut_index = 0;
269269
| $batchDecl
270270
| for (int i = 0; i < $infoLen; i++) {
271271
| uint8_t id = info[i].id;
@@ -277,7 +277,7 @@ class DPICBatch(template: Seq[DifftestBundle], batchIO: BatchIO, config: Gateway
277277
|#endif // CONFIG_DIFFTEST_INTERNAL_STEP
278278
| break;
279279
| }
280-
| else if (id == BatchInterval) {
280+
| else if (id == BatchStep) {
281281
| dut_index = (dut_index + 1) % CONFIG_DIFFTEST_BATCH_SIZE;
282282
|#ifdef CONFIG_DIFFTEST_QUERY
283283
| difftest_query_step();

0 commit comments

Comments
 (0)