Skip to content

Commit eb60a79

Browse files
authored
[AMDAIEAssignNpuDmaBdIds] Support both constant and semi-affine expressions for BD IDs (#946)
- Motivated by DMA chaining (#931), this update aims to encourage the use of different BD IDs wherever feasible.
- When the DMA operation resides in the innermost loop of the control code, the BD ID will naturally form a semi-affine expression.
- After loop unrolling, these BD IDs will simplify to constants.
- The `value` operand for `BdIdOp` is changed from `UI32Attr` to `Index` to accommodate either a constant or an expression. **_(question: Why use a separate `BdIdOp` instead of directly integrating as an operand into the DMA operation?)_**
- Add an `incremental` mode for `ChannelBdIdGenerator` that prioritizes increasing BD IDs during assignment.
1 parent ab64bca commit eb60a79

File tree

12 files changed

+539
-165
lines changed

12 files changed

+539
-165
lines changed

compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ def AMDAIE_BdIdOp: AMDAIE_Op<"bd_id", [
336336

337337
let arguments = (
338338
ins Index:$tile,
339-
UI32Attr:$value
339+
Index:$value
340340
);
341341

342342
let assemblyFormat = [{ `(` $tile `,` $value `)` attr-dict }];

compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/test/roundtrip.mlir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
// CHECK-LABEL: func.func @bd_id
44
// CHECK: %[[C0:.*]] = arith.constant 0 : index
55
// CHECK: %[[TILE_0:.*]] = amdaie.tile(%[[C0]], %[[C0]])
6-
// CHECK: %[[BD_ID:.*]] = amdaie.bd_id(%[[TILE_0]], 0)
6+
// CHECK: %[[BD_ID:.*]] = amdaie.bd_id(%[[TILE_0]], %[[C0]])
77
func.func @bd_id() {
88
%c0 = arith.constant 0 : index
99
%tile = amdaie.tile(%c0, %c0)
10-
%bd_id = amdaie.bd_id(%tile, 0)
10+
%bd_id = amdaie.bd_id(%tile, %c0)
1111
return
1212
}
1313

@@ -295,7 +295,7 @@ func.func @npu_dma_cpy_nd(%arg0: !amdaie.logicalobjectfifo<memref<1x1x8x16xi32,
295295
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
296296
// CHECK-DAG: %[[C128:.+]] = arith.constant 128 : index
297297
// CHECK-DAG: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]])
298-
// CHECK-DAG: %[[BD_ID_0_0:.+]] = amdaie.bd_id(%[[TILE_0_0]], 0)
298+
// CHECK-DAG: %[[BD_ID_0_0:.+]] = amdaie.bd_id(%[[TILE_0_0]], %[[C0]])
299299
// CHECK-DAG: %[[CONNECTION_0:.+]] = amdaie.connection
300300
// CHECK: %{{.*}} = amdaie.npu.dma_cpy_nd async_source
301301
// CHECK-SAME: %[[CONNECTION_0]]
@@ -308,7 +308,7 @@ func.func @npu_dma_cpy_nd_bd_id(%arg0: !amdaie.logicalobjectfifo<memref<1x1x8x16
308308
%c16 = arith.constant 16 : index
309309
%c128 = arith.constant 128 : index
310310
%tile = amdaie.tile(%c0, %c0)
311-
%bd_id = amdaie.bd_id(%tile, 0)
311+
%bd_id = amdaie.bd_id(%tile, %c0)
312312
%0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>, !amdaie.logicalobjectfifo<memref<8x16xi32, 1>>)
313313
%1 = amdaie.npu.dma_cpy_nd async_source %0([%c0, %c0, %c0, %c0] [%c1, %c1, %c8, %c16] [%c128, %c128, %c16, %c1] bd_id = %bd_id, [%c0, %c0, %c0, %c0] [%c1, %c1, %c8, %c16] [%c128, %c16, %c16, %c1] bd_id = %bd_id)
314314
return
@@ -371,7 +371,7 @@ func.func @npu_dma_cpy_nd_target_source(%arg0: !amdaie.logicalobjectfifo<memref<
371371
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
372372
// CHECK-DAG: %[[C128:.+]] = arith.constant 128 : index
373373
// CHECK-DAG: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]])
374-
// CHECK-DAG: %[[BD_ID_0_0:.+]] = amdaie.bd_id(%[[TILE_0_0]], 0)
374+
// CHECK-DAG: %[[BD_ID_0_0:.+]] = amdaie.bd_id(%[[TILE_0_0]], %[[C0]])
375375
// CHECK-DAG: %[[CONNECTION_0:.+]] = amdaie.connection
376376
// CHECK: %{{.*}} = amdaie.npu.dma_cpy_nd async_source %[[CONNECTION_0]]
377377
// CHECK-SAME: %[[ARG0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] [1, 1, %[[C8]], %[[C16]]] [%[[C128]], %[[C128]], %[[C16]], 1] bd_id = %[[BD_ID_0_0]]
@@ -383,7 +383,7 @@ func.func @npu_dma_cpy_nd_all_operands(%arg0: !amdaie.logicalobjectfifo<memref<1
383383
%c16 = arith.constant 16 : index
384384
%c128 = arith.constant 128 : index
385385
%tile = amdaie.tile(%c0, %c0)
386-
%bd_id = amdaie.bd_id(%tile, 0)
386+
%bd_id = amdaie.bd_id(%tile, %c0)
387387
%0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>, !amdaie.logicalobjectfifo<memref<8x16xi32, 1>>)
388388
%1 = amdaie.npu.dma_cpy_nd async_source %0(%arg0[%c0, %c0, %c0, %c0] [1, 1, %c8, %c16] [%c128, %c128, %c16, 1] bd_id = %bd_id, %arg1[%c0, %c0, %c0, %c0] [1, 1, %c8, %c16] [%c128, %c16, %c16, 1] bd_id = %bd_id) : target_type = !amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>> source_type = !amdaie.logicalobjectfifo<memref<8x16xi32, 1>>
389389
return
@@ -396,14 +396,14 @@ func.func @npu_dma_cpy_nd_all_operands(%arg0: !amdaie.logicalobjectfifo<memref<1
396396
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
397397
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
398398
// CHECK-DAG: %[[TILE_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]])
399-
// CHECK-DAG: %[[BD_ID:.+]] = amdaie.bd_id(%[[TILE_0_0]], 0)
399+
// CHECK-DAG: %[[BD_ID:.+]] = amdaie.bd_id(%[[TILE_0_0]], %[[C0]])
400400
// CHECK-DAG: %[[CHANNEL:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = DMA, direction = S2MM)
401401
// CHECK-DAG: %[[CONNECTION_0:.+]] = amdaie.connection
402402
func.func @npu_half_dma_cpy_nd(%arg0: !amdaie.logicalobjectfifo<memref<2048xi32>>, %arg1: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>>) {
403403
%c0 = arith.constant 0 : index
404404
%c1 = arith.constant 1 : index
405405
%tile_0_0 = amdaie.tile(%c0, %c0)
406-
%bd_id = amdaie.bd_id(%tile_0_0, 0)
406+
%bd_id = amdaie.bd_id(%tile_0_0, %c0)
407407
%channel = amdaie.channel(%tile_0_0, 0, port_type = DMA, direction = S2MM)
408408
%0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<2048xi32>>, !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>>)
409409
// CHECK: amdaie.npu.half_dma_cpy_nd %[[CONNECTION_0]](%[[ARG0]] [] [] []) : !amdaie.logicalobjectfifo<memref<2048xi32>>

0 commit comments

Comments
 (0)