-
Couldn't load subscription status.
- Fork 42
Open
Description
To reproduce the error, you need the branch from #1297, which has not been merged yet.
Input IR:
// Reproducer: element-wise truncation of a 128-element f32 tensor to bf16.
func.func @truncf(%arg0: tensor<128xf32>) -> tensor<128xbf16> {
// Destination (outs) operand for the linalg.generic below.
%0 = tensor.empty() : tensor<128xbf16>
// One parallel dimension; both operands use the identity indexing map.
%1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%arg0 : tensor<128xf32>) outs(%0 : tensor<128xbf16>) {
^bb0(%in: f32, %out: bf16):
// The body only reads %in; %out is not used.
%1 = arith.truncf %in : f32 to bf16
linalg.yield %1 : bf16
} -> tensor<128xbf16>
return %1 : tensor<128xbf16>
}
Command:
iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-targets --iree-amdaie-tile-pipeline=general-copy --iree-amdaie-target-device=npu1_4col --mlir-print-ir-after-all --mlir-disable-threading truncf.mlir &>truncf_afterall.mlir
The IR before failure:
// Compiler IR dump of the dispatch function, captured before the failing pass.
module {
func.func @truncf_dispatch_0_elementwise_128_f32xbf16() attributes {translation_info = #iree_codegen.translation_info<pipeline = Custom>} {
%c0 = arith.constant 0 : index
// Memory-space-2 buffers; these are the ins/outs operands of the linalg.generic inside amdaie.core.
%alloc = memref.alloc() : memref<128xbf16, 2 : i32>
%alloc_0 = memref.alloc() : memref<128xf32, 2 : i32>
// Memory-space-1 buffers, each wrapped in a logical objectFifo.
%alloc_1 = memref.alloc() : memref<128xbf16, 1 : i32>
// NOTE(review): %lof is the only from_memref in this dump with the memref<128xbf16, 1 : i32>
// type quoted in the reported error message.
%lof = amdaie.logicalobjectfifo.from_memref %alloc_1, {} : memref<128xbf16, 1 : i32> -> !amdaie.logicalobjectfifo<memref<128xbf16, 1 : i32>>
%alloc_2 = memref.alloc() : memref<128xf32, 1 : i32>
%lof_3 = amdaie.logicalobjectfifo.from_memref %alloc_2, {} : memref<128xf32, 1 : i32> -> !amdaie.logicalobjectfifo<memref<128xf32, 1 : i32>>
// HAL binding 0 (ReadOnly, f32) and binding 1 (bf16), each wrapped in a logical objectFifo.
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : memref<128xf32>
%lof_4 = amdaie.logicalobjectfifo.from_memref %0, {} : memref<128xf32> -> !amdaie.logicalobjectfifo<memref<128xf32>>
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags(Indirect) : memref<128xbf16>
%lof_5 = amdaie.logicalobjectfifo.from_memref %1, {} : memref<128xbf16> -> !amdaie.logicalobjectfifo<memref<128xbf16>>
scf.forall (%arg0) in (1) {
// DMAs between the HAL-binding objectFifos and the memory-space-1 objectFifos.
// Presumably the first operand is the target and the second the source — confirm against the op definition.
%2 = amdaie.dma_cpy_nd(%lof_3[0] [128] [1], %lof_4[0] [128] [1]) : (!amdaie.logicalobjectfifo<memref<128xf32, 1 : i32>>, !amdaie.logicalobjectfifo<memref<128xf32>>)
%3 = amdaie.dma_cpy_nd(%lof[0] [128] [1], %lof_5[0] [128] [1]) : (!amdaie.logicalobjectfifo<memref<128xbf16, 1 : i32>>, !amdaie.logicalobjectfifo<memref<128xbf16>>)
%lof_6 = amdaie.logicalobjectfifo.from_memref %alloc, {} : memref<128xbf16, 2 : i32> -> !amdaie.logicalobjectfifo<memref<128xbf16, 2 : i32>>
%lof_7 = amdaie.logicalobjectfifo.from_memref %alloc_0, {} : memref<128xf32, 2 : i32> -> !amdaie.logicalobjectfifo<memref<128xf32, 2 : i32>>
scf.forall (%arg1) in (1) {
// DMAs between the memory-space-1 and memory-space-2 objectFifos.
// %lof appears in both %6 and %7, paired with %lof_6 in opposite operand positions.
%5 = amdaie.dma_cpy_nd(%lof_7[0] [128] [1], %lof_3[0] [128] [1]) : (!amdaie.logicalobjectfifo<memref<128xf32, 2 : i32>>, !amdaie.logicalobjectfifo<memref<128xf32, 1 : i32>>)
%6 = amdaie.dma_cpy_nd(%lof_6[0] [128] [1], %lof[0] [128] [1]) : (!amdaie.logicalobjectfifo<memref<128xbf16, 2 : i32>>, !amdaie.logicalobjectfifo<memref<128xbf16, 1 : i32>>)
%7 = amdaie.dma_cpy_nd(%lof[0] [128] [1], %lof_6[0] [128] [1]) : (!amdaie.logicalobjectfifo<memref<128xbf16, 1 : i32>>, !amdaie.logicalobjectfifo<memref<128xbf16, 2 : i32>>)
// Tile operands: %c0 and (%arg1 + 2).
%c2 = arith.constant 2 : index
%8 = arith.addi %arg1, %c2 : index
%tile_0_r = amdaie.tile(%c0, %8)
// The core lists DMAs %5 and %6 as `in` and %7 as `out`.
%9 = amdaie.core(%tile_0_r, in : [%5, %6], out : [%7]) {
// Same f32 -> bf16 truncation, now on the memory-space-2 memrefs.
linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%alloc_0 : memref<128xf32, 2 : i32>) outs(%alloc : memref<128xbf16, 2 : i32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[128], [128]]>} {
^bb0(%in: f32, %out: bf16):
%10 = arith.truncf %in : f32 to bf16
linalg.yield %10 : bf16
}
amdaie.end
}
} {mapping = [#gpu.thread<y>]}
// DMA between the bf16 memory-space-1 objectFifo (%lof) and the binding-1 objectFifo (%lof_5).
%4 = amdaie.dma_cpy_nd(%lof_5[0] [128] [1], %lof[0] [128] [1]) : (!amdaie.logicalobjectfifo<memref<128xbf16>>, !amdaie.logicalobjectfifo<memref<128xbf16, 1 : i32>>)
} {mapping = [#gpu.block<y>]}
memref.dealloc %alloc_2 : memref<128xf32, 1 : i32>
memref.dealloc %alloc_1 : memref<128xbf16, 1 : i32>
memref.dealloc %alloc_0 : memref<128xf32, 2 : i32>
memref.dealloc %alloc : memref<128xbf16, 2 : i32>
return
}
}
Error:
<unknown>:0: error: 'amdaie.logicalobjectfifo.from_memref' op No source or target tiles found
<unknown>:0: note: see current operation: %5 = "amdaie.logicalobjectfifo.from_memref"(%4) : (memref<128xbf16, 1 : i32>) -> !amdaie.logicalobjectfifo<memref<128xbf16, 1 : i32>>
truncf.mlir:3:8: error: 'builtin.module' op local tile assignment failed
Metadata
Metadata
Assignees
Labels
No labels