From 8911884f9de7483bbad5c4a53675178ff63f8bf6 Mon Sep 17 00:00:00 2001 From: Yu-Zhewen Date: Thu, 13 Feb 2025 22:26:10 +0000 Subject: [PATCH 1/2] init commit --- .../Transforms/AMDAIEAssignChannels.cpp | 149 +++++++++----- .../AMDAIEGenerateControlOverlay.cpp | 53 ++--- .../iree-amd-aie/Transforms/Passes.cpp | 4 +- .../Transforms/test/assign_channels.mlir | 37 ++++ .../test/generate_control_overlay.mlir | 21 +- .../aie_runtime/Utils/ChannelGenerator.h | 188 +++++++++++++----- .../Utils/test/ChannelGeneratorTest.cpp | 150 +++++--------- 7 files changed, 353 insertions(+), 249 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAssignChannels.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAssignChannels.cpp index 6075286fe..0a86443a5 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAssignChannels.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAssignChannels.cpp @@ -16,6 +16,62 @@ namespace mlir::iree_compiler::AMDAIE { namespace { +/// Initializes channel generators for tiles by detecting DMA channels +/// previously assigned by other passes (e.g., for control packets) and +/// registering them to prevent conflicts. +LogicalResult initializeChannelsGenerators( + AMDAIE::WorkgroupOp workgroupOp, const AMDAIEDeviceModel &deviceModel, + DenseMap &tileToGeneratorMap) { + // Get the number of producer and consumer channels for each tile. 
+ workgroupOp.walk([&](AMDAIE::TileOp tileOp) { + uint32_t col = getConstantIndexOrAssert(tileOp.getCol()); + uint32_t row = getConstantIndexOrAssert(tileOp.getRow()); + AMDAIETileType tileType = deviceModel.getTileType(col, row); + uint8_t numDmaChannels = + deviceModel.getDmaProp(tileType, AMDAIEDmaProp::NumChannels); + tileToGeneratorMap[tileOp.getResult()] = + ChannelGenerator(numDmaChannels, numDmaChannels); + }); + + WalkResult res = workgroupOp.walk([&](AMDAIE::ConnectionOp connectionOp) { + ChannelAssignmentMode mode = + (connectionOp.getConnectionType() == AMDAIE::ConnectionType::Packet) + ? ChannelAssignmentMode::RoundRobinPacketFlow + : ChannelAssignmentMode::FirstAvailableCircuitFlow; + // Check source DMA channels previously assigned by other passes, + // and register them in `ChannelGenerator` using `assignProducerDMAChannel`. + for (Value source : connectionOp.getSourceChannels()) { + auto channelOp = dyn_cast(source.getDefiningOp()); + if (!channelOp) { + connectionOp.emitOpError() << "expected a `amdaie.channel` op source"; + return WalkResult::interrupt(); + } + if (channelOp.getPortType() == StrmSwPortType::DMA) { + Value tile = channelOp.getTileOp().getResult(); + tileToGeneratorMap[tile].assignProducerDMAChannel(channelOp.getValue(), + mode); + } + } + // Check target DMA channels previously assigned by other passes, + // and register them in `ChannelGenerator` using `assignConsumerDMAChannel`. 
+ for (Value target : connectionOp.getTargetChannels()) { + auto channelOp = dyn_cast(target.getDefiningOp()); + if (!channelOp) { + connectionOp.emitOpError() << "expected a `amdaie.channel` op target"; + return WalkResult::interrupt(); + } + if (channelOp.getPortType() == StrmSwPortType::DMA) { + Value tile = channelOp.getTileOp().getResult(); + tileToGeneratorMap[tile].assignConsumerDMAChannel(channelOp.getValue(), + mode); + } + } + return WalkResult::advance(); + }); + if (res.wasInterrupted()) return failure(); + return success(); +} + /// Assign channels to `amdaie.connection` ops. LogicalResult assignChannels(AMDAIE::WorkgroupOp workgroupOp) { IRRewriter rewriter(workgroupOp->getContext()); @@ -27,19 +83,13 @@ LogicalResult assignChannels(AMDAIE::WorkgroupOp workgroupOp) { << "could not find an AMDAIEDevice attribute"; } AMDAIEDeviceModel deviceModel = AMDAIE::getDeviceModel(device.value()); - - // Get the number of producer and consumer channels for each tile. + // Initialize channel generators for tiles. DenseMap tileToGeneratorMap; - workgroupOp.walk([&](AMDAIE::TileOp tileOp) { - uint32_t col = getConstantIndexOrAssert(tileOp.getCol()); - uint32_t row = getConstantIndexOrAssert(tileOp.getRow()); - AMDAIETileType tileType = deviceModel.getTileType(col, row); - uint8_t numDmaChannels = - deviceModel.getDmaProp(tileType, AMDAIEDmaProp::NumChannels); - tileToGeneratorMap[tileOp.getResult()] = - ChannelGenerator(numDmaChannels, numDmaChannels); - }); - + if (failed(initializeChannelsGenerators(workgroupOp, deviceModel, + tileToGeneratorMap))) { + return failure(); + } + // Get all `amdaie.connection` ops. 
SmallVector connectionOps; workgroupOp->walk([&](AMDAIE::ConnectionOp connectionOp) { connectionOps.push_back(connectionOp); @@ -59,48 +109,49 @@ LogicalResult assignChannels(AMDAIE::WorkgroupOp workgroupOp) { return connectionOp.emitOpError() << "expected a `LogicalObjFifoOpInterface` target"; } - std::optional connectionType = - connectionOp.getConnectionType(); - bool isPacketFlow = connectionType && connectionType.value() == - AMDAIE::ConnectionType::Packet; - + ChannelAssignmentMode mode = + (connectionOp.getConnectionType() == AMDAIE::ConnectionType::Packet) + ? ChannelAssignmentMode::RoundRobinPacketFlow + : ChannelAssignmentMode::FirstAvailableCircuitFlow; rewriter.setInsertionPoint(connectionOp); - SmallVector sourceChannels; - for (Value tile : sourceLogicalObjFifo.getTiles()) { - assert(tileToGeneratorMap.contains(tile) && - "no channel generator found for tile"); - std::optional maybeChannel = - tileToGeneratorMap[tile].getProducerDMAChannel(); - if (!maybeChannel) { - return connectionOp.emitOpError() - << "no producer DMA channel available"; + SmallVector sourceChannels = connectionOp.getSourceChannels(); + // Assign source (producer) DMA channels if not already assigned. + if (sourceChannels.empty()) { + for (Value tile : sourceLogicalObjFifo.getTiles()) { + assert(tileToGeneratorMap.contains(tile) && + "no channel generator found for tile"); + std::optional maybeChannel = + tileToGeneratorMap[tile].getAndAssignProducerDMAChannel(mode); + if (!maybeChannel) { + return connectionOp.emitOpError() + << "no producer DMA channel available"; + } + auto channelOp = rewriter.create( + rewriter.getUnknownLoc(), tile, maybeChannel.value(), + StrmSwPortType::DMA, AMDAIE::DMAChannelDir::MM2S); + sourceChannels.push_back(channelOp.getResult()); } - // Only assign the channel if it is for circuit flow. 
- if (!isPacketFlow) - tileToGeneratorMap[tile].assignProducerDMAChannel(maybeChannel.value()); - auto channelOp = rewriter.create( - rewriter.getUnknownLoc(), tile, maybeChannel.value(), - StrmSwPortType::DMA, AMDAIE::DMAChannelDir::MM2S); - sourceChannels.push_back(channelOp.getResult()); } - SmallVector targetChannels; - for (Value tile : targetLogicalObjFifo.getTiles()) { - assert(tileToGeneratorMap.contains(tile) && - "no channel generator found for tile"); - std::optional maybeChannel = - tileToGeneratorMap[tile].getConsumerDMAChannel(); - if (!maybeChannel) { - return connectionOp.emitOpError() - << "no consumer DMA channel available"; + // Assign target (consumer) DMA channels if not already assigned. + SmallVector targetChannels = connectionOp.getTargetChannels(); + if (targetChannels.empty()) { + for (Value tile : targetLogicalObjFifo.getTiles()) { + assert(tileToGeneratorMap.contains(tile) && + "no channel generator found for tile"); + std::optional maybeChannel = + tileToGeneratorMap[tile].getAndAssignConsumerDMAChannel(mode); + if (!maybeChannel) { + return connectionOp.emitOpError() + << "no consumer DMA channel available"; + } + auto channelOp = rewriter.create( + rewriter.getUnknownLoc(), tile, maybeChannel.value(), + StrmSwPortType::DMA, AMDAIE::DMAChannelDir::S2MM); + targetChannels.push_back(channelOp.getResult()); } - // Only assign the channel if it is for circuit flow. - if (!isPacketFlow) - tileToGeneratorMap[tile].assignConsumerDMAChannel(maybeChannel.value()); - auto channelOp = rewriter.create( - rewriter.getUnknownLoc(), tile, maybeChannel.value(), - StrmSwPortType::DMA, AMDAIE::DMAChannelDir::S2MM); - targetChannels.push_back(channelOp.getResult()); } + // Replace the `amdaie.connection` op with newly assigned `sourceChannels` + // and `targetChannels`. 
rewriter.replaceOpWithNewOp( connectionOp, connectionOp.getTarget(), targetChannels, connectionOp.getSource(), sourceChannels, diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEGenerateControlOverlay.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEGenerateControlOverlay.cpp index 909611e19..37cc1e4f3 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEGenerateControlOverlay.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEGenerateControlOverlay.cpp @@ -16,8 +16,9 @@ namespace mlir::iree_compiler::AMDAIE { namespace { -/// Initializes the channel generators for the shim tiles, excluding any -/// channels that are already in use by existing circuit-mode connections. +/// Initializes channel generators for shim tiles, ensuring that no shim DMA +/// MM2S channels have been assigned before. This guarantees priority for the +/// control overlay. LogicalResult initializeChannelsGenerators( AMDAIE::WorkgroupOp workgroupOp, const AMDAIEDeviceModel &deviceModel, const DenseSet &shimTileOps, @@ -29,40 +30,19 @@ LogicalResult initializeChannelsGenerators( shimTileToGeneratorMap[shimTileOp.getResult()] = ChannelGenerator(numShimDmaChannels, numShimDmaChannels); }); - // Exclude those channels that are already used by a circuit-mode connection. 
- workgroupOp->walk([&](AMDAIE::ConnectionOp connectionOp) { - std::optional connectionType = - connectionOp.getConnectionType(); - bool isPacketFlow = connectionType && connectionType.value() == - AMDAIE::ConnectionType::Packet; - if (isPacketFlow) return WalkResult::advance(); - SmallVector sourceChannels; - for (Value source : connectionOp.getSourceChannels()) { - if (auto channelOp = - dyn_cast(source.getDefiningOp())) { - sourceChannels.push_back(channelOp); - } - } - for (AMDAIE::ChannelOp channelOp : sourceChannels) { - AMDAIE::TileOp tileOp = channelOp.getTileOp(); - uint8_t channel = channelOp.getValue(); - StrmSwPortType portType = channelOp.getPortType(); - AMDAIE::DMAChannelDir direction = channelOp.getDirection(); - if (shimTileOps.contains(tileOp) && portType == StrmSwPortType::DMA) { - // Assign to exclude. - if (direction == AMDAIE::DMAChannelDir::MM2S) { - shimTileToGeneratorMap[tileOp.getResult()].assignProducerDMAChannel( - channel); - } else if (direction == AMDAIE::DMAChannelDir::S2MM) { - shimTileToGeneratorMap[tileOp.getResult()].assignConsumerDMAChannel( - channel); - } else { - assert(false && "unexpected DMA channel direction"); - } - } + // Ensure that shim DMA MM2S channels are not already assigned. 
+ WalkResult res = workgroupOp->walk([&](AMDAIE::ChannelOp channelOp) { + if (shimTileOps.contains(channelOp.getTileOp()) && + channelOp.getPortType() == StrmSwPortType::DMA && + channelOp.getDirection() == AMDAIE::DMAChannelDir::MM2S) { + channelOp.emitOpError() + << "shim DMA MM2S channel must remain unassigned before " + "control overlay generation."; + return WalkResult::interrupt(); } return WalkResult::advance(); }); + if (res.wasInterrupted()) return failure(); return success(); } @@ -114,11 +94,12 @@ LogicalResult generateControlOverlay(AMDAIE::WorkgroupOp workgroupOp, WalkResult res = workgroupOp->walk([&](AMDAIE::TileOp tileOp) { uint32_t col = getConstantIndexOrAssert(tileOp.getCol()); TileOp shimTileOp = columnToShimTile[col]; - // Get the available channel, but do not assign it. Allow it to be - // shared across multiple packet-mode connections as needed. + // Get the available DMA channel for the shim tile, and assign it for the + // packet flow. std::optional maybeChannel = shimTileToGeneratorMap[shimTileOp.getResult()] - .getProducerDMAChannel(); + .getAndAssignProducerDMAChannel( + ChannelAssignmentMode::RoundRobinPacketFlow); if (!maybeChannel) { shimTileOp.emitOpError() << "no producer DMA channel available"; return WalkResult::interrupt(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index 0fb33d7bd..4f8777a4d 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -859,6 +859,8 @@ void addAMDAIEObjectFifoLoweringPasses( passManager.addPass(createCanonicalizerPass()); passManager.addPass(createAMDAIEDmaCSEPass()); + passManager.addPass(createAMDAIEGenerateControlOverlayPass()); + passManager.addPass(createAMDAIEAssignChannelsPass()); passManager.addPass(createCSEPass()); passManager.addPass(createCanonicalizerPass()); @@ -881,8 +883,6 @@ void 
addAMDAIEObjectFifoLoweringPasses( passManager.addPass(createAMDAIEObjFifoBufferizationPass()); passManager.addPass(createAMDAIETemporaryAllocBufferizationPass()); - passManager.addPass(createAMDAIEGenerateControlOverlayPass()); - passManager.addPass(createAMDAIEConnectionToFlowPass()); passManager.addPass(createAMDAIEAssignPacketIdsPass()); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir index 095cc09f7..396f851fc 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir @@ -106,3 +106,40 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} return } } + +// ----- + +// For tile (0,0), its producer (MM2S) channel 0 is already assigned +// to a control packet flow. Therefore, channel 1 is used to connect to tile (0,1). 
+// CHECK-LABEL: @previously_assigned +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[C1:.+]] = arith.constant 1 : index +// CHECK: amdaie.workgroup +// CHECK: %[[tile_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: %[[tile_0_1:.+]] = amdaie.tile(%[[C0]], %[[C1]]) +// CHECK: %[[CHANNEL_0:.+]] = amdaie.channel(%[[tile_0_0]], 1, port_type = DMA, direction = MM2S) +// CHECK: %[[CHANNEL_1:.+]] = amdaie.channel(%[[tile_0_1]], 0, port_type = DMA, direction = S2MM) +// CHECK: amdaie.connection(%{{.+}} {%[[CHANNEL_1]]}, %{{.+}} {%[[CHANNEL_0]]}) +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { + func.func @previously_assigned(%arg0: memref<1x1x8x16xi32, 1>, %arg1: memref<8x16xi32>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + amdaie.workgroup { + %tile_0_0 = amdaie.tile(%c0, %c0) + %tile_0_1 = amdaie.tile(%c0, %c1) + %0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_1} : memref<1x1x8x16xi32, 1> -> !amdaie.logicalobjectfifo> + %1 = amdaie.logicalobjectfifo.from_memref %arg1, {%tile_0_0} : memref<8x16xi32> -> !amdaie.logicalobjectfifo> + %2 = amdaie.connection(%0, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + %channel = amdaie.channel(%tile_0_0, 0, port_type = DMA, direction = MM2S) + %channel_0 = amdaie.channel(%tile_0_0, 0, port_type = CTRL, direction = S2MM) + %3 = amdaie.logicalobjectfifo.placeholder{%tile_0_0} : !amdaie.logicalobjectfifo> + %4 = amdaie.logicalobjectfifo.placeholder{%tile_0_0} : !amdaie.logicalobjectfifo> + %5 = amdaie.connection(%4 {%channel_0}, %3 {%channel}) {connection_type = #amdaie} : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + amdaie.controlcode { + amdaie.end + } + } + return + } +} diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/generate_control_overlay.mlir 
b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/generate_control_overlay.mlir index c58e7cebe..17d207d68 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/generate_control_overlay.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/generate_control_overlay.mlir @@ -15,28 +15,23 @@ module { // ----- -// Shim tile (0, 0) has two producer (MM2S) channels, -// both of which are already utilized by existing circuit-mode connections. -// No producer DMA channel is available for route-shim-to-tile-ctrl. +/// No shim DMA channel can be assigned before control overlay generation. +/// This ensures that control packets have priority in resource allocation +/// and makes control packet routing static. #executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { - func.func @no_available_channel() { + func.func @priority_check(%arg0: memref<8x16xi32>, %arg1: memref<1x1x8x16xi32, 1>) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index amdaie.workgroup { - // expected-error @+1 {{no producer DMA channel available}} %tile_0_0 = amdaie.tile(%c0, %c0) %tile_0_1 = amdaie.tile(%c0, %c1) - %0 = amdaie.logicalobjectfifo.placeholder{%tile_0_0} : !amdaie.logicalobjectfifo> - %1 = amdaie.logicalobjectfifo.placeholder{%tile_0_1} : !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.placeholder{%tile_0_0} : !amdaie.logicalobjectfifo> - %3 = amdaie.logicalobjectfifo.placeholder{%tile_0_1} : !amdaie.logicalobjectfifo> + %0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_0} : memref<8x16xi32> -> !amdaie.logicalobjectfifo> + %1 = amdaie.logicalobjectfifo.from_memref %arg1, {%tile_0_1} : memref<1x1x8x16xi32, 1> -> !amdaie.logicalobjectfifo> + // expected-error @+1 {{shim DMA MM2S channel must remain unassigned before control overlay generation}} 
%channel_0 = amdaie.channel(%tile_0_0, 0, port_type = DMA, direction = MM2S) %channel_1 = amdaie.channel(%tile_0_1, 0, port_type = DMA, direction = S2MM) - %connection_0 = amdaie.connection(%1 {%channel_1}, %0 {%channel_0}) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %channel_2 = amdaie.channel(%tile_0_0, 1, port_type = DMA, direction = MM2S) - %channel_3 = amdaie.channel(%tile_0_1, 1, port_type = DMA, direction = S2MM) - %connection_1 = amdaie.connection(%3 {%channel_3}, %2 {%channel_2}) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + %connection_0 = amdaie.connection(%0 {%channel_0}, %1 {%channel_1}) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) amdaie.controlcode { amdaie.end } diff --git a/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h b/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h index d491d90d3..b71836672 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h +++ b/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h @@ -7,8 +7,7 @@ #ifndef IREE_COMPILER_AMDAIE_UTILS_CHANNEL_GENERATOR_H_ #define IREE_COMPILER_AMDAIE_UTILS_CHANNEL_GENERATOR_H_ -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "mlir/Support/LogicalResult.h" @@ -16,7 +15,11 @@ using namespace llvm; namespace mlir::iree_compiler::AMDAIE { -enum class ChannelAssignmentMode { FirstAvailable, RoundRobin }; +enum class ChannelAssignmentMode { + FirstAvailableCircuitFlow, + FirstAvailablePacketFlow, + RoundRobinPacketFlow +}; /// Utility to generate valid channels. class ChannelGenerator { @@ -27,75 +30,160 @@ class ChannelGenerator { numConsumerChannels(numConsumerChannels) { assert(numProducerChannels > 0 && numConsumerChannels > 0 && "Invalid number of producer/consumer channels."); - // Initialize to the last channel for round-robin usage. 
- lastRetrievedProducerChannel = numProducerChannels - 1; - lastRetrievedConsumerChannel = numConsumerChannels - 1; + } + + /// Attempts to find the first available channel that is not present in any of + /// the given exclusion sets. + std::optional findFirstAvailableChannel( + uint8_t numChannels, + ArrayRef> excludeSets) { + for (uint8_t channel = 0; channel < numChannels; ++channel) { + if (llvm::none_of( + excludeSets, + [&](const llvm::SmallSetVector &excludeSet) { + return excludeSet.count(channel); + })) { + return channel; + } + } + return std::nullopt; } /// Retrieves the next producer channel using the specified strategy. - /// Defaults to round-robin for balanced load distribution, using - /// `lastRetrievedProducerChannel` to track the last channel accessed. - std::optional getProducerDMAChannel( - ChannelAssignmentMode mode = ChannelAssignmentMode::RoundRobin) { - for (uint8_t offset = 1; offset <= numProducerChannels; ++offset) { - uint8_t i; - if (mode == ChannelAssignmentMode::FirstAvailable) { - i = offset - 1; - } else if (mode == ChannelAssignmentMode::RoundRobin) { - i = (lastRetrievedProducerChannel + offset) % numProducerChannels; - } else { - assert(false && "Unsupported ChannelAssignmentMode"); + std::optional getAndAssignProducerDMAChannel( + ChannelAssignmentMode mode) { + std::optional channel; + switch (mode) { + // Select the first available channel for circuit flow. + // A channel is valid if it is not already assigned to any circuit or + // packet flow. + case ChannelAssignmentMode::FirstAvailableCircuitFlow: { + channel = findFirstAvailableChannel( + numProducerChannels, + {assignedCircuitProducerChannels, assignedPacketProducerChannels}); + break; } - if (!assignedProducerChannels.count(i)) { - lastRetrievedProducerChannel = i; - return i; + // Select the first available channel for packet flow. + // A channel is valid if it is not already assigned to a circuit flow. 
+ case ChannelAssignmentMode::FirstAvailablePacketFlow: { + channel = findFirstAvailableChannel(numProducerChannels, + {assignedCircuitProducerChannels}); + break; } + // Select the channel for packet flow, using a round-robin strategy for + // load balancing: + // 1. Prefer an unused channel (not assigned to any circuit or packet + // flow). + // 2. If no such channel is available, reuse the least recently used + // packet flow channel from `assignedPacketProducerChannels.front()`. + case ChannelAssignmentMode::RoundRobinPacketFlow: { + channel = findFirstAvailableChannel( + numProducerChannels, + {assignedCircuitProducerChannels, assignedPacketProducerChannels}); + if (!channel && !assignedPacketProducerChannels.empty()) + channel = assignedPacketProducerChannels.front(); + break; + } + default: + assert(false && "Unsupported ChannelAssignmentMode"); } - return std::nullopt; + // Assign the channel if found. + if (channel.has_value()) assignProducerDMAChannel(channel.value(), mode); + return channel; } /// Retrieves the next consumer channel using the specified strategy. - /// Defaults to round-robin for balanced load distribution, using - /// `lastRetrievedConsumerChannel` to track the last channel accessed. - std::optional getConsumerDMAChannel( - ChannelAssignmentMode mode = ChannelAssignmentMode::RoundRobin) { - for (uint8_t offset = 1; offset <= numConsumerChannels; ++offset) { - uint8_t i; - if (mode == ChannelAssignmentMode::FirstAvailable) { - i = offset - 1; - } else if (mode == ChannelAssignmentMode::RoundRobin) { - i = (lastRetrievedConsumerChannel + offset) % numConsumerChannels; - } else { - assert(false && "Unsupported ChannelAssignmentMode"); + std::optional getAndAssignConsumerDMAChannel( + ChannelAssignmentMode mode) { + std::optional channel; + switch (mode) { + // Select the first available channel for circuit flow. + // A channel is valid if it is not already assigned to any circuit or + // packet flow. 
+ case ChannelAssignmentMode::FirstAvailableCircuitFlow: { + channel = findFirstAvailableChannel( + numConsumerChannels, + {assignedCircuitConsumerChannels, assignedPacketConsumerChannels}); + break; + } + // Select the first available channel for packet flow. + // A channel is valid if it is not already assigned to a circuit flow. + case ChannelAssignmentMode::FirstAvailablePacketFlow: { + channel = findFirstAvailableChannel(numConsumerChannels, + {assignedCircuitConsumerChannels}); + break; } - if (!assignedConsumerChannels.count(i)) { - lastRetrievedConsumerChannel = i; - return i; + // Select the channel for packet flow, using a round-robin strategy for + // load balancing: + // 1. Prefer an unused channel (not assigned to any circuit or packet + // flow). + // 2. If no such channel is available, reuse the least recently used + // packet flow channel from `assignedPacketConsumerChannels.front()`. + case ChannelAssignmentMode::RoundRobinPacketFlow: { + channel = findFirstAvailableChannel( + numConsumerChannels, + {assignedCircuitConsumerChannels, assignedPacketConsumerChannels}); + if (!channel && !assignedPacketConsumerChannels.empty()) + channel = assignedPacketConsumerChannels.front(); + break; } + default: + assert(false && "Unsupported ChannelAssignmentMode"); } - return std::nullopt; + // Assign the channel if found. + if (channel.has_value()) assignConsumerDMAChannel(channel.value(), mode); + return channel; } - /// Assigns the provided producer channel, only used for circuit flow. - void assignProducerDMAChannel(uint8_t channel) { - assignedProducerChannels.insert(channel); + /// Assigns the provided producer channel. 
+ void assignProducerDMAChannel(uint8_t channel, ChannelAssignmentMode mode) { + switch (mode) { + case ChannelAssignmentMode::FirstAvailableCircuitFlow: + assignedCircuitProducerChannels.insert(channel); + break; + case ChannelAssignmentMode::FirstAvailablePacketFlow: + assignedPacketProducerChannels.insert(channel); + break; + case ChannelAssignmentMode::RoundRobinPacketFlow: + // Remove and reinsert to update the least recently used channel + // (front). + assignedPacketProducerChannels.remove(channel); + assignedPacketProducerChannels.insert(channel); + break; + default: + assert(false && "Unsupported ChannelAssignmentMode"); + } } - /// Assigns the provided consumer channel, only used for circuit flow. - void assignConsumerDMAChannel(uint8_t channel) { - assignedConsumerChannels.insert(channel); + /// Assigns the provided consumer channel. + void assignConsumerDMAChannel(uint8_t channel, ChannelAssignmentMode mode) { + switch (mode) { + case ChannelAssignmentMode::FirstAvailableCircuitFlow: + assignedCircuitConsumerChannels.insert(channel); + break; + case ChannelAssignmentMode::FirstAvailablePacketFlow: + assignedPacketConsumerChannels.insert(channel); + break; + case ChannelAssignmentMode::RoundRobinPacketFlow: + // Remove and reinsert to update the least recently used channel + // (front). + assignedPacketConsumerChannels.remove(channel); + assignedPacketConsumerChannels.insert(channel); + break; + default: + assert(false && "Unsupported ChannelAssignmentMode"); + } } private: uint8_t numProducerChannels = 0; uint8_t numConsumerChannels = 0; // Tracks the channels that are used by circuit flows. - DenseSet assignedProducerChannels; - DenseSet assignedConsumerChannels; - // Tracks the last retrieved channel in `getProducerDMAChannel` and - // `getConsumerDMAChannel` for round-robin usage. 
- uint8_t lastRetrievedProducerChannel = 0; - uint8_t lastRetrievedConsumerChannel = 0; + llvm::SmallSetVector assignedCircuitProducerChannels; + llvm::SmallSetVector assignedCircuitConsumerChannels; + // Tracks the channels that are used by packet flows. + llvm::SmallSetVector assignedPacketProducerChannels; + llvm::SmallSetVector assignedPacketConsumerChannels; }; } // namespace mlir::iree_compiler::AMDAIE diff --git a/runtime/src/iree-amd-aie/aie_runtime/Utils/test/ChannelGeneratorTest.cpp b/runtime/src/iree-amd-aie/aie_runtime/Utils/test/ChannelGeneratorTest.cpp index 6a811dd1b..52e9b493f 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/Utils/test/ChannelGeneratorTest.cpp +++ b/runtime/src/iree-amd-aie/aie_runtime/Utils/test/ChannelGeneratorTest.cpp @@ -13,116 +13,68 @@ namespace { using namespace mlir::iree_compiler::AMDAIE; -TEST(ChannelGeneratorTest, GetFirstAvailable) { +TEST(ChannelGeneratorTest, GetAssignFirstAvailableCircuitFlow) { ChannelGenerator generator(2, 2); - EXPECT_EQ( - generator.getProducerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 0); - EXPECT_EQ( - generator.getConsumerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 0); - EXPECT_EQ( - generator.getProducerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 0); - EXPECT_EQ( - generator.getConsumerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 0); - EXPECT_EQ( - generator.getProducerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 0); - EXPECT_EQ( - generator.getConsumerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 0); + // Keep incrementing the channel number until all channels are assigned. 
+ ChannelAssignmentMode mode = ChannelAssignmentMode::FirstAvailableCircuitFlow; + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 1); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 1); + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode), std::nullopt); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode), std::nullopt); } -TEST(ChannelGeneratorTest, GetRoundRobin) { +TEST(ChannelGeneratorTest, GetAssignFirstAvailablePacketFlow) { ChannelGenerator generator(2, 2); - EXPECT_EQ(generator.getProducerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 0); - EXPECT_EQ(generator.getConsumerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 0); - EXPECT_EQ(generator.getProducerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 1); - EXPECT_EQ(generator.getConsumerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 1); - EXPECT_EQ(generator.getProducerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 0); - EXPECT_EQ(generator.getConsumerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 0); - EXPECT_EQ(generator.getProducerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 1); - EXPECT_EQ(generator.getConsumerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 1); + // Use the same channel number, as it can be assigned to multiple packet + // flows. 
+ ChannelAssignmentMode mode = ChannelAssignmentMode::FirstAvailablePacketFlow; + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 0); } -TEST(ChannelGeneratorTest, GetAssign) { +TEST(ChannelGeneratorTest, GetAssignRoundRobinPacketFlow) { ChannelGenerator generator(2, 2); - EXPECT_EQ(generator.getProducerDMAChannel().value(), 0); - generator.assignProducerDMAChannel(0); - EXPECT_EQ(generator.getConsumerDMAChannel().value(), 0); - generator.assignConsumerDMAChannel(0); - EXPECT_EQ(generator.getProducerDMAChannel().value(), 1); - generator.assignProducerDMAChannel(1); - EXPECT_EQ(generator.getConsumerDMAChannel().value(), 1); - generator.assignConsumerDMAChannel(1); - EXPECT_EQ(generator.getProducerDMAChannel(), std::nullopt); - EXPECT_EQ(generator.getConsumerDMAChannel(), std::nullopt); + // Round-robin between the two available channels, for load balancing. 
+ ChannelAssignmentMode mode = ChannelAssignmentMode::RoundRobinPacketFlow; + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 1); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 1); + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 0); + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 1); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 1); } TEST(ChannelGeneratorTest, Occupied) { ChannelGenerator generator(4, 4); - generator.assignProducerDMAChannel(0); - generator.assignConsumerDMAChannel(0); - generator.assignProducerDMAChannel(2); - generator.assignConsumerDMAChannel(2); - EXPECT_EQ( - generator.getProducerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 1); - EXPECT_EQ( - generator.getConsumerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 1); - EXPECT_EQ( - generator.getProducerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 1); - EXPECT_EQ( - generator.getConsumerDMAChannel(ChannelAssignmentMode::FirstAvailable) - .value(), - 1); - EXPECT_EQ(generator.getProducerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 3); - EXPECT_EQ(generator.getConsumerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 3); - EXPECT_EQ(generator.getProducerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 1); - EXPECT_EQ(generator.getConsumerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 1); - EXPECT_EQ(generator.getProducerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 3); - EXPECT_EQ(generator.getConsumerDMAChannel(ChannelAssignmentMode::RoundRobin) - .value(), - 3); + // Reserve channels 0 for circuit flow. 
+ ChannelAssignmentMode mode = ChannelAssignmentMode::FirstAvailableCircuitFlow; + generator.assignProducerDMAChannel(0, mode); + generator.assignConsumerDMAChannel(0, mode); + // Reserve channel 1 for packet flow. + mode = ChannelAssignmentMode::FirstAvailablePacketFlow; + generator.assignProducerDMAChannel(1, mode); + generator.assignConsumerDMAChannel(1, mode); + // The next available channel for circuit flow is 2. + mode = ChannelAssignmentMode::FirstAvailableCircuitFlow; + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 2); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 2); + // Channels 0 and 2 are already assigned for circuit flow. Therefore, for + // packet flow, the next available channel is round-robin between 1 and 3. + mode = ChannelAssignmentMode::RoundRobinPacketFlow; + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 3); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 3); + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 1); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 1); + EXPECT_EQ(generator.getAndAssignProducerDMAChannel(mode).value(), 3); + EXPECT_EQ(generator.getAndAssignConsumerDMAChannel(mode).value(), 3); } } // namespace From e11585cd5658d5da575ff2178480c4dfb0599f75 Mon Sep 17 00:00:00 2001 From: Yu-Zhewen Date: Fri, 14 Feb 2025 17:37:25 +0000 Subject: [PATCH 2/2] add a new test and create cpp for ChannelGenerator --- .../Transforms/test/assign_channels.mlir | 51 +++++- .../aie_runtime/Utils/CMakeLists.txt | 1 + .../aie_runtime/Utils/ChannelGenerator.cpp | 149 ++++++++++++++++++ .../aie_runtime/Utils/ChannelGenerator.h | 133 +--------------- 4 files changed, 202 insertions(+), 132 deletions(-) create mode 100644 runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.cpp diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir
b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir index 396f851fc..90edfc2c1 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir @@ -109,9 +109,52 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // ----- -// For tile (0,0), its producer (MM2S) channel 0 is already assigned -// to a control packet flow. Therefore, channel 1 is used to connect to tile (0,1). -// CHECK-LABEL: @previously_assigned +// In the input IR: +// - Tile (0,0) has its DMA MM2S channel 0 already assigned to a circuit flow. +// - Tile (0,1) has its DMA S2MM channel 0 assigned to the same circuit flow. +// As a result, channel assignment starts from channel 1 for both tiles. +// CHECK-LABEL: @previously_assigned_circuit +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[C1:.+]] = arith.constant 1 : index +// CHECK: amdaie.workgroup +// CHECK: %[[tile_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: %[[tile_0_1:.+]] = amdaie.tile(%[[C0]], %[[C1]]) +// CHECK: %[[CHANNEL_0:.+]] = amdaie.channel(%[[tile_0_0]], 1, port_type = DMA, direction = MM2S) +// CHECK: %[[CHANNEL_1:.+]] = amdaie.channel(%[[tile_0_1]], 1, port_type = DMA, direction = S2MM) +// CHECK: amdaie.connection(%{{.+}} {%[[CHANNEL_1]]}, %{{.+}} {%[[CHANNEL_0]]}) +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { + func.func @previously_assigned_circuit(%arg0: memref<1x1x8x16xi32, 1>, %arg1: memref<8x16xi32>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + amdaie.workgroup { + %tile_0_0 = amdaie.tile(%c0, %c0) + %tile_0_1 = amdaie.tile(%c0, %c1) + %0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_1} : memref<1x1x8x16xi32, 1> -> 
!amdaie.logicalobjectfifo> + %1 = amdaie.logicalobjectfifo.from_memref %arg1, {%tile_0_0} : memref<8x16xi32> -> !amdaie.logicalobjectfifo> + %2 = amdaie.connection(%0, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + %channel = amdaie.channel(%tile_0_0, 0, port_type = DMA, direction = MM2S) + %channel_0 = amdaie.channel(%tile_0_1, 0, port_type = DMA, direction = S2MM) + %3 = amdaie.logicalobjectfifo.placeholder{%tile_0_0} : !amdaie.logicalobjectfifo> + %4 = amdaie.logicalobjectfifo.placeholder{%tile_0_1} : !amdaie.logicalobjectfifo> + %5 = amdaie.connection(%4 {%channel_0}, %3 {%channel}) {connection_type = #amdaie} : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + amdaie.controlcode { + amdaie.end + } + } + return + } +} + +// ----- + +// In the input IR: +// - Tile (0,0) has its DMA MM2S channel 0 already assigned to a control packet flow. +// - Tile (0,1) has its CTRL S2MM channel 0 assigned to the same flow. +// Therefore, the next available channels are: +// - Tile (0,0): DMA MM2S channel 1 +// - Tile (0,1): DMA S2MM channel 0 +// CHECK-LABEL: @previously_assigned_packet // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[C1:.+]] = arith.constant 1 : index // CHECK: amdaie.workgroup @@ -122,7 +165,7 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // CHECK: amdaie.connection(%{{.+}} {%[[CHANNEL_1]]}, %{{.+}} {%[[CHANNEL_0]]}) #executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { - func.func @previously_assigned(%arg0: memref<1x1x8x16xi32, 1>, %arg1: memref<8x16xi32>) { + func.func @previously_assigned_packet(%arg0: memref<1x1x8x16xi32, 1>, %arg1: memref<8x16xi32>) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index amdaie.workgroup { diff --git a/runtime/src/iree-amd-aie/aie_runtime/Utils/CMakeLists.txt 
b/runtime/src/iree-amd-aie/aie_runtime/Utils/CMakeLists.txt index 4a909e30a..608905456 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/Utils/CMakeLists.txt +++ b/runtime/src/iree-amd-aie/aie_runtime/Utils/CMakeLists.txt @@ -17,6 +17,7 @@ iree_cc_library( "LockIdGenerator.h" SRCS "ChannelBdIdGenerator.cpp" + "ChannelGenerator.cpp" "LockIdGenerator.cpp" DEPS iree-amd-aie::aie_runtime::iree_aie_runtime_static diff --git a/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.cpp b/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.cpp new file mode 100644 index 000000000..4ce63844c --- /dev/null +++ b/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.cpp @@ -0,0 +1,149 @@ +// Copyright 2025 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h" + +namespace mlir::iree_compiler::AMDAIE { + +std::optional ChannelGenerator::findFirstAvailableChannel( + uint8_t numChannels, + ArrayRef> excludeSets) { + for (uint8_t channel = 0; channel < numChannels; ++channel) { + if (llvm::none_of(excludeSets, + [&](const llvm::SmallSetVector &excludeSet) { + return excludeSet.count(channel); + })) { + return channel; + } + } + return std::nullopt; +} + +std::optional ChannelGenerator::getAndAssignProducerDMAChannel( + ChannelAssignmentMode mode) { + std::optional channel; + switch (mode) { + case ChannelAssignmentMode::FirstAvailableCircuitFlow: { + // Select the first available channel for circuit flow. + // A channel is valid if it is not already assigned to any circuit or + // packet flow. 
+ channel = findFirstAvailableChannel( + numProducerChannels, + {assignedCircuitProducerChannels, assignedPacketProducerChannels}); + break; + } + case ChannelAssignmentMode::FirstAvailablePacketFlow: { + // Select the first available channel for packet flow. + // A channel is valid if it is not already assigned to a circuit flow. + channel = findFirstAvailableChannel(numProducerChannels, + {assignedCircuitProducerChannels}); + break; + } + case ChannelAssignmentMode::RoundRobinPacketFlow: { + // Select the channel for packet flow, using a round-robin strategy for + // load balancing: + // 1. Prefer an unused channel (not assigned to any circuit or packet + // flow). + // 2. If no such channel is available, reuse the least recently used + // packet flow channel from `assignedPacketProducerChannels.front()`. + channel = findFirstAvailableChannel( + numProducerChannels, + {assignedCircuitProducerChannels, assignedPacketProducerChannels}); + if (!channel && !assignedPacketProducerChannels.empty()) + channel = assignedPacketProducerChannels.front(); + break; + } + default: + assert(false && "Unsupported ChannelAssignmentMode"); + } + // Assign the channel if found. + if (channel.has_value()) assignProducerDMAChannel(channel.value(), mode); + return channel; +} + +std::optional ChannelGenerator::getAndAssignConsumerDMAChannel( + ChannelAssignmentMode mode) { + std::optional channel; + switch (mode) { + case ChannelAssignmentMode::FirstAvailableCircuitFlow: { + // Select the first available channel for circuit flow. + // A channel is valid if it is not already assigned to any circuit or + // packet flow. + channel = findFirstAvailableChannel( + numConsumerChannels, + {assignedCircuitConsumerChannels, assignedPacketConsumerChannels}); + break; + } + case ChannelAssignmentMode::FirstAvailablePacketFlow: { + // Select the first available channel for packet flow. + // A channel is valid if it is not already assigned to a circuit flow. 
+ channel = findFirstAvailableChannel(numConsumerChannels, + {assignedCircuitConsumerChannels}); + break; + } + case ChannelAssignmentMode::RoundRobinPacketFlow: { + // Select the channel for packet flow, using a round-robin strategy for + // load balancing: + // 1. Prefer an unused channel (not assigned to any circuit or packet + // flow). + // 2. If no such channel is available, reuse the least recently used + // packet flow channel from `assignedPacketConsumerChannels.front()`. + channel = findFirstAvailableChannel( + numConsumerChannels, + {assignedCircuitConsumerChannels, assignedPacketConsumerChannels}); + if (!channel && !assignedPacketConsumerChannels.empty()) + channel = assignedPacketConsumerChannels.front(); + break; + } + default: + assert(false && "Unsupported ChannelAssignmentMode"); + } + // Assign the channel if found. + if (channel.has_value()) assignConsumerDMAChannel(channel.value(), mode); + return channel; +} + +void ChannelGenerator::assignProducerDMAChannel(uint8_t channel, + ChannelAssignmentMode mode) { + switch (mode) { + case ChannelAssignmentMode::FirstAvailableCircuitFlow: + assignedCircuitProducerChannels.insert(channel); + break; + case ChannelAssignmentMode::FirstAvailablePacketFlow: + assignedPacketProducerChannels.insert(channel); + break; + case ChannelAssignmentMode::RoundRobinPacketFlow: + // Remove and reinsert to update the least recently used channel + // (front). 
+ assignedPacketProducerChannels.remove(channel); + assignedPacketProducerChannels.insert(channel); + break; + default: + assert(false && "Unsupported ChannelAssignmentMode"); + } +} + +void ChannelGenerator::assignConsumerDMAChannel(uint8_t channel, + ChannelAssignmentMode mode) { + switch (mode) { + case ChannelAssignmentMode::FirstAvailableCircuitFlow: + assignedCircuitConsumerChannels.insert(channel); + break; + case ChannelAssignmentMode::FirstAvailablePacketFlow: + assignedPacketConsumerChannels.insert(channel); + break; + case ChannelAssignmentMode::RoundRobinPacketFlow: + // Remove and reinsert to update the least recently used channel + // (front). + assignedPacketConsumerChannels.remove(channel); + assignedPacketConsumerChannels.insert(channel); + break; + default: + assert(false && "Unsupported ChannelAssignmentMode"); + } +} + +} // namespace mlir::iree_compiler::AMDAIE diff --git a/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h b/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h index b71836672..4981f7857 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h +++ b/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h @@ -36,144 +36,21 @@ class ChannelGenerator { /// the given exclusion sets. std::optional findFirstAvailableChannel( uint8_t numChannels, - ArrayRef> excludeSets) { - for (uint8_t channel = 0; channel < numChannels; ++channel) { - if (llvm::none_of( - excludeSets, - [&](const llvm::SmallSetVector &excludeSet) { - return excludeSet.count(channel); - })) { - return channel; - } - } - return std::nullopt; - } + ArrayRef> excludeSets); /// Retrieves the next producer channel using the specified strategy. std::optional getAndAssignProducerDMAChannel( - ChannelAssignmentMode mode) { - std::optional channel; - switch (mode) { - // Select the first available channel for circuit flow. - // A channel is valid if it is not already assigned to any circuit or - // packet flow. 
- case ChannelAssignmentMode::FirstAvailableCircuitFlow: { - channel = findFirstAvailableChannel( - numProducerChannels, - {assignedCircuitProducerChannels, assignedPacketProducerChannels}); - break; - } - // Select the first available channel for packet flow. - // A channel is valid if it is not already assigned to a circuit flow. - case ChannelAssignmentMode::FirstAvailablePacketFlow: { - channel = findFirstAvailableChannel(numProducerChannels, - {assignedCircuitProducerChannels}); - break; - } - // Select the channel for packet flow, using a round-robin strategy for - // load balancing: - // 1. Prefer an unused channel (not assigned to any circuit or packet - // flow). - // 2. If no such channel is available, reuse the least recently used - // packet flow channel from `assignedPacketProducerChannels.front()`. - case ChannelAssignmentMode::RoundRobinPacketFlow: { - channel = findFirstAvailableChannel( - numProducerChannels, - {assignedCircuitProducerChannels, assignedPacketProducerChannels}); - if (!channel && !assignedPacketProducerChannels.empty()) - channel = assignedPacketProducerChannels.front(); - break; - } - default: - assert(false && "Unsupported ChannelAssignmentMode"); - } - // Assign the channel if found. - if (channel.has_value()) assignProducerDMAChannel(channel.value(), mode); - return channel; - } + ChannelAssignmentMode mode); /// Retrieves the next consumer channel using the specified strategy. std::optional getAndAssignConsumerDMAChannel( - ChannelAssignmentMode mode) { - std::optional channel; - switch (mode) { - // Select the first available channel for circuit flow. - // A channel is valid if it is not already assigned to any circuit or - // packet flow. - case ChannelAssignmentMode::FirstAvailableCircuitFlow: { - channel = findFirstAvailableChannel( - numConsumerChannels, - {assignedCircuitConsumerChannels, assignedPacketConsumerChannels}); - break; - } - // Select the first available channel for packet flow. 
- // A channel is valid if it is not already assigned to a circuit flow. - case ChannelAssignmentMode::FirstAvailablePacketFlow: { - channel = findFirstAvailableChannel(numConsumerChannels, - {assignedCircuitConsumerChannels}); - break; - } - // Select the channel for packet flow, using a round-robin strategy for - // load balancing: - // 1. Prefer an unused channel (not assigned to any circuit or packet - // flow). - // 2. If no such channel is available, reuse the least recently used - // packet flow channel from `assignedPacketConsumerChannels.front()`. - case ChannelAssignmentMode::RoundRobinPacketFlow: { - channel = findFirstAvailableChannel( - numConsumerChannels, - {assignedCircuitConsumerChannels, assignedPacketConsumerChannels}); - if (!channel && !assignedPacketConsumerChannels.empty()) - channel = assignedPacketConsumerChannels.front(); - break; - } - default: - assert(false && "Unsupported ChannelAssignmentMode"); - } - // Assign the channel if found. - if (channel.has_value()) assignConsumerDMAChannel(channel.value(), mode); - return channel; - } + ChannelAssignmentMode mode); /// Assigns the provided producer channel. - void assignProducerDMAChannel(uint8_t channel, ChannelAssignmentMode mode) { - switch (mode) { - case ChannelAssignmentMode::FirstAvailableCircuitFlow: - assignedCircuitProducerChannels.insert(channel); - break; - case ChannelAssignmentMode::FirstAvailablePacketFlow: - assignedPacketProducerChannels.insert(channel); - break; - case ChannelAssignmentMode::RoundRobinPacketFlow: - // Remove and reinsert to update the least recently used channel - // (front). - assignedPacketProducerChannels.remove(channel); - assignedPacketProducerChannels.insert(channel); - break; - default: - assert(false && "Unsupported ChannelAssignmentMode"); - } - } + void assignProducerDMAChannel(uint8_t channel, ChannelAssignmentMode mode); /// Assigns the provided consumer channel. 
- void assignConsumerDMAChannel(uint8_t channel, ChannelAssignmentMode mode) { - switch (mode) { - case ChannelAssignmentMode::FirstAvailableCircuitFlow: - assignedCircuitConsumerChannels.insert(channel); - break; - case ChannelAssignmentMode::FirstAvailablePacketFlow: - assignedPacketConsumerChannels.insert(channel); - break; - case ChannelAssignmentMode::RoundRobinPacketFlow: - // Remove and reinsert to update the least recently used channel - // (front). - assignedPacketConsumerChannels.remove(channel); - assignedPacketConsumerChannels.insert(channel); - break; - default: - assert(false && "Unsupported ChannelAssignmentMode"); - } - } + void assignConsumerDMAChannel(uint8_t channel, ChannelAssignmentMode mode); private: uint8_t numProducerChannels = 0;