diff --git a/compiler/plugins/target/AMD-AIE/aie/AMDAIECreatePathFindFlows.cpp b/compiler/plugins/target/AMD-AIE/aie/AMDAIECreatePathFindFlows.cpp index c624c297a..21ab74ce7 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AMDAIECreatePathFindFlows.cpp +++ b/compiler/plugins/target/AMD-AIE/aie/AMDAIECreatePathFindFlows.cpp @@ -89,6 +89,25 @@ ConnectOp getOrCreateConnect(OpBuilder &builder, Operation *parentOp, srcChannel, destBundle, destChannel); } +PacketRulesOp getOrCreatePacketRules(OpBuilder &builder, SwitchboxOp &swboxOp, + StrmSwPortType bundle, int channel) { + Block &b = swboxOp.getConnections().front(); + OpBuilder::InsertionGuard g(builder); + builder.setInsertionPoint(b.getTerminator()); + for (auto packetRules : swboxOp.getOps()) { + builder.setInsertionPointAfter(packetRules); + if (packetRules.getSourceBundle() == bundle && + packetRules.getSourceChannel() == channel) { + return packetRules; + } + } + auto packetRules = + builder.create(builder.getUnknownLoc(), bundle, channel); + PacketRulesOp::ensureTerminator(packetRules.getRules(), builder, + builder.getUnknownLoc()); + return packetRules; +} + struct ConvertFlowsToInterconnect : OpConversionPattern { using OpConversionPattern::OpConversionPattern; const std::map flowSolutions; @@ -276,8 +295,8 @@ LogicalResult runOnPacketFlow( } auto [masterSets, slaveAMSels] = maybeRoutingConfiguration.value(); - auto [slaveGroups, slaveMasks] = - emitSlaveGroupsAndMasksRoutingConfig(slavePorts, packetFlows); + auto [slaveGroups, slaveMasks] = emitSlaveGroupsAndMasksRoutingConfig( + slavePorts, packetFlows, deviceModel.getPacketIdMaskWidth()); // Realize the routes in MLIR for (auto &[tileLoc, tileOp] : tiles) { @@ -340,37 +359,37 @@ LogicalResult runOnPacketFlow( msOp->setAttr("keep_pkt_header", pktFlowAttrs); } - // Generate the packet rules + // Generate the packet rules. 
+ uint32_t numPacketRuleSlots = + deviceModel.getNumPacketRuleSlots(tileLoc.col, tileLoc.row); DenseMap slaveRules; - for (std::vector group : slaveGroups) { - builder.setInsertionPoint(b.getTerminator()); - PhysPortAndID physPortAndId = group.front(); - PhysPort physPort = physPortAndId.physPort; + for (auto &[physPort, groups] : slaveGroups) { if (tileLoc != physPort.tileLoc) continue; Port slave = physPort.port; - int mask = slaveMasks[physPortAndId]; - int ID = physPortAndId.id & mask; + for (std::set &group : groups) { + PhysPortAndID physPortAndId(physPort, *group.begin()); + uint32_t mask = slaveMasks[physPortAndId]; + uint32_t maskedId = physPortAndId.id & mask; #ifndef NDEBUG - // Verify that we actually map all the ID's correctly. - for (PhysPortAndID _slave : group) assert((_slave.id & mask) == ID); + // Verify that we actually map all the ID's correctly. + for (uint32_t _pktId : group) assert((_pktId & mask) == maskedId); #endif - Value amsel = amselOps[slaveAMSels[physPortAndId]]; - PacketRulesOp packetrules; - if (slaveRules.count(slave) == 0) { - packetrules = builder.create( - builder.getUnknownLoc(), (slave.bundle), slave.channel); - PacketRulesOp::ensureTerminator(packetrules.getRules(), builder, - builder.getUnknownLoc()); - slaveRules[slave] = packetrules; - } else { - packetrules = slaveRules[slave]; + Value amsel = amselOps[slaveAMSels[physPortAndId]]; + PacketRulesOp packetrules = + getOrCreatePacketRules(builder, swbox, slave.bundle, slave.channel); + // Ensure the number of packet rules does not exceed the allowed slots. + if (groups.size() > numPacketRuleSlots) { + return packetrules.emitOpError() + << "Exceeded packet rule limit. 
Allowed: " + << numPacketRuleSlots << " Required: " << groups.size(); + } + Block &rules = packetrules.getRules().front(); + builder.setInsertionPoint(rules.getTerminator()); + builder.create(builder.getUnknownLoc(), mask, maskedId, + amsel); } - - Block &rules = packetrules.getRules().front(); - builder.setInsertionPoint(rules.getTerminator()); - builder.create(builder.getUnknownLoc(), mask, ID, amsel); } } diff --git a/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows0.mlir b/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows0.mlir index a469dacfc..758b3f693 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows0.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows0.mlir @@ -12,8 +12,8 @@ module @test_create_packet_flows0 { // CHECK: %[[VAL_4:.*]] = aie.masterset(CORE : 0, %[[VAL_2:.*]]) // CHECK: %[[VAL_5:.*]] = aie.masterset(CORE : 1, %[[VAL_3:.*]]) // CHECK: aie.packet_rules(WEST : 0) { -// CHECK-DAG: aie.rule(31, 0, %[[VAL_2]]) -// CHECK-DAG: aie.rule(31, 1, %[[VAL_3]]) +// CHECK: aie.rule(31, 0, %[[VAL_2]]) +// CHECK: aie.rule(31, 1, %[[VAL_3]]) // CHECK: } // CHECK: } // CHECK: } diff --git a/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows2.mlir b/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows2.mlir index dd493777d..915f69957 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows2.mlir @@ -8,8 +8,8 @@ // CHECK: %[[VAL_2:.*]] = aie.masterset(CORE : 0, %[[VAL_0]]) // CHECK: %[[VAL_3:.*]] = aie.masterset(CORE : 1, %[[VAL_0]], %[[VAL_1]]) // CHECK: aie.packet_rules(WEST : 0) { -// CHECK-DAG: aie.rule(31, 0, %[[VAL_0]]) -// CHECK-DAG: aie.rule(31, 1, %[[VAL_1]]) +// CHECK: aie.rule(31, 0, %[[VAL_0]]) +// CHECK: aie.rule(31, 1, %[[VAL_1]]) // CHECK: } // CHECK: } // CHECK: } diff --git 
a/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows4.mlir b/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows4.mlir index 43d884188..51c6076f2 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows4.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows4.mlir @@ -10,11 +10,11 @@ module @test_create_packet_flows4 { // CHECK: %[[VAL_4:.*]] = aie.masterset(CORE : 0, %[[VAL_2]]) // CHECK: %[[VAL_5:.*]] = aie.masterset(CORE : 1, %[[VAL_3]]) // CHECK: aie.packet_rules(WEST : 0) { -// CHECK-DAG: aie.rule(31, 0, %[[VAL_2]]) -// CHECK-DAG: aie.rule(31, 1, %[[VAL_3]]) +// CHECK: aie.rule(31, 0, %[[VAL_2]]) +// CHECK: aie.rule(31, 1, %[[VAL_3]]) // CHECK: } // CHECK: aie.packet_rules(WEST : 1) { -// CHECK-DAG: aie.rule(31, 0, %[[VAL_3]]) +// CHECK: aie.rule(31, 0, %[[VAL_3]]) // CHECK: } // CHECK: } // CHECK: } diff --git a/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows5.mlir b/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows5.mlir index 594c4bc72..f808bfb61 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows5.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/test_create_packet_flows5.mlir @@ -6,10 +6,10 @@ // CHECK: %[[VAL_2:.*]] = aie.amsel<0> (0) // CHECK: %[[VAL_3:.*]] = aie.masterset(CORE : 0, %[[VAL_2]]) // CHECK: aie.packet_rules(WEST : 0) { -// CHECK-DAG: aie.rule(30, 0, %[[VAL_2]]) +// CHECK: aie.rule(30, 0, %[[VAL_2]]) // CHECK: } // CHECK: aie.packet_rules(WEST : 1) { -// CHECK-DAG: aie.rule(31, 2, %[[VAL_2]]) +// CHECK: aie.rule(31, 2, %[[VAL_2]]) // CHECK: } // CHECK: } // CHECK: } diff --git a/compiler/plugins/target/AMD-AIE/aie/test/test_pktflow_weight_pusher.mlir b/compiler/plugins/target/AMD-AIE/aie/test/test_pktflow_weight_pusher.mlir index 4974365c1..50452cc39 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/test_pktflow_weight_pusher.mlir +++ 
b/compiler/plugins/target/AMD-AIE/aie/test/test_pktflow_weight_pusher.mlir @@ -85,8 +85,8 @@ // CHECK: %[[VAL_26:.*]] = aie.masterset(DMA : 1, %[[VAL_25]]) // CHECK: %[[VAL_27:.*]] = aie.masterset(SOUTH : 1, %[[VAL_24]]) // CHECK: aie.packet_rules(NORTH : 2) { -// CHECK: aie.rule(27, 8, %[[VAL_24]]) // CHECK: aie.rule(31, 13, %[[VAL_25]]) +// CHECK: aie.rule(27, 8, %[[VAL_24]]) // CHECK: } // CHECK: } // CHECK: %[[TILE_2_4:.*]] = aie.tile(2, 4) @@ -107,8 +107,8 @@ // CHECK: %[[VAL_34:.*]] = aie.masterset(DMA : 1, %[[VAL_33]]) // CHECK: %[[VAL_35:.*]] = aie.masterset(WEST : 1, %[[VAL_32]]) // CHECK: aie.packet_rules(NORTH : 1) { -// CHECK: aie.rule(28, 0, %[[VAL_32]]) // CHECK: aie.rule(31, 6, %[[VAL_33]]) +// CHECK: aie.rule(28, 0, %[[VAL_32]]) // CHECK: } // CHECK: } // CHECK: %[[TILE_4_4:.*]] = aie.tile(4, 4) @@ -134,8 +134,8 @@ // CHECK: %[[VAL_44:.*]] = aie.masterset(DMA : 1, %[[VAL_43]]) // CHECK: %[[VAL_45:.*]] = aie.masterset(SOUTH : 2, %[[VAL_42]]) // CHECK: aie.packet_rules(EAST : 1) { -// CHECK: aie.rule(26, 8, %[[VAL_42]]) // CHECK: aie.rule(31, 14, %[[VAL_43]]) +// CHECK: aie.rule(26, 8, %[[VAL_42]]) // CHECK: } // CHECK: } // CHECK: %[[TILE_2_5:.*]] = aie.tile(2, 5) @@ -155,9 +155,9 @@ // CHECK: %[[VAL_52:.*]] = aie.masterset(SOUTH : 1, %[[VAL_48]]) // CHECK: %[[VAL_53:.*]] = aie.masterset(WEST : 1, %[[VAL_49]]) // CHECK: aie.packet_rules(EAST : 2) { -// CHECK: aie.rule(24, 0, %[[VAL_48]]) // CHECK: aie.rule(31, 3, %[[VAL_49]]) // CHECK: aie.rule(31, 7, %[[VAL_50]]) +// CHECK: aie.rule(24, 0, %[[VAL_48]]) // CHECK: } // CHECK: } // CHECK: %[[TILE_4_5:.*]] = aie.tile(4, 5) @@ -171,12 +171,12 @@ // CHECK: %[[VAL_60:.*]] = aie.masterset(SOUTH : 3, %[[VAL_55]]) // CHECK: %[[VAL_61:.*]] = aie.masterset(WEST : 2, %[[VAL_54]]) // CHECK: aie.packet_rules(EAST : 2) { -// CHECK: aie.rule(24, 0, %[[VAL_54]]) // CHECK: aie.rule(31, 5, %[[VAL_55]]) +// CHECK: aie.rule(24, 0, %[[VAL_54]]) // CHECK: } // CHECK: aie.packet_rules(EAST : 3) { -// CHECK: aie.rule(28, 8, 
%[[VAL_56]]) // CHECK: aie.rule(31, 11, %[[VAL_57]]) +// CHECK: aie.rule(28, 8, %[[VAL_56]]) // CHECK: } // CHECK: } // CHECK: %[[TILE_5_5:.*]] = aie.tile(5, 5) @@ -191,8 +191,8 @@ // CHECK: aie.rule(24, 0, %[[VAL_62]]) // CHECK: } // CHECK: aie.packet_rules(EAST : 3) { -// CHECK: aie.rule(28, 8, %[[VAL_63]]) // CHECK: aie.rule(31, 15, %[[VAL_64]]) +// CHECK: aie.rule(28, 8, %[[VAL_63]]) // CHECK: } // CHECK: } // CHECK: %[[TILE_6_5:.*]] = aie.tile(6, 5) @@ -210,8 +210,10 @@ // CHECK: aie.rule(24, 0, %[[VAL_69]]) // CHECK: } // CHECK: aie.packet_rules(EAST : 0) { -// CHECK: aie.rule(24, 8, %[[VAL_70]]) -// CHECK: aie.rule(24, 8, %[[VAL_71]]) +// CHECK: aie.rule(31, 9, %3) +// CHECK: aie.rule(31, 10, %3) +// CHECK: aie.rule(27, 11, %3) +// CHECK: aie.rule(24, 8, %2) // CHECK: } // CHECK: } // CHECK: %[[TILE_7_5:.*]] = aie.tile(7, 5) diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_router.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_router.cc index 812b30e1e..ba4b3327a 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_router.cc +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_router.cc @@ -730,8 +730,105 @@ bool existsPathToDest(const SwitchSettings &settings, TileLoc currTile, return false; } +/// Generate the mask value for all the IDs in the group. +/// Iterate over all the ID values in a group. If the i-th bit (i <= +/// `numMaskBits`) of an ID value differs from the i-th bit of another ID value, +/// the bit position should be "don't care", and the mask value should be 0 at +/// that bit position. Otherwise, the mask value should be 1 at that bit +/// position. +/// +/// Example: +/// Consider a group of IDs: {0x1, 0x2, 0x3} and `numMaskBits`=5. 
Counting from +/// the LSB, +/// - 1st bit of 0x1 is 1, 1st bit of 0x2 is 0, and 1st bit of 0x3 is 1; +/// - 2nd bit of 0x1 is 0, 2nd bit of 0x2 is 1, and 2nd bit of 0x3 is 1; +/// - 3rd bit of 0x1 is 0, 3rd bit of 0x2 is 0, and 3rd bit of 0x3 is 0; +/// - 4th bit of 0x1 is 0, 4th bit of 0x2 is 0, and 4th bit of 0x3 is 0; +/// - 5th bit of 0x1 is 0, 5th bit of 0x2 is 0, and 5th bit of 0x3 is 0. +/// Therefore, the 1st and 2nd bits of the mask value should be "don't care" +/// (marked as 0), and the 3rd, 4th and 5th bits of the mask value should be 1, +/// resulting in a final mask value of 0b11100(0x1C). +void updateGroupMask(const PhysPort &slavePort, std::set &group, + std::map &slaveMasks, + uint32_t numMaskBits) { + if (group.empty()) return; + assert(numMaskBits <= 32 && "Invalid number of mask bits"); + // Initialize the mask value to all 1s. + uint32_t mask = (numMaskBits == 32) ? ~0u : ((uint32_t)1 << numMaskBits) - 1; + // Iterate through `group`, use XOR to find differing bits from `firstId`, and + // set them as 0 in `mask`. + uint32_t firstId = *group.begin(); + for (uint32_t id : group) mask = mask & ~(id ^ firstId); + // Update the final mask value for all the IDs in the group. + for (uint32_t id : group) slaveMasks[PhysPortAndID(slavePort, id)] = mask; +} + +/// Sort groups by their size in ascending order. A smaller group size can +/// represent a stricter `packet_rule`, which should be placed first to prevent +/// other broader (less strict) rules from matching unintended IDs. +/// +/// Example: +/// Consider two slave groups, A and B, that share the same `physPort` +/// (i.e., the same `tileLoc`, `bundle`, and `channel`). These groups +/// will later be merged into a single `packet_rules` operation, +/// where each group contributes a `packet_rule` entry. The order +/// of these entries is critical because the first matching `packet_rule` +/// takes precedence. 
+/// +/// - `Group A` contains IDs `{0x3, 0x4, 0x5}` with `mask = 0x18`, +/// and defines a `packet_rule`: `(ID & 0x18) == 0x00`. +/// - `Group B` contains ID `{0x2}` with `mask = 0x1F`, +/// and defines a `packet_rule`: `(ID & 0x1F) == 0x02`. +/// +/// In this case, `Group B`'s `packet_rule` must precede `Group A`'s +/// within the `packet_rules` operation. Otherwise, ID `0x02` +/// would incorrectly match `(ID & 0x18) == 0x00`, leading to incorrect +/// behavior. +void sortGroupsBySize(SmallVector> &groups) { + auto sortBySize = [](auto &lhs, auto &rhs) { + if (lhs.size() != rhs.size()) return lhs.size() < rhs.size(); + return lhs < rhs; + }; + std::sort(groups.begin(), groups.end(), sortBySize); +} + +/// Verifies the correctness of ID groupings before putting them into a single +/// `packet_rules` set. Each group contributes a `packet_rule` entry, and this +/// function checks if any ID in a later group incorrectly matches a preceding +/// group's masked ID. +/// +/// Example: +/// Consider three groups: A, B, and C. +/// - `Group A`: Contains IDs `{0x0}` with `mask = 0x1F`. +/// - Packet rule: `(ID & 0x1F) ?= (0x0 & 0x1F)`. +/// - `Group B`: Contains IDs `{0x3, 0x4}` with `mask = 0x18`. +/// - Packet rule: `(ID & 0x18) ?= (0x3 & 0x18)`. +/// - `Group C`: Contains IDs `{0x1, 0x2, 0x5, 0x6, 0x7}` with `mask = 0x18`. +/// - Packet rule: `(ID & 0x18) ?= (0x1 & 0x18)`. +/// +/// ID `0x1` belongs to `Group C`, however, due to the limitation of masking, it +/// matches both `Group B` and `C`'s rules. Since `Group B` precedes `Group C`, +/// and `packet_rule` entries are evaluated in order, the function returns +/// `false` to indicate an invalid grouping. 
+bool verifyGroupsByMask(PhysPort slavePort, + const SmallVector> &groups, + const std::map &slaveMasks) { + for (size_t i = 0; i < groups.size(); ++i) { + uint32_t iPktId = *groups[i].begin(); + uint32_t iMask = slaveMasks.at(PhysPortAndID(slavePort, iPktId)); + uint32_t iMaskedId = iPktId & iMask; + for (size_t j = i + 1; j < groups.size(); ++j) { + for (uint32_t jPktId : groups[j]) { + if ((jPktId & iMask) == iMaskedId) return false; + } + } + } + return true; +} + std::tuple emitSlaveGroupsAndMasksRoutingConfig( - ArrayRef slavePorts, const PacketFlowMapT &packetFlows) { + ArrayRef slavePorts, const PacketFlowMapT &packetFlows, + uint32_t numMaskBits) { // Convert packet flow map into a map from src 'port and id's to destination // ports, so that multiple flows with different packet IDs, but the same // ports, can be merged. @@ -743,22 +840,23 @@ std::tuple emitSlaveGroupsAndMasksRoutingConfig( }); physPortAndIDToPhysPort[src].insert(physPorts.begin(), physPorts.end()); } - // Compute mask values - // Merging as many stream flows as possible - // The flows must originate from the same source port and have different IDs - // Two flows can be merged if they share the same destinations + // `slaveGroups` maps a slave port to groups of packet IDs. The groups will be + // later used for generating `packet_rules`. SlaveGroupsT slaveGroups; + // `slaveMasks` maps a slave port and packet ID to a mask value, used for + // `packet_rule` entries. + SlaveMasksT slaveMasks; + // Start the grouping process by iterating over all `slavePorts`. Grouping + // as many as possible to reduce the number of `packet_rule` entries. 
SmallVector workList(slavePorts.begin(), slavePorts.end()); while (!workList.empty()) { PhysPortAndID slave1 = workList.pop_back_val(); - Port slavePort1 = slave1.physPort.port; - - bool foundgroup = false; - for (auto &group : slaveGroups) { - PhysPortAndID slave2 = group.front(); - if (Port slavePort2 = slave2.physPort.port; slavePort1 != slavePort2) - continue; - + // Try to find a matching group that can be merged with. + std::optional matchedGroupIdx; + SmallVector> &groups = slaveGroups[slave1.physPort]; + for (size_t i = 0; i < groups.size(); ++i) { + PhysPortAndID slave2(slave1.physPort, *groups[i].begin()); + // Can be merged if `slave1` and `slave2` share the same destinations. const llvm::SetVector &dests1 = physPortAndIDToPhysPort.at(slave1); const llvm::SetVector &dests2 = @@ -768,51 +866,32 @@ std::tuple emitSlaveGroupsAndMasksRoutingConfig( [&dests2](const PhysPort &dest1) { return dests2.count(dest1); })) { - group.push_back(slave1); - foundgroup = true; + // Found a matching group. + matchedGroupIdx = i; break; } } - - if (!foundgroup) { - slaveGroups.emplace_back(std::vector{slave1}); + // Attempt to merge, and verify that the merged group is still valid. + if (matchedGroupIdx.has_value()) { + // Make a copy of the groups in case the merge is invalid. + SmallVector> groupsCopy = groups; + std::set &group = groups[matchedGroupIdx.value()]; + // Merge `slave1.id` into the group. + group.insert(slave1.id); + updateGroupMask(slave1.physPort, group, slaveMasks, numMaskBits); + sortGroupsBySize(groups); + // If the merge is valid, simply continue the while loop on `workList`. + if (verifyGroupsByMask(slave1.physPort, groups, slaveMasks)) continue; + // Not a valid merge, so revert the changes on `groups` and `slaveMasks`. + slaveGroups[slave1.physPort] = groupsCopy; + updateGroupMask(slave1.physPort, group, slaveMasks, numMaskBits); } + // No mergeable group, create a new group instead. 
+ std::set group = {static_cast(slave1.id)}; + groups.emplace_back(group); + updateGroupMask(slave1.physPort, group, slaveMasks, numMaskBits); + sortGroupsBySize(groups); } - - SlaveMasksT slaveMasks; - for (const auto &group : slaveGroups) { - // Iterate over all the ID values in a group - // If bit n-th (n <= 5) of an ID value differs from bit n-th of another ID - // value, the bit position should be "don't care", and we will set the - // mask bit of that position to 0 - int mask[5] = {-1, -1, -1, -1, -1}; - for (PhysPortAndID port : group) { - for (int i = 0; i < 5; i++) { - if (mask[i] == -1) { - mask[i] = port.id >> i & 0x1; - } else if (mask[i] != (port.id >> i & 0x1)) { - // found bit difference --> mark as "don't care" - mask[i] = 2; - } - } - } - - int maskValue = 0; - for (int i = 4; i >= 0; i--) { - if (mask[i] == 2) { - // don't care - mask[i] = 0; - } else { - mask[i] = 1; - } - maskValue = (maskValue << 1) + mask[i]; - } - for (PhysPortAndID port : group) slaveMasks[port] = maskValue; - } - - // sort for deterministic IR output - for (auto &item : slaveGroups) std::sort(item.begin(), item.end()); - std::sort(slaveGroups.begin(), slaveGroups.end()); return std::make_tuple(slaveGroups, slaveMasks); } diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_router.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_router.h index 78d355a94..19e3554d6 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_router.h +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_router.h @@ -135,8 +135,13 @@ struct PhysPortAndID { // A map from a switchbox output (physical) port to the number of that port. using MasterSetsT = std::map>>; -using SlaveGroupsT = std::vector>; -using SlaveMasksT = std::map; +/// Maps a slave port to groups of packet IDs. +/// Groups associated with the same slave port will be lowered together into a +/// `packet_rules` operation. +/// IDs within the same group will be converted into a single `packet_rule` +/// entry. 
+using SlaveGroupsT = std::map>>; +using SlaveMasksT = std::map; using SlaveAMSelsT = std::map>; using ConnectionAndFlowIDT = std::pair; using TileLocToConnectionFlowIDT = @@ -144,7 +149,8 @@ using TileLocToConnectionFlowIDT = using PacketFlowMapT = DenseMap>; std::tuple emitSlaveGroupsAndMasksRoutingConfig( - ArrayRef slavePorts, const PacketFlowMapT &packetFlows); + ArrayRef slavePorts, const PacketFlowMapT &packetFlows, + uint32_t numMaskBits); FailureOr> emitPacketRoutingConfiguration( const AMDAIEDeviceModel &deviceModel, const PacketFlowMapT &packetFlows); diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.cc index 6a191cfb6..f26ca0437 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.cc +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.cc @@ -612,6 +612,18 @@ uint8_t AMDAIEDeviceModel::getPacketIdMaxIdx() const { return deviceConfig.packetIdMaxIdx; } +uint8_t AMDAIEDeviceModel::getPacketIdMaskWidth() const { + return deviceConfig.packetIdMaskWidth; +} + +uint8_t AMDAIEDeviceModel::getNumPacketRuleSlots(uint8_t col, + uint8_t row) const { + AMDAIETileType tileType = getTileType(col, row); + const XAie_StrmMod *strmMod = + devInst.DevProp.DevMod[static_cast(tileType)].StrmSw; + return strmMod->NumSlaveSlots; +} + uint8_t AMDAIEDeviceModel::getStreamSwitchArbiterMax(uint8_t col, uint8_t row) const { assert(isCoreTile(col, row) || isMemTile(col, row) || isShimTile(col, row)); diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h index 72969f9b9..e5d137755 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_runtime.h @@ -271,6 +271,9 @@ struct AMDAIEDeviceModel { uint8_t minStrideBitWidth{32}; /// The max packet id. uint8_t packetIdMaxIdx{0}; + /// The bitwidth of the packet ID mask. 
This is currently buried in + /// aie-rt and not exposed for configuration. + uint8_t packetIdMaskWidth{5}; /// Currently, the max arbiter/msel is hidden inside aie-rt. uint8_t streamSwitchCoreArbiterMax{0}; uint8_t streamSwitchCoreMSelMax{0}; @@ -473,6 +476,10 @@ struct AMDAIEDeviceModel { uint32_t getOffsetFromAddress(uint32_t address) const; uint8_t getPacketIdMaxIdx() const; + /// Get the bitwidth of the packet id mask. + uint8_t getPacketIdMaskWidth() const; + /// Get the maximum number of packet rule slots available for each slave port. + uint8_t getNumPacketRuleSlots(uint8_t col, uint8_t row) const; uint8_t getStreamSwitchArbiterMax(uint8_t col, uint8_t row) const; uint8_t getStreamSwitchMSelMax(uint8_t col, uint8_t row) const;