Skip to content

Commit

Permalink
fix inaccuracies with NIU absorption rate modelling (unicast and mult…
Browse files Browse the repository at this point in the history
…icast)
  • Loading branch information
bgrady-tt committed Feb 7, 2025
1 parent f966894 commit caa87e5
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 22 deletions.
10 changes: 8 additions & 2 deletions tt_npe/cpp/include/device_data/wormhole_b0.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,20 @@ const TransferBandwidthTable TRANSFER_BW_TABLE = {
{2048, 30.0},
{8192, 30.0}};

const CoreTypeToInjectionRate CORE_TYPE_TO_INJ_RATE = {
const CoreTypeToInjectionRate CORE_TYPE_TO_INJECTION_RATE = {
{CoreType::DRAM, 23.2},
{CoreType::ETH, 23.2},
{CoreType::UNDEF, 28.1},
{CoreType::WORKER, 28.1},
};
// Per-core-type sink absorption rate table: DRAM and ETH entries are 24.0,
// UNDEF and WORKER entries are 28.1.
// NOTE(review): units are not stated here; presumably the same units as the
// injection-rate table above — TODO confirm.
// NOTE(review): this reuses the CoreTypeToInjectionRate alias even though the
// values are absorption rates; a neutrally named alias may read better.
const CoreTypeToInjectionRate CORE_TYPE_TO_ABSORPTION_RATE = {
{CoreType::DRAM, 24.0},
{CoreType::ETH, 24.0},
{CoreType::UNDEF, 28.1},
{CoreType::WORKER, 28.1},
};

const CoordToTypeMapping CORE_TO_TYPE_MAP = {
const CoordToCoreTypeMapping CORE_TO_TYPE_MAP = {
{{0,0},{CoreType::DRAM}},
{{0,1},{CoreType::ETH}},
{{0,2},{CoreType::ETH}},
Expand Down
49 changes: 29 additions & 20 deletions tt_npe/cpp/src/npeEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ void npeEngine::modelCongestion(

// assume all links have identical bandwidth
float LINK_BANDWIDTH = model.getLinkBandwidth({{0, 0}, nocLinkType::NOC0_EAST});
// Query the WORKER sink absorption rate on every call. A function-local
// `static` would be initialized only once, so a later call made with a
// different model would silently reuse the first model's value.
const auto worker_sink_absorption_rate =
    model.getSinkAbsorptionRateByCoreType(CoreType::WORKER);

// Note: for now doing gradient descent to determine link bandwidth doesn't
// appear necessary. Base algorithm devolves to running just a single
Expand Down Expand Up @@ -110,7 +112,10 @@ void npeEngine::modelCongestion(
} else {
const auto &mcast_dst = std::get<MCastCoordPair>(lt.params.dst);
for (auto c : mcast_dst) {
niu_demand_grid(c.row, c.col, sink_niu_idx) += effective_demand;
// multicast only loads WORKER NIUs; other NIUs ignore the traffic
if (model.getCoreType(c) == CoreType::WORKER) {
niu_demand_grid(c.row, c.col, sink_niu_idx) += effective_demand;
}
}
}

Expand All @@ -123,54 +128,58 @@ void npeEngine::modelCongestion(
// find highest demand resource on each route to set bandwidth
for (auto ltid : live_transfer_ids) {
auto &lt = transfers[ltid];

// find max link demand on route
float max_link_demand_on_route = 0;
auto update_max_demand = [&max_link_demand_on_route](float demand) -> bool {
auto update_max_link_demand = [&max_link_demand_on_route](float demand) -> bool {
if (demand > max_link_demand_on_route) {
max_link_demand_on_route = demand;
return true;
} else {
return false;
}
};

// find max link demand on route
for (const auto &link : lt.route) {
auto [r, c] = link.coord;
float link_demand = link_demand_grid(r, c, size_t(link.type));
update_max_demand(link_demand);
update_max_link_demand(link_demand);
}
auto link_only_max_demand = max_link_demand_on_route;
auto min_link_bw_derate = LINK_BANDWIDTH / max_link_demand_on_route;

// find max demand at source and sink
// compute bottleneck (min derate factor) for source and sink NIUs
auto src_niu_idx = size_t(
lt.params.noc_type == nocType::NOC0 ? nocNIUType::NOC0_SRC : nocNIUType::NOC1_SRC);
auto src_demand = niu_demand_grid(lt.params.src.row, lt.params.src.col, src_niu_idx);
auto src_bw_demand = niu_demand_grid(lt.params.src.row, lt.params.src.col, src_niu_idx);
auto src_bw_derate = lt.params.injection_rate / src_bw_demand;

auto sink_niu_idx = size_t(
lt.params.noc_type == nocType::NOC0 ? nocNIUType::NOC0_SINK
: nocNIUType::NOC1_SINK);

float sink_demand = 0;
float sink_bw_derate = 1;
if (std::holds_alternative<Coord>(lt.params.dst)) {
const auto &dst = std::get<Coord>(lt.params.dst);
sink_demand = niu_demand_grid(dst.row, dst.col, sink_niu_idx);
auto sink_bw_demand = niu_demand_grid(dst.row, dst.col, sink_niu_idx);
sink_bw_derate = model.getSinkAbsorptionRate(dst) / sink_bw_demand;
} else {
// multicast transfer speed is set by the slowest sink NIU
const auto &mcast_dst = std::get<MCastCoordPair>(lt.params.dst);
for (auto c : mcast_dst) {
sink_demand =
std::max(sink_demand, niu_demand_grid(c.row, c.col, sink_niu_idx));
float sink_demand = 0;
for (const auto &loc : mcast_dst) {
if (model.getCoreType(loc) == CoreType::WORKER) {
sink_demand =
std::min(sink_demand, niu_demand_grid(loc.row, loc.col, sink_niu_idx));
}
}
sink_bw_derate = worker_sink_absorption_rate / sink_demand;
}

auto max_niu_demand = std::max(src_demand, sink_demand);
auto min_niu_bw_derate = std::min(src_bw_derate, sink_bw_derate);

if (max_link_demand_on_route > LINK_BANDWIDTH ||
max_niu_demand > lt.params.injection_rate) {
float link_bw_derate = LINK_BANDWIDTH / max_link_demand_on_route;
float niu_bw_derate = lt.params.injection_rate / max_niu_demand;
float bw_derate = std::min(link_bw_derate, niu_bw_derate);
if (min_link_bw_derate < 1.0 || min_niu_bw_derate < 1.0) {
float overall_bw_derate = std::min(min_link_bw_derate, min_niu_bw_derate);

lt.curr_bandwidth *= 1.0 - (grad_fac * (1.0f - bw_derate));
lt.curr_bandwidth *= 1.0 - (grad_fac * (1.0f - overall_bw_derate));
}
}
}
Expand Down

0 comments on commit caa87e5

Please sign in to comment.