Skip to content

Commit

Permalink
Compute DRAM Utilization in tt-npe from workload
Browse files Browse the repository at this point in the history
  • Loading branch information
bgrady-tt committed Jan 29, 2025
1 parent 9c79e46 commit d87625e
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 12 deletions.
2 changes: 2 additions & 0 deletions tt_npe/cpp/include/device_data/wormhole_b0.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ constexpr size_t NUM_COLS = 10;
constexpr size_t NUM_ROWS = 12;
constexpr float LINK_BANDWIDTH = 30;

// Aggregate DRAM bandwidth reported for this device. npeWorkload::getDRAMTrafficStats multiplies
// this value by a cycle count and divides a byte total by the product, i.e. it is consumed as
// bytes per cycle. NOTE(review): confirm 384 is the intended bytes/cycle figure for wormhole_b0.
constexpr float AGGREGATE_DRAM_BANDWIDTH = 384;

const TransferBandwidthTable TRANSFER_BW_TABLE = {
{ 0, 0},
{ 128, 5.5},
Expand Down
2 changes: 2 additions & 0 deletions tt_npe/cpp/include/npeDeviceModel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class npeDeviceModel {
return core_type_to_ir.at(core_type);
}
}
float getAggregateDRAMBandwidth() const;

private:
// build wormhole_b0 device
Expand All @@ -79,6 +80,7 @@ class npeDeviceModel {
CoordToTypeMapping core_to_type_mapping;
CoreTypeToInjectionRate core_type_to_ir;
TransferBandwidthTable transfer_bandwidth_table;
double aggregate_dram_bandwidth;
};

} // namespace tt_npe
2 changes: 2 additions & 0 deletions tt_npe/cpp/include/npeStats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct npeStats {
double overall_max_link_util = 0;
double overall_avg_niu_demand = 0;
double overall_max_niu_demand = 0;
double dram_bw_util = 0;
std::vector<TimestepStats> per_timestep_stats;

std::string to_string(bool verbose = false) const;
Expand All @@ -55,6 +56,7 @@ struct npeStats {
void emitSimStatsToFile(
const std::string &filepath,
const std::vector<PETransferState> &transfer_state,
const npeDeviceModel& model,
const npeConfig &cfg) const;
};

Expand Down
9 changes: 9 additions & 0 deletions tt_npe/cpp/include/npeWorkload.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@

namespace tt_npe {

/// Summary of DRAM traffic: byte totals per direction plus a utilization percentage.
/// All fields start at zero; the struct performs no computation besides `total_bytes()`.
struct DRAMTrafficStats {
    /// Number of bytes counted as reads from DRAM.
    uint64_t read_bytes{0};
    /// Number of bytes counted as writes to DRAM.
    uint64_t write_bytes{0};
    /// DRAM bandwidth utilization, expressed as a percentage.
    double dram_utilization_pct{0.0};

    /// Combined read + write byte count.
    uint64_t total_bytes() const {
        const uint64_t combined = write_bytes + read_bytes;
        return combined;
    }
};

using npeWorkloadPhaseID = int;
using npeWorkloadTransferID = int;

Expand Down Expand Up @@ -98,6 +105,8 @@ class npeWorkload {
// Returns the golden result cycle count currently stored on this workload.
CycleCount getGoldenResultCycles() const { return golden_cycle_count; }
// Stores `cycle_count` as this workload's golden result cycle count.
void setGoldenResultCycles(CycleCount cycle_count) { golden_cycle_count = cycle_count; }

// Tallies the bytes this workload reads from / writes to DRAM cores (as classified by
// `device_model`) and derives a DRAM bandwidth utilization percentage from the golden cycle
// count and the device's aggregate DRAM bandwidth.
DRAMTrafficStats getDRAMTrafficStats(const npeDeviceModel &device_model) const;

private:
std::vector<npeWorkloadPhase> phases;
npeWorkloadTransferID gbl_transfer_id = 0;
Expand Down
6 changes: 6 additions & 0 deletions tt_npe/cpp/pybind/11/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ PYBIND11_MODULE(tt_npe_pybind, m) {
.def_readwrite("overall_max_niu_demand", &tt_npe::npeStats::overall_max_niu_demand)
.def_readwrite("overall_avg_link_util", &tt_npe::npeStats::overall_avg_link_util)
.def_readwrite("overall_max_link_util", &tt_npe::npeStats::overall_max_link_util)
.def_readwrite("dram_bw_util", &tt_npe::npeStats::dram_bw_util)
.def(
"__repr__",
[](const tt_npe::npeStats& stats) -> std::string { return stats.to_string(true); })
Expand Down Expand Up @@ -152,6 +153,11 @@ PYBIND11_MODULE(tt_npe_pybind, m) {
workload.def(py::init<>(), "Creates a new empty npe.Workload object.");
workload.def(
"addPhase", &tt_npe::npeWorkload::addPhase, "Adds an npe.Phase object into this workload.");
workload.def(
"getDRAMTrafficStats",
&tt_npe::npeWorkload::getDRAMTrafficStats,
"Returns a `npe.DRAMTrafficStats` object containing the total read and write bytes to/from "
"DRAM in this workload.");

//---- JSON workload ingestion bindings -----------------------------------
m.def(
Expand Down
4 changes: 4 additions & 0 deletions tt_npe/cpp/src/npeDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ float npeDeviceModel::getLinkBandwidth(const nocLinkID& link_id) const {
return wormhole_b0::LINK_BANDWIDTH;
}

/// Reports the aggregate DRAM bandwidth of the modeled device.
///
/// The value is always the compile-time wormhole_b0 constant.
/// NOTE(review): the class also declares an `aggregate_dram_bandwidth` data member that this
/// accessor does not read — confirm which of the two is meant to be the source of truth.
float npeDeviceModel::getAggregateDRAMBandwidth() const {
    constexpr float aggregate_bw = wormhole_b0::AGGREGATE_DRAM_BANDWIDTH;
    return aggregate_bw;
}

nocRoute npeDeviceModel::unicastRoute(
nocType noc_type, const Coord& startpoint, const Coord& endpoint) const {
nocRoute route;
Expand Down
4 changes: 3 additions & 1 deletion tt_npe/cpp/src/npeEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ npeResult npeEngine::runPerfEstimation(const npeWorkload &wl, const npeConfig &c
}

stats.computeSummaryStats();
auto dram_traffic_stats = wl.getDRAMTrafficStats(model);
stats.dram_bw_util = dram_traffic_stats.dram_utilization_pct;

// visualize link congestion
if (cfg.enable_visualizations) {
Expand All @@ -491,7 +493,7 @@ npeResult npeEngine::runPerfEstimation(const npeWorkload &wl, const npeConfig &c
}

if (cfg.emit_stats_as_json) {
stats.emitSimStatsToFile(cfg.stats_json_filepath, transfer_state, cfg);
stats.emitSimStatsToFile(cfg.stats_json_filepath, transfer_state, model, cfg);
}

return stats;
Expand Down
20 changes: 11 additions & 9 deletions tt_npe/cpp/src/npeStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@ std::string npeStats::to_string(bool verbose) const {

output.append(fmt::format(" estimated cycles: {:5d}\n", estimated_cycles));
output.append(fmt::format(" cycle pred error: {:5.1f}%\n", cycle_prediction_error));

output.append("\n");
output.append(fmt::format(" avg Link util: {:5.0f}%\n", overall_avg_link_util));
output.append(fmt::format(" max Link util: {:5.0f}%\n", overall_max_link_util));
output.append(fmt::format(" DRAM BW Util: {:5.1f}%\n", dram_bw_util));
output.append("\n");
output.append(fmt::format(" avg Link util: {:5.1f}%\n", overall_avg_link_util));
output.append(fmt::format(" max Link util: {:5.1f}%\n", overall_max_link_util));
output.append("\n");
output.append(fmt::format(" avg Link demand: {:5.0f}%\n", overall_avg_link_demand));
output.append(fmt::format(" max Link demand: {:5.0f}%\n", overall_max_link_demand));
output.append(fmt::format(" avg Link demand: {:5.1f}%\n", overall_avg_link_demand));
output.append(fmt::format(" max Link demand: {:5.1f}%\n", overall_max_link_demand));
output.append("\n");
output.append(fmt::format(" avg NIU demand: {:5.0f}%\n", overall_avg_niu_demand));
output.append(fmt::format(" max NIU demand: {:5.0f}%\n", overall_max_niu_demand));
output.append(fmt::format(" avg NIU demand: {:5.1f}%\n", overall_avg_niu_demand));
output.append(fmt::format(" max NIU demand: {:5.1f}%\n", overall_max_niu_demand));

if (verbose) {
output.append("\n");
Expand Down Expand Up @@ -54,6 +55,7 @@ void npeStats::computeSummaryStats() {
void npeStats::emitSimStatsToFile(
const std::string &filepath,
const std::vector<PETransferState> &transfer_state,
const npeDeviceModel& model,
const npeConfig &cfg) const {
std::ofstream os(filepath);
if (!os) {
Expand All @@ -69,8 +71,8 @@ void npeStats::emitSimStatsToFile(
fmt::println(os, R"( "device_name" : "{}",)", cfg.device_name);
fmt::println(os, R"( "cycles_per_timestep" : {},)", cfg.cycles_per_timestep);
fmt::println(os, R"( "congestion_model_name" : "{}",)", cfg.congestion_model_name);
// fmt::println(os, R"( "num_rows" : {},)", model.getRows());
// fmt::println(os, R"( "num_cols" : {})", model.getCols());
fmt::println(os, R"( "num_rows" : {},)", model.getRows());
fmt::println(os, R"( "num_cols" : {})", model.getCols());
fmt::println(os, R"(}},)");

//---- emit per transfer data ---------------------------------------------
Expand Down
22 changes: 22 additions & 0 deletions tt_npe/cpp/src/npeWorkload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,26 @@ void npeWorkload::inferInjectionRates(const npeDeviceModel &device_model) {
}
}

/// Tallies the bytes this workload moves to and from DRAM and derives DRAM bandwidth utilization.
///
/// Every transfer of every phase is classified by the core type of its endpoints:
///   - source core is DRAM                                   -> counted as read bytes
///   - otherwise, destination holds a single Coord on DRAM   -> counted as write bytes
/// A transfer whose source is DRAM is only ever counted as a read, even when its destination is
/// also a DRAM core. Destinations that do not hold a single `Coord` are never counted as writes.
///
/// Utilization is `total DRAM bytes / (golden cycle count * aggregate DRAM bandwidth) * 100`.
/// If that denominator is not positive (e.g. no golden cycle count has been set), the
/// utilization is left at 0 instead of dividing by zero and reporting inf/NaN.
///
/// @param device_model  Device model used to look up core types and the aggregate DRAM bandwidth.
/// @return Read/write byte totals and the utilization percentage for this workload.
DRAMTrafficStats npeWorkload::getDRAMTrafficStats(const npeDeviceModel &device_model) const {
    DRAMTrafficStats stats;
    for (const auto &phase : phases) {
        for (const auto &transfer : phase.transfers) {
            if (device_model.getCoreType(transfer.src) == CoreType::DRAM) {
                // read from DRAM
                stats.read_bytes += transfer.total_bytes;
            } else if (
                // write to DRAM
                std::holds_alternative<Coord>(transfer.dst) &&
                device_model.getCoreType(std::get<Coord>(transfer.dst)) == CoreType::DRAM) {
                stats.write_bytes += transfer.total_bytes;
            }
        }
    }

    // Do the arithmetic in double: multiplying the cycle count by a float would round large
    // cycle counts to float precision before the division.
    const double golden_cycles = golden_cycle_count;
    const double bytes_per_cycle = device_model.getAggregateDRAMBandwidth();
    const double total_dram_bandwidth_over_golden_cycles = golden_cycles * bytes_per_cycle;

    // Guard against a missing golden cycle count; stats.dram_utilization_pct stays 0.0 then.
    if (total_dram_bandwidth_over_golden_cycles > 0.0) {
        stats.dram_utilization_pct =
            (static_cast<double>(stats.total_bytes()) / total_dram_bandwidth_over_golden_cycles) *
            100.0;
    }

    return stats;
}

} // namespace tt_npe
17 changes: 15 additions & 2 deletions tt_npe/scripts/analyze_noc_trace_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ def getWeightedAvgLinkUtil(self):
/ total_cycles
)

def getWeightedAvgDramBWUtil(self):
    """Return the DRAM bandwidth utilization averaged over all datapoints, weighted by cycles.

    Each datapoint's ``result.dram_bw_util`` is scaled by its
    ``result.golden_cycles``; the scaled values are summed and the sum is
    divided by ``self.getCycles()``.
    """
    cycle_budget = self.getCycles()
    weighted_total = 0
    for datapoint in self.datapoints:
        outcome = datapoint.result
        weighted_total += outcome.dram_bw_util * outcome.golden_cycles
    return weighted_total / cycle_budget

def getAvgError(self):
return sum([dp.result.cycle_prediction_error for dp in self.datapoints]) / len(
self.datapoints
Expand Down Expand Up @@ -151,7 +163,7 @@ def main():

# Print header
print(
f"{'opname':42} {'op_id':>5}, {'AVG LINK UTIL':>14}, {'MAX LINK UTIL':>14}, {'% Error':>14}, {'CYCLES':>14}"
f"{'opname':42} {'op_id':>5}, {'AVG LINK UTIL':>14}, {'DRAM_BW_UTIL':>14}, {'% Error':>14}, {'CYCLES':>14}"
)

noc_trace_files = glob.glob(os.path.join(args.noc_trace_dir, "*.json"))
Expand Down Expand Up @@ -180,13 +192,14 @@ def main():

for dp in stats.getSortedEvents():
print(
f"{dp.op_name:42}, {dp.op_id:>3}, {dp.result.overall_avg_link_util:>14.2f}, {dp.result.overall_max_link_util:>14.2f}, {dp.result.cycle_prediction_error:>14.2f}, {dp.result.golden_cycles:>14}"
f"{dp.op_name:42}, {dp.op_id:>3}, {dp.result.overall_avg_link_util:>14.1f}, {dp.result.dram_bw_util:14.1f}, {dp.result.cycle_prediction_error:>14.1f}, {dp.result.golden_cycles:>14}"
)

print("-------")
print(f"average cycle prediction error : {stats.getAvgError():.2f} ")
print(f"average link util : {stats.getAvgLinkUtil():.2f} ")
print(f"cycle-weighted overall link util : {stats.getWeightedAvgLinkUtil():.2f} ")
print(f"cycle-weighted dram bw util : {stats.getWeightedAvgDramBWUtil():.2f} ")

if __name__ == "__main__":
main()

0 comments on commit d87625e

Please sign in to comment.