Skip to content

Commit

Permalink
Enable IRAM on ERISC (#18066)
Browse files Browse the repository at this point in the history
### Ticket
#6740

### Problem description
IRAM should be used to accelerate program execution

### What's changed
- ERISC mac bug workaround
- kernel_init to point to IRAM address
~- TT_METAL_ENABLE_IRAM switch to enable/disable IRAM feature (only on
WH b0)~
- TT_METAL_ENABLE_ERISC_IRAM switch to enable/disable IRAM feature (only
on WH b0)

### Checklist
- [ ] [All post
commit](https://github.com/tenstorrent/tt-metal/actions/workflows/all-post-commit-workflows.yaml)
CI passes
- [ ] [Blackhole Post
commit](https://github.com/tenstorrent/tt-metal/actions/workflows/blackhole-post-commit.yaml)
CI passes (if applicable)
- [ ] [Model
regression](https://github.com/tenstorrent/tt-metal/actions/workflows/perf-models.yaml)
CI passes (if applicable)
- [ ] [Device performance
regression](https://github.com/tenstorrent/tt-metal/actions/workflows/perf-device-models.yaml)
CI passes (if applicable)
- [ ] **(For models and ops writers)** Full [new models
tests](https://github.com/tenstorrent/tt-metal/actions/workflows/full-new-models-suite.yaml)
CI passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
  • Loading branch information
daminakaTT authored Mar 5, 2025
1 parent d9fec1f commit ce8bbba
Show file tree
Hide file tree
Showing 19 changed files with 161 additions and 7 deletions.
4 changes: 4 additions & 0 deletions tt_metal/api/tt-metalium/hal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ inline const HalJitBuildConfig &HalCoreInfoType::get_jit_build_config(uint32_t p
class Hal {
public:
using RelocateFunc = std::function<uint64_t(uint64_t, uint64_t)>;
using IramRelocateFunc = std::function<uint64_t(uint64_t)>;
using ValidRegAddrFunc = std::function<bool(uint32_t)>;
using NOCXYEncodingFunc = std::function<uint32_t(uint32_t, uint32_t)>;
using NOCMulticastEncodingFunc = std::function<uint32_t(uint32_t, uint32_t, uint32_t, uint32_t)>;
Expand Down Expand Up @@ -171,6 +172,7 @@ class Hal {

// Functions where implementation varies by architecture
RelocateFunc relocate_func_;
IramRelocateFunc erisc_iram_relocate_func_;
ValidRegAddrFunc valid_reg_addr_func_;
NOCXYEncodingFunc noc_xy_encoding_func_;
NOCMulticastEncodingFunc noc_multicast_encoding_func_;
Expand Down Expand Up @@ -261,6 +263,8 @@ class Hal {
return relocate_func_(addr, local_init_addr);
}

uint64_t erisc_iram_relocate_dev_addr(uint64_t addr) { return erisc_iram_relocate_func_(addr); }

uint32_t valid_reg_addr(uint32_t addr) { return valid_reg_addr_func_(addr); }

uint32_t get_stack_size(uint32_t type) { return stack_size_func_(type); }
Expand Down
4 changes: 4 additions & 0 deletions tt_metal/api/tt-metalium/rtoptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ class RunTimeOptions {
bool simulator_enabled = false;
std::filesystem::path simulator_path = "";

bool erisc_iram_enabled = false;

RunTimeOptions();

public:
Expand Down Expand Up @@ -317,6 +319,8 @@ class RunTimeOptions {
inline bool get_simulator_enabled() { return simulator_enabled; }
inline const std::filesystem::path& get_simulator_path() { return simulator_path; }

inline bool get_erisc_iram_enabled() { return erisc_iram_enabled; }

private:
// Helper functions to parse feature-specific environment vaiables.
void ParseFeatureEnv(RunTimeDebugFeatures feature);
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/api/tt-metalium/tt_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class memory {
uint32_t get_text_size() const { return this->text_size_; }
uint32_t get_packed_size() const { return data_.size() * sizeof(word_t); }
uint32_t get_text_addr() const { return this->text_addr_; }
void set_text_addr(const uint32_t& addr) { this->text_addr_ = addr; }

size_t size() const { return data_.size(); }

Expand All @@ -66,6 +67,7 @@ class memory {
// Iterate over spans_ to act on data_ (eg., to device)
void process_spans(const std::function<void (std::vector<uint32_t>::const_iterator, uint64_t addr, uint32_t len)>& callback) const;
void process_spans(const std::function<void (std::vector<uint32_t>::iterator, uint64_t addr, uint32_t len)>& callback);
void update_spans(std::function<void(uint64_t& addr)>& callback);
};

} // namespace ll_api
42 changes: 42 additions & 0 deletions tt_metal/hw/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ set(TYPES
kernel
)

# for wormhole, we need to generate two different linker scripts
set(WH_LD_SCRIPTS
kernel # base name
app # base name
)
set(IRAM_OPTIONS
"" # No IRAM
ENABLE_IRAM # With IRAM
)

include(FetchContent)
set(SFPI_x86_64_Linux_RELEASE
"v6.2.0/sfpi-release.tgz"
Expand All @@ -45,6 +55,38 @@ foreach(ARCH IN LISTS ARCHS)
set(DEV_MEM_MAP "${PROJECT_SOURCE_DIR}/tt_metal/hw/inc/${ARCH}/dev_mem_map.h")
set(HW_INCLUDES "${PROJECT_SOURCE_DIR}/tt_metal/hw/inc/${ARCH}")
set(HW_OUTPUT_DIR "${PROJECT_SOURCE_DIR}/runtime/hw/toolchain/${ARCH}")

if("${ARCH}" STREQUAL "wormhole")
foreach(SCRIPT_TYPE IN LISTS WH_LD_SCRIPTS)
foreach(IRAM_OPT IN LISTS IRAM_OPTIONS)
if(IRAM_OPT)
set(OUTPUT_FILE "${HW_OUTPUT_DIR}/erisc-b0-${SCRIPT_TYPE}_iram.ld")
set(IRAM_FLAG "-D${IRAM_OPT}")
set(IRAM_COMMENT " with IRAM")
else()
set(OUTPUT_FILE "${HW_OUTPUT_DIR}/erisc-b0-${SCRIPT_TYPE}.ld")
set(IRAM_FLAG "")
set(IRAM_COMMENT "")
endif()

add_custom_command(
OUTPUT
${OUTPUT_FILE}
COMMAND
${CMAKE_CXX_COMPILER} ${IRAM_FLAG} -I${HW_INCLUDES} -E -P -x c -o ${OUTPUT_FILE}
${CMAKE_CURRENT_SOURCE_DIR}/toolchain/erisc-b0-${SCRIPT_TYPE}.ld
DEPENDS
${CMAKE_CURRENT_SOURCE_DIR}/toolchain/erisc-b0-${SCRIPT_TYPE}.ld
COMMENT "Preprocessing toolchain/erisc-b0-${SCRIPT_TYPE}.ld${IRAM_COMMENT}"
VERBATIM
)

# Add to preprocessed files list
list(APPEND PREPROCESSED_LD_FILES ${OUTPUT_FILE})
endforeach()
endforeach()
endif()

foreach(PROC IN LISTS PROCS)
foreach(TYPE IN LISTS TYPES)
set(HW_OUTPUT_FILE "${HW_OUTPUT_DIR}/${TYPE}_${PROC}.ld")
Expand Down
30 changes: 30 additions & 0 deletions tt_metal/hw/firmware/src/erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,33 @@ uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((used));

#if defined(ARCH_WORMHOLE) && defined(ENABLE_IRAM)
void l1_to_erisc_iram_copy(volatile uint32_t* iram_load_reg) {
// Trigger copy of code from L1 to IRAM.
*iram_load_reg = eth_l1_mem::address_map::KERNEL_BASE >> 4;
RISC_POST_STATUS(0x10000000);
}

void l1_to_erisc_iram_copy_wait(volatile uint32_t* iram_load_reg) {
// Wait for copy to complete.
while (*iram_load_reg & 0x1);
}

void iram_setup() {
// Copy code from L1 to IRAM.
volatile uint32_t* iram_load_reg = (volatile uint32_t*)(ETH_CTRL_REGS_START + ETH_CORE_IRAM_LOAD);

toggle_macpcs_ptr = (void (*)(uint32_t))RtosTable[1];
toggle_macpcs_ptr(0); // To disable MAC

l1_to_erisc_iram_copy(iram_load_reg);
l1_to_erisc_iram_copy_wait(iram_load_reg);

toggle_macpcs_ptr(1); // To re-enable MAC
}

#endif

void __attribute__((noinline)) Application(void) {
WAYPOINT("I");

Expand Down Expand Up @@ -86,6 +113,9 @@ void __attribute__((noinline)) Application(void) {
if (enables & DISPATCH_CLASS_MASK_ETH_DM0) {
WAYPOINT("R");
firmware_config_init(mailboxes, ProgrammableCoreType::ACTIVE_ETH, DISPATCH_CLASS_ETH_DM0);
#if defined(ARCH_WORMHOLE) && defined(ENABLE_IRAM)
iram_setup();
#endif
kernel_init(0);
WAYPOINT("D");
}
Expand Down
1 change: 1 addition & 0 deletions tt_metal/hw/inc/ethernet/erisc.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "noc_nonblocking_api.h"

inline void (*rtos_context_switch_ptr)();
inline void (*toggle_macpcs_ptr)(uint32_t);
volatile inline uint32_t* flag_disable = (uint32_t*)(eth_l1_mem::address_map::LAUNCH_ERISC_APP_FLAG);

namespace internal_ {
Expand Down
6 changes: 6 additions & 0 deletions tt_metal/hw/inc/ethernet/tt_eth_ss_regs.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
#define NUM_ETH_QUEUES 2
#endif

#define ETH_CTRL_REGS_START 0xFFB94000
// Write to start ERISC IRAM load.
// Write value: word address for the start of binary in L1.
// Read value: bit 0 = status (1=ongoing, 0=complete), bits [17:1] = currend read address.
#define ETH_CORE_IRAM_LOAD ((0x30 + NUM_ECC_SOURCES * 4) + 0x1C)

//////////////////
// RISC debug regs
#define ETH_RISC_REGS_START 0xFFB10000
Expand Down
10 changes: 9 additions & 1 deletion tt_metal/hw/inc/wormhole/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@

#include "noc/noc_parameters.h"

namespace eth_iram_mem {
struct address_map {
static constexpr std::int32_t ERISC_IRAM_BASE = 0xFFC00000;
static constexpr std::int32_t ERISC_IRAM_SIZE = 16 * 1024;
static constexpr std::int32_t ERISC_KERNEL_BASE = ERISC_IRAM_BASE;
};
}; // namespace eth_iram_mem

namespace eth_l1_mem {

struct address_map {
Expand Down Expand Up @@ -38,12 +46,12 @@ struct address_map {
// Base addresses
static constexpr std::int32_t FIRMWARE_BASE = 0x9040;
static constexpr std::int32_t L1_EPOCH_Q_BASE = 0x9000; // Epoch Q start in L1.
static constexpr std::int32_t KERNEL_BASE = 0xA840;
static constexpr std::int32_t COMMAND_Q_BASE = L1_EPOCH_Q_BASE + FIRMWARE_SIZE;
static constexpr std::int32_t DATA_BUFFER_BASE = COMMAND_Q_BASE + COMMAND_Q_SIZE;
static constexpr std::int32_t TILE_HEADER_BUFFER_BASE = DATA_BUFFER_BASE + DATA_BUFFER_SIZE;

// TT Metal Specific
static constexpr std::int32_t ERISC_FIRMWARE_SIZE = 2 * 1024;
// Total 160 * 1024 L1 starting from TILE_HEADER_BUFFER_BASE
// - 1 * 1024 misc args
// - 53 * 1024 eth app reserved buffer space
Expand Down
5 changes: 4 additions & 1 deletion tt_metal/hw/toolchain/erisc-b0-app.ld
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@

INCLUDE "erisc-b0-memory.ld"

REGION_ALIAS("REGION_APP_IRAM_CODE", ERISC_APP_IRAM_CODE)
REGION_ALIAS("REGION_APP_CODE", ERISC_APP_CODE)
#if defined(ENABLE_IRAM)
REGION_ALIAS("REGION_APP_KERNEL_CODE", ERISC_APP_IRAM_CODE)
#else
REGION_ALIAS("REGION_APP_KERNEL_CODE", ERISC_APP_KERNEL_CODE)
#endif
REGION_ALIAS("REGION_APP_DATA", ERISC_APP_DATA)
REGION_ALIAS("REGION_LDM", LOCAL_DATA_MEM)

Expand Down
5 changes: 4 additions & 1 deletion tt_metal/hw/toolchain/erisc-b0-kernel.ld
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@

INCLUDE "erisc-b0-memory.ld"

REGION_ALIAS("REGION_APP_IRAM_CODE", ERISC_APP_IRAM_CODE)
#if defined(ENABLE_IRAM)
REGION_ALIAS("REGION_APP_KERNEL_CODE", ERISC_APP_IRAM_CODE)
#else
REGION_ALIAS("REGION_APP_KERNEL_CODE", ERISC_APP_KERNEL_CODE)
#endif
REGION_ALIAS("REGION_APP_KERNEL_DATA", ERISC_APP_KERNEL_DATA)
REGION_ALIAS("REGION_LDM", LOCAL_DATA_MEM)

Expand Down
13 changes: 13 additions & 0 deletions tt_metal/impl/kernels/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <core_coord.hpp>
#include "tt_metal/jit_build/genfiles.hpp"
#include "tt_metal/jit_build/build_env_manager.hpp"
#include "hw/inc/wormhole/eth_l1_address_map.h"

namespace tt {

namespace tt_metal {
Expand Down Expand Up @@ -442,6 +444,17 @@ void EthernetKernel::read_binaries(IDevice* device) {
ll_api::memory const& binary_mem = llrt::get_risc_binary(
build_state.get_target_out_path(this->kernel_full_name_),
load_type);
if (tt::llrt::RunTimeOptions::get_instance().get_erisc_iram_enabled() && this->config_.eth_mode != Eth::IDLE) {
// text_addr and some of span's addr point to IRAM base address.
// However it need to be placed L1 kernel base address for FW to copy it to IRAM then kick off
// The kernel can run with IRAM base address once it started.
const_cast<ll_api::memory&>(binary_mem)
.set_text_addr(tt::tt_metal::hal.erisc_iram_relocate_dev_addr((uint64_t)binary_mem.get_text_addr()));
std::function<void(uint64_t& addr)> update_callback = [](uint64_t& addr) {
addr = tt::tt_metal::hal.erisc_iram_relocate_dev_addr(addr);
};
const_cast<ll_api::memory&>(binary_mem).update_spans(update_callback);
}
binaries.push_back(&binary_mem);
uint32_t binary_size = binary_mem.get_packed_size();
log_debug(LogLoader, "ERISC={}, name={}, size={} (bytes)", erisc_id, this->name(), binary_size);
Expand Down
6 changes: 4 additions & 2 deletions tt_metal/impl/program/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,7 @@ void detail::Program_::populate_dispatch_data(IDevice* device) {
}
const auto& binaries =
kernel->binaries(BuildEnvManager::get_instance().get_device_build_env(device->build_id()).build_key);
const auto core_type = kernel->get_kernel_programmable_core_type();
std::vector<uint32_t> dst_base_addrs;
std::vector<uint32_t> page_offsets;
std::vector<uint32_t> lengths;
Expand All @@ -1141,8 +1142,9 @@ void detail::Program_::populate_dispatch_data(IDevice* device) {
lengths.resize(lengths.size() + num_spans);
riscvs.resize(riscvs.size() + num_spans);

kernel_bin.process_spans([&](std::vector<uint32_t>::const_iterator mem_ptr, uint64_t dst, uint32_t len) {

kernel_bin.process_spans([&](std::vector<uint32_t>::const_iterator mem_ptr,
uint64_t dst,
uint32_t len) {
// Set dst for eth kernels until they move to ring buffer
dst_base_addrs[transfer_info_index] = dst;
page_offsets[transfer_info_index] =
Expand Down
15 changes: 13 additions & 2 deletions tt_metal/jit_build/build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,9 @@ JitBuildActiveEthernet::JitBuildActiveEthernet(const JitBuildEnv& env, const Jit
this->target_name_ = "erisc";
this->cflags_ = env_.cflags_ + " -fno-delete-null-pointer-checks ";

if (tt::llrt::RunTimeOptions::get_instance().get_erisc_iram_enabled()) {
this->defines_ += "-DENABLE_IRAM ";
}
this->defines_ +=
"-DCOMPILE_FOR_ERISC "
"-DERISC "
Expand All @@ -501,9 +504,17 @@ JitBuildActiveEthernet::JitBuildActiveEthernet(const JitBuildEnv& env, const Jit

string linker_str;
if (this->is_fw_) {
linker_str = "tt_metal/hw/toolchain/erisc-b0-app.ld ";
if (tt::llrt::RunTimeOptions::get_instance().get_erisc_iram_enabled()) {
linker_str = "runtime/hw/toolchain/" + get_alias(env_.arch_) + "/erisc-b0-app_iram.ld ";
} else {
linker_str = "runtime/hw/toolchain/" + get_alias(env_.arch_) + "/erisc-b0-app.ld ";
}
} else {
linker_str = "tt_metal/hw/toolchain/erisc-b0-kernel.ld ";
if (tt::llrt::RunTimeOptions::get_instance().get_erisc_iram_enabled()) {
linker_str = "runtime/hw/toolchain/" + get_alias(env_.arch_) + "/erisc-b0-kernel_iram.ld ";
} else {
linker_str = "runtime/hw/toolchain/" + get_alias(env_.arch_) + "/erisc-b0-kernel.ld ";
}
}
this->lflags_ = env_.lflags_ + "-L" + env_.root_ +
"/tt_metal/hw/toolchain "
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/llrt/blackhole/bh_hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ void Hal::initialize_bh() {
return addr;
};

this->erisc_iram_relocate_func_ = [](uint64_t addr) { return addr; };

this->valid_reg_addr_func_ = [](uint32_t addr) {
return (
((addr >= NOC_OVERLAY_START_ADDR) &&
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/llrt/grayskull/gs_hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ void Hal::initialize_gs() {
return addr;
};

this->erisc_iram_relocate_func_ = [](uint64_t addr) { return addr; };

this->valid_reg_addr_func_ = [](uint32_t addr) {
return (
((addr >= NOC_OVERLAY_START_ADDR) &&
Expand Down
1 change: 1 addition & 0 deletions tt_metal/llrt/llrt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ bool test_load_write_read_risc_binary(
assert(tt::Cluster::instance().is_worker_core(core, chip_id) or tt::Cluster::instance().is_ethernet_core(core, chip_id));

uint64_t local_init_addr = tt::tt_metal::hal.get_jit_build_config(core_type_idx, processor_class_idx, processor_type_idx).local_init_addr;
auto core_type = tt::tt_metal::hal.get_programmable_core_type(core_type_idx);

log_debug(tt::LogLLRuntime, "hex_vec size = {}, size_in_bytes = {}", mem.size(), mem.size()*sizeof(uint32_t));
mem.process_spans([&](std::vector<uint32_t>::const_iterator mem_ptr, uint64_t addr, uint32_t len_words) {
Expand Down
4 changes: 4 additions & 0 deletions tt_metal/llrt/rtoptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ RunTimeOptions::RunTimeOptions() {
this->simulator_enabled = true;
this->simulator_path = std::getenv("TT_METAL_SIMULATOR");
}

if (getenv("TT_METAL_ENABLE_ERISC_IRAM")) {
this->erisc_iram_enabled = true;
}
}

const std::string& RunTimeOptions::get_root_dir() {
Expand Down
6 changes: 6 additions & 0 deletions tt_metal/llrt/tt_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,10 @@ void memory::process_spans(
}
}

void memory::update_spans(std::function<void(uint64_t& addr)>& callback) {
for (auto& span : link_spans_) {
callback(span.addr);
}
}

} // namespace ll_api
10 changes: 10 additions & 0 deletions tt_metal/llrt/wormhole/wh_hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "hal.hpp"
#include "wormhole/wh_hal.hpp"
#include "hw/inc/wormhole/eth_l1_address_map.h"

// Reserved DRAM addresses
// Host writes (4B value) to and reads from DRAM_BARRIER_BASE across all channels to ensure previous writes have been
Expand Down Expand Up @@ -69,6 +70,15 @@ void Hal::initialize_wh() {
return addr;
};

this->erisc_iram_relocate_func_ = [](uint64_t addr) {
if (addr == static_cast<uint32_t>(eth_iram_mem::address_map::ERISC_IRAM_BASE)) {
// IRAM enabled program starts from ERISC_IRAM_BASE. This relocation is for where to put the program.
// At first the program is placed on ERISC_IRAM_BASE, then erisc.cc copies to local IRAM.
return (uint64_t)eth_l1_mem::address_map::KERNEL_BASE;
}
return addr;
};

this->valid_reg_addr_func_ = [](uint32_t addr) {
return (
((addr >= NOC_OVERLAY_START_ADDR) &&
Expand Down

0 comments on commit ce8bbba

Please sign in to comment.