From ce8bbbadd52d505cd420ed879d9599d8282210ee Mon Sep 17 00:00:00 2001 From: Daiki Aminaka Date: Tue, 4 Mar 2025 17:11:34 -0800 Subject: [PATCH] Enable IRAM on ERISC (#18066) ### Ticket https://github.com/tenstorrent/tt-metal/issues/6740 ### Problem description IRAM should be used to accelerate program execution ### What's changed - ERISC mac bug workaround - kernel_init to point to IRAM address ~- TT_METAL_ENABLE_IRAM switch to enable/disable IRAM feature (only on WH b0)~ - TT_METAL_ENABLE_ERISC_IRAM switch to enable/disable IRAM feature (only on WH b0) ### Checklist - [ ] [All post commit](https://github.com/tenstorrent/tt-metal/actions/workflows/all-post-commit-workflows.yaml) CI passes - [ ] [Blackhole Post commit](https://github.com/tenstorrent/tt-metal/actions/workflows/blackhole-post-commit.yaml) CI passes (if applicable) - [ ] [Model regression](https://github.com/tenstorrent/tt-metal/actions/workflows/perf-models.yaml) CI passes (if applicable) - [ ] [Device performance regression](https://github.com/tenstorrent/tt-metal/actions/workflows/perf-device-models.yaml) CI passes (if applicable) - [ ] **(For models and ops writers)** Full [new models tests](https://github.com/tenstorrent/tt-metal/actions/workflows/full-new-models-suite.yaml) CI passes (if applicable) - [ ] New/Existing tests provide coverage for changes --- tt_metal/api/tt-metalium/hal.hpp | 4 ++ tt_metal/api/tt-metalium/rtoptions.hpp | 4 ++ tt_metal/api/tt-metalium/tt_memory.h | 2 + tt_metal/hw/CMakeLists.txt | 42 +++++++++++++++++++ tt_metal/hw/firmware/src/erisc.cc | 30 +++++++++++++ tt_metal/hw/inc/ethernet/erisc.h | 1 + tt_metal/hw/inc/ethernet/tt_eth_ss_regs.h | 6 +++ tt_metal/hw/inc/wormhole/eth_l1_address_map.h | 10 ++++- tt_metal/hw/toolchain/erisc-b0-app.ld | 5 ++- tt_metal/hw/toolchain/erisc-b0-kernel.ld | 5 ++- tt_metal/impl/kernels/kernel.cpp | 13 ++++++ tt_metal/impl/program/program.cpp | 6 ++- tt_metal/jit_build/build.cpp | 15 ++++++- tt_metal/llrt/blackhole/bh_hal.cpp | 2 + tt_metal/llrt/grayskull/gs_hal.cpp | 2 + tt_metal/llrt/llrt.cpp | 1 + tt_metal/llrt/rtoptions.cpp | 4 ++ tt_metal/llrt/tt_memory.cpp | 6 +++ tt_metal/llrt/wormhole/wh_hal.cpp | 10 +++++ 19 files changed, 161 insertions(+), 7 deletions(-) diff --git a/tt_metal/api/tt-metalium/hal.hpp b/tt_metal/api/tt-metalium/hal.hpp index 82119ae57ff..42a33d44ab5 100644 --- a/tt_metal/api/tt-metalium/hal.hpp +++ b/tt_metal/api/tt-metalium/hal.hpp @@ -137,6 +137,7 @@ inline const HalJitBuildConfig &HalCoreInfoType::get_jit_build_config(uint32_t p class Hal { public: using RelocateFunc = std::function; + using IramRelocateFunc = std::function; using ValidRegAddrFunc = std::function; using NOCXYEncodingFunc = std::function; using NOCMulticastEncodingFunc = std::function; @@ -171,6 +172,7 @@ class Hal { // Functions where implementation varies by architecture RelocateFunc relocate_func_; + IramRelocateFunc erisc_iram_relocate_func_; ValidRegAddrFunc valid_reg_addr_func_; NOCXYEncodingFunc noc_xy_encoding_func_; NOCMulticastEncodingFunc noc_multicast_encoding_func_; @@ -261,6 +263,8 @@ class Hal { return relocate_func_(addr, local_init_addr); } + uint64_t erisc_iram_relocate_dev_addr(uint64_t addr) { return erisc_iram_relocate_func_(addr); } + uint32_t valid_reg_addr(uint32_t addr) { return valid_reg_addr_func_(addr); } uint32_t get_stack_size(uint32_t type) { return stack_size_func_(type); } diff --git a/tt_metal/api/tt-metalium/rtoptions.hpp b/tt_metal/api/tt-metalium/rtoptions.hpp index a53c1bf5aca..18cb16a8f81 100644 --- a/tt_metal/api/tt-metalium/rtoptions.hpp +++ b/tt_metal/api/tt-metalium/rtoptions.hpp @@ -138,6 +138,8 @@ class RunTimeOptions { bool simulator_enabled = false; std::filesystem::path simulator_path = ""; + bool erisc_iram_enabled = false; + RunTimeOptions(); public: @@ -317,6 +319,8 @@ class RunTimeOptions { inline bool get_simulator_enabled() { return simulator_enabled; } inline const std::filesystem::path& get_simulator_path() { return simulator_path; } + inline bool get_erisc_iram_enabled() { return erisc_iram_enabled; } + private: // Helper functions to parse feature-specific environment vaiables. void ParseFeatureEnv(RunTimeDebugFeatures feature); diff --git a/tt_metal/api/tt-metalium/tt_memory.h b/tt_metal/api/tt-metalium/tt_memory.h index fcbcd073515..45e88535d7d 100644 --- a/tt_metal/api/tt-metalium/tt_memory.h +++ b/tt_metal/api/tt-metalium/tt_memory.h @@ -54,6 +54,7 @@ class memory { uint32_t get_text_size() const { return this->text_size_; } uint32_t get_packed_size() const { return data_.size() * sizeof(word_t); } uint32_t get_text_addr() const { return this->text_addr_; } + void set_text_addr(const uint32_t& addr) { this->text_addr_ = addr; } size_t size() const { return data_.size(); } @@ -66,6 +67,7 @@ class memory { // Iterate over spans_ to act on data_ (eg., to device) void process_spans(const std::function::const_iterator, uint64_t addr, uint32_t len)>& callback) const; void process_spans(const std::function::iterator, uint64_t addr, uint32_t len)>& callback); + void update_spans(std::function& callback); }; } // namespace ll_api diff --git a/tt_metal/hw/CMakeLists.txt b/tt_metal/hw/CMakeLists.txt index c7ef3148c65..6b26e0403b1 100644 --- a/tt_metal/hw/CMakeLists.txt +++ b/tt_metal/hw/CMakeLists.txt @@ -19,6 +19,16 @@ set(TYPES kernel ) +# for wormhole, we need to generate two different linker scripts +set(WH_LD_SCRIPTS + kernel # base name + app # base name +) +set(IRAM_OPTIONS + "" # No IRAM + ENABLE_IRAM # With IRAM +) + include(FetchContent) set(SFPI_x86_64_Linux_RELEASE "v6.2.0/sfpi-release.tgz" @@ -45,6 +55,38 @@ foreach(ARCH IN LISTS ARCHS) set(DEV_MEM_MAP "${PROJECT_SOURCE_DIR}/tt_metal/hw/inc/${ARCH}/dev_mem_map.h") set(HW_INCLUDES "${PROJECT_SOURCE_DIR}/tt_metal/hw/inc/${ARCH}") set(HW_OUTPUT_DIR "${PROJECT_SOURCE_DIR}/runtime/hw/toolchain/${ARCH}") + + if("${ARCH}" STREQUAL "wormhole") + foreach(SCRIPT_TYPE IN LISTS WH_LD_SCRIPTS) + foreach(IRAM_OPT IN LISTS IRAM_OPTIONS) + if(IRAM_OPT) + set(OUTPUT_FILE "${HW_OUTPUT_DIR}/erisc-b0-${SCRIPT_TYPE}_iram.ld") + set(IRAM_FLAG "-D${IRAM_OPT}") + set(IRAM_COMMENT " with IRAM") + else() + set(OUTPUT_FILE "${HW_OUTPUT_DIR}/erisc-b0-${SCRIPT_TYPE}.ld") + set(IRAM_FLAG "") + set(IRAM_COMMENT "") + endif() + + add_custom_command( + OUTPUT + ${OUTPUT_FILE} + COMMAND + ${CMAKE_CXX_COMPILER} ${IRAM_FLAG} -I${HW_INCLUDES} -E -P -x c -o ${OUTPUT_FILE} + ${CMAKE_CURRENT_SOURCE_DIR}/toolchain/erisc-b0-${SCRIPT_TYPE}.ld + DEPENDS + ${CMAKE_CURRENT_SOURCE_DIR}/toolchain/erisc-b0-${SCRIPT_TYPE}.ld + COMMENT "Preprocessing toolchain/erisc-b0-${SCRIPT_TYPE}.ld${IRAM_COMMENT}" + VERBATIM + ) + + # Add to preprocessed files list + list(APPEND PREPROCESSED_LD_FILES ${OUTPUT_FILE}) + endforeach() + endforeach() + endif() + foreach(PROC IN LISTS PROCS) foreach(TYPE IN LISTS TYPES) set(HW_OUTPUT_FILE "${HW_OUTPUT_DIR}/${TYPE}_${PROC}.ld") diff --git a/tt_metal/hw/firmware/src/erisc.cc b/tt_metal/hw/firmware/src/erisc.cc index 7dc0390f329..bf7b8a62695 100644 --- a/tt_metal/hw/firmware/src/erisc.cc +++ b/tt_metal/hw/firmware/src/erisc.cc @@ -41,6 +41,33 @@ uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((used)); int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((used)); int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((used)); +#if defined(ARCH_WORMHOLE) && defined(ENABLE_IRAM) +void l1_to_erisc_iram_copy(volatile uint32_t* iram_load_reg) { + // Trigger copy of code from L1 to IRAM. + *iram_load_reg = eth_l1_mem::address_map::KERNEL_BASE >> 4; + RISC_POST_STATUS(0x10000000); +} + +void l1_to_erisc_iram_copy_wait(volatile uint32_t* iram_load_reg) { + // Wait for copy to complete. + while (*iram_load_reg & 0x1); +} + +void iram_setup() { + // Copy code from L1 to IRAM. + volatile uint32_t* iram_load_reg = (volatile uint32_t*)(ETH_CTRL_REGS_START + ETH_CORE_IRAM_LOAD); + + toggle_macpcs_ptr = (void (*)(uint32_t))RtosTable[1]; + toggle_macpcs_ptr(0); // To disable MAC + + l1_to_erisc_iram_copy(iram_load_reg); + l1_to_erisc_iram_copy_wait(iram_load_reg); + + toggle_macpcs_ptr(1); // To re-enable MAC +} + +#endif + void __attribute__((noinline)) Application(void) { WAYPOINT("I"); @@ -86,6 +113,9 @@ void __attribute__((noinline)) Application(void) { if (enables & DISPATCH_CLASS_MASK_ETH_DM0) { WAYPOINT("R"); firmware_config_init(mailboxes, ProgrammableCoreType::ACTIVE_ETH, DISPATCH_CLASS_ETH_DM0); +#if defined(ARCH_WORMHOLE) && defined(ENABLE_IRAM) + iram_setup(); +#endif kernel_init(0); WAYPOINT("D"); } diff --git a/tt_metal/hw/inc/ethernet/erisc.h b/tt_metal/hw/inc/ethernet/erisc.h index 1bb4e1ed8b9..9aa71e4da3c 100644 --- a/tt_metal/hw/inc/ethernet/erisc.h +++ b/tt_metal/hw/inc/ethernet/erisc.h @@ -7,6 +7,7 @@ #include "noc_nonblocking_api.h" inline void (*rtos_context_switch_ptr)(); +inline void (*toggle_macpcs_ptr)(uint32_t); volatile inline uint32_t* flag_disable = (uint32_t*)(eth_l1_mem::address_map::LAUNCH_ERISC_APP_FLAG); namespace internal_ { diff --git a/tt_metal/hw/inc/ethernet/tt_eth_ss_regs.h b/tt_metal/hw/inc/ethernet/tt_eth_ss_regs.h index 82b4b5a913d..4c7233cbd3d 100644 --- a/tt_metal/hw/inc/ethernet/tt_eth_ss_regs.h +++ b/tt_metal/hw/inc/ethernet/tt_eth_ss_regs.h @@ -15,6 +15,12 @@ #define NUM_ETH_QUEUES 2 #endif +#define ETH_CTRL_REGS_START 0xFFB94000 +// Write to start ERISC IRAM load. +// Write value: word address for the start of binary in L1. +// Read value: bit 0 = status (1=ongoing, 0=complete), bits [17:1] = currend read address. +#define ETH_CORE_IRAM_LOAD ((0x30 + NUM_ECC_SOURCES * 4) + 0x1C) + ////////////////// // RISC debug regs #define ETH_RISC_REGS_START 0xFFB10000 diff --git a/tt_metal/hw/inc/wormhole/eth_l1_address_map.h b/tt_metal/hw/inc/wormhole/eth_l1_address_map.h index 2740b23e26e..173159d5ad6 100644 --- a/tt_metal/hw/inc/wormhole/eth_l1_address_map.h +++ b/tt_metal/hw/inc/wormhole/eth_l1_address_map.h @@ -8,6 +8,14 @@ #include "noc/noc_parameters.h" +namespace eth_iram_mem { +struct address_map { + static constexpr std::int32_t ERISC_IRAM_BASE = 0xFFC00000; + static constexpr std::int32_t ERISC_IRAM_SIZE = 16 * 1024; + static constexpr std::int32_t ERISC_KERNEL_BASE = ERISC_IRAM_BASE; +}; +}; // namespace eth_iram_mem + namespace eth_l1_mem { struct address_map { @@ -38,12 +46,12 @@ struct address_map { // Base addresses static constexpr std::int32_t FIRMWARE_BASE = 0x9040; static constexpr std::int32_t L1_EPOCH_Q_BASE = 0x9000; // Epoch Q start in L1. + static constexpr std::int32_t KERNEL_BASE = 0xA840; static constexpr std::int32_t COMMAND_Q_BASE = L1_EPOCH_Q_BASE + FIRMWARE_SIZE; static constexpr std::int32_t DATA_BUFFER_BASE = COMMAND_Q_BASE + COMMAND_Q_SIZE; static constexpr std::int32_t TILE_HEADER_BUFFER_BASE = DATA_BUFFER_BASE + DATA_BUFFER_SIZE; // TT Metal Specific - static constexpr std::int32_t ERISC_FIRMWARE_SIZE = 2 * 1024; // Total 160 * 1024 L1 starting from TILE_HEADER_BUFFER_BASE // - 1 * 1024 misc args // - 53 * 1024 eth app reserved buffer space diff --git a/tt_metal/hw/toolchain/erisc-b0-app.ld b/tt_metal/hw/toolchain/erisc-b0-app.ld index 4a82d3f2f17..d05a414d043 100644 --- a/tt_metal/hw/toolchain/erisc-b0-app.ld +++ b/tt_metal/hw/toolchain/erisc-b0-app.ld @@ -6,9 +6,12 @@ INCLUDE "erisc-b0-memory.ld" -REGION_ALIAS("REGION_APP_IRAM_CODE", ERISC_APP_IRAM_CODE) REGION_ALIAS("REGION_APP_CODE", ERISC_APP_CODE) +#if defined(ENABLE_IRAM) +REGION_ALIAS("REGION_APP_KERNEL_CODE", ERISC_APP_IRAM_CODE) +#else REGION_ALIAS("REGION_APP_KERNEL_CODE", ERISC_APP_KERNEL_CODE) +#endif REGION_ALIAS("REGION_APP_DATA", ERISC_APP_DATA) REGION_ALIAS("REGION_LDM", LOCAL_DATA_MEM) diff --git a/tt_metal/hw/toolchain/erisc-b0-kernel.ld b/tt_metal/hw/toolchain/erisc-b0-kernel.ld index 1d25722d293..0ec827a4f4d 100644 --- a/tt_metal/hw/toolchain/erisc-b0-kernel.ld +++ b/tt_metal/hw/toolchain/erisc-b0-kernel.ld @@ -6,8 +6,11 @@ INCLUDE "erisc-b0-memory.ld" -REGION_ALIAS("REGION_APP_IRAM_CODE", ERISC_APP_IRAM_CODE) +#if defined(ENABLE_IRAM) +REGION_ALIAS("REGION_APP_KERNEL_CODE", ERISC_APP_IRAM_CODE) +#else REGION_ALIAS("REGION_APP_KERNEL_CODE", ERISC_APP_KERNEL_CODE) +#endif REGION_ALIAS("REGION_APP_KERNEL_DATA", ERISC_APP_KERNEL_DATA) REGION_ALIAS("REGION_LDM", LOCAL_DATA_MEM) diff --git a/tt_metal/impl/kernels/kernel.cpp b/tt_metal/impl/kernels/kernel.cpp index 176d9e8741d..41be802cd6c 100644 --- a/tt_metal/impl/kernels/kernel.cpp +++ b/tt_metal/impl/kernels/kernel.cpp @@ -21,6 +21,8 @@ #include #include "tt_metal/jit_build/genfiles.hpp" #include "tt_metal/jit_build/build_env_manager.hpp" +#include "hw/inc/wormhole/eth_l1_address_map.h" + namespace tt { namespace tt_metal { @@ -442,6 +444,17 @@ void EthernetKernel::read_binaries(IDevice* device) { ll_api::memory const& binary_mem = llrt::get_risc_binary( build_state.get_target_out_path(this->kernel_full_name_), load_type); + if (tt::llrt::RunTimeOptions::get_instance().get_erisc_iram_enabled() && this->config_.eth_mode != Eth::IDLE) { + // text_addr and some of span's addr point to IRAM base address. + // However it need to be placed L1 kernel base address for FW to copy it to IRAM then kick off + // The kernel can run with IRAM base address once it started. + const_cast(binary_mem) + .set_text_addr(tt::tt_metal::hal.erisc_iram_relocate_dev_addr((uint64_t)binary_mem.get_text_addr())); + std::function update_callback = [](uint64_t& addr) { + addr = tt::tt_metal::hal.erisc_iram_relocate_dev_addr(addr); + }; + const_cast(binary_mem).update_spans(update_callback); + } binaries.push_back(&binary_mem); uint32_t binary_size = binary_mem.get_packed_size(); log_debug(LogLoader, "ERISC={}, name={}, size={} (bytes)", erisc_id, this->name(), binary_size); diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp index 554e36c4bd3..c0eef44c972 100644 --- a/tt_metal/impl/program/program.cpp +++ b/tt_metal/impl/program/program.cpp @@ -1124,6 +1124,7 @@ void detail::Program_::populate_dispatch_data(IDevice* device) { } const auto& binaries = kernel->binaries(BuildEnvManager::get_instance().get_device_build_env(device->build_id()).build_key); + const auto core_type = kernel->get_kernel_programmable_core_type(); std::vector dst_base_addrs; std::vector page_offsets; std::vector lengths; @@ -1141,8 +1142,9 @@ void detail::Program_::populate_dispatch_data(IDevice* device) { lengths.resize(lengths.size() + num_spans); riscvs.resize(riscvs.size() + num_spans); - kernel_bin.process_spans([&](std::vector::const_iterator mem_ptr, uint64_t dst, uint32_t len) { - + kernel_bin.process_spans([&](std::vector::const_iterator mem_ptr, + uint64_t dst, + uint32_t len) { // Set dst for eth kernels until they move to ring buffer dst_base_addrs[transfer_info_index] = dst; page_offsets[transfer_info_index] = diff --git a/tt_metal/jit_build/build.cpp b/tt_metal/jit_build/build.cpp index 13e66ed3e02..875196c6fe0 100644 --- a/tt_metal/jit_build/build.cpp +++ b/tt_metal/jit_build/build.cpp @@ -484,6 +484,9 @@ JitBuildActiveEthernet::JitBuildActiveEthernet(const JitBuildEnv& env, const Jit this->target_name_ = "erisc"; this->cflags_ = env_.cflags_ + " -fno-delete-null-pointer-checks "; + if (tt::llrt::RunTimeOptions::get_instance().get_erisc_iram_enabled()) { + this->defines_ += "-DENABLE_IRAM "; + } this->defines_ += "-DCOMPILE_FOR_ERISC " "-DERISC " @@ -501,9 +504,17 @@ JitBuildActiveEthernet::JitBuildActiveEthernet(const JitBuildEnv& env, const Jit string linker_str; if (this->is_fw_) { - linker_str = "tt_metal/hw/toolchain/erisc-b0-app.ld "; + if (tt::llrt::RunTimeOptions::get_instance().get_erisc_iram_enabled()) { + linker_str = "runtime/hw/toolchain/" + get_alias(env_.arch_) + "/erisc-b0-app_iram.ld "; + } else { + linker_str = "runtime/hw/toolchain/" + get_alias(env_.arch_) + "/erisc-b0-app.ld "; + } } else { - linker_str = "tt_metal/hw/toolchain/erisc-b0-kernel.ld "; + if (tt::llrt::RunTimeOptions::get_instance().get_erisc_iram_enabled()) { + linker_str = "runtime/hw/toolchain/" + get_alias(env_.arch_) + "/erisc-b0-kernel_iram.ld "; + } else { + linker_str = "runtime/hw/toolchain/" + get_alias(env_.arch_) + "/erisc-b0-kernel.ld "; + } } this->lflags_ = env_.lflags_ + "-L" + env_.root_ + "/tt_metal/hw/toolchain " diff --git a/tt_metal/llrt/blackhole/bh_hal.cpp b/tt_metal/llrt/blackhole/bh_hal.cpp index 9eccb172bc3..b19d3683e07 100644 --- a/tt_metal/llrt/blackhole/bh_hal.cpp +++ b/tt_metal/llrt/blackhole/bh_hal.cpp @@ -68,6 +68,8 @@ void Hal::initialize_bh() { return addr; }; + this->erisc_iram_relocate_func_ = [](uint64_t addr) { return addr; }; + this->valid_reg_addr_func_ = [](uint32_t addr) { return ( ((addr >= NOC_OVERLAY_START_ADDR) && diff --git a/tt_metal/llrt/grayskull/gs_hal.cpp b/tt_metal/llrt/grayskull/gs_hal.cpp index 168b6c2b3bd..fb5ccde3a41 100644 --- a/tt_metal/llrt/grayskull/gs_hal.cpp +++ b/tt_metal/llrt/grayskull/gs_hal.cpp @@ -171,6 +171,8 @@ void Hal::initialize_gs() { return addr; }; + this->erisc_iram_relocate_func_ = [](uint64_t addr) { return addr; }; + this->valid_reg_addr_func_ = [](uint32_t addr) { return ( ((addr >= NOC_OVERLAY_START_ADDR) && diff --git a/tt_metal/llrt/llrt.cpp b/tt_metal/llrt/llrt.cpp index 5632110e6d4..6a0ad3e6fad 100644 --- a/tt_metal/llrt/llrt.cpp +++ b/tt_metal/llrt/llrt.cpp @@ -140,6 +140,7 @@ bool test_load_write_read_risc_binary( assert(tt::Cluster::instance().is_worker_core(core, chip_id) or tt::Cluster::instance().is_ethernet_core(core, chip_id)); uint64_t local_init_addr = tt::tt_metal::hal.get_jit_build_config(core_type_idx, processor_class_idx, processor_type_idx).local_init_addr; + auto core_type = tt::tt_metal::hal.get_programmable_core_type(core_type_idx); log_debug(tt::LogLLRuntime, "hex_vec size = {}, size_in_bytes = {}", mem.size(), mem.size()*sizeof(uint32_t)); mem.process_spans([&](std::vector::const_iterator mem_ptr, uint64_t addr, uint32_t len_words) { diff --git a/tt_metal/llrt/rtoptions.cpp b/tt_metal/llrt/rtoptions.cpp index 6a0246c44d0..762a7ee3e88 100644 --- a/tt_metal/llrt/rtoptions.cpp +++ b/tt_metal/llrt/rtoptions.cpp @@ -132,6 +132,10 @@ RunTimeOptions::RunTimeOptions() { this->simulator_enabled = true; this->simulator_path = std::getenv("TT_METAL_SIMULATOR"); } + + if (getenv("TT_METAL_ENABLE_ERISC_IRAM")) { + this->erisc_iram_enabled = true; + } } const std::string& RunTimeOptions::get_root_dir() { diff --git a/tt_metal/llrt/tt_memory.cpp b/tt_metal/llrt/tt_memory.cpp index 92fe0405a02..f2126489248 100644 --- a/tt_metal/llrt/tt_memory.cpp +++ b/tt_metal/llrt/tt_memory.cpp @@ -100,4 +100,10 @@ void memory::process_spans( } } +void memory::update_spans(std::function& callback) { + for (auto& span : link_spans_) { + callback(span.addr); + } +} + } // namespace ll_api diff --git a/tt_metal/llrt/wormhole/wh_hal.cpp b/tt_metal/llrt/wormhole/wh_hal.cpp index 338d2b15cd6..af3de9d9e34 100644 --- a/tt_metal/llrt/wormhole/wh_hal.cpp +++ b/tt_metal/llrt/wormhole/wh_hal.cpp @@ -14,6 +14,7 @@ #include "hal.hpp" #include "wormhole/wh_hal.hpp" +#include "hw/inc/wormhole/eth_l1_address_map.h" // Reserved DRAM addresses // Host writes (4B value) to and reads from DRAM_BARRIER_BASE across all channels to ensure previous writes have been @@ -69,6 +70,15 @@ void Hal::initialize_wh() { return addr; }; + this->erisc_iram_relocate_func_ = [](uint64_t addr) { + if (addr == static_cast(eth_iram_mem::address_map::ERISC_IRAM_BASE)) { + // IRAM enabled program starts from ERISC_IRAM_BASE. This relocation is for where to put the program. + // At first the program is placed on ERISC_IRAM_BASE, then erisc.cc copies to local IRAM. + return (uint64_t)eth_l1_mem::address_map::KERNEL_BASE; + } + return addr; + }; + this->valid_reg_addr_func_ = [](uint32_t addr) { return ( ((addr >= NOC_OVERLAY_START_ADDR) &&