Skip to content

Commit e02ca41

Browse files
authored
Arm backend: Re-enable Dedicated_Sram for the Ethos-U85 (#11459)
We define a weak symbol for the fast scratch array. When we run in Dedicated_Sram memory mode via run.sh, we override the fast scratch array.
1 parent ef1d2ff commit e02ca41

File tree

6 files changed

+16
-22
lines changed

6 files changed

+16
-22
lines changed

backends/arm/runtime/EthosUBackend.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ typedef struct {
8484
extern "C" {
8585
void __attribute__((weak)) EthosUBackend_execute_begin() {}
8686
void __attribute__((weak)) EthosUBackend_execute_end() {}
87+
__attribute__((weak)) unsigned char* ethosu_fast_scratch = nullptr;
88+
__attribute__((weak)) size_t ethosu_fast_scratch_size = 0;
8789
}
8890

8991
class EthosUBackendExecuteCallbacks {
@@ -198,8 +200,8 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
198200
handles.weight_data_size,
199201
ethosu_scratch,
200202
handles.scratch_data_size,
201-
nullptr,
202-
0);
203+
ethosu_fast_scratch,
204+
ethosu_fast_scratch_size);
203205

204206
// Write argument values (from EValue tensor) into Ethos-U scratch
205207
// TODO(MLETORCH-123): Optimise into direct write from Vela into the SRAM
@@ -309,9 +311,12 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
309311
static_cast<uint64_t>(
310312
reinterpret_cast<uintptr_t>((handles.weight_data))),
311313
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ethosu_scratch)),
312-
0};
314+
static_cast<uint64_t>(
315+
reinterpret_cast<uintptr_t>(ethosu_fast_scratch))};
313316
size_t bases_size[ETHOSU_NUM_BASE_ADDRS] = {
314-
handles.weight_data_size, handles.scratch_data_size, 0};
317+
handles.weight_data_size,
318+
handles.scratch_data_size,
319+
ethosu_fast_scratch_size};
315320
int result = 0;
316321
EXECUTORCH_PROF_START(
317322
event_tracer, event_tracer_local_scope, "+EthosUBackend::execute()NPU");
@@ -321,7 +326,7 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
321326
handles.cmd_data_size,
322327
bases,
323328
bases_size,
324-
3, /* fixed array of pointers to binary interface*/
329+
ETHOSU_NUM_BASE_ADDRS, /* fixed array of pointers to binary interface*/
325330
nullptr);
326331
EXECUTORCH_PROF_END(event_tracer, event_tracer_local_scope);
327332

backends/arm/scripts/build_executor_runner.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ then
103103
memory_mode="Shared_Sram"
104104
if [[ ${target} =~ "ethos-u85" ]]
105105
then
106-
memory_mode="Sram_Only"
106+
memory_mode="Dedicated_Sram_384KB"
107107
fi
108108
fi
109109

backends/arm/test/test_arm_baremetal.sh

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,10 +211,7 @@ test_models_ethos-u85() { # End to End model tests using model_test.py
211211
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
212212
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
213213
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"
214-
# Temporarily not test inception_v4 on Ethos-U85. To support inception_v4 properly on Ethos-U85, we need to run the model in Dedicated_Sram memory mode with
215-
# 384KB(or another amount lower than 2MB) of SRAM passed as fast scratch area. The PR adding support for Dedicated_Sram(https://github.com/pytorch/executorch/pull/10714)
216-
# was reverted due to a change required in an internal variant of the examples/arm/executor_runner/arm_executor_runner.cpp
217-
# python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
214+
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
218215

219216
echo "${TEST_SUITE_NAME}: PASS"
220217
}

backends/arm/test/test_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def get_args():
8181
if "u55" in args.target:
8282
args.memory_mode = "Shared_Sram"
8383
elif "u85" in args.target:
84-
args.memory_mode = "Sram_Only"
84+
args.memory_mode = "Dedicated_Sram_384KB"
8585
else:
8686
raise RuntimeError(f"Invalid target name {args.target}")
8787

examples/arm/executor_runner/arm_executor_runner.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -145,23 +145,15 @@ const size_t temp_allocation_pool_size =
145145
unsigned char __attribute__((
146146
section(".bss.tensor_arena"),
147147
aligned(16))) temp_allocation_pool[temp_allocation_pool_size];
148-
149-
namespace executorch {
150-
namespace backends {
151-
namespace arm {
152148
#if defined(ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE)
149+
extern "C" {
153150
size_t ethosu_fast_scratch_size =
154151
ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE;
155152
unsigned char __attribute__((section(".bss.ethosu_scratch"), aligned(16)))
156153
dedicated_sram[ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE];
157154
unsigned char* ethosu_fast_scratch = dedicated_sram;
158-
#else
159-
size_t ethosu_fast_scratch_size = 0;
160-
unsigned char* ethosu_fast_scratch = nullptr;
155+
}
161156
#endif
162-
} // namespace arm
163-
} // namespace backends
164-
} // namespace executorch
165157

166158
void et_pal_init(void) {
167159
// Enable ARM PMU Clock

examples/arm/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ then
110110
memory_mode="Shared_Sram"
111111
if [[ ${target} =~ "ethos-u85" ]]
112112
then
113-
memory_mode="Sram_Only"
113+
memory_mode="Dedicated_Sram_384KB"
114114
fi
115115
fi
116116

0 commit comments

Comments
 (0)