Skip to content

Commit 0ebaca6

Browse files
committed
[L0] Only Override max allocation limits given env
- Change the default from always allowing > 4GB allocations to requiring the user to explicitly request > 4GB allocation support when the maximum allocation size allowed on that system is less than 4GB. - This ensures performance is maintained on systems that don't handle > 4GB allocations natively and avoids breaking Ahead of Time (AOT) binaries that were built without > 4GB resource support. - By setting UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1, the L0 Adapter will force the modules to be built with stateless or > 4GB support and will allow allocations to exceed the maximum single allocation size limit for that device. Signed-off-by: Spruit, Neil R <[email protected]>
1 parent d06ba9d commit 0ebaca6

File tree

4 files changed

+16
-20
lines changed

4 files changed

+16
-20
lines changed

source/adapters/level_zero/device.cpp

+11-15
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include "device.hpp"
1212
#include "ur_level_zero.hpp"
13+
#include "ur_util.hpp"
1314
#include <algorithm>
1415
#include <climits>
1516
#include <optional>
@@ -268,9 +269,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
268269
return ReturnValue(uint32_t{64});
269270
}
270271
case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
271-
// if not optimized for 32-bit access, return total memory size.
272-
// otherwise, return only maximum allocatable size.
273-
if (Device->useOptimized32bitAccess() == 0) {
272+
// if the user wishes to allocate large allocations on a system that usually
273+
// does not allow that allocation size, then we return the max global mem
274+
// size as the limit.
275+
if (Device->useRelaxedAllocationLimits()) {
274276
return ReturnValue(uint64_t{calculateGlobalMemSize(Device)});
275277
} else {
276278
return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize});
@@ -923,20 +925,14 @@ ur_device_handle_t_::useImmediateCommandLists() {
923925
}
924926
}
925927

926-
int32_t ur_device_handle_t_::useOptimized32bitAccess() {
927-
static const int32_t Optimize32bitAccessMode = [this] {
928-
// If device is Intel(R) Data Center GPU Max,
929-
// use default provided by L0 driver.
930-
// TODO: Use IP versioning to select based on range of devices
931-
if (this->isPVC())
932-
return -1;
933-
const char *UrRet = std::getenv("UR_L0_USE_OPTIMIZED_32BIT_ACCESS");
934-
if (!UrRet)
935-
return 0;
936-
return std::atoi(UrRet);
928+
bool ur_device_handle_t_::useRelaxedAllocationLimits() {
929+
static const bool EnableRelaxedAllocationLimits = [] {
930+
auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS");
931+
const bool RetVal = UrRet ? std::stoi(*UrRet) : 0;
932+
return RetVal;
937933
}();
938934

939-
return Optimize32bitAccessMode;
935+
return EnableRelaxedAllocationLimits;
940936
}
941937

942938
ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,

source/adapters/level_zero/device.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ struct ur_device_handle_t_ : _ur_object {
160160
// provide support for only one, like for Intel(R)
161161
// Data Center GPU Max, for which L0 driver only
162162
// supports stateless.
163-
int32_t useOptimized32bitAccess();
163+
bool useRelaxedAllocationLimits();
164164

165165
bool isSubDevice() { return RootDevice != nullptr; }
166166

source/adapters/level_zero/program.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(
161161
ZeBuildOptions += pOptions;
162162
}
163163

164-
if (phDevices[0]->useOptimized32bitAccess() == 0) {
164+
if (phDevices[0]->useRelaxedAllocationLimits()) {
165165
ZeBuildOptions += " -ze-opt-greater-than-4GB-buffer-required";
166166
}
167167

@@ -256,7 +256,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile(
256256
// ze-opt-greater-than-4GB-buffer-required to disable
257257
// stateful optimizations and be able to use larger than
258258
// 4GB allocations on these kernels.
259-
if (Context->Devices[0]->useOptimized32bitAccess() == 0) {
259+
if (Context->Devices[0]->useRelaxedAllocationLimits()) {
260260
Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required";
261261
}
262262
}

source/adapters/level_zero/usm.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -178,11 +178,11 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr,
178178
ZeDesc.flags = 0;
179179
ZeDesc.ordinal = 0;
180180

181-
if (Device->useOptimized32bitAccess() == 0 &&
181+
ZeStruct<ze_relaxed_allocation_limits_exp_desc_t> RelaxedDesc;
182+
if (Device->useRelaxedAllocationLimits() &&
182183
(Size > Device->ZeDeviceProperties->maxMemAllocSize)) {
183184
// Tell Level-Zero to accept Size > maxMemAllocSize if
184185
// large allocations are used.
185-
ZeStruct<ze_relaxed_allocation_limits_exp_desc_t> RelaxedDesc;
186186
RelaxedDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE;
187187
ZeDesc.pNext = &RelaxedDesc;
188188
}

0 commit comments

Comments
 (0)