Skip to content

Commit 8f7792b

Browse files
authored
[DeviceMSAN] Use device usm to allocate LaunchInfo (#17948)
Fix a CTS segmentation fault that occurred when accessing shared USM
1 parent 6d64961 commit 8f7792b

File tree

4 files changed

+129
-77
lines changed

4 files changed

+129
-77
lines changed

libdevice/sanitizer/msan_rtl.cpp

+4-7
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414

1515
DeviceGlobal<void *> __MsanLaunchInfo;
1616
#define GetMsanLaunchInfo \
17-
((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
17+
((__SYCL_GLOBAL__ MsanRuntimeData *)__MsanLaunchInfo.get())
1818

1919
namespace {
2020

@@ -160,14 +160,11 @@ inline uptr __msan_get_shadow_dg2(uptr addr, uint32_t as) {
160160
}
161161

162162
if (as != ADDRESS_SPACE_GLOBAL || !(addr & DG2_DEVICE_USM_MASK))
163-
return (uptr)((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
164-
->CleanShadow;
163+
return (uptr)GetMsanLaunchInfo->CleanShadow;
165164

166165
// Device USM only
167-
auto shadow_begin = ((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
168-
->GlobalShadowOffset;
169-
auto shadow_end = ((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
170-
->GlobalShadowOffsetEnd;
166+
auto shadow_begin = GetMsanLaunchInfo->GlobalShadowOffset;
167+
auto shadow_end = GetMsanLaunchInfo->GlobalShadowOffsetEnd;
171168
if (addr < shadow_begin) {
172169
return addr + (shadow_begin - DG2_DEVICE_USM_BEGIN);
173170
} else {

unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.cpp

+56-62
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,9 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context,
6868

6969
*ResultPtr = Allocated;
7070

71-
ContextInfo->MaxAllocatedSize = std::max(ContextInfo->MaxAllocatedSize, Size);
71+
if (Type != AllocType::DEVICE_USM) {
72+
ContextInfo->CleanShadowSize = std::max(ContextInfo->CleanShadowSize, Size);
73+
}
7274

7375
// For host/shared usm, we only record the alloc size.
7476
if (Type != AllocType::DEVICE_USM) {
@@ -138,15 +140,16 @@ ur_result_t MsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
138140
// FIXME: We must use block operation here, until we support
139141
// urEventSetCallback
140142
auto Result = getContext()->urDdiTable.Queue.pfnFinish(Queue);
143+
UR_CALL(LaunchInfo.Data.syncFromDevice(Queue));
141144

142145
if (Result == UR_RESULT_SUCCESS) {
143-
const auto &Report = LaunchInfo.Data->Report;
146+
const auto &Report = LaunchInfo.Data.Host.Report;
144147

145148
if (!Report.Flag) {
146149
return Result;
147150
}
148151

149-
ReportUsesUninitializedValue(LaunchInfo.Data->Report, Kernel);
152+
ReportUsesUninitializedValue(LaunchInfo.Data.Host.Report, Kernel);
150153

151154
exitWithErrors();
152155
}
@@ -286,8 +289,8 @@ MsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
286289
MsanShadowMemoryDG2::IsDeviceUSM(GVInfo.Addr))) {
287290
UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, GVInfo.Addr,
288291
GVInfo.Size, 0));
289-
ContextInfo->MaxAllocatedSize =
290-
std::max(ContextInfo->MaxAllocatedSize, GVInfo.Size);
292+
ContextInfo->CleanShadowSize =
293+
std::max(ContextInfo->CleanShadowSize, GVInfo.Size);
291294
}
292295
}
293296
}
@@ -471,16 +474,20 @@ ur_result_t MsanInterceptor::prepareLaunch(
471474

472475
// Set LaunchInfo
473476
auto ContextInfo = getContextInfo(LaunchInfo.Context);
474-
LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
475-
LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
477+
LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
478+
LaunchInfo.Data.Host.GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
479+
480+
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
481+
LaunchInfo.Data.Host.Debug = getContext()->Options.Debug ? 1 : 0;
476482

477-
LaunchInfo.Data->DeviceTy = DeviceInfo->Type;
478-
LaunchInfo.Data->Debug = getContext()->Options.Debug ? 1 : 0;
483+
// Clean shadow
484+
// Its content is always zero, and it is used for unsupported memory types
479485
UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
480486
ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr,
481-
ContextInfo->MaxAllocatedSize, (void **)&LaunchInfo.Data->CleanShadow));
482-
UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)LaunchInfo.Data->CleanShadow, 0,
483-
ContextInfo->MaxAllocatedSize, 0, nullptr,
487+
ContextInfo->CleanShadowSize,
488+
(void **)&LaunchInfo.Data.Host.CleanShadow));
489+
UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)LaunchInfo.Data.Host.CleanShadow,
490+
0, ContextInfo->CleanShadowSize, 0, nullptr,
484491
nullptr));
485492

486493
if (LaunchInfo.LocalWorkSize.empty()) {
@@ -510,8 +517,8 @@ ur_result_t MsanInterceptor::prepareLaunch(
510517
// Write shadow memory offset for local memory
511518
if (KernelInfo.IsCheckLocals) {
512519
if (DeviceInfo->Shadow->AllocLocalShadow(
513-
Queue, NumWG, LaunchInfo.Data->LocalShadowOffset,
514-
LaunchInfo.Data->LocalShadowOffsetEnd) != UR_RESULT_SUCCESS) {
520+
Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset,
521+
LaunchInfo.Data.Host.LocalShadowOffsetEnd) != UR_RESULT_SUCCESS) {
515522
getContext()->logger.warning(
516523
"Failed to allocate shadow memory for local "
517524
"memory, maybe the number of workgroup ({}) is too "
@@ -520,18 +527,18 @@ ur_result_t MsanInterceptor::prepareLaunch(
520527
getContext()->logger.warning("Skip checking local memory of kernel <{}> ",
521528
GetKernelName(Kernel));
522529
} else {
523-
getContext()->logger.debug("ShadowMemory(Local, WorkGroup={}, {} - {})",
524-
NumWG,
525-
(void *)LaunchInfo.Data->LocalShadowOffset,
526-
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
530+
getContext()->logger.debug(
531+
"ShadowMemory(Local, WorkGroup={}, {} - {})", NumWG,
532+
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
533+
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
527534
}
528535
}
529536

530537
// Write shadow memory offset for private memory
531538
if (KernelInfo.IsCheckPrivates) {
532539
if (DeviceInfo->Shadow->AllocPrivateShadow(
533-
Queue, NumWG, LaunchInfo.Data->PrivateShadowOffset,
534-
LaunchInfo.Data->PrivateShadowOffsetEnd) != UR_RESULT_SUCCESS) {
540+
Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset,
541+
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) != UR_RESULT_SUCCESS) {
535542
getContext()->logger.warning(
536543
"Failed to allocate shadow memory for private "
537544
"memory, maybe the number of workgroup ({}) is too "
@@ -542,8 +549,8 @@ ur_result_t MsanInterceptor::prepareLaunch(
542549
} else {
543550
getContext()->logger.debug(
544551
"ShadowMemory(Private, WorkGroup={}, {} - {})", NumWG,
545-
(void *)LaunchInfo.Data->PrivateShadowOffset,
546-
(void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
552+
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
553+
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
547554
}
548555
// Write local arguments info
549556
if (!KernelInfo.LocalArgs.empty()) {
@@ -553,22 +560,26 @@ ur_result_t MsanInterceptor::prepareLaunch(
553560
getContext()->logger.debug("LocalArgs (argIndex={}, size={})", ArgIndex,
554561
ArgInfo.Size);
555562
}
556-
UR_CALL(LaunchInfo.importLocalArgsInfo(Queue, LocalArgsInfo));
563+
UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo));
557564
}
558565
}
559566

567+
// sync msan runtime data to device side
568+
UR_CALL(LaunchInfo.Data.syncToDevice(Queue));
569+
560570
getContext()->logger.info(
561571
"LaunchInfo {} (GlobalShadow={}, LocalShadow={}, PrivateShadow={}, "
562572
"CleanShadow={}, LocalArgs={}, NumLocalArgs={}, Device={}, Debug={})",
563-
(void *)LaunchInfo.Data, (void *)LaunchInfo.Data->GlobalShadowOffset,
564-
(void *)LaunchInfo.Data->LocalShadowOffset,
565-
(void *)LaunchInfo.Data->PrivateShadowOffset,
566-
(void *)LaunchInfo.Data->CleanShadow, (void *)LaunchInfo.Data->LocalArgs,
567-
LaunchInfo.Data->NumLocalArgs, ToString(LaunchInfo.Data->DeviceTy),
568-
LaunchInfo.Data->Debug);
569-
570-
ur_result_t URes =
571-
EnqueueWriteGlobal("__MsanLaunchInfo", &LaunchInfo.Data, sizeof(uptr));
573+
(void *)LaunchInfo.Data.getDevicePtr(),
574+
(void *)LaunchInfo.Data.Host.GlobalShadowOffset,
575+
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
576+
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
577+
(void *)LaunchInfo.Data.Host.CleanShadow,
578+
(void *)LaunchInfo.Data.Host.LocalArgs, LaunchInfo.Data.Host.NumLocalArgs,
579+
ToString(LaunchInfo.Data.Host.DeviceTy), LaunchInfo.Data.Host.Debug);
580+
581+
ur_result_t URes = EnqueueWriteGlobal(
582+
"__MsanLaunchInfo", &LaunchInfo.Data.DevicePtr, sizeof(uptr));
572583
if (URes != UR_RESULT_SUCCESS) {
573584
getContext()->logger.info("EnqueueWriteGlobal(__MsanLaunchInfo) "
574585
"failed, maybe empty kernel: {}",
@@ -641,47 +652,30 @@ ContextInfo::~ContextInfo() {
641652
ur_result_t USMLaunchInfo::initialize() {
642653
UR_CALL(getContext()->urDdiTable.Context.pfnRetain(Context));
643654
UR_CALL(getContext()->urDdiTable.Device.pfnRetain(Device));
644-
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
645-
Context, Device, nullptr, nullptr, sizeof(MsanLaunchInfo),
646-
(void **)&Data));
647-
*Data = MsanLaunchInfo{};
648655
return UR_RESULT_SUCCESS;
649656
}
650657

651-
USMLaunchInfo::~USMLaunchInfo() {
652-
[[maybe_unused]] ur_result_t Result;
653-
if (Data) {
654-
if (Data->CleanShadow) {
655-
Result = getContext()->urDdiTable.USM.pfnFree(Context,
656-
(void *)Data->CleanShadow);
657-
assert(Result == UR_RESULT_SUCCESS);
658-
}
659-
Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data);
658+
MsanRuntimeDataWrapper::~MsanRuntimeDataWrapper() {
659+
if (Host.CleanShadow) {
660+
[[maybe_unused]] auto Result =
661+
getContext()->urDdiTable.USM.pfnFree(Context, (void *)Host.CleanShadow);
662+
assert(Result == UR_RESULT_SUCCESS);
663+
}
664+
if (DevicePtr) {
665+
[[maybe_unused]] auto Result =
666+
getContext()->urDdiTable.USM.pfnFree(Context, (void *)DevicePtr);
660667
assert(Result == UR_RESULT_SUCCESS);
661668
}
669+
}
670+
671+
USMLaunchInfo::~USMLaunchInfo() {
672+
[[maybe_unused]] ur_result_t Result;
662673
Result = getContext()->urDdiTable.Context.pfnRelease(Context);
663674
assert(Result == UR_RESULT_SUCCESS);
664675
Result = getContext()->urDdiTable.Device.pfnRelease(Device);
665676
assert(Result == UR_RESULT_SUCCESS);
666677
}
667678

668-
ur_result_t USMLaunchInfo::importLocalArgsInfo(
669-
ur_queue_handle_t Queue, const std::vector<MsanLocalArgsInfo> &LocalArgs) {
670-
assert(!LocalArgs.empty());
671-
672-
Data->NumLocalArgs = LocalArgs.size();
673-
const size_t LocalArgsInfoSize = sizeof(MsanLocalArgsInfo) * LocalArgs.size();
674-
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
675-
Context, Device, nullptr, nullptr, LocalArgsInfoSize,
676-
ur_cast<void **>(&Data->LocalArgs)));
677-
678-
UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
679-
Queue, true, Data->LocalArgs, LocalArgs.data(), LocalArgsInfoSize, 0,
680-
nullptr, nullptr));
681-
682-
return UR_RESULT_SUCCESS;
683-
}
684-
685679
} // namespace msan
686680

687681
using namespace msan;

unified-runtime/source/loader/layers/sanitizer/msan/msan_interceptor.hpp

+68-7
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ struct ProgramInfo {
134134

135135
struct ContextInfo {
136136
ur_context_handle_t Handle;
137-
size_t MaxAllocatedSize = 1024;
137+
size_t CleanShadowSize = 1024;
138138
std::atomic<int32_t> RefCount = 1;
139139

140140
std::vector<ur_device_handle_t> DeviceList;
@@ -148,8 +148,71 @@ struct ContextInfo {
148148
~ContextInfo();
149149
};
150150

151+
struct MsanRuntimeDataWrapper {
152+
MsanRuntimeData Host{};
153+
154+
MsanRuntimeData *DevicePtr = nullptr;
155+
156+
ur_context_handle_t Context{};
157+
158+
ur_device_handle_t Device{};
159+
160+
MsanRuntimeDataWrapper(ur_context_handle_t Context, ur_device_handle_t Device)
161+
: Context(Context), Device(Device) {}
162+
163+
~MsanRuntimeDataWrapper();
164+
165+
MsanRuntimeData *getDevicePtr() {
166+
if (DevicePtr == nullptr) {
167+
ur_result_t Result = getContext()->urDdiTable.USM.pfnDeviceAlloc(
168+
Context, Device, nullptr, nullptr, sizeof(MsanRuntimeData),
169+
(void **)&DevicePtr);
170+
if (Result != UR_RESULT_SUCCESS) {
171+
getContext()->logger.error(
172+
"Failed to alloc device usm for msan runtime data: {}", Result);
173+
}
174+
}
175+
return DevicePtr;
176+
}
177+
178+
ur_result_t syncFromDevice(ur_queue_handle_t Queue) {
179+
UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
180+
Queue, true, ur_cast<void *>(&Host), getDevicePtr(),
181+
sizeof(MsanRuntimeData), 0, nullptr, nullptr));
182+
183+
return UR_RESULT_SUCCESS;
184+
}
185+
186+
ur_result_t syncToDevice(ur_queue_handle_t Queue) {
187+
UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
188+
Queue, true, getDevicePtr(), ur_cast<void *>(&Host),
189+
sizeof(MsanRuntimeData), 0, nullptr, nullptr));
190+
191+
return UR_RESULT_SUCCESS;
192+
}
193+
194+
ur_result_t
195+
importLocalArgsInfo(ur_queue_handle_t Queue,
196+
const std::vector<MsanLocalArgsInfo> &LocalArgs) {
197+
assert(!LocalArgs.empty());
198+
199+
Host.NumLocalArgs = LocalArgs.size();
200+
const size_t LocalArgsInfoSize =
201+
sizeof(MsanLocalArgsInfo) * Host.NumLocalArgs;
202+
UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
203+
Context, Device, nullptr, nullptr, LocalArgsInfoSize,
204+
ur_cast<void **>(&Host.LocalArgs)));
205+
206+
UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
207+
Queue, true, Host.LocalArgs, &LocalArgs[0], LocalArgsInfoSize, 0,
208+
nullptr, nullptr));
209+
210+
return UR_RESULT_SUCCESS;
211+
}
212+
};
213+
151214
struct USMLaunchInfo {
152-
MsanLaunchInfo *Data = nullptr;
215+
MsanRuntimeDataWrapper Data;
153216

154217
ur_context_handle_t Context = nullptr;
155218
ur_device_handle_t Device = nullptr;
@@ -161,8 +224,9 @@ struct USMLaunchInfo {
161224
USMLaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device,
162225
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
163226
const size_t *GlobalWorkOffset, uint32_t WorkDim)
164-
: Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize),
165-
GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) {
227+
: Data(Context, Device), Context(Context), Device(Device),
228+
GlobalWorkSize(GlobalWorkSize), GlobalWorkOffset(GlobalWorkOffset),
229+
WorkDim(WorkDim) {
166230
if (LocalWorkSize) {
167231
this->LocalWorkSize =
168232
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
@@ -171,9 +235,6 @@ struct USMLaunchInfo {
171235
~USMLaunchInfo();
172236

173237
ur_result_t initialize();
174-
ur_result_t
175-
importLocalArgsInfo(ur_queue_handle_t Queue,
176-
const std::vector<MsanLocalArgsInfo> &LocalArgs);
177238
};
178239

179240
struct DeviceGlobalInfo {

unified-runtime/source/loader/layers/sanitizer/msan/msan_libdevice.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ struct MsanLocalArgsInfo {
4444
uint64_t Size = 0;
4545
};
4646

47-
struct MsanLaunchInfo {
47+
struct MsanRuntimeData {
4848
uintptr_t GlobalShadowOffset = 0;
4949
uintptr_t GlobalShadowOffsetEnd = 0;
5050

0 commit comments

Comments
 (0)