Skip to content

Commit 3d9a3c2

Browse files
[L0] Add initial USM alloc enqueue API
Co-authored-by: Michał Staniewski <[email protected]>
1 parent bbf3d89 commit 3d9a3c2

File tree

5 files changed

+336
-97
lines changed

5 files changed

+336
-97
lines changed

source/adapters/level_zero/context.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,13 @@ struct ur_context_handle_t_ : _ur_object {
5757
const ur_device_handle_t *Devs, bool OwnZeContext)
5858
: ZeContext{ZeContext}, Devices{Devs, Devs + NumDevices},
5959
NumDevices{NumDevices}, DefaultPool{this, nullptr},
60-
ProxyPool{this, nullptr} {
60+
ProxyPool{this, nullptr}, AsyncPool{this, nullptr} {
6161
OwnNativeHandle = OwnZeContext;
6262
}
6363

6464
ur_context_handle_t_(ze_context_handle_t ZeContext)
6565
: ZeContext{ZeContext}, DefaultPool{this, nullptr},
66-
ProxyPool{this, nullptr} {}
66+
ProxyPool{this, nullptr}, AsyncPool{this, nullptr} {}
6767

6868
// A L0 context handle is primarily used during creation and management of
6969
// resources that may be used by multiple devices.
@@ -125,6 +125,9 @@ struct ur_context_handle_t_ : _ur_object {
125125
// Allocation-tracking proxy pools for direct allocations. No pooling used.
126126
ur_usm_pool_handle_t_ ProxyPool;
127127

128+
// USM pool used for asynchronous (enqueued) allocations when the caller does not supply a pool.
129+
ur_usm_pool_handle_t_ AsyncPool;
130+
128131
// Map associating pools created with urUsmPoolCreate and internal pools
129132
std::list<ur_usm_pool_handle_t> UsmPoolHandles{};
130133

source/adapters/level_zero/queue.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1334,7 +1334,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList,
13341334
Device->Platform->ContextsMutex, std::defer_lock);
13351335

13361336
if (IndirectAccessTrackingEnabled) {
1337-
// We are going to submit kernels for execution. If indirect access flag is
1337+
// We are going to submit kernels for execution. If indirect access flag is
13381338
// set for a kernel then we need to make a snapshot of existing memory
13391339
// allocations in all contexts in the platform. We need to lock the mutex
13401340
// guarding the list of contexts in the platform to prevent creation of new

source/adapters/level_zero/usm.cpp

Lines changed: 182 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -600,124 +600,212 @@ ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) {
600600
return UR_RESULT_SUCCESS;
601601
}
602602

603+
/// Shared implementation behind urEnqueueUSMHostAllocExp,
/// urEnqueueUSMDeviceAllocExp and urEnqueueUSMSharedAllocExp.
///
/// The memory is requested from the pool directly during this call; the
/// command list is only used to signal the event representing the command.
///
/// \param Queue               queue the command is associated with; its Mutex
///                            is held for the duration of the call.
/// \param Pool                [optional] pool to allocate from. When null the
///                            context's AsyncPool is used.
/// \param Size                size in bytes forwarded to the pool.
/// \param Properties          [optional] currently unused.
/// \param NumEventsInWaitList number of entries in EventWaitList.
/// \param EventWaitList       [optional] events the signalled event is ordered
///                            after.
/// \param RetMem              [out] receives the allocated pointer; reset to
///                            nullptr if the command could not be submitted.
/// \param OutEvent            [out][optional] receives the event for this
///                            command; an internal event is used when null.
/// \param Type                USM kind to allocate (host, device or shared).
/// \return UR_RESULT_SUCCESS, UR_RESULT_ERROR_UNKNOWN for an unsupported Type,
///         or the first error reported by a callee.
static ur_result_t enqueueUSMAllocHelper(
    ur_queue_handle_t Queue, ur_usm_pool_handle_t Pool, const size_t Size,
    const ur_exp_enqueue_usm_alloc_properties_t *Properties,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
    void **RetMem, ur_event_handle_t *OutEvent, ur_usm_type_t Type) {
  std::ignore = Properties;

  // Resolve the command type before touching any resource so that an
  // unsupported type is reported through the return value (this function's
  // error channel) and leaves nothing behind.
  ur_command_t CommandType = UR_COMMAND_FORCE_UINT32;
  switch (Type) {
  case UR_USM_TYPE_HOST:
    CommandType = UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP;
    break;
  case UR_USM_TYPE_DEVICE:
    CommandType = UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP;
    break;
  case UR_USM_TYPE_SHARED:
    CommandType = UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP;
    break;
  default:
    logger::error("enqueueUSMAllocHelper: unsupported USM type");
    return UR_RESULT_ERROR_UNKNOWN;
  }

  std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);

  // Allocate first: if the pool cannot satisfy the request we return before
  // an event has been created or a command list has been taken.
  ur_usm_pool_handle_t USMPool = Pool ? Pool : &Queue->Context->AsyncPool;
  auto Device = (Type == UR_USM_TYPE_HOST) ? nullptr : Queue->Device;
  auto Ret =
      USMPool->allocate(Queue->Context, Device, nullptr, Type, Size, RetMem);
  if (Ret) {
    return Ret;
  }

  // Everything that can fail after the allocation lives in one callable so
  // the allocation can be released on any error path.
  auto SubmitSignal = [&]() -> ur_result_t {
    bool UseCopyEngine = false;
    _ur_ze_event_list_t TmpWaitList;
    UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
        NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine));

    bool OkToBatch = true;
    // Get a new command list to be used on this call
    ur_command_list_ptr_t CommandList{};
    UR_CALL(Queue->Context->getAvailableCommandList(
        Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList,
        OkToBatch, nullptr /*ForcedCmdQueue*/));

    ur_event_handle_t InternalEvent{};
    const bool IsInternal = OutEvent == nullptr;
    ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
    UR_CALL(createEventAndAssociateQueue(Queue, Event, CommandType,
                                         CommandList, IsInternal, false));
    (*Event)->WaitList = TmpWaitList;

    const auto &ZeCommandList = CommandList->first;
    const auto &WaitList = (*Event)->WaitList;
    // Order the signal after the caller's dependencies, as
    // urEnqueueUSMFreeExp does, so the event cannot complete before them.
    if (WaitList.Length) {
      ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
                 (ZeCommandList, WaitList.Length, WaitList.ZeEventList));
    }

    // Signal that USM allocation event was finished
    ZE2UR_CALL(zeCommandListAppendSignalEvent,
               (ZeCommandList, (*Event)->ZeEvent));

    UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch));
    return UR_RESULT_SUCCESS;
  };

  const ur_result_t SubmitResult = SubmitSignal();
  if (SubmitResult != UR_RESULT_SUCCESS) {
    // The caller only sees an error, so do not leave the allocation behind.
    // Best effort: the submission error is the one reported.
    std::ignore = USMFreeHelper(Queue->Context, *RetMem);
    *RetMem = nullptr;
    return SubmitResult;
  }

  return UR_RESULT_SUCCESS;
}
672+
603673
/// Enqueue an asynchronous USM device allocation.
///
/// Thin entry point: all work is delegated to enqueueUSMAllocHelper with the
/// allocation kind fixed to UR_USM_TYPE_DEVICE.
///
/// \param Queue               [in] handle of the queue object
/// \param Pool                [in][optional] USM pool descriptor
/// \param Size                [in] minimum size in bytes of the USM memory
///                            object to be allocated
/// \param Properties          [in][optional] pointer to the enqueue async
///                            alloc properties
/// \param NumEventsInWaitList [in] size of the event wait list
/// \param EventWaitList       [in][optional][range(0, NumEventsInWaitList)]
///                            wait list forwarded to the helper; if nullptr,
///                            NumEventsInWaitList must be 0
/// \param Mem                 [out] pointer to USM memory object
/// \param OutEvent            [out][optional] event object that identifies
///                            the async alloc
/// \return the result reported by enqueueUSMAllocHelper.
ur_result_t urEnqueueUSMDeviceAllocExp(
    ur_queue_handle_t Queue, ur_usm_pool_handle_t Pool, const size_t Size,
    const ur_exp_enqueue_usm_alloc_properties_t *Properties,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
    void **Mem, ur_event_handle_t *OutEvent) {
  constexpr ur_usm_type_t AllocKind = UR_USM_TYPE_DEVICE;
  const ur_result_t Result =
      enqueueUSMAllocHelper(Queue, Pool, Size, Properties, NumEventsInWaitList,
                            EventWaitList, Mem, OutEvent, AllocKind);
  return Result;
}
634696

635697
/// Enqueue an asynchronous USM shared allocation.
///
/// Thin entry point: all work is delegated to enqueueUSMAllocHelper with the
/// allocation kind fixed to UR_USM_TYPE_SHARED.
///
/// \param Queue               [in] handle of the queue object
/// \param Pool                [in][optional] USM pool descriptor
/// \param Size                [in] minimum size in bytes of the USM memory
///                            object to be allocated
/// \param Properties          [in][optional] pointer to the enqueue async
///                            alloc properties
/// \param NumEventsInWaitList [in] size of the event wait list
/// \param EventWaitList       [in][optional][range(0, NumEventsInWaitList)]
///                            wait list forwarded to the helper; if nullptr,
///                            NumEventsInWaitList must be 0
/// \param Mem                 [out] pointer to USM memory object
/// \param OutEvent            [out][optional] event object that identifies
///                            the async alloc
/// \return the result reported by enqueueUSMAllocHelper.
ur_result_t urEnqueueUSMSharedAllocExp(
    ur_queue_handle_t Queue, ur_usm_pool_handle_t Pool, const size_t Size,
    const ur_exp_enqueue_usm_alloc_properties_t *Properties,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
    void **Mem, ur_event_handle_t *OutEvent) {
  constexpr ur_usm_type_t AllocKind = UR_USM_TYPE_SHARED;
  const ur_result_t Result =
      enqueueUSMAllocHelper(Queue, Pool, Size, Properties, NumEventsInWaitList,
                            EventWaitList, Mem, OutEvent, AllocKind);
  return Result;
}
666720

667721
/// Enqueue an asynchronous USM host allocation.
///
/// Thin entry point: all work is delegated to enqueueUSMAllocHelper with the
/// allocation kind fixed to UR_USM_TYPE_HOST.
///
/// \param Queue               [in] handle of the queue object
/// \param Pool                [in][optional] USM pool descriptor
/// \param Size                [in] minimum size in bytes of the USM memory
///                            object to be allocated
/// \param Properties          [in][optional] pointer to the enqueue async
///                            alloc properties
/// \param NumEventsInWaitList [in] size of the event wait list
/// \param EventWaitList       [in][optional][range(0, NumEventsInWaitList)]
///                            wait list forwarded to the helper; if nullptr,
///                            NumEventsInWaitList must be 0
/// \param Mem                 [out] pointer to USM memory object
/// \param OutEvent            [out][optional] event object that identifies
///                            the async alloc
/// \return the result reported by enqueueUSMAllocHelper.
ur_result_t urEnqueueUSMHostAllocExp(
    ur_queue_handle_t Queue, ur_usm_pool_handle_t Pool, const size_t Size,
    const ur_exp_enqueue_usm_alloc_properties_t *Properties,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
    void **Mem, ur_event_handle_t *OutEvent) {
  constexpr ur_usm_type_t AllocKind = UR_USM_TYPE_HOST;
  const ur_result_t Result =
      enqueueUSMAllocHelper(Queue, Pool, Size, Properties, NumEventsInWaitList,
                            EventWaitList, Mem, OutEvent, AllocKind);
  return Result;
}
698744

699745
/// Enqueue a USM free.
///
/// If a wait list is supplied, the wait is submitted in blocking mode first.
/// The memory is then released through USMFreeHelper during this call, and
/// finally the event for this command is signalled on the command list.
///
/// \param Queue               [in] handle of the queue object
/// \param Pool                [in][optional] USM pool descriptor (unused)
/// \param Mem                 [in] pointer to USM memory object
/// \param NumEventsInWaitList [in] size of the event wait list
/// \param EventWaitList       [in][optional][range(0, NumEventsInWaitList)]
///                            events waited on before the memory is freed; if
///                            nullptr, NumEventsInWaitList must be 0
/// \param OutEvent            [out][optional] event object that identifies
///                            this free command; an internal event is used
///                            when null
/// \return UR_RESULT_SUCCESS, or the first error reported by a callee.
ur_result_t urEnqueueUSMFreeExp(
    ur_queue_handle_t Queue, ur_usm_pool_handle_t Pool, void *Mem,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
    ur_event_handle_t *OutEvent) {
  std::ignore = Pool;

  std::scoped_lock<ur_shared_mutex> QueueGuard(Queue->Mutex);

  // Retain the caller's dependencies for the lifetime of the event.
  bool OnCopyEngine = false;
  _ur_ze_event_list_t Dependencies;
  UR_CALL(Dependencies.createAndRetainUrZeEventList(
      NumEventsInWaitList, EventWaitList, Queue, OnCopyEngine));

  // Batching is disabled for this command.
  bool AllowBatching = false;
  ur_command_list_ptr_t CmdList{};
  UR_CALL(Queue->Context->getAvailableCommandList(
      Queue, CmdList, OnCopyEngine, NumEventsInWaitList, EventWaitList,
      AllowBatching, nullptr /*ForcedCmdQueue*/));

  // Fall back to a private event when the caller does not want one back.
  ur_event_handle_t PrivateEvent{};
  bool IsInternal = true;
  ur_event_handle_t *Event = &PrivateEvent;
  if (OutEvent != nullptr) {
    IsInternal = false;
    Event = OutEvent;
  }

  UR_CALL(createEventAndAssociateQueue(Queue, Event,
                                       UR_COMMAND_ENQUEUE_USM_FREE_EXP,
                                       CmdList, IsInternal, false));
  ze_event_handle_t SignalEvent = (*Event)->ZeEvent;
  (*Event)->WaitList = Dependencies;

  const auto &ZeCmdList = CmdList->first;
  const auto &Pending = (*Event)->WaitList;
  if (Pending.Length) {
    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
               (ZeCmdList, Pending.Length, Pending.ZeEventList));

    // Wait for commands execution until USM can be freed
    UR_CALL(Queue->executeCommandList(CmdList, true, AllowBatching)); // Blocking
  }

  // Free USM memory
  auto FreeResult = USMFreeHelper(Queue->Context, Mem);
  if (FreeResult != UR_RESULT_SUCCESS) {
    return FreeResult;
  }

  // NOTE(review): when a wait list was supplied, CmdList has already been
  // executed in blocking mode above; confirm that appending to it and
  // executing it a second time is valid for every command-list mode.
  // Signal that USM free event was finished
  ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCmdList, SignalEvent));

  UR_CALL(Queue->executeCommandList(CmdList, false, AllowBatching));

  return UR_RESULT_SUCCESS;
}
722810
} // namespace ur::level_zero
723811

test/adapters/level_zero/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,17 @@ function(add_adapter_tests adapter)
4040

4141
add_dependencies(test-adapter-${adapter}
4242
generate_device_binaries kernel_names_header)
43+
44+
45+
if("${adapter}" STREQUAL "level_zero")
46+
add_adapter_test(level_zero_enqueue_alloc
47+
FIXTURE KERNELS
48+
SOURCES
49+
enqueue_alloc.cpp
50+
ENVIRONMENT
51+
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
52+
)
53+
endif()
4354
endif()
4455

4556
if(NOT WIN32 AND NOT UR_STATIC_ADAPTER_L0)

0 commit comments

Comments
 (0)