Skip to content

Commit 86a5f11

Browse files
[L0] Add initial USM alloc enqueue API
Co-authored-by: Michał Staniewski <[email protected]>
1 parent 7467ee9 commit 86a5f11

File tree

5 files changed

+340
-99
lines changed

5 files changed

+340
-99
lines changed

source/adapters/level_zero/context.hpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,14 @@ struct ur_context_handle_t_ : _ur_object {
5656
ur_context_handle_t_(ze_context_handle_t ZeContext, uint32_t NumDevices,
5757
const ur_device_handle_t *Devs, bool OwnZeContext)
5858
: ZeContext{ZeContext}, Devices{Devs, Devs + NumDevices},
59-
NumDevices{NumDevices}, DefaultPool{this, nullptr}, ProxyPool{this,
60-
nullptr} {
59+
NumDevices{NumDevices}, DefaultPool{this, nullptr},
60+
ProxyPool{this, nullptr}, AsyncPool{this, nullptr} {
6161
OwnNativeHandle = OwnZeContext;
6262
}
6363

6464
ur_context_handle_t_(ze_context_handle_t ZeContext)
65-
: ZeContext{ZeContext}, DefaultPool{this, nullptr}, ProxyPool{this,
66-
nullptr} {}
65+
: ZeContext{ZeContext}, DefaultPool{this, nullptr},
66+
ProxyPool{this, nullptr}, AsyncPool{this, nullptr} {}
6767

6868
// An L0 context handle is primarily used during creation and management of
6969
// resources that may be used by multiple devices.
@@ -125,6 +125,9 @@ struct ur_context_handle_t_ : _ur_object {
125125
// Allocation-tracking proxy pools for direct allocations. No pooling used.
126126
ur_usm_pool_handle_t_ ProxyPool;
127127

128+
// USM pools for async allocations.
129+
ur_usm_pool_handle_t_ AsyncPool;
130+
128131
// Map associating pools created with urUsmPoolCreate and internal pools
129132
std::list<ur_usm_pool_handle_t> UsmPoolHandles{};
130133

source/adapters/level_zero/queue.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1329,7 +1329,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList,
13291329
Device->Platform->ContextsMutex, std::defer_lock);
13301330

13311331
if (IndirectAccessTrackingEnabled) {
1332-
// We are going to submit kernels for execution. If indirect access flag is
1332+
// We are going to submit kernels for execution. If indirect access flag is
13331333
// set for a kernel then we need to make a snapshot of existing memory
13341334
// allocations in all contexts in the platform. We need to lock the mutex
13351335
// guarding the list of contexts in the platform to prevent creation of new

source/adapters/level_zero/usm.cpp

Lines changed: 181 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -588,124 +588,211 @@ ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) {
588588
return UR_RESULT_SUCCESS;
589589
}
590590

591+
static ur_result_t enqueueUSMAllocHelper(
592+
ur_queue_handle_t Queue, ur_usm_pool_handle_t Pool, const size_t Size,
593+
const ur_exp_enqueue_usm_alloc_properties_t *Properties,
594+
uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
595+
void **RetMem, ur_event_handle_t *OutEvent, ur_usm_type_t Type) {
596+
std::ignore = Pool;
597+
std::ignore = Properties;
598+
599+
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
600+
601+
bool UseCopyEngine = false;
602+
_ur_ze_event_list_t TmpWaitList;
603+
UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
604+
NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine));
605+
606+
bool OkToBatch = true;
607+
// Get a new command list to be used on this call
608+
ur_command_list_ptr_t CommandList{};
609+
UR_CALL(Queue->Context->getAvailableCommandList(
610+
Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList,
611+
OkToBatch, nullptr /*ForcedCmdQueue*/));
612+
613+
ze_event_handle_t ZeEvent = nullptr;
614+
ur_event_handle_t InternalEvent{};
615+
bool IsInternal = OutEvent == nullptr;
616+
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
617+
618+
ur_command_t CommandType = UR_COMMAND_FORCE_UINT32;
619+
switch (Type) {
620+
case UR_USM_TYPE_HOST:
621+
CommandType = UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP;
622+
break;
623+
case UR_USM_TYPE_DEVICE:
624+
CommandType = UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP;
625+
break;
626+
case UR_USM_TYPE_SHARED:
627+
CommandType = UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP;
628+
break;
629+
default:
630+
logger::error("enqueueUSMAllocHelper: unsupported USM type");
631+
throw UR_RESULT_ERROR_UNKNOWN;
632+
}
633+
UR_CALL(createEventAndAssociateQueue(Queue, Event, CommandType, CommandList,
634+
IsInternal, false));
635+
ZeEvent = (*Event)->ZeEvent;
636+
(*Event)->WaitList = TmpWaitList;
637+
638+
// Allocate USM memory
639+
ur_usm_pool_handle_t USMPool = nullptr;
640+
if (Pool) {
641+
USMPool = Pool;
642+
} else {
643+
USMPool = &Queue->Context->AsyncPool;
644+
}
645+
646+
auto Device = (Type == UR_USM_TYPE_HOST) ? nullptr : Queue->Device;
647+
auto Ret =
648+
USMPool->allocate(Queue->Context, Device, nullptr, Type, Size, RetMem);
649+
if (Ret) {
650+
return Ret;
651+
}
652+
653+
// Signal that USM allocation event was finished
654+
ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandList->first, ZeEvent));
655+
656+
UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch));
657+
658+
return UR_RESULT_SUCCESS;
659+
}
660+
591661
ur_result_t urEnqueueUSMDeviceAllocExp(
592-
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
593-
ur_usm_pool_handle_t
594-
pPool, ///< [in][optional] handle of the USM memory pool
595-
const size_t size, ///< [in] minimum size in bytes of the USM memory object
662+
ur_queue_handle_t Queue, ///< [in] handle of the queue object
663+
ur_usm_pool_handle_t Pool, ///< [in][optional] handle of the USM memory pool
664+
const size_t Size, ///< [in] minimum size in bytes of the USM memory object
596665
///< to be allocated
597666
const ur_exp_enqueue_usm_alloc_properties_t
598-
*pProperties, ///< [in][optional] pointer to the enqueue asynchronous
599-
///< USM allocation properties
600-
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
667+
*Properties, ///< [in][optional] pointer to the enqueue async alloc
668+
///< properties
669+
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
601670
const ur_event_handle_t
602-
*phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
603-
///< pointer to a list of events that must be complete
604-
///< before the kernel execution. If nullptr, the
605-
///< numEventsInWaitList must be 0, indicating no wait
606-
///< events.
607-
void **ppMem, ///< [out] pointer to USM memory object
608-
ur_event_handle_t
609-
*phEvent ///< [out][optional] return an event object that identifies the
610-
///< asynchronous USM device allocation
671+
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
672+
///< pointer to a list of events that must be complete
673+
///< before the kernel execution. If nullptr, the
674+
///< numEventsInWaitList must be 0, indicating no wait
675+
///< events.
676+
void **Mem, ///< [out] pointer to USM memory object
677+
ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
678+
///< identifies the async alloc
611679
) {
612-
std::ignore = hQueue;
613-
std::ignore = pPool;
614-
std::ignore = size;
615-
std::ignore = pProperties;
616-
std::ignore = numEventsInWaitList;
617-
std::ignore = phEventWaitList;
618-
std::ignore = ppMem;
619-
std::ignore = phEvent;
620-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
680+
return enqueueUSMAllocHelper(Queue, Pool, Size, Properties,
681+
NumEventsInWaitList, EventWaitList, Mem,
682+
OutEvent, UR_USM_TYPE_DEVICE);
621683
}
622684

623685
ur_result_t urEnqueueUSMSharedAllocExp(
624-
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
625-
ur_usm_pool_handle_t
626-
pPool, ///< [in][optional] handle of the USM memory pool
627-
const size_t size, ///< [in] minimum size in bytes of the USM memory object
686+
ur_queue_handle_t Queue, ///< [in] handle of the queue object
687+
ur_usm_pool_handle_t Pool, ///< [in][optional] handle of the USM memory pool
688+
const size_t Size, ///< [in] minimum size in bytes of the USM memory object
628689
///< to be allocated
629690
const ur_exp_enqueue_usm_alloc_properties_t
630-
*pProperties, ///< [in][optional] pointer to the enqueue asynchronous
631-
///< USM allocation properties
632-
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
691+
*Properties, ///< [in][optional] pointer to the enqueue async alloc
692+
///< properties
693+
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
633694
const ur_event_handle_t
634-
*phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
635-
///< pointer to a list of events that must be complete
636-
///< before the kernel execution. If nullptr, the
637-
///< numEventsInWaitList must be 0, indicating no wait
638-
///< events.
639-
void **ppMem, ///< [out] pointer to USM memory object
640-
ur_event_handle_t
641-
*phEvent ///< [out][optional] return an event object that identifies the
642-
///< asynchronous USM shared allocation
695+
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
696+
///< pointer to a list of events that must be complete
697+
///< before the kernel execution. If nullptr, the
698+
///< numEventsInWaitList must be 0, indicating no wait
699+
///< events.
700+
void **Mem, ///< [out] pointer to USM memory object
701+
ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
702+
///< identifies the async alloc
643703
) {
644-
std::ignore = hQueue;
645-
std::ignore = pPool;
646-
std::ignore = size;
647-
std::ignore = pProperties;
648-
std::ignore = numEventsInWaitList;
649-
std::ignore = phEventWaitList;
650-
std::ignore = ppMem;
651-
std::ignore = phEvent;
652-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
704+
return enqueueUSMAllocHelper(Queue, Pool, Size, Properties,
705+
NumEventsInWaitList, EventWaitList, Mem,
706+
OutEvent, UR_USM_TYPE_SHARED);
653707
}
654708

655709
ur_result_t urEnqueueUSMHostAllocExp(
656-
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
657-
ur_usm_pool_handle_t
658-
pPool, ///< [in][optional] handle of the USM memory pool
659-
const size_t size, ///< [in] minimum size in bytes of the USM memory object
710+
ur_queue_handle_t Queue, ///< [in] handle of the queue object
711+
ur_usm_pool_handle_t Pool, ///< [in][optional] handle of the USM memory pool
712+
const size_t Size, ///< [in] minimum size in bytes of the USM memory object
660713
///< to be allocated
661714
const ur_exp_enqueue_usm_alloc_properties_t
662-
*pProperties, ///< [in][optional] pointer to the enqueue asynchronous
663-
///< USM allocation properties
664-
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
715+
*Properties, ///< [in][optional] pointer to the enqueue async alloc
716+
///< properties
717+
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
665718
const ur_event_handle_t
666-
*phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
667-
///< pointer to a list of events that must be complete
668-
///< before the kernel execution. If nullptr, the
669-
///< numEventsInWaitList must be 0, indicating no wait
670-
///< events.
671-
void **ppMem, ///< [out] pointer to USM memory object
672-
ur_event_handle_t
673-
*phEvent ///< [out][optional] return an event object that identifies the
674-
///< asynchronous USM host allocation
719+
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
720+
///< pointer to a list of events that must be complete
721+
///< before the kernel execution. If nullptr, the
722+
///< numEventsInWaitList must be 0, indicating no wait
723+
///< events.
724+
void **Mem, ///< [out] pointer to USM memory object
725+
ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
726+
///< identifies the async alloc
675727
) {
676-
std::ignore = hQueue;
677-
std::ignore = pPool;
678-
std::ignore = size;
679-
std::ignore = pProperties;
680-
std::ignore = numEventsInWaitList;
681-
std::ignore = phEventWaitList;
682-
std::ignore = ppMem;
683-
std::ignore = phEvent;
684-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
728+
return enqueueUSMAllocHelper(Queue, Pool, Size, Properties,
729+
NumEventsInWaitList, EventWaitList, Mem,
730+
OutEvent, UR_USM_TYPE_HOST);
685731
}
686732

687733
ur_result_t urEnqueueUSMFreeExp(
688-
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
689-
ur_usm_pool_handle_t
690-
pPool, ///< [in][optional] handle of the USM memory pooliptor
691-
void *pMem, ///< [in] pointer to USM memory object
692-
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
734+
ur_queue_handle_t Queue, ///< [in] handle of the queue object
735+
ur_usm_pool_handle_t Pool, ///< [in][optional] handle of the USM memory pool
736+
void *Mem, ///< [in] pointer to USM memory object
737+
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
693738
const ur_event_handle_t
694-
*phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
695-
///< pointer to a list of events that must be complete
696-
///< before the kernel execution. If nullptr, the
697-
///< numEventsInWaitList must be 0, indicating no wait
698-
///< events.
699-
ur_event_handle_t *phEvent ///< [out][optional] return an event object that
700-
///< identifies the asynchronous USM deallocation
739+
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
740+
///< pointer to a list of events that must be complete
741+
///< before the kernel execution. If nullptr, the
742+
///< numEventsInWaitList must be 0, indicating no wait
743+
///< events.
744+
ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
745+
///< identifies the async free
701746
) {
702-
std::ignore = hQueue;
703-
std::ignore = pPool;
704-
std::ignore = pMem;
705-
std::ignore = numEventsInWaitList;
706-
std::ignore = phEventWaitList;
707-
std::ignore = phEvent;
708-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
747+
std::ignore = Pool;
748+
749+
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
750+
751+
bool UseCopyEngine = false;
752+
_ur_ze_event_list_t TmpWaitList;
753+
UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
754+
NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine));
755+
756+
bool OkToBatch = false;
757+
// Get a new command list to be used on this call
758+
ur_command_list_ptr_t CommandList{};
759+
UR_CALL(Queue->Context->getAvailableCommandList(
760+
Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList,
761+
OkToBatch, nullptr /*ForcedCmdQueue*/));
762+
763+
ze_event_handle_t ZeEvent = nullptr;
764+
ur_event_handle_t InternalEvent{};
765+
bool IsInternal = OutEvent == nullptr;
766+
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
767+
768+
UR_CALL(createEventAndAssociateQueue(Queue, Event,
769+
UR_COMMAND_ENQUEUE_USM_FREE_EXP,
770+
CommandList, IsInternal, false));
771+
ZeEvent = (*Event)->ZeEvent;
772+
(*Event)->WaitList = TmpWaitList;
773+
774+
const auto &ZeCommandList = CommandList->first;
775+
const auto &WaitList = (*Event)->WaitList;
776+
if (WaitList.Length) {
777+
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
778+
(ZeCommandList, WaitList.Length, WaitList.ZeEventList));
779+
}
780+
781+
// Wait for commands execution until USM can be freed
782+
UR_CALL(Queue->executeCommandList(CommandList, true, OkToBatch)); // Blocking
783+
784+
// Free USM memory
785+
auto Ret = USMFreeHelper(Queue->Context, Mem);
786+
if (Ret) {
787+
return Ret;
788+
}
789+
790+
// Signal that USM free event was finished
791+
ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));
792+
793+
UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch));
794+
795+
return UR_RESULT_SUCCESS;
709796
}
710797
} // namespace ur::level_zero
711798

test/adapters/level_zero/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,17 @@ function(add_adapter_tests adapter)
4040

4141
add_dependencies(test-adapter-${adapter}
4242
generate_device_binaries kernel_names_header)
43+
44+
45+
if("${adapter}" STREQUAL "level_zero")
46+
add_adapter_test(level_zero_enqueue_alloc
47+
FIXTURE KERNELS
48+
SOURCES
49+
enqueue_alloc.cpp
50+
ENVIRONMENT
51+
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
52+
)
53+
endif()
4354
endif()
4455

4556
if(NOT WIN32 AND NOT UR_STATIC_ADAPTER_L0)

0 commit comments

Comments
 (0)