Skip to content

Commit 87eaebb

Browse files
committed
Add remaining calls that are shared between queue and command buffer
1 parent 3ce6fcc commit 87eaebb

File tree

6 files changed

+279
-156
lines changed

6 files changed

+279
-156
lines changed

source/adapters/level_zero/v2/api.cpp

-47
Original file line numberDiff line numberDiff line change
@@ -239,53 +239,6 @@ ur_result_t urBindlessImagesReleaseExternalSemaphoreExp(
239239
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
240240
}
241241

242-
// Placeholder for the USM fill command-buffer append entry point.
//
// No argument is inspected: the body logs that the function is not
// implemented and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urCommandBufferAppendUSMFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory,
    const void *pPattern, size_t patternSize, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) {
  const ur_result_t notSupported = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return notSupported;
}
253-
254-
// Placeholder for the buffer fill command-buffer append entry point.
//
// No argument is inspected: the body logs that the function is not
// implemented and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urCommandBufferAppendMemBufferFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
    const void *pPattern, size_t patternSize, size_t offset, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) {
  const ur_result_t notSupported = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return notSupported;
}
265-
266-
// Placeholder for the USM prefetch command-buffer append entry point.
//
// No argument is inspected: the body logs that the function is not
// implemented and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urCommandBufferAppendUSMPrefetchExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_migration_flags_t flags,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) {
  const ur_result_t notSupported = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return notSupported;
}
277-
278-
// Placeholder for the USM advise command-buffer append entry point.
//
// No argument is inspected: the body logs that the function is not
// implemented and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urCommandBufferAppendUSMAdviseExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) {
  const ur_result_t notSupported = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return notSupported;
}
288-
289242
ur_result_t urCommandBufferUpdateKernelLaunchExp(
290243
ur_exp_command_buffer_command_handle_t hCommand,
291244
const ur_exp_command_buffer_update_kernel_launch_desc_t

source/adapters/level_zero/v2/command_buffer.cpp

+108
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,114 @@ ur_result_t urCommandBufferAppendMemBufferReadRectExp(
346346
return exceptionToResult(std::current_exception());
347347
}
348348

349+
// Records a USM fill into the given command buffer.
//
// pMemory, patternSize, pPattern and size are forwarded to the command
// buffer's command list manager with an empty event wait list and no output
// event. The sync-point, event and command-handle arguments are not used.
// Any exception raised on the way is converted by exceptionToResult.
ur_result_t urCommandBufferAppendUSMFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory,
    const void *pPattern, size_t patternSize, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Event arguments are not consumed; the original note points at the same
  // issue in urCommandBufferAppendKernelLaunchExp.
  (void)phEvent;
  (void)phEventWaitList;
  (void)numEventsInWaitList;

  // Sync points can be skipped because all lists are in-order.
  (void)pSyncPoint;
  (void)pSyncPointWaitList;
  (void)numSyncPointsInWaitList;

  // No command handle is produced.
  (void)phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendUSMFill(
      pMemory, patternSize, pPattern, size, 0, nullptr, nullptr));

  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}
375+
376+
// Records a buffer fill into the given command buffer.
//
// hBuffer, pPattern, patternSize, offset and size are forwarded to the
// command buffer's command list manager with an empty event wait list and no
// output event. The sync-point, event and command-handle arguments are not
// used. Any exception raised on the way is converted by exceptionToResult.
ur_result_t urCommandBufferAppendMemBufferFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
    const void *pPattern, size_t patternSize, size_t offset, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Event arguments are not consumed; the original note points at the same
  // issue in urCommandBufferAppendKernelLaunchExp.
  (void)phEvent;
  (void)phEventWaitList;
  (void)numEventsInWaitList;

  // Sync points can be skipped because all lists are in-order.
  (void)pSyncPoint;
  (void)pSyncPointWaitList;
  (void)numSyncPointsInWaitList;

  // No command handle is produced.
  (void)phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendMemBufferFill(
      hBuffer, pPattern, patternSize, offset, size, 0, nullptr, nullptr));

  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}
402+
403+
// Records a USM prefetch into the given command buffer.
//
// pMemory, size and flags are forwarded to the command buffer's command list
// manager with an empty event wait list and no output event. The sync-point,
// event and command-handle arguments are not used. Any exception raised on
// the way is converted by exceptionToResult.
ur_result_t urCommandBufferAppendUSMPrefetchExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_migration_flags_t flags,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Event arguments are not consumed; the original note points at the same
  // issue in urCommandBufferAppendKernelLaunchExp.
  (void)phEvent;
  (void)phEventWaitList;
  (void)numEventsInWaitList;

  // Sync points can be skipped because all lists are in-order.
  (void)pSyncPoint;
  (void)pSyncPointWaitList;
  (void)numSyncPointsInWaitList;

  // No command handle is produced.
  (void)phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendUSMPrefetch(
      pMemory, size, flags, 0, nullptr, nullptr));

  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}
430+
431+
// Records a USM memory-advice hint into the given command buffer.
//
// pMemory, size and advice are forwarded to the command buffer's command list
// manager with no output event. The sync-point, event and command-handle
// arguments are not used. Any exception raised on the way is converted by
// exceptionToResult.
ur_result_t urCommandBufferAppendUSMAdviseExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Event arguments are not consumed; the original note points at the same
  // issue in urCommandBufferAppendKernelLaunchExp.
  (void)phEvent;
  (void)phEventWaitList;
  (void)numEventsInWaitList;

  // Sync points can be skipped because all lists are in-order.
  (void)pSyncPoint;
  (void)pSyncPointWaitList;
  (void)numSyncPointsInWaitList;

  // No command handle is produced.
  (void)phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendUSMAdvise(pMemory, size,
                                                             advice, nullptr));

  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}
349457
ur_result_t
350458
urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
351459
ur_exp_command_buffer_info_t propName,

source/adapters/level_zero/v2/command_list_manager.cpp

+133
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,50 @@ ur_command_list_manager::~ur_command_list_manager() {
3131
ur::level_zero::urDeviceRelease(device);
3232
}
3333

34+
// Appends commands that fill `size` bytes of `dst`, starting at `offset`,
// with repetitions of the `patternSize`-byte pattern at `pPattern`.
//
// No mutex is taken here; both visible callers acquire their locks before
// calling.
//
// Parameters:
//   dst                  memory object to fill; its device pointer is
//                        obtained through getDevicePtr.
//   offset, size         byte range inside dst.
//   patternSize          pattern length in bytes; must be non-zero.
//   pPattern             pattern bytes, used as the source of the fill/copies.
//   numEventsInWaitList, phEventWaitList
//                        events handed to getWaitListView.
//   phEvent, commandType handed to getSignalEvent to obtain the signal event.
//
// Returns UR_RESULT_ERROR_INVALID_SIZE when patternSize is zero, otherwise
// UR_RESULT_SUCCESS or whatever error ZE2UR_CALL reports. The callback given
// to getDevicePtr uses ZE2UR_CALL_THROWS, so failures there surface as
// exceptions rather than return codes.
ur_result_t ur_command_list_manager::appendGenericFillUnlocked(
    ur_mem_handle_t dst, size_t offset, size_t patternSize,
    const void *pPattern, size_t size, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
    ur_command_t commandType) {
  // A zero-length pattern cannot be replicated, and the emulation path below
  // divides by patternSize. Reject it before any event is requested.
  UR_ASSERT(patternSize != 0, UR_RESULT_ERROR_INVALID_SIZE);

  auto zeSignalEvent = getSignalEvent(phEvent, commandType);

  auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);

  // NOTE(review): the destination is requested with read_only access although
  // the fill writes to it - confirm this is the intended access mode.
  auto pDst = ur_cast<char *>(dst->getDevicePtr(
      device, ur_mem_handle_t_::device_access_mode_t::read_only, offset, size,
      [&](void *src, void *dst, size_t size) {
        // Copy callback supplied to getDevicePtr. The wait list is attached
        // to this copy and then cleared, so later appends see an empty view.
        ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
                          (zeCommandList.get(), dst, src, size, nullptr,
                           waitListView.num, waitListView.handles));
        waitListView.clear();
      }));

  // PatternSize must be a power of two for zeCommandListAppendMemoryFill.
  // When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
  if (isPowerOf2(patternSize)) {
    ZE2UR_CALL(zeCommandListAppendMemoryFill,
               (zeCommandList.get(), pDst, pPattern, patternSize, size,
                zeSignalEvent, waitListView.num, waitListView.handles));
  } else {
    // One copy of the pattern per slot. The count is kept in size_t so that
    // very large fills are not truncated to 32 bits.
    // NOTE(review): when size < patternSize no copy is appended, so nothing
    // here carries zeSignalEvent - confirm callers never pass such sizes.
    const size_t numOfCopySteps = size / patternSize;
    const void *src = pPattern;

    for (size_t step = 0; step < numOfCopySteps; ++step) {
      void *dst = pDst + step * patternSize;
      // Only the final copy carries the signal event; only the first copy
      // sees a non-empty wait list, because it is cleared right after.
      ZE2UR_CALL(zeCommandListAppendMemoryCopy,
                 (zeCommandList.get(), dst, src, patternSize,
                  step == numOfCopySteps - 1 ? zeSignalEvent : nullptr,
                  waitListView.num, waitListView.handles));
      waitListView.clear();
    }
  }

  return UR_RESULT_SUCCESS;
}
77+
3478
ur_result_t ur_command_list_manager::appendGenericCopyUnlocked(
3579
ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset,
3680
size_t dstOffset, size_t size, uint32_t numEventsInWaitList,
@@ -209,6 +253,95 @@ ur_result_t ur_command_list_manager::appendUSMMemcpy(
209253
return UR_RESULT_SUCCESS;
210254
}
211255

256+
// Appends a pattern fill of `size` bytes at `offset` within `hBuffer`.
//
// The requested range is validated against hBuffer->getSize() first; an
// out-of-range request returns UR_RESULT_ERROR_INVALID_SIZE. The manager's
// mutex and the buffer's mutex are then locked together, and the work is
// delegated to appendGenericFillUnlocked with UR_COMMAND_MEM_BUFFER_FILL.
//
// Returns the result of appendGenericFillUnlocked on the validated range.
ur_result_t ur_command_list_manager::appendMemBufferFill(
    ur_mem_handle_t hBuffer, const void *pPattern, size_t patternSize,
    size_t offset, size_t size, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendMemBufferFill");

  // Written as two comparisons so that a huge offset/size pair cannot wrap
  // around in size_t and slip past the check, as `offset + size` could.
  const size_t bufferSize = hBuffer->getSize();
  UR_ASSERT(offset <= bufferSize && size <= bufferSize - offset,
            UR_RESULT_ERROR_INVALID_SIZE);

  std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
                                                          hBuffer->getMutex());

  return appendGenericFillUnlocked(hBuffer, offset, patternSize, pPattern, size,
                                   numEventsInWaitList, phEventWaitList,
                                   phEvent, UR_COMMAND_MEM_BUFFER_FILL);
}
271+
272+
// Appends a pattern fill of `size` bytes starting at the USM pointer `pMem`.
//
// With the manager's mutex held, the raw pointer is wrapped in a local
// ur_usm_handle_t_ and the fill is delegated to appendGenericFillUnlocked at
// offset 0 with UR_COMMAND_USM_FILL. The wrapper lives only for this call.
ur_result_t ur_command_list_manager::appendUSMFill(
    void *pMem, size_t patternSize, const void *pPattern, size_t size,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMFill");

  std::scoped_lock<ur_shared_mutex> guard(this->Mutex);

  // The fill starts at the beginning of the wrapped range.
  constexpr size_t fillOffset = 0;
  ur_usm_handle_t_ usmTarget(context, size, pMem);

  return appendGenericFillUnlocked(&usmTarget, fillOffset, patternSize,
                                   pPattern, size, numEventsInWaitList,
                                   phEventWaitList, phEvent,
                                   UR_COMMAND_USM_FILL);
}
285+
286+
// Appends a prefetch of `size` bytes at `pMem` to the command list.
//
// With the manager's mutex held, three steps are appended in order: a wait on
// the incoming events (if the wait-list view has any), the prefetch itself,
// and a signal of the output event (if getSignalEvent returned one).
// `flags` is currently not translated and has no effect.
//
// Returns UR_RESULT_SUCCESS, or the error reported by ZE2UR_CALL.
ur_result_t ur_command_list_manager::appendUSMPrefetch(
    const void *pMem, size_t size, ur_usm_migration_flags_t flags,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMPrefetch");

  std::scoped_lock<ur_shared_mutex> guard(this->Mutex);

  auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH);
  auto [pWaitEvents, numWaitEvents] =
      getWaitListView(phEventWaitList, numEventsInWaitList);

  // The prefetch call takes no events, so waiting is a separate command.
  if (pWaitEvents) {
    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
               (zeCommandList.get(), numWaitEvents, pWaitEvents));
  }

  // TODO: figure out how to translate "flags"
  (void)flags;
  ZE2UR_CALL(zeCommandListAppendMemoryPrefetch,
             (zeCommandList.get(), pMem, size));

  // Likewise, completion is signalled with a separate command.
  if (zeSignalEvent) {
    ZE2UR_CALL(zeCommandListAppendSignalEvent,
               (zeCommandList.get(), zeSignalEvent));
  }

  return UR_RESULT_SUCCESS;
}
315+
316+
// Appends a memory-advice hint for `size` bytes at `pMem` to the command list.
//
// With the manager's mutex held, `advice` is cast to ze_memory_advice_t and
// passed to zeCommandListAppendMemAdvise for this manager's device. The wait
// list view is built from an empty list (nullptr, 0); a wait is appended only
// if that view still yields events. The output event, if getSignalEvent
// returned one, is signalled by a separate command afterwards.
//
// Returns UR_RESULT_SUCCESS, or the error reported by ZE2UR_CALL.
ur_result_t
ur_command_list_manager::appendUSMAdvise(const void *pMem, size_t size,
                                         ur_usm_advice_flags_t advice,
                                         ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMAdvise");

  std::scoped_lock<ur_shared_mutex> guard(this->Mutex);

  auto zeAdvice = ur_cast<ze_memory_advice_t>(advice);
  auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE);
  auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0);

  // Step 1: wait, when the view provides something to wait on.
  if (pWaitEvents) {
    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
               (zeCommandList.get(), numWaitEvents, pWaitEvents));
  }

  // Step 2: the advice itself.
  ZE2UR_CALL(zeCommandListAppendMemAdvise,
             (zeCommandList.get(), device->ZeDevice, pMem, size, zeAdvice));

  // Step 3: signal completion, when an output event was requested.
  if (zeSignalEvent) {
    ZE2UR_CALL(zeCommandListAppendSignalEvent,
               (zeCommandList.get(), zeSignalEvent));
  }

  return UR_RESULT_SUCCESS;
}
344+
212345
ur_result_t ur_command_list_manager::appendMemBufferRead(
213346
ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size,
214347
void *pDst, uint32_t numEventsInWaitList,

source/adapters/level_zero/v2/command_list_manager.hpp

+27
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,27 @@ struct ur_command_list_manager : public _ur_object {
9797
size_t height, uint32_t numEventsInWaitList,
9898
const ur_event_handle_t *phEventWaitList,
9999
ur_event_handle_t *phEvent);
100+
// Appends a fill of `size` bytes at `offset` inside `hBuffer`, repeating the
// `patternSize`-byte pattern at `pPattern`.
// numEventsInWaitList/phEventWaitList describe the events to wait on;
// phEvent optionally receives the output event.
// The definition returns UR_RESULT_ERROR_INVALID_SIZE when the range does not
// fit in the buffer, and locks both this object and the buffer.
ur_result_t appendMemBufferFill(ur_mem_handle_t hBuffer, const void *pPattern,
101+
size_t patternSize, size_t offset,
102+
size_t size, uint32_t numEventsInWaitList,
103+
const ur_event_handle_t *phEventWaitList,
104+
ur_event_handle_t *phEvent);
105+
106+
// Appends a fill of `size` bytes starting at the USM pointer `pMem`,
// repeating the `patternSize`-byte pattern at `pPattern`.
// Note the argument order: patternSize precedes pPattern here, unlike
// appendMemBufferFill.
// numEventsInWaitList/phEventWaitList describe the events to wait on;
// phEvent optionally receives the output event.
ur_result_t appendUSMFill(void *pMem, size_t patternSize,
107+
const void *pPattern, size_t size,
108+
uint32_t numEventsInWaitList,
109+
const ur_event_handle_t *phEventWaitList,
110+
ur_event_handle_t *phEvent);
111+
112+
// Appends a prefetch of `size` bytes at `pMem`.
// `flags` is accepted but the definition does not translate it yet.
// numEventsInWaitList/phEventWaitList describe the events to wait on;
// phEvent optionally receives the output event.
ur_result_t appendUSMPrefetch(const void *pMem, size_t size,
113+
ur_usm_migration_flags_t flags,
114+
uint32_t numEventsInWaitList,
115+
const ur_event_handle_t *phEventWaitList,
116+
ur_event_handle_t *phEvent);
117+
118+
// Appends a memory-advice hint `advice` for `size` bytes at `pMem`.
// Unlike the other append calls declared alongside it, this one takes no
// event wait list; phEvent optionally receives the output event.
ur_result_t appendUSMAdvise(const void *pMem, size_t size,
119+
ur_usm_advice_flags_t advice,
120+
ur_event_handle_t *phEvent);
100121

101122
// Returns a Level Zero command list handle.
// NOTE(review): presumably the list this manager appends to - the definition
// is not visible here, confirm before relying on it.
ze_command_list_handle_t getZeCommandList();
102123

@@ -106,6 +127,12 @@ struct ur_command_list_manager : public _ur_object {
106127
ur_command_t commandType);
107128

108129
private:
130+
// Shared fill implementation used by appendMemBufferFill and appendUSMFill.
// Fills `size` bytes at `offset` inside `hBuffer` with the `patternSize`-byte
// pattern at `pPattern`; `commandType` is passed on when the signal event is
// obtained. The definition takes no lock itself; the visible callers lock
// before calling.
ur_result_t appendGenericFillUnlocked(
131+
ur_mem_handle_t hBuffer, size_t offset, size_t patternSize,
132+
const void *pPattern, size_t size, uint32_t numEventsInWaitList,
133+
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
134+
ur_command_t commandType);
135+
109136
ur_result_t appendGenericCopyUnlocked(
110137
ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset,
111138
size_t dstOffset, size_t size, uint32_t numEventsInWaitList,

0 commit comments

Comments
 (0)