Skip to content

Commit 89622c8

Browse files
committed
Add remaining calls that are shared between queue and command buffer
1 parent 6c07eda commit 89622c8

File tree

5 files changed

+278
-99
lines changed

5 files changed

+278
-99
lines changed

source/adapters/level_zero/v2/api.cpp

-47
Original file line number | Diff line number | Diff line change
@@ -170,53 +170,6 @@ ur_result_t urBindlessImagesReleaseExternalSemaphoreExp(
170170
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
171171
}
172172

173-
// Placeholder entry point for appending a USM fill to a command buffer.
//
// No argument is read: the call logs an error that names this function and
// reports the feature as unsupported.
ur_result_t urCommandBufferAppendUSMFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory,
    const void *pPattern, size_t patternSize, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) {
  const ur_result_t status = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return status;
}
184-
185-
// Placeholder entry point for appending a buffer fill to a command buffer.
//
// No argument is read: the call logs an error that names this function and
// reports the feature as unsupported.
ur_result_t urCommandBufferAppendMemBufferFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
    const void *pPattern, size_t patternSize, size_t offset, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) {
  const ur_result_t status = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return status;
}
196-
197-
// Placeholder entry point for appending a USM prefetch to a command buffer.
//
// No argument is read: the call logs an error that names this function and
// reports the feature as unsupported.
ur_result_t urCommandBufferAppendUSMPrefetchExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_migration_flags_t flags,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) {
  const ur_result_t status = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return status;
}
208-
209-
// Placeholder entry point for appending a USM advise hint to a command buffer.
//
// No argument is read: the call logs an error that names this function and
// reports the feature as unsupported.
ur_result_t urCommandBufferAppendUSMAdviseExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) {
  const ur_result_t status = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return status;
}
219-
220173
ur_result_t urCommandBufferUpdateKernelLaunchExp(
221174
ur_exp_command_buffer_command_handle_t hCommand,
222175
const ur_exp_command_buffer_update_kernel_launch_desc_t

source/adapters/level_zero/v2/command_buffer.cpp

+108
Original file line number | Diff line number | Diff line change
@@ -346,6 +346,114 @@ ur_result_t urCommandBufferAppendMemBufferReadRectExp(
346346
return exceptionToResult(std::current_exception());
347347
}
348348

349+
// Records a USM fill into the command buffer by forwarding to the command
// buffer's command list manager.
//
// Only hCommandBuffer, pMemory, pPattern, patternSize and size are used. The
// manager is always called with an empty wait list and no output event; any
// exception is converted to a ur_result_t by the function-level handler.
ur_result_t urCommandBufferAppendUSMFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory,
    const void *pPattern, size_t patternSize, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Sync points are not needed here: all lists are in-order.
  std::ignore = pSyncPoint;
  std::ignore = pSyncPointWaitList;
  std::ignore = numSyncPointsInWaitList;

  // Event arguments are not consumed; this is the same issue as in
  // urCommandBufferAppendKernelLaunchExp.
  std::ignore = phEvent;
  std::ignore = phEventWaitList;
  std::ignore = numEventsInWaitList;

  // No command handle is produced.
  std::ignore = phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendUSMFill(
      pMemory, patternSize, pPattern, size, 0, nullptr, nullptr));

  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}
375+
376+
// Records a buffer fill into the command buffer by forwarding to the command
// buffer's command list manager.
//
// Only hCommandBuffer, hBuffer, pPattern, patternSize, offset and size are
// used. The manager is always called with an empty wait list and no output
// event; any exception is converted to a ur_result_t by the function-level
// handler.
ur_result_t urCommandBufferAppendMemBufferFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
    const void *pPattern, size_t patternSize, size_t offset, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Sync points are not needed here: all lists are in-order.
  std::ignore = pSyncPoint;
  std::ignore = pSyncPointWaitList;
  std::ignore = numSyncPointsInWaitList;

  // Event arguments are not consumed; this is the same issue as in
  // urCommandBufferAppendKernelLaunchExp.
  std::ignore = phEvent;
  std::ignore = phEventWaitList;
  std::ignore = numEventsInWaitList;

  // No command handle is produced.
  std::ignore = phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendMemBufferFill(
      hBuffer, pPattern, patternSize, offset, size, 0, nullptr, nullptr));

  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}
402+
403+
// Records a USM prefetch into the command buffer by forwarding to the command
// buffer's command list manager.
//
// Only hCommandBuffer, pMemory, size and flags are used. The manager is always
// called with an empty wait list and no output event; any exception is
// converted to a ur_result_t by the function-level handler.
ur_result_t urCommandBufferAppendUSMPrefetchExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_migration_flags_t flags,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Sync points are not needed here: all lists are in-order.
  std::ignore = pSyncPoint;
  std::ignore = pSyncPointWaitList;
  std::ignore = numSyncPointsInWaitList;

  // Event arguments are not consumed; this is the same issue as in
  // urCommandBufferAppendKernelLaunchExp.
  std::ignore = phEvent;
  std::ignore = phEventWaitList;
  std::ignore = numEventsInWaitList;

  // No command handle is produced.
  std::ignore = phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendUSMPrefetch(
      pMemory, size, flags, 0, nullptr, nullptr));

  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}
430+
431+
// Records a USM advise hint into the command buffer by forwarding to the
// command buffer's command list manager.
//
// Only hCommandBuffer, pMemory, size and advice are used. The manager is
// called without an output event; any exception is converted to a ur_result_t
// by the function-level handler.
ur_result_t urCommandBufferAppendUSMAdviseExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Sync points are not needed here: all lists are in-order.
  std::ignore = pSyncPoint;
  std::ignore = pSyncPointWaitList;
  std::ignore = numSyncPointsInWaitList;

  // Event arguments are not consumed; this is the same issue as in
  // urCommandBufferAppendKernelLaunchExp.
  std::ignore = phEvent;
  std::ignore = phEventWaitList;
  std::ignore = numEventsInWaitList;

  // No command handle is produced.
  std::ignore = phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendUSMAdvise(pMemory, size,
                                                             advice, nullptr));

  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}
349457
ur_result_t
350458
urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
351459
ur_exp_command_buffer_info_t propName,

source/adapters/level_zero/v2/command_list_manager.cpp

+133
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,50 @@ ur_command_list_manager::~ur_command_list_manager() {
3131
ur::level_zero::urDeviceRelease(device);
3232
}
3333

34+
// Appends commands that fill `size` bytes of `dst`, starting `offset` bytes
// in, with the `patternSize`-byte pattern stored at `pPattern`.
//
// This function takes no locks itself; the append*Fill entry points in this
// file acquire them before delegating here.
//
// dst                  memory object to fill; its device pointer is obtained
//                      through getDevicePtr().
// offset, size         byte range inside dst.
// patternSize/pPattern pattern to replicate across the range.
// numEventsInWaitList/
// phEventWaitList      events the first appended command waits on.
// phEvent              optional output event, turned into the Level Zero
//                      signal event via getSignalEvent().
// commandType          command type passed to getSignalEvent().
//
// Returns UR_RESULT_SUCCESS, or the error produced by a failing Level Zero
// call (ZE2UR_CALL). The callback given to getDevicePtr() reports failures by
// throwing (ZE2UR_CALL_THROWS).
ur_result_t ur_command_list_manager::appendGenericFillUnlocked(
    ur_mem_handle_t dst, size_t offset, size_t patternSize,
    const void *pPattern, size_t size, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
    ur_command_t commandType) {

  auto zeSignalEvent = getSignalEvent(phEvent, commandType);

  auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);

  // NOTE(review): the destination is requested with read_only access even
  // though it is written by the fill below - confirm this is intended.
  //
  // The callback appends a copy that waits on the pending wait list and then
  // clears the view, presumably so that later commands do not wait on the
  // same events again.
  auto pDst = ur_cast<char *>(dst->getDevicePtr(
      device, ur_mem_handle_t_::device_access_mode_t::read_only, offset, size,
      [&](void *src, void *dst, size_t size) {
        ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
                          (zeCommandList.get(), dst, src, size, nullptr,
                           waitListView.num, waitListView.handles));
        waitListView.clear();
      }));

  // PatternSize must be a power of two for zeCommandListAppendMemoryFill.
  // When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
  if (isPowerOf2(patternSize)) {
    ZE2UR_CALL(zeCommandListAppendMemoryFill,
               (zeCommandList.get(), pDst, pPattern, patternSize, size,
                zeSignalEvent, waitListView.num, waitListView.handles));
  } else {
    // Copy the pattern into every patternSize-sized slot of the range.
    // The step count is kept in size_t: narrowing size / patternSize to 32
    // bits would silently truncate very large fills and attach the signal
    // event to the wrong step.
    // Note: when size < patternSize no copy is appended, so zeSignalEvent is
    // not signaled by this function.
    const size_t numOfCopySteps = size / patternSize;
    const void *src = pPattern;

    for (size_t step = 0; step < numOfCopySteps; ++step) {
      // pDst is a char *, so plain pointer arithmetic advances in bytes.
      void *dst = pDst + step * patternSize;
      // Only the last copy signals the event; only the first one waits,
      // because the wait list view is cleared right after it.
      ZE2UR_CALL(zeCommandListAppendMemoryCopy,
                 (zeCommandList.get(), dst, src, patternSize,
                  step == numOfCopySteps - 1 ? zeSignalEvent : nullptr,
                  waitListView.num, waitListView.handles));
      waitListView.clear();
    }
  }

  return UR_RESULT_SUCCESS;
}
77+
3478
ur_result_t ur_command_list_manager::appendGenericCopyUnlocked(
3579
ur_mem_buffer_t *src, ur_mem_buffer_t *dst, bool blocking, size_t srcOffset,
3680
size_t dstOffset, size_t size, uint32_t numEventsInWaitList,
@@ -209,6 +253,95 @@ ur_result_t ur_command_list_manager::appendUSMMemcpy(
209253
return UR_RESULT_SUCCESS;
210254
}
211255

256+
// Appends a fill of `size` bytes at `offset` inside hBuffer, using the
// `patternSize`-byte pattern at `pPattern`.
//
// Returns UR_RESULT_ERROR_INVALID_SIZE when [offset, offset + size) does not
// fit inside the buffer; otherwise returns whatever
// appendGenericFillUnlocked() returns. Both this manager's mutex and the
// buffer's mutex are held while the fill is appended.
ur_result_t ur_command_list_manager::appendMemBufferFill(
    ur_mem_handle_t hBuffer, const void *pPattern, size_t patternSize,
    size_t offset, size_t size, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendMemBufferFill");

  // Overflow-safe range check: `offset + size` could wrap around in size_t
  // arithmetic and make an out-of-range request look valid.
  const size_t bufferSize = hBuffer->getSize();
  UR_ASSERT(offset <= bufferSize && size <= bufferSize - offset,
            UR_RESULT_ERROR_INVALID_SIZE);

  std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
                                                          hBuffer->getMutex());

  return appendGenericFillUnlocked(hBuffer, offset, patternSize, pPattern, size,
                                   numEventsInWaitList, phEventWaitList,
                                   phEvent, UR_COMMAND_MEM_BUFFER_FILL);
}
271+
272+
// Appends a fill of `size` bytes of the USM allocation at pMem, using the
// `patternSize`-byte pattern at `pPattern`.
//
// The pointer is wrapped in a local ur_usm_handle_t_ so the shared fill
// routine can be reused; the fill starts at offset 0 of that handle. This
// manager's mutex is held for the duration of the call.
ur_result_t ur_command_list_manager::appendUSMFill(
    void *pMem, size_t patternSize, const void *pPattern, size_t size,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMFill");

  std::scoped_lock<ur_shared_mutex> guard(this->Mutex);

  ur_usm_handle_t_ usmTarget(context, size, pMem);
  const size_t fillOffset = 0;

  const ur_result_t status = appendGenericFillUnlocked(
      &usmTarget, fillOffset, patternSize, pPattern, size, numEventsInWaitList,
      phEventWaitList, phEvent, UR_COMMAND_USM_FILL);
  return status;
}
285+
286+
// Appends a prefetch of `size` bytes starting at pMem to the command list.
//
// Sequence: wait on the supplied events (if the wait-list view has any
// handles), append the prefetch, then signal the output event (if
// getSignalEvent() produced one). `flags` is accepted but not used yet.
// This manager's mutex is held for the duration of the call.
ur_result_t ur_command_list_manager::appendUSMPrefetch(
    const void *pMem, size_t size, ur_usm_migration_flags_t flags,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMPrefetch");

  std::ignore = flags;

  std::scoped_lock<ur_shared_mutex> guard(this->Mutex);

  auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH);

  auto [pWaitEvents, numWaitEvents] =
      getWaitListView(phEventWaitList, numEventsInWaitList);

  // The prefetch call below takes no wait list, so the wait is appended as a
  // separate command.
  if (pWaitEvents != nullptr) {
    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
               (zeCommandList.get(), numWaitEvents, pWaitEvents));
  }

  // TODO: figure out how to translate "flags"
  ZE2UR_CALL(zeCommandListAppendMemoryPrefetch,
             (zeCommandList.get(), pMem, size));

  // Likewise, completion is signaled by a separate command.
  if (zeSignalEvent != nullptr) {
    ZE2UR_CALL(zeCommandListAppendSignalEvent,
               (zeCommandList.get(), zeSignalEvent));
  }

  return UR_RESULT_SUCCESS;
}
315+
316+
// Appends a memory-advise hint for `size` bytes starting at pMem to the
// command list.
//
// `advice` is converted to ze_memory_advice_t with ur_cast. The wait-list
// view is built from an empty list (nullptr, 0); a wait is appended only if
// that view still carries handles. The output event, if getSignalEvent()
// produced one, is signaled by a separate command after the advise.
// This manager's mutex is held for the duration of the call.
ur_result_t
ur_command_list_manager::appendUSMAdvise(const void *pMem, size_t size,
                                         ur_usm_advice_flags_t advice,
                                         ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMAdvise");

  std::scoped_lock<ur_shared_mutex> guard(this->Mutex);

  auto zeAdvice = ur_cast<ze_memory_advice_t>(advice);

  auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE);

  auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0);

  if (pWaitEvents != nullptr) {
    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
               (zeCommandList.get(), numWaitEvents, pWaitEvents));
  }

  ZE2UR_CALL(zeCommandListAppendMemAdvise,
             (zeCommandList.get(), device->ZeDevice, pMem, size, zeAdvice));

  if (zeSignalEvent != nullptr) {
    ZE2UR_CALL(zeCommandListAppendSignalEvent,
               (zeCommandList.get(), zeSignalEvent));
  }

  return UR_RESULT_SUCCESS;
}
344+
212345
ur_result_t ur_command_list_manager::appendMemBufferRead(
213346
ur_mem_handle_t hMem, bool blockingRead, size_t offset, size_t size,
214347
void *pDst, uint32_t numEventsInWaitList,

source/adapters/level_zero/v2/command_list_manager.hpp

+27
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,27 @@ struct ur_command_list_manager : public _ur_object {
9898
size_t height, uint32_t numEventsInWaitList,
9999
const ur_event_handle_t *phEventWaitList,
100100
ur_event_handle_t *phEvent);
101+
// Appends a fill of `size` bytes at `offset` inside hBuffer with the
// patternSize-byte pattern at pPattern. The definition in
// command_list_manager.cpp returns UR_RESULT_ERROR_INVALID_SIZE when
// offset + size exceeds hBuffer->getSize(), and locks both this manager and
// the buffer before delegating to appendGenericFillUnlocked.
ur_result_t appendMemBufferFill(ur_mem_handle_t hBuffer, const void *pPattern,
102+
size_t patternSize, size_t offset,
103+
size_t size, uint32_t numEventsInWaitList,
104+
const ur_event_handle_t *phEventWaitList,
105+
ur_event_handle_t *phEvent);
106+
107+
// Appends a fill of `size` bytes of the USM memory at pMem with the
// patternSize-byte pattern at pPattern. The definition wraps pMem in a local
// ur_usm_handle_t_, locks this manager, and delegates to
// appendGenericFillUnlocked with offset 0.
ur_result_t appendUSMFill(void *pMem, size_t patternSize,
108+
const void *pPattern, size_t size,
109+
uint32_t numEventsInWaitList,
110+
const ur_event_handle_t *phEventWaitList,
111+
ur_event_handle_t *phEvent);
112+
113+
// Appends a prefetch of `size` bytes starting at pMem. The definition appends
// a wait on the supplied events (when the wait-list view has handles), the
// prefetch itself, and then a signal of the output event (when one exists).
// `flags` is currently ignored by the definition.
ur_result_t appendUSMPrefetch(const void *pMem, size_t size,
114+
ur_usm_migration_flags_t flags,
115+
uint32_t numEventsInWaitList,
116+
const ur_event_handle_t *phEventWaitList,
117+
ur_event_handle_t *phEvent);
118+
119+
// Appends a memory-advise hint for `size` bytes starting at pMem. Unlike the
// other append* methods declared here it takes no event wait list; the
// definition signals the output event (when one exists) after the advise.
ur_result_t appendUSMAdvise(const void *pMem, size_t size,
120+
ur_usm_advice_flags_t advice,
121+
ur_event_handle_t *phEvent);
101122

102123
ze_command_list_handle_t getZeCommandList();
103124

@@ -107,6 +128,12 @@ struct ur_command_list_manager : public _ur_object {
107128
ur_command_t commandType);
108129

109130
private:
131+
// Shared fill implementation used by appendMemBufferFill and appendUSMFill.
// It takes no locks itself; both callers acquire them before calling.
// commandType is forwarded to getSignalEvent for the output event.
// Note: the first parameter is named `dst` in the definition.
ur_result_t appendGenericFillUnlocked(
132+
ur_mem_handle_t hBuffer, size_t offset, size_t patternSize,
133+
const void *pPattern, size_t size, uint32_t numEventsInWaitList,
134+
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
135+
ur_command_t commandType);
136+
110137
ur_result_t appendGenericCopyUnlocked(
111138
ur_mem_buffer_t *src, ur_mem_buffer_t *dst, bool blocking,
112139
size_t srcOffset, size_t dstOffset, size_t size,

0 commit comments

Comments
 (0)