Skip to content

Commit cb5651f

Browse files
hdelankswiecicki
authored andcommitted
WIP
1 parent 11c5856 commit cb5651f

File tree

12 files changed

+346
-7
lines changed

12 files changed

+346
-7
lines changed

source/adapters/cuda/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ add_ur_adapter(${TARGET_NAME}
1010
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
1111
${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp
1212
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
13+
${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp
1314
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp
1415
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp
1516
${CMAKE_CURRENT_SOURCE_DIR}/common.hpp

source/adapters/cuda/async_alloc.cpp

+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
//===--------- async_alloc.cpp - CUDA Adapter -----------------------------===//
2+
//
3+
// Copyright (C) 2024 Intel Corporation
4+
//
5+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
// Exceptions. See LICENSE.TXT
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include <ur_api.h>
12+
13+
#include "context.hpp"
14+
#include "enqueue.hpp"
15+
#include "event.hpp"
16+
#include "queue.hpp"
17+
#include "usm.hpp"
18+
19+
// Enqueues an asynchronous device allocation of `size` bytes on a compute
// stream of `hQueue` and writes the resulting device pointer to `*ppMem`.
//
// Parameters:
//   hQueue              - queue whose next compute stream orders the
//                         allocation.
//   pPool               - optional pool. When non-null it must represent a
//                         CUmemoryPool, and the allocation is taken from it;
//                         when null, cuMemAllocAsync is used.
//   size                - number of bytes to allocate.
//   (properties)        - currently ignored.
//   numEventsInWaitList,
//   phEventWaitList     - events the stream waits on before allocating.
//   ppMem               - receives the device pointer.
//   phEvent             - optional; receives an event recorded after the
//                         allocation has been enqueued.
//
// Returns UR_RESULT_SUCCESS, UR_RESULT_ERROR_INVALID_ARGUMENT when `pPool`
// does not represent a CUmemoryPool, or the ur_result_t thrown by a failing
// UR_CHECK_ERROR call.
UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp(
    ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size,
    const ur_exp_async_usm_alloc_properties_t *, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, void **ppMem,
    ur_event_handle_t *phEvent) {
  // Checked at runtime (not only via assert) so release builds reject an
  // unusable pool before any work is enqueued.
  if (pPool && !pPool->usesCudaPool()) {
    return UR_RESULT_ERROR_INVALID_ARGUMENT;
  }

  try {
    std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};

    ScopedContext Active(hQueue->getDevice());
    uint32_t StreamToken;
    ur_stream_guard_ Guard;
    CUstream CuStream = hQueue->getNextComputeStream(
        numEventsInWaitList, phEventWaitList, Guard, &StreamToken);

    UR_CHECK_ERROR(enqueueEventsWait(hQueue, CuStream, numEventsInWaitList,
                                     phEventWaitList));

    if (phEvent) {
      // NOTE(review): the event is tagged UR_COMMAND_KERNEL_LAUNCH although
      // this is an allocation; switch to a dedicated command type if the API
      // provides one.
      RetImplEvent =
          std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative(
              UR_COMMAND_KERNEL_LAUNCH, hQueue, CuStream, StreamToken));
      UR_CHECK_ERROR(RetImplEvent->start());
    }

    auto *DevicePtr = reinterpret_cast<CUdeviceptr *>(ppMem);
    if (pPool) {
      // Previously this branch allocated nothing and still reported success,
      // leaving *ppMem untouched.
      UR_CHECK_ERROR(cuMemAllocFromPoolAsync(DevicePtr, size,
                                             pPool->getCudaPool(), CuStream));
    } else {
      UR_CHECK_ERROR(cuMemAllocAsync(DevicePtr, size, CuStream));
    }

    if (phEvent) {
      UR_CHECK_ERROR(RetImplEvent->record());
      // Ownership of the event passes to the caller.
      *phEvent = RetImplEvent.release();
    }

  } catch (ur_result_t Err) {
    return Err;
  }
  return UR_RESULT_SUCCESS;
}
61+
62+
// Asynchronous shared USM allocation is not implemented here: every argument
// is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always returned.
UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
68+
69+
// Asynchronous host USM allocation is not implemented here: every argument is
// ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always returned.
UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
75+
76+
// Asynchronous USM free is not implemented here: every argument is ignored and
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always returned.
UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    void * /*pMem*/, uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
83+
/*
84+
85+
UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp(
86+
ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size,
87+
const ur_exp_async_usm_alloc_properties_t *pProperties,
88+
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
89+
void **ppMem, ur_event_handle_t *phEvent) {
90+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
91+
}
92+
93+
UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp(
94+
ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size,
95+
const ur_exp_async_usm_alloc_properties_t *pProperties,
96+
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
97+
void **ppMem, ur_event_handle_t *phEvent) {
98+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
99+
}
100+
101+
UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(
102+
ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, void *pMem,
103+
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
104+
ur_event_handle_t *phEvent) {
105+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
106+
}
107+
*/

source/adapters/cuda/usm.cpp

+66-7
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,67 @@ ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
379379
// Constructs a pool handle whose host, device and shared pools are UMF
// disjoint pools created for `Context`.
//
// The `pNext` chain of `PoolDesc` is walked first: a
// ur_usm_pool_limits_desc_t entry overrides MaxPoolableSize and SlabMinSize in
// every disjoint-pool config; any other structure type throws
// UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT).
ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
                                             ur_usm_pool_desc_t *PoolDesc)
    : Context{Context} {
  if (PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) {
    // TODO: this should only use the host
  }

  for (const void *Next = PoolDesc->pNext; Next != nullptr;) {
    const auto *Desc = static_cast<const ur_base_desc_t *>(Next);
    if (Desc->stype != UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC) {
      throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT);
    }

    const auto *PoolLimits =
        reinterpret_cast<const ur_usm_pool_limits_desc_t *>(Desc);
    for (auto &PoolConfig : DisjointPoolConfigs.Configs) {
      PoolConfig.MaxPoolableSize = PoolLimits->maxPoolableSize;
      PoolConfig.SlabMinSize = PoolLimits->minDriverAllocSize;
    }
    Next = Desc->pNext;
  }

  auto &Configs = this->DisjointPoolConfigs.Configs;

  // The host pool is created once, with no device attached to its provider.
  auto HostProvider =
      umf::memoryProviderMakeUnique<USMHostMemoryProvider>(Context, nullptr)
          .second;
  HostMemPool = umf::poolMakeUniqueFromOps(
                    umfDisjointPoolOps(), std::move(HostProvider),
                    &Configs[usm::DisjointPoolMemType::Host])
                    .second;

  // NOTE(review): DeviceMemPool and SharedMemPool are reassigned on every
  // iteration, so after the loop they hold the pools built for the last
  // device only, and addPool(this) runs once per device.
  for (const auto &Device : Context->getDevices()) {
    auto DeviceProvider =
        umf::memoryProviderMakeUnique<USMDeviceMemoryProvider>(Context, Device)
            .second;
    DeviceMemPool = umf::poolMakeUniqueFromOps(
                        umfDisjointPoolOps(), std::move(DeviceProvider),
                        &Configs[usm::DisjointPoolMemType::Device])
                        .second;

    auto SharedProvider =
        umf::memoryProviderMakeUnique<USMSharedMemoryProvider>(Context, Device)
            .second;
    SharedMemPool = umf::poolMakeUniqueFromOps(
                        umfDisjointPoolOps(), std::move(SharedProvider),
                        &Configs[usm::DisjointPoolMemType::Shared])
                        .second;

    Context->addPool(this);
  }
}
435+
436+
ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
437+
ur_device_handle_t Device,
438+
ur_usm_pool_desc_t *PoolDesc)
439+
: Context{Context} {
440+
if (PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) {
441+
// TODO: this should only use the host
442+
}
382443
const void *pNext = PoolDesc->pNext;
383444
while (pNext != nullptr) {
384445
const ur_base_desc_t *BaseDesc = static_cast<const ur_base_desc_t *>(pNext);
@@ -443,7 +504,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate(
443504
ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool
444505
) {
445506
// Without pool tracking we can't free pool allocations.
446-
#ifdef UMF_ENABLE_POOL_TRACKING
507+
#ifndef UMF_ENABLE_POOL_TRACKING
508+
// We don't need UMF to use native mem pools
509+
if (!(PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP))
510+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
511+
#endif
447512
if (PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) {
448513
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
449514
}
@@ -454,12 +519,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate(
454519
return Ex.getError();
455520
}
456521
return UR_RESULT_SUCCESS;
457-
#else
458-
std::ignore = Context;
459-
std::ignore = PoolDesc;
460-
std::ignore = Pool;
461-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
462-
#endif
463522
}
464523

465524
UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRetain(

source/adapters/cuda/usm.hpp

+8
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig();
1717

18+
// A ur_usm_pool_handle_t can represent different types of memory pools. It may
19+
// sit on top of a UMF pool or a CUmemoryPool, but not both.
1820
struct ur_usm_pool_handle_t_ {
1921
std::atomic_uint32_t RefCount = 1;
2022

@@ -27,6 +29,8 @@ struct ur_usm_pool_handle_t_ {
2729
umf::pool_unique_handle_t SharedMemPool;
2830
umf::pool_unique_handle_t HostMemPool;
2931

32+
CUmemoryPool CUmemPool{0};
33+
3034
ur_usm_pool_handle_t_(ur_context_handle_t Context,
3135
ur_usm_pool_desc_t *PoolDesc);
3236

@@ -37,6 +41,10 @@ struct ur_usm_pool_handle_t_ {
3741
uint32_t getReferenceCount() const noexcept { return RefCount; }
3842

3943
bool hasUMFPool(umf_memory_pool_t *umf_pool);
44+
45+
// To be used if ur_usm_pool_handle_t represents a CUmemoryPool
46+
// True when a non-zero CUmemoryPool handle is stored in CUmemPool.
bool usesCudaPool() const { return CUmemPool != nullptr; }
47+
// Returns the stored CUmemoryPool handle; it is the zero handle unless this
// pool represents a CUmemoryPool (see usesCudaPool()). Declared const so it
// can be called through a const pool handle.
CUmemoryPool getCudaPool() const { return CUmemPool; }
4048
};
4149

4250
// Exception type to pass allocation errors

source/adapters/hip/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ add_ur_adapter(${TARGET_NAME}
5959
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
6060
${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp
6161
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
62+
${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp
6263
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp
6364
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp
6465
${CMAKE_CURRENT_SOURCE_DIR}/common.hpp

source/adapters/hip/async_alloc.cpp

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//===--------- async_alloc.cpp - HIP Adapter ------------------------------===//
2+
//
3+
// Copyright (C) 2024 Intel Corporation
4+
//
5+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
// Exceptions. See LICENSE.TXT
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include <ur_api.h>
12+
13+
// Asynchronous device USM allocation is not implemented by the HIP adapter:
// every argument is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always
// returned.
UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
19+
20+
// Asynchronous shared USM allocation is not implemented by the HIP adapter:
// every argument is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always
// returned.
UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
26+
27+
// Asynchronous host USM allocation is not implemented by the HIP adapter:
// every argument is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always
// returned.
UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
33+
34+
// Asynchronous USM free is not implemented by the HIP adapter: every argument
// is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always returned.
UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    void * /*pMem*/, uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

source/adapters/level_zero/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ if(UR_BUILD_ADAPTER_L0)
1313
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
1414
${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp
1515
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
16+
${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp
1617
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp
1718
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp
1819
${CMAKE_CURRENT_SOURCE_DIR}/common.hpp
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//===--------- async_alloc.cpp - Level Zero Adapter -----------------------===//
2+
//
3+
// Copyright (C) 2024 Intel Corporation
4+
//
5+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
// Exceptions. See LICENSE.TXT
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include <ur_api.h>
12+
13+
// Asynchronous device USM allocation is not implemented by this adapter: every
// argument is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always
// returned.
UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
19+
20+
// Asynchronous shared USM allocation is not implemented by this adapter: every
// argument is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always
// returned.
UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
26+
27+
// Asynchronous host USM allocation is not implemented by this adapter: every
// argument is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always
// returned.
UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
33+
34+
// Asynchronous USM free is not implemented by this adapter: every argument is
// ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always returned.
UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    void * /*pMem*/, uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

source/adapters/native_cpu/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ set(TARGET_NAME ur_adapter_native_cpu)
1010
add_ur_adapter(${TARGET_NAME}
1111
SHARED
1212
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
13+
${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp
1314
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp
1415
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
1516
${CMAKE_CURRENT_SOURCE_DIR}/common.hpp
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//===--------- async_alloc.cpp - Native CPU Adapter -----------------------===//
2+
//
3+
// Copyright (C) 2024 Intel Corporation
4+
//
5+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
6+
// Exceptions. See LICENSE.TXT
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include <ur_api.h>
12+
13+
// Asynchronous device USM allocation is not implemented by this adapter: every
// argument is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always
// returned.
UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
19+
20+
// Asynchronous shared USM allocation is not implemented by this adapter: every
// argument is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always
// returned.
UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
26+
27+
// Asynchronous host USM allocation is not implemented by this adapter: every
// argument is ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always
// returned.
UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    const size_t /*size*/,
    const ur_exp_async_usm_alloc_properties_t * /*pProperties*/,
    uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/, void ** /*ppMem*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
33+
34+
// Asynchronous USM free is not implemented by this adapter: every argument is
// ignored and UR_RESULT_ERROR_UNSUPPORTED_FEATURE is always returned.
UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(
    ur_queue_handle_t /*hQueue*/, ur_usm_pool_handle_t /*pPool*/,
    void * /*pMem*/, uint32_t /*numEventsInWaitList*/,
    const ur_event_handle_t * /*phEventWaitList*/,
    ur_event_handle_t * /*phEvent*/) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

source/adapters/opencl/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ add_ur_adapter(${TARGET_NAME} SHARED
1616
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
1717
${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp
1818
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
19+
${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp
1920
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp
2021
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp
2122
${CMAKE_CURRENT_SOURCE_DIR}/common.hpp

0 commit comments

Comments
 (0)