Skip to content

Commit dd7d5c6

Browse files
ldorau authored and kbenzie committed
Use UMF Proxy pool manager with UMF CUDA memory provider in UR (#17015)
Use UMF Proxy pool manager with UMF CUDA memory provider in UR. The UMF Proxy pool manager is just a wrapper around a UMF memory provider (the CUDA memory provider in this case) that also adds tracking of memory allocations. Moved from: #2659. Signed-off-by: Lukasz Dorau <[email protected]>
1 parent 99fa00b commit dd7d5c6

File tree

9 files changed

+157
-272
lines changed

9 files changed

+157
-272
lines changed

source/adapters/cuda/common.hpp

+38
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,17 @@
1212
#include <cuda.h>
1313
#include <ur/ur.hpp>
1414

15+
#include <umf/base.h>
16+
#include <umf/providers/provider_cuda.h>
17+
18+
#define UMF_RETURN_UMF_ERROR(UmfResult) \
19+
do { \
20+
umf_result_t UmfResult_ = (UmfResult); \
21+
if (UmfResult_ != UMF_RESULT_SUCCESS) { \
22+
return UmfResult_; \
23+
} \
24+
} while (0)
25+
1526
ur_result_t mapErrorUR(CUresult Result);
1627

1728
/// Converts CUDA error into UR error codes, and outputs error information
@@ -59,3 +70,30 @@ void assertion(bool Condition, const char *Message = nullptr);
5970

6071
} // namespace ur
6172
} // namespace detail
73+
74+
namespace umf {
75+
76+
using cuda_params_unique_handle_t = std::unique_ptr<
77+
umf_cuda_memory_provider_params_t,
78+
std::function<umf_result_t(umf_cuda_memory_provider_params_handle_t)>>;
79+
80+
inline umf_result_t setCUMemoryProviderParams(
81+
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams,
82+
int cuDevice, void *cuContext, umf_usm_memory_type_t memType) {
83+
84+
umf_result_t UmfResult =
85+
umfCUDAMemoryProviderParamsSetContext(CUMemoryProviderParams, cuContext);
86+
UMF_RETURN_UMF_ERROR(UmfResult);
87+
88+
UmfResult =
89+
umfCUDAMemoryProviderParamsSetDevice(CUMemoryProviderParams, cuDevice);
90+
UMF_RETURN_UMF_ERROR(UmfResult);
91+
92+
UmfResult =
93+
umfCUDAMemoryProviderParamsSetMemoryType(CUMemoryProviderParams, memType);
94+
UMF_RETURN_UMF_ERROR(UmfResult);
95+
96+
return UMF_RESULT_SUCCESS;
97+
}
98+
99+
} // namespace umf

source/adapters/cuda/context.hpp

+25-8
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,9 @@ typedef void (*ur_context_extended_deleter_t)(void *user_data);
7777
///
7878

7979
static ur_result_t
80-
CreateHostMemoryProvider(ur_device_handle_t_ *DeviceHandle,
81-
umf_memory_provider_handle_t *MemoryProviderHost) {
80+
CreateHostMemoryProviderPool(ur_device_handle_t_ *DeviceHandle,
81+
umf_memory_provider_handle_t *MemoryProviderHost,
82+
umf_memory_pool_handle_t *MemoryPoolHost) {
8283
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams = nullptr;
8384

8485
*MemoryProviderHost = nullptr;
@@ -91,10 +92,20 @@ CreateHostMemoryProvider(ur_device_handle_t_ *DeviceHandle,
9192
umf::cuda_params_unique_handle_t CUMemoryProviderParamsUnique(
9293
CUMemoryProviderParams, umfCUDAMemoryProviderParamsDestroy);
9394

94-
// create UMF CUDA memory provider for the host memory (UMF_MEMORY_TYPE_HOST)
95-
UmfResult = umf::createMemoryProvider(
96-
CUMemoryProviderParamsUnique.get(), 0 /* cuDevice */, context,
97-
UMF_MEMORY_TYPE_HOST, MemoryProviderHost);
95+
UmfResult = umf::setCUMemoryProviderParams(CUMemoryProviderParamsUnique.get(),
96+
0 /* cuDevice */, context,
97+
UMF_MEMORY_TYPE_HOST);
98+
UMF_RETURN_UR_ERROR(UmfResult);
99+
100+
// create UMF CUDA memory provider and pool for the host memory
101+
// (UMF_MEMORY_TYPE_HOST)
102+
UmfResult = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(),
103+
CUMemoryProviderParamsUnique.get(),
104+
MemoryProviderHost);
105+
UMF_RETURN_UR_ERROR(UmfResult);
106+
107+
UmfResult = umfPoolCreate(umfProxyPoolOps(), *MemoryProviderHost, nullptr, 0,
108+
MemoryPoolHost);
98109
UMF_RETURN_UR_ERROR(UmfResult);
99110

100111
return UR_RESULT_SUCCESS;
@@ -112,8 +123,10 @@ struct ur_context_handle_t_ {
112123
std::vector<ur_device_handle_t> Devices;
113124
std::atomic_uint32_t RefCount;
114125

115-
// UMF CUDA memory provider for the host memory (UMF_MEMORY_TYPE_HOST)
126+
// UMF CUDA memory provider and pool for the host memory
127+
// (UMF_MEMORY_TYPE_HOST)
116128
umf_memory_provider_handle_t MemoryProviderHost = nullptr;
129+
umf_memory_pool_handle_t MemoryPoolHost = nullptr;
117130

118131
ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices)
119132
: Devices{Devs, Devs + NumDevices}, RefCount{1} {
@@ -124,10 +137,14 @@ struct ur_context_handle_t_ {
124137
// Create UMF CUDA memory provider and pool for the host memory
125138
// (UMF_MEMORY_TYPE_HOST) from any device (Devices[0] is used here, because
126139
// it is guaranteed to exist).
127-
UR_CHECK_ERROR(CreateHostMemoryProvider(Devices[0], &MemoryProviderHost));
140+
UR_CHECK_ERROR(CreateHostMemoryProviderPool(Devices[0], &MemoryProviderHost,
141+
&MemoryPoolHost));
128142
};
129143

130144
~ur_context_handle_t_() {
145+
if (MemoryPoolHost) {
146+
umfPoolDestroy(MemoryPoolHost);
147+
}
131148
if (MemoryProviderHost) {
132149
umfMemoryProviderDestroy(MemoryProviderHost);
133150
}

source/adapters/cuda/device.hpp

+15-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include <ur/ur.hpp>
1313

14+
#include <umf/memory_pool.h>
1415
#include <umf/memory_provider.h>
1516

1617
#include "common.hpp"
@@ -84,9 +85,17 @@ struct ur_device_handle_t_ {
8485

8586
MemoryProviderDevice = nullptr;
8687
MemoryProviderShared = nullptr;
88+
MemoryPoolDevice = nullptr;
89+
MemoryPoolShared = nullptr;
8790
}
8891

8992
~ur_device_handle_t_() {
93+
if (MemoryPoolDevice) {
94+
umfPoolDestroy(MemoryPoolDevice);
95+
}
96+
if (MemoryPoolShared) {
97+
umfPoolDestroy(MemoryPoolShared);
98+
}
9099
if (MemoryProviderDevice) {
91100
umfMemoryProviderDestroy(MemoryProviderDevice);
92101
}
@@ -131,11 +140,15 @@ struct ur_device_handle_t_ {
131140
// bookkeeping for mipmappedArray leaks in Mapping external Memory
132141
std::map<CUarray, CUmipmappedArray> ChildCuarrayFromMipmapMap;
133142

134-
// UMF CUDA memory provider for the device memory (UMF_MEMORY_TYPE_DEVICE)
143+
// UMF CUDA memory provider and pool for the device memory
144+
// (UMF_MEMORY_TYPE_DEVICE)
135145
umf_memory_provider_handle_t MemoryProviderDevice;
146+
umf_memory_pool_handle_t MemoryPoolDevice;
136147

137-
// UMF CUDA memory provider for the shared memory (UMF_MEMORY_TYPE_SHARED)
148+
// UMF CUDA memory provider and pool for the shared memory
149+
// (UMF_MEMORY_TYPE_SHARED)
138150
umf_memory_provider_handle_t MemoryProviderShared;
151+
umf_memory_pool_handle_t MemoryPoolShared;
139152
};
140153

141154
int getAttribute(ur_device_handle_t Device, CUdevice_attribute Attribute);

source/adapters/cuda/memory.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
5050
cuMemHostRegister(HostPtr, size, CU_MEMHOSTREGISTER_DEVICEMAP));
5151
AllocMode = BufferMem::AllocMode::UseHostPtr;
5252
} else if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) {
53-
UMF_CHECK_ERROR(umfMemoryProviderAlloc(hContext->MemoryProviderHost, size,
54-
0, &HostPtr));
53+
HostPtr = umfPoolMalloc(hContext->MemoryPoolHost, size);
54+
UMF_CHECK_PTR(HostPtr);
5555
AllocMode = BufferMem::AllocMode::AllocHostPtr;
5656
} else if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) {
5757
AllocMode = BufferMem::AllocMode::CopyIn;
@@ -442,8 +442,8 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
442442
CU_MEMHOSTALLOC_DEVICEMAP));
443443
UR_CHECK_ERROR(cuMemHostGetDevicePointer(&DevPtr, Buffer.HostPtr, 0));
444444
} else {
445-
UMF_CHECK_ERROR(umfMemoryProviderAlloc(hDevice->MemoryProviderDevice,
446-
Buffer.Size, 0, (void **)&DevPtr));
445+
*(void **)&DevPtr = umfPoolMalloc(hDevice->MemoryPoolDevice, Buffer.Size);
446+
UMF_CHECK_PTR(*(void **)&DevPtr);
447447
}
448448
} else {
449449
CUarray ImageArray{};

source/adapters/cuda/memory.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -158,15 +158,15 @@ struct BufferMem {
158158
case AllocMode::Classic:
159159
for (auto &DevPtr : Ptrs) {
160160
if (DevPtr != native_type{0}) {
161-
UR_CHECK_ERROR(cuMemFree(DevPtr));
161+
UMF_CHECK_ERROR(umfFree((void *)DevPtr));
162162
}
163163
}
164164
break;
165165
case AllocMode::UseHostPtr:
166166
UR_CHECK_ERROR(cuMemHostUnregister(HostPtr));
167167
break;
168168
case AllocMode::AllocHostPtr:
169-
UR_CHECK_ERROR(cuMemFreeHost(HostPtr));
169+
UMF_CHECK_ERROR(umfFree((void *)HostPtr));
170170
}
171171
return UR_RESULT_SUCCESS;
172172
}

source/adapters/cuda/platform.cpp

+32-8
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#include <sstream>
2121

2222
static ur_result_t
23-
CreateDeviceMemoryProviders(ur_platform_handle_t_ *Platform) {
23+
CreateDeviceMemoryProvidersPools(ur_platform_handle_t_ *Platform) {
2424
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams = nullptr;
2525

2626
umf_result_t UmfResult =
@@ -37,16 +37,40 @@ CreateDeviceMemoryProviders(ur_platform_handle_t_ *Platform) {
3737

3838
// create UMF CUDA memory provider for the device memory
3939
// (UMF_MEMORY_TYPE_DEVICE)
40-
UmfResult = umf::createMemoryProvider(
41-
CUMemoryProviderParamsUnique.get(), device, context,
42-
UMF_MEMORY_TYPE_DEVICE, &device_handle->MemoryProviderDevice);
40+
UmfResult =
41+
umf::setCUMemoryProviderParams(CUMemoryProviderParamsUnique.get(),
42+
device, context, UMF_MEMORY_TYPE_DEVICE);
43+
UMF_RETURN_UR_ERROR(UmfResult);
44+
45+
UmfResult = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(),
46+
CUMemoryProviderParamsUnique.get(),
47+
&device_handle->MemoryProviderDevice);
4348
UMF_RETURN_UR_ERROR(UmfResult);
4449

4550
// create UMF CUDA memory provider for the shared memory
4651
// (UMF_MEMORY_TYPE_SHARED)
47-
UmfResult = umf::createMemoryProvider(
48-
CUMemoryProviderParamsUnique.get(), device, context,
49-
UMF_MEMORY_TYPE_SHARED, &device_handle->MemoryProviderShared);
52+
UmfResult =
53+
umf::setCUMemoryProviderParams(CUMemoryProviderParamsUnique.get(),
54+
device, context, UMF_MEMORY_TYPE_SHARED);
55+
UMF_RETURN_UR_ERROR(UmfResult);
56+
57+
UmfResult = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(),
58+
CUMemoryProviderParamsUnique.get(),
59+
&device_handle->MemoryProviderShared);
60+
UMF_RETURN_UR_ERROR(UmfResult);
61+
62+
// create UMF CUDA memory pool for the device memory
63+
// (UMF_MEMORY_TYPE_DEVICE)
64+
UmfResult =
65+
umfPoolCreate(umfProxyPoolOps(), device_handle->MemoryProviderDevice,
66+
nullptr, 0, &device_handle->MemoryPoolDevice);
67+
UMF_RETURN_UR_ERROR(UmfResult);
68+
69+
// create UMF CUDA memory pool for the shared memory
70+
// (UMF_MEMORY_TYPE_SHARED)
71+
UmfResult =
72+
umfPoolCreate(umfProxyPoolOps(), device_handle->MemoryProviderShared,
73+
nullptr, 0, &device_handle->MemoryPoolShared);
5074
UMF_RETURN_UR_ERROR(UmfResult);
5175
}
5276

@@ -134,7 +158,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries,
134158
static_cast<uint32_t>(i)});
135159
}
136160

137-
UR_CHECK_ERROR(CreateDeviceMemoryProviders(&Platform));
161+
UR_CHECK_ERROR(CreateDeviceMemoryProvidersPools(&Platform));
138162
} catch (const std::bad_alloc &) {
139163
// Signal out-of-memory situation
140164
for (int i = 0; i < NumDevices; ++i) {

0 commit comments

Comments
 (0)