// command_buffer.hpp -- forked from oneapi-src/unified-runtime
//===--------- command_buffer.hpp - CUDA Adapter --------------------------===//
//
// Copyright (C) 2023 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <ur_api.h>
#include "context.hpp"
#include <cuda.h>
#include <memory>
static inline const char *getUrResultString(ur_result_t Result) {
switch (Result) {
case UR_RESULT_SUCCESS:
return "UR_RESULT_SUCCESS";
case UR_RESULT_ERROR_INVALID_OPERATION:
return "UR_RESULT_ERROR_INVALID_OPERATION";
case UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES:
return "UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES";
case UR_RESULT_ERROR_INVALID_QUEUE:
return "UR_RESULT_ERROR_INVALID_QUEUE";
case UR_RESULT_ERROR_INVALID_VALUE:
return "UR_RESULT_ERROR_INVALID_VALUE";
case UR_RESULT_ERROR_INVALID_CONTEXT:
return "UR_RESULT_ERROR_INVALID_CONTEXT";
case UR_RESULT_ERROR_INVALID_PLATFORM:
return "UR_RESULT_ERROR_INVALID_PLATFORM";
case UR_RESULT_ERROR_INVALID_BINARY:
return "UR_RESULT_ERROR_INVALID_BINARY";
case UR_RESULT_ERROR_INVALID_PROGRAM:
return "UR_RESULT_ERROR_INVALID_PROGRAM";
case UR_RESULT_ERROR_INVALID_SAMPLER:
return "UR_RESULT_ERROR_INVALID_SAMPLER";
case UR_RESULT_ERROR_INVALID_BUFFER_SIZE:
return "UR_RESULT_ERROR_INVALID_BUFFER_SIZE";
case UR_RESULT_ERROR_INVALID_MEM_OBJECT:
return "UR_RESULT_ERROR_INVALID_MEM_OBJECT";
case UR_RESULT_ERROR_INVALID_EVENT:
return "UR_RESULT_ERROR_INVALID_EVENT";
case UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST:
return "UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST";
case UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET:
return "UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET";
case UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE:
return "UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE";
case UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE:
return "UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE";
case UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE:
return "UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE";
case UR_RESULT_ERROR_DEVICE_NOT_FOUND:
return "UR_RESULT_ERROR_DEVICE_NOT_FOUND";
case UR_RESULT_ERROR_INVALID_DEVICE:
return "UR_RESULT_ERROR_INVALID_DEVICE";
case UR_RESULT_ERROR_DEVICE_LOST:
return "UR_RESULT_ERROR_DEVICE_LOST";
case UR_RESULT_ERROR_DEVICE_REQUIRES_RESET:
return "UR_RESULT_ERROR_DEVICE_REQUIRES_RESET";
case UR_RESULT_ERROR_DEVICE_IN_LOW_POWER_STATE:
return "UR_RESULT_ERROR_DEVICE_IN_LOW_POWER_STATE";
case UR_RESULT_ERROR_DEVICE_PARTITION_FAILED:
return "UR_RESULT_ERROR_DEVICE_PARTITION_FAILED";
case UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT:
return "UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT";
case UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE:
return "UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE";
case UR_RESULT_ERROR_INVALID_WORK_DIMENSION:
return "UR_RESULT_ERROR_INVALID_WORK_DIMENSION";
case UR_RESULT_ERROR_INVALID_KERNEL_ARGS:
return "UR_RESULT_ERROR_INVALID_KERNEL_ARGS";
case UR_RESULT_ERROR_INVALID_KERNEL:
return "UR_RESULT_ERROR_INVALID_KERNEL";
case UR_RESULT_ERROR_INVALID_KERNEL_NAME:
return "UR_RESULT_ERROR_INVALID_KERNEL_NAME";
case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX:
return "UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX";
case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE:
return "UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE";
case UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE:
return "UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE";
case UR_RESULT_ERROR_INVALID_IMAGE_SIZE:
return "UR_RESULT_ERROR_INVALID_IMAGE_SIZE";
case UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR:
return "UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR";
case UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED:
return "UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED";
case UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE:
return "UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE";
case UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE:
return "UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE";
case UR_RESULT_ERROR_UNINITIALIZED:
return "UR_RESULT_ERROR_UNINITIALIZED";
case UR_RESULT_ERROR_OUT_OF_HOST_MEMORY:
return "UR_RESULT_ERROR_OUT_OF_HOST_MEMORY";
case UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY:
return "UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY";
case UR_RESULT_ERROR_OUT_OF_RESOURCES:
return "UR_RESULT_ERROR_OUT_OF_RESOURCES";
case UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE:
return "UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE";
case UR_RESULT_ERROR_PROGRAM_LINK_FAILURE:
return "UR_RESULT_ERROR_PROGRAM_LINK_FAILURE";
case UR_RESULT_ERROR_UNSUPPORTED_VERSION:
return "UR_RESULT_ERROR_UNSUPPORTED_VERSION";
case UR_RESULT_ERROR_UNSUPPORTED_FEATURE:
return "UR_RESULT_ERROR_UNSUPPORTED_FEATURE";
case UR_RESULT_ERROR_INVALID_ARGUMENT:
return "UR_RESULT_ERROR_INVALID_ARGUMENT";
case UR_RESULT_ERROR_INVALID_NULL_HANDLE:
return "UR_RESULT_ERROR_INVALID_NULL_HANDLE";
case UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE:
return "UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE";
case UR_RESULT_ERROR_INVALID_NULL_POINTER:
return "UR_RESULT_ERROR_INVALID_NULL_POINTER";
case UR_RESULT_ERROR_INVALID_SIZE:
return "UR_RESULT_ERROR_INVALID_SIZE";
case UR_RESULT_ERROR_UNSUPPORTED_SIZE:
return "UR_RESULT_ERROR_UNSUPPORTED_SIZE";
case UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT:
return "UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT";
case UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT:
return "UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT";
case UR_RESULT_ERROR_INVALID_ENUMERATION:
return "UR_RESULT_ERROR_INVALID_ENUMERATION";
case UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION:
return "UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION";
case UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT:
return "UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT";
case UR_RESULT_ERROR_INVALID_NATIVE_BINARY:
return "UR_RESULT_ERROR_INVALID_NATIVE_BINARY";
case UR_RESULT_ERROR_INVALID_GLOBAL_NAME:
return "UR_RESULT_ERROR_INVALID_GLOBAL_NAME";
case UR_RESULT_ERROR_INVALID_FUNCTION_NAME:
return "UR_RESULT_ERROR_INVALID_FUNCTION_NAME";
case UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION:
return "UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION";
case UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION:
return "UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION";
case UR_RESULT_ERROR_PROGRAM_UNLINKED:
return "UR_RESULT_ERROR_PROGRAM_UNLINKED";
case UR_RESULT_ERROR_OVERLAPPING_REGIONS:
return "UR_RESULT_ERROR_OVERLAPPING_REGIONS";
case UR_RESULT_ERROR_INVALID_HOST_PTR:
return "UR_RESULT_ERROR_INVALID_HOST_PTR";
case UR_RESULT_ERROR_INVALID_USM_SIZE:
return "UR_RESULT_ERROR_INVALID_USM_SIZE";
case UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE:
return "UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE";
case UR_RESULT_ERROR_ADAPTER_SPECIFIC:
return "UR_RESULT_ERROR_ADAPTER_SPECIFIC";
default:
return "UR_RESULT_ERROR_UNKNOWN";
}
}
// Trace an internal UR call whose result the caller does not need.
//
// Runs Call through UR_CALL, storing the outcome in a block-local variable
// named Result that is discarded when the block ends.
//
// Notes for users of this macro:
//  - The expansion is a brace-enclosed block, not an expression, so it yields
//    no value.
//  - Because the local is called Result, a Call expression that itself
//    mentions an outer variable named Result will bind to this local instead.
//  - Everything UR_CALL requires at the expansion site applies here as well.
#define UR_TRACE(Call) \
{ \
ur_result_t Result; \
UR_CALL(Call, Result); \
}
// Trace an internal UR call and return the result to the user.
//
// Evaluates Call exactly once and assigns its value to Result, which must be
// an assignable lvalue supplied by the caller. When PrintTrace is true, one
// line is written to stderr before the call ("UR ---> <Call text>") and one
// after it ("UR <--- <Call text>(<result name>)"), where the result name
// comes from getUrResultString.
//
// Notes for users of this macro:
//  - PrintTrace is not defined in this header; it has to be visible wherever
//    the macro is expanded.
//  - The expansion is a brace-enclosed block containing bare `if` statements,
//    so it is a statement and cannot be used where an expression is required.
#define UR_CALL(Call, Result) \
{ \
if (PrintTrace) \
fprintf(stderr, "UR ---> %s\n", #Call); \
Result = (Call); \
if (PrintTrace) \
fprintf(stderr, "UR <--- %s(%s)\n", #Call, getUrResultString(Result)); \
}
// State behind a UR experimental command-buffer handle in the CUDA adapter.
// It groups the owning context/device, the CUDA graph handles, a reference
// count, and the table of sync points handed out for nodes added to the graph.
struct ur_exp_command_buffer_handle_t_ {
  // Constructor and destructor are defined out of line.
  ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext,
                                  ur_device_handle_t hDevice);
  ~ur_exp_command_buffer_handle_t_();

  // Stores CuNode under the key SyncPoint, replacing whatever node was
  // previously stored for that key, and then advances NextSyncPoint by one.
  // The counter is advanced regardless of the value of SyncPoint.
  void RegisterSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint,
                         std::shared_ptr<CUgraphNode> CuNode) {
    std::shared_ptr<CUgraphNode> &Slot = SyncPoints[SyncPoint];
    Slot = std::move(CuNode);
    NextSyncPoint += 1;
  }

  // Returns the sync-point value that the next AddSyncPoint call will use.
  ur_exp_command_buffer_sync_point_t GetNextSyncPoint() const {
    return NextSyncPoint;
  }

  // Registers CuNode under the current NextSyncPoint value.
  // @param CuNode Node to register as the next sync point
  // @return The sync-point value under which the node was registered
  ur_exp_command_buffer_sync_point_t
  AddSyncPoint(std::shared_ptr<CUgraphNode> CuNode) {
    // Capture the id first: RegisterSyncPoint bumps NextSyncPoint.
    const ur_exp_command_buffer_sync_point_t Assigned = NextSyncPoint;
    RegisterSyncPoint(Assigned, std::move(CuNode));
    return Assigned;
  }

  // UR context associated with this command-buffer
  ur_context_handle_t Context;
  // Device associated with this command-buffer
  ur_device_handle_t Device;
  // CUDA graph handle
  CUgraph CudaGraph;
  // CUDA executable-graph handle
  CUgraphExec CudaGraphExec;
  // Number of references held on this command-buffer. It is atomic so that
  // concurrent increments and decrements do not race.
  std::atomic_uint32_t RefCount;

  // Map from sync-point value to the CUDA graph node registered for it
  std::unordered_map<ur_exp_command_buffer_sync_point_t,
                     std::shared_ptr<CUgraphNode>>
      SyncPoints;
  // Next sync-point value to hand out (may need to consider ways to reuse
  // values if 32 bits is not enough)
  ur_exp_command_buffer_sync_point_t NextSyncPoint;

  // Used when retaining an object; returns the count after the increment.
  uint32_t incrementReferenceCount() noexcept {
    return RefCount.fetch_add(1) + 1;
  }
  // Used when releasing an object; returns the count after the decrement.
  uint32_t decrementReferenceCount() noexcept {
    return RefCount.fetch_sub(1) - 1;
  }
  // Returns the current reference count.
  uint32_t getReferenceCount() const noexcept { return RefCount.load(); }
};