Skip to content

Commit ac29419

Browse files
committed
Tidy and move common adapter code to ur_common
1 parent cf26de2 commit ac29419

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

78 files changed

+660
-603
lines changed

source/adapters/cuda/CMakeLists.txt

-6
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,6 @@ add_ur_adapter(${TARGET_NAME}
4040
${CMAKE_CURRENT_SOURCE_DIR}/tracing.cpp
4141
${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp
4242
${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp
43-
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
44-
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp
4543
)
4644

4745
set_target_properties(${TARGET_NAME} PROPERTIES
@@ -82,7 +80,3 @@ target_link_libraries(${TARGET_NAME} PRIVATE
8280
Threads::Threads
8381
cudadrv
8482
)
85-
86-
target_include_directories(${TARGET_NAME} PRIVATE
87-
"${CMAKE_CURRENT_SOURCE_DIR}/../../"
88-
)

source/adapters/cuda/adapter.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
7676
size_t propSize,
7777
void *pPropValue,
7878
size_t *pPropSizeRet) {
79-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
79+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
80+
pPropSizeRet);
8081

8182
switch (propName) {
8283
case UR_ADAPTER_INFO_BACKEND:

source/adapters/cuda/command_buffer.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11-
#include <ur/ur.hpp>
11+
#include <ur_api.h>
1212

1313
/// Stub implementation of command-buffers for CUDA
1414

source/adapters/cuda/common.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
#pragma once
1111

1212
#include <cuda.h>
13-
#include <ur/ur.hpp>
13+
#include <ur_adapter_util.hpp>
1414

1515
ur_result_t mapErrorUR(CUresult Result);
1616

source/adapters/cuda/context.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,8 @@ urContextCreate(uint32_t DeviceCount, const ur_device_handle_t *phDevices,
6868
UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo(
6969
ur_context_handle_t hContext, ur_context_info_t ContextInfoType,
7070
size_t propSize, void *pContextInfo, size_t *pPropSizeRet) {
71-
UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet);
71+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pContextInfo,
72+
pPropSizeRet);
7273

7374
switch (static_cast<uint32_t>(ContextInfoType)) {
7475
case UR_CONTEXT_INFO_NUM_DEVICES:

source/adapters/cuda/device.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
4141
size_t propSize,
4242
void *pPropValue,
4343
size_t *pPropSizeRet) {
44-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
44+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
45+
pPropSizeRet);
4546

4647
static constexpr uint32_t MaxWorkItemDimensions = 3u;
4748

source/adapters/cuda/device.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
//===----------------------------------------------------------------------===//
1010
#pragma once
1111

12-
#include <ur/ur.hpp>
12+
#include <ur_api.h>
1313

1414
struct ur_device_handle_t_ {
1515
private:

source/adapters/cuda/event.cpp

+4-2
Original file line number | Diff line number | Diff line change
@@ -169,7 +169,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent,
169169
size_t propValueSize,
170170
void *pPropValue,
171171
size_t *pPropValueSizeRet) {
172-
UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet);
172+
ur::adapter_util::ReturnHelper ReturnValue(propValueSize, pPropValue,
173+
pPropValueSizeRet);
173174

174175
switch (propName) {
175176
case UR_EVENT_INFO_COMMAND_QUEUE:
@@ -194,7 +195,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent,
194195
UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
195196
ur_event_handle_t hEvent, ur_profiling_info_t propName,
196197
size_t propValueSize, void *pPropValue, size_t *pPropValueSizeRet) {
197-
UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet);
198+
ur::adapter_util::ReturnHelper ReturnValue(propValueSize, pPropValue,
199+
pPropValueSizeRet);
198200

199201
ur_queue_handle_t Queue = hEvent->getQueue();
200202
if (Queue == nullptr || !(Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE)) {

source/adapters/cuda/event.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
#pragma once
1111

1212
#include <cuda.h>
13-
#include <ur/ur.hpp>
13+
#include <ur_api.h>
1414

1515
#include "queue.hpp"
1616

source/adapters/cuda/image.cpp

-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
#include "memory.hpp"
2121
#include "queue.hpp"
2222
#include "sampler.hpp"
23-
#include "ur/ur.hpp"
2423
#include "ur_api.h"
2524

2625
ur_result_t urCalculateNumChannels(ur_image_channel_order_t order,

source/adapters/cuda/kernel.cpp

+6-3
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,8 @@ UR_APIEXPORT ur_result_t UR_APICALL
6262
urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
6363
ur_kernel_group_info_t propName, size_t propSize,
6464
void *pPropValue, size_t *pPropSizeRet) {
65-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
65+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
66+
pPropSizeRet);
6667

6768
switch (propName) {
6869
case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: {
@@ -205,7 +206,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel,
205206
size_t propSize,
206207
void *pKernelInfo,
207208
size_t *pPropSizeRet) {
208-
UrReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet);
209+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pKernelInfo,
210+
pPropSizeRet);
209211

210212
switch (propName) {
211213
case UR_KERNEL_INFO_FUNCTION_NAME:
@@ -237,7 +239,8 @@ UR_APIEXPORT ur_result_t UR_APICALL
237239
urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
238240
ur_kernel_sub_group_info_t propName, size_t propSize,
239241
void *pPropValue, size_t *pPropSizeRet) {
240-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
242+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
243+
pPropSizeRet);
241244
switch (propName) {
242245
case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: {
243246
// Sub-group size is equivalent to warp size

source/adapters/cuda/memory.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory,
175175
size_t *pPropSizeRet) {
176176
UR_ASSERT(hMemory->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);
177177

178-
UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet);
178+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet);
179179

180180
ScopedContext Active(hMemory->getContext());
181181

source/adapters/cuda/platform.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo(
2222
size_t Size, void *pPlatformInfo, size_t *pSizeRet) {
2323

2424
UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
25-
UrReturnHelper ReturnValue(Size, pPlatformInfo, pSizeRet);
25+
ur::adapter_util::ReturnHelper ReturnValue(Size, pPlatformInfo, pSizeRet);
2626

2727
switch (PlatformInfoType) {
2828
case UR_PLATFORM_INFO_NAME:

source/adapters/cuda/platform.hpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@
99
//===----------------------------------------------------------------------===//
1010
#pragma once
1111

12-
#include <ur/ur.hpp>
12+
#include <memory>
13+
#include <ur_api.h>
1314
#include <vector>
1415

1516
struct ur_platform_handle_t_ {

source/adapters/cuda/program.cpp

+4-2
Original file line number | Diff line number | Diff line change
@@ -277,7 +277,8 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice,
277277
void *pPropValue, size_t *pPropSizeRet) {
278278
std::ignore = hDevice;
279279

280-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
280+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
281+
pPropSizeRet);
281282

282283
switch (propName) {
283284
case UR_PROGRAM_BUILD_INFO_STATUS: {
@@ -296,7 +297,8 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice,
296297
UR_APIEXPORT ur_result_t UR_APICALL
297298
urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName,
298299
size_t propSize, void *pProgramInfo, size_t *pPropSizeRet) {
299-
UrReturnHelper ReturnValue(propSize, pProgramInfo, pPropSizeRet);
300+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pProgramInfo,
301+
pPropSizeRet);
300302

301303
switch (propName) {
302304
case UR_PROGRAM_INFO_REFERENCE_COUNT:

source/adapters/cuda/queue.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -291,7 +291,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue,
291291
size_t propValueSize,
292292
void *pPropValue,
293293
size_t *pPropSizeRet) {
294-
UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet);
294+
ur::adapter_util::ReturnHelper ReturnValue(propValueSize, pPropValue,
295+
pPropSizeRet);
295296

296297
switch (propName) {
297298
case UR_QUEUE_INFO_CONTEXT:

source/adapters/cuda/queue.hpp

+3-1
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,12 @@
99
//===----------------------------------------------------------------------===//
1010
#pragma once
1111

12-
#include <ur/ur.hpp>
12+
#include <ur_api.h>
1313

1414
#include <algorithm>
15+
#include <atomic>
1516
#include <cuda.h>
17+
#include <mutex>
1618
#include <vector>
1719

1820
using ur_stream_guard_ = std::unique_lock<std::mutex>;

source/adapters/cuda/sampler.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc,
4949
UR_APIEXPORT ur_result_t UR_APICALL
5050
urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName,
5151
size_t propValueSize, void *pPropValue, size_t *pPropSizeRet) {
52-
UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet);
52+
ur::adapter_util::ReturnHelper ReturnValue(propValueSize, pPropValue,
53+
pPropSizeRet);
5354

5455
switch (propName) {
5556
case UR_SAMPLER_INFO_REFERENCE_COUNT:

source/adapters/cuda/sampler.hpp

+3-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11-
#include <ur/ur.hpp>
11+
#include <ur_api.h>
12+
13+
#include <atomic>
1214

1315
/// Implementation of samplers for CUDA
1416
///

source/adapters/cuda/usm.cpp

+4-2
Original file line number | Diff line number | Diff line change
@@ -195,7 +195,8 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
195195
void *pPropValue, size_t *pPropValueSizeRet) {
196196
ur_result_t Result = UR_RESULT_SUCCESS;
197197

198-
UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet);
198+
ur::adapter_util::ReturnHelper ReturnValue(propValueSize, pPropValue,
199+
pPropValueSizeRet);
199200

200201
try {
201202
ScopedContext Active(hContext);
@@ -489,7 +490,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo(
489490
size_t *pPropSizeRet ///< [out][optional] size in bytes returned in pool
490491
///< property value
491492
) {
492-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
493+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
494+
pPropSizeRet);
493495

494496
switch (propName) {
495497
case UR_USM_POOL_INFO_REFERENCE_COUNT: {

source/adapters/cuda/usm_p2p.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp(
4242
ur_exp_peer_info_t propName, size_t propSize, void *pPropValue,
4343
size_t *pPropSizeRet) {
4444

45-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
45+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
46+
pPropSizeRet);
4647

4748
int value;
4849
CUdevice_P2PAttribute cu_attr;

source/adapters/hip/CMakeLists.txt

-6
Original file line number | Diff line number | Diff line change
@@ -73,8 +73,6 @@ add_ur_adapter(${TARGET_NAME}
7373
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp
7474
${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp
7575
${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp
76-
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
77-
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp
7876
)
7977

8078
if(NOT MSVC)
@@ -159,7 +157,3 @@ elseif("${UR_HIP_PLATFORM}" STREQUAL "NVIDIA")
159157
else()
160158
message(FATAL_ERROR "Unspecified UR HIP platform please set UR_HIP_PLATFORM to 'AMD' or 'NVIDIA'")
161159
endif()
162-
163-
target_include_directories(${TARGET_NAME} PRIVATE
164-
"${CMAKE_CURRENT_SOURCE_DIR}/../../"
165-
)

source/adapters/hip/adapter.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
6565
size_t propSize,
6666
void *pPropValue,
6767
size_t *pPropSizeRet) {
68-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
68+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
69+
pPropSizeRet);
6970

7071
switch (propName) {
7172
case UR_ADAPTER_INFO_BACKEND:

source/adapters/hip/command_buffer.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11-
#include <ur/ur.hpp>
11+
#include <ur_api.h>
1212

1313
/// Stub implementation of command-buffers for HIP
1414

source/adapters/hip/common.hpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@
1313
#include <amd_comgr/amd_comgr.h>
1414
#endif
1515
#include <hip/hip_runtime.h>
16-
#include <ur/ur.hpp>
16+
#include <ur_adapter_util.hpp>
17+
#include <ur_api.h>
1718

1819
// Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be
1920
// indexed, but on NVidia it is an opaque type and needs to go through

source/adapters/hip/context.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,8 @@ UR_APIEXPORT ur_result_t UR_APICALL
7474
urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName,
7575
size_t propSize, void *pPropValue, size_t *pPropSizeRet) {
7676

77-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
77+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
78+
pPropSizeRet);
7879

7980
switch (uint32_t{propName}) {
8081
case UR_CONTEXT_INFO_NUM_DEVICES:

source/adapters/hip/device.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
2525
size_t propSize,
2626
void *pPropValue,
2727
size_t *pPropSizeRet) {
28-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
28+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
29+
pPropSizeRet);
2930

3031
static constexpr uint32_t MaxWorkItemDimensions = 3u;
3132

source/adapters/hip/device.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
#include "common.hpp"
1313

14-
#include <ur/ur.hpp>
14+
#include <ur_api.h>
1515

1616
/// UR device mapping to a hipDevice_t.
1717
/// Includes an observer pointer to the platform,

source/adapters/hip/event.cpp

+4-2
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent,
217217
size_t *pPropValueSizeRet) {
218218
UR_ASSERT(!(pPropValue && propValueSize == 0), UR_RESULT_ERROR_INVALID_SIZE);
219219

220-
UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet);
220+
ur::adapter_util::ReturnHelper ReturnValue(propValueSize, pPropValue,
221+
pPropValueSizeRet);
221222
switch (propName) {
222223
case UR_EVENT_INFO_COMMAND_QUEUE:
223224
return ReturnValue(hEvent->getQueue());
@@ -249,7 +250,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
249250
return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE;
250251
}
251252

252-
UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet);
253+
ur::adapter_util::ReturnHelper ReturnValue(propValueSize, pPropValue,
254+
pPropValueSizeRet);
253255
switch (propName) {
254256
case UR_PROFILING_INFO_COMMAND_QUEUED:
255257
case UR_PROFILING_INFO_COMMAND_SUBMIT:

source/adapters/hip/image.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11-
#include "ur/ur.hpp"
11+
#include "ur_api.h"
1212

1313
UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp(
1414
[[maybe_unused]] ur_context_handle_t hContext,

source/adapters/hip/kernel.cpp

+6-3
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,8 @@ UR_APIEXPORT ur_result_t UR_APICALL
5353
urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
5454
ur_kernel_group_info_t propName, size_t propSize,
5555
void *pPropValue, size_t *pPropSizeRet) {
56-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
56+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
57+
pPropSizeRet);
5758

5859
switch (propName) {
5960
case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: {
@@ -186,7 +187,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel,
186187
size_t propSize,
187188
void *pKernelInfo,
188189
size_t *pPropSizeRet) {
189-
UrReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet);
190+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pKernelInfo,
191+
pPropSizeRet);
190192

191193
switch (propName) {
192194
case UR_KERNEL_INFO_FUNCTION_NAME:
@@ -212,7 +214,8 @@ UR_APIEXPORT ur_result_t UR_APICALL
212214
urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
213215
ur_kernel_sub_group_info_t propName, size_t propSize,
214216
void *pPropValue, size_t *pPropSizeRet) {
215-
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
217+
ur::adapter_util::ReturnHelper ReturnValue(propSize, pPropValue,
218+
pPropSizeRet);
216219
switch (propName) {
217220
case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: {
218221
// Sub-group size is equivalent to warp size

0 commit comments

Comments
 (0)