Skip to content

Commit 3e1f163

Browse files
authored
Merge branch 'adapters' into adapter-batch1
2 parents 18d333f + 5e914c5 commit 3e1f163

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+682
-190
lines changed

.github/workflows/cmake.yml

+5-4
Original file line number | Diff line number | Diff line change
@@ -163,9 +163,10 @@ jobs:
163163
strategy:
164164
matrix:
165165
adapter: [
166-
{name: CUDA, triplet: nvptx64-nvidia-cuda},
167-
{name: HIP, triplet: amdgcn-amd-amdhsa},
168-
{name: L0, triplet: spir64}
166+
{name: CUDA, triplet: nvptx64-nvidia-cuda, platform: ""},
167+
{name: HIP, triplet: amdgcn-amd-amdhsa, platform: ""},
168+
{name: L0, triplet: spir64, platform: ""},
169+
{name: OPENCL, triplet: spir64, platform: "Intel(R) OpenCL"}
169170
]
170171
build_type: [Debug, Release]
171172
compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}]
@@ -219,7 +220,7 @@ jobs:
219220
- name: Test adapters
220221
if: matrix.adapter.name != 'L0'
221222
working-directory: ${{github.workspace}}/build
222-
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180
223+
run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180
223224

224225
examples-build-hw:
225226
name: Build - examples on HW

CMakeLists.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

66
cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR)
7-
project(unified-runtime VERSION 0.7.0)
7+
project(unified-runtime VERSION 0.9.0)
88

99
include(GNUInstallDirs)
1010
include(CheckCXXSourceCompiles)

include/ur.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
77
88
@file ur.py
9-
@version v0.7-r0
9+
@version v0.9-r0
1010
1111
"""
1212
import platform
@@ -573,7 +573,9 @@ def __str__(self):
573573
class ur_api_version_v(IntEnum):
574574
_0_6 = UR_MAKE_VERSION( 0, 6 ) ## version 0.6
575575
_0_7 = UR_MAKE_VERSION( 0, 7 ) ## version 0.7
576-
CURRENT = UR_MAKE_VERSION( 0, 7 ) ## latest known version
576+
_0_8 = UR_MAKE_VERSION( 0, 8 ) ## version 0.8
577+
_0_9 = UR_MAKE_VERSION( 0, 9 ) ## version 0.9
578+
CURRENT = UR_MAKE_VERSION( 0, 9 ) ## latest known version
577579

578580
class ur_api_version_t(c_int):
579581
def __str__(self):

include/ur_api.h

+4-2
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
88
*
99
* @file ur_api.h
10-
* @version v0.7-r0
10+
* @version v0.9-r0
1111
*
1212
*/
1313
#ifndef UR_API_H_INCLUDED
@@ -1025,7 +1025,9 @@ urPlatformGetInfo(
10251025
typedef enum ur_api_version_t {
10261026
UR_API_VERSION_0_6 = UR_MAKE_VERSION(0, 6), ///< version 0.6
10271027
UR_API_VERSION_0_7 = UR_MAKE_VERSION(0, 7), ///< version 0.7
1028-
UR_API_VERSION_CURRENT = UR_MAKE_VERSION(0, 7), ///< latest known version
1028+
UR_API_VERSION_0_8 = UR_MAKE_VERSION(0, 8), ///< version 0.8
1029+
UR_API_VERSION_0_9 = UR_MAKE_VERSION(0, 9), ///< version 0.9
1030+
UR_API_VERSION_CURRENT = UR_MAKE_VERSION(0, 9), ///< latest known version
10291031
/// @cond
10301032
UR_API_VERSION_FORCE_UINT32 = 0x7fffffff
10311033
/// @endcond

include/ur_ddi.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
88
*
99
* @file ur_ddi.h
10-
* @version v0.7-r0
10+
* @version v0.9-r0
1111
*
1212
*/
1313
#ifndef UR_DDI_H_INCLUDED

scripts/Doxyfile

+1-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ PROJECT_NAME = "Intel One API Unified Runtime API"
3838
# could be handy for archiving the generated documentation or if some version
3939
# control system is used.
4040

41-
PROJECT_NUMBER = v0.7
41+
PROJECT_NUMBER = v0.9
4242

4343
# Using the PROJECT_BRIEF tag one can provide an optional one line description
4444
# for a project that appears at the top of each page and should give viewer a

scripts/core/platform.yml

+6
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,12 @@ etors:
133133
- name: "0_7"
134134
value: "$X_MAKE_VERSION( 0, 7 )"
135135
desc: "version 0.7"
136+
- name: "0_8"
137+
value: "$X_MAKE_VERSION( 0, 8 )"
138+
desc: "version 0.8"
139+
- name: "0_9"
140+
value: "$X_MAKE_VERSION( 0, 9 )"
141+
desc: "version 0.9"
136142
--- #--------------------------------------------------------------------------
137143
type: function
138144
desc: "Returns the API version supported by the specified platform"

scripts/parse_specs.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,8 @@
1818
import ctypes
1919
import itertools
2020

21-
default_version = "0.7"
22-
all_versions = ["0.6", "0.7"]
21+
default_version = "0.9"
22+
all_versions = ["0.6", "0.7", "0.8", "0.9"]
2323

2424
"""
2525
preprocess object

source/adapters/cuda/command_buffer.cpp

+12-1
Original file line number | Diff line number | Diff line change
@@ -140,8 +140,19 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
140140
UR_APIEXPORT ur_result_t UR_APICALL
141141
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
142142
try {
143+
const unsigned long long flags = 0;
144+
#if CUDA_VERSION >= 12000
143145
UR_CHECK_ERROR(cuGraphInstantiate(&hCommandBuffer->CudaGraphExec,
144-
hCommandBuffer->CudaGraph, 0));
146+
hCommandBuffer->CudaGraph, flags));
147+
#elif CUDA_VERSION >= 11040
148+
UR_CHECK_ERROR(cuGraphInstantiateWithFlags(
149+
&hCommandBuffer->CudaGraphExec, hCommandBuffer->CudaGraph, flags));
150+
#else
151+
// Cannot use flags
152+
UR_CHECK_ERROR(cuGraphInstantiate(&hCommandBuffer->CudaGraphExec,
153+
hCommandBuffer->CudaGraph, nullptr,
154+
nullptr, 0));
155+
#endif
145156
} catch (...) {
146157
return UR_RESULT_ERROR_UNKNOWN;
147158
}

source/adapters/cuda/program.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -263,8 +263,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext,
263263
}
264264

265265
UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp(
266-
ur_context_handle_t, uint32_t, const ur_program_handle_t *, uint32_t,
267-
ur_device_handle_t *, const char *, ur_program_handle_t *) {
266+
ur_context_handle_t, uint32_t, ur_device_handle_t *, uint32_t,
267+
const ur_program_handle_t *, const char *, ur_program_handle_t *) {
268268
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
269269
}
270270

source/adapters/cuda/ur_interface_loader.cpp

+3-3
Original file line number | Diff line number | Diff line change
@@ -399,9 +399,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
399399
if (UR_RESULT_SUCCESS != retVal) {
400400
return retVal;
401401
}
402-
pDdiTable->pfnBuildExp = nullptr;
403-
pDdiTable->pfnCompileExp = nullptr;
404-
pDdiTable->pfnLinkExp = nullptr;
402+
pDdiTable->pfnBuildExp = urProgramBuildExp;
403+
pDdiTable->pfnCompileExp = urProgramCompileExp;
404+
pDdiTable->pfnLinkExp = urProgramLinkExp;
405405
return retVal;
406406
}
407407

source/adapters/hip/memory.cpp

+11-3
Original file line number | Diff line number | Diff line change
@@ -273,8 +273,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory,
273273
UR_CHECK_ERROR(hipMemGetAddressRange(&BasePtr, &AllocSize, Mem.Ptr));
274274
return AllocSize;
275275
} else if constexpr (std::is_same_v<T, SurfaceMem>) {
276+
#if HIP_VERSION < 50600000
277+
throw UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
278+
#else
276279
HIP_ARRAY3D_DESCRIPTOR ArrayDescriptor;
277-
UR_CHECK_ERROR(hipArray3DGetDescriptor(&ArrayDescriptor, Mem.Array));
280+
UR_CHECK_ERROR(
281+
hipArray3DGetDescriptor(&ArrayDescriptor, Mem.getArray()));
278282
const auto PixelSizeBytes =
279283
GetHipFormatPixelSize(ArrayDescriptor.Format) *
280284
ArrayDescriptor.NumChannels;
@@ -284,6 +288,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory,
284288
(ArrayDescriptor.Height ? ArrayDescriptor.Height : 1) *
285289
(ArrayDescriptor.Depth ? ArrayDescriptor.Depth : 1);
286290
return ImageSizeBytes;
291+
#endif
287292
} else {
288293
static_assert(ur_always_false_t<T>, "Not exhaustive visitor!");
289294
}
@@ -535,10 +540,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory,
535540
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
536541

537542
try {
538-
539543
HIP_ARRAY3D_DESCRIPTOR ArrayInfo;
544+
#if HIP_VERSION >= 50600000
540545
UR_CHECK_ERROR(hipArray3DGetDescriptor(
541-
&ArrayInfo, std::get<SurfaceMem>(hMemory->Mem).Array));
546+
&ArrayInfo, std::get<SurfaceMem>(hMemory->Mem).getArray()));
547+
#else
548+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
549+
#endif
542550

543551
const auto hip2urFormat =
544552
[](hipArray_Format HipFormat) -> ur_image_channel_type_t {

source/adapters/hip/program.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -279,8 +279,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t,
279279
}
280280

281281
UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp(
282-
ur_context_handle_t, uint32_t, const ur_program_handle_t *, uint32_t,
283-
ur_device_handle_t *, const char *, ur_program_handle_t *) {
282+
ur_context_handle_t, uint32_t, ur_device_handle_t *, uint32_t,
283+
const ur_program_handle_t *, const char *, ur_program_handle_t *) {
284284
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
285285
}
286286

source/adapters/hip/ur_interface_loader.cpp

+3-3
Original file line number | Diff line number | Diff line change
@@ -370,9 +370,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
370370
if (UR_RESULT_SUCCESS != retVal) {
371371
return retVal;
372372
}
373-
pDdiTable->pfnBuildExp = nullptr;
374-
pDdiTable->pfnCompileExp = nullptr;
375-
pDdiTable->pfnLinkExp = nullptr;
373+
pDdiTable->pfnBuildExp = urProgramBuildExp;
374+
pDdiTable->pfnCompileExp = urProgramCompileExp;
375+
pDdiTable->pfnLinkExp = urProgramLinkExp;
376376
return retVal;
377377
}
378378

source/adapters/level_zero/adapter.cpp

+2-3
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ ur_result_t adapterStateTeardown() {
3838
// Print the balance of various create/destroy native calls.
3939
// The idea is to verify if the number of create(+) and destroy(-) calls are
4040
// matched.
41-
if (ZeCallCount && (UrL0Debug & UR_L0_DEBUG_CALL_COUNT) != 0) {
41+
if (ZeCallCount && (UrL0LeaksDebug) != 0) {
4242
// clang-format off
4343
//
4444
// The format of this table is such that each row accounts for a
@@ -79,8 +79,7 @@ ur_result_t adapterStateTeardown() {
7979
//
8080
// clang-format on
8181

82-
fprintf(stderr, "ZE_DEBUG=%d: check balance of create/destroy calls\n",
83-
UR_L0_DEBUG_CALL_COUNT);
82+
fprintf(stderr, "Check balance of create/destroy calls\n");
8483
fprintf(stderr,
8584
"----------------------------------------------------------\n");
8685
for (const auto &Row : CreateDestroySet) {

source/adapters/level_zero/common.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,7 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName,
149149
const char *ZeArgs, bool TraceError) {
150150
urPrint("ZE ---> %s%s\n", ZeName, ZeArgs);
151151

152-
if (UrL0Debug & UR_L0_DEBUG_CALL_COUNT) {
152+
if (UrL0LeaksDebug) {
153153
++(*ZeCallCount)[ZeName];
154154
}
155155

source/adapters/level_zero/common.hpp

+7-1
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,6 @@ enum UrDebugLevel {
187187
UR_L0_DEBUG_NONE = 0x0,
188188
UR_L0_DEBUG_BASIC = 0x1,
189189
UR_L0_DEBUG_VALIDATION = 0x2,
190-
UR_L0_DEBUG_CALL_COUNT = 0x4,
191190
UR_L0_DEBUG_ALL = -1
192191
};
193192

@@ -203,6 +202,13 @@ const int UrL0Debug = [] {
203202
return DebugMode;
204203
}();
205204

205+
const int UrL0LeaksDebug = [] {
206+
const char *UrRet = std::getenv("UR_L0_LEAKS_DEBUG");
207+
if (!UrRet)
208+
return 0;
209+
return std::atoi(UrRet);
210+
}();
211+
206212
// Controls Level Zero calls serialization to w/a Level Zero driver being not MT
207213
// ready. Recognized values (can be used as a bit mask):
208214
enum {

source/adapters/level_zero/platform.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(
3030
static std::once_flag ZeCallCountInitialized;
3131
try {
3232
std::call_once(ZeCallCountInitialized, []() {
33-
if (UrL0Debug & UR_L0_DEBUG_CALL_COUNT) {
33+
if (UrL0LeaksDebug) {
3434
ZeCallCount = new std::map<std::string, int>;
3535
}
3636
});

source/adapters/native_cpu/common.hpp

+8-2
Original file line number | Diff line number | Diff line change
@@ -61,10 +61,16 @@ struct _ur_object {
6161
ur_shared_mutex Mutex;
6262
};
6363

64+
// Todo: replace this with a common helper once it is available
6465
struct RefCounted {
6566
std::atomic_uint32_t _refCount;
66-
void incrementReferenceCount() { _refCount++; }
67-
void decrementReferenceCount() { _refCount--; }
67+
uint32_t incrementReferenceCount() { return ++_refCount; }
68+
uint32_t decrementReferenceCount() { return --_refCount; }
6869
RefCounted() : _refCount{1} {}
6970
uint32_t getReferenceCount() const { return _refCount; }
7071
};
72+
73+
template <typename T> inline void decrementOrDelete(T *refC) {
74+
if (refC->decrementReferenceCount() == 0)
75+
delete refC;
76+
}

source/adapters/native_cpu/context.cpp

+3-3
Original file line number | Diff line number | Diff line change
@@ -32,13 +32,13 @@ urContextCreate(uint32_t DeviceCount, const ur_device_handle_t *phDevices,
3232

3333
UR_APIEXPORT ur_result_t UR_APICALL
3434
urContextRetain(ur_context_handle_t hContext) {
35-
std::ignore = hContext;
36-
DIE_NO_IMPLEMENTATION
35+
hContext->incrementReferenceCount();
36+
return UR_RESULT_SUCCESS;
3737
}
3838

3939
UR_APIEXPORT ur_result_t UR_APICALL
4040
urContextRelease(ur_context_handle_t hContext) {
41-
delete hContext;
41+
decrementOrDelete(hContext);
4242
return UR_RESULT_SUCCESS;
4343
}
4444

source/adapters/native_cpu/context.hpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,10 @@
1212

1313
#include <ur_api.h>
1414

15+
#include "common.hpp"
1516
#include "device.hpp"
1617

17-
struct ur_context_handle_t_ {
18+
struct ur_context_handle_t_ : RefCounted {
1819
ur_context_handle_t_(ur_device_handle_t_ *phDevices) : _device{phDevices} {}
1920

2021
ur_device_handle_t _device;

source/adapters/native_cpu/enqueue.cpp

+5-4
Original file line number | Diff line number | Diff line change
@@ -144,12 +144,11 @@ static inline ur_result_t enqueueMemBufferReadWriteRect_impl(
144144
size_t host_origin = (d + HostOffset.z) * HostSlicePitch +
145145
(h + HostOffset.y) * HostRowPitch + w +
146146
HostOffset.x;
147-
int8_t &host_mem = ur_cast<int8_t *>(DstMem)[host_origin];
148147
int8_t &buff_mem = ur_cast<int8_t *>(Buff->_mem)[buff_orign];
149-
if (IsRead)
150-
host_mem = buff_mem;
148+
if constexpr (IsRead)
149+
ur_cast<int8_t *>(DstMem)[host_origin] = buff_mem;
151150
else
152-
buff_mem = host_mem;
151+
buff_mem = ur_cast<const int8_t *>(DstMem)[host_origin];
153152
}
154153
return UR_RESULT_SUCCESS;
155154
}
@@ -160,6 +159,8 @@ static inline ur_result_t doCopy_impl(ur_queue_handle_t hQueue, void *DstPtr,
160159
const ur_event_handle_t *EventWaitList,
161160
ur_event_handle_t *Event) {
162161
// todo: non-blocking, events, UR integration
162+
std::ignore = EventWaitList;
163+
std::ignore = Event;
163164
std::ignore = hQueue;
164165
std::ignore = numEventsInWaitList;
165166
if (SrcPtr != DstPtr && Size)

source/adapters/native_cpu/kernel.cpp

+10-2
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,8 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName,
2525
if (kernelEntry == hProgram->_kernels.end())
2626
return UR_RESULT_ERROR_INVALID_KERNEL;
2727

28-
auto f = reinterpret_cast<nativecpu_ptr_t>(kernelEntry->second);
28+
auto f = reinterpret_cast<nativecpu_ptr_t>(
29+
const_cast<unsigned char *>(kernelEntry->second));
2930
auto kernel = new ur_kernel_handle_t_(pKernelName, *f);
3031

3132
*phKernel = kernel;
@@ -171,6 +172,13 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
171172
// todo: set proper values
172173
return ReturnValue(0);
173174
}
175+
case UR_KERNEL_SUB_GROUP_INFO_FORCE_UINT32: {
176+
#ifdef _MSC_VER
177+
__assume(0);
178+
#else
179+
__builtin_unreachable();
180+
#endif
181+
}
174182
}
175183
DIE_NO_IMPLEMENTATION;
176184
}
@@ -182,7 +190,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) {
182190

183191
UR_APIEXPORT ur_result_t UR_APICALL
184192
urKernelRelease(ur_kernel_handle_t hKernel) {
185-
delete hKernel;
193+
decrementOrDelete(hKernel);
186194

187195
return UR_RESULT_SUCCESS;
188196
}

source/adapters/native_cpu/memory.hpp

+1
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ struct _ur_buffer final : ur_mem_handle_t_ {
6161
: ur_mem_handle_t_(Size, false) {}
6262
_ur_buffer(_ur_buffer *b, size_t Offset, size_t Size)
6363
: ur_mem_handle_t_(b->_mem + Offset, false), SubBuffer(b) {
64+
std::ignore = Size;
6465
SubBuffer.Origin = Offset;
6566
}
6667

0 commit comments

Comments
 (0)