Skip to content

Pull in intel/llvm changes to main - Fri 21st Feb #2719

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,6 @@ function(add_ur_target_link_options name)
if (UR_DEVELOPER_MODE)
target_link_options(${name} PRIVATE -Werror -Wextra)
endif()
if (CMAKE_BUILD_TYPE STREQUAL "Release")
target_link_options(${name} PRIVATE
$<$<CXX_COMPILER_ID:GNU>:-pie>
)
endif()
endif()
elseif(MSVC)
target_link_options(${name} PRIVATE
Expand All @@ -176,7 +171,15 @@ function(add_ur_target_link_options name)
endfunction()

function(add_ur_target_exec_options name)
if(MSVC)
if(NOT MSVC)
if(NOT APPLE)
if(CMAKE_BUILD_TYPE STREQUAL "Release")
target_link_options(${name} PRIVATE
$<$<CXX_COMPILER_ID:GNU>:-pie>
)
endif()
endif()
elseif(MSVC)
target_link_options(${name} PRIVATE
LINKER:/ALLOWISOLATION
)
Expand Down
27 changes: 15 additions & 12 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -7105,21 +7105,21 @@ typedef enum ur_event_info_t {
///////////////////////////////////////////////////////////////////////////////
/// @brief Profiling query information type
typedef enum ur_profiling_info_t {
/// [uint64_t] A 64-bit value of current device counter in nanoseconds
/// when the event is enqueued
/// [uint64_t][optional-query] A 64-bit value of current device counter in
/// nanoseconds when the event is enqueued
UR_PROFILING_INFO_COMMAND_QUEUED = 0,
/// [uint64_t] A 64-bit value of current device counter in nanoseconds
/// when the event is submitted
/// [uint64_t][optional-query] A 64-bit value of current device counter in
/// nanoseconds when the event is submitted
UR_PROFILING_INFO_COMMAND_SUBMIT = 1,
/// [uint64_t] A 64-bit value of current device counter in nanoseconds
/// when the event starts execution
/// [uint64_t][optional-query] A 64-bit value of current device counter in
/// nanoseconds when the event starts execution
UR_PROFILING_INFO_COMMAND_START = 2,
/// [uint64_t] A 64-bit value of current device counter in nanoseconds
/// when the event has finished execution
/// [uint64_t][optional-query] A 64-bit value of current device counter in
/// nanoseconds when the event has finished execution
UR_PROFILING_INFO_COMMAND_END = 3,
/// [uint64_t] A 64-bit value of current device counter in nanoseconds
/// when the event and any child events enqueued by this event on the
/// device have finished execution
/// [uint64_t][optional-query] A 64-bit value of current device counter in
/// nanoseconds when the event and any child events enqueued by this event
/// on the device have finished execution
UR_PROFILING_INFO_COMMAND_COMPLETE = 4,
/// @cond
UR_PROFILING_INFO_FORCE_UINT32 = 0x7fffffff
Expand Down Expand Up @@ -7193,6 +7193,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(
/// - ::UR_RESULT_ERROR_INVALID_EVENT
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
/// + If `propName` is not supported by the adapter.
UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
/// [in] handle of the event object
ur_event_handle_t hEvent,
Expand Down Expand Up @@ -10111,6 +10113,7 @@ typedef struct ur_exp_command_buffer_command_handle_t_
/// + `NULL == hContext`
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == pCommandBufferDesc`
/// + `NULL == phCommandBuffer`
/// - ::UR_RESULT_ERROR_INVALID_CONTEXT
/// - ::UR_RESULT_ERROR_INVALID_DEVICE
Expand All @@ -10125,7 +10128,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
ur_context_handle_t hContext,
/// [in] Handle of the device object.
ur_device_handle_t hDevice,
/// [in][optional] command-buffer descriptor.
/// [in] Command-buffer descriptor.
const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
/// [out][alloc] Pointer to command-Buffer handle.
ur_exp_command_buffer_handle_t *phCommandBuffer);
Expand Down
2 changes: 1 addition & 1 deletion scripts/benchmarks/benches/oneapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

class OneAPI:
# random unique number for benchmark oneAPI installation
ONEAPI_BENCHMARK_INSTANCE_ID = 98765
ONEAPI_BENCHMARK_INSTANCE_ID = 987654

def __init__(self):
self.oneapi_dir = os.path.join(options.workdir, "oneapi")
Expand Down
4 changes: 1 addition & 3 deletions scripts/benchmarks/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
compare_names.append(saved_name)

if options.output_html:
html_content = generate_html(
history.runs, "oneapi-src/unified-runtime", compare_names
)
html_content = generate_html(history.runs, "intel/llvm", compare_names)

with open("benchmark_results.html", "w") as file:
file.write(html_content)
Expand Down
2 changes: 1 addition & 1 deletion scripts/benchmarks/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class Options:
build_compute_runtime: bool = False
extra_ld_libraries: list[str] = field(default_factory=list)
extra_env_vars: dict = field(default_factory=dict)
compute_runtime_tag: str = "24.52.32224.10"
compute_runtime_tag: str = "25.05.32567.12"
build_igc: bool = False
current_run_name: str = "This PR"

Expand Down
4 changes: 4 additions & 0 deletions scripts/benchmarks/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
matplotlib==3.9.2
mpld3==0.5.10
dataclasses-json==0.6.7
PyYAML==6.0.1
12 changes: 7 additions & 5 deletions scripts/core/event.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,15 +121,15 @@ name: $x_profiling_info_t
typed_etors: True
etors:
- name: COMMAND_QUEUED
desc: "[uint64_t] A 64-bit value of current device counter in nanoseconds when the event is enqueued"
desc: "[uint64_t][optional-query] A 64-bit value of current device counter in nanoseconds when the event is enqueued"
- name: COMMAND_SUBMIT
desc: "[uint64_t] A 64-bit value of current device counter in nanoseconds when the event is submitted"
desc: "[uint64_t][optional-query] A 64-bit value of current device counter in nanoseconds when the event is submitted"
- name: COMMAND_START
desc: "[uint64_t] A 64-bit value of current device counter in nanoseconds when the event starts execution"
desc: "[uint64_t][optional-query] A 64-bit value of current device counter in nanoseconds when the event starts execution"
- name: COMMAND_END
desc: "[uint64_t] A 64-bit value of current device counter in nanoseconds when the event has finished execution"
desc: "[uint64_t][optional-query] A 64-bit value of current device counter in nanoseconds when the event has finished execution"
- name: COMMAND_COMPLETE
desc: "[uint64_t] A 64-bit value of current device counter in nanoseconds when the event and any child events enqueued by this event on the device have finished execution"
desc: "[uint64_t][optional-query] A 64-bit value of current device counter in nanoseconds when the event and any child events enqueued by this event on the device have finished execution"
--- #--------------------------------------------------------------------------
type: function
desc: "Get event object information"
Expand Down Expand Up @@ -198,6 +198,8 @@ returns:
- $X_RESULT_ERROR_INVALID_EVENT
- $X_RESULT_ERROR_OUT_OF_RESOURCES
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
- $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION:
- "If `propName` is not supported by the adapter."
--- #--------------------------------------------------------------------------
type: function
desc: "Wait for a list of events to finish."
Expand Down
2 changes: 1 addition & 1 deletion scripts/core/exp-command-buffer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ params:
desc: "[in] Handle of the device object."
- type: "const $x_exp_command_buffer_desc_t*"
name: pCommandBufferDesc
desc: "[in][optional] command-buffer descriptor."
desc: "[in] Command-buffer descriptor."
- type: "$x_exp_command_buffer_handle_t*"
name: phCommandBuffer
desc: "[out][alloc] Pointer to command-Buffer handle."
Expand Down
5 changes: 1 addition & 4 deletions source/adapters/cuda/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
ur_exp_command_buffer_handle_t *phCommandBuffer) {

const bool IsUpdatable =
pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false;

const bool IsUpdatable = pCommandBufferDesc->isUpdatable;
try {
*phCommandBuffer =
new ur_exp_command_buffer_handle_t_(hContext, hDevice, IsUpdatable);
Expand Down
38 changes: 38 additions & 0 deletions source/adapters/cuda/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@
#include <cuda.h>
#include <ur/ur.hpp>

#include <umf/base.h>
#include <umf/providers/provider_cuda.h>

// Evaluates the UmfResult expression exactly once and, when the value is not
// UMF_RESULT_SUCCESS, returns that umf_result_t from the enclosing function.
// On success execution falls through to the statement after the macro.
// The do { ... } while (0) wrapper makes the expansion a single statement.
#define UMF_RETURN_UMF_ERROR(UmfResult) \
do { \
umf_result_t UmfResult_ = (UmfResult); \
if (UmfResult_ != UMF_RESULT_SUCCESS) { \
return UmfResult_; \
} \
} while (0)

ur_result_t mapErrorUR(CUresult Result);

/// Converts CUDA error into UR error codes, and outputs error information
Expand Down Expand Up @@ -59,3 +70,30 @@ void assertion(bool Condition, const char *Message = nullptr);

} // namespace ur
} // namespace detail

namespace umf {

/// Owning smart pointer for a CUDA memory provider params object. The deleter
/// is a type-erased callable that takes the params handle and returns a
/// umf_result_t; it is supplied when the pointer is constructed.
using cuda_params_unique_handle_t = std::unique_ptr<
    umf_cuda_memory_provider_params_t,
    std::function<umf_result_t(umf_cuda_memory_provider_params_handle_t)>>;

/// Configures a CUDA memory provider params object.
///
/// Applies, in order, the context, the device and the memory type to
/// \p CUMemoryProviderParams.
///
/// \param CUMemoryProviderParams params handle to configure.
/// \param cuDevice device value forwarded to the "set device" call.
/// \param cuContext context pointer forwarded to the "set context" call.
/// \param memType memory type forwarded to the "set memory type" call.
/// \return the first non-success result reported by a setter, or
///         UMF_RESULT_SUCCESS when all three setters succeed.
inline umf_result_t setCUMemoryProviderParams(
    umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams,
    int cuDevice, void *cuContext, umf_usm_memory_type_t memType) {
  // Each setter is checked immediately, so a failure skips the remaining
  // setters and is propagated unchanged to the caller.
  UMF_RETURN_UMF_ERROR(
      umfCUDAMemoryProviderParamsSetContext(CUMemoryProviderParams, cuContext));
  UMF_RETURN_UMF_ERROR(
      umfCUDAMemoryProviderParamsSetDevice(CUMemoryProviderParams, cuDevice));
  UMF_RETURN_UMF_ERROR(umfCUDAMemoryProviderParamsSetMemoryType(
      CUMemoryProviderParams, memType));

  return UMF_RESULT_SUCCESS;
}

} // namespace umf
33 changes: 25 additions & 8 deletions source/adapters/cuda/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ typedef void (*ur_context_extended_deleter_t)(void *user_data);
///

static ur_result_t
CreateHostMemoryProvider(ur_device_handle_t_ *DeviceHandle,
umf_memory_provider_handle_t *MemoryProviderHost) {
CreateHostMemoryProviderPool(ur_device_handle_t_ *DeviceHandle,
umf_memory_provider_handle_t *MemoryProviderHost,
umf_memory_pool_handle_t *MemoryPoolHost) {
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams = nullptr;

*MemoryProviderHost = nullptr;
Expand All @@ -91,10 +92,20 @@ CreateHostMemoryProvider(ur_device_handle_t_ *DeviceHandle,
umf::cuda_params_unique_handle_t CUMemoryProviderParamsUnique(
CUMemoryProviderParams, umfCUDAMemoryProviderParamsDestroy);

// create UMF CUDA memory provider for the host memory (UMF_MEMORY_TYPE_HOST)
UmfResult = umf::createMemoryProvider(
CUMemoryProviderParamsUnique.get(), 0 /* cuDevice */, context,
UMF_MEMORY_TYPE_HOST, MemoryProviderHost);
UmfResult = umf::setCUMemoryProviderParams(CUMemoryProviderParamsUnique.get(),
0 /* cuDevice */, context,
UMF_MEMORY_TYPE_HOST);
UMF_RETURN_UR_ERROR(UmfResult);

// create UMF CUDA memory provider and pool for the host memory
// (UMF_MEMORY_TYPE_HOST)
UmfResult = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(),
CUMemoryProviderParamsUnique.get(),
MemoryProviderHost);
UMF_RETURN_UR_ERROR(UmfResult);

UmfResult = umfPoolCreate(umfProxyPoolOps(), *MemoryProviderHost, nullptr, 0,
MemoryPoolHost);
UMF_RETURN_UR_ERROR(UmfResult);

return UR_RESULT_SUCCESS;
Expand All @@ -112,8 +123,10 @@ struct ur_context_handle_t_ {
std::vector<ur_device_handle_t> Devices;
std::atomic_uint32_t RefCount;

// UMF CUDA memory provider for the host memory (UMF_MEMORY_TYPE_HOST)
// UMF CUDA memory provider and pool for the host memory
// (UMF_MEMORY_TYPE_HOST)
umf_memory_provider_handle_t MemoryProviderHost = nullptr;
umf_memory_pool_handle_t MemoryPoolHost = nullptr;

ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices)
: Devices{Devs, Devs + NumDevices}, RefCount{1} {
Expand All @@ -124,10 +137,14 @@ struct ur_context_handle_t_ {
// Create UMF CUDA memory provider and pool for the host memory
// (UMF_MEMORY_TYPE_HOST) from any device (Devices[0] is used here, because
// it is guaranteed to exist).
UR_CHECK_ERROR(CreateHostMemoryProvider(Devices[0], &MemoryProviderHost));
UR_CHECK_ERROR(CreateHostMemoryProviderPool(Devices[0], &MemoryProviderHost,
&MemoryPoolHost));
};

~ur_context_handle_t_() {
if (MemoryPoolHost) {
umfPoolDestroy(MemoryPoolHost);
}
if (MemoryProviderHost) {
umfMemoryProviderDestroy(MemoryProviderHost);
}
Expand Down
17 changes: 15 additions & 2 deletions source/adapters/cuda/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include <ur/ur.hpp>

#include <umf/memory_pool.h>
#include <umf/memory_provider.h>

#include "common.hpp"
Expand Down Expand Up @@ -84,9 +85,17 @@ struct ur_device_handle_t_ {

MemoryProviderDevice = nullptr;
MemoryProviderShared = nullptr;
MemoryPoolDevice = nullptr;
MemoryPoolShared = nullptr;
}

~ur_device_handle_t_() {
if (MemoryPoolDevice) {
umfPoolDestroy(MemoryPoolDevice);
}
if (MemoryPoolShared) {
umfPoolDestroy(MemoryPoolShared);
}
if (MemoryProviderDevice) {
umfMemoryProviderDestroy(MemoryProviderDevice);
}
Expand Down Expand Up @@ -131,11 +140,15 @@ struct ur_device_handle_t_ {
// bookkeeping for mipmappedArray leaks in Mapping external Memory
std::map<CUarray, CUmipmappedArray> ChildCuarrayFromMipmapMap;

// UMF CUDA memory provider for the device memory (UMF_MEMORY_TYPE_DEVICE)
// UMF CUDA memory provider and pool for the device memory
// (UMF_MEMORY_TYPE_DEVICE)
umf_memory_provider_handle_t MemoryProviderDevice;
umf_memory_pool_handle_t MemoryPoolDevice;

// UMF CUDA memory provider for the shared memory (UMF_MEMORY_TYPE_SHARED)
// UMF CUDA memory provider and pool for the shared memory
// (UMF_MEMORY_TYPE_SHARED)
umf_memory_provider_handle_t MemoryProviderShared;
umf_memory_pool_handle_t MemoryPoolShared;
};

int getAttribute(ur_device_handle_t Device, CUdevice_attribute Attribute);
2 changes: 2 additions & 0 deletions source/adapters/cuda/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
return ReturnValue(static_cast<uint64_t>(hEvent->getStartTime()));
case UR_PROFILING_INFO_COMMAND_END:
return ReturnValue(static_cast<uint64_t>(hEvent->getEndTime()));
case UR_PROFILING_INFO_COMMAND_COMPLETE:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
default:
break;
}
Expand Down
8 changes: 4 additions & 4 deletions source/adapters/cuda/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
cuMemHostRegister(HostPtr, size, CU_MEMHOSTREGISTER_DEVICEMAP));
AllocMode = BufferMem::AllocMode::UseHostPtr;
} else if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) {
UMF_CHECK_ERROR(umfMemoryProviderAlloc(hContext->MemoryProviderHost, size,
0, &HostPtr));
HostPtr = umfPoolMalloc(hContext->MemoryPoolHost, size);
UMF_CHECK_PTR(HostPtr);
AllocMode = BufferMem::AllocMode::AllocHostPtr;
} else if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) {
AllocMode = BufferMem::AllocMode::CopyIn;
Expand Down Expand Up @@ -442,8 +442,8 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
CU_MEMHOSTALLOC_DEVICEMAP));
UR_CHECK_ERROR(cuMemHostGetDevicePointer(&DevPtr, Buffer.HostPtr, 0));
} else {
UMF_CHECK_ERROR(umfMemoryProviderAlloc(hDevice->MemoryProviderDevice,
Buffer.Size, 0, (void **)&DevPtr));
*(void **)&DevPtr = umfPoolMalloc(hDevice->MemoryPoolDevice, Buffer.Size);
UMF_CHECK_PTR(*(void **)&DevPtr);
}
} else {
CUarray ImageArray{};
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,15 +158,15 @@ struct BufferMem {
case AllocMode::Classic:
for (auto &DevPtr : Ptrs) {
if (DevPtr != native_type{0}) {
UR_CHECK_ERROR(cuMemFree(DevPtr));
UMF_CHECK_ERROR(umfFree((void *)DevPtr));
}
}
break;
case AllocMode::UseHostPtr:
UR_CHECK_ERROR(cuMemHostUnregister(HostPtr));
break;
case AllocMode::AllocHostPtr:
UR_CHECK_ERROR(cuMemFreeHost(HostPtr));
UMF_CHECK_ERROR(umfFree((void *)HostPtr));
}
return UR_RESULT_SUCCESS;
}
Expand Down
Loading
Loading