Skip to content

[UR] Consolidate kernel launch entry points in UR. #18385

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 14 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/include/sycl/info/device_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_work_groups_3d, id<3>,
UR_DEVICE_INFO_MAX_WORK_GROUPS_3D)
__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_global_work_groups, size_t, __SYCL_TRAIT_HANDLED_IN_RT)
__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_cuda_cluster_group, bool,
UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP)
UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT)

#ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/device_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
return false;

return get_info_impl_nocheck<ur_bool_t,
UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP>()
UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT>()
.value_or(0);
}

Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/kernel_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ kernel_impl::queryMaxNumWorkGroups(queue Queue,

uint32_t GroupCount{0};
if (auto Result = Adapter->call_nocheck<
UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
UrApiKind::urKernelSuggestMaxCooperativeGroupCount>(
Handle, DeviceHandleRef, Dimensions, WG, DynamicLocalMemorySize,
&GroupCount);
Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE &&
Expand Down
55 changes: 17 additions & 38 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2441,58 +2441,37 @@ static ur_result_t SetKernelParamsAndLaunch(
if (EnforcedLocalSize)
LocalSize = RequiredWGSize;
}
std::vector<ur_exp_launch_property_t> property_list;
std::vector<ur_kernel_launch_property_t> property_list;
if (KernelUsesClusterLaunch) {
ur_exp_launch_property_value_t launch_property_value_cluster_range;
ur_kernel_launch_property_value_t launch_property_value_cluster_range;
launch_property_value_cluster_range.clusterDim[0] =
NDRDesc.ClusterDimensions[0];
launch_property_value_cluster_range.clusterDim[1] =
NDRDesc.ClusterDimensions[1];
launch_property_value_cluster_range.clusterDim[2] =
NDRDesc.ClusterDimensions[2];

property_list.push_back({UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION,
property_list.push_back({UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION,
launch_property_value_cluster_range});

if (IsCooperative) {
ur_exp_launch_property_value_t launch_property_value_cooperative;
launch_property_value_cooperative.cooperative = 1;
property_list.push_back({UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE,
launch_property_value_cooperative});
}
}
if (IsCooperative) {
ur_kernel_launch_property_value_t launch_property_value_cooperative;
launch_property_value_cooperative.cooperative = 1;
property_list.push_back({UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE,
launch_property_value_cooperative});
}
// If there is no implicit arg, let the driver handle it via a property
if (WorkGroupMemorySize && !ImplicitLocalArg.has_value()) {
property_list.push_back(
{UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY, {{WorkGroupMemorySize}}});
}
if (!property_list.empty()) {
ur_event_handle_t UREvent = nullptr;
ur_result_t Error =
Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunchCustomExp>(
Queue->getHandleRef(), Kernel, NDRDesc.Dims,
&NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], LocalSize,
property_list.size(), property_list.data(), RawEvents.size(),
RawEvents.empty() ? nullptr : &RawEvents[0],
OutEventImpl ? &UREvent : nullptr);
if ((Error == UR_RESULT_SUCCESS) && OutEventImpl) {
OutEventImpl->setHandle(UREvent);
}
return Error;
property_list.push_back({UR_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY,
{{WorkGroupMemorySize}}});
}
ur_event_handle_t UREvent = nullptr;
ur_result_t Error =
[&](auto... Args) {
if (IsCooperative) {
return Adapter
->call_nocheck<UrApiKind::urEnqueueCooperativeKernelLaunchExp>(
Args...);
}
return Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunch>(Args...);
}(Queue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
&NDRDesc.GlobalSize[0], LocalSize, RawEvents.size(),
RawEvents.empty() ? nullptr : &RawEvents[0],
OutEventImpl ? &UREvent : nullptr);
ur_result_t Error = Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunch>(
Queue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
&NDRDesc.GlobalSize[0], LocalSize, property_list.size(),
property_list.empty() ? nullptr : property_list.data(), RawEvents.size(),
RawEvents.empty() ? nullptr : &RawEvents[0],
OutEventImpl ? &UREvent : nullptr);
if (Error == UR_RESULT_SUCCESS && OutEventImpl) {
OutEventImpl->setHandle(UREvent);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Checks whether or not event Dependencies are honored by
// urEnqueueKernelLaunchCustomExp
// urEnqueueKernelLaunch with cluster dimensions
// REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group
// RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out
// RUN: %{run} %t.out
Expand Down
9 changes: 4 additions & 5 deletions sycl/unittests/helpers/UrMock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,10 +393,9 @@ inline ur_result_t mock_urEventGetInfo(void *pParams) {
}
}

inline ur_result_t
mock_urKernelSuggestMaxCooperativeGroupCountExp(void *pParams) {
inline ur_result_t mock_urKernelSuggestMaxCooperativeGroupCount(void *pParams) {
auto params = reinterpret_cast<
ur_kernel_suggest_max_cooperative_group_count_exp_params_t *>(pParams);
ur_kernel_suggest_max_cooperative_group_count_params_t *>(pParams);
**params->ppGroupCountRet = 1;
return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -573,8 +572,8 @@ template <sycl::backend Backend = backend::opencl> class UrMock {
ADD_DEFAULT_OVERRIDE(urProgramGetInfo, mock_urProgramGetInfo)
ADD_DEFAULT_OVERRIDE(urKernelGetGroupInfo, mock_urKernelGetGroupInfo)
ADD_DEFAULT_OVERRIDE(urEventGetInfo, mock_urEventGetInfo)
ADD_DEFAULT_OVERRIDE(urKernelSuggestMaxCooperativeGroupCountExp,
mock_urKernelSuggestMaxCooperativeGroupCountExp)
ADD_DEFAULT_OVERRIDE(urKernelSuggestMaxCooperativeGroupCount,
mock_urKernelSuggestMaxCooperativeGroupCount)
ADD_DEFAULT_OVERRIDE(urDeviceSelectBinary, mock_urDeviceSelectBinary)
ADD_DEFAULT_OVERRIDE(urPlatformGetBackendOption,
mock_urPlatformGetBackendOption)
Expand Down
Loading
Loading