diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def
index 44c4192a338f3..7ca82b196bb82 100644
--- a/sycl/include/sycl/info/device_traits.def
+++ b/sycl/include/sycl/info/device_traits.def
@@ -235,8 +235,7 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_work_groups_2d, id<2>, __SYCL_TR
 __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_work_groups_3d, id<3>,
                          UR_DEVICE_INFO_MAX_WORK_GROUPS_3D)
 __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_global_work_groups, size_t, __SYCL_TRAIT_HANDLED_IN_RT)
-__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_cuda_cluster_group, bool,
-                         UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP)
+__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_cuda_cluster_group, bool, __SYCL_TRAIT_HANDLED_IN_RT)
 
 #ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
 #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp
index b3922c0f5c839..cfa2320d7987a 100644
--- a/sycl/source/detail/device_impl.hpp
+++ b/sycl/source/detail/device_impl.hpp
@@ -886,11 +886,10 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
     }
 
     CASE(info::device::ext_oneapi_cuda_cluster_group) {
-      if (getBackend() != backend::ext_oneapi_cuda)
-        return false;
-
-      return get_info_impl_nocheck<UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP>()
-                 .value_or(0) != 0;
+      auto SupportFlags =
+          get_info_impl<UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES>();
+      return static_cast<bool>(
+          SupportFlags & UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION);
     }
 
     // ext_codeplay_device_traits.def
diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp
index 6e5b933fa48d7..58c8c38564d91 100644
--- a/sycl/source/detail/kernel_impl.hpp
+++ b/sycl/source/detail/kernel_impl.hpp
@@ -371,7 +371,7 @@ kernel_impl::queryMaxNumWorkGroups(queue Queue,
 
   uint32_t GroupCount{0};
   if (auto Result = Adapter->call_nocheck<
-                    UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
+                    UrApiKind::urKernelSuggestMaxCooperativeGroupCount>(
           Handle, DeviceHandleRef, Dimensions, WG, DynamicLocalMemorySize,
           &GroupCount);
       Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE &&
diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp
index 724dcc2956734..11d560f23df4c 100644
--- a/sycl/source/detail/scheduler/commands.cpp
+++ b/sycl/source/detail/scheduler/commands.cpp
@@ -2474,14 +2474,14 @@ static ur_result_t SetKernelParamsAndLaunch(
     if (EnforcedLocalSize)
       LocalSize = RequiredWGSize;
   }
-
   const bool HasOffset = NDRDesc.GlobalOffset[0] != 0 ||
                          NDRDesc.GlobalOffset[1] != 0 ||
                          NDRDesc.GlobalOffset[2] != 0;
 
-  std::vector<ur_exp_launch_property_t> property_list;
+  std::vector<ur_kernel_launch_property_t> property_list;
+
   if (KernelUsesClusterLaunch) {
-    ur_exp_launch_property_value_t launch_property_value_cluster_range;
+    ur_kernel_launch_property_value_t launch_property_value_cluster_range;
     launch_property_value_cluster_range.clusterDim[0] =
         NDRDesc.ClusterDimensions[0];
     launch_property_value_cluster_range.clusterDim[1] =
@@ -2489,50 +2489,32 @@ static ur_result_t SetKernelParamsAndLaunch(
     launch_property_value_cluster_range.clusterDim[2] =
         NDRDesc.ClusterDimensions[2];
 
-    property_list.push_back({UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION,
+    property_list.push_back({UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION,
                              launch_property_value_cluster_range});
-
-    if (IsCooperative) {
-      ur_exp_launch_property_value_t launch_property_value_cooperative;
-      launch_property_value_cooperative.cooperative = 1;
-      property_list.push_back({UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE,
-                               launch_property_value_cooperative});
-    }
+  }
+  if (IsCooperative) {
+    ur_kernel_launch_property_value_t launch_property_value_cooperative;
+    launch_property_value_cooperative.cooperative = 1;
+    property_list.push_back({UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE,
+                             launch_property_value_cooperative});
   }
   // If there is no implicit arg, let the driver handle it via a property
   if (WorkGroupMemorySize && !ImplicitLocalArg.has_value()) {
-    property_list.push_back(
-        {UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY, {{WorkGroupMemorySize}}});
-  }
-  if (!property_list.empty()) {
-    ur_event_handle_t UREvent = nullptr;
-    ur_result_t Error =
-        Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunchCustomExp>(
-            Queue.getHandleRef(), Kernel, NDRDesc.Dims,
-            HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr,
-            &NDRDesc.GlobalSize[0], LocalSize, property_list.size(),
-            property_list.data(), RawEvents.size(),
-            RawEvents.empty() ? nullptr : &RawEvents[0],
-            OutEventImpl ? &UREvent : nullptr);
-    if ((Error == UR_RESULT_SUCCESS) && OutEventImpl) {
-      OutEventImpl->setHandle(UREvent);
-    }
-    return Error;
+    // Name the size_t member explicitly: brace-initialising the union would
+    // fill clusterDim[0] (its first member) instead of workgroup_mem_size.
+    ur_kernel_launch_property_value_t launch_property_value_wg_mem{};
+    launch_property_value_wg_mem.workgroup_mem_size = WorkGroupMemorySize;
+    property_list.push_back({UR_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY,
+                             launch_property_value_wg_mem});
   }
   ur_event_handle_t UREvent = nullptr;
-  ur_result_t Error =
-      [&](auto... Args) {
-        if (IsCooperative) {
-          return Adapter
-              ->call_nocheck<UrApiKind::urEnqueueCooperativeKernelLaunchExp>(
-                  Args...);
-        }
-        return Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunch>(Args...);
-      }(Queue.getHandleRef(), Kernel, NDRDesc.Dims,
-        HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr, &NDRDesc.GlobalSize[0],
-        LocalSize, RawEvents.size(),
-        RawEvents.empty() ? nullptr : &RawEvents[0],
-        OutEventImpl ? &UREvent : nullptr);
+  ur_result_t Error = Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunch>(
+      Queue.getHandleRef(), Kernel, NDRDesc.Dims,
+      HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr, &NDRDesc.GlobalSize[0],
+      LocalSize, property_list.size(),
+      property_list.empty() ? nullptr : property_list.data(), RawEvents.size(),
+      RawEvents.empty() ? nullptr : &RawEvents[0],
+      OutEventImpl ? &UREvent : nullptr);
   if (Error == UR_RESULT_SUCCESS && OutEventImpl) {
     OutEventImpl->setHandle(UREvent);
   }
diff --git a/sycl/source/detail/ur_device_info_ret_types.inc b/sycl/source/detail/ur_device_info_ret_types.inc
index 69a68389a732c..aa1159f72c772 100644
--- a/sycl/source/detail/ur_device_info_ret_types.inc
+++ b/sycl/source/detail/ur_device_info_ret_types.inc
@@ -172,7 +172,6 @@ MAP(UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_SUPPORT_EXP, ur_bool_t)
 MAP(UR_DEVICE_INFO_BINDLESS_SAMPLE_1D_USM_SUPPORT_EXP, ur_bool_t)
 MAP(UR_DEVICE_INFO_BINDLESS_SAMPLE_2D_USM_SUPPORT_EXP, ur_bool_t)
 MAP(UR_DEVICE_INFO_BINDLESS_UNIQUE_ADDRESSING_PER_DIM_SUPPORT_EXP, ur_bool_t)
-MAP(UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP, ur_bool_t)
 MAP(UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP, ur_bool_t)
 MAP(UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP, ur_bool_t)
 MAP(UR_DEVICE_INFO_EXTERNAL_MEMORY_IMPORT_SUPPORT_EXP, ur_bool_t)
@@ -188,4 +187,5 @@ MAP(UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP, ur_bool_t)
 MAP(UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP, uint32_t)
 MAP(UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP, ur_bool_t)
 MAP(UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP, ur_bool_t)
+MAP(UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES, ur_kernel_launch_properties_flags_t)
 // clang-format on
diff --git a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp
index e15d361a5ca3e..7928e5da66bac 100644
--- a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp
+++ b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp
@@ -1,5 +1,5 @@
 // Checks whether or not event Dependencies are honored by
-// urEnqueueKernelLaunchCustomExp
+// urEnqueueKernelLaunch with cluster dimensions
 // REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group
 // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out
 // RUN: %{run} %t.out
diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp
index 583fb0514cb97..934dd9f5f7a84 100644
--- a/sycl/unittests/helpers/UrMock.hpp
+++ b/sycl/unittests/helpers/UrMock.hpp
@@ -393,10 +393,9 @@ inline ur_result_t mock_urEventGetInfo(void *pParams) {
   }
 }
 
-inline ur_result_t
-mock_urKernelSuggestMaxCooperativeGroupCountExp(void *pParams) {
+inline ur_result_t mock_urKernelSuggestMaxCooperativeGroupCount(void *pParams) {
   auto params = reinterpret_cast<
-      ur_kernel_suggest_max_cooperative_group_count_exp_params_t *>(pParams);
+      ur_kernel_suggest_max_cooperative_group_count_params_t *>(pParams);
   **params->ppGroupCountRet = 1;
   return UR_RESULT_SUCCESS;
 }
@@ -571,8 +570,8 @@ template <sycl::backend Backend = backend::opencl> class UrMock {
     ADD_DEFAULT_OVERRIDE(urProgramGetInfo, mock_urProgramGetInfo)
     ADD_DEFAULT_OVERRIDE(urKernelGetGroupInfo, mock_urKernelGetGroupInfo)
     ADD_DEFAULT_OVERRIDE(urEventGetInfo, mock_urEventGetInfo)
-    ADD_DEFAULT_OVERRIDE(urKernelSuggestMaxCooperativeGroupCountExp,
-                         mock_urKernelSuggestMaxCooperativeGroupCountExp)
+    ADD_DEFAULT_OVERRIDE(urKernelSuggestMaxCooperativeGroupCount,
+                         mock_urKernelSuggestMaxCooperativeGroupCount)
     ADD_DEFAULT_OVERRIDE(urDeviceSelectBinary, mock_urDeviceSelectBinary)
     ADD_DEFAULT_OVERRIDE(urPlatformGetBackendOption,
                          mock_urPlatformGetBackendOption)
diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h
index 8a1c8619d2977..0b6ce6ed4280f 100644
--- a/unified-runtime/include/ur_api.h
+++ b/unified-runtime/include/ur_api.h
@@ -367,10 +367,6 @@ typedef enum ur_function_t {
   UR_FUNCTION_LOADER_INIT = 201,
   /// Enumerator for ::urLoaderTearDown
   UR_FUNCTION_LOADER_TEAR_DOWN = 202,
-  /// Enumerator for ::urEnqueueCooperativeKernelLaunchExp
-  UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214,
-  /// Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp
-  UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215,
   /// Enumerator for ::urProgramGetGlobalVariablePointer
   UR_FUNCTION_PROGRAM_GET_GLOBAL_VARIABLE_POINTER = 216,
   /// Enumerator for ::urDeviceGetSelected
@@ -381,8 +377,6 @@ typedef enum ur_function_t {
   UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 221,
   /// Enumerator for ::urEnqueueTimestampRecordingExp
   UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP = 223,
-  /// Enumerator for ::urEnqueueKernelLaunchCustomExp
-  UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP = 224,
   /// Enumerator for ::urKernelGetSuggestedLocalWorkSize
   UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE = 225,
   /// Enumerator for ::urBindlessImagesImportExternalMemoryExp
@@ -469,6 +463,8 @@ typedef enum ur_function_t {
   UR_FUNCTION_BINDLESS_IMAGES_GET_IMAGE_MEMORY_HANDLE_TYPE_SUPPORT_EXP = 270,
   /// Enumerator for ::urBindlessImagesFreeMappedLinearMemoryExp
   UR_FUNCTION_BINDLESS_IMAGES_FREE_MAPPED_LINEAR_MEMORY_EXP = 271,
+  /// Enumerator for ::urKernelSuggestMaxCooperativeGroupCount
+  UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT = 272,
   /// @cond
   UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
   /// @endcond
@@ -2311,6 +2307,9 @@ typedef enum ur_device_info_t {
   UR_DEVICE_INFO_MAX_POWER_LIMIT = 126,
   /// [::ur_bool_t] support for native bfloat16 conversions
   UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE = 127,
+  /// [::ur_kernel_launch_properties_flags_t] Bitfield of supported kernel
+  /// launch properties.
+  UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES = 128,
   /// [::ur_bool_t] Returns true if the device supports the use of
   /// command-buffers.
   UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000,
@@ -2323,8 +2322,6 @@ typedef enum ur_device_info_t {
   /// [::ur_bool_t] Returns true if the device supports appending a
   /// command-buffer as a command inside another command-buffer.
   UR_DEVICE_INFO_COMMAND_BUFFER_SUBGRAPH_SUPPORT_EXP = 0x1003,
-  /// [::ur_bool_t] return true if enqueue Cluster Launch is supported
-  UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP = 0x1111,
   /// [::ur_bool_t] returns true if the device supports the creation of
   /// bindless images
   UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000,
@@ -2416,14 +2413,9 @@ typedef enum ur_device_info_t {
   /// [::ur_bool_t] returns true if the device supports enqueueing of
   /// allocations and frees.
   UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP = 0x2050,
-  /// [::ur_bool_t] Returns true if the device supports the use of kernel
-  /// launch properties.
-  UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP = 0x3000,
   /// [::ur_bool_t] Returns true if the device supports the USM P2P
   /// experimental feature.
   UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP = 0x4000,
-  /// [::ur_bool_t] Returns true if the device supports cooperative kernels.
-  UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP = 0x5000,
   /// [::ur_bool_t] Returns true if the device supports the multi device
   /// compile experimental feature.
   UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP = 0x6000,
@@ -2995,6 +2987,27 @@ typedef enum ur_device_throttle_reasons_flag_t {
 /// @brief Bit Mask for validating ur_device_throttle_reasons_flags_t
 #define UR_DEVICE_THROTTLE_REASONS_FLAGS_MASK 0xffffff80
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Kernel launch properties support
+typedef uint32_t ur_kernel_launch_properties_flags_t;
+typedef enum ur_kernel_launch_properties_flag_t {
+  /// Supports ::UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE and
+  /// ::urKernelSuggestMaxCooperativeGroupCount
+  UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE = UR_BIT(0),
+  /// Supports ::UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION
+  UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION = UR_BIT(1),
+  /// Supports ::UR_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY
+  UR_KERNEL_LAUNCH_PROPERTIES_FLAG_WORK_GROUP_MEMORY = UR_BIT(2),
+  /// Supports ::UR_KERNEL_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE
+  UR_KERNEL_LAUNCH_PROPERTIES_FLAG_OPPORTUNISTIC_QUEUE_SERIALIZE = UR_BIT(3),
+  /// @cond
+  UR_KERNEL_LAUNCH_PROPERTIES_FLAG_FORCE_UINT32 = 0x7fffffff
+  /// @endcond
+
+} ur_kernel_launch_properties_flag_t;
+/// @brief Bit Mask for validating ur_kernel_launch_properties_flags_t
+#define UR_KERNEL_LAUNCH_PROPERTIES_FLAGS_MASK 0xfffffff0
+
 #if !defined(__GNUC__)
 #pragma endregion
 #endif
@@ -6763,6 +6776,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
     /// suggested local work size that will contain the result of the query
     size_t *pSuggestedLocalWorkSize);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Query the maximum number of work groups for a cooperative kernel
+///
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_UNINITIALIZED
+///     - ::UR_RESULT_ERROR_DEVICE_LOST
+///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
+///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
+///         + `NULL == hKernel`
+///         + `NULL == hDevice`
+///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
+///         + `NULL == pLocalWorkSize`
+///         + `NULL == pGroupCountRet`
+///     - ::UR_RESULT_ERROR_INVALID_KERNEL
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
+///         + If ::UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES returns a value
+///         without the ::UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE bit set.
+///     - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
+///         + `workDim < 1 || workDim > 3`
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
+    /// [in] handle of the kernel object
+    ur_kernel_handle_t hKernel,
+    /// [in] handle of the device object
+    ur_device_handle_t hDevice,
+    /// [in] number of dimensions, from 1 to 3, to specify the work-group
+    /// work-items
+    uint32_t workDim,
+    /// [in] pointer to an array of workDim unsigned values that specify the
+    /// number of local work-items forming a work-group that will execute the
+    /// kernel function.
+    const size_t *pLocalWorkSize,
+    /// [in] size of dynamic shared memory, for each work-group, in bytes,
+    /// that will be used when the kernel is launched
+    size_t dynamicSharedMemorySize,
+    /// [out] pointer to maximum number of groups
+    uint32_t *pGroupCountRet);
+
 #if !defined(__GNUC__)
 #pragma endregion
 #endif
@@ -7619,6 +7670,66 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(
 #if !defined(__GNUC__)
 #pragma region enqueue
 #endif
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Specifies a launch property id
+///
+/// @remarks
+///   _Analogues_
+///     - **CUlaunchAttributeID**
+typedef enum ur_kernel_launch_property_id_t {
+  /// The property has no effect.
+  UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE = 0,
+  /// Whether to launch a cooperative kernel.
+  UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE = 1,
+  /// Work-group cluster dimensions.
+  UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION = 2,
+  /// Implicit work group memory allocation.
+  UR_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY = 3,
+  /// Whether to opportunistically execute kernel launches serially on a
+  /// native queue.
+  UR_KERNEL_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE = 4,
+  /// @cond
+  UR_KERNEL_LAUNCH_PROPERTY_ID_FORCE_UINT32 = 0x7fffffff
+  /// @endcond
+
+} ur_kernel_launch_property_id_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Specifies a launch property value
+///
+/// @remarks
+///   _Analogues_
+///     - **CUlaunchAttributeValue**
+typedef union ur_kernel_launch_property_value_t {
+  /// [in] dimensions of the cluster (units of work-group) (x, y, z). Each
+  /// value must be a divisor of the corresponding global work-size
+  /// dimension (in units of work-group).
+  uint32_t clusterDim[3];
+  /// [in] non-zero value indicates a cooperative kernel
+  int cooperative;
+  /// [in] non-zero value indicates the amount of work group memory to
+  /// allocate in bytes
+  size_t workgroup_mem_size;
+  /// [in] non-zero value indicates an opportunistic native queue serialized
+  /// kernel
+  int opportunistic_queue_serialize;
+
+} ur_kernel_launch_property_value_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Kernel launch property
+///
+/// @remarks
+///   _Analogues_
+///     - **CUlaunchAttribute**
+typedef struct ur_kernel_launch_property_t {
+  /// [in] launch property id
+  ur_kernel_launch_property_id_t id;
+  /// [in][tagged_by(id)] launch property value
+  ur_kernel_launch_property_value_t value;
+
+} ur_kernel_launch_property_t;
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Enqueue a command to execute a kernel
 ///
@@ -7639,6 +7750,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(
 ///         + `NULL == hKernel`
 ///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
 ///         + `NULL == pGlobalWorkSize`
+///         + `launchPropList == NULL && numPropsInLaunchPropList > 0`
 ///     - ::UR_RESULT_ERROR_INVALID_QUEUE
 ///     - ::UR_RESULT_ERROR_INVALID_KERNEL
 ///     - ::UR_RESULT_ERROR_INVALID_EVENT
@@ -7653,6 +7765,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(
 ///     - ::UR_RESULT_ERROR_INVALID_VALUE
 ///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
 ///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
+///         + If any property in `launchPropList` isn't supported by the device.
 UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// [in] handle of the queue object
     ur_queue_handle_t hQueue,
@@ -7673,6 +7787,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// execute the kernel function.
     /// If nullptr, the runtime implementation will choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -12113,105 +12232,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp(
     /// [out] A pointer to the native handle of the command-buffer.
     ur_native_handle_t *phNativeCommandBuffer);
 
-#if !defined(__GNUC__)
-#pragma endregion
-#endif
-// Intel 'oneAPI' Unified Runtime Experimental APIs for Cooperative Kernels
-#if !defined(__GNUC__)
-#pragma region cooperative_kernels_(experimental)
-#endif
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Enqueue a command to execute a cooperative kernel
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
-///         + `NULL == hQueue`
-///         + `NULL == hKernel`
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///         + `NULL == pGlobalWorkOffset`
-///         + `NULL == pGlobalWorkSize`
-///     - ::UR_RESULT_ERROR_INVALID_QUEUE
-///     - ::UR_RESULT_ERROR_INVALID_KERNEL
-///     - ::UR_RESULT_ERROR_INVALID_EVENT
-///     - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
-///         + `phEventWaitList == NULL && numEventsInWaitList > 0`
-///         + `phEventWaitList != NULL && numEventsInWaitList == 0`
-///         + If event objects in phEventWaitList are not valid events.
-///     - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
-///     - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
-///     - ::UR_RESULT_ERROR_INVALID_VALUE
-///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
-///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Query the maximum number of work groups for a cooperative kernel
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
-///         + `NULL == hKernel`
-///         + `NULL == hDevice`
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///         + `NULL == pLocalWorkSize`
-///         + `NULL == pGroupCountRet`
-///     - ::UR_RESULT_ERROR_INVALID_KERNEL
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] handle of the device object
-    ur_device_handle_t hDevice,
-    /// [in] number of dimensions, from 1 to 3, to specify the work-group
-    /// work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of local work-items forming a work-group that will execute the
-    /// kernel function.
-    const size_t *pLocalWorkSize,
-    /// [in] size of dynamic shared memory, for each work-group, in bytes,
-    /// that will be used when the kernel is launched
-    size_t dynamicSharedMemorySize,
-    /// [out] pointer to maximum number of groups
-    uint32_t *pGroupCountRet);
-
 #if !defined(__GNUC__)
 #pragma endregion
 #endif
@@ -12261,159 +12281,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
     /// array.
     ur_event_handle_t *phEvent);
 
-#if !defined(__GNUC__)
-#pragma endregion
-#endif
-// Intel 'oneAPI' Unified Runtime Experimental APIs for (kernel) Launch
-// Properties
-#if !defined(__GNUC__)
-#pragma region launch_properties_(experimental)
-#endif
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Specifies a launch property id
-///
-/// @remarks
-///   _Analogues_
-///     - **CUlaunchAttributeID**
-typedef enum ur_exp_launch_property_id_t {
-  /// The property has no effect
-  UR_EXP_LAUNCH_PROPERTY_ID_IGNORE = 0,
-  /// Whether to launch a cooperative kernel
-  UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE = 1,
-  /// work-group cluster dimensions
-  UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION = 2,
-  /// Implicit work group memory allocation
-  UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY = 3,
-  /// Whether to opportunistically execute kernel launches serially on a
-  /// native queue
-  UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE = 4,
-  /// @cond
-  UR_EXP_LAUNCH_PROPERTY_ID_FORCE_UINT32 = 0x7fffffff
-  /// @endcond
-
-} ur_exp_launch_property_id_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Specifies a launch property value
-///
-/// @remarks
-///   _Analogues_
-///     - **CUlaunchAttributeValue**
-typedef union ur_exp_launch_property_value_t {
-  /// [in] dimensions of the cluster (units of work-group) (x, y, z). Each
-  /// value must be a divisor of the corresponding global work-size
-  /// dimension (in units of work-group).
-  uint32_t clusterDim[3];
-  /// [in] non-zero value indicates a cooperative kernel
-  int cooperative;
-  /// [in] non-zero value indicates the amount of work group memory to
-  /// allocate in bytes
-  size_t workgroup_mem_size;
-  /// [in] non-zero value indicates an opportunistic native queue serialized
-  /// kernel
-  int opportunistic_queue_serialize;
-
-} ur_exp_launch_property_value_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Kernel launch property
-///
-/// @remarks
-///   _Analogues_
-///     - **cuLaunchAttribute**
-typedef struct ur_exp_launch_property_t {
-  /// [in] launch property id
-  ur_exp_launch_property_id_t id;
-  /// [in][tagged_by(id)] launch property value
-  ur_exp_launch_property_value_t value;
-
-} ur_exp_launch_property_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Launch kernel with custom launch properties
-///
-/// @details
-///     - Launches the kernel using the specified launch properties
-///     - If numPropsInLaunchPropList == 0 then a regular kernel launch is used:
-///       `urEnqueueKernelLaunch`
-///     - Consult the appropriate adapter driver documentation for details of
-///       adapter specific behavior and native error codes that may be returned.
-///
-/// @remarks
-///   _Analogues_
-///     - **cuLaunchKernelEx**
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
-///         + `NULL == hQueue`
-///         + `NULL == hKernel`
-///         + NULL == hQueue
-///         + NULL == hKernel
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///         + `NULL == pGlobalWorkOffset`
-///         + `NULL == pGlobalWorkSize`
-///         + `NULL == launchPropList`
-///         + NULL == pGlobalWorkSize
-///         + numPropsInLaunchpropList != 0 && launchPropList == NULL
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_QUEUE
-///     - ::UR_RESULT_ERROR_INVALID_KERNEL
-///     - ::UR_RESULT_ERROR_INVALID_EVENT
-///     - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
-///         + phEventWaitList == NULL && numEventsInWaitList > 0
-///         + phEventWaitList != NULL && numEventsInWaitList == 0
-///         + If event objects in phEventWaitList are not valid events.
-///     - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS
-///         + An event in phEventWaitList has ::UR_EVENT_STATUS_ERROR
-///     - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
-///     - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
-///     - ::UR_RESULT_ERROR_INVALID_VALUE
-///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
-///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function. If nullptr, the runtime implementation
-    /// will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the launch prop list
-    uint32_t numPropsInLaunchPropList,
-    /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch
-    /// properties
-    const ur_exp_launch_property_t *launchPropList,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution. If nullptr,
-    /// the numEventsInWaitList must be 0, indicating that no wait event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent);
-
 #if !defined(__GNUC__)
 #pragma endregion
 #endif
@@ -13657,17 +13524,17 @@ typedef struct ur_kernel_set_specialization_constants_params_t {
 } ur_kernel_set_specialization_constants_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for urKernelSuggestMaxCooperativeGroupCountExp
+/// @brief Function parameters for urKernelSuggestMaxCooperativeGroupCount
 /// @details Each entry is a pointer to the parameter passed to the function;
 ///     allowing the callback the ability to modify the parameter's value
-typedef struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t {
+typedef struct ur_kernel_suggest_max_cooperative_group_count_params_t {
   ur_kernel_handle_t *phKernel;
   ur_device_handle_t *phDevice;
   uint32_t *pworkDim;
   const size_t **ppLocalWorkSize;
   size_t *pdynamicSharedMemorySize;
   uint32_t **ppGroupCountRet;
-} ur_kernel_suggest_max_cooperative_group_count_exp_params_t;
+} ur_kernel_suggest_max_cooperative_group_count_params_t;
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for urQueueGetInfo
@@ -13966,6 +13833,8 @@ typedef struct ur_enqueue_kernel_launch_params_t {
   const size_t **ppGlobalWorkOffset;
   const size_t **ppGlobalWorkSize;
   const size_t **ppLocalWorkSize;
+  uint32_t *pnumPropsInLaunchPropList;
+  const ur_kernel_launch_property_t **plaunchPropList;
   uint32_t *pnumEventsInWaitList;
   const ur_event_handle_t **pphEventWaitList;
   ur_event_handle_t **pphEvent;
@@ -14358,24 +14227,6 @@ typedef struct ur_enqueue_write_host_pipe_params_t {
   ur_event_handle_t **pphEvent;
 } ur_enqueue_write_host_pipe_params_t;
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for urEnqueueKernelLaunchCustomExp
-/// @details Each entry is a pointer to the parameter passed to the function;
-///     allowing the callback the ability to modify the parameter's value
-typedef struct ur_enqueue_kernel_launch_custom_exp_params_t {
-  ur_queue_handle_t *phQueue;
-  ur_kernel_handle_t *phKernel;
-  uint32_t *pworkDim;
-  const size_t **ppGlobalWorkOffset;
-  const size_t **ppGlobalWorkSize;
-  const size_t **ppLocalWorkSize;
-  uint32_t *pnumPropsInLaunchPropList;
-  const ur_exp_launch_property_t **plaunchPropList;
-  uint32_t *pnumEventsInWaitList;
-  const ur_event_handle_t **pphEventWaitList;
-  ur_event_handle_t **pphEvent;
-} ur_enqueue_kernel_launch_custom_exp_params_t;
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for urEnqueueEventsWaitWithBarrierExt
 /// @details Each entry is a pointer to the parameter passed to the function;
@@ -14458,22 +14309,6 @@ typedef struct ur_enqueue_command_buffer_exp_params_t {
   ur_event_handle_t **pphEvent;
 } ur_enqueue_command_buffer_exp_params_t;
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp
-/// @details Each entry is a pointer to the parameter passed to the function;
-///     allowing the callback the ability to modify the parameter's value
-typedef struct ur_enqueue_cooperative_kernel_launch_exp_params_t {
-  ur_queue_handle_t *phQueue;
-  ur_kernel_handle_t *phKernel;
-  uint32_t *pworkDim;
-  const size_t **ppGlobalWorkOffset;
-  const size_t **ppGlobalWorkSize;
-  const size_t **ppLocalWorkSize;
-  uint32_t *pnumEventsInWaitList;
-  const ur_event_handle_t **pphEventWaitList;
-  ur_event_handle_t **pphEvent;
-} ur_enqueue_cooperative_kernel_launch_exp_params_t;
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function parameters for urEnqueueTimestampRecordingExp
 /// @details Each entry is a pointer to the parameter passed to the function;
diff --git a/unified-runtime/include/ur_api_funcs.def b/unified-runtime/include/ur_api_funcs.def
index c91297ccfd7dc..7b8e0b49f2e5c 100644
--- a/unified-runtime/include/ur_api_funcs.def
+++ b/unified-runtime/include/ur_api_funcs.def
@@ -78,7 +78,7 @@ _UR_API(urKernelSetExecInfo)
 _UR_API(urKernelSetArgSampler)
 _UR_API(urKernelSetArgMemObj)
 _UR_API(urKernelSetSpecializationConstants)
-_UR_API(urKernelSuggestMaxCooperativeGroupCountExp)
+_UR_API(urKernelSuggestMaxCooperativeGroupCount)
 _UR_API(urQueueGetInfo)
 _UR_API(urQueueCreate)
 _UR_API(urQueueRetain)
@@ -133,13 +133,11 @@ _UR_API(urEnqueueDeviceGlobalVariableRead)
 _UR_API(urEnqueueReadHostPipe)
 _UR_API(urEnqueueWriteHostPipe)
 _UR_API(urEnqueueEventsWaitWithBarrierExt)
-_UR_API(urEnqueueKernelLaunchCustomExp)
 _UR_API(urEnqueueUSMDeviceAllocExp)
 _UR_API(urEnqueueUSMSharedAllocExp)
 _UR_API(urEnqueueUSMHostAllocExp)
 _UR_API(urEnqueueUSMFreeExp)
 _UR_API(urEnqueueCommandBufferExp)
-_UR_API(urEnqueueCooperativeKernelLaunchExp)
 _UR_API(urEnqueueTimestampRecordingExp)
 _UR_API(urEnqueueNativeCommandExp)
 _UR_API(urUSMHostAlloc)
diff --git a/unified-runtime/include/ur_ddi.h b/unified-runtime/include/ur_ddi.h
index 07e37c79cae14..f0c6d14e56a6f 100644
--- a/unified-runtime/include/ur_ddi.h
+++ b/unified-runtime/include/ur_ddi.h
@@ -556,6 +556,12 @@ typedef ur_result_t(UR_APICALL *ur_pfnKernelSetArgMemObj_t)(
 typedef ur_result_t(UR_APICALL *ur_pfnKernelSetSpecializationConstants_t)(
     ur_kernel_handle_t, uint32_t, const ur_specialization_constant_info_t *);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCount
+typedef ur_result_t(UR_APICALL *ur_pfnKernelSuggestMaxCooperativeGroupCount_t)(
+    ur_kernel_handle_t, ur_device_handle_t, uint32_t, const size_t *, size_t,
+    uint32_t *);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Table of Kernel functions pointers
 typedef struct ur_kernel_dditable_t {
@@ -575,6 +581,8 @@ typedef struct ur_kernel_dditable_t {
   ur_pfnKernelSetArgSampler_t pfnSetArgSampler;
   ur_pfnKernelSetArgMemObj_t pfnSetArgMemObj;
   ur_pfnKernelSetSpecializationConstants_t pfnSetSpecializationConstants;
+  ur_pfnKernelSuggestMaxCooperativeGroupCount_t
+      pfnSuggestMaxCooperativeGroupCount;
 } ur_kernel_dditable_t;
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -597,40 +605,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
 typedef ur_result_t(UR_APICALL *ur_pfnGetKernelProcAddrTable_t)(
     ur_api_version_t, ur_kernel_dditable_t *);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCountExp
-typedef ur_result_t(
-    UR_APICALL *ur_pfnKernelSuggestMaxCooperativeGroupCountExp_t)(
-    ur_kernel_handle_t, ur_device_handle_t, uint32_t, const size_t *, size_t,
-    uint32_t *);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Table of KernelExp functions pointers
-typedef struct ur_kernel_exp_dditable_t {
-  ur_pfnKernelSuggestMaxCooperativeGroupCountExp_t
-      pfnSuggestMaxCooperativeGroupCountExp;
-} ur_kernel_exp_dditable_t;
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Exported function for filling application's KernelExp table
-///        with current process' addresses
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    /// [in] API version requested
-    ur_api_version_t version,
-    /// [in,out] pointer to table of DDI function pointers
-    ur_kernel_exp_dditable_t *pDdiTable);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function-pointer for urGetKernelExpProcAddrTable
-typedef ur_result_t(UR_APICALL *ur_pfnGetKernelExpProcAddrTable_t)(
-    ur_api_version_t, ur_kernel_exp_dditable_t *);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function-pointer for urQueueGetInfo
 typedef ur_result_t(UR_APICALL *ur_pfnQueueGetInfo_t)(ur_queue_handle_t,
@@ -911,7 +885,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetPhysicalMemProcAddrTable_t)(
 /// @brief Function-pointer for urEnqueueKernelLaunch
 typedef ur_result_t(UR_APICALL *ur_pfnEnqueueKernelLaunch_t)(
     ur_queue_handle_t, ur_kernel_handle_t, uint32_t, const size_t *,
-    const size_t *, const size_t *, uint32_t, const ur_event_handle_t *,
+    const size_t *, const size_t *, uint32_t,
+    const ur_kernel_launch_property_t *, uint32_t, const ur_event_handle_t *,
     ur_event_handle_t *);
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -1122,13 +1097,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
 typedef ur_result_t(UR_APICALL *ur_pfnGetEnqueueProcAddrTable_t)(
     ur_api_version_t, ur_enqueue_dditable_t *);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function-pointer for urEnqueueKernelLaunchCustomExp
-typedef ur_result_t(UR_APICALL *ur_pfnEnqueueKernelLaunchCustomExp_t)(
-    ur_queue_handle_t, ur_kernel_handle_t, uint32_t, const size_t *,
-    const size_t *, const size_t *, uint32_t, const ur_exp_launch_property_t *,
-    uint32_t, const ur_event_handle_t *, ur_event_handle_t *);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function-pointer for urEnqueueUSMDeviceAllocExp
 typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMDeviceAllocExp_t)(
@@ -1162,13 +1130,6 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCommandBufferExp_t)(
     ur_queue_handle_t, ur_exp_command_buffer_handle_t, uint32_t,
     const ur_event_handle_t *, ur_event_handle_t *);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp
-typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)(
-    ur_queue_handle_t, ur_kernel_handle_t, uint32_t, const size_t *,
-    const size_t *, const size_t *, uint32_t, const ur_event_handle_t *,
-    ur_event_handle_t *);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Function-pointer for urEnqueueTimestampRecordingExp
 typedef ur_result_t(UR_APICALL *ur_pfnEnqueueTimestampRecordingExp_t)(
@@ -1186,13 +1147,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueNativeCommandExp_t)(
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Table of EnqueueExp functions pointers
 typedef struct ur_enqueue_exp_dditable_t {
-  ur_pfnEnqueueKernelLaunchCustomExp_t pfnKernelLaunchCustomExp;
   ur_pfnEnqueueUSMDeviceAllocExp_t pfnUSMDeviceAllocExp;
   ur_pfnEnqueueUSMSharedAllocExp_t pfnUSMSharedAllocExp;
   ur_pfnEnqueueUSMHostAllocExp_t pfnUSMHostAllocExp;
   ur_pfnEnqueueUSMFreeExp_t pfnUSMFreeExp;
   ur_pfnEnqueueCommandBufferExp_t pfnCommandBufferExp;
-  ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp;
   ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp;
   ur_pfnEnqueueNativeCommandExp_t pfnNativeCommandExp;
 } ur_enqueue_exp_dditable_t;
@@ -2021,7 +1980,6 @@ typedef struct ur_dditable_t {
   ur_program_dditable_t Program;
   ur_program_exp_dditable_t ProgramExp;
   ur_kernel_dditable_t Kernel;
-  ur_kernel_exp_dditable_t KernelExp;
   ur_queue_dditable_t Queue;
   ur_sampler_dditable_t Sampler;
   ur_mem_dditable_t Mem;
diff --git a/unified-runtime/include/ur_print.h b/unified-runtime/include/ur_print.h
index 2a27a436d6312..f281f18aff0a5 100644
--- a/unified-runtime/include/ur_print.h
+++ b/unified-runtime/include/ur_print.h
@@ -343,6 +343,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceThrottleReasonsFlags(
     enum ur_device_throttle_reasons_flag_t value, char *buffer,
     const size_t buff_size, size_t *out_size);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print ur_kernel_launch_properties_flag_t enum
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_INVALID_SIZE
+///         - `buff_size < out_size`
+UR_APIEXPORT ur_result_t UR_APICALL urPrintKernelLaunchPropertiesFlags(
+    enum ur_kernel_launch_properties_flag_t value, char *buffer,
+    const size_t buff_size, size_t *out_size);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print ur_context_flag_t enum
 /// @returns
@@ -1101,6 +1111,26 @@ UR_APIEXPORT ur_result_t UR_APICALL
 urPrintExecutionInfo(enum ur_execution_info_t value, char *buffer,
                      const size_t buff_size, size_t *out_size);
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print ur_kernel_launch_property_id_t enum
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_INVALID_SIZE
+///         - `buff_size < out_size`
+UR_APIEXPORT ur_result_t UR_APICALL urPrintKernelLaunchPropertyId(
+    enum ur_kernel_launch_property_id_t value, char *buffer,
+    const size_t buff_size, size_t *out_size);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print ur_kernel_launch_property_t struct
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_INVALID_SIZE
+///         - `buff_size < out_size`
+UR_APIEXPORT ur_result_t UR_APICALL urPrintKernelLaunchProperty(
+    const struct ur_kernel_launch_property_t params, char *buffer,
+    const size_t buff_size, size_t *out_size);
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print ur_map_flag_t enum
 /// @returns
@@ -1375,26 +1405,6 @@ urPrintExpCommandBufferUpdateKernelLaunchDesc(
     const struct ur_exp_command_buffer_update_kernel_launch_desc_t params,
     char *buffer, const size_t buff_size, size_t *out_size);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Print ur_exp_launch_property_id_t enum
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_SIZE
-///         - `buff_size < out_size`
-UR_APIEXPORT ur_result_t UR_APICALL
-urPrintExpLaunchPropertyId(enum ur_exp_launch_property_id_t value, char *buffer,
-                           const size_t buff_size, size_t *out_size);
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Print ur_exp_launch_property_t struct
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_SIZE
-///         - `buff_size < out_size`
-UR_APIEXPORT ur_result_t UR_APICALL urPrintExpLaunchProperty(
-    const struct ur_exp_launch_property_t params, char *buffer,
-    const size_t buff_size, size_t *out_size);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print ur_exp_peer_info_t enum
 /// @returns
@@ -2131,16 +2141,14 @@ urPrintKernelSetSpecializationConstantsParams(
     char *buffer, const size_t buff_size, size_t *out_size);
 
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Print ur_kernel_suggest_max_cooperative_group_count_exp_params_t
-/// struct
+/// @brief Print ur_kernel_suggest_max_cooperative_group_count_params_t struct
 /// @returns
 ///     - ::UR_RESULT_SUCCESS
 ///     - ::UR_RESULT_ERROR_INVALID_SIZE
 ///         - `buff_size < out_size`
 UR_APIEXPORT ur_result_t UR_APICALL
-urPrintKernelSuggestMaxCooperativeGroupCountExpParams(
-    const struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t
-        *params,
+urPrintKernelSuggestMaxCooperativeGroupCountParams(
+    const struct ur_kernel_suggest_max_cooperative_group_count_params_t *params,
     char *buffer, const size_t buff_size, size_t *out_size);
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -2676,16 +2684,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(
     const struct ur_enqueue_write_host_pipe_params_t *params, char *buffer,
     const size_t buff_size, size_t *out_size);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Print ur_enqueue_kernel_launch_custom_exp_params_t struct
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_SIZE
-///         - `buff_size < out_size`
-UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueKernelLaunchCustomExpParams(
-    const struct ur_enqueue_kernel_launch_custom_exp_params_t *params,
-    char *buffer, const size_t buff_size, size_t *out_size);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print ur_enqueue_events_wait_with_barrier_ext_params_t struct
 /// @returns
@@ -2747,17 +2745,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueCommandBufferExpParams(
     const struct ur_enqueue_command_buffer_exp_params_t *params, char *buffer,
     const size_t buff_size, size_t *out_size);
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Print ur_enqueue_cooperative_kernel_launch_exp_params_t struct
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_SIZE
-///         - `buff_size < out_size`
-UR_APIEXPORT ur_result_t UR_APICALL
-urPrintEnqueueCooperativeKernelLaunchExpParams(
-    const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params,
-    char *buffer, const size_t buff_size, size_t *out_size);
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print ur_enqueue_timestamp_recording_exp_params_t struct
 /// @returns
diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp
index 180732a4f1909..e039cca3cbf85 100644
--- a/unified-runtime/include/ur_print.hpp
+++ b/unified-runtime/include/ur_print.hpp
@@ -106,6 +106,10 @@ template <>
 inline ur_result_t
 printFlag<ur_device_throttle_reasons_flag_t>(std::ostream &os, uint32_t flag);
 
+template <>
+inline ur_result_t
+printFlag<ur_kernel_launch_properties_flag_t>(std::ostream &os, uint32_t flag);
+
 template <>
 inline ur_result_t printFlag<ur_context_flag_t>(std::ostream &os,
                                                 uint32_t flag);
@@ -217,6 +221,11 @@ template <>
 inline ur_result_t printTagged(std::ostream &os, const void *ptr,
                                ur_profiling_info_t value, size_t size);
 
+inline ur_result_t
+printUnion(std::ostream &os,
+           const union ur_kernel_launch_property_value_t params,
+           const enum ur_kernel_launch_property_id_t tag);
+
 template <>
 inline ur_result_t printFlag<ur_map_flag_t>(std::ostream &os, uint32_t flag);
 
@@ -250,10 +259,6 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr,
                                ur_exp_command_buffer_command_info_t value,
                                size_t size);
 
-inline ur_result_t printUnion(std::ostream &os,
-                              const union ur_exp_launch_property_value_t params,
-                              const enum ur_exp_launch_property_id_t tag);
-
 template <>
 inline ur_result_t printTagged(std::ostream &os, const void *ptr,
                                ur_exp_peer_info_t value, size_t size);
@@ -335,6 +340,8 @@ inline std::ostream &
 operator<<(std::ostream &os, enum ur_device_usm_access_capability_flag_t value);
 inline std::ostream &operator<<(std::ostream &os,
                                 enum ur_device_throttle_reasons_flag_t value);
+inline std::ostream &operator<<(std::ostream &os,
+                                enum ur_kernel_launch_properties_flag_t value);
 inline std::ostream &operator<<(std::ostream &os, enum ur_context_flag_t value);
 inline std::ostream &
 operator<<(std::ostream &os,
@@ -504,6 +511,11 @@ operator<<(std::ostream &os,
            [[maybe_unused]] const struct ur_event_native_properties_t params);
 inline std::ostream &operator<<(std::ostream &os,
                                 enum ur_execution_info_t value);
+inline std::ostream &operator<<(std::ostream &os,
+                                enum ur_kernel_launch_property_id_t value);
+inline std::ostream &
+operator<<(std::ostream &os,
+           [[maybe_unused]] const struct ur_kernel_launch_property_t params);
 inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value);
 inline std::ostream &operator<<(std::ostream &os,
                                 enum ur_usm_migration_flag_t value);
@@ -576,11 +588,6 @@ inline std::ostream &operator<<(
 inline std::ostream &
 operator<<(std::ostream &os, [[maybe_unused]] const struct
            ur_exp_command_buffer_update_kernel_launch_desc_t params);
-inline std::ostream &operator<<(std::ostream &os,
-                                enum ur_exp_launch_property_id_t value);
-inline std::ostream &
-operator<<(std::ostream &os,
-           [[maybe_unused]] const struct ur_exp_launch_property_t params);
 inline std::ostream &operator<<(std::ostream &os,
                                 enum ur_exp_peer_info_t value);
 inline std::ostream &operator<<(std::ostream &os,
@@ -1102,12 +1109,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
   case UR_FUNCTION_LOADER_TEAR_DOWN:
     os << "UR_FUNCTION_LOADER_TEAR_DOWN";
     break;
-  case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP:
-    os << "UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP";
-    break;
-  case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP:
-    os << "UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP";
-    break;
   case UR_FUNCTION_PROGRAM_GET_GLOBAL_VARIABLE_POINTER:
     os << "UR_FUNCTION_PROGRAM_GET_GLOBAL_VARIABLE_POINTER";
     break;
@@ -1123,9 +1124,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
   case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP:
     os << "UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP";
     break;
-  case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP:
-    os << "UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP";
-    break;
   case UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE:
     os << "UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE";
     break;
@@ -1256,6 +1254,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
   case UR_FUNCTION_BINDLESS_IMAGES_FREE_MAPPED_LINEAR_MEMORY_EXP:
     os << "UR_FUNCTION_BINDLESS_IMAGES_FREE_MAPPED_LINEAR_MEMORY_EXP";
     break;
+  case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT:
+    os << "UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT";
+    break;
   default:
     os << "unknown enumerator";
     break;
@@ -2984,6 +2985,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
   case UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE:
     os << "UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE";
     break;
+  case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES:
+    os << "UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES";
+    break;
   case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
     os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP";
     break;
@@ -2996,9 +3000,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
   case UR_DEVICE_INFO_COMMAND_BUFFER_SUBGRAPH_SUPPORT_EXP:
     os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUBGRAPH_SUPPORT_EXP";
     break;
-  case UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP:
-    os << "UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP";
-    break;
   case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP:
     os << "UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP";
     break;
@@ -3092,15 +3093,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
   case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP:
     os << "UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP";
     break;
-  case UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP:
-    os << "UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP";
-    break;
   case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP:
     os << "UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP";
     break;
-  case UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP:
-    os << "UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP";
-    break;
   case UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP:
     os << "UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP";
     break;
@@ -4729,6 +4724,21 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr,
 
     os << ")";
   } break;
+  case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES: {
+    const ur_kernel_launch_properties_flags_t *tptr =
+        (const ur_kernel_launch_properties_flags_t *)ptr;
+    if (sizeof(ur_kernel_launch_properties_flags_t) > size) {
+      os << "invalid size (is: " << size
+         << ", expected: >=" << sizeof(ur_kernel_launch_properties_flags_t)
+         << ")";
+      return UR_RESULT_ERROR_INVALID_SIZE;
+    }
+    os << (const void *)(tptr) << " (";
+
+    ur::details::printFlag<ur_kernel_launch_properties_flag_t>(os, *tptr);
+
+    os << ")";
+  } break;
   case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
     const ur_bool_t *tptr = (const ur_bool_t *)ptr;
     if (sizeof(ur_bool_t) > size) {
@@ -4783,19 +4793,6 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr,
 
     os << ")";
   } break;
-  case UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP: {
-    const ur_bool_t *tptr = (const ur_bool_t *)ptr;
-    if (sizeof(ur_bool_t) > size) {
-      os << "invalid size (is: " << size
-         << ", expected: >=" << sizeof(ur_bool_t) << ")";
-      return UR_RESULT_ERROR_INVALID_SIZE;
-    }
-    os << (const void *)(tptr) << " (";
-
-    os << *tptr;
-
-    os << ")";
-  } break;
   case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: {
     const ur_bool_t *tptr = (const ur_bool_t *)ptr;
     if (sizeof(ur_bool_t) > size) {
@@ -5201,19 +5198,6 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr,
 
     os << ")";
   } break;
-  case UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP: {
-    const ur_bool_t *tptr = (const ur_bool_t *)ptr;
-    if (sizeof(ur_bool_t) > size) {
-      os << "invalid size (is: " << size
-         << ", expected: >=" << sizeof(ur_bool_t) << ")";
-      return UR_RESULT_ERROR_INVALID_SIZE;
-    }
-    os << (const void *)(tptr) << " (";
-
-    os << *tptr;
-
-    os << ")";
-  } break;
   case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP: {
     const ur_bool_t *tptr = (const ur_bool_t *)ptr;
     if (sizeof(ur_bool_t) > size) {
@@ -5227,19 +5211,6 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr,
 
     os << ")";
   } break;
-  case UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP: {
-    const ur_bool_t *tptr = (const ur_bool_t *)ptr;
-    if (sizeof(ur_bool_t) > size) {
-      os << "invalid size (is: " << size
-         << ", expected: >=" << sizeof(ur_bool_t) << ")";
-      return UR_RESULT_ERROR_INVALID_SIZE;
-    }
-    os << (const void *)(tptr) << " (";
-
-    os << *tptr;
-
-    os << ")";
-  } break;
   case UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP: {
     const ur_bool_t *tptr = (const ur_bool_t *)ptr;
     if (sizeof(ur_bool_t) > size) {
@@ -6207,6 +6178,98 @@ printFlag<ur_device_throttle_reasons_flag_t>(std::ostream &os, uint32_t flag) {
 }
 } // namespace ur::details
 ///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ur_kernel_launch_properties_flag_t type
+/// @returns
+///     std::ostream &
+inline std::ostream &operator<<(std::ostream &os,
+                                enum ur_kernel_launch_properties_flag_t value) {
+  switch (value) {
+  case UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE:
+    os << "UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE";
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION:
+    os << "UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION";
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTIES_FLAG_WORK_GROUP_MEMORY:
+    os << "UR_KERNEL_LAUNCH_PROPERTIES_FLAG_WORK_GROUP_MEMORY";
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTIES_FLAG_OPPORTUNISTIC_QUEUE_SERIALIZE:
+    os << "UR_KERNEL_LAUNCH_PROPERTIES_FLAG_OPPORTUNISTIC_QUEUE_SERIALIZE";
+    break;
+  default:
+    os << "unknown enumerator";
+    break;
+  }
+  return os;
+}
+
+namespace ur::details {
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print ur_kernel_launch_properties_flag_t flag
+template <>
+inline ur_result_t
+printFlag<ur_kernel_launch_properties_flag_t>(std::ostream &os, uint32_t flag) {
+  uint32_t val = flag;
+  bool first = true;
+
+  if ((val & UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE) ==
+      (uint32_t)UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE) {
+    val ^= (uint32_t)UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE;
+    if (!first) {
+      os << " | ";
+    } else {
+      first = false;
+    }
+    os << UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE;
+  }
+
+  if ((val & UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION) ==
+      (uint32_t)UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION) {
+    val ^= (uint32_t)UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION;
+    if (!first) {
+      os << " | ";
+    } else {
+      first = false;
+    }
+    os << UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION;
+  }
+
+  if ((val & UR_KERNEL_LAUNCH_PROPERTIES_FLAG_WORK_GROUP_MEMORY) ==
+      (uint32_t)UR_KERNEL_LAUNCH_PROPERTIES_FLAG_WORK_GROUP_MEMORY) {
+    val ^= (uint32_t)UR_KERNEL_LAUNCH_PROPERTIES_FLAG_WORK_GROUP_MEMORY;
+    if (!first) {
+      os << " | ";
+    } else {
+      first = false;
+    }
+    os << UR_KERNEL_LAUNCH_PROPERTIES_FLAG_WORK_GROUP_MEMORY;
+  }
+
+  if ((val & UR_KERNEL_LAUNCH_PROPERTIES_FLAG_OPPORTUNISTIC_QUEUE_SERIALIZE) ==
+      (uint32_t)
+          UR_KERNEL_LAUNCH_PROPERTIES_FLAG_OPPORTUNISTIC_QUEUE_SERIALIZE) {
+    val ^= (uint32_t)
+        UR_KERNEL_LAUNCH_PROPERTIES_FLAG_OPPORTUNISTIC_QUEUE_SERIALIZE;
+    if (!first) {
+      os << " | ";
+    } else {
+      first = false;
+    }
+    os << UR_KERNEL_LAUNCH_PROPERTIES_FLAG_OPPORTUNISTIC_QUEUE_SERIALIZE;
+  }
+  if (val != 0) {
+    std::bitset<32> bits(val);
+    if (!first) {
+      os << " | ";
+    }
+    os << "unknown bit flags " << bits;
+  } else if (first) {
+    os << "0";
+  }
+  return UR_RESULT_SUCCESS;
+}
+} // namespace ur::details
+///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ur_context_flag_t type
 /// @returns
 ///     std::ostream &
@@ -10708,6 +10771,106 @@ inline std::ostream &operator<<(std::ostream &os,
   return os;
 }
 ///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ur_kernel_launch_property_id_t type
+/// @returns
+///     std::ostream & (os itself, after the text below has been written)
+inline std::ostream &operator<<(std::ostream &os,
+                                enum ur_kernel_launch_property_id_t value) {
+  switch (value) { // each handled id is written as its enumerator name
+  case UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE:
+    os << "UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE";
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE:
+    os << "UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE";
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION:
+    os << "UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION";
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY:
+    os << "UR_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY";
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE:
+    os << "UR_KERNEL_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE";
+    break;
+  default: // any value that has no case above
+    os << "unknown enumerator";
+    break;
+  }
+  return os; // same stream that was passed in
+}
+namespace ur::details {
+
+///////////////////////////////////////////////////////////////////////////////
+// @brief Print the ur_kernel_launch_property_value_t member selected by tag
+inline ur_result_t
+printUnion(std::ostream &os,
+           const union ur_kernel_launch_property_value_t params,
+           const enum ur_kernel_launch_property_id_t tag) {
+  os << "(union ur_kernel_launch_property_value_t){";
+
+  switch (tag) { // tag decides which single union member is read
+  case UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION:
+
+    os << ".clusterDim = {";
+    for (auto i = 0; i < 3; i++) { // clusterDim[0..2], separated by ", "
+      if (i != 0) {
+        os << ", ";
+      }
+
+      os << (params.clusterDim[i]);
+    }
+    os << "}";
+
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE:
+
+    os << ".cooperative = ";
+
+    os << (params.cooperative);
+
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY:
+
+    os << ".workgroup_mem_size = ";
+
+    os << (params.workgroup_mem_size);
+
+    break;
+  case UR_KERNEL_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE:
+
+    os << ".opportunistic_queue_serialize = ";
+
+    os << (params.opportunistic_queue_serialize);
+
+    break;
+  default: // includes ..._ID_IGNORE (no case above); final "}" is skipped
+    os << "<unknown>";
+    return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  }
+  os << "}"; // closes the brace opened by the first write in this function
+  return UR_RESULT_SUCCESS;
+}
+} // namespace ur::details
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Print operator for the ur_kernel_launch_property_t type
+/// @returns
+///     std::ostream & (os itself, after the text below has been written)
+inline std::ostream &
+operator<<(std::ostream &os, const struct ur_kernel_launch_property_t params) {
+  os << "(struct ur_kernel_launch_property_t){";
+
+  os << ".id = ";
+
+  os << (params.id);
+
+  os << ", ";
+  os << ".value = "; // params.id is passed as the tag for the union below
+  ur::details::printUnion(os, (params.value), params.id); // result unused
+
+  os << "}";
+  return os;
+}
+///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ur_map_flag_t type
 /// @returns
 ///     std::ostream &
@@ -11956,105 +12119,6 @@ inline std::ostream &operator<<(
   return os;
 }
 ///////////////////////////////////////////////////////////////////////////////
-/// @brief Print operator for the ur_exp_launch_property_id_t type
-/// @returns
-///     std::ostream &
-inline std::ostream &operator<<(std::ostream &os,
-                                enum ur_exp_launch_property_id_t value) {
-  switch (value) {
-  case UR_EXP_LAUNCH_PROPERTY_ID_IGNORE:
-    os << "UR_EXP_LAUNCH_PROPERTY_ID_IGNORE";
-    break;
-  case UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE:
-    os << "UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE";
-    break;
-  case UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION:
-    os << "UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION";
-    break;
-  case UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY:
-    os << "UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY";
-    break;
-  case UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE:
-    os << "UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE";
-    break;
-  default:
-    os << "unknown enumerator";
-    break;
-  }
-  return os;
-}
-namespace ur::details {
-
-///////////////////////////////////////////////////////////////////////////////
-// @brief Print ur_exp_launch_property_value_t union
-inline ur_result_t printUnion(std::ostream &os,
-                              const union ur_exp_launch_property_value_t params,
-                              const enum ur_exp_launch_property_id_t tag) {
-  os << "(union ur_exp_launch_property_value_t){";
-
-  switch (tag) {
-  case UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION:
-
-    os << ".clusterDim = {";
-    for (auto i = 0; i < 3; i++) {
-      if (i != 0) {
-        os << ", ";
-      }
-
-      os << (params.clusterDim[i]);
-    }
-    os << "}";
-
-    break;
-  case UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE:
-
-    os << ".cooperative = ";
-
-    os << (params.cooperative);
-
-    break;
-  case UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY:
-
-    os << ".workgroup_mem_size = ";
-
-    os << (params.workgroup_mem_size);
-
-    break;
-  case UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE:
-
-    os << ".opportunistic_queue_serialize = ";
-
-    os << (params.opportunistic_queue_serialize);
-
-    break;
-  default:
-    os << "<unknown>";
-    return UR_RESULT_ERROR_INVALID_ENUMERATION;
-  }
-  os << "}";
-  return UR_RESULT_SUCCESS;
-}
-} // namespace ur::details
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Print operator for the ur_exp_launch_property_t type
-/// @returns
-///     std::ostream &
-inline std::ostream &operator<<(std::ostream &os,
-                                const struct ur_exp_launch_property_t params) {
-  os << "(struct ur_exp_launch_property_t){";
-
-  os << ".id = ";
-
-  os << (params.id);
-
-  os << ", ";
-  os << ".value = ";
-  ur::details::printUnion(os, (params.value), params.id);
-
-  os << "}";
-  return os;
-}
-///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ur_exp_peer_info_t type
 /// @returns
 ///     std::ostream &
@@ -14307,12 +14371,12 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the
-/// ur_kernel_suggest_max_cooperative_group_count_exp_params_t type
+/// ur_kernel_suggest_max_cooperative_group_count_params_t type
 /// @returns
 ///     std::ostream &
 inline std::ostream &
 operator<<(std::ostream &os, [[maybe_unused]] const struct
-           ur_kernel_suggest_max_cooperative_group_count_exp_params_t *params) {
+           ur_kernel_suggest_max_cooperative_group_count_params_t *params) {
 
   os << ".hKernel = ";
 
@@ -15121,6 +15185,27 @@ inline std::ostream &operator<<(
 
   ur::details::printPtr(os, *(params->ppLocalWorkSize));
 
+  os << ", ";
+  os << ".numPropsInLaunchPropList = ";
+
+  os << *(params->pnumPropsInLaunchPropList);
+
+  os << ", ";
+  os << ".launchPropList = ";
+  ur::details::printPtr(
+      os, reinterpret_cast<const void *>(*(params->plaunchPropList)));
+  if (*(params->plaunchPropList) != NULL) {
+    os << " {";
+    for (size_t i = 0; i < *params->pnumPropsInLaunchPropList; ++i) {
+      if (i != 0) {
+        os << ", ";
+      }
+
+      os << (*(params->plaunchPropList))[i];
+    }
+    os << "}";
+  }
+
   os << ", ";
   os << ".numEventsInWaitList = ";
 
@@ -16746,95 +16831,6 @@ inline std::ostream &operator<<(
   return os;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Print operator for the ur_enqueue_kernel_launch_custom_exp_params_t
-/// type
-/// @returns
-///     std::ostream &
-inline std::ostream &operator<<(
-    std::ostream &os,
-    [[maybe_unused]] const struct ur_enqueue_kernel_launch_custom_exp_params_t
-        *params) {
-
-  os << ".hQueue = ";
-
-  ur::details::printPtr(os, *(params->phQueue));
-
-  os << ", ";
-  os << ".hKernel = ";
-
-  ur::details::printPtr(os, *(params->phKernel));
-
-  os << ", ";
-  os << ".workDim = ";
-
-  os << *(params->pworkDim);
-
-  os << ", ";
-  os << ".pGlobalWorkOffset = ";
-
-  ur::details::printPtr(os, *(params->ppGlobalWorkOffset));
-
-  os << ", ";
-  os << ".pGlobalWorkSize = ";
-
-  ur::details::printPtr(os, *(params->ppGlobalWorkSize));
-
-  os << ", ";
-  os << ".pLocalWorkSize = ";
-
-  ur::details::printPtr(os, *(params->ppLocalWorkSize));
-
-  os << ", ";
-  os << ".numPropsInLaunchPropList = ";
-
-  os << *(params->pnumPropsInLaunchPropList);
-
-  os << ", ";
-  os << ".launchPropList = ";
-  ur::details::printPtr(
-      os, reinterpret_cast<const void *>(*(params->plaunchPropList)));
-  if (*(params->plaunchPropList) != NULL) {
-    os << " {";
-    for (size_t i = 0; i < *params->pnumPropsInLaunchPropList; ++i) {
-      if (i != 0) {
-        os << ", ";
-      }
-
-      os << (*(params->plaunchPropList))[i];
-    }
-    os << "}";
-  }
-
-  os << ", ";
-  os << ".numEventsInWaitList = ";
-
-  os << *(params->pnumEventsInWaitList);
-
-  os << ", ";
-  os << ".phEventWaitList = ";
-  ur::details::printPtr(
-      os, reinterpret_cast<const void *>(*(params->pphEventWaitList)));
-  if (*(params->pphEventWaitList) != NULL) {
-    os << " {";
-    for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) {
-      if (i != 0) {
-        os << ", ";
-      }
-
-      ur::details::printPtr(os, (*(params->pphEventWaitList))[i]);
-    }
-    os << "}";
-  }
-
-  os << ", ";
-  os << ".phEvent = ";
-
-  ur::details::printPtr(os, *(params->pphEvent));
-
-  return os;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the
 /// ur_enqueue_events_wait_with_barrier_ext_params_t type
@@ -17166,73 +17162,6 @@ operator<<(std::ostream &os,
   return os;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Print operator for the
-/// ur_enqueue_cooperative_kernel_launch_exp_params_t type
-/// @returns
-///     std::ostream &
-inline std::ostream &
-operator<<(std::ostream &os, [[maybe_unused]] const struct
-           ur_enqueue_cooperative_kernel_launch_exp_params_t *params) {
-
-  os << ".hQueue = ";
-
-  ur::details::printPtr(os, *(params->phQueue));
-
-  os << ", ";
-  os << ".hKernel = ";
-
-  ur::details::printPtr(os, *(params->phKernel));
-
-  os << ", ";
-  os << ".workDim = ";
-
-  os << *(params->pworkDim);
-
-  os << ", ";
-  os << ".pGlobalWorkOffset = ";
-
-  ur::details::printPtr(os, *(params->ppGlobalWorkOffset));
-
-  os << ", ";
-  os << ".pGlobalWorkSize = ";
-
-  ur::details::printPtr(os, *(params->ppGlobalWorkSize));
-
-  os << ", ";
-  os << ".pLocalWorkSize = ";
-
-  ur::details::printPtr(os, *(params->ppLocalWorkSize));
-
-  os << ", ";
-  os << ".numEventsInWaitList = ";
-
-  os << *(params->pnumEventsInWaitList);
-
-  os << ", ";
-  os << ".phEventWaitList = ";
-  ur::details::printPtr(
-      os, reinterpret_cast<const void *>(*(params->pphEventWaitList)));
-  if (*(params->pphEventWaitList) != NULL) {
-    os << " {";
-    for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) {
-      if (i != 0) {
-        os << ", ";
-      }
-
-      ur::details::printPtr(os, (*(params->pphEventWaitList))[i]);
-    }
-    os << "}";
-  }
-
-  os << ", ";
-  os << ".phEvent = ";
-
-  ur::details::printPtr(os, *(params->pphEvent));
-
-  return os;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Print operator for the ur_enqueue_timestamp_recording_exp_params_t
 /// type
@@ -21093,9 +21022,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os,
     os << (const struct ur_kernel_set_specialization_constants_params_t *)
             params;
   } break;
-  case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP: {
-    os << (const struct
-           ur_kernel_suggest_max_cooperative_group_count_exp_params_t *)params;
+  case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT: {
+    os << (const struct ur_kernel_suggest_max_cooperative_group_count_params_t
+               *)params;
   } break;
   case UR_FUNCTION_QUEUE_GET_INFO: {
     os << (const struct ur_queue_get_info_params_t *)params;
@@ -21260,9 +21189,6 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os,
   case UR_FUNCTION_ENQUEUE_WRITE_HOST_PIPE: {
     os << (const struct ur_enqueue_write_host_pipe_params_t *)params;
   } break;
-  case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP: {
-    os << (const struct ur_enqueue_kernel_launch_custom_exp_params_t *)params;
-  } break;
   case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: {
     os << (const struct ur_enqueue_events_wait_with_barrier_ext_params_t *)
             params;
@@ -21282,10 +21208,6 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os,
   case UR_FUNCTION_ENQUEUE_COMMAND_BUFFER_EXP: {
     os << (const struct ur_enqueue_command_buffer_exp_params_t *)params;
   } break;
-  case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: {
-    os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)
-            params;
-  } break;
   case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP: {
     os << (const struct ur_enqueue_timestamp_recording_exp_params_t *)params;
   } break;
diff --git a/unified-runtime/scripts/core/EXP-COOPERATIVE-KERNELS.rst b/unified-runtime/scripts/core/EXP-COOPERATIVE-KERNELS.rst
deleted file mode 100644
index ba055c48ff7a9..0000000000000
--- a/unified-runtime/scripts/core/EXP-COOPERATIVE-KERNELS.rst
+++ /dev/null
@@ -1,72 +0,0 @@
-<%
-    OneApi=tags['$OneApi']
-    x=tags['$x']
-    X=x.upper()
-%>
-
-.. _experimental-cooperative-kernels:
-
-================================================================================
-Cooperative Kernels
-================================================================================
-
-.. warning::
-
-    Experimental features:
-
-    *   May be replaced, updated, or removed at any time.
-    *   Do not require maintaining API/ABI stability of their own additions over
-        time.
-    *   Do not require conformance testing of their own additions.
-
-
-Motivation
---------------------------------------------------------------------------------
-Cooperative kernels are kernels that use cross-workgroup synchronization
-features. All enqueued workgroups must run concurrently for cooperative kernels
-to execute without hanging. This experimental feature provides an API for
-querying the maximum number of workgroups and launching cooperative kernels.
-
-Any device can support cooperative kernels by restricting the maximum number of
-workgroups to 1. Devices that support cross-workgroup synchronization can
-specify a larger maximum for a given cooperative kernel.
-
-The functions defined here align with those specified in Level Zero.
-
-API
---------------------------------------------------------------------------------
-
-Enums
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-* ${x}_device_info_t
-    * ${X}_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP
-
-Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-* ${x}EnqueueCooperativeKernelLaunchExp
-* ${x}KernelSuggestMaxCooperativeGroupCountExp
-
-Changelog
---------------------------------------------------------------------------------
-+-----------+---------------------------------------------+
-| Revision  | Changes                                     |
-+===========+=============================================+
-| 1.0       | Initial Draft                               |
-+-----------+---------------------------------------------+
-| 1.1       | Switch from extension string macro to       |
-|           | device info enum for reporting support.     |
-+-----------+---------------------------------------------+
-
-Support
---------------------------------------------------------------------------------
-
-Adapters which support this experimental feature *must* return ``true`` when
-queried for ${X}_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP via
-${x}DeviceGetInfo. Conversely, before using any of the functionality defined in
-this experimental feature the user *must* use the device query to determine if
-the adapter supports this feature.
-
-Contributors
---------------------------------------------------------------------------------
-* Michael Aziz `michael.aziz@intel.com <michael.aziz@intel.com>`_
-* Aaron Greig `aaron.greig@codeplay.com <aaron.greig@codeplay.com>`_
diff --git a/unified-runtime/scripts/core/EXP-LAUNCH-PROPERTIES.rst b/unified-runtime/scripts/core/EXP-LAUNCH-PROPERTIES.rst
deleted file mode 100644
index 81703aaca503b..0000000000000
--- a/unified-runtime/scripts/core/EXP-LAUNCH-PROPERTIES.rst
+++ /dev/null
@@ -1,107 +0,0 @@
-<%
-    OneApi=tags['$OneApi']
-    x=tags['$x']
-    X=x.upper()
-%>
-
-.. _experimental-launch-properties:
-
-================================================================================
-LAUNCH Properties
-================================================================================
-
-.. warning::
-
-    Experimental features:
-
-    *   May be replaced, updated, or removed at any time.
-    *   Do not require maintaining API/ABI stability of their own additions over
-        time.
-    *   Do not require conformance testing of their own additions.
-
-
-Terminology
---------------------------------------------------------------------------------
-"Launch Properties" is used to indicate optional kernel launch properties that
-can be specified at the time of a kernel launch. Such properties can be used to
-enable hardware specific kernel launch features.
-
-Motivation
---------------------------------------------------------------------------------
-Advances in hardware sometimes require new kernel properties. One example is
-distributed shared memory as used by Nvidia Hopper GPUs. Launching a kernel
-that supports distributed shared memory requires specifying a set of "cluster"
-dimensions, in units of work-groups, over which the shared memory is
-"distributed". Additionally some applications require specification of kernel
-properties at launch-time.
-
-This extension is a future-proof and portable solution that supports these two
-requirements. Instead of using a fixed set of kernel enqueue arguments, the
-approach is to introduce the ${x}_exp_launch_property_t type that enables a
-more extendable API.
-
-Each ${x}_exp_launch_property_t instance corresponds to a specific kernel
-launch property.
-Only one new function is introduced: ${x}EnqueueKernelLaunchCustomExp.
-${x}EnqueueKernelLaunchCustomExp takes an array of ${x}_exp_launch_property_t
-as an argument, and launches a kernel using these properties.
-${x}EnqueueKernelLaunchCustomExp corresponds closely to the CUDA Driver API
-``cuLaunchKernelEx``.
-
-Many kernel lauch properties can be supported, such as cooperative kernel
-launches. As such, eventually this extension should be able to replace the
-cooperative kernels Unified-Runtime extension.
-
-API
---------------------------------------------------------------------------------
-
-Enums
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-* ${x}_device_info_t
-    * ${X}_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP
-
-* ${x}_exp_launch_property_id_t
-
-Unions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-* ${x}_exp_launch_property_value_t
-
-Structs
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-* ${x}_exp_launch_property_t
-
-Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-* ${x}EnqueueKernelLaunchCustomExp
-
-Support
---------------------------------------------------------------------------------
-
-Adapters which support this experimental feature *must* return ``true`` when
-queried for ${X}_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP via
-${x}DeviceGetInfo. Conversely, before using any of the functionality defined
-in this experimental feature the user *must* use the device query to determine
-if the adapter supports this feature.
-
-
-Changelog
---------------------------------------------------------------------------------
-
-+-----------+---------------------------------------------+
-| Revision  | Changes                                     |
-+===========+=============================================+
-| 1.0       | Initial Draft                               |
-+-----------+---------------------------------------------+
-| 1.1       | Switch from extension string macro to       |
-|           | device info enum for reporting support.     |
-+-----------+---------------------------------------------+
-
-Contributors
---------------------------------------------------------------------------------
-
-* JackAKirk `jack.kirk@codeplay.com <jack.kirk@codeplay.com>`_
-* Aaron Greig `aaron.greig@codeplay.com <aaron.greig@codeplay.com>`_
diff --git a/unified-runtime/scripts/core/PROG.rst b/unified-runtime/scripts/core/PROG.rst
index 23f37327a5ade..28d8397927b5e 100644
--- a/unified-runtime/scripts/core/PROG.rst
+++ b/unified-runtime/scripts/core/PROG.rst
@@ -293,7 +293,7 @@ event dependencies that are passed to each Enqueue command.
     const size_t gWorkSize = {128, 128, 128};
     const size_t lWorkSize = {1, 8, 8}; 
     ${x}EnqueueKernelLaunch(hQueue, hKernel, nDim, gWorkOffset, gWorkSize, 
-                            lWorkSize, 0, nullptr, nullptr);
+                            lWorkSize, 0, nullptr, 0, nullptr, nullptr);
 
 Queue object lifetime
 ---------------------
diff --git a/unified-runtime/scripts/core/device.yml b/unified-runtime/scripts/core/device.yml
index 8d73f8078465b..f7575ca9e07d1 100644
--- a/unified-runtime/scripts/core/device.yml
+++ b/unified-runtime/scripts/core/device.yml
@@ -462,6 +462,8 @@ etors:
       desc: "[int32_t][optional-query] return max power limit in milliwatts."
     - name: BFLOAT16_CONVERSIONS_NATIVE
       desc: "[$x_bool_t] support for native bfloat16 conversions"
+    - name: KERNEL_LAUNCH_CAPABILITIES
+      desc: "[$x_kernel_launch_properties_flags_t] Bitfield of supported kernel launch properties."
 --- #--------------------------------------------------------------------------
 type: function
 desc: "Retrieves various information about device"
@@ -971,3 +973,21 @@ etors:
     - name: OTHER
       desc: "The clock frequency is throttled due to other reason."
       value: "$X_BIT(6)"
+--- #--------------------------------------------------------------------------
+type: enum
+desc: "Kernel launch properties support"
+class: $xDevice
+name: $x_kernel_launch_properties_flags_t
+etors:
+    - name: COOPERATIVE
+      desc: "Supports $X_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE and $xKernelSuggestMaxCooperativeGroupCount"
+      value: "$X_BIT(0)"
+    - name: CLUSTER_DIMENSION
+      desc: "Supports $X_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION"
+      value: "$X_BIT(1)"
+    - name: WORK_GROUP_MEMORY
+      desc: "Supports $X_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY"
+      value: "$X_BIT(2)"
+    - name: OPPORTUNISTIC_QUEUE_SERIALIZE
+      desc: "Supports $X_KERNEL_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE"
+      value: "$X_BIT(3)"
diff --git a/unified-runtime/scripts/core/enqueue.yml b/unified-runtime/scripts/core/enqueue.yml
index cb5d0848fc925..20d7d7bc2ab3f 100644
--- a/unified-runtime/scripts/core/enqueue.yml
+++ b/unified-runtime/scripts/core/enqueue.yml
@@ -12,6 +12,62 @@ type: header
 desc: "Intel $OneApi Unified Runtime APIs"
 ordinal: "10"
 --- #--------------------------------------------------------------------------
+type: enum
+desc: "Specifies a launch property id"
+name: $x_kernel_launch_property_id_t
+analogue:
+    - "**CUlaunchAttributeID**"
+etors:
+    - name: IGNORE
+      desc: "The property has no effect."
+    - name: COOPERATIVE
+      desc: "Whether to launch a cooperative kernel."
+    - name: CLUSTER_DIMENSION
+      desc: "work-group cluster dimensions."
+    - name: WORK_GROUP_MEMORY
+      desc: "Implicit work group memory allocation."
+    - name: OPPORTUNISTIC_QUEUE_SERIALIZE
+      desc: "Whether to opportunistically execute kernel launches serially on a native queue"
+--- #--------------------------------------------------------------------------
+type: union
+desc: "Specifies a launch property value"
+name: $x_kernel_launch_property_value_t
+tag: $x_kernel_launch_property_id_t
+analogue:
+    - "**CUlaunchAttributeValue**"
+members:
+    - type: uint32_t[3]
+      name: clusterDim
+      desc: "[in] dimensions of the cluster (units of work-group) (x, y, z). Each value must be a divisor of the corresponding global work-size dimension (in units of work-group)."
+      tag: $X_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION
+    - type: int
+      name: cooperative
+      desc: "[in] non-zero value indicates a cooperative kernel"
+      tag: $X_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE
+    - type: size_t
+      name: workgroup_mem_size
+      desc: "[in] non-zero value indicates the amount of work group memory to allocate in bytes"
+      tag: $X_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY
+    - type: int
+      name: opportunistic_queue_serialize
+      desc: "[in] non-zero value indicates an opportunistic native queue serialized kernel"
+      tag: $X_KERNEL_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE
+--- #--------------------------------------------------------------------------
+type: struct
+desc: "Kernel launch property"
+name: $x_kernel_launch_property_t
+analogue:
+    - "**cuLaunchAttribute**"
+members:
+    - type: $x_kernel_launch_property_id_t
+      name: id
+      desc: "[in] launch property id"
+      init: $X_KERNEL_LAUNCH_PROPERTY_ID_IGNORE
+    - type: $x_kernel_launch_property_value_t
+      name: value
+      desc: "[in][tagged_by(id)] launch property value"
+      init: nullptr
+--- #--------------------------------------------------------------------------
 type: function
 desc: "Enqueue a command to execute a kernel"
 class: $xEnqueue
@@ -42,6 +98,12 @@ params:
       desc: |
             [in][optional] pointer to an array of workDim unsigned values that specify the number of local work-items forming a work-group that will execute the kernel function.
             If nullptr, the runtime implementation will choose the work-group size.
+    - type: uint32_t
+      name: numPropsInLaunchPropList
+      desc: "[in] size of the launch prop list"
+    - type: const $x_kernel_launch_property_t*
+      name: launchPropList
+      desc: "[in][optional][range(0, numPropsInLaunchPropList)] pointer to a list of launch properties"
     - type: uint32_t
       name: numEventsInWaitList
       desc: "[in] size of the event wait list"
@@ -69,6 +131,10 @@ returns:
     - $X_RESULT_ERROR_INVALID_VALUE
     - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
     - $X_RESULT_ERROR_OUT_OF_RESOURCES
+    - $X_RESULT_ERROR_UNSUPPORTED_FEATURE:
+        - "If any property in `launchPropList` isn't supported by the device."
+    - $X_RESULT_ERROR_INVALID_NULL_POINTER:
+        - "`launchPropList == NULL && numPropsInLaunchPropList > 0`"
 --- #--------------------------------------------------------------------------
 type: function
 desc: "Enqueue a command which waits a list of events to complete before it completes"
diff --git a/unified-runtime/scripts/core/exp-cooperative-kernels.yml b/unified-runtime/scripts/core/exp-cooperative-kernels.yml
deleted file mode 100644
index 9fbe0d8f8bb35..0000000000000
--- a/unified-runtime/scripts/core/exp-cooperative-kernels.yml
+++ /dev/null
@@ -1,101 +0,0 @@
-#
-# Copyright (C) 2023 Intel Corporation
-#
-# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-# See LICENSE.TXT
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# See YaML.md for syntax definition
-#
---- #--------------------------------------------------------------------------
-type: header
-desc: "Intel $OneApi Unified Runtime Experimental APIs for Cooperative Kernels"
-ordinal: "99"
---- #--------------------------------------------------------------------------
-type: enum
-extend: true
-typed_etors: true
-desc: "Extension enums for $x_device_info_t to support cooperative kernels."
-name: $x_device_info_t
-etors:
-    - name: COOPERATIVE_KERNEL_SUPPORT_EXP
-      value: "0x5000"
-      desc: "[$x_bool_t] Returns true if the device supports cooperative kernels."
---- #--------------------------------------------------------------------------
-type: function
-desc: "Enqueue a command to execute a cooperative kernel"
-class: $xEnqueue
-name: CooperativeKernelLaunchExp
-params:
-    - type: $x_queue_handle_t
-      name: hQueue
-      desc: "[in] handle of the queue object"
-    - type: $x_kernel_handle_t
-      name: hKernel
-      desc: "[in] handle of the kernel object"
-    - type: uint32_t
-      name: workDim
-      desc: "[in] number of dimensions, from 1 to 3, to specify the global and work-group work-items"
-    - type: "const size_t*"
-      name: pGlobalWorkOffset
-      desc: "[in] pointer to an array of workDim unsigned values that specify the offset used to calculate the global ID of a work-item"
-    - type: "const size_t*"
-      name: pGlobalWorkSize
-      desc: "[in] pointer to an array of workDim unsigned values that specify the number of global work-items in workDim that will execute the kernel function"
-    - type: "const size_t*"
-      name: pLocalWorkSize
-      desc: |
-            [in][optional] pointer to an array of workDim unsigned values that specify the number of local work-items forming a work-group that will execute the kernel function.
-            If nullptr, the runtime implementation will choose the work-group size.
-    - type: uint32_t
-      name: numEventsInWaitList
-      desc: "[in] size of the event wait list"
-    - type: "const $x_event_handle_t*"
-      name: phEventWaitList
-      desc: |
-            [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution.
-            If nullptr, the numEventsInWaitList must be 0, indicating that no wait event.
-    - type: $x_event_handle_t*
-      name: phEvent
-      desc: |
-            [out][optional][alloc] return an event object that identifies this particular kernel execution instance. If phEventWaitList and phEvent are not NULL, phEvent must not refer to an element of the phEventWaitList array.
-returns:
-    - $X_RESULT_ERROR_INVALID_QUEUE
-    - $X_RESULT_ERROR_INVALID_KERNEL
-    - $X_RESULT_ERROR_INVALID_EVENT
-    - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST:
-        - "`phEventWaitList == NULL && numEventsInWaitList > 0`"
-        - "`phEventWaitList != NULL && numEventsInWaitList == 0`"
-        - "If event objects in phEventWaitList are not valid events."
-    - $X_RESULT_ERROR_INVALID_WORK_DIMENSION
-    - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
-    - $X_RESULT_ERROR_INVALID_VALUE
-    - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
-    - $X_RESULT_ERROR_OUT_OF_RESOURCES
---- #--------------------------------------------------------------------------
-type: function
-desc: "Query the maximum number of work groups for a cooperative kernel"
-class: $xKernel
-name: SuggestMaxCooperativeGroupCountExp
-params:
-    - type: $x_kernel_handle_t
-      name: hKernel
-      desc: "[in] handle of the kernel object"
-    - type: $x_device_handle_t
-      name: hDevice
-      desc: "[in] handle of the device object"
-    - type: uint32_t
-      name: workDim
-      desc: "[in] number of dimensions, from 1 to 3, to specify the work-group work-items"
-    - type: "const size_t*"
-      name: pLocalWorkSize
-      desc: |
-            [in] pointer to an array of workDim unsigned values that specify the number of local work-items forming a work-group that will execute the kernel function.
-    - type: size_t
-      name: dynamicSharedMemorySize
-      desc: "[in] size of dynamic shared memory, for each work-group, in bytes, that will be used when the kernel is launched"
-    - type: "uint32_t*"
-      name: "pGroupCountRet"
-      desc: "[out] pointer to maximum number of groups"
-returns:
-    - $X_RESULT_ERROR_INVALID_KERNEL
diff --git a/unified-runtime/scripts/core/exp-launch-properties.yml b/unified-runtime/scripts/core/exp-launch-properties.yml
deleted file mode 100644
index e46ea5a95c655..0000000000000
--- a/unified-runtime/scripts/core/exp-launch-properties.yml
+++ /dev/null
@@ -1,160 +0,0 @@
-#
-# Copyright (C) 2024 Intel Corporation
-#
-# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-# See LICENSE.TXT
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# See YaML.md for syntax definition
-#
---- #--------------------------------------------------------------------------
-type: header
-desc: "Intel $OneApi Unified Runtime Experimental APIs for (kernel) Launch Properties"
-ordinal: "99"
---- #--------------------------------------------------------------------------
-type: enum
-extend: true
-typed_etors: true
-desc: "Extension enums for $x_device_info_t to support launch properties."
-name: $x_device_info_t
-etors:
-    - name: LAUNCH_PROPERTIES_SUPPORT_EXP
-      value: "0x3000"
-      desc: "[$x_bool_t] Returns true if the device supports the use of kernel launch properties."
---- #--------------------------------------------------------------------------
-type: enum
-desc: "Specifies a launch property id"
-name: $x_exp_launch_property_id_t
-analogue:
-    - "**CUlaunchAttributeID**"
-etors:
-    - name: IGNORE
-      desc: "The property has no effect"
-    - name: COOPERATIVE
-      desc: "Whether to launch a cooperative kernel"
-    - name: CLUSTER_DIMENSION
-      desc: "work-group cluster dimensions"
-    - name: WORK_GROUP_MEMORY
-      desc: "Implicit work group memory allocation"
-    - name: OPPORTUNISTIC_QUEUE_SERIALIZE
-      desc: "Whether to opportunistically execute kernel launches serially on a native queue"
---- #--------------------------------------------------------------------------
-type: union
-desc: "Specifies a launch property value"
-name: $x_exp_launch_property_value_t
-tag: $x_exp_launch_property_id_t
-analogue:
-    - "**CUlaunchAttributeValue**"
-members:
-    - type: uint32_t[3]
-      name: clusterDim
-      desc: "[in] dimensions of the cluster (units of work-group) (x, y, z). Each value must be a divisor of the corresponding global work-size dimension (in units of work-group)."
-      tag: $X_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION
-    - type: int
-      name: cooperative
-      desc: "[in] non-zero value indicates a cooperative kernel"
-      tag: $X_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE
-    - type: size_t
-      name: workgroup_mem_size
-      desc: "[in] non-zero value indicates the amount of work group memory to allocate in bytes"
-      tag: $X_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY
-    - type: int
-      name: opportunistic_queue_serialize
-      desc: "[in] non-zero value indicates an opportunistic native queue serialized kernel"
-      tag: $X_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE
---- #--------------------------------------------------------------------------
-type: struct
-desc: "Kernel launch property"
-name: $x_exp_launch_property_t
-analogue:
-    - "**cuLaunchAttribute**"
-members:
-    - type: $x_exp_launch_property_id_t
-      name: id
-      desc: "[in] launch property id"
-      init: $X_EXP_LAUNCH_PROPERTY_ID_IGNORE
-    - type: $x_exp_launch_property_value_t
-      name: value
-      desc: "[in][tagged_by(id)] launch property value"
-      init: nullptr
---- #--------------------------------------------------------------------------
-type: function
-desc: "Launch kernel with custom launch properties"
-class: $xEnqueue
-name: KernelLaunchCustomExp
-ordinal: "0"
-analogue:
-    - "**cuLaunchKernelEx**"
-details:
-    - "Launches the kernel using the specified launch properties"
-    - "If numPropsInLaunchPropList == 0 then a regular kernel launch is used: `urEnqueueKernelLaunch`"
-    - "Consult the appropriate adapter driver documentation for details of adapter specific behavior and native error codes that may be returned."
-params:
-    - type: $x_queue_handle_t
-      name: hQueue
-      desc: "[in] handle of the queue object"
-    - type: $x_kernel_handle_t
-      name: hKernel
-      desc: "[in] handle of the kernel object"
-    - type: uint32_t
-      name: workDim
-      desc: "[in] number of dimensions, from 1 to 3, to specify the global and work-group work-items"
-    - type: "const size_t*"
-      name: pGlobalWorkOffset
-      desc: "[in] pointer to an array of workDim unsigned values that specify the offset used to calculate the global ID of a work-item"
-    - type: const size_t*
-      name: pGlobalWorkSize
-      desc: "[in] pointer to an array of workDim unsigned values that specify the number of global work-items in workDim that will execute the kernel function"
-    - type: const size_t*
-      name: pLocalWorkSize
-      desc: "[in][optional] pointer to an array of workDim unsigned values that specify the number of local work-items forming a work-group that will execute the kernel function. If nullptr, the runtime implementation will choose the work-group size."
-    - type: uint32_t
-      name: numPropsInLaunchPropList
-      desc: "[in] size of the launch prop list"
-    - type: const $x_exp_launch_property_t*
-      name: launchPropList
-      desc: "[in][range(0, numPropsInLaunchPropList)] pointer to a list of launch properties"
-    - type: uint32_t
-      name: numEventsInWaitList
-      desc: "[in] size of the event wait list"
-    - type: const $x_event_handle_t*
-      name: phEventWaitList
-      desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. "
-    - type: $x_event_handle_t*
-      name: phEvent
-      desc: "[out][optional][alloc] return an event object that identifies this particular kernel execution instance. If phEventWaitList and phEvent are not NULL, phEvent must not refer to an element of the phEventWaitList array."
-returns:
-    - $X_RESULT_SUCCESS
-    - $X_RESULT_ERROR_UNINITIALIZED
-    - $X_RESULT_ERROR_DEVICE_LOST
-    - $X_RESULT_ERROR_ADAPTER_SPECIFIC
-    - $X_RESULT_ERROR_INVALID_NULL_HANDLE:
-        - "NULL == hQueue"
-        - "NULL == hKernel"
-    - $X_RESULT_ERROR_INVALID_NULL_POINTER:
-        - "NULL == pGlobalWorkSize"
-        - "numPropsInLaunchpropList != 0 && launchPropList == NULL"
-    - $X_RESULT_ERROR_INVALID_QUEUE
-    - $X_RESULT_ERROR_INVALID_KERNEL
-    - $X_RESULT_ERROR_INVALID_EVENT
-    - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST:
-        - "phEventWaitList == NULL && numEventsInWaitList > 0"
-        - "phEventWaitList != NULL && numEventsInWaitList == 0"
-        - "If event objects in phEventWaitList are not valid events."
-    - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS:
-        - "An event in phEventWaitList has $X_EVENT_STATUS_ERROR"
-    - $X_RESULT_ERROR_INVALID_WORK_DIMENSION
-    - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
-    - $X_RESULT_ERROR_INVALID_VALUE
-    - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
-    - $X_RESULT_ERROR_OUT_OF_RESOURCES
---- #--------------------------------------------------------------------------
-type: enum
-extend: true
-typed_etors: true
-desc: "Extension enums to $x_device_info_t to support arch specific launch properties."
-name: $x_device_info_t
-etors:
-    - name: CLUSTER_LAUNCH_SUPPORT_EXP
-      value: "0x1111"
-      desc: "[$x_bool_t] return true if enqueue Cluster Launch is supported"
diff --git a/unified-runtime/scripts/core/kernel.yml b/unified-runtime/scripts/core/kernel.yml
index ff2bf0b3d0343..7805299028ef0 100644
--- a/unified-runtime/scripts/core/kernel.yml
+++ b/unified-runtime/scripts/core/kernel.yml
@@ -598,3 +598,34 @@ params:
             suggested local work size that will contain the result of the query
 returns:
     - $X_RESULT_ERROR_UNSUPPORTED_FEATURE
+--- #--------------------------------------------------------------------------
+type: function
+desc: "Query the maximum number of work groups for a cooperative kernel"
+class: $xKernel
+name: SuggestMaxCooperativeGroupCount
+params:
+    - type: $x_kernel_handle_t
+      name: hKernel
+      desc: "[in] handle of the kernel object"
+    - type: $x_device_handle_t
+      name: hDevice
+      desc: "[in] handle of the device object"
+    - type: uint32_t
+      name: workDim
+      desc: "[in] number of dimensions, from 1 to 3, to specify the work-group work-items"
+    - type: "const size_t*"
+      name: pLocalWorkSize
+      desc: |
+            [in] pointer to an array of workDim unsigned values that specify the number of local work-items forming a work-group that will execute the kernel function.
+    - type: size_t
+      name: dynamicSharedMemorySize
+      desc: "[in] size of dynamic shared memory, for each work-group, in bytes, that will be used when the kernel is launched"
+    - type: "uint32_t*"
+      name: "pGroupCountRet"
+      desc: "[out] pointer to maximum number of groups"
+returns:
+    - $X_RESULT_ERROR_INVALID_KERNEL
+    - $X_RESULT_ERROR_UNSUPPORTED_FEATURE:
+        - "If $X_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES returns a value without the $X_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE bit set."
+    - $X_RESULT_ERROR_INVALID_WORK_DIMENSION:
+        - "`workDim < 1 || workDim > 3`"
diff --git a/unified-runtime/scripts/core/registry.yml b/unified-runtime/scripts/core/registry.yml
index ee1cdefcc13d2..cd3ad5f0905d5 100644
--- a/unified-runtime/scripts/core/registry.yml
+++ b/unified-runtime/scripts/core/registry.yml
@@ -508,12 +508,6 @@ etors:
 - name: LOADER_TEAR_DOWN
   desc: Enumerator for $xLoaderTearDown
   value: '202'
-- name: ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP
-  desc: Enumerator for $xEnqueueCooperativeKernelLaunchExp
-  value: '214'
-- name: KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP
-  desc: Enumerator for $xKernelSuggestMaxCooperativeGroupCountExp
-  value: '215'
 - name: PROGRAM_GET_GLOBAL_VARIABLE_POINTER
   desc: Enumerator for $xProgramGetGlobalVariablePointer
   value: '216'
@@ -529,9 +523,6 @@ etors:
 - name: ENQUEUE_TIMESTAMP_RECORDING_EXP
   desc: Enumerator for $xEnqueueTimestampRecordingExp
   value: '223'
-- name: ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP
-  desc: Enumerator for $xEnqueueKernelLaunchCustomExp
-  value: '224'
 - name: KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE
   desc: Enumerator for $xKernelGetSuggestedLocalWorkSize
   value: '225'
@@ -661,6 +652,9 @@ etors:
 - name: BINDLESS_IMAGES_FREE_MAPPED_LINEAR_MEMORY_EXP
   desc: Enumerator for $xBindlessImagesFreeMappedLinearMemoryExp
   value: '271'
+- name: KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT
+  desc: Enumerator for $xKernelSuggestMaxCooperativeGroupCount
+  value: '272'
 ---
 type: enum
 desc: Defines structure types
diff --git a/unified-runtime/source/adapters/adapter.def.in b/unified-runtime/source/adapters/adapter.def.in
index ca73143a78878..73a360b36d853 100644
--- a/unified-runtime/source/adapters/adapter.def.in
+++ b/unified-runtime/source/adapters/adapter.def.in
@@ -8,7 +8,6 @@ EXPORTS
 	urGetEnqueueExpProcAddrTable
 	urGetEventProcAddrTable
 	urGetKernelProcAddrTable
-	urGetKernelExpProcAddrTable
 	urGetMemProcAddrTable
 	urGetPhysicalMemProcAddrTable
 	urGetPlatformProcAddrTable
diff --git a/unified-runtime/source/adapters/adapter.map.in b/unified-runtime/source/adapters/adapter.map.in
index 3166a335faf7c..4ba14eacb7244 100644
--- a/unified-runtime/source/adapters/adapter.map.in
+++ b/unified-runtime/source/adapters/adapter.map.in
@@ -8,7 +8,6 @@
 		urGetEnqueueExpProcAddrTable;
 		urGetEventProcAddrTable;
 		urGetKernelProcAddrTable;
-		urGetKernelExpProcAddrTable;
 		urGetMemProcAddrTable;
 		urGetPhysicalMemProcAddrTable;
 		urGetPlatformProcAddrTable;
diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp
index 36439e66ff162..2cb43ebc88356 100644
--- a/unified-runtime/source/adapters/cuda/device.cpp
+++ b/unified-runtime/source/adapters/cuda/device.cpp
@@ -1164,25 +1164,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   }
   case UR_DEVICE_INFO_COMMAND_BUFFER_SUBGRAPH_SUPPORT_EXP:
     return ReturnValue(true);
-  case UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP: {
-    int Value = getAttribute(hDevice,
-                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= 9;
-    return ReturnValue(static_cast<bool>(Value));
-  }
   case UR_DEVICE_INFO_LOW_POWER_EVENTS_SUPPORT_EXP:
     return ReturnValue(false);
   case UR_DEVICE_INFO_USE_NATIVE_ASSERT:
     return ReturnValue(true);
   case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP:
     return ReturnValue(true);
-  case UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP:
-    return ReturnValue(true);
-  case UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP:
-    return ReturnValue(true);
   case UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP:
     return ReturnValue(false);
   case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP:
     return ReturnValue(true);
+  case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES: {
+    auto LaunchPropsSupport =
+        UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE |
+        UR_KERNEL_LAUNCH_PROPERTIES_FLAG_WORK_GROUP_MEMORY;
+    if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >=
+        9) {
+      LaunchPropsSupport |=
+          UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION |
+          UR_KERNEL_LAUNCH_PROPERTIES_FLAG_OPPORTUNISTIC_QUEUE_SERIALIZE;
+    }
+    // Report the accumulated capability bitmask to the caller.
+    return ReturnValue(LaunchPropsSupport);
+  }
   default:
     break;
   }
diff --git a/unified-runtime/source/adapters/cuda/enqueue.cpp b/unified-runtime/source/adapters/cuda/enqueue.cpp
index 038720f5ecdf2..8be4046708a59 100644
--- a/unified-runtime/source/adapters/cuda/enqueue.cpp
+++ b/unified-runtime/source/adapters/cuda/enqueue.cpp
@@ -438,48 +438,18 @@ enqueueKernelLaunch(ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel,
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
-    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
-    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
-  return enqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
-                             pGlobalWorkSize, pLocalWorkSize,
-                             numEventsInWaitList, phEventWaitList, phEvent,
-                             /*WorkGroupMemory=*/0);
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
-    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
-  if (pGlobalWorkOffset == nullptr || *pGlobalWorkOffset == 0) {
-    ur_exp_launch_property_t coop_prop;
-    coop_prop.id = UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE;
-    coop_prop.value.cooperative = 1;
-    return urEnqueueKernelLaunchCustomExp(
-        hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-        pLocalWorkSize, 1, &coop_prop, numEventsInWaitList, phEventWaitList,
-        phEvent);
-  }
-  return urEnqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
-                               pGlobalWorkSize, pLocalWorkSize,
-                               numEventsInWaitList, phEventWaitList, phEvent);
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
     ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
     const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
     const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
-    const ur_exp_launch_property_t *launchPropList,
+    const ur_kernel_launch_property_t *launchPropList,
     uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
     ur_event_handle_t *phEvent) {
 
   size_t WorkGroupMemory = [&]() -> size_t {
-    const ur_exp_launch_property_t *WorkGroupMemoryProp = std::find_if(
+    const ur_kernel_launch_property_t *WorkGroupMemoryProp = std::find_if(
         launchPropList, launchPropList + numPropsInLaunchPropList,
-        [](const ur_exp_launch_property_t &Prop) {
-          return Prop.id == UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY;
+        [](const ur_kernel_launch_property_t &Prop) {
+          return Prop.id == UR_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY;
         });
     if (WorkGroupMemoryProp != launchPropList + numPropsInLaunchPropList)
       return WorkGroupMemoryProp->value.workgroup_mem_size;
@@ -526,12 +496,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
 
   for (uint32_t i = 0; i < numPropsInLaunchPropList; i++) {
     switch (launchPropList[i].id) {
-    case UR_EXP_LAUNCH_PROPERTY_ID_IGNORE: {
+    case UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE: {
       auto &attr = launch_attribute.emplace_back();
       attr.id = CU_LAUNCH_ATTRIBUTE_IGNORE;
       break;
     }
-    case UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION: {
+    case UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION: {
       auto &attr = launch_attribute.emplace_back();
       attr.id = CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION;
       // Note that cuda orders from right to left wrt SYCL dimensional order.
@@ -554,20 +524,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
 
       break;
     }
-    case UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE: {
+    case UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE: {
       auto &attr = launch_attribute.emplace_back();
       attr.id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE;
       attr.value.cooperative = launchPropList[i].value.cooperative;
       break;
     }
-    case UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE: {
+    case UR_KERNEL_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE: {
       auto &attr = launch_attribute.emplace_back();
       attr.id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION;
       attr.value.programmaticStreamSerializationAllowed =
           launchPropList[i].value.opportunistic_queue_serialize;
       break;
     }
-    case UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY: {
+    case UR_KERNEL_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY: {
       break;
     }
     default: {
diff --git a/unified-runtime/source/adapters/cuda/kernel.cpp b/unified-runtime/source/adapters/cuda/kernel.cpp
index 148f624fb77c4..f296c74611462 100644
--- a/unified-runtime/source/adapters/cuda/kernel.cpp
+++ b/unified-runtime/source/adapters/cuda/kernel.cpp
@@ -159,7 +159,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
   return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
     ur_kernel_handle_t hKernel, ur_device_handle_t /*hDevice*/,
     uint32_t workDim, const size_t *pLocalWorkSize,
     size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
diff --git a/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp b/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp
index b1df1388e1a62..6fa9687f3979c 100644
--- a/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp
+++ b/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp
@@ -127,6 +127,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
   pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
   pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants;
   pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize;
+  pDdiTable->pfnSuggestMaxCooperativeGroupCount =
+      urKernelSuggestMaxCooperativeGroupCount;
   return UR_RESULT_SUCCESS;
 }
 
@@ -431,10 +433,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
     return result;
   }
 
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      urEnqueueCooperativeKernelLaunchExp;
   pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp;
-  pDdiTable->pfnKernelLaunchCustomExp = urEnqueueKernelLaunchCustomExp;
   pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp;
   pDdiTable->pfnUSMDeviceAllocExp = urEnqueueUSMDeviceAllocExp;
   pDdiTable->pfnUSMSharedAllocExp = urEnqueueUSMSharedAllocExp;
@@ -445,19 +444,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
   return UR_RESULT_SUCCESS;
 }
 
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-
-  pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
-      urKernelSuggestMaxCooperativeGroupCountExp;
-
-  return UR_RESULT_SUCCESS;
-}
-
 UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
     ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) {
   auto result = validateProcInputs(version, pDdiTable);
@@ -482,7 +468,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version,
   urGetEnqueueExpProcAddrTable(version, &pDdiTable->EnqueueExp);
   urGetEventProcAddrTable(version, &pDdiTable->Event);
   urGetKernelProcAddrTable(version, &pDdiTable->Kernel);
-  urGetKernelExpProcAddrTable(version, &pDdiTable->KernelExp);
   urGetMemProcAddrTable(version, &pDdiTable->Mem);
   urGetPhysicalMemProcAddrTable(version, &pDdiTable->PhysicalMem);
   urGetPlatformProcAddrTable(version, &pDdiTable->Platform);
diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp
index f429de1763225..f991dbf7db416 100644
--- a/unified-runtime/source/adapters/hip/device.cpp
+++ b/unified-runtime/source/adapters/hip/device.cpp
@@ -993,7 +993,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
   case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH:
   case UR_DEVICE_INFO_IP_VERSION:
-  case UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP:
   case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS:
   case UR_DEVICE_INFO_FAN_SPEED:
   case UR_DEVICE_INFO_MIN_POWER_LIMIT:
@@ -1041,12 +1040,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(true);
   case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP:
     return ReturnValue(true);
-  case UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP:
-    return ReturnValue(false);
-  case UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP:
-    return ReturnValue(true);
   case UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP:
     return ReturnValue(false);
+  case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES:
+    return ReturnValue(0);
   default:
     break;
   }
diff --git a/unified-runtime/source/adapters/hip/enqueue.cpp b/unified-runtime/source/adapters/hip/enqueue.cpp
index b854182239154..dc0fac8050eb9 100644
--- a/unified-runtime/source/adapters/hip/enqueue.cpp
+++ b/unified-runtime/source/adapters/hip/enqueue.cpp
@@ -249,13 +249,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead(
 UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
     ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
     const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+    const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
+    const ur_kernel_launch_property_t *launchPropList,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
   UR_ASSERT(hQueue->getContext() == hKernel->getContext(),
             UR_RESULT_ERROR_INVALID_QUEUE);
   UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
   UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
 
+  for (uint32_t propIndex = 0; propIndex < numPropsInLaunchPropList;
+       propIndex++) {
+    // Adapters that don't support cooperative kernels are currently expected
+    // to ignore COOPERATIVE launch properties. Ideally we should avoid passing
+    // these at the SYCL RT level instead, see
+    // https://github.com/intel/llvm/issues/18421
+    if (launchPropList[propIndex].id == UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE ||
+        launchPropList[propIndex].id ==
+            UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE) {
+      continue;
+    }
+    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  }
+
   // Early exit for zero size range kernel
   if (*pGlobalWorkSize == 0) {
     return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList,
@@ -324,16 +340,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
   return UR_RESULT_SUCCESS;
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
-    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
-  return urEnqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
-                               pGlobalWorkSize, pLocalWorkSize,
-                               numEventsInWaitList, phEventWaitList, phEvent);
-}
-
 /// Enqueues a wait on the given queue for all events.
 /// See \ref enqueueEventWait
 ///
diff --git a/unified-runtime/source/adapters/hip/kernel.cpp b/unified-runtime/source/adapters/hip/kernel.cpp
index 5a7e23feb3563..39cddecd1efd5 100644
--- a/unified-runtime/source/adapters/hip/kernel.cpp
+++ b/unified-runtime/source/adapters/hip/kernel.cpp
@@ -156,7 +156,7 @@ urKernelGetNativeHandle(ur_kernel_handle_t, ur_native_handle_t *) {
   return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
     ur_kernel_handle_t /*hKernel*/, ur_device_handle_t /*hDevice*/,
     uint32_t /*workDim*/, const size_t * /*pLocalWorkSize*/,
     size_t /*dynamicSharedMemorySize*/, uint32_t * /*pGroupCountRet*/) {
diff --git a/unified-runtime/source/adapters/hip/ur_interface_loader.cpp b/unified-runtime/source/adapters/hip/ur_interface_loader.cpp
index 629eb6c929db3..9e7bc69982822 100644
--- a/unified-runtime/source/adapters/hip/ur_interface_loader.cpp
+++ b/unified-runtime/source/adapters/hip/ur_interface_loader.cpp
@@ -127,6 +127,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
   pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
   pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants;
   pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize;
+  pDdiTable->pfnSuggestMaxCooperativeGroupCount =
+      urKernelSuggestMaxCooperativeGroupCount;
   return UR_RESULT_SUCCESS;
 }
 
@@ -428,8 +430,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
     return result;
   }
 
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      urEnqueueCooperativeKernelLaunchExp;
   pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp;
   pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp;
   pDdiTable->pfnCommandBufferExp = urEnqueueCommandBufferExp;
@@ -437,19 +437,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
   return UR_RESULT_SUCCESS;
 }
 
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-
-  pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
-      urKernelSuggestMaxCooperativeGroupCountExp;
-
-  return UR_RESULT_SUCCESS;
-}
-
 UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
     ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) {
   auto result = validateProcInputs(version, pDdiTable);
@@ -474,7 +461,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version,
   urGetEnqueueExpProcAddrTable(version, &pDdiTable->EnqueueExp);
   urGetEventProcAddrTable(version, &pDdiTable->Event);
   urGetKernelProcAddrTable(version, &pDdiTable->Kernel);
-  urGetKernelExpProcAddrTable(version, &pDdiTable->KernelExp);
   urGetMemProcAddrTable(version, &pDdiTable->Mem);
   urGetPhysicalMemProcAddrTable(version, &pDdiTable->PhysicalMem);
   urGetPlatformProcAddrTable(version, &pDdiTable->Platform);
diff --git a/unified-runtime/source/adapters/level_zero/device.cpp b/unified-runtime/source/adapters/level_zero/device.cpp
index c372443d07ec2..7c8720d67b53e 100644
--- a/unified-runtime/source/adapters/level_zero/device.cpp
+++ b/unified-runtime/source/adapters/level_zero/device.cpp
@@ -1222,10 +1222,6 @@ ur_result_t urDeviceGetInfo(
     return ReturnValue(false);
   case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP:
     return ReturnValue(true);
-  case UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP:
-    return ReturnValue(false);
-  case UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP:
-    return ReturnValue(true);
   case UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP:
     return ReturnValue(true);
   case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP:
@@ -1346,6 +1342,8 @@ ur_result_t urDeviceGetInfo(
       return ReturnValue(int32_t{PowerProperties.maxLimit});
     }
   }
+  case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES:
+    return ReturnValue(UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE);
   default:
     UR_LOG(ERR, "Unsupported ParamName in urGetDeviceInfo");
     UR_LOG(ERR, "ParamNameParamName={}(0x{})", ParamName,
diff --git a/unified-runtime/source/adapters/level_zero/kernel.cpp b/unified-runtime/source/adapters/level_zero/kernel.cpp
index 345021aebfa98..b6f56cf26f0e8 100644
--- a/unified-runtime/source/adapters/level_zero/kernel.cpp
+++ b/unified-runtime/source/adapters/level_zero/kernel.cpp
@@ -56,7 +56,7 @@ ur_result_t urKernelGetSuggestedLocalWorkSize(
   return UR_RESULT_SUCCESS;
 }
 
-ur_result_t urEnqueueKernelLaunch(
+inline ur_result_t EnqueueCooperativeKernelLaunchHelper(
     /// [in] handle of the queue object
     ur_queue_handle_t Queue,
     /// [in] handle of the kernel object
@@ -89,9 +89,19 @@ ur_result_t urEnqueueKernelLaunch(
   UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
   UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
 
-  ze_kernel_handle_t ZeKernel{};
-  UR_CALL(getZeKernel(Queue->Device->ZeDevice, Kernel, &ZeKernel));
+  auto ZeDevice = Queue->Device->ZeDevice;
 
+  ze_kernel_handle_t ZeKernel{};
+  if (Kernel->ZeKernelMap.empty()) {
+    ZeKernel = Kernel->ZeKernel;
+  } else {
+    auto It = Kernel->ZeKernelMap.find(ZeDevice);
+    if (It == Kernel->ZeKernelMap.end()) {
+      /* kernel and queue don't match */
+      return UR_RESULT_ERROR_INVALID_QUEUE;
+    }
+    ZeKernel = It->second;
+  }
   // Lock automatically releases when this goes out of scope.
   std::scoped_lock<ur_shared_mutex, ur_shared_mutex, ur_shared_mutex> Lock(
       Queue->Mutex, Kernel->Mutex, Kernel->Program->Mutex);
@@ -118,9 +128,110 @@ ur_result_t urEnqueueKernelLaunch(
   ze_group_count_t ZeThreadGroupDimensions{1, 1, 1};
   uint32_t WG[3]{};
 
-  UR_CALL(calculateKernelWorkDimensions(Kernel->ZeKernel, Queue->Device,
-                                        ZeThreadGroupDimensions, WG, WorkDim,
-                                        GlobalWorkSize, LocalWorkSize));
+  // New variable needed because GlobalWorkSize parameter might not be of size 3
+  size_t GlobalWorkSize3D[3]{1, 1, 1};
+  std::copy(GlobalWorkSize, GlobalWorkSize + WorkDim, GlobalWorkSize3D);
+
+  if (LocalWorkSize) {
+    // L0
+    for (uint32_t I = 0; I < WorkDim; I++) {
+      UR_ASSERT(LocalWorkSize[I] < (std::numeric_limits<uint32_t>::max)(),
+                UR_RESULT_ERROR_INVALID_VALUE);
+      WG[I] = static_cast<uint32_t>(LocalWorkSize[I]);
+    }
+  } else {
+    // We can't call zeKernelSuggestGroupSize if the 64-bit GlobalWorkSize
+    // values do not fit into the 32 bits that the API currently supports.
+    bool SuggestGroupSize = true;
+    for (int I : {0, 1, 2}) {
+      if (GlobalWorkSize3D[I] > UINT32_MAX) {
+        SuggestGroupSize = false;
+      }
+    }
+    if (SuggestGroupSize) {
+      ZE2UR_CALL(zeKernelSuggestGroupSize,
+                 (ZeKernel, GlobalWorkSize3D[0], GlobalWorkSize3D[1],
+                  GlobalWorkSize3D[2], &WG[0], &WG[1], &WG[2]));
+    } else {
+      for (int I : {0, 1, 2}) {
+        // Try to find an I-dimension WG size that GlobalWorkSize[I] is
+        // fully divisible by. Start with the max possible size in
+        // each dimension.
+        uint32_t GroupSize[] = {
+            Queue->Device->ZeDeviceComputeProperties->maxGroupSizeX,
+            Queue->Device->ZeDeviceComputeProperties->maxGroupSizeY,
+            Queue->Device->ZeDeviceComputeProperties->maxGroupSizeZ};
+        GroupSize[I] = (std::min)(size_t(GroupSize[I]), GlobalWorkSize3D[I]);
+        while (GlobalWorkSize3D[I] % GroupSize[I]) {
+          --GroupSize[I];
+        }
+
+        if (GlobalWorkSize3D[I] / GroupSize[I] > UINT32_MAX) {
+          UR_LOG(ERR,
+                 "urEnqueueCooperativeKernelLaunchExp: can't find a WG size "
+                 "suitable for global work size > UINT32_MAX");
+          return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
+        }
+        WG[I] = GroupSize[I];
+      }
+      UR_LOG(DEBUG,
+             "urEnqueueCooperativeKernelLaunchExp: using computed WG "
+             "size = {{{}, {}, {}}}",
+             WG[0], WG[1], WG[2]);
+    }
+  }
+
+  // TODO: assert if sizes do not fit into 32-bit?
+
+  switch (WorkDim) {
+  case 3:
+    ZeThreadGroupDimensions.groupCountX =
+        static_cast<uint32_t>(GlobalWorkSize3D[0] / WG[0]);
+    ZeThreadGroupDimensions.groupCountY =
+        static_cast<uint32_t>(GlobalWorkSize3D[1] / WG[1]);
+    ZeThreadGroupDimensions.groupCountZ =
+        static_cast<uint32_t>(GlobalWorkSize3D[2] / WG[2]);
+    break;
+  case 2:
+    ZeThreadGroupDimensions.groupCountX =
+        static_cast<uint32_t>(GlobalWorkSize3D[0] / WG[0]);
+    ZeThreadGroupDimensions.groupCountY =
+        static_cast<uint32_t>(GlobalWorkSize3D[1] / WG[1]);
+    WG[2] = 1;
+    break;
+  case 1:
+    ZeThreadGroupDimensions.groupCountX =
+        static_cast<uint32_t>(GlobalWorkSize3D[0] / WG[0]);
+    WG[1] = WG[2] = 1;
+    break;
+
+  default:
+    UR_LOG(ERR, "urEnqueueCooperativeKernelLaunchExp: unsupported work_dim");
+    return UR_RESULT_ERROR_INVALID_VALUE;
+  }
+
+  // Error handling for non-uniform group size case
+  if (GlobalWorkSize3D[0] !=
+      size_t(ZeThreadGroupDimensions.groupCountX) * WG[0]) {
+    UR_LOG(ERR,
+           "urEnqueueCooperativeKernelLaunchExp: invalid work_dim. The "
+           "range is not a multiple of the group size in the 1st dimension");
+    return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
+  }
+  if (GlobalWorkSize3D[1] !=
+      size_t(ZeThreadGroupDimensions.groupCountY) * WG[1]) {
+    UR_LOG(ERR,
+           "urEnqueueCooperativeKernelLaunchExp: invalid work_dim. The "
+           "range is not a multiple of the group size in the 2nd dimension");
+    return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
+  }
+  if (GlobalWorkSize3D[2] !=
+      size_t(ZeThreadGroupDimensions.groupCountZ) * WG[2]) {
+    UR_LOG(DEBUG,
+           "urEnqueueCooperativeKernelLaunchExp: invalid work_dim. The "
+           "range is not a multiple of the group size in the 3rd dimension");
+    return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
+  }
 
   ZE2UR_CALL(zeKernelSetGroupSize, (ZeKernel, WG[0], WG[1], WG[2]));
 
@@ -176,7 +287,7 @@ ur_result_t urEnqueueKernelLaunch(
     ContextsLock.lock();
     Queue->CaptureIndirectAccesses();
     // Add the command to the command list, which implies submission.
-    ZE2UR_CALL(zeCommandListAppendLaunchKernel,
+    ZE2UR_CALL(zeCommandListAppendLaunchCooperativeKernel,
                (CommandList->first, ZeKernel, &ZeThreadGroupDimensions, ZeEvent,
                 (*Event)->WaitList.Length, (*Event)->WaitList.ZeEventList));
   } else {
@@ -184,12 +295,13 @@ ur_result_t urEnqueueKernelLaunch(
     // No lock is needed here, unlike the immediate commandlist case above,
     // because the kernels are not actually submitted yet. Kernels will be
     // submitted only when the comamndlist is closed. Then, a lock is held.
-    ZE2UR_CALL(zeCommandListAppendLaunchKernel,
+    ZE2UR_CALL(zeCommandListAppendLaunchCooperativeKernel,
                (CommandList->first, ZeKernel, &ZeThreadGroupDimensions, ZeEvent,
                 (*Event)->WaitList.Length, (*Event)->WaitList.ZeEventList));
   }
 
-  UR_LOG(DEBUG, "calling zeCommandListAppendLaunchKernel() with ZeEvent {}",
+  UR_LOG(DEBUG,
+         "calling zeCommandListAppendLaunchCooperativeKernel() with ZeEvent {}",
          ur_cast<std::uintptr_t>(ZeEvent));
   printZeEventList((*Event)->WaitList);
 
@@ -201,7 +313,7 @@ ur_result_t urEnqueueKernelLaunch(
   return UR_RESULT_SUCCESS;
 }
 
-ur_result_t urEnqueueCooperativeKernelLaunchExp(
+ur_result_t urEnqueueKernelLaunch(
     /// [in] handle of the queue object
     ur_queue_handle_t Queue,
     /// [in] handle of the kernel object
@@ -209,8 +321,8 @@ ur_result_t urEnqueueCooperativeKernelLaunchExp(
     /// [in] number of dimensions, from 1 to 3, to specify the global and
     /// work-group work-items
     uint32_t WorkDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
+    /// [in][optional] pointer to an array of workDim unsigned values that
+    /// specify the offset used to calculate the global ID of a work-item
     const size_t *GlobalWorkOffset,
     /// [in] pointer to an array of workDim unsigned values that specify the
     /// number of global work-items in workDim that will execute the kernel
@@ -221,6 +333,11 @@ ur_result_t urEnqueueCooperativeKernelLaunchExp(
     /// will execute the kernel function. If nullptr, the runtime
     /// implementation will choose the work-group size.
     const size_t *LocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t NumPropsInLaunchPropList,
+    /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch
+    /// properties
+    const ur_kernel_launch_property_t *LaunchPropList,
     /// [in] size of the event wait list
     uint32_t NumEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -231,22 +348,28 @@ ur_result_t urEnqueueCooperativeKernelLaunchExp(
     /// [in,out][optional] return an event object that identifies this
     /// particular kernel execution instance.
     ur_event_handle_t *OutEvent) {
+  for (uint32_t PropIndex = 0; PropIndex < NumPropsInLaunchPropList;
+       PropIndex++) {
+    if (LaunchPropList[PropIndex].id ==
+            UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE &&
+        LaunchPropList[PropIndex].value.cooperative) {
+      return EnqueueCooperativeKernelLaunchHelper(
+          Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize,
+          LocalWorkSize, NumEventsInWaitList, EventWaitList, OutEvent);
+    }
+    if (LaunchPropList[PropIndex].id != UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE &&
+        LaunchPropList[PropIndex].id !=
+            UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE) {
+      // We don't support any other properties.
+      return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+  }
   UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
   UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
 
-  auto ZeDevice = Queue->Device->ZeDevice;
-
   ze_kernel_handle_t ZeKernel{};
-  if (Kernel->ZeKernelMap.empty()) {
-    ZeKernel = Kernel->ZeKernel;
-  } else {
-    auto It = Kernel->ZeKernelMap.find(ZeDevice);
-    if (It == Kernel->ZeKernelMap.end()) {
-      /* kernel and queue don't match */
-      return UR_RESULT_ERROR_INVALID_QUEUE;
-    }
-    ZeKernel = It->second;
-  }
+  UR_CALL(getZeKernel(Queue->Device->ZeDevice, Kernel, &ZeKernel));
+
   // Lock automatically releases when this goes out of scope.
   std::scoped_lock<ur_shared_mutex, ur_shared_mutex, ur_shared_mutex> Lock(
       Queue->Mutex, Kernel->Mutex, Kernel->Program->Mutex);
@@ -273,110 +396,9 @@ ur_result_t urEnqueueCooperativeKernelLaunchExp(
   ze_group_count_t ZeThreadGroupDimensions{1, 1, 1};
   uint32_t WG[3]{};
 
-  // New variable needed because GlobalWorkSize parameter might not be of size 3
-  size_t GlobalWorkSize3D[3]{1, 1, 1};
-  std::copy(GlobalWorkSize, GlobalWorkSize + WorkDim, GlobalWorkSize3D);
-
-  if (LocalWorkSize) {
-    // L0
-    for (uint32_t I = 0; I < WorkDim; I++) {
-      UR_ASSERT(LocalWorkSize[I] < (std::numeric_limits<uint32_t>::max)(),
-                UR_RESULT_ERROR_INVALID_VALUE);
-      WG[I] = static_cast<uint32_t>(LocalWorkSize[I]);
-    }
-  } else {
-    // We can't call to zeKernelSuggestGroupSize if 64-bit GlobalWorkSize
-    // values do not fit to 32-bit that the API only supports currently.
-    bool SuggestGroupSize = true;
-    for (int I : {0, 1, 2}) {
-      if (GlobalWorkSize3D[I] > UINT32_MAX) {
-        SuggestGroupSize = false;
-      }
-    }
-    if (SuggestGroupSize) {
-      ZE2UR_CALL(zeKernelSuggestGroupSize,
-                 (ZeKernel, GlobalWorkSize3D[0], GlobalWorkSize3D[1],
-                  GlobalWorkSize3D[2], &WG[0], &WG[1], &WG[2]));
-    } else {
-      for (int I : {0, 1, 2}) {
-        // Try to find a I-dimension WG size that the GlobalWorkSize[I] is
-        // fully divisable with. Start with the max possible size in
-        // each dimension.
-        uint32_t GroupSize[] = {
-            Queue->Device->ZeDeviceComputeProperties->maxGroupSizeX,
-            Queue->Device->ZeDeviceComputeProperties->maxGroupSizeY,
-            Queue->Device->ZeDeviceComputeProperties->maxGroupSizeZ};
-        GroupSize[I] = (std::min)(size_t(GroupSize[I]), GlobalWorkSize3D[I]);
-        while (GlobalWorkSize3D[I] % GroupSize[I]) {
-          --GroupSize[I];
-        }
-
-        if (GlobalWorkSize3D[I] / GroupSize[I] > UINT32_MAX) {
-          UR_LOG(ERR,
-                 "urEnqueueCooperativeKernelLaunchExp: can't find a WG size "
-                 "suitable for global work size > UINT32_MAX");
-          return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
-        }
-        WG[I] = GroupSize[I];
-      }
-      UR_LOG(DEBUG,
-             "urEnqueueCooperativeKernelLaunchExp: using computed WG "
-             "size = {{{}, {}, {}}}",
-             WG[0], WG[1], WG[2]);
-    }
-  }
-
-  // TODO: assert if sizes do not fit into 32-bit?
-
-  switch (WorkDim) {
-  case 3:
-    ZeThreadGroupDimensions.groupCountX =
-        static_cast<uint32_t>(GlobalWorkSize3D[0] / WG[0]);
-    ZeThreadGroupDimensions.groupCountY =
-        static_cast<uint32_t>(GlobalWorkSize3D[1] / WG[1]);
-    ZeThreadGroupDimensions.groupCountZ =
-        static_cast<uint32_t>(GlobalWorkSize3D[2] / WG[2]);
-    break;
-  case 2:
-    ZeThreadGroupDimensions.groupCountX =
-        static_cast<uint32_t>(GlobalWorkSize3D[0] / WG[0]);
-    ZeThreadGroupDimensions.groupCountY =
-        static_cast<uint32_t>(GlobalWorkSize3D[1] / WG[1]);
-    WG[2] = 1;
-    break;
-  case 1:
-    ZeThreadGroupDimensions.groupCountX =
-        static_cast<uint32_t>(GlobalWorkSize3D[0] / WG[0]);
-    WG[1] = WG[2] = 1;
-    break;
-
-  default:
-    UR_LOG(ERR, "urEnqueueCooperativeKernelLaunchExp: unsupported work_dim");
-    return UR_RESULT_ERROR_INVALID_VALUE;
-  }
-
-  // Error handling for non-uniform group size case
-  if (GlobalWorkSize3D[0] !=
-      size_t(ZeThreadGroupDimensions.groupCountX) * WG[0]) {
-    UR_LOG(ERR,
-           "urEnqueueCooperativeKernelLaunchExp: invalid work_dim. The "
-           "range is not a multiple of the group size in the 1st dimension");
-    return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
-  }
-  if (GlobalWorkSize3D[1] !=
-      size_t(ZeThreadGroupDimensions.groupCountY) * WG[1]) {
-    UR_LOG(ERR,
-           "urEnqueueCooperativeKernelLaunchExp: invalid work_dim. The "
-           "range is not a multiple of the group size in the 2nd dimension");
-    return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
-  }
-  if (GlobalWorkSize3D[2] !=
-      size_t(ZeThreadGroupDimensions.groupCountZ) * WG[2]) {
-    UR_LOG(DEBUG,
-           "urEnqueueCooperativeKernelLaunchExp: invalid work_dim. The "
-           "range is not a multiple of the group size in the 3rd dimension");
-    return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
-  }
+  UR_CALL(calculateKernelWorkDimensions(Kernel->ZeKernel, Queue->Device,
+                                        ZeThreadGroupDimensions, WG, WorkDim,
+                                        GlobalWorkSize, LocalWorkSize));
 
   ZE2UR_CALL(zeKernelSetGroupSize, (ZeKernel, WG[0], WG[1], WG[2]));
 
@@ -432,7 +454,7 @@ ur_result_t urEnqueueCooperativeKernelLaunchExp(
     ContextsLock.lock();
     Queue->CaptureIndirectAccesses();
     // Add the command to the command list, which implies submission.
-    ZE2UR_CALL(zeCommandListAppendLaunchCooperativeKernel,
+    ZE2UR_CALL(zeCommandListAppendLaunchKernel,
                (CommandList->first, ZeKernel, &ZeThreadGroupDimensions, ZeEvent,
                 (*Event)->WaitList.Length, (*Event)->WaitList.ZeEventList));
   } else {
@@ -440,13 +462,12 @@ ur_result_t urEnqueueCooperativeKernelLaunchExp(
     // No lock is needed here, unlike the immediate commandlist case above,
     // because the kernels are not actually submitted yet. Kernels will be
     // submitted only when the comamndlist is closed. Then, a lock is held.
-    ZE2UR_CALL(zeCommandListAppendLaunchCooperativeKernel,
+    ZE2UR_CALL(zeCommandListAppendLaunchKernel,
                (CommandList->first, ZeKernel, &ZeThreadGroupDimensions, ZeEvent,
                 (*Event)->WaitList.Length, (*Event)->WaitList.ZeEventList));
   }
 
-  UR_LOG(DEBUG,
-         "calling zeCommandListAppendLaunchCooperativeKernel() with ZeEvent {}",
+  UR_LOG(DEBUG, "calling zeCommandListAppendLaunchKernel() with ZeEvent {}",
          ur_cast<std::uintptr_t>(ZeEvent));
   printZeEventList((*Event)->WaitList);
 
@@ -1094,7 +1115,7 @@ ur_result_t urKernelGetNativeHandle(
   return UR_RESULT_SUCCESS;
 }
 
-ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
+ur_result_t urKernelSuggestMaxCooperativeGroupCount(
     ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
     const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
     uint32_t *pGroupCountRet) {
diff --git a/unified-runtime/source/adapters/level_zero/queue.cpp b/unified-runtime/source/adapters/level_zero/queue.cpp
index 8a0bd1127965f..2cd607ccf93d7 100644
--- a/unified-runtime/source/adapters/level_zero/queue.cpp
+++ b/unified-runtime/source/adapters/level_zero/queue.cpp
@@ -930,20 +930,6 @@ ur_result_t urQueueFlush(
   return Queue->executeAllOpenCommandLists();
 }
 
-ur_result_t urEnqueueKernelLaunchCustomExp(
-    ur_queue_handle_t /*hQueue*/, ur_kernel_handle_t /*hKernel*/,
-    uint32_t /*workDim*/, const size_t * /*pGlobalWorkOffset*/,
-    const size_t * /*pGlobalWorkSize*/, const size_t * /*pLocalWorkSize*/,
-    uint32_t /*numPropsInLaunchPropList*/,
-    const ur_exp_launch_property_t * /*launchPropList*/,
-    uint32_t /*numEventsInWaitList*/,
-    const ur_event_handle_t * /*phEventWaitList*/,
-    ur_event_handle_t * /*phEvent*/) {
-  UR_LOG(ERR, "[UR][L0] {} function not implemented!",
-         "{} function not implemented!", __FUNCTION__);
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
 } // namespace ur::level_zero
 
 // Configuration of the command-list batching.
diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp
index c9dbb7cf5f963..fc5dda4d63714 100644
--- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp
+++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp
@@ -223,15 +223,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
     return result;
   }
 
-  pDdiTable->pfnKernelLaunchCustomExp =
-      ur::level_zero::urEnqueueKernelLaunchCustomExp;
   pDdiTable->pfnUSMDeviceAllocExp = ur::level_zero::urEnqueueUSMDeviceAllocExp;
   pDdiTable->pfnUSMSharedAllocExp = ur::level_zero::urEnqueueUSMSharedAllocExp;
   pDdiTable->pfnUSMHostAllocExp = ur::level_zero::urEnqueueUSMHostAllocExp;
   pDdiTable->pfnUSMFreeExp = ur::level_zero::urEnqueueUSMFreeExp;
   pDdiTable->pfnCommandBufferExp = ur::level_zero::urEnqueueCommandBufferExp;
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      ur::level_zero::urEnqueueCooperativeKernelLaunchExp;
   pDdiTable->pfnTimestampRecordingExp =
       ur::level_zero::urEnqueueTimestampRecordingExp;
   pDdiTable->pfnNativeCommandExp = ur::level_zero::urEnqueueNativeCommandExp;
@@ -285,19 +281,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
   pDdiTable->pfnSetArgMemObj = ur::level_zero::urKernelSetArgMemObj;
   pDdiTable->pfnSetSpecializationConstants =
       ur::level_zero::urKernelSetSpecializationConstants;
-
-  return result;
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-
-  pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
-      ur::level_zero::urKernelSuggestMaxCooperativeGroupCountExp;
+  pDdiTable->pfnSuggestMaxCooperativeGroupCount =
+      ur::level_zero::urKernelSuggestMaxCooperativeGroupCount;
 
   return result;
 }
@@ -594,10 +579,6 @@ ur_result_t populateDdiTable(ur_dditable_t *ddi) {
                                                 &ddi->Kernel);
   if (result != UR_RESULT_SUCCESS)
     return result;
-  result = NAMESPACE_::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT,
-                                                   &ddi->KernelExp);
-  if (result != UR_RESULT_SUCCESS)
-    return result;
   result = NAMESPACE_::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem);
   if (result != UR_RESULT_SUCCESS)
     return result;
diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp
index 64c89511e709f..53704ace96e0f 100644
--- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp
+++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp
@@ -296,6 +296,10 @@ ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel,
                                               const size_t *pGlobalWorkOffset,
                                               const size_t *pGlobalWorkSize,
                                               size_t *pSuggestedLocalWorkSize);
+ur_result_t urKernelSuggestMaxCooperativeGroupCount(
+    ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
+    const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
+    uint32_t *pGroupCountRet);
 ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName,
                            size_t propSize, void *pPropValue,
                            size_t *pPropSizeRet);
@@ -338,8 +342,10 @@ ur_result_t urEventSetCallback(ur_event_handle_t hEvent,
 ur_result_t urEnqueueKernelLaunch(
     ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
     const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+    const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
+    const ur_kernel_launch_property_t *launchPropList,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent);
 ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue,
                                 uint32_t numEventsInWaitList,
                                 const ur_event_handle_t *phEventWaitList,
@@ -756,25 +762,9 @@ urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
 ur_result_t
 urCommandBufferGetNativeHandleExp(ur_exp_command_buffer_handle_t hCommandBuffer,
                                   ur_native_handle_t *phNativeCommandBuffer);
-ur_result_t urEnqueueCooperativeKernelLaunchExp(
-    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
-    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
-ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
-    ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
-    const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
-    uint32_t *pGroupCountRet);
 ur_result_t urEnqueueTimestampRecordingExp(
     ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList,
     const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
-ur_result_t urEnqueueKernelLaunchCustomExp(
-    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
-    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
-    const ur_exp_launch_property_t *launchPropList,
-    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
-    ur_event_handle_t *phEvent);
 ur_result_t urProgramBuildExp(ur_program_handle_t hProgram, uint32_t numDevices,
                               ur_device_handle_t *phDevices,
                               const char *pOptions);
diff --git a/unified-runtime/source/adapters/level_zero/v2/kernel.cpp b/unified-runtime/source/adapters/level_zero/v2/kernel.cpp
index c4d754a0e2d56..a2189b57536e8 100644
--- a/unified-runtime/source/adapters/level_zero/v2/kernel.cpp
+++ b/unified-runtime/source/adapters/level_zero/v2/kernel.cpp
@@ -675,7 +675,7 @@ ur_result_t urKernelGetSuggestedLocalWorkSize(
   return UR_RESULT_SUCCESS;
 }
 
-ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
+ur_result_t urKernelSuggestMaxCooperativeGroupCount(
     ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t workDim,
     const size_t *pLocalWorkSize, size_t dynamicSharedMemorySize,
     uint32_t *pGroupCountRet) {
diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp b/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp
index faa09eee3eaa5..d043a68dcaec7 100644
--- a/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp
+++ b/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp
@@ -59,11 +59,14 @@ ur_result_t urQueueFlush(ur_queue_handle_t hQueue) try {
 ur_result_t urEnqueueKernelLaunch(
     ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
     const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try {
+    const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
+    const ur_kernel_launch_property_t *launchPropList,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) try {
   return hQueue->get().enqueueKernelLaunch(
       hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
-      numEventsInWaitList, phEventWaitList, phEvent);
+      numPropsInLaunchPropList, launchPropList, numEventsInWaitList,
+      phEventWaitList, phEvent);
 } catch (...) {
   return exceptionToResult(std::current_exception());
 }
@@ -429,17 +432,6 @@ ur_result_t urEnqueueCommandBufferExp(
 } catch (...) {
   return exceptionToResult(std::current_exception());
 }
-ur_result_t urEnqueueCooperativeKernelLaunchExp(
-    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
-    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try {
-  return hQueue->get().enqueueCooperativeKernelLaunchExp(
-      hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
-      numEventsInWaitList, phEventWaitList, phEvent);
-} catch (...) {
-  return exceptionToResult(std::current_exception());
-}
 ur_result_t urEnqueueTimestampRecordingExp(
     ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList,
     const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try {
@@ -448,20 +440,6 @@ ur_result_t urEnqueueTimestampRecordingExp(
 } catch (...) {
   return exceptionToResult(std::current_exception());
 }
-ur_result_t urEnqueueKernelLaunchCustomExp(
-    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
-    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
-    const ur_exp_launch_property_t *launchPropList,
-    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
-    ur_event_handle_t *phEvent) try {
-  return hQueue->get().enqueueKernelLaunchCustomExp(
-      hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
-      numPropsInLaunchPropList, launchPropList, numEventsInWaitList,
-      phEventWaitList, phEvent);
-} catch (...) {
-  return exceptionToResult(std::current_exception());
-}
 ur_result_t urEnqueueEventsWaitWithBarrierExt(
     ur_queue_handle_t hQueue,
     const ur_exp_enqueue_ext_properties_t *pProperties,
diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp
index 004b51f822c7f..b710f9d56b50d 100644
--- a/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp
+++ b/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp
@@ -30,7 +30,8 @@ struct ur_queue_t_ {
   virtual ur_result_t enqueueKernelLaunch(ur_kernel_handle_t, uint32_t,
                                           const size_t *, const size_t *,
                                           const size_t *, uint32_t,
-                                          const ur_event_handle_t *,
+                                          const ur_kernel_launch_property_t *,
+                                          uint32_t, const ur_event_handle_t *,
                                           ur_event_handle_t *) = 0;
   virtual ur_result_t enqueueEventsWait(uint32_t, const ur_event_handle_t *,
                                         ur_event_handle_t *) = 0;
@@ -158,17 +159,9 @@ struct ur_queue_t_ {
                                               uint32_t,
                                               const ur_event_handle_t *,
                                               ur_event_handle_t *) = 0;
-  virtual ur_result_t enqueueCooperativeKernelLaunchExp(
-      ur_kernel_handle_t, uint32_t, const size_t *, const size_t *,
-      const size_t *, uint32_t, const ur_event_handle_t *,
-      ur_event_handle_t *) = 0;
   virtual ur_result_t enqueueTimestampRecordingExp(bool, uint32_t,
                                                    const ur_event_handle_t *,
                                                    ur_event_handle_t *) = 0;
-  virtual ur_result_t enqueueKernelLaunchCustomExp(
-      ur_kernel_handle_t, uint32_t, const size_t *, const size_t *,
-      const size_t *, uint32_t, const ur_exp_launch_property_t *, uint32_t,
-      const ur_event_handle_t *, ur_event_handle_t *) = 0;
   virtual ur_result_t
   enqueueEventsWaitWithBarrierExt(const ur_exp_enqueue_ext_properties_t *,
                                   uint32_t, const ur_event_handle_t *,
diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp
index e649f2292468b..ff9fbce3ef66c 100644
--- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp
+++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp
@@ -191,10 +191,29 @@ ur_queue_immediate_in_order_t::~ur_queue_immediate_in_order_t() {
 ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch(
     ur_kernel_handle_t hKernel, uint32_t workDim,
     const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+    const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
+    const ur_kernel_launch_property_t *launchPropList,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
   TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueKernelLaunch");
 
+  for (uint32_t propIndex = 0; propIndex < numPropsInLaunchPropList;
+       propIndex++) {
+    if (launchPropList[propIndex].id ==
+            UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE &&
+        launchPropList[propIndex].value.cooperative) {
+      return enqueueCooperativeKernelLaunchHelper(
+          hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
+          numEventsInWaitList, phEventWaitList, phEvent);
+    }
+    if (launchPropList[propIndex].id != UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE &&
+        launchPropList[propIndex].id !=
+            UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE) {
+      // We don't support any other properties.
+      return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+  }
+
   auto commandListLocked = commandListManager.lock();
   UR_CALL(commandListLocked->appendKernelLaunch(
       hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
@@ -892,14 +911,11 @@ ur_queue_immediate_in_order_t::bindlessImagesSignalExternalSemaphoreExp(
   return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 }
 
-ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp(
+ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchHelper(
     ur_kernel_handle_t hKernel, uint32_t workDim,
     const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
     const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
     const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
-  TRACK_SCOPE_LATENCY(
-      "ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp");
-
   UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
   UR_ASSERT(hKernel->getProgramHandle(), UR_RESULT_ERROR_INVALID_NULL_POINTER);
 
@@ -1020,17 +1036,6 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCommandBufferExp(
   return UR_RESULT_SUCCESS;
 }
 
-ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunchCustomExp(
-    ur_kernel_handle_t /*hKernel*/, uint32_t /*workDim*/,
-    const size_t * /*pGlobalWorkOffset*/, const size_t * /*pGlobalWorkSize*/,
-    const size_t * /*pLocalWorkSize*/, uint32_t /*numPropsInLaunchPropList*/,
-    const ur_exp_launch_property_t * /*launchPropList*/,
-    uint32_t /*numEventsInWaitList*/,
-    const ur_event_handle_t * /*phEventWaitList*/,
-    ur_event_handle_t * /*phEvent*/) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
 ur_result_t ur_queue_immediate_in_order_t::enqueueNativeCommandExp(
     ur_exp_enqueue_native_command_function_t, void *, uint32_t,
     const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *,
diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp
index 6e290bd01efdd..2e3ae8c59caa1 100644
--- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp
+++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp
@@ -64,6 +64,12 @@ struct ur_queue_immediate_in_order_t : ur_object, public ur_queue_t_ {
 
   void recordSubmittedKernel(ur_kernel_handle_t hKernel);
 
+  // Cooperative-launch path of enqueueKernelLaunch; defined out of line.
+  ur_result_t enqueueCooperativeKernelLaunchHelper(
+      ur_kernel_handle_t hKernel, uint32_t workDim,
+      const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
+      const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
+      const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
   ur_result_t
   enqueueUSMAllocHelper(ur_usm_pool_handle_t pPool, const size_t size,
                         const ur_exp_async_usm_alloc_properties_t *pProperties,
@@ -86,13 +92,13 @@ struct ur_queue_immediate_in_order_t : ur_object, public ur_queue_t_ {
                                    ur_native_handle_t *phNativeQueue) override;
   ur_result_t queueFinish() override;
   ur_result_t queueFlush() override;
-  ur_result_t enqueueKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim,
-                                  const size_t *pGlobalWorkOffset,
-                                  const size_t *pGlobalWorkSize,
-                                  const size_t *pLocalWorkSize,
-                                  uint32_t numEventsInWaitList,
-                                  const ur_event_handle_t *phEventWaitList,
-                                  ur_event_handle_t *phEvent) override;
+  ur_result_t enqueueKernelLaunch(
+      ur_kernel_handle_t hKernel, uint32_t workDim,
+      const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
+      const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
+      const ur_kernel_launch_property_t *launchPropList,
+      uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+      ur_event_handle_t *phEvent) override;
   ur_result_t enqueueEventsWait(uint32_t numEventsInWaitList,
                                 const ur_event_handle_t *phEventWaitList,
                                 ur_event_handle_t *phEvent) override;
@@ -263,12 +269,6 @@ struct ur_queue_immediate_in_order_t : ur_object, public ur_queue_t_ {
       uint64_t signalValue, uint32_t numEventsInWaitList,
       const ur_event_handle_t *phEventWaitList,
       ur_event_handle_t *phEvent) override;
-  ur_result_t enqueueCooperativeKernelLaunchExp(
-      ur_kernel_handle_t hKernel, uint32_t workDim,
-      const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-      const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-      const ur_event_handle_t *phEventWaitList,
-      ur_event_handle_t *phEvent) override;
   ur_result_t
   enqueueTimestampRecordingExp(bool blocking, uint32_t numEventsInWaitList,
                                const ur_event_handle_t *phEventWaitList,
@@ -278,13 +278,6 @@ struct ur_queue_immediate_in_order_t : ur_object, public ur_queue_t_ {
                           uint32_t numEventsInWaitList,
                           const ur_event_handle_t *phEventWaitList,
                           ur_event_handle_t *phEvent) override;
-  ur_result_t enqueueKernelLaunchCustomExp(
-      ur_kernel_handle_t hKernel, uint32_t workDim,
-      const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-      const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
-      const ur_exp_launch_property_t *launchPropList,
-      uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
-      ur_event_handle_t *phEvent) override;
   ur_result_t
   enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *,
                           uint32_t, const ur_mem_handle_t *,
diff --git a/unified-runtime/source/adapters/mock/ur_mock.cpp b/unified-runtime/source/adapters/mock/ur_mock.cpp
index 8efa56df01476..142a706146a9b 100644
--- a/unified-runtime/source/adapters/mock/ur_mock.cpp
+++ b/unified-runtime/source/adapters/mock/ur_mock.cpp
@@ -105,7 +105,6 @@ context_t::context_t() {
   urGetEnqueueExpProcAddrTable(version, &urDdiTable.EnqueueExp);
   urGetEventProcAddrTable(version, &urDdiTable.Event);
   urGetKernelProcAddrTable(version, &urDdiTable.Kernel);
-  urGetKernelExpProcAddrTable(version, &urDdiTable.KernelExp);
   urGetMemProcAddrTable(version, &urDdiTable.Mem);
   urGetPhysicalMemProcAddrTable(version, &urDdiTable.PhysicalMem);
   urGetPlatformProcAddrTable(version, &urDdiTable.Platform);
diff --git a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp
index e1009e84de18e..62435435772c0 100644
--- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp
+++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp
@@ -4815,6 +4815,71 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
   return exceptionToResult(std::current_exception());
 }
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCount
+__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
+    /// [in] handle of the kernel object
+    ur_kernel_handle_t hKernel,
+    /// [in] handle of the device object
+    ur_device_handle_t hDevice,
+    /// [in] number of dimensions, from 1 to 3, to specify the work-group
+    /// work-items
+    uint32_t workDim,
+    /// [in] pointer to an array of workDim unsigned values that specify the
+    /// number of local work-items forming a work-group that will execute the
+    /// kernel function.
+    const size_t *pLocalWorkSize,
+    /// [in] size of dynamic shared memory, for each work-group, in bytes,
+    /// that will be used when the kernel is launched
+    size_t dynamicSharedMemorySize,
+    /// [out] pointer to maximum number of groups
+    uint32_t *pGroupCountRet) try {
+  ur_result_t result = UR_RESULT_SUCCESS;
+  // Pack pointers to all arguments into the struct handed to callbacks.
+  ur_kernel_suggest_max_cooperative_group_count_params_t params = {
+      &hKernel,
+      &hDevice,
+      &workDim,
+      &pLocalWorkSize,
+      &dynamicSharedMemorySize,
+      &pGroupCountRet};
+  // A registered "before" callback runs first; its failure ends the call.
+  auto beforeCallback = reinterpret_cast<ur_mock_callback_t>(
+      mock::getCallbacks().get_before_callback(
+          "urKernelSuggestMaxCooperativeGroupCount"));
+  if (beforeCallback) {
+    result = beforeCallback(&params);
+    if (result != UR_RESULT_SUCCESS) {
+      return result;
+    }
+  }
+  // A "replace" callback, when registered, supplies the result itself.
+  auto replaceCallback = reinterpret_cast<ur_mock_callback_t>(
+      mock::getCallbacks().get_replace_callback(
+          "urKernelSuggestMaxCooperativeGroupCount"));
+  if (replaceCallback) {
+    result = replaceCallback(&params);
+  } else {
+    // No replacement: report success without writing *pGroupCountRet.
+    result = UR_RESULT_SUCCESS;
+  }
+  // A failed result is returned as-is and skips the "after" callback.
+  if (result != UR_RESULT_SUCCESS) {
+    return result;
+  }
+  // The "after" callback, if any, decides the final return value.
+  auto afterCallback = reinterpret_cast<ur_mock_callback_t>(
+      mock::getCallbacks().get_after_callback(
+          "urKernelSuggestMaxCooperativeGroupCount"));
+  if (afterCallback) {
+    return afterCallback(&params);
+  }
+  // No "after" callback registered: pass the result through.
+  return result;
+} catch (...) {
+  return exceptionToResult(std::current_exception());
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urQueueGetInfo
 __urdlllocal ur_result_t UR_APICALL urQueueGetInfo(
@@ -5595,6 +5660,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// execute the kernel function.
     /// If nullptr, the runtime implementation will choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -5615,6 +5685,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
                                               &pGlobalWorkOffset,
                                               &pGlobalWorkSize,
                                               &pLocalWorkSize,
+                                              &numPropsInLaunchPropList,
+                                              &launchPropList,
                                               &numEventsInWaitList,
                                               &phEventWaitList,
                                               &phEvent};
@@ -11107,158 +11179,6 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp(
   return exceptionToResult(std::current_exception());
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp
-__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) try {
-  ur_result_t result = UR_RESULT_SUCCESS;
-
-  ur_enqueue_cooperative_kernel_launch_exp_params_t params = {
-      &hQueue,
-      &hKernel,
-      &workDim,
-      &pGlobalWorkOffset,
-      &pGlobalWorkSize,
-      &pLocalWorkSize,
-      &numEventsInWaitList,
-      &phEventWaitList,
-      &phEvent};
-
-  auto beforeCallback = reinterpret_cast<ur_mock_callback_t>(
-      mock::getCallbacks().get_before_callback(
-          "urEnqueueCooperativeKernelLaunchExp"));
-  if (beforeCallback) {
-    result = beforeCallback(&params);
-    if (result != UR_RESULT_SUCCESS) {
-      return result;
-    }
-  }
-
-  auto replaceCallback = reinterpret_cast<ur_mock_callback_t>(
-      mock::getCallbacks().get_replace_callback(
-          "urEnqueueCooperativeKernelLaunchExp"));
-  if (replaceCallback) {
-    result = replaceCallback(&params);
-  } else {
-
-    // optional output handle
-    if (phEvent) {
-      *phEvent = mock::createDummyHandle<ur_event_handle_t>();
-    }
-    result = UR_RESULT_SUCCESS;
-  }
-
-  if (result != UR_RESULT_SUCCESS) {
-    return result;
-  }
-
-  auto afterCallback = reinterpret_cast<ur_mock_callback_t>(
-      mock::getCallbacks().get_after_callback(
-          "urEnqueueCooperativeKernelLaunchExp"));
-  if (afterCallback) {
-    return afterCallback(&params);
-  }
-
-  return result;
-} catch (...) {
-  return exceptionToResult(std::current_exception());
-}
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp
-__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] handle of the device object
-    ur_device_handle_t hDevice,
-    /// [in] number of dimensions, from 1 to 3, to specify the work-group
-    /// work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of local work-items forming a work-group that will execute the
-    /// kernel function.
-    const size_t *pLocalWorkSize,
-    /// [in] size of dynamic shared memory, for each work-group, in bytes,
-    /// that will be used when the kernel is launched
-    size_t dynamicSharedMemorySize,
-    /// [out] pointer to maximum number of groups
-    uint32_t *pGroupCountRet) try {
-  ur_result_t result = UR_RESULT_SUCCESS;
-
-  ur_kernel_suggest_max_cooperative_group_count_exp_params_t params = {
-      &hKernel,
-      &hDevice,
-      &workDim,
-      &pLocalWorkSize,
-      &dynamicSharedMemorySize,
-      &pGroupCountRet};
-
-  auto beforeCallback = reinterpret_cast<ur_mock_callback_t>(
-      mock::getCallbacks().get_before_callback(
-          "urKernelSuggestMaxCooperativeGroupCountExp"));
-  if (beforeCallback) {
-    result = beforeCallback(&params);
-    if (result != UR_RESULT_SUCCESS) {
-      return result;
-    }
-  }
-
-  auto replaceCallback = reinterpret_cast<ur_mock_callback_t>(
-      mock::getCallbacks().get_replace_callback(
-          "urKernelSuggestMaxCooperativeGroupCountExp"));
-  if (replaceCallback) {
-    result = replaceCallback(&params);
-  } else {
-
-    result = UR_RESULT_SUCCESS;
-  }
-
-  if (result != UR_RESULT_SUCCESS) {
-    return result;
-  }
-
-  auto afterCallback = reinterpret_cast<ur_mock_callback_t>(
-      mock::getCallbacks().get_after_callback(
-          "urKernelSuggestMaxCooperativeGroupCountExp"));
-  if (afterCallback) {
-    return afterCallback(&params);
-  }
-
-  return result;
-} catch (...) {
-  return exceptionToResult(std::current_exception());
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urEnqueueTimestampRecordingExp
 __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
@@ -11328,99 +11248,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
   return exceptionToResult(std::current_exception());
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urEnqueueKernelLaunchCustomExp
-__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function. If nullptr, the runtime implementation
-    /// will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the launch prop list
-    uint32_t numPropsInLaunchPropList,
-    /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch
-    /// properties
-    const ur_exp_launch_property_t *launchPropList,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution. If nullptr,
-    /// the numEventsInWaitList must be 0, indicating that no wait event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) try {
-  ur_result_t result = UR_RESULT_SUCCESS;
-
-  ur_enqueue_kernel_launch_custom_exp_params_t params = {
-      &hQueue,
-      &hKernel,
-      &workDim,
-      &pGlobalWorkOffset,
-      &pGlobalWorkSize,
-      &pLocalWorkSize,
-      &numPropsInLaunchPropList,
-      &launchPropList,
-      &numEventsInWaitList,
-      &phEventWaitList,
-      &phEvent};
-
-  auto beforeCallback = reinterpret_cast<ur_mock_callback_t>(
-      mock::getCallbacks().get_before_callback(
-          "urEnqueueKernelLaunchCustomExp"));
-  if (beforeCallback) {
-    result = beforeCallback(&params);
-    if (result != UR_RESULT_SUCCESS) {
-      return result;
-    }
-  }
-
-  auto replaceCallback = reinterpret_cast<ur_mock_callback_t>(
-      mock::getCallbacks().get_replace_callback(
-          "urEnqueueKernelLaunchCustomExp"));
-  if (replaceCallback) {
-    result = replaceCallback(&params);
-  } else {
-
-    // optional output handle
-    if (phEvent) {
-      *phEvent = mock::createDummyHandle<ur_event_handle_t>();
-    }
-    result = UR_RESULT_SUCCESS;
-  }
-
-  if (result != UR_RESULT_SUCCESS) {
-    return result;
-  }
-
-  auto afterCallback = reinterpret_cast<ur_mock_callback_t>(
-      mock::getCallbacks().get_after_callback(
-          "urEnqueueKernelLaunchCustomExp"));
-  if (afterCallback) {
-    return afterCallback(&params);
-  }
-
-  return result;
-} catch (...) {
-  return exceptionToResult(std::current_exception());
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urProgramBuildExp
 __urdlllocal ur_result_t UR_APICALL urProgramBuildExp(
@@ -12325,8 +12152,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
 
   ur_result_t result = UR_RESULT_SUCCESS;
 
-  pDdiTable->pfnKernelLaunchCustomExp = driver::urEnqueueKernelLaunchCustomExp;
-
   pDdiTable->pfnUSMDeviceAllocExp = driver::urEnqueueUSMDeviceAllocExp;
 
   pDdiTable->pfnUSMSharedAllocExp = driver::urEnqueueUSMSharedAllocExp;
@@ -12337,9 +12162,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
 
   pDdiTable->pfnCommandBufferExp = driver::urEnqueueCommandBufferExp;
 
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      driver::urEnqueueCooperativeKernelLaunchExp;
-
   pDdiTable->pfnTimestampRecordingExp = driver::urEnqueueTimestampRecordingExp;
 
   pDdiTable->pfnNativeCommandExp = driver::urEnqueueNativeCommandExp;
@@ -12446,34 +12268,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
   pDdiTable->pfnSetSpecializationConstants =
       driver::urKernelSetSpecializationConstants;
 
-  return result;
-} catch (...) {
-  return exceptionToResult(std::current_exception());
-}
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Exported function for filling application's KernelExp table
-///        with current process' addresses
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    /// [in] API version requested
-    ur_api_version_t version,
-    /// [in,out] pointer to table of DDI function pointers
-    ur_kernel_exp_dditable_t *pDdiTable) try {
-  if (nullptr == pDdiTable)
-    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-  if (driver::d_context.version < version)
-    return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
-
-  ur_result_t result = UR_RESULT_SUCCESS;
-
-  pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
-      driver::urKernelSuggestMaxCooperativeGroupCountExp;
+  pDdiTable->pfnSuggestMaxCooperativeGroupCount =
+      driver::urKernelSuggestMaxCooperativeGroupCount;
 
   return result;
 } catch (...) {
diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp
index fdfd3380318b7..72f76005905f1 100644
--- a/unified-runtime/source/adapters/native_cpu/device.cpp
+++ b/unified-runtime/source/adapters/native_cpu/device.cpp
@@ -445,18 +445,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP:
     return ReturnValue(false);
 
-  case UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP:
-    return ReturnValue(false);
-
-  case UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP:
-    return ReturnValue(false);
-
   case UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP:
     return ReturnValue(true);
 
   case UR_DEVICE_INFO_GLOBAL_VARIABLE_SUPPORT:
     return ReturnValue(false);
 
+  case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES:
+    return ReturnValue(0);
+
   default:
     DIE_NO_IMPLEMENTATION;
   }
diff --git a/unified-runtime/source/adapters/native_cpu/enqueue.cpp b/unified-runtime/source/adapters/native_cpu/enqueue.cpp
index 74a21b8ea1f36..d4203ab32a77f 100644
--- a/unified-runtime/source/adapters/native_cpu/enqueue.cpp
+++ b/unified-runtime/source/adapters/native_cpu/enqueue.cpp
@@ -66,8 +66,17 @@ static native_cpu::state getResizedState(const native_cpu::NDRDescT &ndr,
 UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
     ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
     const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+    const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
+    const ur_kernel_launch_property_t *launchPropList,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  // We don't support any launch properties.
+  for (uint32_t propIndex = 0; propIndex < numPropsInLaunchPropList;
+       propIndex++) {
+    if (launchPropList[propIndex].id != UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE) {
+      return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+  }
 
   urEventWait(numEventsInWaitList, phEventWaitList);
   UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
diff --git a/unified-runtime/source/adapters/native_cpu/kernel.cpp b/unified-runtime/source/adapters/native_cpu/kernel.cpp
index 970b184ccb126..500b2c6bcd8a5 100644
--- a/unified-runtime/source/adapters/native_cpu/kernel.cpp
+++ b/unified-runtime/source/adapters/native_cpu/kernel.cpp
@@ -285,3 +285,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
     [[maybe_unused]] size_t *pSuggestedLocalWorkSize) {
   return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 }
+
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
+    [[maybe_unused]] ur_kernel_handle_t hKernel,
+    [[maybe_unused]] ur_device_handle_t hDevice,
+    [[maybe_unused]] uint32_t workDim,
+    [[maybe_unused]] const size_t *pLocalWorkSize,
+    [[maybe_unused]] size_t dynamicSharedMemorySize,
+    [[maybe_unused]] uint32_t *pGroupCountRet) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // always; no output written
+}
diff --git a/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp b/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp
index 0d9369817a7d0..8b43086cfa434 100644
--- a/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp
+++ b/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp
@@ -125,6 +125,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
   pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
   pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants;
   pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize;
+  pDdiTable->pfnSuggestMaxCooperativeGroupCount =
+      urKernelSuggestMaxCooperativeGroupCount;
   return UR_RESULT_SUCCESS;
 }
 
@@ -412,7 +414,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
     return result;
   }
 
-  pDdiTable->pfnCooperativeKernelLaunchExp = nullptr;
   pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp;
   pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp;
   pDdiTable->pfnCommandBufferExp = urEnqueueCommandBufferExp;
@@ -420,18 +421,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
   return UR_RESULT_SUCCESS;
 }
 
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-
-  pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = nullptr;
-
-  return UR_RESULT_SUCCESS;
-}
-
 UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
     ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) {
   auto result = validateProcInputs(version, pDdiTable);
@@ -456,7 +445,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version,
   urGetEnqueueExpProcAddrTable(version, &pDdiTable->EnqueueExp);
   urGetEventProcAddrTable(version, &pDdiTable->Event);
   urGetKernelProcAddrTable(version, &pDdiTable->Kernel);
-  urGetKernelExpProcAddrTable(version, &pDdiTable->KernelExp);
   urGetMemProcAddrTable(version, &pDdiTable->Mem);
   urGetPhysicalMemProcAddrTable(version, &pDdiTable->PhysicalMem);
   urGetPlatformProcAddrTable(version, &pDdiTable->Platform);
diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp
index 169f6e9875f94..1dbde401219ac 100644
--- a/unified-runtime/source/adapters/opencl/device.cpp
+++ b/unified-runtime/source/adapters/opencl/device.cpp
@@ -1421,12 +1421,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   }
   case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP:
     return ReturnValue(false);
-  case UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP:
-    return ReturnValue(false);
-  case UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP:
-    return ReturnValue(true);
   case UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP:
-    return ReturnValue(false);
+    return ReturnValue(true);
+  case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES:
+    return ReturnValue(0);
   // TODO: We can't query to check if these are supported, they will need to be
   // manually updated if support is ever implemented.
   case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS:
@@ -1435,7 +1433,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP:
   case UR_DEVICE_INFO_COMMAND_BUFFER_SUBGRAPH_SUPPORT_EXP:
   case UR_DEVICE_INFO_LOW_POWER_EVENTS_SUPPORT_EXP:
-  case UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP:
   case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP:
   case UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP:
   case UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP:
diff --git a/unified-runtime/source/adapters/opencl/enqueue.cpp b/unified-runtime/source/adapters/opencl/enqueue.cpp
index 9314c34c5b2d7..63b7b45426632 100644
--- a/unified-runtime/source/adapters/opencl/enqueue.cpp
+++ b/unified-runtime/source/adapters/opencl/enqueue.cpp
@@ -42,8 +42,24 @@ void MapUREventsToCL(uint32_t numEvents, const ur_event_handle_t *UREvents,
 UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
     ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
     const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+    const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
+    const ur_kernel_launch_property_t *launchPropList,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  for (uint32_t propIndex = 0; propIndex < numPropsInLaunchPropList;
+       propIndex++) {
+    // Adapters that don't support cooperative kernels are currently expected
+    // to ignore COOPERATIVE launch properties. Ideally we should avoid passing
+    // these at the SYCL RT level instead, see
+    // https://github.com/intel/llvm/issues/18421
+    if (launchPropList[propIndex].id == UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE ||
+        launchPropList[propIndex].id ==
+            UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE) {
+      continue;
+    }
+    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  }
+
   std::vector<size_t> compiledLocalWorksize;
   if (!pLocalWorkSize) {
     cl_device_id device = nullptr;
@@ -81,16 +97,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
   return UR_RESULT_SUCCESS;
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
-    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
-    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
-    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
-  return urEnqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
-                               pGlobalWorkSize, pLocalWorkSize,
-                               numEventsInWaitList, phEventWaitList, phEvent);
-}
-
 UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
     ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
     const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
diff --git a/unified-runtime/source/adapters/opencl/kernel.cpp b/unified-runtime/source/adapters/opencl/kernel.cpp
index 0be1574d8c084..f0c22a99749a9 100644
--- a/unified-runtime/source/adapters/opencl/kernel.cpp
+++ b/unified-runtime/source/adapters/opencl/kernel.cpp
@@ -454,7 +454,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
   return UR_RESULT_SUCCESS;
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
     [[maybe_unused]] ur_kernel_handle_t hKernel,
     [[maybe_unused]] ur_device_handle_t hDevice,
     [[maybe_unused]] uint32_t workDim,
diff --git a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp
index 4000edeb14e3a..8015e632b4f0c 100644
--- a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp
+++ b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp
@@ -144,6 +144,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
   pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
   pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants;
   pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize;
+  pDdiTable->pfnSuggestMaxCooperativeGroupCount =
+      urKernelSuggestMaxCooperativeGroupCount;
   return UR_RESULT_SUCCESS;
 }
 
@@ -417,8 +419,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
     return result;
   }
 
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      urEnqueueCooperativeKernelLaunchExp;
   pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp;
   pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp;
   pDdiTable->pfnCommandBufferExp = urEnqueueCommandBufferExp;
@@ -426,19 +426,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
   return UR_RESULT_SUCCESS;
 }
 
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-
-  pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
-      urKernelSuggestMaxCooperativeGroupCountExp;
-
-  return UR_RESULT_SUCCESS;
-}
-
 UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
     ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) {
   auto result = validateProcInputs(version, pDdiTable);
@@ -463,7 +450,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version,
   urGetEnqueueExpProcAddrTable(version, &pDdiTable->EnqueueExp);
   urGetEventProcAddrTable(version, &pDdiTable->Event);
   urGetKernelProcAddrTable(version, &pDdiTable->Kernel);
-  urGetKernelExpProcAddrTable(version, &pDdiTable->KernelExp);
   urGetMemProcAddrTable(version, &pDdiTable->Mem);
   urGetPhysicalMemProcAddrTable(version, &pDdiTable->PhysicalMem);
   urGetPlatformProcAddrTable(version, &pDdiTable->Platform);
diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp
index 1d035125d6a56..0b4a64c38a549 100644
--- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp
+++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp
@@ -484,6 +484,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// execute the kernel function. If nullptr, the runtime implementation will
     /// choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -517,8 +522,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
 
   UR_CALL(getContext()->urDdiTable.Enqueue.pfnKernelLaunch(
       hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      LaunchInfo.LocalWorkSize.data(), numEventsInWaitList, phEventWaitList,
-      phEvent));
+      LaunchInfo.LocalWorkSize.data(), numPropsInLaunchPropList, launchPropList,
+      numEventsInWaitList, phEventWaitList, phEvent));
 
   UR_CALL(getAsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo));
 
@@ -1404,58 +1409,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap(
   return UR_RESULT_SUCCESS;
 }
 
-ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-
-  UR_LOG_L(getContext()->logger, DEBUG,
-           "==== urEnqueueCooperativeKernelLaunchExp");
-
-  LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), pGlobalWorkSize,
-                        pLocalWorkSize, pGlobalWorkOffset, workDim);
-  UR_CALL(LaunchInfo.Data.syncToDevice(hQueue));
-
-  UR_CALL(getAsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo));
-
-  UR_CALL(getContext()->urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      LaunchInfo.LocalWorkSize.data(), numEventsInWaitList, phEventWaitList,
-      phEvent));
-
-  UR_CALL(getAsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo));
-
-  return UR_RESULT_SUCCESS;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urKernelRetain
 __urdlllocal ur_result_t UR_APICALL urKernelRetain(
@@ -2000,25 +1953,6 @@ __urdlllocal ur_result_t UR_APICALL urGetDeviceProcAddrTable(
   return result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Exported function for filling application's EnqueueExp table
-///        with current process' addresses
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-__urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
-    /// [in,out] pointer to table of DDI function pointers
-    ur_enqueue_exp_dditable_t *pDdiTable) {
-  if (nullptr == pDdiTable) {
-    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-  }
-
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      ur_sanitizer_layer::asan::urEnqueueCooperativeKernelLaunchExp;
-  return UR_RESULT_SUCCESS;
-}
-
 template <class A, class B> struct NotSupportedApi;
 
 template <class MsgType, class R, class... A>
@@ -2214,11 +2148,6 @@ ur_result_t initAsanDDITable(ur_dditable_t *dditable) {
         UR_API_VERSION_CURRENT, &dditable->VirtualMem);
   }
 
-  if (UR_RESULT_SUCCESS == result) {
-    result = ur_sanitizer_layer::asan::urGetEnqueueExpProcAddrTable(
-        &dditable->EnqueueExp);
-  }
-
   if (result != UR_RESULT_SUCCESS) {
     UR_LOG_L(getContext()->logger, ERR, "Initialize ASAN DDI table failed: {}",
              result);
diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp
index c8803019523b8..13aa868cbf0f0 100644
--- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp
+++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp
@@ -411,6 +411,11 @@ ur_result_t urEnqueueKernelLaunch(
     /// execute the kernel function. If nullptr, the runtime implementation will
     /// choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -432,8 +437,8 @@ ur_result_t urEnqueueKernelLaunch(
 
   UR_CALL(getContext()->urDdiTable.Enqueue.pfnKernelLaunch(
       hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      LaunchInfo.LocalWorkSize.data(), numEventsInWaitList, phEventWaitList,
-      phEvent));
+      LaunchInfo.LocalWorkSize.data(), numPropsInLaunchPropList, launchPropList,
+      numEventsInWaitList, phEventWaitList, phEvent));
 
   UR_CALL(getMsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo));
 
@@ -1314,59 +1319,6 @@ ur_result_t urEnqueueMemUnmap(
   return UR_RESULT_SUCCESS;
 }
 
-ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-
-  UR_LOG_L(getContext()->logger, DEBUG,
-           "==== urEnqueueCooperativeKernelLaunchExp");
-
-  USMLaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue),
-                           pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset,
-                           workDim);
-  UR_CALL(LaunchInfo.initialize());
-
-  UR_CALL(getMsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo));
-
-  UR_CALL(getContext()->urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      LaunchInfo.LocalWorkSize.data(), numEventsInWaitList, phEventWaitList,
-      phEvent));
-
-  UR_CALL(getMsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo));
-
-  return UR_RESULT_SUCCESS;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urKernelRetain
 ur_result_t urKernelRetain(
@@ -1961,25 +1913,6 @@ ur_result_t urCheckVersion(ur_api_version_t version) {
   return UR_RESULT_SUCCESS;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Exported function for filling application's EnqueueExp table
-///        with current process' addresses
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-__urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
-    /// [in,out] pointer to table of DDI function pointers
-    ur_enqueue_exp_dditable_t *pDdiTable) {
-  if (nullptr == pDdiTable) {
-    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-  }
-
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      ur_sanitizer_layer::msan::urEnqueueCooperativeKernelLaunchExp;
-  return UR_RESULT_SUCCESS;
-}
-
 } // namespace msan
 
 ur_result_t initMsanDDITable(ur_dditable_t *dditable) {
@@ -2034,11 +1967,6 @@ ur_result_t initMsanDDITable(ur_dditable_t *dditable) {
     result = ur_sanitizer_layer::msan::urGetUSMProcAddrTable(&dditable->USM);
   }
 
-  if (UR_RESULT_SUCCESS == result) {
-    result = ur_sanitizer_layer::msan::urGetEnqueueExpProcAddrTable(
-        &dditable->EnqueueExp);
-  }
-
   if (result != UR_RESULT_SUCCESS) {
     UR_LOG_L(getContext()->logger, ERR, "Initialize MSAN DDI table failed: {}",
              result);
diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp
index 46c7142688959..f3802f652d614 100644
--- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp
+++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp
@@ -1127,6 +1127,11 @@ ur_result_t urEnqueueKernelLaunch(
     /// execute the kernel function. If nullptr, the runtime implementation will
     /// choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -1145,55 +1150,8 @@ ur_result_t urEnqueueKernelLaunch(
 
   UR_CALL(getContext()->urDdiTable.Enqueue.pfnKernelLaunch(
       hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent));
-
-  UR_CALL(getTsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo));
-
-  return UR_RESULT_SUCCESS;
-}
-
-ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-  UR_LOG_L(getContext()->logger, DEBUG,
-           "==== urEnqueueCooperativeKernelLaunchExp");
-
-  LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue));
-
-  UR_CALL(getTsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo));
-
-  UR_CALL(getContext()->urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent));
+      pLocalWorkSize, numPropsInLaunchPropList, launchPropList,
+      numEventsInWaitList, phEventWaitList, phEvent));
 
   UR_CALL(getTsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo));
 
@@ -1378,25 +1336,6 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
   return UR_RESULT_SUCCESS;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Exported function for filling application's EnqueueExp table
-///        with current process' addresses
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-__urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
-    /// [in,out] pointer to table of DDI function pointers
-    ur_enqueue_exp_dditable_t *pDdiTable) {
-  if (nullptr == pDdiTable) {
-    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-  }
-
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      ur_sanitizer_layer::tsan::urEnqueueCooperativeKernelLaunchExp;
-  return UR_RESULT_SUCCESS;
-}
-
 } // namespace tsan
 
 ur_result_t initTsanDDITable(ur_dditable_t *dditable) {
@@ -1441,11 +1380,6 @@ ur_result_t initTsanDDITable(ur_dditable_t *dditable) {
         ur_sanitizer_layer::tsan::urGetEnqueueProcAddrTable(&dditable->Enqueue);
   }
 
-  if (UR_RESULT_SUCCESS == result) {
-    result = ur_sanitizer_layer::tsan::urGetEnqueueExpProcAddrTable(
-        &dditable->EnqueueExp);
-  }
-
   if (result != UR_RESULT_SUCCESS) {
     UR_LOG_L(getContext()->logger, ERR, "Initialize TSAN DDI table failed: {}",
              result);
diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp
index ca3e93b0708cb..25fb682587635 100644
--- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp
+++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp
@@ -3982,6 +3982,66 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
   return result;
 }
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCount
+__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
+    /// [in] handle of the kernel object
+    ur_kernel_handle_t hKernel,
+    /// [in] handle of the device object
+    ur_device_handle_t hDevice,
+    /// [in] number of dimensions, from 1 to 3, to specify the work-group
+    /// work-items
+    uint32_t workDim,
+    /// [in] pointer to an array of workDim unsigned values that specify the
+    /// number of local work-items forming a work-group that will execute the
+    /// kernel function.
+    const size_t *pLocalWorkSize,
+    /// [in] size of dynamic shared memory, for each work-group, in bytes,
+    /// that will be used when the kernel is launched
+    size_t dynamicSharedMemorySize,
+    /// [out] pointer to maximum number of groups
+    uint32_t *pGroupCountRet) {
+  auto pfnSuggestMaxCooperativeGroupCount =
+      getContext()->urDdiTable.Kernel.pfnSuggestMaxCooperativeGroupCount;
+  // An empty DDI slot means there is nothing to forward this call to.
+  if (nullptr == pfnSuggestMaxCooperativeGroupCount)
+    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  // Collect the address of each argument for the notify and print calls.
+  ur_kernel_suggest_max_cooperative_group_count_params_t params = {
+      &hKernel,
+      &hDevice,
+      &workDim,
+      &pLocalWorkSize,
+      &dynamicSharedMemorySize,
+      &pGroupCountRet};
+  uint64_t instance = getContext()->notify_begin(
+      UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT,
+      "urKernelSuggestMaxCooperativeGroupCount", &params);
+
+  auto &logger = getContext()->logger;
+  UR_LOG_L(logger, INFO, "   ---> urKernelSuggestMaxCooperativeGroupCount\n");
+  // Forward the call, unmodified, through the pointer read from the table.
+  ur_result_t result = pfnSuggestMaxCooperativeGroupCount(
+      hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
+      pGroupCountRet);
+  // Report completion with the result and the id notify_begin returned.
+  getContext()->notify_end(
+      UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT,
+      "urKernelSuggestMaxCooperativeGroupCount", &params, &result, instance);
+  // Format the arguments only when the logger level is INFO or lower.
+  if (logger.getLevel() <= UR_LOGGER_LEVEL_INFO) {
+    std::ostringstream args_str;
+    ur::extras::printFunctionParams(
+        args_str, UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT,
+        &params);
+    UR_LOG_L(logger, INFO,
+             "   <--- urKernelSuggestMaxCooperativeGroupCount({}) -> {};\n",
+             args_str.str(), result);
+  }
+
+  return result;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urQueueGetInfo
 __urdlllocal ur_result_t UR_APICALL urQueueGetInfo(
@@ -4615,6 +4675,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// execute the kernel function.
     /// If nullptr, the runtime implementation will choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -4638,6 +4703,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
                                               &pGlobalWorkOffset,
                                               &pGlobalWorkSize,
                                               &pLocalWorkSize,
+                                              &numPropsInLaunchPropList,
+                                              &launchPropList,
                                               &numEventsInWaitList,
                                               &phEventWaitList,
                                               &phEvent};
@@ -4649,7 +4716,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
 
   ur_result_t result = pfnKernelLaunch(
       hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
+      pLocalWorkSize, numPropsInLaunchPropList, launchPropList,
+      numEventsInWaitList, phEventWaitList, phEvent);
 
   getContext()->notify_end(UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH,
                            "urEnqueueKernelLaunch", &params, &result, instance);
@@ -9389,144 +9457,6 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp(
   return result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp
-__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-  auto pfnCooperativeKernelLaunchExp =
-      getContext()->urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp;
-
-  if (nullptr == pfnCooperativeKernelLaunchExp)
-    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-
-  ur_enqueue_cooperative_kernel_launch_exp_params_t params = {
-      &hQueue,
-      &hKernel,
-      &workDim,
-      &pGlobalWorkOffset,
-      &pGlobalWorkSize,
-      &pLocalWorkSize,
-      &numEventsInWaitList,
-      &phEventWaitList,
-      &phEvent};
-  uint64_t instance = getContext()->notify_begin(
-      UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP,
-      "urEnqueueCooperativeKernelLaunchExp", &params);
-
-  auto &logger = getContext()->logger;
-  UR_LOG_L(logger, INFO, "   ---> urEnqueueCooperativeKernelLaunchExp\n");
-
-  ur_result_t result = pfnCooperativeKernelLaunchExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
-
-  getContext()->notify_end(UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP,
-                           "urEnqueueCooperativeKernelLaunchExp", &params,
-                           &result, instance);
-
-  if (logger.getLevel() <= UR_LOGGER_LEVEL_INFO) {
-    std::ostringstream args_str;
-    ur::extras::printFunctionParams(
-        args_str, UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP, &params);
-    UR_LOG_L(logger, INFO,
-             "   <--- urEnqueueCooperativeKernelLaunchExp({}) -> {};\n",
-             args_str.str(), result);
-  }
-
-  return result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp
-__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] handle of the device object
-    ur_device_handle_t hDevice,
-    /// [in] number of dimensions, from 1 to 3, to specify the work-group
-    /// work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of local work-items forming a work-group that will execute the
-    /// kernel function.
-    const size_t *pLocalWorkSize,
-    /// [in] size of dynamic shared memory, for each work-group, in bytes,
-    /// that will be used when the kernel is launched
-    size_t dynamicSharedMemorySize,
-    /// [out] pointer to maximum number of groups
-    uint32_t *pGroupCountRet) {
-  auto pfnSuggestMaxCooperativeGroupCountExp =
-      getContext()->urDdiTable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp;
-
-  if (nullptr == pfnSuggestMaxCooperativeGroupCountExp)
-    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-
-  ur_kernel_suggest_max_cooperative_group_count_exp_params_t params = {
-      &hKernel,
-      &hDevice,
-      &workDim,
-      &pLocalWorkSize,
-      &dynamicSharedMemorySize,
-      &pGroupCountRet};
-  uint64_t instance = getContext()->notify_begin(
-      UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP,
-      "urKernelSuggestMaxCooperativeGroupCountExp", &params);
-
-  auto &logger = getContext()->logger;
-  UR_LOG_L(logger, INFO,
-           "   ---> urKernelSuggestMaxCooperativeGroupCountExp\n");
-
-  ur_result_t result = pfnSuggestMaxCooperativeGroupCountExp(
-      hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
-      pGroupCountRet);
-
-  getContext()->notify_end(
-      UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP,
-      "urKernelSuggestMaxCooperativeGroupCountExp", &params, &result, instance);
-
-  if (logger.getLevel() <= UR_LOGGER_LEVEL_INFO) {
-    std::ostringstream args_str;
-    ur::extras::printFunctionParams(
-        args_str, UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP,
-        &params);
-    UR_LOG_L(logger, INFO,
-             "   <--- urKernelSuggestMaxCooperativeGroupCountExp({}) -> {};\n",
-             args_str.str(), result);
-  }
-
-  return result;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urEnqueueTimestampRecordingExp
 __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
@@ -9588,90 +9518,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
   return result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urEnqueueKernelLaunchCustomExp
-__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function. If nullptr, the runtime implementation
-    /// will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the launch prop list
-    uint32_t numPropsInLaunchPropList,
-    /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch
-    /// properties
-    const ur_exp_launch_property_t *launchPropList,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution. If nullptr,
-    /// the numEventsInWaitList must be 0, indicating that no wait event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-  auto pfnKernelLaunchCustomExp =
-      getContext()->urDdiTable.EnqueueExp.pfnKernelLaunchCustomExp;
-
-  if (nullptr == pfnKernelLaunchCustomExp)
-    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-
-  ur_enqueue_kernel_launch_custom_exp_params_t params = {
-      &hQueue,
-      &hKernel,
-      &workDim,
-      &pGlobalWorkOffset,
-      &pGlobalWorkSize,
-      &pLocalWorkSize,
-      &numPropsInLaunchPropList,
-      &launchPropList,
-      &numEventsInWaitList,
-      &phEventWaitList,
-      &phEvent};
-  uint64_t instance =
-      getContext()->notify_begin(UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP,
-                                 "urEnqueueKernelLaunchCustomExp", &params);
-
-  auto &logger = getContext()->logger;
-  UR_LOG_L(logger, INFO, "   ---> urEnqueueKernelLaunchCustomExp\n");
-
-  ur_result_t result = pfnKernelLaunchCustomExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numPropsInLaunchPropList, launchPropList,
-      numEventsInWaitList, phEventWaitList, phEvent);
-
-  getContext()->notify_end(UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP,
-                           "urEnqueueKernelLaunchCustomExp", &params, &result,
-                           instance);
-
-  if (logger.getLevel() <= UR_LOGGER_LEVEL_INFO) {
-    std::ostringstream args_str;
-    ur::extras::printFunctionParams(
-        args_str, UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP, &params);
-    UR_LOG_L(logger, INFO,
-             "   <--- urEnqueueKernelLaunchCustomExp({}) -> {};\n",
-             args_str.str(), result);
-  }
-
-  return result;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urProgramBuildExp
 __urdlllocal ur_result_t UR_APICALL urProgramBuildExp(
@@ -10613,10 +10459,6 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
 
   ur_result_t result = UR_RESULT_SUCCESS;
 
-  dditable.pfnKernelLaunchCustomExp = pDdiTable->pfnKernelLaunchCustomExp;
-  pDdiTable->pfnKernelLaunchCustomExp =
-      ur_tracing_layer::urEnqueueKernelLaunchCustomExp;
-
   dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp;
   pDdiTable->pfnUSMDeviceAllocExp =
       ur_tracing_layer::urEnqueueUSMDeviceAllocExp;
@@ -10634,11 +10476,6 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
   dditable.pfnCommandBufferExp = pDdiTable->pfnCommandBufferExp;
   pDdiTable->pfnCommandBufferExp = ur_tracing_layer::urEnqueueCommandBufferExp;
 
-  dditable.pfnCooperativeKernelLaunchExp =
-      pDdiTable->pfnCooperativeKernelLaunchExp;
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      ur_tracing_layer::urEnqueueCooperativeKernelLaunchExp;
-
   dditable.pfnTimestampRecordingExp = pDdiTable->pfnTimestampRecordingExp;
   pDdiTable->pfnTimestampRecordingExp =
       ur_tracing_layer::urEnqueueTimestampRecordingExp;
@@ -10780,38 +10617,10 @@ __urdlllocal ur_result_t UR_APICALL urGetKernelProcAddrTable(
   pDdiTable->pfnSetSpecializationConstants =
       ur_tracing_layer::urKernelSetSpecializationConstants;
 
-  return result;
-}
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Exported function for filling application's KernelExp table
-///        with current process' addresses
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
-__urdlllocal ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    /// [in] API version requested
-    ur_api_version_t version,
-    /// [in,out] pointer to table of DDI function pointers
-    ur_kernel_exp_dditable_t *pDdiTable) {
-  auto &dditable = ur_tracing_layer::getContext()->urDdiTable.KernelExp;
-
-  if (nullptr == pDdiTable)
-    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-  if (UR_MAJOR_VERSION(ur_tracing_layer::getContext()->version) !=
-          UR_MAJOR_VERSION(version) ||
-      UR_MINOR_VERSION(ur_tracing_layer::getContext()->version) >
-          UR_MINOR_VERSION(version))
-    return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
-
-  ur_result_t result = UR_RESULT_SUCCESS;
-
-  dditable.pfnSuggestMaxCooperativeGroupCountExp =
-      pDdiTable->pfnSuggestMaxCooperativeGroupCountExp;
-  pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
-      ur_tracing_layer::urKernelSuggestMaxCooperativeGroupCountExp;
+  dditable.pfnSuggestMaxCooperativeGroupCount =
+      pDdiTable->pfnSuggestMaxCooperativeGroupCount;
+  pDdiTable->pfnSuggestMaxCooperativeGroupCount =
+      ur_tracing_layer::urKernelSuggestMaxCooperativeGroupCount;
 
   return result;
 }
@@ -11499,11 +11308,6 @@ ur_result_t context_t::init(ur_dditable_t *dditable,
                                                         &dditable->Kernel);
   }
 
-  if (UR_RESULT_SUCCESS == result) {
-    result = ur_tracing_layer::urGetKernelExpProcAddrTable(
-        UR_API_VERSION_CURRENT, &dditable->KernelExp);
-  }
-
   if (UR_RESULT_SUCCESS == result) {
     result = ur_tracing_layer::urGetMemProcAddrTable(UR_API_VERSION_CURRENT,
                                                      &dditable->Mem);
diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp
index 47422683f9cea..e6876eae64237 100644
--- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp
+++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp
@@ -3893,6 +3893,66 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
   return result;
 }
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Validation intercept for urKernelSuggestMaxCooperativeGroupCount
+__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
+    /// [in] handle of the kernel object
+    ur_kernel_handle_t hKernel,
+    /// [in] handle of the device object
+    ur_device_handle_t hDevice,
+    /// [in] number of dimensions, from 1 to 3, to specify the work-group
+    /// work-items
+    uint32_t workDim,
+    /// [in] pointer to an array of workDim unsigned values that specify the
+    /// number of local work-items forming a work-group that will execute the
+    /// kernel function.
+    const size_t *pLocalWorkSize,
+    /// [in] size of dynamic shared memory, for each work-group, in bytes,
+    /// that will be used when the kernel is launched
+    size_t dynamicSharedMemorySize,
+    /// [out] pointer to maximum number of groups
+    uint32_t *pGroupCountRet) {
+  auto pfnSuggestMaxCooperativeGroupCount =
+      getContext()->urDdiTable.Kernel.pfnSuggestMaxCooperativeGroupCount;
+  // Bail out if this layer's DDI table holds no downstream entry point.
+  if (nullptr == pfnSuggestMaxCooperativeGroupCount) {
+    return UR_RESULT_ERROR_UNINITIALIZED;
+  }
+  // Argument checks below run only when parameter validation is enabled.
+  if (getContext()->enableParameterValidation) {
+    if (NULL == pLocalWorkSize)
+      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+
+    if (NULL == pGroupCountRet)
+      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+
+    if (NULL == hKernel)
+      return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
+
+    if (NULL == hDevice)
+      return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
+
+    if (workDim < 1 || workDim > 3)
+      return UR_RESULT_ERROR_INVALID_WORK_DIMENSION;
+  }
+  // Lifetime validation: report handles the ref-count tracker rejects.
+  if (getContext()->enableLifetimeValidation &&
+      !getContext()->refCountContext->isReferenceValid(hKernel)) {
+    URLOG_CTX_INVALID_REFERENCE(hKernel);
+  }
+
+  if (getContext()->enableLifetimeValidation &&
+      !getContext()->refCountContext->isReferenceValid(hDevice)) {
+    URLOG_CTX_INVALID_REFERENCE(hDevice);
+  }
+  // Forward the unmodified arguments to the saved downstream entry point.
+  ur_result_t result = pfnSuggestMaxCooperativeGroupCount(
+      hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
+      pGroupCountRet);
+
+  return result;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urQueueGetInfo
 __urdlllocal ur_result_t UR_APICALL urQueueGetInfo(
@@ -4488,6 +4548,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// execute the kernel function.
     /// If nullptr, the runtime implementation will choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -4510,6 +4575,9 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
     if (NULL == pGlobalWorkSize)
       return UR_RESULT_ERROR_INVALID_NULL_POINTER;
 
+    if (launchPropList == NULL && numPropsInLaunchPropList > 0)
+      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+
     if (NULL == hQueue)
       return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
 
@@ -4543,7 +4611,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
 
   ur_result_t result = pfnKernelLaunch(
       hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
+      pLocalWorkSize, numPropsInLaunchPropList, launchPropList,
+      numEventsInWaitList, phEventWaitList, phEvent);
 
   if (getContext()->enableLeakChecking && result == UR_RESULT_SUCCESS &&
       phEvent) {
@@ -10151,154 +10220,6 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp(
   return result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp
-__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-  auto pfnCooperativeKernelLaunchExp =
-      getContext()->urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp;
-
-  if (nullptr == pfnCooperativeKernelLaunchExp) {
-    return UR_RESULT_ERROR_UNINITIALIZED;
-  }
-
-  if (getContext()->enableParameterValidation) {
-    if (NULL == pGlobalWorkOffset)
-      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-    if (NULL == pGlobalWorkSize)
-      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-    if (NULL == hQueue)
-      return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
-
-    if (NULL == hKernel)
-      return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
-
-    if (phEventWaitList == NULL && numEventsInWaitList > 0)
-      return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST;
-
-    if (phEventWaitList != NULL && numEventsInWaitList == 0)
-      return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST;
-
-    if (phEventWaitList != NULL && numEventsInWaitList > 0) {
-      for (uint32_t i = 0; i < numEventsInWaitList; ++i) {
-        if (phEventWaitList[i] == NULL) {
-          return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST;
-        }
-      }
-    }
-  }
-
-  if (getContext()->enableLifetimeValidation &&
-      !getContext()->refCountContext->isReferenceValid(hQueue)) {
-    URLOG_CTX_INVALID_REFERENCE(hQueue);
-  }
-
-  if (getContext()->enableLifetimeValidation &&
-      !getContext()->refCountContext->isReferenceValid(hKernel)) {
-    URLOG_CTX_INVALID_REFERENCE(hKernel);
-  }
-
-  ur_result_t result = pfnCooperativeKernelLaunchExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
-
-  if (getContext()->enableLeakChecking && result == UR_RESULT_SUCCESS &&
-      phEvent) {
-    getContext()->refCountContext->createRefCount(*phEvent);
-  }
-
-  return result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp
-__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] handle of the device object
-    ur_device_handle_t hDevice,
-    /// [in] number of dimensions, from 1 to 3, to specify the work-group
-    /// work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of local work-items forming a work-group that will execute the
-    /// kernel function.
-    const size_t *pLocalWorkSize,
-    /// [in] size of dynamic shared memory, for each work-group, in bytes,
-    /// that will be used when the kernel is launched
-    size_t dynamicSharedMemorySize,
-    /// [out] pointer to maximum number of groups
-    uint32_t *pGroupCountRet) {
-  auto pfnSuggestMaxCooperativeGroupCountExp =
-      getContext()->urDdiTable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp;
-
-  if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) {
-    return UR_RESULT_ERROR_UNINITIALIZED;
-  }
-
-  if (getContext()->enableParameterValidation) {
-    if (NULL == pLocalWorkSize)
-      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-    if (NULL == pGroupCountRet)
-      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-    if (NULL == hKernel)
-      return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
-
-    if (NULL == hDevice)
-      return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
-  }
-
-  if (getContext()->enableLifetimeValidation &&
-      !getContext()->refCountContext->isReferenceValid(hKernel)) {
-    URLOG_CTX_INVALID_REFERENCE(hKernel);
-  }
-
-  if (getContext()->enableLifetimeValidation &&
-      !getContext()->refCountContext->isReferenceValid(hDevice)) {
-    URLOG_CTX_INVALID_REFERENCE(hDevice);
-  }
-
-  ur_result_t result = pfnSuggestMaxCooperativeGroupCountExp(
-      hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
-      pGroupCountRet);
-
-  return result;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urEnqueueTimestampRecordingExp
 __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
@@ -10371,99 +10292,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
   return result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urEnqueueKernelLaunchCustomExp
-__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function. If nullptr, the runtime implementation
-    /// will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the launch prop list
-    uint32_t numPropsInLaunchPropList,
-    /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch
-    /// properties
-    const ur_exp_launch_property_t *launchPropList,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution. If nullptr,
-    /// the numEventsInWaitList must be 0, indicating that no wait event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-  auto pfnKernelLaunchCustomExp =
-      getContext()->urDdiTable.EnqueueExp.pfnKernelLaunchCustomExp;
-
-  if (nullptr == pfnKernelLaunchCustomExp) {
-    return UR_RESULT_ERROR_UNINITIALIZED;
-  }
-
-  if (getContext()->enableParameterValidation) {
-    if (NULL == pGlobalWorkOffset)
-      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-    if (NULL == pGlobalWorkSize)
-      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-    if (NULL == launchPropList)
-      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-    if (NULL == hQueue)
-      return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
-
-    if (NULL == hKernel)
-      return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
-
-    if (phEventWaitList != NULL && numEventsInWaitList > 0) {
-      for (uint32_t i = 0; i < numEventsInWaitList; ++i) {
-        if (phEventWaitList[i] == NULL) {
-          return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST;
-        }
-      }
-    }
-  }
-
-  if (getContext()->enableLifetimeValidation &&
-      !getContext()->refCountContext->isReferenceValid(hQueue)) {
-    URLOG_CTX_INVALID_REFERENCE(hQueue);
-  }
-
-  if (getContext()->enableLifetimeValidation &&
-      !getContext()->refCountContext->isReferenceValid(hKernel)) {
-    URLOG_CTX_INVALID_REFERENCE(hKernel);
-  }
-
-  ur_result_t result = pfnKernelLaunchCustomExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numPropsInLaunchPropList, launchPropList,
-      numEventsInWaitList, phEventWaitList, phEvent);
-
-  if (getContext()->enableLeakChecking && result == UR_RESULT_SUCCESS &&
-      phEvent) {
-    getContext()->refCountContext->createRefCount(*phEvent);
-  }
-
-  return result;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urProgramBuildExp
 __urdlllocal ur_result_t UR_APICALL urProgramBuildExp(
@@ -11420,10 +11248,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
 
   ur_result_t result = UR_RESULT_SUCCESS;
 
-  dditable.pfnKernelLaunchCustomExp = pDdiTable->pfnKernelLaunchCustomExp;
-  pDdiTable->pfnKernelLaunchCustomExp =
-      ur_validation_layer::urEnqueueKernelLaunchCustomExp;
-
   dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp;
   pDdiTable->pfnUSMDeviceAllocExp =
       ur_validation_layer::urEnqueueUSMDeviceAllocExp;
@@ -11442,11 +11266,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
   pDdiTable->pfnCommandBufferExp =
       ur_validation_layer::urEnqueueCommandBufferExp;
 
-  dditable.pfnCooperativeKernelLaunchExp =
-      pDdiTable->pfnCooperativeKernelLaunchExp;
-  pDdiTable->pfnCooperativeKernelLaunchExp =
-      ur_validation_layer::urEnqueueCooperativeKernelLaunchExp;
-
   dditable.pfnTimestampRecordingExp = pDdiTable->pfnTimestampRecordingExp;
   pDdiTable->pfnTimestampRecordingExp =
       ur_validation_layer::urEnqueueTimestampRecordingExp;
@@ -11591,39 +11410,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
   pDdiTable->pfnSetSpecializationConstants =
       ur_validation_layer::urKernelSetSpecializationConstants;
 
-  return result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Exported function for filling application's KernelExp table
-///        with current process' addresses
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    /// [in] API version requested
-    ur_api_version_t version,
-    /// [in,out] pointer to table of DDI function pointers
-    ur_kernel_exp_dditable_t *pDdiTable) {
-  auto &dditable = ur_validation_layer::getContext()->urDdiTable.KernelExp;
-
-  if (nullptr == pDdiTable)
-    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-  if (UR_MAJOR_VERSION(ur_validation_layer::getContext()->version) !=
-          UR_MAJOR_VERSION(version) ||
-      UR_MINOR_VERSION(ur_validation_layer::getContext()->version) >
-          UR_MINOR_VERSION(version))
-    return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
-
-  ur_result_t result = UR_RESULT_SUCCESS;
-
-  dditable.pfnSuggestMaxCooperativeGroupCountExp =
-      pDdiTable->pfnSuggestMaxCooperativeGroupCountExp;
-  pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
-      ur_validation_layer::urKernelSuggestMaxCooperativeGroupCountExp;
+  dditable.pfnSuggestMaxCooperativeGroupCount =
+      pDdiTable->pfnSuggestMaxCooperativeGroupCount;
+  pDdiTable->pfnSuggestMaxCooperativeGroupCount =
+      ur_validation_layer::urKernelSuggestMaxCooperativeGroupCount;
 
   return result;
 }
@@ -12343,11 +12133,6 @@ ur_result_t context_t::init(ur_dditable_t *dditable,
         UR_API_VERSION_CURRENT, &dditable->Kernel);
   }
 
-  if (UR_RESULT_SUCCESS == result) {
-    result = ur_validation_layer::urGetKernelExpProcAddrTable(
-        UR_API_VERSION_CURRENT, &dditable->KernelExp);
-  }
-
   if (UR_RESULT_SUCCESS == result) {
     result = ur_validation_layer::urGetMemProcAddrTable(UR_API_VERSION_CURRENT,
                                                         &dditable->Mem);
diff --git a/unified-runtime/source/loader/loader.def.in b/unified-runtime/source/loader/loader.def.in
index 41de651edf230..8161d6a906fe6 100644
--- a/unified-runtime/source/loader/loader.def.in
+++ b/unified-runtime/source/loader/loader.def.in
@@ -69,14 +69,12 @@ EXPORTS
 	urDeviceRetain
 	urDeviceSelectBinary
 	urEnqueueCommandBufferExp
-	urEnqueueCooperativeKernelLaunchExp
 	urEnqueueDeviceGlobalVariableRead
 	urEnqueueDeviceGlobalVariableWrite
 	urEnqueueEventsWait
 	urEnqueueEventsWaitWithBarrier
 	urEnqueueEventsWaitWithBarrierExt
 	urEnqueueKernelLaunch
-	urEnqueueKernelLaunchCustomExp
 	urEnqueueMemBufferCopy
 	urEnqueueMemBufferCopyRect
 	urEnqueueMemBufferFill
@@ -119,7 +117,6 @@ EXPORTS
 	urGetEnqueueExpProcAddrTable
 	urGetEnqueueProcAddrTable
 	urGetEventProcAddrTable
-	urGetKernelExpProcAddrTable
 	urGetKernelProcAddrTable
 	urGetMemProcAddrTable
 	urGetPhysicalMemProcAddrTable
@@ -148,7 +145,7 @@ EXPORTS
 	urKernelSetArgValue
 	urKernelSetExecInfo
 	urKernelSetSpecializationConstants
-	urKernelSuggestMaxCooperativeGroupCountExp
+	urKernelSuggestMaxCooperativeGroupCount
 	urLoaderConfigCreate
 	urLoaderConfigEnableLayer
 	urLoaderConfigGetInfo
@@ -279,13 +276,11 @@ EXPORTS
 	urPrintDeviceType
 	urPrintDeviceUsmAccessCapabilityFlags
 	urPrintEnqueueCommandBufferExpParams
-	urPrintEnqueueCooperativeKernelLaunchExpParams
 	urPrintEnqueueDeviceGlobalVariableReadParams
 	urPrintEnqueueDeviceGlobalVariableWriteParams
 	urPrintEnqueueEventsWaitParams
 	urPrintEnqueueEventsWaitWithBarrierExtParams
 	urPrintEnqueueEventsWaitWithBarrierParams
-	urPrintEnqueueKernelLaunchCustomExpParams
 	urPrintEnqueueKernelLaunchParams
 	urPrintEnqueueMemBufferCopyParams
 	urPrintEnqueueMemBufferCopyRectParams
@@ -347,8 +342,6 @@ EXPORTS
 	urPrintExpImageCopyFlags
 	urPrintExpImageCopyRegion
 	urPrintExpImageMemType
-	urPrintExpLaunchProperty
-	urPrintExpLaunchPropertyId
 	urPrintExpPeerInfo
 	urPrintExpSamplerAddrModes
 	urPrintExpSamplerCubemapFilterMode
@@ -379,6 +372,9 @@ EXPORTS
 	urPrintKernelGetSuggestedLocalWorkSizeParams
 	urPrintKernelGroupInfo
 	urPrintKernelInfo
+	urPrintKernelLaunchPropertiesFlags
+	urPrintKernelLaunchProperty
+	urPrintKernelLaunchPropertyId
 	urPrintKernelNativeProperties
 	urPrintKernelReleaseParams
 	urPrintKernelRetainParams
@@ -390,7 +386,7 @@ EXPORTS
 	urPrintKernelSetExecInfoParams
 	urPrintKernelSetSpecializationConstantsParams
 	urPrintKernelSubGroupInfo
-	urPrintKernelSuggestMaxCooperativeGroupCountExpParams
+	urPrintKernelSuggestMaxCooperativeGroupCountParams
 	urPrintLoaderConfigCreateParams
 	urPrintLoaderConfigEnableLayerParams
 	urPrintLoaderConfigGetInfoParams
diff --git a/unified-runtime/source/loader/loader.map.in b/unified-runtime/source/loader/loader.map.in
index b5bf644596b58..ee485f01fc4d9 100644
--- a/unified-runtime/source/loader/loader.map.in
+++ b/unified-runtime/source/loader/loader.map.in
@@ -69,14 +69,12 @@
 		urDeviceRetain;
 		urDeviceSelectBinary;
 		urEnqueueCommandBufferExp;
-		urEnqueueCooperativeKernelLaunchExp;
 		urEnqueueDeviceGlobalVariableRead;
 		urEnqueueDeviceGlobalVariableWrite;
 		urEnqueueEventsWait;
 		urEnqueueEventsWaitWithBarrier;
 		urEnqueueEventsWaitWithBarrierExt;
 		urEnqueueKernelLaunch;
-		urEnqueueKernelLaunchCustomExp;
 		urEnqueueMemBufferCopy;
 		urEnqueueMemBufferCopyRect;
 		urEnqueueMemBufferFill;
@@ -119,7 +117,6 @@
 		urGetEnqueueExpProcAddrTable;
 		urGetEnqueueProcAddrTable;
 		urGetEventProcAddrTable;
-		urGetKernelExpProcAddrTable;
 		urGetKernelProcAddrTable;
 		urGetMemProcAddrTable;
 		urGetPhysicalMemProcAddrTable;
@@ -148,7 +145,7 @@
 		urKernelSetArgValue;
 		urKernelSetExecInfo;
 		urKernelSetSpecializationConstants;
-		urKernelSuggestMaxCooperativeGroupCountExp;
+		urKernelSuggestMaxCooperativeGroupCount;
 		urLoaderConfigCreate;
 		urLoaderConfigEnableLayer;
 		urLoaderConfigGetInfo;
@@ -279,13 +276,11 @@
 		urPrintDeviceType;
 		urPrintDeviceUsmAccessCapabilityFlags;
 		urPrintEnqueueCommandBufferExpParams;
-		urPrintEnqueueCooperativeKernelLaunchExpParams;
 		urPrintEnqueueDeviceGlobalVariableReadParams;
 		urPrintEnqueueDeviceGlobalVariableWriteParams;
 		urPrintEnqueueEventsWaitParams;
 		urPrintEnqueueEventsWaitWithBarrierExtParams;
 		urPrintEnqueueEventsWaitWithBarrierParams;
-		urPrintEnqueueKernelLaunchCustomExpParams;
 		urPrintEnqueueKernelLaunchParams;
 		urPrintEnqueueMemBufferCopyParams;
 		urPrintEnqueueMemBufferCopyRectParams;
@@ -347,8 +342,6 @@
 		urPrintExpImageCopyFlags;
 		urPrintExpImageCopyRegion;
 		urPrintExpImageMemType;
-		urPrintExpLaunchProperty;
-		urPrintExpLaunchPropertyId;
 		urPrintExpPeerInfo;
 		urPrintExpSamplerAddrModes;
 		urPrintExpSamplerCubemapFilterMode;
@@ -379,6 +372,9 @@
 		urPrintKernelGetSuggestedLocalWorkSizeParams;
 		urPrintKernelGroupInfo;
 		urPrintKernelInfo;
+		urPrintKernelLaunchPropertiesFlags;
+		urPrintKernelLaunchProperty;
+		urPrintKernelLaunchPropertyId;
 		urPrintKernelNativeProperties;
 		urPrintKernelReleaseParams;
 		urPrintKernelRetainParams;
@@ -390,7 +386,7 @@
 		urPrintKernelSetExecInfoParams;
 		urPrintKernelSetSpecializationConstantsParams;
 		urPrintKernelSubGroupInfo;
-		urPrintKernelSuggestMaxCooperativeGroupCountExpParams;
+		urPrintKernelSuggestMaxCooperativeGroupCountParams;
 		urPrintLoaderConfigCreateParams;
 		urPrintLoaderConfigEnableLayerParams;
 		urPrintLoaderConfigGetInfoParams;
diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp
index fbec1349d3b90..4446c02902186 100644
--- a/unified-runtime/source/loader/ur_ldrddi.cpp
+++ b/unified-runtime/source/loader/ur_ldrddi.cpp
@@ -2204,6 +2204,39 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
                                       pSuggestedLocalWorkSize);
 }
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Loader intercept for urKernelSuggestMaxCooperativeGroupCount
+__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
+    /// [in] handle of the kernel object
+    ur_kernel_handle_t hKernel,
+    /// [in] handle of the device object
+    ur_device_handle_t hDevice,
+    /// [in] number of dimensions, from 1 to 3, to specify the work-group
+    /// work-items
+    uint32_t workDim,
+    /// [in] pointer to an array of workDim unsigned values that specify the
+    /// number of local work-items forming a work-group that will execute the
+    /// kernel function.
+    const size_t *pLocalWorkSize,
+    /// [in] size of dynamic shared memory, for each work-group, in bytes,
+    /// that will be used when the kernel is launched
+    size_t dynamicSharedMemorySize,
+    /// [out] pointer to maximum number of groups
+    uint32_t *pGroupCountRet) {
+  // Read the DDI table pointer stored at the start of the hKernel object.
+  auto *dditable = *reinterpret_cast<ur_dditable_t **>(hKernel);
+  // NOTE(review): hKernel is dereferenced above without a null check.
+  auto *pfnSuggestMaxCooperativeGroupCount =
+      dditable->Kernel.pfnSuggestMaxCooperativeGroupCount;
+  if (nullptr == pfnSuggestMaxCooperativeGroupCount)
+    return UR_RESULT_ERROR_UNINITIALIZED;
+
+  // forward all arguments unchanged to the device-platform entry point
+  return pfnSuggestMaxCooperativeGroupCount(
+      hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
+      pGroupCountRet);
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urQueueGetInfo
 __urdlllocal ur_result_t UR_APICALL urQueueGetInfo(
@@ -2548,6 +2581,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// execute the kernel function.
     /// If nullptr, the runtime implementation will choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -2569,8 +2607,9 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
 
   // forward to device-platform
   return pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
-                         pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList,
-                         phEventWaitList, phEvent);
+                         pGlobalWorkSize, pLocalWorkSize,
+                         numPropsInLaunchPropList, launchPropList,
+                         numEventsInWaitList, phEventWaitList, phEvent);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -5346,87 +5385,6 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp(
   return pfnGetNativeHandleExp(hCommandBuffer, phNativeCommandBuffer);
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp
-__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-
-  auto *dditable = *reinterpret_cast<ur_dditable_t **>(hQueue);
-
-  auto *pfnCooperativeKernelLaunchExp =
-      dditable->EnqueueExp.pfnCooperativeKernelLaunchExp;
-  if (nullptr == pfnCooperativeKernelLaunchExp)
-    return UR_RESULT_ERROR_UNINITIALIZED;
-
-  // forward to device-platform
-  return pfnCooperativeKernelLaunchExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp
-__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] handle of the device object
-    ur_device_handle_t hDevice,
-    /// [in] number of dimensions, from 1 to 3, to specify the work-group
-    /// work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of local work-items forming a work-group that will execute the
-    /// kernel function.
-    const size_t *pLocalWorkSize,
-    /// [in] size of dynamic shared memory, for each work-group, in bytes,
-    /// that will be used when the kernel is launched
-    size_t dynamicSharedMemorySize,
-    /// [out] pointer to maximum number of groups
-    uint32_t *pGroupCountRet) {
-
-  auto *dditable = *reinterpret_cast<ur_dditable_t **>(hKernel);
-
-  auto *pfnSuggestMaxCooperativeGroupCountExp =
-      dditable->KernelExp.pfnSuggestMaxCooperativeGroupCountExp;
-  if (nullptr == pfnSuggestMaxCooperativeGroupCountExp)
-    return UR_RESULT_ERROR_UNINITIALIZED;
-
-  // forward to device-platform
-  return pfnSuggestMaxCooperativeGroupCountExp(
-      hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
-      pGroupCountRet);
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urEnqueueTimestampRecordingExp
 __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
@@ -5467,59 +5425,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
                                   phEventWaitList, phEvent);
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Intercept function for urEnqueueKernelLaunchCustomExp
-__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function. If nullptr, the runtime implementation
-    /// will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the launch prop list
-    uint32_t numPropsInLaunchPropList,
-    /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch
-    /// properties
-    const ur_exp_launch_property_t *launchPropList,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution. If nullptr,
-    /// the numEventsInWaitList must be 0, indicating that no wait event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-
-  auto *dditable = *reinterpret_cast<ur_dditable_t **>(hQueue);
-
-  auto *pfnKernelLaunchCustomExp =
-      dditable->EnqueueExp.pfnKernelLaunchCustomExp;
-  if (nullptr == pfnKernelLaunchCustomExp)
-    return UR_RESULT_ERROR_UNINITIALIZED;
-
-  // forward to device-platform
-  return pfnKernelLaunchCustomExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numPropsInLaunchPropList, launchPropList,
-      numEventsInWaitList, phEventWaitList, phEvent);
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Intercept function for urProgramBuildExp
 __urdlllocal ur_result_t UR_APICALL urProgramBuildExp(
@@ -6212,15 +6117,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
     if (ur_loader::getContext()->platforms.size() != 1 ||
         ur_loader::getContext()->forceIntercept) {
       // return pointers to loader's DDIs
-      pDdiTable->pfnKernelLaunchCustomExp =
-          ur_loader::urEnqueueKernelLaunchCustomExp;
       pDdiTable->pfnUSMDeviceAllocExp = ur_loader::urEnqueueUSMDeviceAllocExp;
       pDdiTable->pfnUSMSharedAllocExp = ur_loader::urEnqueueUSMSharedAllocExp;
       pDdiTable->pfnUSMHostAllocExp = ur_loader::urEnqueueUSMHostAllocExp;
       pDdiTable->pfnUSMFreeExp = ur_loader::urEnqueueUSMFreeExp;
       pDdiTable->pfnCommandBufferExp = ur_loader::urEnqueueCommandBufferExp;
-      pDdiTable->pfnCooperativeKernelLaunchExp =
-          ur_loader::urEnqueueCooperativeKernelLaunchExp;
       pDdiTable->pfnTimestampRecordingExp =
           ur_loader::urEnqueueTimestampRecordingExp;
       pDdiTable->pfnNativeCommandExp = ur_loader::urEnqueueNativeCommandExp;
@@ -6355,6 +6256,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
       pDdiTable->pfnSetArgMemObj = ur_loader::urKernelSetArgMemObj;
       pDdiTable->pfnSetSpecializationConstants =
           ur_loader::urKernelSetSpecializationConstants;
+      pDdiTable->pfnSuggestMaxCooperativeGroupCount =
+          ur_loader::urKernelSuggestMaxCooperativeGroupCount;
     } else {
       // return pointers directly to platform's DDIs
       *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Kernel;
@@ -6364,60 +6267,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
   return result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Exported function for filling application's KernelExp table
-///        with current process' addresses
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
-    /// [in] API version requested
-    ur_api_version_t version,
-    /// [in,out] pointer to table of DDI function pointers
-    ur_kernel_exp_dditable_t *pDdiTable) {
-  if (nullptr == pDdiTable)
-    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-
-  if (ur_loader::getContext()->version < version)
-    return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
-
-  ur_result_t result = UR_RESULT_SUCCESS;
-
-  // Load the device-platform DDI tables
-  for (auto &platform : ur_loader::getContext()->platforms) {
-    // statically linked adapter inside of the loader
-    if (platform.handle == nullptr)
-      continue;
-
-    if (platform.initStatus != UR_RESULT_SUCCESS)
-      continue;
-    auto getTable = reinterpret_cast<ur_pfnGetKernelExpProcAddrTable_t>(
-        ur_loader::LibLoader::getFunctionPtr(platform.handle.get(),
-                                             "urGetKernelExpProcAddrTable"));
-    if (!getTable)
-      continue;
-    platform.initStatus = getTable(version, &platform.dditable.KernelExp);
-  }
-
-  if (UR_RESULT_SUCCESS == result) {
-    if (ur_loader::getContext()->platforms.size() != 1 ||
-        ur_loader::getContext()->forceIntercept) {
-      // return pointers to loader's DDIs
-      pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
-          ur_loader::urKernelSuggestMaxCooperativeGroupCountExp;
-    } else {
-      // return pointers directly to platform's DDIs
-      *pDdiTable =
-          ur_loader::getContext()->platforms.front().dditable.KernelExp;
-    }
-  }
-
-  return result;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Exported function for filling application's Mem table
 ///        with current process' addresses
diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp
index fad209a4bb29a..521f1bd016faa 100644
--- a/unified-runtime/source/loader/ur_libapi.cpp
+++ b/unified-runtime/source/loader/ur_libapi.cpp
@@ -4401,6 +4401,56 @@ ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
   return exceptionToResult(std::current_exception());
 }
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Query the upper bound on work-groups for a cooperative kernel
+///
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_UNINITIALIZED
+///     - ::UR_RESULT_ERROR_DEVICE_LOST
+///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
+///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
+///         + `NULL == hKernel`
+///         + `NULL == hDevice`
+///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
+///         + `NULL == pLocalWorkSize`
+///         + `NULL == pGroupCountRet`
+///     - ::UR_RESULT_ERROR_INVALID_KERNEL
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
+///         + If ::UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES returns a value
+///         without the ::UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE bit set.
+///     - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
+///         + `workDim < 1 || workDim > 3`
+ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
+    /// [in] kernel object handle
+    ur_kernel_handle_t hKernel,
+    /// [in] device object handle
+    ur_device_handle_t hDevice,
+    /// [in] dimension count, from 1 to 3, used to describe the work-items of
+    /// a work-group
+    uint32_t workDim,
+    /// [in] pointer to workDim unsigned values describing the number of
+    /// local work-items that form a work-group which will execute the
+    /// kernel function.
+    const size_t *pLocalWorkSize,
+    /// [in] dynamic shared memory size, in bytes and per work-group, that
+    /// will be used when the kernel is launched
+    size_t dynamicSharedMemorySize,
+    /// [out] pointer to the maximum number of groups
+    uint32_t *pGroupCountRet) try {
+  auto entryPoint =
+      ur_lib::getContext()
+          ->urDdiTable.Kernel.pfnSuggestMaxCooperativeGroupCount;
+  if (!entryPoint)
+    return UR_RESULT_ERROR_UNINITIALIZED;
+
+  // Hand every argument, unchanged, to the entry point read from the table.
+  return entryPoint(hKernel, hDevice, workDim, pLocalWorkSize,
+                    dynamicSharedMemorySize, pGroupCountRet);
+} catch (...) {
+  return exceptionToResult(std::current_exception());
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Query information about a command queue
 ///
@@ -5066,6 +5116,7 @@ ur_result_t UR_APICALL urEventSetCallback(
 ///         + `NULL == hKernel`
 ///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
 ///         + `NULL == pGlobalWorkSize`
+///         + `launchPropList == NULL && numPropsInLaunchPropList > 0`
 ///     - ::UR_RESULT_ERROR_INVALID_QUEUE
 ///     - ::UR_RESULT_ERROR_INVALID_KERNEL
 ///     - ::UR_RESULT_ERROR_INVALID_EVENT
@@ -5080,6 +5131,8 @@ ur_result_t UR_APICALL urEventSetCallback(
 ///     - ::UR_RESULT_ERROR_INVALID_VALUE
 ///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
 ///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
+///         + If any property in `launchPropList` isn't supported by the device.
 ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// [in] handle of the queue object
     ur_queue_handle_t hQueue,
@@ -5100,6 +5153,11 @@ ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// execute the kernel function.
     /// If nullptr, the runtime implementation will choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -5118,8 +5176,9 @@ ur_result_t UR_APICALL urEnqueueKernelLaunch(
     return UR_RESULT_ERROR_UNINITIALIZED;
 
   return pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
-                         pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList,
-                         phEventWaitList, phEvent);
+                         pGlobalWorkSize, pLocalWorkSize,
+                         numPropsInLaunchPropList, launchPropList,
+                         numEventsInWaitList, phEventWaitList, phEvent);
 } catch (...) {
   return exceptionToResult(std::current_exception());
 }
@@ -9840,121 +9899,6 @@ ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp(
   return exceptionToResult(std::current_exception());
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Enqueue a command to execute a cooperative kernel
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
-///         + `NULL == hQueue`
-///         + `NULL == hKernel`
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///         + `NULL == pGlobalWorkOffset`
-///         + `NULL == pGlobalWorkSize`
-///     - ::UR_RESULT_ERROR_INVALID_QUEUE
-///     - ::UR_RESULT_ERROR_INVALID_KERNEL
-///     - ::UR_RESULT_ERROR_INVALID_EVENT
-///     - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
-///         + `phEventWaitList == NULL && numEventsInWaitList > 0`
-///         + `phEventWaitList != NULL && numEventsInWaitList == 0`
-///         + If event objects in phEventWaitList are not valid events.
-///     - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
-///     - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
-///     - ::UR_RESULT_ERROR_INVALID_VALUE
-///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
-///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
-ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) try {
-  auto pfnCooperativeKernelLaunchExp =
-      ur_lib::getContext()->urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp;
-  if (nullptr == pfnCooperativeKernelLaunchExp)
-    return UR_RESULT_ERROR_UNINITIALIZED;
-
-  return pfnCooperativeKernelLaunchExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent);
-} catch (...) {
-  return exceptionToResult(std::current_exception());
-}
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Query the maximum number of work groups for a cooperative kernel
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
-///         + `NULL == hKernel`
-///         + `NULL == hDevice`
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///         + `NULL == pLocalWorkSize`
-///         + `NULL == pGroupCountRet`
-///     - ::UR_RESULT_ERROR_INVALID_KERNEL
-ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] handle of the device object
-    ur_device_handle_t hDevice,
-    /// [in] number of dimensions, from 1 to 3, to specify the work-group
-    /// work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of local work-items forming a work-group that will execute the
-    /// kernel function.
-    const size_t *pLocalWorkSize,
-    /// [in] size of dynamic shared memory, for each work-group, in bytes,
-    /// that will be used when the kernel is launched
-    size_t dynamicSharedMemorySize,
-    /// [out] pointer to maximum number of groups
-    uint32_t *pGroupCountRet) try {
-  auto pfnSuggestMaxCooperativeGroupCountExp =
-      ur_lib::getContext()
-          ->urDdiTable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp;
-  if (nullptr == pfnSuggestMaxCooperativeGroupCountExp)
-    return UR_RESULT_ERROR_UNINITIALIZED;
-
-  return pfnSuggestMaxCooperativeGroupCountExp(
-      hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize,
-      pGroupCountRet);
-} catch (...) {
-  return exceptionToResult(std::current_exception());
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Enqueue a command for recording the device timestamp
 ///
@@ -10006,103 +9950,6 @@ ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
   return exceptionToResult(std::current_exception());
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Launch kernel with custom launch properties
-///
-/// @details
-///     - Launches the kernel using the specified launch properties
-///     - If numPropsInLaunchPropList == 0 then a regular kernel launch is used:
-///       `urEnqueueKernelLaunch`
-///     - Consult the appropriate adapter driver documentation for details of
-///       adapter specific behavior and native error codes that may be returned.
-///
-/// @remarks
-///   _Analogues_
-///     - **cuLaunchKernelEx**
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
-///         + `NULL == hQueue`
-///         + `NULL == hKernel`
-///         + NULL == hQueue
-///         + NULL == hKernel
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///         + `NULL == pGlobalWorkOffset`
-///         + `NULL == pGlobalWorkSize`
-///         + `NULL == launchPropList`
-///         + NULL == pGlobalWorkSize
-///         + numPropsInLaunchpropList != 0 && launchPropList == NULL
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_QUEUE
-///     - ::UR_RESULT_ERROR_INVALID_KERNEL
-///     - ::UR_RESULT_ERROR_INVALID_EVENT
-///     - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
-///         + phEventWaitList == NULL && numEventsInWaitList > 0
-///         + phEventWaitList != NULL && numEventsInWaitList == 0
-///         + If event objects in phEventWaitList are not valid events.
-///     - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS
-///         + An event in phEventWaitList has ::UR_EVENT_STATUS_ERROR
-///     - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
-///     - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
-///     - ::UR_RESULT_ERROR_INVALID_VALUE
-///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
-///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
-ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function. If nullptr, the runtime implementation
-    /// will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the launch prop list
-    uint32_t numPropsInLaunchPropList,
-    /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch
-    /// properties
-    const ur_exp_launch_property_t *launchPropList,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution. If nullptr,
-    /// the numEventsInWaitList must be 0, indicating that no wait event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) try {
-  auto pfnKernelLaunchCustomExp =
-      ur_lib::getContext()->urDdiTable.EnqueueExp.pfnKernelLaunchCustomExp;
-  if (nullptr == pfnKernelLaunchCustomExp)
-    return UR_RESULT_ERROR_UNINITIALIZED;
-
-  return pfnKernelLaunchCustomExp(
-      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
-      pLocalWorkSize, numPropsInLaunchPropList, launchPropList,
-      numEventsInWaitList, phEventWaitList, phEvent);
-} catch (...) {
-  return exceptionToResult(std::current_exception());
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Produces an executable program from one program, negates need for the
 ///        linking step.
diff --git a/unified-runtime/source/loader/ur_libddi.cpp b/unified-runtime/source/loader/ur_libddi.cpp
index efb58f38fc1f0..616b79ef541e1 100644
--- a/unified-runtime/source/loader/ur_libddi.cpp
+++ b/unified-runtime/source/loader/ur_libddi.cpp
@@ -61,11 +61,6 @@ __urdlllocal ur_result_t context_t::ddiInit() {
         urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Kernel);
   }
 
-  if (UR_RESULT_SUCCESS == result) {
-    result = urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT,
-                                         &urDdiTable.KernelExp);
-  }
-
   if (UR_RESULT_SUCCESS == result) {
     result = urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Mem);
   }
diff --git a/unified-runtime/source/loader/ur_print.cpp b/unified-runtime/source/loader/ur_print.cpp
index b9de89785f0be..ec488519251e6 100644
--- a/unified-runtime/source/loader/ur_print.cpp
+++ b/unified-runtime/source/loader/ur_print.cpp
@@ -292,6 +292,14 @@ urPrintDeviceThrottleReasonsFlags(enum ur_device_throttle_reasons_flag_t value,
   return str_copy(&ss, buffer, buff_size, out_size);
 }
 
+ur_result_t urPrintKernelLaunchPropertiesFlags(
+    enum ur_kernel_launch_properties_flag_t value, char *buffer,
+    const size_t buff_size, size_t *out_size) {
+  std::stringstream text; // collects whatever operator<< emits for `value`
+  text << value;
+  return str_copy(&text, buffer, buff_size, out_size); // result passed through
+}
+
 ur_result_t urPrintContextFlags(enum ur_context_flag_t value, char *buffer,
                                 const size_t buff_size, size_t *out_size) {
   std::stringstream ss;
@@ -882,6 +890,24 @@ ur_result_t urPrintExecutionInfo(enum ur_execution_info_t value, char *buffer,
   return str_copy(&ss, buffer, buff_size, out_size);
 }
 
+// Streams `value` into a string stream, then passes that stream to str_copy.
+ur_result_t urPrintKernelLaunchPropertyId(
+    enum ur_kernel_launch_property_id_t value, char *buffer,
+    const size_t buff_size, size_t *out_size) {
+  std::stringstream text;
+  text << value;
+  return str_copy(&text, buffer, buff_size, out_size);
+}
+
+// Streams `params` into a string stream, then passes that stream to str_copy.
+ur_result_t urPrintKernelLaunchProperty(
+    const struct ur_kernel_launch_property_t params, char *buffer,
+    const size_t buff_size, size_t *out_size) {
+  std::stringstream text;
+  text << params;
+  return str_copy(&text, buffer, buff_size, out_size);
+}
+
 ur_result_t urPrintMapFlags(enum ur_map_flag_t value, char *buffer,
                             const size_t buff_size, size_t *out_size) {
   std::stringstream ss;
@@ -1105,23 +1131,6 @@ ur_result_t urPrintExpCommandBufferUpdateKernelLaunchDesc(
   return str_copy(&ss, buffer, buff_size, out_size);
 }
 
-ur_result_t urPrintExpLaunchPropertyId(enum ur_exp_launch_property_id_t value,
-                                       char *buffer, const size_t buff_size,
-                                       size_t *out_size) {
-  std::stringstream ss;
-  ss << value;
-  return str_copy(&ss, buffer, buff_size, out_size);
-}
-
-ur_result_t
-urPrintExpLaunchProperty(const struct ur_exp_launch_property_t params,
-                         char *buffer, const size_t buff_size,
-                         size_t *out_size) {
-  std::stringstream ss;
-  ss << params;
-  return str_copy(&ss, buffer, buff_size, out_size);
-}
-
 ur_result_t urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer,
                                const size_t buff_size, size_t *out_size) {
   std::stringstream ss;
@@ -1851,14 +1860,6 @@ ur_result_t urPrintEnqueueWriteHostPipeParams(
   return str_copy(&ss, buffer, buff_size, out_size);
 }
 
-ur_result_t urPrintEnqueueKernelLaunchCustomExpParams(
-    const struct ur_enqueue_kernel_launch_custom_exp_params_t *params,
-    char *buffer, const size_t buff_size, size_t *out_size) {
-  std::stringstream ss;
-  ss << params;
-  return str_copy(&ss, buffer, buff_size, out_size);
-}
-
 ur_result_t urPrintEnqueueEventsWaitWithBarrierExtParams(
     const struct ur_enqueue_events_wait_with_barrier_ext_params_t *params,
     char *buffer, const size_t buff_size, size_t *out_size) {
@@ -1907,14 +1908,6 @@ ur_result_t urPrintEnqueueCommandBufferExpParams(
   return str_copy(&ss, buffer, buff_size, out_size);
 }
 
-ur_result_t urPrintEnqueueCooperativeKernelLaunchExpParams(
-    const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params,
-    char *buffer, const size_t buff_size, size_t *out_size) {
-  std::stringstream ss;
-  ss << params;
-  return str_copy(&ss, buffer, buff_size, out_size);
-}
-
 ur_result_t urPrintEnqueueTimestampRecordingExpParams(
     const struct ur_enqueue_timestamp_recording_exp_params_t *params,
     char *buffer, const size_t buff_size, size_t *out_size) {
@@ -2130,9 +2123,8 @@ ur_result_t urPrintKernelSetSpecializationConstantsParams(
   return str_copy(&ss, buffer, buff_size, out_size);
 }
 
-ur_result_t urPrintKernelSuggestMaxCooperativeGroupCountExpParams(
-    const struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t
-        *params,
+ur_result_t urPrintKernelSuggestMaxCooperativeGroupCountParams(
+    const struct ur_kernel_suggest_max_cooperative_group_count_params_t *params,
     char *buffer, const size_t buff_size, size_t *out_size) {
   std::stringstream ss;
   ss << params;
diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp
index 98aa6d5d9a86f..f50417580244d 100644
--- a/unified-runtime/source/ur_api.cpp
+++ b/unified-runtime/source/ur_api.cpp
@@ -3847,6 +3847,47 @@ ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
   return result;
 }
 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Query the upper bound on work-groups for a cooperative kernel
+///
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_UNINITIALIZED
+///     - ::UR_RESULT_ERROR_DEVICE_LOST
+///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
+///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
+///         + `NULL == hKernel`
+///         + `NULL == hDevice`
+///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
+///         + `NULL == pLocalWorkSize`
+///         + `NULL == pGroupCountRet`
+///     - ::UR_RESULT_ERROR_INVALID_KERNEL
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
+///         + If ::UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES returns a value
+///         without the ::UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE bit set.
+///     - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
+///         + `workDim < 1 || workDim > 3`
+ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCount(
+    /// [in] kernel object handle
+    ur_kernel_handle_t hKernel,
+    /// [in] device object handle
+    ur_device_handle_t hDevice,
+    /// [in] dimension count, from 1 to 3, used to describe the work-items of
+    /// a work-group
+    uint32_t workDim,
+    /// [in] pointer to workDim unsigned values describing the number of
+    /// local work-items that form a work-group which will execute the
+    /// kernel function.
+    const size_t *pLocalWorkSize,
+    /// [in] dynamic shared memory size, in bytes and per work-group, that
+    /// will be used when the kernel is launched
+    size_t dynamicSharedMemorySize,
+    /// [out] pointer to the maximum number of groups
+    uint32_t *pGroupCountRet) {
+  // This body reads and writes none of its arguments; it reports success.
+  return UR_RESULT_SUCCESS;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Query information about a command queue
 ///
@@ -4424,6 +4465,7 @@ ur_result_t UR_APICALL urEventSetCallback(
 ///         + `NULL == hKernel`
 ///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
 ///         + `NULL == pGlobalWorkSize`
+///         + `launchPropList == NULL && numPropsInLaunchPropList > 0`
 ///     - ::UR_RESULT_ERROR_INVALID_QUEUE
 ///     - ::UR_RESULT_ERROR_INVALID_KERNEL
 ///     - ::UR_RESULT_ERROR_INVALID_EVENT
@@ -4438,6 +4480,8 @@ ur_result_t UR_APICALL urEventSetCallback(
 ///     - ::UR_RESULT_ERROR_INVALID_VALUE
 ///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
 ///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
+///         + If any property in `launchPropList` isn't supported by the device.
 ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// [in] handle of the queue object
     ur_queue_handle_t hQueue,
@@ -4458,6 +4502,11 @@ ur_result_t UR_APICALL urEnqueueKernelLaunch(
     /// execute the kernel function.
     /// If nullptr, the runtime implementation will choose the work-group size.
     const size_t *pLocalWorkSize,
+    /// [in] size of the launch prop list
+    uint32_t numPropsInLaunchPropList,
+    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
+    /// of launch properties
+    const ur_kernel_launch_property_t *launchPropList,
     /// [in] size of the event wait list
     uint32_t numEventsInWaitList,
     /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
@@ -8572,104 +8621,6 @@ ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp(
   return result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Enqueue a command to execute a cooperative kernel
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
-///         + `NULL == hQueue`
-///         + `NULL == hKernel`
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///         + `NULL == pGlobalWorkOffset`
-///         + `NULL == pGlobalWorkSize`
-///     - ::UR_RESULT_ERROR_INVALID_QUEUE
-///     - ::UR_RESULT_ERROR_INVALID_KERNEL
-///     - ::UR_RESULT_ERROR_INVALID_EVENT
-///     - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
-///         + `phEventWaitList == NULL && numEventsInWaitList > 0`
-///         + `phEventWaitList != NULL && numEventsInWaitList == 0`
-///         + If event objects in phEventWaitList are not valid events.
-///     - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
-///     - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
-///     - ::UR_RESULT_ERROR_INVALID_VALUE
-///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
-///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
-ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function.
-    /// If nullptr, the runtime implementation will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution.
-    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
-    /// event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-  ur_result_t result = UR_RESULT_SUCCESS;
-  return result;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Query the maximum number of work groups for a cooperative kernel
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
-///         + `NULL == hKernel`
-///         + `NULL == hDevice`
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///         + `NULL == pLocalWorkSize`
-///         + `NULL == pGroupCountRet`
-///     - ::UR_RESULT_ERROR_INVALID_KERNEL
-ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] handle of the device object
-    ur_device_handle_t hDevice,
-    /// [in] number of dimensions, from 1 to 3, to specify the work-group
-    /// work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of local work-items forming a work-group that will execute the
-    /// kernel function.
-    const size_t *pLocalWorkSize,
-    /// [in] size of dynamic shared memory, for each work-group, in bytes,
-    /// that will be used when the kernel is launched
-    size_t dynamicSharedMemorySize,
-    /// [out] pointer to maximum number of groups
-    uint32_t *pGroupCountRet) {
-  ur_result_t result = UR_RESULT_SUCCESS;
-  return result;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Enqueue a command for recording the device timestamp
 ///
@@ -8714,94 +8665,6 @@ ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
   return result;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-/// @brief Launch kernel with custom launch properties
-///
-/// @details
-///     - Launches the kernel using the specified launch properties
-///     - If numPropsInLaunchPropList == 0 then a regular kernel launch is used:
-///       `urEnqueueKernelLaunch`
-///     - Consult the appropriate adapter driver documentation for details of
-///       adapter specific behavior and native error codes that may be returned.
-///
-/// @remarks
-///   _Analogues_
-///     - **cuLaunchKernelEx**
-///
-/// @returns
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
-///         + `NULL == hQueue`
-///         + `NULL == hKernel`
-///         + NULL == hQueue
-///         + NULL == hKernel
-///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
-///         + `NULL == pGlobalWorkOffset`
-///         + `NULL == pGlobalWorkSize`
-///         + `NULL == launchPropList`
-///         + NULL == pGlobalWorkSize
-///         + numPropsInLaunchpropList != 0 && launchPropList == NULL
-///     - ::UR_RESULT_SUCCESS
-///     - ::UR_RESULT_ERROR_UNINITIALIZED
-///     - ::UR_RESULT_ERROR_DEVICE_LOST
-///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
-///     - ::UR_RESULT_ERROR_INVALID_QUEUE
-///     - ::UR_RESULT_ERROR_INVALID_KERNEL
-///     - ::UR_RESULT_ERROR_INVALID_EVENT
-///     - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
-///         + phEventWaitList == NULL && numEventsInWaitList > 0
-///         + phEventWaitList != NULL && numEventsInWaitList == 0
-///         + If event objects in phEventWaitList are not valid events.
-///     - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS
-///         + An event in phEventWaitList has ::UR_EVENT_STATUS_ERROR
-///     - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
-///     - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
-///     - ::UR_RESULT_ERROR_INVALID_VALUE
-///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
-///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
-ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
-    /// [in] handle of the queue object
-    ur_queue_handle_t hQueue,
-    /// [in] handle of the kernel object
-    ur_kernel_handle_t hKernel,
-    /// [in] number of dimensions, from 1 to 3, to specify the global and
-    /// work-group work-items
-    uint32_t workDim,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// offset used to calculate the global ID of a work-item
-    const size_t *pGlobalWorkOffset,
-    /// [in] pointer to an array of workDim unsigned values that specify the
-    /// number of global work-items in workDim that will execute the kernel
-    /// function
-    const size_t *pGlobalWorkSize,
-    /// [in][optional] pointer to an array of workDim unsigned values that
-    /// specify the number of local work-items forming a work-group that will
-    /// execute the kernel function. If nullptr, the runtime implementation
-    /// will choose the work-group size.
-    const size_t *pLocalWorkSize,
-    /// [in] size of the launch prop list
-    uint32_t numPropsInLaunchPropList,
-    /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch
-    /// properties
-    const ur_exp_launch_property_t *launchPropList,
-    /// [in] size of the event wait list
-    uint32_t numEventsInWaitList,
-    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
-    /// events that must be complete before the kernel execution. If nullptr,
-    /// the numEventsInWaitList must be 0, indicating that no wait event.
-    const ur_event_handle_t *phEventWaitList,
-    /// [out][optional][alloc] return an event object that identifies this
-    /// particular kernel execution instance. If phEventWaitList and phEvent
-    /// are not NULL, phEvent must not refer to an element of the
-    /// phEventWaitList array.
-    ur_event_handle_t *phEvent) {
-  ur_result_t result = UR_RESULT_SUCCESS;
-  return result;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Produces an executable program from one program, negates need for the
 ///        linking step.
diff --git a/unified-runtime/test/adapters/cuda/kernel_tests.cpp b/unified-runtime/test/adapters/cuda/kernel_tests.cpp
index 6d1937f8873e0..c88928a2f1869 100644
--- a/unified-runtime/test/adapters/cuda/kernel_tests.cpp
+++ b/unified-runtime/test/adapters/cuda/kernel_tests.cpp
@@ -236,7 +236,7 @@ TEST_P(cudaKernelTest, URKernelDispatch) {
   const size_t localWorkSize[] = {1};
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, workDim, globalWorkOffset,
                                        globalWorkSize, localWorkSize, 0,
-                                       nullptr, nullptr));
+                                       nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 }
 
@@ -271,6 +271,6 @@ TEST_P(cudaKernelTest, URKernelDispatchTwo) {
   const size_t localWorkSize[] = {1};
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, workDim, globalWorkOffset,
                                        globalWorkSize, localWorkSize, 0,
-                                       nullptr, nullptr));
+                                       nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 }
diff --git a/unified-runtime/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp b/unified-runtime/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp
index a263d818516a7..e088544fc7899 100644
--- a/unified-runtime/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp
+++ b/unified-runtime/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp
@@ -72,8 +72,8 @@ TEST_P(urLevelZeroKernelNativeHandleTest, OwnedHandleRelease) {
   size_t local_size = 1;
   size_t global_size = 1;
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 1, &global_offset,
-                                       &local_size, &global_size, 0, nullptr,
-                                       nullptr));
+                                       &local_size, &global_size, 0, nullptr, 0,
+                                       nullptr, nullptr));
 
   ASSERT_SUCCESS(urKernelRelease(kernel));
   ASSERT_SUCCESS(urProgramRelease(program));
diff --git a/unified-runtime/test/adapters/level_zero/v2/deferred_kernel.cpp b/unified-runtime/test/adapters/level_zero/v2/deferred_kernel.cpp
index 5e224231198ee..ada45f502ced7 100644
--- a/unified-runtime/test/adapters/level_zero/v2/deferred_kernel.cpp
+++ b/unified-runtime/test/adapters/level_zero/v2/deferred_kernel.cpp
@@ -46,7 +46,7 @@ TEST_P(urEnqueueKernelLaunchTest, DeferredKernelRelease) {
   ASSERT_SUCCESS(urEnqueueEventsWait(queue, 1, &event, nullptr));
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                        &global_offset, &global_size, nullptr, 0,
-                                       nullptr, nullptr));
+                                       nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urKernelRelease(kernel));
 
   // Kernel should still be alive since kernel launch is pending
@@ -138,13 +138,13 @@ TEST_P(urMultiQueueLaunchKernelDeferFreeTest, Success) {
 
   ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 1, &event1, nullptr));
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[0], kernel, 1, &global_offset,
-                                       &global_size, nullptr, 0, nullptr,
-                                       nullptr));
+                                       &global_size, nullptr, 0, nullptr, 0,
+                                       nullptr, nullptr));
 
   ASSERT_SUCCESS(urEnqueueEventsWait(queues[1], 1, &event2, nullptr));
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[1], kernel, 1, &global_offset,
-                                       &global_size, nullptr, 0, nullptr,
-                                       nullptr));
+                                       &global_size, nullptr, 0, nullptr, 0,
+                                       nullptr, nullptr));
 
   ASSERT_SUCCESS(urKernelRelease(kernel));
 
diff --git a/unified-runtime/test/conformance/CMakeLists.txt b/unified-runtime/test/conformance/CMakeLists.txt
index f53c14384f560..35e133143cf11 100644
--- a/unified-runtime/test/conformance/CMakeLists.txt
+++ b/unified-runtime/test/conformance/CMakeLists.txt
@@ -79,7 +79,6 @@ set(TEST_SUBDIRECTORIES_DPCXX
     "exp_command_buffer"
     "exp_enqueue_native"
     "exp_usm_p2p"
-    "exp_launch_properties"
     "memory-migrate"
     "usm"
 )
diff --git a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp
index 277a19c473a40..9980f8f7e1a2e 100644
--- a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp
+++ b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp
@@ -2057,23 +2057,6 @@ TEST_P(urDeviceGetInfoTest, SuccessCommandBufferEventSupport) {
   ASSERT_TRUE(casted_value == false || casted_value == true);
 }
 
-TEST_P(urDeviceGetInfoTest, SuccessClusterLaunch) {
-  size_t property_size = 0;
-  const ur_device_info_t property_name =
-      UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP;
-
-  UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(
-      urDeviceGetInfo(device, property_name, 0, nullptr, &property_size));
-  ASSERT_EQ(property_size, sizeof(ur_bool_t));
-
-  ur_bool_t property_value = false;
-  ASSERT_SUCCESS(urDeviceGetInfo(device, property_name, property_size,
-                                 &property_value, nullptr));
-
-  bool casted_value = static_cast<bool>(property_value);
-  ASSERT_TRUE(casted_value == false || casted_value == true);
-}
-
 TEST_P(urDeviceGetInfoTest, SuccessBindlessImagesSupport) {
   size_t property_size = 0;
   const ur_device_info_t property_name =
@@ -2755,3 +2738,21 @@ TEST_P(urDeviceGetInfoComponentDevicesTest, SuccessComponentDevices) {
     }
   }
 }
+
+TEST_P(urDeviceGetInfoTest, SuccessKernelLaunchPropertiesSupport) {
+  // Size probe first; the macro below tolerates an optional-query failure.
+  const ur_device_info_t info_name = UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES;
+  size_t reported_size = 0;
+  ASSERT_SUCCESS_OR_OPTIONAL_QUERY(
+      urDeviceGetInfo(device, info_name, 0, nullptr, &reported_size),
+      info_name);
+  ASSERT_EQ(reported_size, sizeof(ur_kernel_launch_properties_flags_t));
+
+  // Start from the FORCE_UINT32 placeholder before fetching the real value.
+  ur_kernel_launch_properties_flags_t capabilities =
+      UR_KERNEL_LAUNCH_PROPERTIES_FLAG_FORCE_UINT32;
+  ASSERT_SUCCESS(urDeviceGetInfo(device, info_name, reported_size,
+                                 &capabilities, nullptr));
+  // No bit selected by the flags mask may be set in the reported value.
+  ASSERT_EQ(capabilities & UR_KERNEL_LAUNCH_PROPERTIES_FLAGS_MASK, 0);
+}
diff --git a/unified-runtime/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp b/unified-runtime/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp
index 96acb17d2dee7..d8499e997d5b1 100644
--- a/unified-runtime/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp
+++ b/unified-runtime/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp
@@ -27,7 +27,7 @@ TEST_P(urEnqueueDeviceGetGlobalVariableReadWithParamTest, Success) {
   // execute the kernel
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                        &global_offset, &global_size, nullptr, 0,
-                                       nullptr, nullptr));
+                                       nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   // read global var back to host
diff --git a/unified-runtime/test/conformance/enqueue/urEnqueueDeviceGlobalVariableWrite.cpp b/unified-runtime/test/conformance/enqueue/urEnqueueDeviceGlobalVariableWrite.cpp
index 26b74956d7b2a..4ce2c931fd9d1 100644
--- a/unified-runtime/test/conformance/enqueue/urEnqueueDeviceGlobalVariableWrite.cpp
+++ b/unified-runtime/test/conformance/enqueue/urEnqueueDeviceGlobalVariableWrite.cpp
@@ -32,7 +32,7 @@ TEST_P(urEnqueueDeviceGetGlobalVariableWriteWithParamTest, Success) {
   // execute the kernel
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                        &global_offset, &global_size, nullptr, 0,
-                                       nullptr, nullptr));
+                                       nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   // read global var back to host
diff --git a/unified-runtime/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp b/unified-runtime/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp
index 4cc02636285d5..20806da38acf9 100644
--- a/unified-runtime/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp
+++ b/unified-runtime/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp
@@ -183,10 +183,12 @@ TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest,
                             nullptr, &event);
     EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr));
     EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count,
-                                         nullptr, 0, nullptr, &event));
+                                         nullptr, 0, nullptr, 0, nullptr,
+                                         &event));
     EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr));
     EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count,
-                                         nullptr, 0, nullptr, &event));
+                                         nullptr, 0, nullptr, 0, nullptr,
+                                         &event));
     EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr));
     addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004);
   }
@@ -212,10 +214,12 @@ TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest,
                             nullptr, nullptr);
     EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event));
     EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count,
-                                         nullptr, 1, &event, nullptr));
+                                         nullptr, 0, nullptr, 1, &event,
+                                         nullptr));
     EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event));
     EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count,
-                                         nullptr, 1, &event, nullptr));
+                                         nullptr, 0, nullptr, 1, &event,
+                                         nullptr));
     EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event));
     addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004);
   }
@@ -241,11 +245,13 @@ TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, SuccessEventDependencies) {
     EXPECT_SUCCESS(
         urEnqueueEventsWaitWithBarrier(queue, 1, &event[0], &event[1]));
     EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count,
-                                         nullptr, 1, &event[1], &event[2]));
+                                         nullptr, 0, nullptr, 1, &event[1],
+                                         &event[2]));
     EXPECT_SUCCESS(
         urEnqueueEventsWaitWithBarrier(queue, 1, &event[2], &event[3]));
     EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count,
-                                         nullptr, 1, &event[3], &event[4]));
+                                         nullptr, 0, nullptr, 1, &event[3],
+                                         &event[4]));
     EXPECT_SUCCESS(
         urEnqueueEventsWaitWithBarrier(queue, 1, &event[4], &event[5]));
     addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004);
@@ -271,10 +277,12 @@ TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest,
                             nullptr, nullptr);
     EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr));
     EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count,
-                                         nullptr, 0, nullptr, nullptr));
+                                         nullptr, 0, nullptr, 0, nullptr,
+                                         nullptr));
     EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr));
     EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count,
-                                         nullptr, 0, nullptr, nullptr));
+                                         nullptr, 0, nullptr, 0, nullptr,
+                                         nullptr));
     EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr));
     addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004);
   }
diff --git a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
index f2a3b29416764..0282d55ed9d96 100644
--- a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
+++ b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
@@ -73,7 +73,54 @@ TEST_P(urEnqueueKernelLaunchTest, Success) {
   AddPodArg(val);
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                        &global_offset, &global_size, nullptr, 0,
-                                       nullptr, nullptr));
+                                       nullptr, 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urQueueFinish(queue));
+  ValidateBuffer(buffer, sizeof(val) * global_size, val);
+}
+
+TEST_P(urEnqueueKernelLaunchTest, SuccessWithLaunchProperties) {
+  // Begin with a no-op property, then append each property the device reports
+  // as supported; the whole list (not just the first entry) is launched.
+  std::vector<ur_kernel_launch_property_t> props(1);
+  props[0].id = UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE;
+
+  ur_kernel_launch_properties_flags_t supported_properties = 0;
+  ASSERT_SUCCESS(urDeviceGetInfo(
+      device, UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES,
+      sizeof(supported_properties), &supported_properties, nullptr));
+
+  if (supported_properties & UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE) {
+    ur_kernel_launch_property_t coop_prop;
+    coop_prop.id = UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE;
+    coop_prop.value.cooperative = 1;
+    props.push_back(coop_prop);
+  }
+  if (supported_properties &
+      UR_KERNEL_LAUNCH_PROPERTIES_FLAG_CLUSTER_DIMENSION) {
+    // NOTE(review): 1x1x1 chosen to fit any launch grid; confirm on HW.
+    ur_kernel_launch_property_t cluster_dims_prop;
+    cluster_dims_prop.id = UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION;
+    cluster_dims_prop.value.clusterDim[0] = 1;
+    cluster_dims_prop.value.clusterDim[1] = 1;
+    cluster_dims_prop.value.clusterDim[2] = 1;
+    props.push_back(cluster_dims_prop);
+  }
+  if (supported_properties &
+      UR_KERNEL_LAUNCH_PROPERTIES_FLAG_OPPORTUNISTIC_QUEUE_SERIALIZE) {
+    ur_kernel_launch_property_t opportunistic_queue_serialize_prop;
+    opportunistic_queue_serialize_prop.id =
+        UR_KERNEL_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE;
+    opportunistic_queue_serialize_prop.value.opportunistic_queue_serialize = 1;
+    props.push_back(opportunistic_queue_serialize_prop);
+  }
+
+  ur_mem_handle_t buffer = nullptr;
+  AddBuffer1DArg(sizeof(val) * global_size, &buffer);
+  AddPodArg(val);
+
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, &global_offset, &global_size, nullptr,
+      static_cast<uint32_t>(props.size()), props.data(), 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   ValidateBuffer(buffer, sizeof(val) * global_size, val);
@@ -84,8 +131,8 @@ TEST_P(urEnqueueKernelLaunchTest, SuccessNoOffset) {
   AddBuffer1DArg(sizeof(val) * global_size, &buffer);
   AddPodArg(val);
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions, nullptr,
-                                       &global_size, nullptr, 0, nullptr,
-                                       nullptr));
+                                       &global_size, nullptr, 0, nullptr, 0,
+                                       nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   ValidateBuffer(buffer, sizeof(val) * global_size, val);
@@ -94,28 +141,28 @@ TEST_P(urEnqueueKernelLaunchTest, SuccessNoOffset) {
 TEST_P(urEnqueueKernelLaunchTest, InvalidNullHandleQueue) {
   ASSERT_EQ_RESULT(urEnqueueKernelLaunch(nullptr, kernel, n_dimensions,
                                          &global_offset, &global_size, nullptr,
-                                         0, nullptr, nullptr),
+                                         0, nullptr, 0, nullptr, nullptr),
                    UR_RESULT_ERROR_INVALID_NULL_HANDLE);
 }
 
 TEST_P(urEnqueueKernelLaunchTest, InvalidNullPointer) {
   ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                          &global_offset, nullptr, nullptr, 0,
-                                         nullptr, nullptr),
+                                         nullptr, 0, nullptr, nullptr),
                    UR_RESULT_ERROR_INVALID_NULL_POINTER);
 }
 
 TEST_P(urEnqueueKernelLaunchTest, InvalidNullHandleKernel) {
   ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, nullptr, n_dimensions,
                                          &global_offset, &global_size, nullptr,
-                                         0, nullptr, nullptr),
+                                         0, nullptr, 0, nullptr, nullptr),
                    UR_RESULT_ERROR_INVALID_NULL_HANDLE);
 }
 
 TEST_P(urEnqueueKernelLaunchTest, InvalidNullPtrEventWaitList) {
   ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                          &global_offset, &global_size, nullptr,
-                                         1, nullptr, nullptr),
+                                         0, nullptr, 1, nullptr, nullptr),
                    UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
 
   ur_event_handle_t validEvent;
@@ -123,13 +170,13 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidNullPtrEventWaitList) {
 
   ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                          &global_offset, &global_size, nullptr,
-                                         0, &validEvent, nullptr),
+                                         0, nullptr, 0, &validEvent, nullptr),
                    UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
 
   ur_event_handle_t inv_evt = nullptr;
   ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                          &global_offset, &global_size, nullptr,
-                                         1, &inv_evt, nullptr),
+                                         0, nullptr, 1, &inv_evt, nullptr),
                    UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
   ASSERT_SUCCESS(urEventRelease(validEvent));
 }
@@ -142,7 +189,7 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidWorkDimension) {
   ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel,
                                          max_work_item_dimensions + 1,
                                          &global_offset, &global_size, nullptr,
-                                         0, nullptr, nullptr),
+                                         0, nullptr, 0, nullptr, nullptr),
                    UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
 }
 
@@ -154,9 +201,9 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidWorkGroupSize) {
   ur_mem_handle_t buffer = nullptr;
   AddBuffer1DArg(sizeof(val) * global_size, &buffer);
   AddPodArg(val);
-  auto result =
-      urEnqueueKernelLaunch(queue, kernel, n_dimensions, &global_offset,
-                            &global_size, &local_size, 0, nullptr, nullptr);
+  auto result = urEnqueueKernelLaunch(queue, kernel, n_dimensions,
+                                      &global_offset, &global_size, &local_size,
+                                      0, nullptr, 0, nullptr, nullptr);
   ASSERT_TRUE(result == UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE ||
               result == UR_RESULT_SUCCESS);
 }
@@ -164,16 +211,16 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidWorkGroupSize) {
 TEST_P(urEnqueueKernelLaunchKernelWgSizeTest, Success) {
   UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
 
-  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
-                                       global_offset.data(), global_size.data(),
-                                       nullptr, 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, global_offset.data(), global_size.data(),
+      nullptr, 0, nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 }
 
 TEST_P(urEnqueueKernelLaunchKernelWgSizeTest, SuccessWithExplicitLocalSize) {
-  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
-                                       global_offset.data(), global_size.data(),
-                                       wg_size.data(), 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, global_offset.data(), global_size.data(),
+      wg_size.data(), 0, nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 }
 
@@ -184,7 +231,7 @@ TEST_P(urEnqueueKernelLaunchKernelWgSizeTest, NonMatchingLocalSize) {
   ASSERT_EQ_RESULT(
       urEnqueueKernelLaunch(queue, kernel, n_dimensions, global_offset.data(),
                             global_size.data(), wrong_wg_size.data(), 0,
-                            nullptr, nullptr),
+                            nullptr, 0, nullptr, nullptr),
       UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE);
 }
 
@@ -193,9 +240,9 @@ TEST_P(urEnqueueKernelLaunchKernelSubGroupTest, Success) {
 
   ur_mem_handle_t buffer = nullptr;
   AddBuffer1DArg(sizeof(size_t), &buffer);
-  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
-                                       global_offset.data(), global_size.data(),
-                                       nullptr, 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, global_offset.data(), global_size.data(),
+      nullptr, 0, nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
   // We specify this subgroup size in the kernel source, and then the kernel
   // queries for its subgroup size at runtime and writes it to the buffer.
@@ -218,8 +265,8 @@ TEST_P(urEnqueueKernelLaunchKernelStandardTest, Success) {
   AddPodArg<float>(11.0);
 
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions, &offset,
-                                       &global_size, nullptr, 0, nullptr,
-                                       nullptr));
+                                       &global_size, nullptr, 0, nullptr, 0,
+                                       nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
   ValidateBuffer<uint32_t>(output, sizeof(uint32_t), expected_result);
 }
@@ -309,7 +356,7 @@ TEST_P(urEnqueueKernelLaunchTestWithParam, Success) {
   AddPodArg(val);
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                        global_offset, global_range, nullptr, 0,
-                                       nullptr, nullptr));
+                                       nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
   ValidateBuffer(buffer, buffer_size, val);
 }
@@ -365,8 +412,8 @@ TEST_P(urEnqueueKernelLaunchWithUSM, Success) {
 
   ur_event_handle_t kernel_evt;
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, work_dim, &global_offset,
-                                       &global_size, nullptr, 0, nullptr,
-                                       &kernel_evt));
+                                       &global_size, nullptr, 0, nullptr, 0,
+                                       nullptr, &kernel_evt));
 
   ASSERT_SUCCESS(urQueueFinish(queue));
 
@@ -396,8 +443,8 @@ TEST_P(urEnqueueKernelLaunchWithUSM, WithMemcpy) {
 
   ur_event_handle_t kernel_evt;
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, work_dim, &global_offset,
-                                       &global_size, nullptr, 0, nullptr,
-                                       &kernel_evt));
+                                       &global_size, nullptr, 0, nullptr, 0,
+                                       nullptr, &kernel_evt));
 
   ur_event_handle_t memcpy_event;
   ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, data.data(), usmPtr,
@@ -484,8 +531,8 @@ TEST_P(urEnqueueKernelLaunchWithVirtualMemory, Success) {
 
   ur_event_handle_t kernel_evt;
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, work_dim, &global_offset,
-                                       &global_size, nullptr, 0, nullptr,
-                                       &kernel_evt));
+                                       &global_size, nullptr, 0, nullptr, 0,
+                                       nullptr, &kernel_evt));
 
   std::vector<uint32_t> data(global_size);
   ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, true, data.data(), virtual_ptr,
@@ -562,7 +609,7 @@ TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) {
   helper.AddPodArg(val);
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[0], kernel, n_dimensions,
                                        &global_offset, &global_size, nullptr, 0,
-                                       nullptr, nullptr));
+                                       nullptr, 0, nullptr, nullptr));
 
   // Wait for the queue to finish executing.
   EXPECT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr));
@@ -675,8 +722,8 @@ TEST_P(urEnqueueKernelLaunchUSMLinkedList, Success) {
   // Run kernel which will iterate the list and modify the values
   ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, list_head));
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 1, &global_offset,
-                                       &global_size, nullptr, 0, nullptr,
-                                       nullptr));
+                                       &global_size, nullptr, 0, nullptr, 0,
+                                       nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   // Verify values
diff --git a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
index bc45a59496e11..74b0e293458bc 100644
--- a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
+++ b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
@@ -203,8 +203,8 @@ TEST_P(urEnqueueKernelLaunchIncrementTest, Success) {
 
     // execute kernel that increments each element by 1
     ASSERT_SUCCESS(urEnqueueKernelLaunch(
-        queue, kernels[i], n_dimensions, &global_offset, &ArraySize, nullptr,
-        bool(lastMemcpyEvent), lastMemcpyEvent, kernelEvent));
+        queue, kernels[i], n_dimensions, &global_offset, &ArraySize, nullptr, 0,
+        nullptr, bool(lastMemcpyEvent), lastMemcpyEvent, kernelEvent));
 
     // copy the memory (input for the next kernel)
     if (i < numOps - 1) {
@@ -296,9 +296,10 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
     memcpyEvent = i < devices.size() - 1 ? memcpyEvents[i].ptr() : nullptr;
 
     // execute kernel that increments each element by 1
-    ASSERT_SUCCESS(urEnqueueKernelLaunch(
-        queues[i], kernels[i], n_dimensions, &global_offset, &ArraySize,
-        nullptr, bool(lastMemcpyEvent), lastMemcpyEvent, kernelEvent));
+    ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[i], kernels[i], n_dimensions,
+                                         &global_offset, &ArraySize, nullptr, 0,
+                                         nullptr, bool(lastMemcpyEvent),
+                                         lastMemcpyEvent, kernelEvent));
 
     // copy the memory to next device
     if (i < devices.size() - 1) {
@@ -396,8 +397,8 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
 
         // execute kernel that increments each element by 1
         ASSERT_SUCCESS(urEnqueueKernelLaunch(
-            queue, kernel, n_dimensions, &global_offset, &ArraySize, nullptr,
-            waitNum, lastEvent, signalEvent));
+            queue, kernel, n_dimensions, &global_offset, &ArraySize, nullptr, 0,
+            nullptr, waitNum, lastEvent, signalEvent));
       }
 
       std::vector<uint32_t> data(ArraySize);
diff --git a/unified-runtime/test/conformance/exp_command_buffer/regression/usm_copy.cpp b/unified-runtime/test/conformance/exp_command_buffer/regression/usm_copy.cpp
index cc691517d136f..21616b0b83fcf 100644
--- a/unified-runtime/test/conformance/exp_command_buffer/regression/usm_copy.cpp
+++ b/unified-runtime/test/conformance/exp_command_buffer/regression/usm_copy.cpp
@@ -84,7 +84,7 @@ TEST_P(urCommandBufferUSMCopyInOrderTest, Success) {
   // D[0] = 44
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                        &global_offset, &global_size, nullptr, 0,
-                                       nullptr, nullptr));
+                                       nullptr, 0, nullptr, nullptr));
 
   // command-buffer sync point used to enforce linear dependencies when
   // appending commands to the command-buffer.
diff --git a/unified-runtime/test/conformance/exp_launch_properties/CMakeLists.txt b/unified-runtime/test/conformance/exp_launch_properties/CMakeLists.txt
deleted file mode 100644
index 82b88259b97b0..0000000000000
--- a/unified-runtime/test/conformance/exp_launch_properties/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-# See LICENSE.TXT
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-add_conformance_kernels_test(exp_launch_properties
-        launch_properties.cpp
-  )
diff --git a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp b/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp
deleted file mode 100644
index 042478a489b20..0000000000000
--- a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-// Copyright (C) 2024 Intel Corporation
-// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
-// Exceptions. See LICENSE.TXT
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include <uur/fixtures.h>
-#include <uur/known_failure.h>
-
-struct urEnqueueKernelLaunchCustomTest : uur::urKernelExecutionTest {
-  void SetUp() override {
-    program_name = "fill";
-    UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp());
-  }
-
-  uint32_t val = 42;
-  size_t global_size = 32;
-  size_t global_offset = 0;
-  size_t n_dimensions = 1;
-};
-UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueKernelLaunchCustomTest);
-
-TEST_P(urEnqueueKernelLaunchCustomTest, Success) {
-  UUR_KNOWN_FAILURE_ON(uur::NativeCPU{});
-
-  ur_bool_t launch_properties_support = false;
-  ASSERT_SUCCESS(urDeviceGetInfo(
-      device, UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP,
-      sizeof(launch_properties_support), &launch_properties_support, nullptr));
-  if (!launch_properties_support) {
-    GTEST_SKIP() << "EXP launch properties feature is not supported.";
-  }
-
-  std::vector<ur_exp_launch_property_t> props(1);
-  props[0].id = UR_EXP_LAUNCH_PROPERTY_ID_IGNORE;
-
-  size_t returned_size = 0;
-  ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_PROFILE, 0, nullptr,
-                                 &returned_size));
-
-  std::unique_ptr<char[]> returned_backend(new char[returned_size]);
-
-  ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_PROFILE, returned_size,
-                                 returned_backend.get(), nullptr));
-
-  std::string_view backend_string(returned_backend.get());
-  const bool cuda_backend = backend_string.find("CUDA") != std::string::npos;
-
-  if (cuda_backend) {
-    ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_VERSION, 0, nullptr,
-                                   &returned_size));
-
-    std::unique_ptr<char[]> returned_compute_capability(
-        new char[returned_size]);
-
-    ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_VERSION,
-                                   returned_size,
-                                   returned_compute_capability.get(), nullptr));
-
-    auto compute_capability =
-        std::stof(std::string(returned_compute_capability.get()));
-
-    if (compute_capability >= 6.0) {
-      ur_exp_launch_property_t coop_prop;
-      coop_prop.id = UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE;
-      coop_prop.value.cooperative = 1;
-      props.push_back(coop_prop);
-    }
-
-    if (compute_capability >= 9.0) {
-      ur_exp_launch_property_t opportunistic_queue_serialize_prop;
-      opportunistic_queue_serialize_prop.id =
-          UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE;
-      opportunistic_queue_serialize_prop.value.opportunistic_queue_serialize =
-          1;
-      props.push_back(opportunistic_queue_serialize_prop);
-    }
-
-    ur_bool_t cluster_launch_supported = false;
-    ASSERT_SUCCESS(
-        urDeviceGetInfo(device, UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP,
-                        sizeof(ur_bool_t), &cluster_launch_supported, nullptr));
-
-    if (cluster_launch_supported) {
-      ur_exp_launch_property_t cluster_dims_prop;
-      cluster_dims_prop.id = UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION;
-      cluster_dims_prop.value.clusterDim[0] = 16;
-      cluster_dims_prop.value.clusterDim[1] = 1;
-      cluster_dims_prop.value.clusterDim[2] = 1;
-
-      props.push_back(cluster_dims_prop);
-    }
-  }
-  ur_mem_handle_t buffer = nullptr;
-  AddBuffer1DArg(sizeof(val) * global_size, &buffer);
-  AddPodArg(val);
-
-  ASSERT_SUCCESS(urEnqueueKernelLaunchCustomExp(
-      queue, kernel, n_dimensions, &global_offset, &global_size, nullptr, 1,
-      &props[0], 0, nullptr, nullptr));
-  ASSERT_SUCCESS(urQueueFinish(queue));
-  ValidateBuffer(buffer, sizeof(val) * global_size, val);
-}
diff --git a/unified-runtime/test/conformance/integration/QueueBuffer.cpp b/unified-runtime/test/conformance/integration/QueueBuffer.cpp
index 91ea52ed13617..8be98bfe6ba92 100644
--- a/unified-runtime/test/conformance/integration/QueueBuffer.cpp
+++ b/unified-runtime/test/conformance/integration/QueueBuffer.cpp
@@ -87,7 +87,7 @@ TEST_P(QueueBufferTestWithParam, QueueBufferTest) {
 
     ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions,
                                          &GlobalOffset, &ArraySize, nullptr, 0,
-                                         nullptr, &Event));
+                                         nullptr, 0, nullptr, &Event));
     ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event));
 
     CurValueMem2 = CurValueMem1 * 2;
@@ -100,7 +100,7 @@ TEST_P(QueueBufferTestWithParam, QueueBufferTest) {
 
     ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions,
                                          &GlobalOffset, &ArraySize, nullptr, 0,
-                                         nullptr, &Event));
+                                         nullptr, 0, nullptr, &Event));
     ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event));
 
     CurValueMem1 = CurValueMem2 * 2;
diff --git a/unified-runtime/test/conformance/integration/QueueEmptyStatus.cpp b/unified-runtime/test/conformance/integration/QueueEmptyStatus.cpp
index e6308492d172f..2885bf51d0368 100644
--- a/unified-runtime/test/conformance/integration/QueueEmptyStatus.cpp
+++ b/unified-runtime/test/conformance/integration/QueueEmptyStatus.cpp
@@ -60,7 +60,7 @@ struct QueueEmptyStatusTestWithParam : uur::IntegrationQueueTestWithParam {
     for (uint32_t i = 0; i < num_iterations; ++i) {
       ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, n_dimensions,
                                            &global_offset, &ArraySize, nullptr,
-                                           0, nullptr, &Event));
+                                           0, nullptr, 0, nullptr, &Event));
       ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event));
     }
 
diff --git a/unified-runtime/test/conformance/integration/QueueUSM.cpp b/unified-runtime/test/conformance/integration/QueueUSM.cpp
index 7f0dce10d5cc1..d9958dfb20fdc 100644
--- a/unified-runtime/test/conformance/integration/QueueUSM.cpp
+++ b/unified-runtime/test/conformance/integration/QueueUSM.cpp
@@ -100,7 +100,7 @@ TEST_P(QueueUSMTestWithParam, QueueUSMTest) {
 
     ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions,
                                          &GlobalOffset, &ArraySize, nullptr, 0,
-                                         nullptr, &Event));
+                                         nullptr, 0, nullptr, &Event));
     ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event));
 
     CurValueMem2 = CurValueMem1 * 2;
@@ -111,7 +111,7 @@ TEST_P(QueueUSMTestWithParam, QueueUSMTest) {
 
     ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions,
                                          &GlobalOffset, &ArraySize, nullptr, 0,
-                                         nullptr, &Event));
+                                         nullptr, 0, nullptr, &Event));
     ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event));
 
     CurValueMem1 = CurValueMem2 * 2;
diff --git a/unified-runtime/test/conformance/kernel/CMakeLists.txt b/unified-runtime/test/conformance/kernel/CMakeLists.txt
index a39aef8f875c7..3ec3c30628fff 100644
--- a/unified-runtime/test/conformance/kernel/CMakeLists.txt
+++ b/unified-runtime/test/conformance/kernel/CMakeLists.txt
@@ -19,4 +19,5 @@ add_conformance_kernels_test(kernel
     urKernelSetArgValue.cpp
     urKernelSetExecInfo.cpp
     urKernelSetSpecializationConstants.cpp
+    urKernelSuggestMaxCooperativeGroupCount.cpp
     urKernelGetSuggestedLocalWorkSize.cpp)
diff --git a/unified-runtime/test/conformance/kernel/urKernelCreate.cpp b/unified-runtime/test/conformance/kernel/urKernelCreate.cpp
index 7bd45a06bc148..651193b7e2823 100644
--- a/unified-runtime/test/conformance/kernel/urKernelCreate.cpp
+++ b/unified-runtime/test/conformance/kernel/urKernelCreate.cpp
@@ -84,9 +84,9 @@ TEST_P(urMultiDeviceKernelCreateTest, WithProgramBuild) {
     ASSERT_SUCCESS(
         urKernelCreate(program.get(), kernelName.data(), kernel.ptr()));
 
-    ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[i], kernel.get(), n_dimensions,
-                                         &global_offset, &local_size,
-                                         &global_size, 0, nullptr, nullptr));
+    ASSERT_SUCCESS(urEnqueueKernelLaunch(
+        queues[i], kernel.get(), n_dimensions, &global_offset, &local_size,
+        &global_size, 0, nullptr, 0, nullptr, nullptr));
 
     ASSERT_SUCCESS(urQueueFinish(queues[i]));
   }
@@ -126,9 +126,9 @@ TEST_P(urMultiDeviceKernelCreateTest, WithProgramCompileAndLink) {
     ASSERT_SUCCESS(
         urKernelCreate(linked_program.get(), kernelName.data(), kernel.ptr()));
 
-    ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[i], kernel.get(), n_dimensions,
-                                         &global_offset, &local_size,
-                                         &global_size, 0, nullptr, nullptr));
+    ASSERT_SUCCESS(urEnqueueKernelLaunch(
+        queues[i], kernel.get(), n_dimensions, &global_offset, &local_size,
+        &global_size, 0, nullptr, 0, nullptr, nullptr));
 
     ASSERT_SUCCESS(urQueueFinish(queues[i]));
   }
diff --git a/unified-runtime/test/conformance/kernel/urKernelSetArgLocal.cpp b/unified-runtime/test/conformance/kernel/urKernelSetArgLocal.cpp
index 2bc51c691cbd6..ef9c1d6021b14 100644
--- a/unified-runtime/test/conformance/kernel/urKernelSetArgLocal.cpp
+++ b/unified-runtime/test/conformance/kernel/urKernelSetArgLocal.cpp
@@ -150,9 +150,9 @@ struct urKernelSetArgLocalMultiTest : uur::urKernelExecutionTest {
 UUR_INSTANTIATE_DEVICE_TEST_SUITE(urKernelSetArgLocalMultiTest);
 
 TEST_P(urKernelSetArgLocalMultiTest, Basic) {
-  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
-                                       &global_offset, &global_size,
-                                       &local_size, 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, &global_offset, &global_size, &local_size, 0,
+      nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   uint32_t *output = (uint32_t *)shared_ptrs[0];
@@ -162,9 +162,9 @@ TEST_P(urKernelSetArgLocalMultiTest, Basic) {
 }
 
 TEST_P(urKernelSetArgLocalMultiTest, ReLaunch) {
-  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
-                                       &global_offset, &global_size,
-                                       &local_size, 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, &global_offset, &global_size, &local_size, 0,
+      nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   uint32_t *output = (uint32_t *)shared_ptrs[0];
@@ -173,9 +173,9 @@ TEST_P(urKernelSetArgLocalMultiTest, ReLaunch) {
   Validate(output, X, Y, A, global_size, local_size);
 
   // Relaunch with new arguments
-  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
-                                       &global_offset, &global_size,
-                                       &local_size, 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, &global_offset, &global_size, &local_size, 0,
+      nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
   uint32_t *new_output = (uint32_t *)shared_ptrs[0];
   uint32_t *new_X = (uint32_t *)shared_ptrs[3];
@@ -185,9 +185,9 @@ TEST_P(urKernelSetArgLocalMultiTest, ReLaunch) {
 
 // Overwrite local args to a larger value, then reset back to original
 TEST_P(urKernelSetArgLocalMultiTest, Overwrite) {
-  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
-                                       &global_offset, &global_size,
-                                       &local_size, 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, &global_offset, &global_size, &local_size, 0,
+      nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   uint32_t *output = (uint32_t *)shared_ptrs[0];
@@ -230,9 +230,9 @@ TEST_P(urKernelSetArgLocalMultiTest, Overwrite) {
                                        &hip_local_offset));
   }
 
-  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
-                                       &global_offset, &global_size,
-                                       &new_local_size, 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, &global_offset, &global_size,
+      &new_local_size, 0, nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   Validate(output, X, Y, A, global_size, new_local_size);
@@ -326,9 +326,9 @@ struct urKernelSetArgLocalOutOfOrder : urKernelSetArgLocalMultiTest {
 
 UUR_INSTANTIATE_DEVICE_TEST_SUITE(urKernelSetArgLocalOutOfOrder);
 TEST_P(urKernelSetArgLocalOutOfOrder, Success) {
-  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
-                                       &global_offset, &global_size,
-                                       &local_size, 0, nullptr, nullptr));
+  ASSERT_SUCCESS(urEnqueueKernelLaunch(
+      queue, kernel, n_dimensions, &global_offset, &global_size, &local_size, 0,
+      nullptr, 0, nullptr, nullptr));
   ASSERT_SUCCESS(urQueueFinish(queue));
 
   uint32_t *output = (uint32_t *)shared_ptrs[0];
diff --git a/unified-runtime/test/conformance/kernel/urKernelSuggestMaxCooperativeGroupCount.cpp b/unified-runtime/test/conformance/kernel/urKernelSuggestMaxCooperativeGroupCount.cpp
new file mode 100644
index 0000000000000..a7a948f6d17d3
--- /dev/null
+++ b/unified-runtime/test/conformance/kernel/urKernelSuggestMaxCooperativeGroupCount.cpp
@@ -0,0 +1,99 @@
+// Copyright (C) 2025 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+// Exceptions. See LICENSE.TXT
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstdint>
+
+#include <uur/fixtures.h>
+
+// Fixture for the urKernelSuggestMaxCooperativeGroupCount tests. Selects the
+// "bar" program, runs the base fixture's SetUp, and skips when the device's
+// kernel launch capabilities lack the cooperative flag.
+struct urKernelSuggestMaxCooperativeGroupCountTest
+    : uur::urKernelExecutionTest {
+  void SetUp() override {
+    program_name = "bar";
+
+    UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp());
+
+    // Cooperative launch is reported as one bit of the capabilities bitmask.
+    ur_kernel_launch_properties_flags_t supported_properties = 0;
+    ASSERT_SUCCESS(urDeviceGetInfo(
+        device, UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES,
+        sizeof(supported_properties), &supported_properties, nullptr));
+    if (!(supported_properties &
+          UR_KERNEL_LAUNCH_PROPERTIES_FLAG_COOPERATIVE)) {
+      GTEST_SKIP() << "Cooperative launch is not supported.";
+    }
+  }
+
+  // Output slot for the query; initialised to zero.
+  uint32_t suggested_work_groups = 0;
+  // Every test queries a one-dimensional launch with a single work-item per
+  // work-group.
+  const uint32_t n_dimensions = 1;
+  const size_t local_size = 1;
+};
+UUR_INSTANTIATE_DEVICE_TEST_SUITE(urKernelSuggestMaxCooperativeGroupCountTest);
+
+// A valid query must succeed and report a non-zero work-group count.
+TEST_P(urKernelSuggestMaxCooperativeGroupCountTest, Success) {
+  ASSERT_SUCCESS(urKernelSuggestMaxCooperativeGroupCount(
+      kernel, device, n_dimensions, &local_size, 0, &suggested_work_groups));
+  // The count is unsigned, so a `>= 0` check could never fail. Require a
+  // non-zero count instead.
+  // NOTE(review): assumes a cooperative-capable device always fits at least
+  // one single-work-item group -- confirm against the adapters.
+  ASSERT_GT(suggested_work_groups, 0u);
+}
+
+// A null kernel handle must yield UR_RESULT_ERROR_INVALID_NULL_HANDLE.
+TEST_P(urKernelSuggestMaxCooperativeGroupCountTest, InvalidNullHandleKernel) {
+  ASSERT_EQ_RESULT(urKernelSuggestMaxCooperativeGroupCount(
+                       nullptr, device, n_dimensions, &local_size, 0,
+                       &suggested_work_groups),
+                   UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
+
+// A null device handle must yield UR_RESULT_ERROR_INVALID_NULL_HANDLE.
+TEST_P(urKernelSuggestMaxCooperativeGroupCountTest, InvalidNullHandleDevice) {
+  ASSERT_EQ_RESULT(urKernelSuggestMaxCooperativeGroupCount(
+                       kernel, nullptr, n_dimensions, &local_size, 0,
+                       &suggested_work_groups),
+                   UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
+
+TEST_P(urKernelSuggestMaxCooperativeGroupCountTest, InvalidWorkDimension) {
+  // Only supports 1-3 dimensions.
+  ASSERT_EQ_RESULT(
+      urKernelSuggestMaxCooperativeGroupCount(kernel, device, 4, &local_size, 0,
+                                              &suggested_work_groups),
+      UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
+  ASSERT_EQ_RESULT(
+      urKernelSuggestMaxCooperativeGroupCount(kernel, device, 0, &local_size, 0,
+                                              &suggested_work_groups),
+      UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
+  ASSERT_EQ_RESULT(
+      urKernelSuggestMaxCooperativeGroupCount(
+          kernel, device, UINT32_MAX, &local_size, 0, &suggested_work_groups),
+      UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
+}
+
+// A null local-size pointer must yield UR_RESULT_ERROR_INVALID_NULL_POINTER.
+TEST_P(urKernelSuggestMaxCooperativeGroupCountTest,
+       InvalidNullPointerLocalSize) {
+  ASSERT_EQ_RESULT(
+      urKernelSuggestMaxCooperativeGroupCount(
+          kernel, device, n_dimensions, nullptr, 0, &suggested_work_groups),
+      UR_RESULT_ERROR_INVALID_NULL_POINTER);
+}
+
+// A null output pointer must yield UR_RESULT_ERROR_INVALID_NULL_POINTER.
+TEST_P(urKernelSuggestMaxCooperativeGroupCountTest,
+       InvalidNullPointerGroupCountRet) {
+  ASSERT_EQ_RESULT(urKernelSuggestMaxCooperativeGroupCount(
+                       kernel, device, n_dimensions, &local_size, 0, nullptr),
+                   UR_RESULT_ERROR_INVALID_NULL_POINTER);
+}
diff --git a/unified-runtime/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp b/unified-runtime/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp
index 9bb064fdbf771..47f068ff9067f 100644
--- a/unified-runtime/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp
+++ b/unified-runtime/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp
@@ -216,8 +216,8 @@ TEST_P(urMultiDeviceContextMemBufferTest, WriteKernelRead) {
 
   // Kernel increments the fill val by 1
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[1], kernels[1], 1 /*workDim=*/,
-                                       offset, work_dims, nullptr, 1, &e1,
-                                       &e2));
+                                       offset, work_dims, nullptr, 0, nullptr,
+                                       1, &e1, &e2));
 
   ASSERT_SUCCESS(urEnqueueMemBufferRead(queues[0], buffer, false, 0,
                                         buffer_size_bytes, out_vec.data(), 1,
@@ -252,13 +252,13 @@ TEST_P(urMultiDeviceContextMemBufferTest, WriteKernelKernelRead) {
 
   // Kernel increments the fill val by 1
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[1], kernels[1], 1 /*workDim=*/,
-                                       offset, work_dims, nullptr, 1, &e1,
-                                       &e2));
+                                       offset, work_dims, nullptr, 0, nullptr,
+                                       1, &e1, &e2));
 
   // Kernel increments the fill val by 1
   ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[0], kernels[0], 1 /*workDim=*/,
-                                       offset, work_dims, nullptr, 1, &e2,
-                                       &e3));
+                                       offset, work_dims, nullptr, 0, nullptr,
+                                       1, &e2, &e3));
 
   ASSERT_SUCCESS(urEnqueueMemBufferRead(queues[1], buffer, false, 0,
                                         buffer_size_bytes, out_vec.data(), 1,
diff --git a/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp b/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp
index 3eea89322665b..5f541fc9bc98f 100644
--- a/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp
+++ b/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp
@@ -73,9 +73,9 @@ TEST_P(urMultiDeviceProgramCreateWithBinaryTest,
     ASSERT_SUCCESS(
         urKernelCreate(binary_program, kernelName.data(), kernel.ptr()));
 
-    ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[i], kernel.get(), n_dimensions,
-                                         &global_offset, &local_size,
-                                         &global_size, 0, nullptr, nullptr));
+    ASSERT_SUCCESS(urEnqueueKernelLaunch(
+        queues[i], kernel.get(), n_dimensions, &global_offset, &local_size,
+        &global_size, 0, nullptr, 0, nullptr, nullptr));
 
     ASSERT_SUCCESS(urQueueFinish(queues[i]));
   }
diff --git a/unified-runtime/test/conformance/testing/include/uur/fixtures.h b/unified-runtime/test/conformance/testing/include/uur/fixtures.h
index 9a9adfe8f6b5a..b67eddd8f8182 100644
--- a/unified-runtime/test/conformance/testing/include/uur/fixtures.h
+++ b/unified-runtime/test/conformance/testing/include/uur/fixtures.h
@@ -1498,7 +1498,7 @@ struct KernelLaunchHelper {
     size_t offset = 0;
     ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 1, &offset,
                                          &global_size, &local_size, 0, nullptr,
-                                         nullptr));
+                                         0, nullptr, nullptr));
     ASSERT_SUCCESS(urQueueFinish(queue));
   }
 
diff --git a/unified-runtime/test/conformance/usm/urUSMFree.cpp b/unified-runtime/test/conformance/usm/urUSMFree.cpp
index 60505c4ada4b1..254812e611266 100644
--- a/unified-runtime/test/conformance/usm/urUSMFree.cpp
+++ b/unified-runtime/test/conformance/usm/urUSMFree.cpp
@@ -133,8 +133,9 @@ TEST_P(urUSMFreeDuringExecutionTest, SuccessHost) {
 
   EXPECT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, allocation));
   EXPECT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(data), nullptr, &data));
-  EXPECT_SUCCESS(urEnqueueKernelLaunch(
-      queue, kernel, 1, &wg_offset, &array_size, nullptr, 0, nullptr, nullptr));
+  EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 1, &wg_offset,
+                                       &array_size, nullptr, 0, nullptr, 0,
+                                       nullptr, nullptr));
   ASSERT_SUCCESS(urUSMFree(context, allocation));
   ASSERT_SUCCESS(urQueueFinish(queue));
 }
@@ -153,8 +154,9 @@ TEST_P(urUSMFreeDuringExecutionTest, SuccessDevice) {
   EXPECT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, allocation));
   EXPECT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(data), nullptr, &data));
 
-  EXPECT_SUCCESS(urEnqueueKernelLaunch(
-      queue, kernel, 1, &wg_offset, &array_size, nullptr, 0, nullptr, nullptr));
+  EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 1, &wg_offset,
+                                       &array_size, nullptr, 0, nullptr, 0,
+                                       nullptr, nullptr));
   ASSERT_SUCCESS(urUSMFree(context, allocation));
   ASSERT_SUCCESS(urQueueFinish(queue));
 }
@@ -173,8 +175,9 @@ TEST_P(urUSMFreeDuringExecutionTest, SuccessShared) {
 
   EXPECT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, allocation));
   EXPECT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(data), nullptr, &data));
-  EXPECT_SUCCESS(urEnqueueKernelLaunch(
-      queue, kernel, 1, &wg_offset, &array_size, nullptr, 0, nullptr, nullptr));
+  EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 1, &wg_offset,
+                                       &array_size, nullptr, 0, nullptr, 0,
+                                       nullptr, nullptr));
   ASSERT_SUCCESS(urUSMFree(context, allocation));
   ASSERT_SUCCESS(urQueueFinish(queue));
 }
diff --git a/unified-runtime/tools/urinfo/urinfo.hpp b/unified-runtime/tools/urinfo/urinfo.hpp
index 9992b08b51bb8..49618c4fe82e8 100644
--- a/unified-runtime/tools/urinfo/urinfo.hpp
+++ b/unified-runtime/tools/urinfo/urinfo.hpp
@@ -344,6 +344,9 @@ inline void printDeviceInfos(ur_device_handle_t hDevice,
   printDeviceInfo<ur_bool_t>(hDevice,
                              UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE);
   std::cout << prefix;
+  printDeviceInfo<ur_kernel_launch_properties_flags_t>(
+      hDevice, UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES);
+  std::cout << prefix;
   printDeviceInfo<ur_bool_t>(hDevice,
                              UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP);
   std::cout << prefix;
@@ -356,9 +359,6 @@ inline void printDeviceInfos(ur_device_handle_t hDevice,
   printDeviceInfo<ur_bool_t>(
       hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_SUBGRAPH_SUPPORT_EXP);
   std::cout << prefix;
-  printDeviceInfo<ur_bool_t>(hDevice,
-                             UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP);
-  std::cout << prefix;
   printDeviceInfo<ur_bool_t>(hDevice,
                              UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP);
   std::cout << prefix;
@@ -444,14 +444,8 @@ inline void printDeviceInfos(ur_device_handle_t hDevice,
   printDeviceInfo<ur_bool_t>(hDevice,
                              UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP);
   std::cout << prefix;
-  printDeviceInfo<ur_bool_t>(hDevice,
-                             UR_DEVICE_INFO_LAUNCH_PROPERTIES_SUPPORT_EXP);
-  std::cout << prefix;
   printDeviceInfo<ur_bool_t>(hDevice, UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP);
   std::cout << prefix;
-  printDeviceInfo<ur_bool_t>(hDevice,
-                             UR_DEVICE_INFO_COOPERATIVE_KERNEL_SUPPORT_EXP);
-  std::cout << prefix;
   printDeviceInfo<ur_bool_t>(hDevice,
                              UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP);
 }