diff --git a/include/ur_api.h b/include/ur_api.h index d7621bda32..721eedec3f 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -429,6 +429,14 @@ typedef enum ur_function_t { UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, /// Enumerator for ::urPhysicalMemGetInfo UR_FUNCTION_PHYSICAL_MEM_GET_INFO = 249, + /// Enumerator for ::urEnqueueUSMDeviceAllocExp + UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP = 250, + /// Enumerator for ::urEnqueueUSMSharedAllocExp + UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP = 251, + /// Enumerator for ::urEnqueueUSMHostAllocExp + UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP = 252, + /// Enumerator for ::urEnqueueUSMFreeExp + UR_FUNCTION_ENQUEUE_USM_FREE_EXP = 253, /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -508,6 +516,8 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, /// ::ur_usm_alloc_location_desc_t UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, + /// ::ur_usm_pool_native_desc_t + UR_STRUCTURE_TYPE_USM_POOL_NATIVE_DESC = 36, /// ::ur_exp_command_buffer_desc_t UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, /// ::ur_exp_command_buffer_update_kernel_launch_desc_t @@ -536,6 +546,8 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_EXP_IMAGE_COPY_REGION = 0x2007, /// ::ur_exp_enqueue_native_command_properties_t UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES = 0x3000, + /// ::ur_exp_enqueue_usm_alloc_properties_t + UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES = 0x3001, /// ::ur_exp_enqueue_ext_properties_t UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES = 0x4000, /// @cond @@ -2290,6 +2302,9 @@ typedef enum ur_device_info_t { /// [::ur_exp_device_2d_block_array_capability_flags_t] return a bit-field /// of Intel GPU 2D block array capabilities UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP = 0x2022, + /// [::ur_bool_t] returns true if the device supports USM allocation + /// enqueueing + UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP = 0x2023, /// @cond 
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -2315,7 +2330,7 @@ typedef enum ur_device_info_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName` +/// + `::UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -7055,6 +7070,14 @@ typedef enum ur_command_t { UR_COMMAND_TIMESTAMP_RECORDING_EXP = 0x2002, /// Event created by ::urEnqueueNativeCommandExp UR_COMMAND_ENQUEUE_NATIVE_EXP = 0x2004, + /// Event created by ::urEnqueueUSMDeviceAllocExp + UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP = 0x2005, + /// Event created by ::urEnqueueUSMSharedAllocExp + UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP = 0x2006, + /// Event created by ::urEnqueueUSMHostAllocExp + UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP = 0x2007, + /// Event created by ::urEnqueueUSMFreeExp + UR_COMMAND_ENQUEUE_USM_FREE_EXP = 0x2008, /// @cond UR_COMMAND_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -8955,6 +8978,218 @@ typedef enum ur_exp_device_2d_block_array_capability_flag_t { /// ur_exp_device_2d_block_array_capability_flags_t #define UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAGS_MASK 0xfffffffc +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental API for enqueuing asynchronous +// USM allocations +#if !defined(__GNUC__) +#pragma region async_alloc_(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief USM native pool descriptor type +/// +/// @details +/// - Specify these properties in ::urUSMPoolCreate via ::ur_usm_pool_desc_t +/// as part of a `pNext` chain. 
+typedef struct ur_usm_pool_native_desc_t { + /// [in] type of this structure, must be + /// ::UR_STRUCTURE_TYPE_USM_POOL_NATIVE_DESC + ur_structure_type_t stype; + /// [in][optional] pointer to extension-specific structure + const void *pNext; + /// [in] USM memory object + void *pMem; + /// [in] size of USM memory object + size_t size; + /// [in] type of USM memory object + ur_usm_type_t memType; + /// [in] device associated with the USM memory object + ur_device_handle_t device; + +} ur_usm_pool_native_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue USM allocation flags +typedef uint32_t ur_exp_enqueue_usm_alloc_flags_t; +typedef enum ur_exp_enqueue_usm_alloc_flag_t { + /// reserved for future use. + UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD = UR_BIT(0), + /// @cond + UR_EXP_ENQUEUE_USM_ALLOC_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_enqueue_usm_alloc_flag_t; +/// @brief Bit Mask for validating ur_exp_enqueue_usm_alloc_flags_t +#define UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK 0xfffffffe + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue USM allocation properties +typedef struct ur_exp_enqueue_usm_alloc_properties_t { + /// [in] type of this structure, must be + /// ::UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES + ur_structure_type_t stype; + /// [in,out][optional] pointer to extension-specific structure + void *pNext; + /// [in] enqueue USM allocation flags + ur_exp_enqueue_usm_alloc_flags_t flags; + +} ur_exp_enqueue_usm_alloc_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM device allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - 
::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & +/// pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the USM allocation is performed. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM device allocation + ur_event_handle_t *phEvent); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM shared allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & +/// pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the USM allocation is performed. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM shared allocation + ur_event_handle_t *phEvent); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM host allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & +/// pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the USM allocation is performed. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM host allocation + ur_event_handle_t *phEvent); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM deallocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] pointer to USM memory object + void *pMem, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the USM deallocation is performed. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out][optional] return an event object that identifies the + /// asynchronous USM deallocation + ur_event_handle_t *phEvent); + #if !defined(__GNUC__) #pragma endregion #endif @@ -13504,6 +13739,64 @@ typedef struct ur_enqueue_events_wait_with_barrier_ext_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_events_wait_with_barrier_ext_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMDeviceAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_device_alloc_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + const size_t *psize; + const ur_exp_enqueue_usm_alloc_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + void ***pppMem; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_device_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMSharedAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_shared_alloc_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + const size_t *psize; + const ur_exp_enqueue_usm_alloc_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + void ***pppMem; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_shared_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMHostAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the 
callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_host_alloc_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + const size_t *psize; + const ur_exp_enqueue_usm_alloc_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + void ***pppMem; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_host_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMFreeExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_free_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + void **ppMem; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_free_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp /// @details Each entry is a pointer to the parameter passed to the function; @@ -13548,6 +13841,139 @@ typedef struct ur_enqueue_native_command_exp_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_native_command_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMHostAlloc +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_host_alloc_params_t { + ur_context_handle_t *phContext; + const ur_usm_desc_t **ppUSMDesc; + ur_usm_pool_handle_t *ppool; + size_t *psize; + void ***pppMem; +} ur_usm_host_alloc_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for 
urUSMDeviceAlloc +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_device_alloc_params_t { + ur_context_handle_t *phContext; + ur_device_handle_t *phDevice; + const ur_usm_desc_t **ppUSMDesc; + ur_usm_pool_handle_t *ppool; + size_t *psize; + void ***pppMem; +} ur_usm_device_alloc_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMSharedAlloc +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_shared_alloc_params_t { + ur_context_handle_t *phContext; + ur_device_handle_t *phDevice; + const ur_usm_desc_t **ppUSMDesc; + ur_usm_pool_handle_t *ppool; + size_t *psize; + void ***pppMem; +} ur_usm_shared_alloc_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMFree +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_free_params_t { + ur_context_handle_t *phContext; + void **ppMem; +} ur_usm_free_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMGetMemAllocInfo +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_get_mem_alloc_info_params_t { + ur_context_handle_t *phContext; + const void **ppMem; + ur_usm_alloc_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_usm_get_mem_alloc_info_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for 
urUSMPoolCreate +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pool_create_params_t { + ur_context_handle_t *phContext; + ur_usm_pool_desc_t **ppPoolDesc; + ur_usm_pool_handle_t **pppPool; +} ur_usm_pool_create_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPoolRetain +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pool_retain_params_t { + ur_usm_pool_handle_t *ppPool; +} ur_usm_pool_retain_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPoolRelease +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pool_release_params_t { + ur_usm_pool_handle_t *ppPool; +} ur_usm_pool_release_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPoolGetInfo +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pool_get_info_params_t { + ur_usm_pool_handle_t *phPool; + ur_usm_pool_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_usm_pool_get_info_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPitchedAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pitched_alloc_exp_params_t { + 
ur_context_handle_t *phContext; + ur_device_handle_t *phDevice; + const ur_usm_desc_t **ppUSMDesc; + ur_usm_pool_handle_t *ppool; + size_t *pwidthInBytes; + size_t *pheight; + size_t *pelementSizeBytes; + void ***pppMem; + size_t **ppResultPitch; +} ur_usm_pitched_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMImportExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_import_exp_params_t { + ur_context_handle_t *phContext; + void **ppMem; + size_t *psize; +} ur_usm_import_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMReleaseExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_release_exp_params_t { + ur_context_handle_t *phContext; + void **ppMem; +} ur_usm_release_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for /// urBindlessImagesUnsampledImageHandleDestroyExp @@ -13770,139 +14196,6 @@ typedef struct ur_bindless_images_signal_external_semaphore_exp_params_t { ur_event_handle_t **pphEvent; } ur_bindless_images_signal_external_semaphore_exp_params_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMHostAlloc -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_host_alloc_params_t { - ur_context_handle_t *phContext; - const ur_usm_desc_t **ppUSMDesc; - ur_usm_pool_handle_t *ppool; - size_t *psize; - void ***pppMem; -} ur_usm_host_alloc_params_t; - 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMDeviceAlloc -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_device_alloc_params_t { - ur_context_handle_t *phContext; - ur_device_handle_t *phDevice; - const ur_usm_desc_t **ppUSMDesc; - ur_usm_pool_handle_t *ppool; - size_t *psize; - void ***pppMem; -} ur_usm_device_alloc_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMSharedAlloc -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_shared_alloc_params_t { - ur_context_handle_t *phContext; - ur_device_handle_t *phDevice; - const ur_usm_desc_t **ppUSMDesc; - ur_usm_pool_handle_t *ppool; - size_t *psize; - void ***pppMem; -} ur_usm_shared_alloc_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMFree -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_free_params_t { - ur_context_handle_t *phContext; - void **ppMem; -} ur_usm_free_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMGetMemAllocInfo -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_get_mem_alloc_info_params_t { - ur_context_handle_t *phContext; - const void **ppMem; - ur_usm_alloc_info_t *ppropName; - size_t *ppropSize; - void **ppPropValue; - size_t **ppPropSizeRet; -} ur_usm_get_mem_alloc_info_params_t; - 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPoolCreate -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_pool_create_params_t { - ur_context_handle_t *phContext; - ur_usm_pool_desc_t **ppPoolDesc; - ur_usm_pool_handle_t **pppPool; -} ur_usm_pool_create_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPoolRetain -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_pool_retain_params_t { - ur_usm_pool_handle_t *ppPool; -} ur_usm_pool_retain_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPoolRelease -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_pool_release_params_t { - ur_usm_pool_handle_t *ppPool; -} ur_usm_pool_release_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPoolGetInfo -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_pool_get_info_params_t { - ur_usm_pool_handle_t *phPool; - ur_usm_pool_info_t *ppropName; - size_t *ppropSize; - void **ppPropValue; - size_t **ppPropSizeRet; -} ur_usm_pool_get_info_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPitchedAllocExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the 
callback the ability to modify the parameter's value -typedef struct ur_usm_pitched_alloc_exp_params_t { - ur_context_handle_t *phContext; - ur_device_handle_t *phDevice; - const ur_usm_desc_t **ppUSMDesc; - ur_usm_pool_handle_t *ppool; - size_t *pwidthInBytes; - size_t *pheight; - size_t *pelementSizeBytes; - void ***pppMem; - size_t **ppResultPitch; -} ur_usm_pitched_alloc_exp_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMImportExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_import_exp_params_t { - ur_context_handle_t *phContext; - void **ppMem; - size_t *psize; -} ur_usm_import_exp_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMReleaseExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_release_exp_params_t { - ur_context_handle_t *phContext; - void **ppMem; -} ur_usm_release_exp_params_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferCreateExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index 8c25dde67f..cc19d89584 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -132,9 +132,25 @@ _UR_API(urEnqueueReadHostPipe) _UR_API(urEnqueueWriteHostPipe) _UR_API(urEnqueueEventsWaitWithBarrierExt) _UR_API(urEnqueueKernelLaunchCustomExp) +_UR_API(urEnqueueUSMDeviceAllocExp) +_UR_API(urEnqueueUSMSharedAllocExp) +_UR_API(urEnqueueUSMHostAllocExp) +_UR_API(urEnqueueUSMFreeExp) _UR_API(urEnqueueCooperativeKernelLaunchExp) 
_UR_API(urEnqueueTimestampRecordingExp) _UR_API(urEnqueueNativeCommandExp) +_UR_API(urUSMHostAlloc) +_UR_API(urUSMDeviceAlloc) +_UR_API(urUSMSharedAlloc) +_UR_API(urUSMFree) +_UR_API(urUSMGetMemAllocInfo) +_UR_API(urUSMPoolCreate) +_UR_API(urUSMPoolRetain) +_UR_API(urUSMPoolRelease) +_UR_API(urUSMPoolGetInfo) +_UR_API(urUSMPitchedAllocExp) +_UR_API(urUSMImportExp) +_UR_API(urUSMReleaseExp) _UR_API(urBindlessImagesUnsampledImageHandleDestroyExp) _UR_API(urBindlessImagesSampledImageHandleDestroyExp) _UR_API(urBindlessImagesImageAllocateExp) @@ -153,18 +169,6 @@ _UR_API(urBindlessImagesImportExternalSemaphoreExp) _UR_API(urBindlessImagesReleaseExternalSemaphoreExp) _UR_API(urBindlessImagesWaitExternalSemaphoreExp) _UR_API(urBindlessImagesSignalExternalSemaphoreExp) -_UR_API(urUSMHostAlloc) -_UR_API(urUSMDeviceAlloc) -_UR_API(urUSMSharedAlloc) -_UR_API(urUSMFree) -_UR_API(urUSMGetMemAllocInfo) -_UR_API(urUSMPoolCreate) -_UR_API(urUSMPoolRetain) -_UR_API(urUSMPoolRelease) -_UR_API(urUSMPoolGetInfo) -_UR_API(urUSMPitchedAllocExp) -_UR_API(urUSMImportExp) -_UR_API(urUSMReleaseExp) _UR_API(urCommandBufferCreateExp) _UR_API(urCommandBufferRetainExp) _UR_API(urCommandBufferReleaseExp) diff --git a/include/ur_ddi.h b/include/ur_ddi.h index c64aaa8d46..e3aee6a3c6 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1117,6 +1117,33 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueKernelLaunchCustomExp_t)( const size_t *, const size_t *, uint32_t, const ur_exp_launch_property_t *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMDeviceAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMDeviceAllocExp_t)( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *); + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMSharedAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMSharedAllocExp_t)( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMHostAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMHostAllocExp_t)( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMFreeExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMFreeExp_t)( + ur_queue_handle_t, ur_usm_pool_handle_t, void *, uint32_t, + const ur_event_handle_t *, ur_event_handle_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)( @@ -1142,6 +1169,10 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueNativeCommandExp_t)( /// @brief Table of EnqueueExp functions pointers typedef struct ur_enqueue_exp_dditable_t { ur_pfnEnqueueKernelLaunchCustomExp_t pfnKernelLaunchCustomExp; + ur_pfnEnqueueUSMDeviceAllocExp_t pfnUSMDeviceAllocExp; + ur_pfnEnqueueUSMSharedAllocExp_t pfnUSMSharedAllocExp; + ur_pfnEnqueueUSMHostAllocExp_t pfnUSMHostAllocExp; + ur_pfnEnqueueUSMFreeExp_t pfnUSMFreeExp; ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp; ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp; ur_pfnEnqueueNativeCommandExp_t pfnNativeCommandExp; @@ -1167,6 +1198,140 @@ UR_DLLEXPORT ur_result_t 
UR_APICALL urGetEnqueueExpProcAddrTable( typedef ur_result_t(UR_APICALL *ur_pfnGetEnqueueExpProcAddrTable_t)( ur_api_version_t, ur_enqueue_exp_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMHostAlloc +typedef ur_result_t(UR_APICALL *ur_pfnUSMHostAlloc_t)(ur_context_handle_t, + const ur_usm_desc_t *, + ur_usm_pool_handle_t, + size_t, void **); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMDeviceAlloc +typedef ur_result_t(UR_APICALL *ur_pfnUSMDeviceAlloc_t)(ur_context_handle_t, + ur_device_handle_t, + const ur_usm_desc_t *, + ur_usm_pool_handle_t, + size_t, void **); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMSharedAlloc +typedef ur_result_t(UR_APICALL *ur_pfnUSMSharedAlloc_t)(ur_context_handle_t, + ur_device_handle_t, + const ur_usm_desc_t *, + ur_usm_pool_handle_t, + size_t, void **); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMFree +typedef ur_result_t(UR_APICALL *ur_pfnUSMFree_t)(ur_context_handle_t, void *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMGetMemAllocInfo +typedef ur_result_t(UR_APICALL *ur_pfnUSMGetMemAllocInfo_t)(ur_context_handle_t, + const void *, + ur_usm_alloc_info_t, + size_t, void *, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPoolCreate +typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolCreate_t)(ur_context_handle_t, + ur_usm_pool_desc_t *, + ur_usm_pool_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPoolRetain +typedef ur_result_t(UR_APICALL 
*ur_pfnUSMPoolRetain_t)(ur_usm_pool_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPoolRelease +typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolRelease_t)(ur_usm_pool_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPoolGetInfo +typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolGetInfo_t)(ur_usm_pool_handle_t, + ur_usm_pool_info_t, + size_t, void *, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of USM functions pointers +typedef struct ur_usm_dditable_t { + ur_pfnUSMHostAlloc_t pfnHostAlloc; + ur_pfnUSMDeviceAlloc_t pfnDeviceAlloc; + ur_pfnUSMSharedAlloc_t pfnSharedAlloc; + ur_pfnUSMFree_t pfnFree; + ur_pfnUSMGetMemAllocInfo_t pfnGetMemAllocInfo; + ur_pfnUSMPoolCreate_t pfnPoolCreate; + ur_pfnUSMPoolRetain_t pfnPoolRetain; + ur_pfnUSMPoolRelease_t pfnPoolRelease; + ur_pfnUSMPoolGetInfo_t pfnPoolGetInfo; +} ur_usm_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's USM table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( + /// [in] API version requested + ur_api_version_t version, + /// [in,out] pointer to table of DDI function pointers + ur_usm_dditable_t *pDdiTable); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetUSMProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetUSMProcAddrTable_t)( + ur_api_version_t, ur_usm_dditable_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for 
urUSMPitchedAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnUSMPitchedAllocExp_t)( + ur_context_handle_t, ur_device_handle_t, const ur_usm_desc_t *, + ur_usm_pool_handle_t, size_t, size_t, size_t, void **, size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMImportExp +typedef ur_result_t(UR_APICALL *ur_pfnUSMImportExp_t)(ur_context_handle_t, + void *, size_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMReleaseExp +typedef ur_result_t(UR_APICALL *ur_pfnUSMReleaseExp_t)(ur_context_handle_t, + void *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of USMExp functions pointers +typedef struct ur_usm_exp_dditable_t { + ur_pfnUSMPitchedAllocExp_t pfnPitchedAllocExp; + ur_pfnUSMImportExp_t pfnImportExp; + ur_pfnUSMReleaseExp_t pfnReleaseExp; +} ur_usm_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's USMExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( + /// [in] API version requested + ur_api_version_t version, + /// [in,out] pointer to table of DDI function pointers + ur_usm_exp_dditable_t *pDdiTable); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetUSMExpProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetUSMExpProcAddrTable_t)( + ur_api_version_t, ur_usm_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urBindlessImagesUnsampledImageHandleDestroyExp typedef 
ur_result_t( @@ -1331,140 +1496,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( typedef ur_result_t(UR_APICALL *ur_pfnGetBindlessImagesExpProcAddrTable_t)( ur_api_version_t, ur_bindless_images_exp_dditable_t *); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMHostAlloc -typedef ur_result_t(UR_APICALL *ur_pfnUSMHostAlloc_t)(ur_context_handle_t, - const ur_usm_desc_t *, - ur_usm_pool_handle_t, - size_t, void **); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMDeviceAlloc -typedef ur_result_t(UR_APICALL *ur_pfnUSMDeviceAlloc_t)(ur_context_handle_t, - ur_device_handle_t, - const ur_usm_desc_t *, - ur_usm_pool_handle_t, - size_t, void **); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMSharedAlloc -typedef ur_result_t(UR_APICALL *ur_pfnUSMSharedAlloc_t)(ur_context_handle_t, - ur_device_handle_t, - const ur_usm_desc_t *, - ur_usm_pool_handle_t, - size_t, void **); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMFree -typedef ur_result_t(UR_APICALL *ur_pfnUSMFree_t)(ur_context_handle_t, void *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMGetMemAllocInfo -typedef ur_result_t(UR_APICALL *ur_pfnUSMGetMemAllocInfo_t)(ur_context_handle_t, - const void *, - ur_usm_alloc_info_t, - size_t, void *, - size_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMPoolCreate -typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolCreate_t)(ur_context_handle_t, - ur_usm_pool_desc_t *, - ur_usm_pool_handle_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for 
urUSMPoolRetain -typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolRetain_t)(ur_usm_pool_handle_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMPoolRelease -typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolRelease_t)(ur_usm_pool_handle_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMPoolGetInfo -typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolGetInfo_t)(ur_usm_pool_handle_t, - ur_usm_pool_info_t, - size_t, void *, - size_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Table of USM functions pointers -typedef struct ur_usm_dditable_t { - ur_pfnUSMHostAlloc_t pfnHostAlloc; - ur_pfnUSMDeviceAlloc_t pfnDeviceAlloc; - ur_pfnUSMSharedAlloc_t pfnSharedAlloc; - ur_pfnUSMFree_t pfnFree; - ur_pfnUSMGetMemAllocInfo_t pfnGetMemAllocInfo; - ur_pfnUSMPoolCreate_t pfnPoolCreate; - ur_pfnUSMPoolRetain_t pfnPoolRetain; - ur_pfnUSMPoolRelease_t pfnPoolRelease; - ur_pfnUSMPoolGetInfo_t pfnPoolGetInfo; -} ur_usm_dditable_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's USM table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( - /// [in] API version requested - ur_api_version_t version, - /// [in,out] pointer to table of DDI function pointers - ur_usm_dditable_t *pDdiTable); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urGetUSMProcAddrTable -typedef ur_result_t(UR_APICALL *ur_pfnGetUSMProcAddrTable_t)( - ur_api_version_t, ur_usm_dditable_t *); - 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMPitchedAllocExp -typedef ur_result_t(UR_APICALL *ur_pfnUSMPitchedAllocExp_t)( - ur_context_handle_t, ur_device_handle_t, const ur_usm_desc_t *, - ur_usm_pool_handle_t, size_t, size_t, size_t, void **, size_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMImportExp -typedef ur_result_t(UR_APICALL *ur_pfnUSMImportExp_t)(ur_context_handle_t, - void *, size_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMReleaseExp -typedef ur_result_t(UR_APICALL *ur_pfnUSMReleaseExp_t)(ur_context_handle_t, - void *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Table of USMExp functions pointers -typedef struct ur_usm_exp_dditable_t { - ur_pfnUSMPitchedAllocExp_t pfnPitchedAllocExp; - ur_pfnUSMImportExp_t pfnImportExp; - ur_pfnUSMReleaseExp_t pfnReleaseExp; -} ur_usm_exp_dditable_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's USMExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( - /// [in] API version requested - ur_api_version_t version, - /// [in,out] pointer to table of DDI function pointers - ur_usm_exp_dditable_t *pDdiTable); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urGetUSMExpProcAddrTable -typedef ur_result_t(UR_APICALL *ur_pfnGetUSMExpProcAddrTable_t)( - ur_api_version_t, ur_usm_exp_dditable_t *); - 
/////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferCreateExp typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferCreateExp_t)( @@ -1880,9 +1911,9 @@ typedef struct ur_dditable_t { ur_global_dditable_t Global; ur_enqueue_dditable_t Enqueue; ur_enqueue_exp_dditable_t EnqueueExp; - ur_bindless_images_exp_dditable_t BindlessImagesExp; ur_usm_dditable_t USM; ur_usm_exp_dditable_t USMExp; + ur_bindless_images_exp_dditable_t BindlessImagesExp; ur_command_buffer_exp_dditable_t CommandBufferExp; ur_usm_p2p_exp_dditable_t UsmP2PExp; ur_virtual_mem_dditable_t VirtualMem; diff --git a/include/ur_print.h b/include/ur_print.h index f58133bb8a..0333103f87 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -1122,6 +1122,36 @@ urPrintExpDevice_2dBlockArrayCapabilityFlags( enum ur_exp_device_2d_block_array_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_pool_native_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolNativeDesc( + const struct ur_usm_pool_native_desc_t params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_usm_alloc_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueUsmAllocFlags( + enum ur_exp_enqueue_usm_alloc_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_usm_alloc_properties_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - 
::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueUsmAllocProperties( + const struct ur_exp_enqueue_usm_alloc_properties_t params, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_image_copy_flag_t enum /// @returns @@ -2626,6 +2656,46 @@ urPrintEnqueueEventsWaitWithBarrierExtParams( const struct ur_enqueue_events_wait_with_barrier_ext_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_device_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmDeviceAllocExpParams( + const struct ur_enqueue_usm_device_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_shared_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmSharedAllocExpParams( + const struct ur_enqueue_usm_shared_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_host_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmHostAllocExpParams( + const struct ur_enqueue_usm_host_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_free_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmFreeExpParams( + const struct ur_enqueue_usm_free_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_enqueue_cooperative_kernel_launch_exp_params_t struct /// @returns @@ -2657,6 +2727,126 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueNativeCommandExpParams( const struct ur_enqueue_native_command_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_host_alloc_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmHostAllocParams( + const struct ur_usm_host_alloc_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_device_alloc_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmDeviceAllocParams( + const struct ur_usm_device_alloc_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_shared_alloc_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmSharedAllocParams( + const struct 
ur_usm_shared_alloc_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_free_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL +urPrintUsmFreeParams(const struct ur_usm_free_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_get_mem_alloc_info_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmGetMemAllocInfoParams( + const struct ur_usm_get_mem_alloc_info_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_pool_create_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolCreateParams( + const struct ur_usm_pool_create_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_pool_retain_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolRetainParams( + const struct ur_usm_pool_retain_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_pool_release_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// 
- `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolReleaseParams( + const struct ur_usm_pool_release_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_pool_get_info_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolGetInfoParams( + const struct ur_usm_pool_get_info_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_pitched_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPitchedAllocExpParams( + const struct ur_usm_pitched_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_import_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmImportExpParams( + const struct ur_usm_import_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_usm_release_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmReleaseExpParams( + const struct ur_usm_release_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief 
Print ur_bindless_images_unsampled_image_handle_destroy_exp_params_t /// struct @@ -2864,126 +3054,6 @@ urPrintBindlessImagesSignalExternalSemaphoreExpParams( *params, char *buffer, const size_t buff_size, size_t *out_size); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_host_alloc_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmHostAllocParams( - const struct ur_usm_host_alloc_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_device_alloc_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmDeviceAllocParams( - const struct ur_usm_device_alloc_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_shared_alloc_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmSharedAllocParams( - const struct ur_usm_shared_alloc_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_free_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL -urPrintUsmFreeParams(const struct ur_usm_free_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print 
ur_usm_get_mem_alloc_info_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmGetMemAllocInfoParams( - const struct ur_usm_get_mem_alloc_info_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pool_create_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolCreateParams( - const struct ur_usm_pool_create_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pool_retain_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolRetainParams( - const struct ur_usm_pool_retain_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pool_release_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolReleaseParams( - const struct ur_usm_pool_release_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pool_get_info_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolGetInfoParams( - const struct ur_usm_pool_get_info_params_t *params, char *buffer, - const 
size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pitched_alloc_exp_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPitchedAllocExpParams( - const struct ur_usm_pitched_alloc_exp_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_import_exp_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmImportExpParams( - const struct ur_usm_import_exp_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_release_exp_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmReleaseExpParams( - const struct ur_usm_release_exp_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_command_buffer_create_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 5c5f573477..8b38ea93cc 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -225,6 +225,10 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printFlag(std::ostream &os, + uint32_t flag); + template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); @@ -502,6 +506,14 @@ inline std::ostream &operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, enum 
ur_exp_device_2d_block_array_capability_flag_t value); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_pool_native_desc_t params); +inline std::ostream &operator<<(std::ostream &os, + enum ur_exp_enqueue_usm_alloc_flag_t value); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_exp_enqueue_usm_alloc_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value); inline std::ostream & @@ -1177,6 +1189,18 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_PHYSICAL_MEM_GET_INFO: os << "UR_FUNCTION_PHYSICAL_MEM_GET_INFO"; break; + case UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP"; + break; + case UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP"; + break; + case UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP"; + break; + case UR_FUNCTION_ENQUEUE_USM_FREE_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_FREE_EXP"; + break; default: os << "unknown enumerator"; break; @@ -1295,6 +1319,9 @@ inline std::ostream &operator<<(std::ostream &os, case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: os << "UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC"; break; + case UR_STRUCTURE_TYPE_USM_POOL_NATIVE_DESC: + os << "UR_STRUCTURE_TYPE_USM_POOL_NATIVE_DESC"; + break; case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; break; @@ -1337,6 +1364,9 @@ inline std::ostream &operator<<(std::ostream &os, case UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES"; break; + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES: + os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES"; + break; case UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES: os << 
"UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES"; break; @@ -1556,6 +1586,12 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_USM_POOL_NATIVE_DESC: { + const ur_usm_pool_native_desc_t *pstruct = + (const ur_usm_pool_native_desc_t *)ptr; + printPtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: { const ur_exp_command_buffer_desc_t *pstruct = (const ur_exp_command_buffer_desc_t *)ptr; @@ -1639,6 +1675,12 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES: { + const ur_exp_enqueue_usm_alloc_properties_t *pstruct = + (const ur_exp_enqueue_usm_alloc_properties_t *)ptr; + printPtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES: { const ur_exp_enqueue_ext_properties_t *pstruct = (const ur_exp_enqueue_ext_properties_t *)ptr; @@ -2975,6 +3017,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: os << "UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP"; break; + case UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP: + os << "UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP"; + break; default: os << "unknown enumerator"; break; @@ -4966,6 +5011,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, os << ")"; } break; + case UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -9914,6 +9972,18 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) { case 
UR_COMMAND_ENQUEUE_NATIVE_EXP: os << "UR_COMMAND_ENQUEUE_NATIVE_EXP"; break; + case UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP: + os << "UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP"; + break; + case UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP: + os << "UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP"; + break; + case UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP: + os << "UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP"; + break; + case UR_COMMAND_ENQUEUE_USM_FREE_EXP: + os << "UR_COMMAND_ENQUEUE_USM_FREE_EXP"; + break; default: os << "unknown enumerator"; break; @@ -10407,6 +10477,120 @@ printFlag(std::ostream &os, } } // namespace ur::details /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_native_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, + const struct ur_usm_pool_native_desc_t params) { + os << "(struct ur_usm_pool_native_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, (params.pNext)); + + os << ", "; + os << ".pMem = "; + + ur::details::printPtr(os, (params.pMem)); + + os << ", "; + os << ".size = "; + + os << (params.size); + + os << ", "; + os << ".memType = "; + + os << (params.memType); + + os << ", "; + os << ".device = "; + + ur::details::printPtr(os, (params.device)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_enqueue_usm_alloc_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, + enum ur_exp_enqueue_usm_alloc_flag_t value) { + switch (value) { + case UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD: + os << "UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief 
Print ur_exp_enqueue_usm_alloc_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, + uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD) == + (uint32_t)UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD) { + val ^= (uint32_t)UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_enqueue_usm_alloc_properties_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, + const struct ur_exp_enqueue_usm_alloc_properties_t params) { + os << "(struct ur_exp_enqueue_usm_alloc_properties_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, (params.pNext)); + + os << ", "; + os << ".flags = "; + + ur::details::printFlag(os, (params.flags)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_image_copy_flag_t type /// @returns /// std::ostream & @@ -16206,42 +16390,32 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_enqueue_cooperative_kernel_launch_exp_params_t type +/// @brief Print operator for the ur_enqueue_usm_device_alloc_exp_params_t type /// @returns /// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_enqueue_cooperative_kernel_launch_exp_params_t *params) { +inline std::ostream 
&operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_usm_device_alloc_exp_params_t + *params) { os << ".hQueue = "; ur::details::printPtr(os, *(params->phQueue)); os << ", "; - os << ".hKernel = "; - - ur::details::printPtr(os, *(params->phKernel)); - - os << ", "; - os << ".workDim = "; - - os << *(params->pworkDim); - - os << ", "; - os << ".pGlobalWorkOffset = "; + os << ".pPool = "; - ur::details::printPtr(os, *(params->ppGlobalWorkOffset)); + ur::details::printPtr(os, *(params->ppPool)); os << ", "; - os << ".pGlobalWorkSize = "; + os << ".size = "; - ur::details::printPtr(os, *(params->ppGlobalWorkSize)); + os << *(params->psize); os << ", "; - os << ".pLocalWorkSize = "; + os << ".pProperties = "; - ur::details::printPtr(os, *(params->ppLocalWorkSize)); + ur::details::printPtr(os, *(params->ppProperties)); os << ", "; os << ".numEventsInWaitList = "; @@ -16264,6 +16438,11 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct os << "}"; } + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, *(params->pppMem)); + os << ", "; os << ".phEvent = "; @@ -16273,13 +16452,12 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_enqueue_timestamp_recording_exp_params_t -/// type +/// @brief Print operator for the ur_enqueue_usm_shared_alloc_exp_params_t type /// @returns /// std::ostream & inline std::ostream &operator<<( std::ostream &os, - [[maybe_unused]] const struct ur_enqueue_timestamp_recording_exp_params_t + [[maybe_unused]] const struct ur_enqueue_usm_shared_alloc_exp_params_t *params) { os << ".hQueue = "; @@ -16287,9 +16465,19 @@ inline std::ostream &operator<<( ur::details::printPtr(os, *(params->phQueue)); os << ", "; - os << ".blocking = "; + os << ".pPool = "; - os << *(params->pblocking); + ur::details::printPtr(os, *(params->ppPool)); + + os << ", "; + os << ".size = "; + + os << 
*(params->psize); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, *(params->ppProperties)); os << ", "; os << ".numEventsInWaitList = "; @@ -16312,6 +16500,11 @@ inline std::ostream &operator<<( os << "}"; } + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, *(params->pppMem)); + os << ", "; os << ".phEvent = "; @@ -16321,12 +16514,12 @@ inline std::ostream &operator<<( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_enqueue_native_command_exp_params_t type +/// @brief Print operator for the ur_enqueue_usm_host_alloc_exp_params_t type /// @returns /// std::ostream & inline std::ostream & operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_enqueue_native_command_exp_params_t + [[maybe_unused]] const struct ur_enqueue_usm_host_alloc_exp_params_t *params) { os << ".hQueue = "; @@ -16334,35 +16527,14 @@ operator<<(std::ostream &os, ur::details::printPtr(os, *(params->phQueue)); os << ", "; - os << ".pfnNativeEnqueue = "; - - os << reinterpret_cast(*(params->ppfnNativeEnqueue)); - - os << ", "; - os << ".data = "; - - ur::details::printPtr(os, *(params->pdata)); - - os << ", "; - os << ".numMemsInMemList = "; + os << ".pPool = "; - os << *(params->pnumMemsInMemList); + ur::details::printPtr(os, *(params->ppPool)); os << ", "; - os << ".phMemList = "; - ur::details::printPtr(os, - reinterpret_cast(*(params->pphMemList))); - if (*(params->pphMemList) != NULL) { - os << " {"; - for (size_t i = 0; i < *params->pnumMemsInMemList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".size = "; - ur::details::printPtr(os, (*(params->pphMemList))[i]); - } - os << "}"; - } + os << *(params->psize); os << ", "; os << ".pProperties = "; @@ -16390,6 +16562,11 @@ operator<<(std::ostream &os, os << "}"; } + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, *(params->pppMem)); + os << ", "; os << ".phEvent = "; @@ -16399,129 +16576,291 @@ 
operator<<(std::ostream &os, } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type +/// @brief Print operator for the ur_enqueue_usm_free_exp_params_t type /// @returns /// std::ostream & inline std::ostream &operator<<( - std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *params) { + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_usm_free_exp_params_t *params) { - os << ".hContext = "; + os << ".hQueue = "; - ur::details::printPtr(os, *(params->phContext)); + ur::details::printPtr(os, *(params->phQueue)); os << ", "; - os << ".hDevice = "; + os << ".pPool = "; - ur::details::printPtr(os, *(params->phDevice)); + ur::details::printPtr(os, *(params->ppPool)); os << ", "; - os << ".hImage = "; - - ur::details::printPtr(os, reinterpret_cast(*(params->phImage))); - - return os; -} + os << ".pMem = "; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_sampled_image_handle_destroy_exp_params_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<( - std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_sampled_image_handle_destroy_exp_params_t *params) { + ur::details::printPtr(os, *(params->ppMem)); - os << ".hContext = "; + os << ", "; + os << ".numEventsInWaitList = "; - ur::details::printPtr(os, *(params->phContext)); + os << *(params->pnumEventsInWaitList); os << ", "; - os << ".hDevice = "; - - ur::details::printPtr(os, *(params->phDevice)); + os << ".phEventWaitList = "; + ur::details::printPtr( + os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, 
(*(params->pphEventWaitList))[i]); + } + os << "}"; + } os << ", "; - os << ".hImage = "; + os << ".phEvent = "; - ur::details::printPtr(os, reinterpret_cast(*(params->phImage))); + ur::details::printPtr(os, *(params->pphEvent)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_image_allocate_exp_params_t -/// type +/// @brief Print operator for the +/// ur_enqueue_cooperative_kernel_launch_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<( - std::ostream &os, - [[maybe_unused]] const struct ur_bindless_images_image_allocate_exp_params_t - *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_enqueue_cooperative_kernel_launch_exp_params_t *params) { - os << ".hContext = "; + os << ".hQueue = "; - ur::details::printPtr(os, *(params->phContext)); + ur::details::printPtr(os, *(params->phQueue)); os << ", "; - os << ".hDevice = "; + os << ".hKernel = "; - ur::details::printPtr(os, *(params->phDevice)); + ur::details::printPtr(os, *(params->phKernel)); os << ", "; - os << ".pImageFormat = "; + os << ".workDim = "; - ur::details::printPtr(os, *(params->ppImageFormat)); + os << *(params->pworkDim); os << ", "; - os << ".pImageDesc = "; + os << ".pGlobalWorkOffset = "; - ur::details::printPtr(os, *(params->ppImageDesc)); + ur::details::printPtr(os, *(params->ppGlobalWorkOffset)); os << ", "; - os << ".phImageMem = "; + os << ".pGlobalWorkSize = "; - ur::details::printPtr(os, *(params->pphImageMem)); + ur::details::printPtr(os, *(params->ppGlobalWorkSize)); + + os << ", "; + os << ".pLocalWorkSize = "; + + ur::details::printPtr(os, *(params->ppLocalWorkSize)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = "; + ur::details::printPtr( + os, reinterpret_cast(*(params->pphEventWaitList))); + if 
(*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, (*(params->pphEventWaitList))[i]); + } + os << "}"; + } + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, *(params->pphEvent)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_image_free_exp_params_t +/// @brief Print operator for the ur_enqueue_timestamp_recording_exp_params_t /// type /// @returns /// std::ostream & inline std::ostream &operator<<( std::ostream &os, - [[maybe_unused]] const struct ur_bindless_images_image_free_exp_params_t + [[maybe_unused]] const struct ur_enqueue_timestamp_recording_exp_params_t *params) { + os << ".hQueue = "; + + ur::details::printPtr(os, *(params->phQueue)); + + os << ", "; + os << ".blocking = "; + + os << *(params->pblocking); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = "; + ur::details::printPtr( + os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, (*(params->pphEventWaitList))[i]); + } + os << "}"; + } + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, *(params->pphEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_native_command_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_native_command_exp_params_t + *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, *(params->phQueue)); + + os << ", "; + os << ".pfnNativeEnqueue 
= "; + + os << reinterpret_cast(*(params->ppfnNativeEnqueue)); + + os << ", "; + os << ".data = "; + + ur::details::printPtr(os, *(params->pdata)); + + os << ", "; + os << ".numMemsInMemList = "; + + os << *(params->pnumMemsInMemList); + + os << ", "; + os << ".phMemList = "; + ur::details::printPtr(os, + reinterpret_cast(*(params->pphMemList))); + if (*(params->pphMemList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumMemsInMemList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, (*(params->pphMemList))[i]); + } + os << "}"; + } + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, *(params->ppProperties)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = "; + ur::details::printPtr( + os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, (*(params->pphEventWaitList))[i]); + } + os << "}"; + } + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, *(params->pphEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_host_alloc_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_host_alloc_params_t *params) { + os << ".hContext = "; ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".pUSMDesc = "; - ur::details::printPtr(os, *(params->phDevice)); + ur::details::printPtr(os, *(params->ppUSMDesc)); os << ", "; - os << ".hImageMem = "; + os << ".pool = "; - ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); + ur::details::printPtr(os, *(params->ppool)); + + os << ", "; + os << ".size = "; + + 
os << *(params->psize); + + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, *(params->pppMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_unsampled_image_create_exp_params_t type +/// @brief Print operator for the ur_usm_device_alloc_params_t type /// @returns /// std::ostream & inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_unsampled_image_create_exp_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_device_alloc_params_t *params) { os << ".hContext = "; @@ -16533,36 +16872,35 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".hImageMem = "; + os << ".pUSMDesc = "; - ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); + ur::details::printPtr(os, *(params->ppUSMDesc)); os << ", "; - os << ".pImageFormat = "; + os << ".pool = "; - ur::details::printPtr(os, *(params->ppImageFormat)); + ur::details::printPtr(os, *(params->ppool)); os << ", "; - os << ".pImageDesc = "; + os << ".size = "; - ur::details::printPtr(os, *(params->ppImageDesc)); + os << *(params->psize); os << ", "; - os << ".phImage = "; + os << ".ppMem = "; - ur::details::printPtr(os, *(params->pphImage)); + ur::details::printPtr(os, *(params->pppMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_sampled_image_create_exp_params_t type +/// @brief Print operator for the ur_usm_shared_alloc_params_t type /// @returns /// std::ostream & inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_sampled_image_create_exp_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_shared_alloc_params_t *params) { os << 
".hContext = "; @@ -16574,145 +16912,169 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".hImageMem = "; + os << ".pUSMDesc = "; - ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); + ur::details::printPtr(os, *(params->ppUSMDesc)); os << ", "; - os << ".pImageFormat = "; + os << ".pool = "; - ur::details::printPtr(os, *(params->ppImageFormat)); + ur::details::printPtr(os, *(params->ppool)); os << ", "; - os << ".pImageDesc = "; + os << ".size = "; - ur::details::printPtr(os, *(params->ppImageDesc)); + os << *(params->psize); os << ", "; - os << ".hSampler = "; + os << ".ppMem = "; - ur::details::printPtr(os, *(params->phSampler)); + ur::details::printPtr(os, *(params->pppMem)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_free_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_free_params_t *params) { + + os << ".hContext = "; + + ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".phImage = "; + os << ".pMem = "; - ur::details::printPtr(os, *(params->pphImage)); + ur::details::printPtr(os, *(params->ppMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_image_copy_exp_params_t -/// type +/// @brief Print operator for the ur_usm_get_mem_alloc_info_params_t type /// @returns /// std::ostream & inline std::ostream &operator<<( std::ostream &os, - [[maybe_unused]] const struct ur_bindless_images_image_copy_exp_params_t - *params) { + [[maybe_unused]] const struct ur_usm_get_mem_alloc_info_params_t *params) { - os << ".hQueue = "; + os << ".hContext = "; - ur::details::printPtr(os, *(params->phQueue)); + ur::details::printPtr(os, *(params->phContext)); os << ", "; - os 
<< ".pSrc = "; + os << ".pMem = "; - ur::details::printPtr(os, *(params->ppSrc)); + ur::details::printPtr(os, *(params->ppMem)); os << ", "; - os << ".pDst = "; + os << ".propName = "; - ur::details::printPtr(os, *(params->ppDst)); + os << *(params->ppropName); os << ", "; - os << ".pSrcImageDesc = "; + os << ".propSize = "; - ur::details::printPtr(os, *(params->ppSrcImageDesc)); + os << *(params->ppropSize); os << ", "; - os << ".pDstImageDesc = "; - - ur::details::printPtr(os, *(params->ppDstImageDesc)); + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), + *(params->ppropSize)); os << ", "; - os << ".pSrcImageFormat = "; + os << ".pPropSizeRet = "; - ur::details::printPtr(os, *(params->ppSrcImageFormat)); + ur::details::printPtr(os, *(params->ppPropSizeRet)); - os << ", "; - os << ".pDstImageFormat = "; + return os; +} - ur::details::printPtr(os, *(params->ppDstImageFormat)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_create_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_pool_create_params_t *params) { - os << ", "; - os << ".pCopyRegion = "; + os << ".hContext = "; - ur::details::printPtr(os, *(params->ppCopyRegion)); + ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".imageCopyFlags = "; + os << ".pPoolDesc = "; - ur::details::printFlag(os, - *(params->pimageCopyFlags)); + ur::details::printPtr(os, *(params->ppPoolDesc)); os << ", "; - os << ".numEventsInWaitList = "; + os << ".ppPool = "; + + ur::details::printPtr(os, *(params->pppPool)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const 
struct ur_usm_pool_retain_params_t *params) { + + os << ".pPool = "; - os << *(params->pnumEventsInWaitList); + ur::details::printPtr(os, *(params->ppPool)); - os << ", "; - os << ".phEventWaitList = "; - ur::details::printPtr( - os, reinterpret_cast(*(params->pphEventWaitList))); - if (*(params->pphEventWaitList) != NULL) { - os << " {"; - for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + return os; +} - ur::details::printPtr(os, (*(params->pphEventWaitList))[i]); - } - os << "}"; - } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_pool_release_params_t *params) { - os << ", "; - os << ".phEvent = "; + os << ".pPool = "; - ur::details::printPtr(os, *(params->pphEvent)); + ur::details::printPtr(os, *(params->ppPool)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_image_get_info_exp_params_t -/// type +/// @brief Print operator for the ur_usm_pool_get_info_params_t type /// @returns /// std::ostream & inline std::ostream &operator<<( std::ostream &os, - [[maybe_unused]] const struct ur_bindless_images_image_get_info_exp_params_t - *params) { + [[maybe_unused]] const struct ur_usm_pool_get_info_params_t *params) { - os << ".hContext = "; + os << ".hPool = "; - ur::details::printPtr(os, *(params->phContext)); + ur::details::printPtr(os, *(params->phPool)); os << ", "; - os << ".hImageMem = "; + os << ".propName = "; - ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); + os << *(params->ppropName); os << ", "; - os << ".propName = "; + os << ".propSize = "; - os << *(params->ppropName); + os << *(params->ppropSize); os << ", "; os << ".pPropValue = "; - - ur::details::printPtr(os, 
*(params->ppPropValue)); + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), + *(params->ppropSize)); os << ", "; os << ".pPropSizeRet = "; @@ -16723,13 +17085,12 @@ inline std::ostream &operator<<( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_mipmap_get_level_exp_params_t type +/// @brief Print operator for the ur_usm_pitched_alloc_exp_params_t type /// @returns /// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_mipmap_get_level_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_usm_pitched_alloc_exp_params_t *params) { os << ".hContext = "; @@ -16741,99 +17102,96 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".hImageMem = "; + os << ".pUSMDesc = "; - ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); + ur::details::printPtr(os, *(params->ppUSMDesc)); os << ", "; - os << ".mipmapLevel = "; + os << ".pool = "; - os << *(params->pmipmapLevel); + ur::details::printPtr(os, *(params->ppool)); os << ", "; - os << ".phImageMem = "; + os << ".widthInBytes = "; - ur::details::printPtr(os, *(params->pphImageMem)); + os << *(params->pwidthInBytes); - return os; -} + os << ", "; + os << ".height = "; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_mipmap_free_exp_params_t -/// type -/// @returns -/// std::ostream & -inline std::ostream &operator<<( - std::ostream &os, - [[maybe_unused]] const struct ur_bindless_images_mipmap_free_exp_params_t - *params) { + os << *(params->pheight); - os << ".hContext = "; + os << ", "; + os << ".elementSizeBytes = "; - ur::details::printPtr(os, *(params->phContext)); + os << *(params->pelementSizeBytes); os 
<< ", "; - os << ".hDevice = "; + os << ".ppMem = "; - ur::details::printPtr(os, *(params->phDevice)); + ur::details::printPtr(os, *(params->pppMem)); os << ", "; - os << ".hMem = "; + os << ".pResultPitch = "; - ur::details::printPtr(os, reinterpret_cast(*(params->phMem))); + ur::details::printPtr(os, *(params->ppResultPitch)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_import_external_memory_exp_params_t type +/// @brief Print operator for the ur_usm_import_exp_params_t type /// @returns /// std::ostream & inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_import_external_memory_exp_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_import_exp_params_t *params) { os << ".hContext = "; ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".pMem = "; - ur::details::printPtr(os, *(params->phDevice)); + ur::details::printPtr(os, *(params->ppMem)); os << ", "; os << ".size = "; os << *(params->psize); - os << ", "; - os << ".memHandleType = "; + return os; +} - os << *(params->pmemHandleType); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_release_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_release_exp_params_t *params) { - os << ", "; - os << ".pExternalMemDesc = "; + os << ".hContext = "; - ur::details::printPtr(os, *(params->ppExternalMemDesc)); + ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".phExternalMem = "; + os << ".pMem = "; - ur::details::printPtr(os, *(params->pphExternalMem)); + ur::details::printPtr(os, *(params->ppMem)); return os; } /////////////////////////////////////////////////////////////////////////////// /// @brief 
Print operator for the -/// ur_bindless_images_map_external_array_exp_params_t type +/// ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type /// @returns /// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_map_external_array_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *params) { os << ".hContext = "; @@ -16845,36 +17203,48 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".pImageFormat = "; + os << ".hImage = "; - ur::details::printPtr(os, *(params->ppImageFormat)); + ur::details::printPtr(os, reinterpret_cast(*(params->phImage))); - os << ", "; - os << ".pImageDesc = "; + return os; +} - ur::details::printPtr(os, *(params->ppImageDesc)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the +/// ur_bindless_images_sampled_image_handle_destroy_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<( + std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_sampled_image_handle_destroy_exp_params_t *params) { + + os << ".hContext = "; + + ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".hExternalMem = "; + os << ".hDevice = "; - ur::details::printPtr(os, *(params->phExternalMem)); + ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".phImageMem = "; + os << ".hImage = "; - ur::details::printPtr(os, *(params->pphImageMem)); + ur::details::printPtr(os, reinterpret_cast(*(params->phImage))); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_map_external_linear_memory_exp_params_t type +/// @brief Print operator for the 
ur_bindless_images_image_allocate_exp_params_t +/// type /// @returns /// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_map_external_linear_memory_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_bindless_images_image_allocate_exp_params_t + *params) { os << ".hContext = "; @@ -16886,36 +17256,32 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".offset = "; - - os << *(params->poffset); - - os << ", "; - os << ".size = "; + os << ".pImageFormat = "; - os << *(params->psize); + ur::details::printPtr(os, *(params->ppImageFormat)); os << ", "; - os << ".hExternalMem = "; + os << ".pImageDesc = "; - ur::details::printPtr(os, *(params->phExternalMem)); + ur::details::printPtr(os, *(params->ppImageDesc)); os << ", "; - os << ".ppRetMem = "; + os << ".phImageMem = "; - ur::details::printPtr(os, *(params->pppRetMem)); + ur::details::printPtr(os, *(params->pphImageMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_release_external_memory_exp_params_t type +/// @brief Print operator for the ur_bindless_images_image_free_exp_params_t +/// type /// @returns /// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_release_external_memory_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_bindless_images_image_free_exp_params_t + *params) { os << ".hContext = "; @@ -16927,21 +17293,21 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".hExternalMem = "; + os << ".hImageMem = "; - ur::details::printPtr(os, *(params->phExternalMem)); + 
ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); return os; } /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the -/// ur_bindless_images_import_external_semaphore_exp_params_t type +/// ur_bindless_images_unsampled_image_create_exp_params_t type /// @returns /// std::ostream & inline std::ostream & operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_import_external_semaphore_exp_params_t *params) { + ur_bindless_images_unsampled_image_create_exp_params_t *params) { os << ".hContext = "; @@ -16953,133 +17319,128 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".semHandleType = "; + os << ".hImageMem = "; - os << *(params->psemHandleType); + ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); os << ", "; - os << ".pExternalSemaphoreDesc = "; + os << ".pImageFormat = "; - ur::details::printPtr(os, *(params->ppExternalSemaphoreDesc)); + ur::details::printPtr(os, *(params->ppImageFormat)); os << ", "; - os << ".phExternalSemaphore = "; + os << ".pImageDesc = "; - ur::details::printPtr(os, *(params->pphExternalSemaphore)); + ur::details::printPtr(os, *(params->ppImageDesc)); + + os << ", "; + os << ".phImage = "; + + ur::details::printPtr(os, *(params->pphImage)); return os; } /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the -/// ur_bindless_images_release_external_semaphore_exp_params_t type +/// ur_bindless_images_sampled_image_create_exp_params_t type /// @returns /// std::ostream & inline std::ostream & operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_release_external_semaphore_exp_params_t *params) { + ur_bindless_images_sampled_image_create_exp_params_t *params) { os << ".hContext = "; - ur::details::printPtr(os, *(params->phContext)); + ur::details::printPtr(os, 
*(params->phContext)); + + os << ", "; + os << ".hDevice = "; + + ur::details::printPtr(os, *(params->phDevice)); + + os << ", "; + os << ".hImageMem = "; + + ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); + + os << ", "; + os << ".pImageFormat = "; + + ur::details::printPtr(os, *(params->ppImageFormat)); + + os << ", "; + os << ".pImageDesc = "; + + ur::details::printPtr(os, *(params->ppImageDesc)); os << ", "; - os << ".hDevice = "; + os << ".hSampler = "; - ur::details::printPtr(os, *(params->phDevice)); + ur::details::printPtr(os, *(params->phSampler)); os << ", "; - os << ".hExternalSemaphore = "; + os << ".phImage = "; - ur::details::printPtr(os, *(params->phExternalSemaphore)); + ur::details::printPtr(os, *(params->pphImage)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_wait_external_semaphore_exp_params_t type +/// @brief Print operator for the ur_bindless_images_image_copy_exp_params_t +/// type /// @returns /// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_wait_external_semaphore_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_bindless_images_image_copy_exp_params_t + *params) { os << ".hQueue = "; ur::details::printPtr(os, *(params->phQueue)); os << ", "; - os << ".hSemaphore = "; - - ur::details::printPtr(os, *(params->phSemaphore)); - - os << ", "; - os << ".hasWaitValue = "; + os << ".pSrc = "; - os << *(params->phasWaitValue); + ur::details::printPtr(os, *(params->ppSrc)); os << ", "; - os << ".waitValue = "; + os << ".pDst = "; - os << *(params->pwaitValue); + ur::details::printPtr(os, *(params->ppDst)); os << ", "; - os << ".numEventsInWaitList = "; + os << ".pSrcImageDesc = "; - os << *(params->pnumEventsInWaitList); + ur::details::printPtr(os, *(params->ppSrcImageDesc)); os << ", "; - 
os << ".phEventWaitList = "; - ur::details::printPtr( - os, reinterpret_cast(*(params->pphEventWaitList))); - if (*(params->pphEventWaitList) != NULL) { - os << " {"; - for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pDstImageDesc = "; - ur::details::printPtr(os, (*(params->pphEventWaitList))[i]); - } - os << "}"; - } + ur::details::printPtr(os, *(params->ppDstImageDesc)); os << ", "; - os << ".phEvent = "; - - ur::details::printPtr(os, *(params->pphEvent)); - - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_bindless_images_signal_external_semaphore_exp_params_t type -/// @returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_bindless_images_signal_external_semaphore_exp_params_t *params) { - - os << ".hQueue = "; + os << ".pSrcImageFormat = "; - ur::details::printPtr(os, *(params->phQueue)); + ur::details::printPtr(os, *(params->ppSrcImageFormat)); os << ", "; - os << ".hSemaphore = "; + os << ".pDstImageFormat = "; - ur::details::printPtr(os, *(params->phSemaphore)); + ur::details::printPtr(os, *(params->ppDstImageFormat)); os << ", "; - os << ".hasSignalValue = "; + os << ".pCopyRegion = "; - os << *(params->phasSignalValue); + ur::details::printPtr(os, *(params->ppCopyRegion)); os << ", "; - os << ".signalValue = "; + os << ".imageCopyFlags = "; - os << *(params->psignalValue); + ur::details::printFlag(os, + *(params->pimageCopyFlags)); os << ", "; os << ".numEventsInWaitList = "; @@ -17111,47 +17472,50 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_host_alloc_params_t type +/// @brief Print operator for the ur_bindless_images_image_get_info_exp_params_t +/// type /// @returns /// std::ostream & -inline std::ostream 
& -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_usm_host_alloc_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_bindless_images_image_get_info_exp_params_t + *params) { os << ".hContext = "; ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".pUSMDesc = "; + os << ".hImageMem = "; - ur::details::printPtr(os, *(params->ppUSMDesc)); + ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); os << ", "; - os << ".pool = "; + os << ".propName = "; - ur::details::printPtr(os, *(params->ppool)); + os << *(params->ppropName); os << ", "; - os << ".size = "; + os << ".pPropValue = "; - os << *(params->psize); + ur::details::printPtr(os, *(params->ppPropValue)); os << ", "; - os << ".ppMem = "; + os << ".pPropSizeRet = "; - ur::details::printPtr(os, *(params->pppMem)); + ur::details::printPtr(os, *(params->ppPropSizeRet)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_device_alloc_params_t type +/// @brief Print operator for the +/// ur_bindless_images_mipmap_get_level_exp_params_t type /// @returns /// std::ostream & inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_usm_device_alloc_params_t *params) { +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_mipmap_get_level_exp_params_t *params) { os << ".hContext = "; @@ -17163,35 +17527,32 @@ operator<<(std::ostream &os, ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".pUSMDesc = "; - - ur::details::printPtr(os, *(params->ppUSMDesc)); - - os << ", "; - os << ".pool = "; + os << ".hImageMem = "; - ur::details::printPtr(os, *(params->ppool)); + ur::details::printPtr(os, reinterpret_cast(*(params->phImageMem))); os << ", "; - os << ".size = "; + os << ".mipmapLevel = "; - os << *(params->psize); + os << *(params->pmipmapLevel); os << ", "; - os << 
".ppMem = "; + os << ".phImageMem = "; - ur::details::printPtr(os, *(params->pppMem)); + ur::details::printPtr(os, *(params->pphImageMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_shared_alloc_params_t type +/// @brief Print operator for the ur_bindless_images_mipmap_free_exp_params_t +/// type /// @returns /// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_usm_shared_alloc_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_bindless_images_mipmap_free_exp_params_t + *params) { os << ".hContext = "; @@ -17203,14 +17564,30 @@ operator<<(std::ostream &os, ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".pUSMDesc = "; + os << ".hMem = "; - ur::details::printPtr(os, *(params->ppUSMDesc)); + ur::details::printPtr(os, reinterpret_cast(*(params->phMem))); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the +/// ur_bindless_images_import_external_memory_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_import_external_memory_exp_params_t *params) { + + os << ".hContext = "; + + ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".pool = "; + os << ".hDevice = "; - ur::details::printPtr(os, *(params->ppool)); + ur::details::printPtr(os, *(params->phDevice)); os << ", "; os << ".size = "; @@ -17218,170 +17595,175 @@ operator<<(std::ostream &os, os << *(params->psize); os << ", "; - os << ".ppMem = "; - - ur::details::printPtr(os, *(params->pppMem)); - - return os; -} + os << ".memHandleType = "; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_free_params_t type -/// 
@returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_usm_free_params_t *params) { + os << *(params->pmemHandleType); - os << ".hContext = "; + os << ", "; + os << ".pExternalMemDesc = "; - ur::details::printPtr(os, *(params->phContext)); + ur::details::printPtr(os, *(params->ppExternalMemDesc)); os << ", "; - os << ".pMem = "; + os << ".phExternalMem = "; - ur::details::printPtr(os, *(params->ppMem)); + ur::details::printPtr(os, *(params->pphExternalMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_get_mem_alloc_info_params_t type +/// @brief Print operator for the +/// ur_bindless_images_map_external_array_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<( - std::ostream &os, - [[maybe_unused]] const struct ur_usm_get_mem_alloc_info_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_map_external_array_exp_params_t *params) { os << ".hContext = "; ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".pMem = "; + os << ".hDevice = "; - ur::details::printPtr(os, *(params->ppMem)); + ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".propName = "; + os << ".pImageFormat = "; - os << *(params->ppropName); + ur::details::printPtr(os, *(params->ppImageFormat)); os << ", "; - os << ".propSize = "; + os << ".pImageDesc = "; - os << *(params->ppropSize); + ur::details::printPtr(os, *(params->ppImageDesc)); os << ", "; - os << ".pPropValue = "; - ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + os << ".hExternalMem = "; + + ur::details::printPtr(os, *(params->phExternalMem)); os << ", "; - os << ".pPropSizeRet = "; + os << ".phImageMem = "; - ur::details::printPtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, 
*(params->pphImageMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pool_create_params_t type +/// @brief Print operator for the +/// ur_bindless_images_map_external_linear_memory_exp_params_t type /// @returns /// std::ostream & inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_usm_pool_create_params_t *params) { +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_map_external_linear_memory_exp_params_t *params) { os << ".hContext = "; ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".pPoolDesc = "; + os << ".hDevice = "; - ur::details::printPtr(os, *(params->ppPoolDesc)); + ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".ppPool = "; + os << ".offset = "; - ur::details::printPtr(os, *(params->pppPool)); + os << *(params->poffset); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".hExternalMem = "; + + ur::details::printPtr(os, *(params->phExternalMem)); + + os << ", "; + os << ".ppRetMem = "; + + ur::details::printPtr(os, *(params->pppRetMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pool_retain_params_t type +/// @brief Print operator for the +/// ur_bindless_images_release_external_memory_exp_params_t type /// @returns /// std::ostream & inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_usm_pool_retain_params_t *params) { +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_release_external_memory_exp_params_t *params) { - os << ".pPool = "; + os << ".hContext = "; - ur::details::printPtr(os, *(params->ppPool)); + ur::details::printPtr(os, *(params->phContext)); - return os; -} + os << ", "; + os << ".hDevice = "; 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pool_release_params_t type -/// @returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_usm_pool_release_params_t *params) { + ur::details::printPtr(os, *(params->phDevice)); - os << ".pPool = "; + os << ", "; + os << ".hExternalMem = "; - ur::details::printPtr(os, *(params->ppPool)); + ur::details::printPtr(os, *(params->phExternalMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pool_get_info_params_t type +/// @brief Print operator for the +/// ur_bindless_images_import_external_semaphore_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<( - std::ostream &os, - [[maybe_unused]] const struct ur_usm_pool_get_info_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_import_external_semaphore_exp_params_t *params) { - os << ".hPool = "; + os << ".hContext = "; - ur::details::printPtr(os, *(params->phPool)); + ur::details::printPtr(os, *(params->phContext)); os << ", "; - os << ".propName = "; + os << ".hDevice = "; - os << *(params->ppropName); + ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".propSize = "; + os << ".semHandleType = "; - os << *(params->ppropSize); + os << *(params->psemHandleType); os << ", "; - os << ".pPropValue = "; - ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), - *(params->ppropSize)); + os << ".pExternalSemaphoreDesc = "; + + ur::details::printPtr(os, *(params->ppExternalSemaphoreDesc)); os << ", "; - os << ".pPropSizeRet = "; + os << ".phExternalSemaphore = "; - ur::details::printPtr(os, *(params->ppPropSizeRet)); + ur::details::printPtr(os, *(params->pphExternalSemaphore)); return os; } 
/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pitched_alloc_exp_params_t type +/// @brief Print operator for the +/// ur_bindless_images_release_external_semaphore_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<( - std::ostream &os, - [[maybe_unused]] const struct ur_usm_pitched_alloc_exp_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_release_external_semaphore_exp_params_t *params) { os << ".hContext = "; @@ -17393,84 +17775,123 @@ inline std::ostream &operator<<( ur::details::printPtr(os, *(params->phDevice)); os << ", "; - os << ".pUSMDesc = "; + os << ".hExternalSemaphore = "; - ur::details::printPtr(os, *(params->ppUSMDesc)); + ur::details::printPtr(os, *(params->phExternalSemaphore)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the +/// ur_bindless_images_wait_external_semaphore_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_wait_external_semaphore_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, *(params->phQueue)); os << ", "; - os << ".pool = "; + os << ".hSemaphore = "; - ur::details::printPtr(os, *(params->ppool)); + ur::details::printPtr(os, *(params->phSemaphore)); os << ", "; - os << ".widthInBytes = "; + os << ".hasWaitValue = "; - os << *(params->pwidthInBytes); + os << *(params->phasWaitValue); os << ", "; - os << ".height = "; + os << ".waitValue = "; - os << *(params->pheight); + os << *(params->pwaitValue); os << ", "; - os << ".elementSizeBytes = "; + os << ".numEventsInWaitList = "; - os << *(params->pelementSizeBytes); + os << *(params->pnumEventsInWaitList); os << ", "; - os << ".ppMem = "; + os << ".phEventWaitList = "; + 
ur::details::printPtr( + os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, *(params->pppMem)); + ur::details::printPtr(os, (*(params->pphEventWaitList))[i]); + } + os << "}"; + } os << ", "; - os << ".pResultPitch = "; + os << ".phEvent = "; - ur::details::printPtr(os, *(params->ppResultPitch)); + ur::details::printPtr(os, *(params->pphEvent)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_import_exp_params_t type +/// @brief Print operator for the +/// ur_bindless_images_signal_external_semaphore_exp_params_t type /// @returns /// std::ostream & inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_usm_import_exp_params_t *params) { +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_signal_external_semaphore_exp_params_t *params) { - os << ".hContext = "; + os << ".hQueue = "; - ur::details::printPtr(os, *(params->phContext)); + ur::details::printPtr(os, *(params->phQueue)); os << ", "; - os << ".pMem = "; + os << ".hSemaphore = "; - ur::details::printPtr(os, *(params->ppMem)); + ur::details::printPtr(os, *(params->phSemaphore)); os << ", "; - os << ".size = "; + os << ".hasSignalValue = "; - os << *(params->psize); + os << *(params->phasSignalValue); - return os; -} + os << ", "; + os << ".signalValue = "; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_release_exp_params_t type -/// @returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_usm_release_exp_params_t *params) { + os << *(params->psignalValue); - os << ".hContext = "; + os << ", "; + os << ".numEventsInWaitList = "; - 
ur::details::printPtr(os, *(params->phContext)); + os << *(params->pnumEventsInWaitList); os << ", "; - os << ".pMem = "; + os << ".phEventWaitList = "; + ur::details::printPtr( + os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, *(params->ppMem)); + ur::details::printPtr(os, (*(params->pphEventWaitList))[i]); + } + os << "}"; + } + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, *(params->pphEvent)); return os; } @@ -19890,6 +20311,18 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, os << (const struct ur_enqueue_events_wait_with_barrier_ext_params_t *) params; } break; + case UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP: { + os << (const struct ur_enqueue_usm_device_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP: { + os << (const struct ur_enqueue_usm_shared_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP: { + os << (const struct ur_enqueue_usm_host_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_FREE_EXP: { + os << (const struct ur_enqueue_usm_free_exp_params_t *)params; + } break; case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: { os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *) params; @@ -19900,6 +20333,42 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, case UR_FUNCTION_ENQUEUE_NATIVE_COMMAND_EXP: { os << (const struct ur_enqueue_native_command_exp_params_t *)params; } break; + case UR_FUNCTION_USM_HOST_ALLOC: { + os << (const struct ur_usm_host_alloc_params_t *)params; + } break; + case UR_FUNCTION_USM_DEVICE_ALLOC: { + os << (const struct ur_usm_device_alloc_params_t *)params; + } break; + case UR_FUNCTION_USM_SHARED_ALLOC: { + os << (const struct ur_usm_shared_alloc_params_t *)params; 
+ } break; + case UR_FUNCTION_USM_FREE: { + os << (const struct ur_usm_free_params_t *)params; + } break; + case UR_FUNCTION_USM_GET_MEM_ALLOC_INFO: { + os << (const struct ur_usm_get_mem_alloc_info_params_t *)params; + } break; + case UR_FUNCTION_USM_POOL_CREATE: { + os << (const struct ur_usm_pool_create_params_t *)params; + } break; + case UR_FUNCTION_USM_POOL_RETAIN: { + os << (const struct ur_usm_pool_retain_params_t *)params; + } break; + case UR_FUNCTION_USM_POOL_RELEASE: { + os << (const struct ur_usm_pool_release_params_t *)params; + } break; + case UR_FUNCTION_USM_POOL_GET_INFO: { + os << (const struct ur_usm_pool_get_info_params_t *)params; + } break; + case UR_FUNCTION_USM_PITCHED_ALLOC_EXP: { + os << (const struct ur_usm_pitched_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_USM_IMPORT_EXP: { + os << (const struct ur_usm_import_exp_params_t *)params; + } break; + case UR_FUNCTION_USM_RELEASE_EXP: { + os << (const struct ur_usm_release_exp_params_t *)params; + } break; case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: { os << (const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *) @@ -19969,42 +20438,6 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, os << (const struct ur_bindless_images_signal_external_semaphore_exp_params_t *)params; } break; - case UR_FUNCTION_USM_HOST_ALLOC: { - os << (const struct ur_usm_host_alloc_params_t *)params; - } break; - case UR_FUNCTION_USM_DEVICE_ALLOC: { - os << (const struct ur_usm_device_alloc_params_t *)params; - } break; - case UR_FUNCTION_USM_SHARED_ALLOC: { - os << (const struct ur_usm_shared_alloc_params_t *)params; - } break; - case UR_FUNCTION_USM_FREE: { - os << (const struct ur_usm_free_params_t *)params; - } break; - case UR_FUNCTION_USM_GET_MEM_ALLOC_INFO: { - os << (const struct ur_usm_get_mem_alloc_info_params_t *)params; - } break; - case UR_FUNCTION_USM_POOL_CREATE: { - os << (const struct ur_usm_pool_create_params_t *)params; 
- } break; - case UR_FUNCTION_USM_POOL_RETAIN: { - os << (const struct ur_usm_pool_retain_params_t *)params; - } break; - case UR_FUNCTION_USM_POOL_RELEASE: { - os << (const struct ur_usm_pool_release_params_t *)params; - } break; - case UR_FUNCTION_USM_POOL_GET_INFO: { - os << (const struct ur_usm_pool_get_info_params_t *)params; - } break; - case UR_FUNCTION_USM_PITCHED_ALLOC_EXP: { - os << (const struct ur_usm_pitched_alloc_exp_params_t *)params; - } break; - case UR_FUNCTION_USM_IMPORT_EXP: { - os << (const struct ur_usm_import_exp_params_t *)params; - } break; - case UR_FUNCTION_USM_RELEASE_EXP: { - os << (const struct ur_usm_release_exp_params_t *)params; - } break; case UR_FUNCTION_COMMAND_BUFFER_CREATE_EXP: { os << (const struct ur_command_buffer_create_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-ASYNC-ALLOC.rst b/scripts/core/EXP-ASYNC-ALLOC.rst new file mode 100644 index 0000000000..8f337febcb --- /dev/null +++ b/scripts/core/EXP-ASYNC-ALLOC.rst @@ -0,0 +1,78 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-async-allocations: + +================================================================================ +Async Allocation Functions +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- + +Asynchronous allocations can allow queues to allocate and free memory between +UR command enqueues without forcing synchronization points in the asynchronous +command DAG associated with a queue. This can allow applications to compose +memory allocation and command execution asynchronously, which can improve +performance. 
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}_device_info_t + * ${X}_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP +* ${x}_command_t + * ${X}_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP + * ${X}_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP + * ${X}_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP + * ${X}_COMMAND_ENQUEUE_USM_FREE_EXP +* ${x}_exp_enqueue_usm_alloc_flags_t + +Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +${x}_exp_enqueue_usm_alloc_properties_t + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}EnqueueUSMDeviceAllocExp +* ${x}EnqueueUSMSharedAllocExp +* ${x}EnqueueUSMHostAllocExp +* ${x}EnqueueUSMFreeExp + +Changelog +-------------------------------------------------------------------------------- + ++-----------+---------------------------+ +| Revision | Changes | ++===========+===========================+ +| 1.0 | Initial Draft | ++-----------+---------------------------+ + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return true for the new +``${X}_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP`` device info query. + + +Contributors +-------------------------------------------------------------------------------- + +* Hugh Delaney `hugh.delaney@codeplay.com `_ diff --git a/scripts/core/exp-async-alloc.yml b/scripts/core/exp-async-alloc.yml new file mode 100644 index 0000000000..40b98d35b4 --- /dev/null +++ b/scripts/core/exp-async-alloc.yml @@ -0,0 +1,239 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental API for enqueuing asynchronous USM allocations" +ordinal: "99" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support USM allocation enqueuing." +name: $x_device_info_t +etors: + - name: ENQUEUE_USM_ALLOCATIONS_EXP + value: "0x2023" + desc: "[$x_bool_t] returns true if the device supports USM allocation enqueueing" + +--- #-------------------------------------------------------------------------- +type: struct +desc: "USM native pool descriptor type" +details: + - Specify these properties in $xUSMPoolCreate via $x_usm_pool_desc_t + as part of a `pNext` chain. +class: $xUSM +name: $x_usm_pool_native_desc_t +base: $x_base_desc_t +members: + - type: "void*" + name: pMem + desc: "[in] USM memory object" + - type: "size_t" + name: size + desc: "[in] size of USM memory object" + - type: "$x_usm_type_t" + name: memType + desc: "[in] type of USM memory object" + - type: "$x_device_handle_t" + name: device + desc: "[in] device associated with the USM memory object" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Command Type experimental enumerations." 
+name: $x_command_t +etors: + - name: ENQUEUE_USM_DEVICE_ALLOC_EXP + value: "0x2005" + desc: Event created by $xEnqueueUSMDeviceAllocExp + - name: ENQUEUE_USM_SHARED_ALLOC_EXP + value: "0x2006" + desc: Event created by $xEnqueueUSMSharedAllocExp + - name: ENQUEUE_USM_HOST_ALLOC_EXP + value: "0x2007" + desc: Event created by $xEnqueueUSMHostAllocExp + - name: ENQUEUE_USM_FREE_EXP + value: "0x2008" + desc: Event created by $xEnqueueUSMFreeExp + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Enqueue USM allocation flags" +name: $x_exp_enqueue_usm_alloc_flags_t +etors: + - name: TBD + desc: "reserved for future use." + +--- #-------------------------------------------------------------------------- +type: struct +desc: "Enqueue USM allocation properties" +name: $x_exp_enqueue_usm_alloc_properties_t +base: $x_base_properties_t +members: + - type: $x_exp_enqueue_usm_alloc_flags_t + name: flags + desc: "[in] enqueue USM allocation flags" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Structure type experimental enumerations" +name: $x_structure_type_t +etors: + - name: EXP_ENQUEUE_USM_ALLOC_PROPERTIES + desc: $x_exp_enqueue_usm_alloc_properties_t + value: "0x3001" + +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an asynchronous USM device allocation" +class: $xEnqueue +name: USMDeviceAllocExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] handle of the USM memory pool" + name: pPool + - type: const size_t + desc: "[in] minimum size in bytes of the USM memory object to be allocated" + name: size + - type: const $x_exp_enqueue_usm_alloc_properties_t* + name: pProperties + desc: "[in][optional] pointer to the enqueue asynchronous USM allocation properties" + - type: uint32_t + name: 
numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + - type: void** + name: ppMem + desc: "[out] pointer to USM memory object" + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the asynchronous USM device allocation" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an asynchronous USM shared allocation" +class: $xEnqueue +name: USMSharedAllocExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] handle of the USM memory pool" + name: pPool + - type: const size_t + desc: "[in] minimum size in bytes of the USM memory object to be allocated" + name: size + - type: const $x_exp_enqueue_usm_alloc_properties_t* + name: pProperties + desc: "[in][optional] pointer to the enqueue asynchronous USM allocation properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: void** + name: ppMem + desc: "[out] pointer to USM memory object" + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the asynchronous USM shared allocation" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an asynchronous USM host allocation" +class: $xEnqueue +name: USMHostAllocExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] handle of the USM memory pool" + name: pPool + - type: const size_t + desc: "[in] minimum size in bytes of the USM memory object to be allocated" + name: size + - type: const $x_exp_enqueue_usm_alloc_properties_t* + name: pProperties + desc: "[in][optional] pointer to the enqueue asynchronous USM allocation properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: void** + name: ppMem + desc: "[out] pointer to USM memory object" + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the asynchronous USM host allocation" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an asynchronous USM deallocation" +class: $xEnqueue +name: USMFreeExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] handle of the USM memory pool" + name: pPool + - type: void* + name: pMem + desc: "[in] pointer to USM memory object" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the asynchronous USM deallocation" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index c774642482..46dd35e9ab 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -601,6 +601,18 @@ etors: - name: PHYSICAL_MEM_GET_INFO desc: Enumerator for $xPhysicalMemGetInfo value: '249' +- name: ENQUEUE_USM_DEVICE_ALLOC_EXP + desc: Enumerator for $xEnqueueUSMDeviceAllocExp + value: '250' +- name: ENQUEUE_USM_SHARED_ALLOC_EXP + desc: Enumerator for $xEnqueueUSMSharedAllocExp + value: '251' +- name: ENQUEUE_USM_HOST_ALLOC_EXP + desc: Enumerator for $xEnqueueUSMHostAllocExp + value: '252' +- name: ENQUEUE_USM_FREE_EXP + desc: Enumerator for $xEnqueueUSMFreeExp + value: '253' --- type: enum desc: Defines structure types @@ -711,3 +723,6 @@ etors: - name: USM_ALLOC_LOCATION_DESC desc: $x_usm_alloc_location_desc_t value: '35' +- name: USM_POOL_NATIVE_DESC + desc: $x_usm_pool_native_desc_t + value: '36' diff --git a/source/adapters/cuda/common.cpp b/source/adapters/cuda/common.cpp index 89500d1a1c..8a524a1cf6 100644 --- a/source/adapters/cuda/common.cpp +++ b/source/adapters/cuda/common.cpp @@ -47,8 +47,7 @@ void checkErrorUR(CUresult Result, const char *Function, int Line, cuGetErrorName(Result, &ErrorName); cuGetErrorString(Result, &ErrorString); std::stringstream SS; - SS << "\nUR CUDA ERROR:" - << "\n\tValue: " << Result + SS << "\nUR CUDA ERROR:" << "\n\tValue: " << Result << "\n\tName: " << ErrorName << "\n\tDescription: " << ErrorString << "\n\tFunction: " << Function << "\n\tSource Location: " << File @@ -70,9 +69,9 @@ void checkErrorUR(ur_result_t Result, const char *Function, int Line, } std::stringstream SS; - SS << 
"\nUR ERROR:" - << "\n\tValue: " << Result << "\n\tFunction: " << Function - << "\n\tSource Location: " << File << ":" << Line << "\n"; + SS << "\nUR ERROR:" << "\n\tValue: " << Result + << "\n\tFunction: " << Function << "\n\tSource Location: " << File + << ":" << Line << "\n"; logger::error("{}", SS.str()); if (std::getenv("PI_CUDA_ABORT") != nullptr) { diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index e40927b7a8..4c1482e2e0 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -530,3 +530,123 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( } } } + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ ur_event_handle_t *phEvent ///< [out][optional] return an event object that + ///< identifies the asynchronous USM deallocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/common.cpp b/source/adapters/hip/common.cpp index bfe158fced..e7844de8dd 100644 --- a/source/adapters/hip/common.cpp +++ b/source/adapters/hip/common.cpp @@ -80,8 +80,7 @@ void checkErrorUR(amd_comgr_status_t Result, const char *Function, int Line, break; } std::stringstream SS; - SS << "\nUR HIP ERROR:" - << "\n\tValue: " << Result + SS << "\nUR HIP ERROR:" << "\n\tValue: " << Result << "\n\tName: " << ErrorName << "\n\tDescription: " << ErrorString << "\n\tFunction: " << Function << "\n\tSource Location: " << File @@ -107,8 +106,7 @@ void checkErrorUR(hipError_t Result, const char *Function, int Line, const char *ErrorName = hipGetErrorName(Result); std::stringstream SS; - SS << "\nUR HIP ERROR:" - << "\n\tValue: " << Result + SS << "\nUR HIP ERROR:" << "\n\tValue: " << Result << "\n\tName: " << ErrorName << "\n\tDescription: " << ErrorString << "\n\tFunction: " << Function << "\n\tSource Location: " << File @@ -130,9 +128,9 @@ void checkErrorUR(ur_result_t Result, const char *Function, int Line, } std::stringstream SS; - SS << "\nUR HIP ERROR:" - << "\n\tValue: " << Result << "\n\tFunction: " << Function - << "\n\tSource Location: " << File << ":" << Line << "\n"; + SS << "\nUR HIP ERROR:" << "\n\tValue: " << Result + << "\n\tFunction: " << Function << "\n\tSource Location: " << File + << ":" << Line << "\n"; logger::error("{}", SS.str()); if (std::getenv("PI_HIP_ABORT") != nullptr || diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index 922098e4a1..fb689d2353 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -478,3 +478,123 @@ 
ur_result_t umfPoolMallocHelper(ur_usm_pool_handle_t hPool, void **ppMem, } return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, 
numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. + ur_event_handle_t *phEvent ///< [out][optional] return an event object that + ///< identifies the asynchronous USM deallocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 67dcd513e5..1fe7b4c273 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -192,115 +192,6 @@ ur_result_t urContextSetExtendedDeleter( } // namespace ur::level_zero ur_result_t ur_context_handle_t_::initialize() { - - // Helper lambda to create various USM allocators for a device. - // Note that the CCS devices and their respective subdevices share a - // common ze_device_handle and therefore, also share USM allocators. 
- auto createUSMAllocators = [this](ur_device_handle_t Device) { - auto MemProvider = umf::memoryProviderMakeUnique( - reinterpret_cast(this), Device) - .second; - auto UmfDeviceParamsHandle = getUmfParamsHandle( - DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Device]); - DeviceMemPools.emplace( - std::piecewise_construct, std::make_tuple(Device->ZeDevice), - std::make_tuple(umf::poolMakeUniqueFromOps(umfDisjointPoolOps(), - std::move(MemProvider), - UmfDeviceParamsHandle.get()) - .second)); - - MemProvider = umf::memoryProviderMakeUnique( - reinterpret_cast(this), Device) - .second; - - auto UmfSharedParamsHandle = getUmfParamsHandle( - DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Shared]); - SharedMemPools.emplace( - std::piecewise_construct, std::make_tuple(Device->ZeDevice), - std::make_tuple(umf::poolMakeUniqueFromOps(umfDisjointPoolOps(), - std::move(MemProvider), - UmfSharedParamsHandle.get()) - .second)); - - MemProvider = umf::memoryProviderMakeUnique( - reinterpret_cast(this), Device) - .second; - - auto UmfSharedROParamsHandle = getUmfParamsHandle( - DisjointPoolConfigInstance - .Configs[usm::DisjointPoolMemType::SharedReadOnly]); - SharedReadOnlyMemPools.emplace( - std::piecewise_construct, std::make_tuple(Device->ZeDevice), - std::make_tuple(umf::poolMakeUniqueFromOps( - umfDisjointPoolOps(), std::move(MemProvider), - UmfSharedROParamsHandle.get()) - .second)); - - MemProvider = umf::memoryProviderMakeUnique( - reinterpret_cast(this), Device) - .second; - DeviceMemProxyPools.emplace( - std::piecewise_construct, std::make_tuple(Device->ZeDevice), - std::make_tuple( - umf::poolMakeUnique(std::move(MemProvider)).second)); - - MemProvider = umf::memoryProviderMakeUnique( - reinterpret_cast(this), Device) - .second; - SharedMemProxyPools.emplace( - std::piecewise_construct, std::make_tuple(Device->ZeDevice), - std::make_tuple( - umf::poolMakeUnique(std::move(MemProvider)).second)); - - MemProvider = 
umf::memoryProviderMakeUnique( - reinterpret_cast(this), Device) - .second; - SharedReadOnlyMemProxyPools.emplace( - std::piecewise_construct, std::make_tuple(Device->ZeDevice), - std::make_tuple( - umf::poolMakeUnique(std::move(MemProvider)).second)); - }; - - // Recursive helper to call createUSMAllocators for all sub-devices - std::function createUSMAllocatorsRecursive; - createUSMAllocatorsRecursive = - [createUSMAllocators, - &createUSMAllocatorsRecursive](ur_device_handle_t Device) -> void { - createUSMAllocators(Device); - for (auto &SubDevice : Device->SubDevices) - createUSMAllocatorsRecursive(SubDevice); - }; - - // Create USM pool for each pair (device, context). - // - for (auto &Device : Devices) { - createUSMAllocatorsRecursive(Device); - } - // Create USM pool for host. Device and Shared USM allocations - // are device-specific. Host allocations are not device-dependent therefore - // we don't need a map with device as key. - auto MemProvider = umf::memoryProviderMakeUnique( - reinterpret_cast(this), nullptr) - .second; - auto UmfHostParamsHandle = getUmfParamsHandle( - DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Host]); - HostMemPool = - umf::poolMakeUniqueFromOps(umfDisjointPoolOps(), std::move(MemProvider), - UmfHostParamsHandle.get()) - .second; - - MemProvider = umf::memoryProviderMakeUnique( - reinterpret_cast(this), nullptr) - .second; - HostMemProxyPool = - umf::poolMakeUnique(std::move(MemProvider)).second; - - // We may allocate memory to this root device so create allocators. - if (SingleRootDevice && - DeviceMemPools.find(SingleRootDevice->ZeDevice) == DeviceMemPools.end()) { - createUSMAllocators(SingleRootDevice); - } - // Create the immediate command list to be used for initializations. 
// Created as synchronous so level-zero performs implicit synchronization and // there is no need to query for completion in the plugin @@ -311,7 +202,7 @@ ur_result_t ur_context_handle_t_::initialize() { // D2D migartion, if no P2P, is broken since it should use // immediate command-list for the specfic devices, and this single one. // - ur_device_handle_t Device = SingleRootDevice ? SingleRootDevice : Devices[0]; + ur_device_handle_t Device = Devices[0]; // Prefer to use copy engine for initialization copies, // if available and allowed (main copy engine with index 0). diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 43608e8bfc..9663f52472 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -24,6 +24,7 @@ #include "common.hpp" #include "queue.hpp" +#include "usm.hpp" #include @@ -51,15 +52,18 @@ typedef struct _ze_intel_event_sync_mode_exp_desc_t { ze_intel_event_sync_mode_exp_flags_t syncModeFlags; } ze_intel_event_sync_mode_exp_desc_t; +extern const bool UseUSMAllocator; + struct ur_context_handle_t_ : _ur_object { ur_context_handle_t_(ze_context_handle_t ZeContext, uint32_t NumDevices, const ur_device_handle_t *Devs, bool OwnZeContext) : ZeContext{ZeContext}, Devices{Devs, Devs + NumDevices}, - NumDevices{NumDevices} { + NumDevices{NumDevices}, DefaultPool{this, nullptr, !UseUSMAllocator} { OwnNativeHandle = OwnZeContext; } - ur_context_handle_t_(ze_context_handle_t ZeContext) : ZeContext{ZeContext} {} + ur_context_handle_t_(ze_context_handle_t ZeContext) + : ZeContext{ZeContext}, DefaultPool{this, nullptr} {} // A L0 context handle is primarily used during creation and management of // resources that may be used by multiple devices. @@ -94,13 +98,6 @@ struct ur_context_handle_t_ : _ur_object { // compute and copy command list caches. 
ur_mutex ZeCommandListCacheMutex; - // If context contains one device or sub-devices of the same device, we want - // to save this device. - // This field is only set at ur_context_handle_t creation time, and cannot - // change. Therefore it can be accessed without holding a lock on this - // ur_context_handle_t. - ur_device_handle_t SingleRootDevice = nullptr; - // Cache of all currently available/completed command/copy lists. // Note that command-list can only be re-used on the same device. // @@ -123,24 +120,11 @@ struct ur_context_handle_t_ : _ur_object { // Store USM pool for USM shared and device allocations. There is 1 memory // pool per each pair of (context, device) per each memory type. - std::unordered_map - DeviceMemPools; - std::unordered_map - SharedMemPools; - std::unordered_map - SharedReadOnlyMemPools; - - // Store the host memory pool. It does not depend on any device. - umf::pool_unique_handle_t HostMemPool; - - // Allocation-tracking proxy pools for direct allocations. No pooling used. - std::unordered_map - DeviceMemProxyPools; - std::unordered_map - SharedMemProxyPools; - std::unordered_map - SharedReadOnlyMemProxyPools; - umf::pool_unique_handle_t HostMemProxyPool; + // It's either a DisjointPool implementation from UMF or an + // allocation-tracking proxy pool for direct allocations that does not + // internally pool memory. Actual implementation during runtime is decided by + // the 'UseUSMAllocator' variable value. + ur_usm_pool_handle_t_ DefaultPool; // Map associating pools created with urUsmPoolCreate and internal pools std::list UsmPoolHandles{}; diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 4a5cb787dc..925dd053cc 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -1515,9 +1515,7 @@ ur_result_t urMemImageCreate( // own the image. // TODO: Implement explicit copying for acessing the image from other devices // in the context. 
- ur_device_handle_t Device = Context->SingleRootDevice - ? Context->SingleRootDevice - : Context->Devices[0]; + ur_device_handle_t Device = Context->Devices[0]; ze_image_handle_t ZeImage; ZE2UR_CALL(zeImageCreate, (Context->ZeContext, Device->ZeDevice, &ZeImageDesc, &ZeImage)); @@ -2079,58 +2077,22 @@ ur_result_t _ur_buffer::getBufferZeHandle(char *&ZeHandle, LastDeviceWithValidAllocation = Device; return UR_RESULT_SUCCESS; } - // Reads user setting on how to deal with buffers in contexts where - // all devices have the same root-device. Returns "true" if the - // preference is to have allocate on each [sub-]device and migrate - // normally (copy) to other sub-devices as needed. Returns "false" - // if the preference is to have single root-device allocations - // serve the needs of all [sub-]devices, meaning potentially more - // cross-tile traffic. - // - static const bool SingleRootDeviceBufferMigration = [] { - const char *UrRet = - std::getenv("UR_L0_SINGLE_ROOT_DEVICE_BUFFER_MIGRATION"); - const char *PiRet = - std::getenv("SYCL_PI_LEVEL_ZERO_SINGLE_ROOT_DEVICE_BUFFER_MIGRATION"); - const char *EnvStr = UrRet ? UrRet : (PiRet ? PiRet : nullptr); - if (EnvStr) - return (std::stoi(EnvStr) != 0); - // The default is to migrate normally, which may not always be the - // best option (depends on buffer access patterns), but is an - // overall win on the set of the available benchmarks. - return true; - }(); // Peform actual device allocation as needed. if (!Allocation.ZeHandle) { - if (!SingleRootDeviceBufferMigration && UrContext->SingleRootDevice && - UrContext->SingleRootDevice != Device) { - // If all devices in the context are sub-devices of the same device - // then we reuse root-device allocation by all sub-devices in the - // context. - // TODO: we can probably generalize this and share root-device - // allocations by its own sub-devices even if not all other - // devices in the context have the same root. 
- UR_CALL(getZeHandle(ZeHandle, AccessMode, UrContext->SingleRootDevice, - phWaitEvents, numWaitEvents)); - Allocation.ReleaseAction = allocation_t::keep; - Allocation.ZeHandle = ZeHandle; - Allocation.Valid = true; - return UR_RESULT_SUCCESS; - } else { // Create device allocation - if (DisjointPoolConfigInstance.EnableBuffers) { - Allocation.ReleaseAction = allocation_t::free; - ur_usm_desc_t USMDesc{}; - USMDesc.align = getAlignment(); - ur_usm_pool_handle_t Pool{}; - UR_CALL(ur::level_zero::urUSMDeviceAlloc( - UrContext, Device, &USMDesc, Pool, Size, - reinterpret_cast(&ZeHandle))); - } else { - Allocation.ReleaseAction = allocation_t::free_native; - UR_CALL(ZeDeviceMemAllocHelper(reinterpret_cast(&ZeHandle), - UrContext, Device, Size)); - } + // Create device allocation + if (DisjointPoolConfigInstance.EnableBuffers) { + Allocation.ReleaseAction = allocation_t::free; + ur_usm_desc_t USMDesc{}; + USMDesc.align = getAlignment(); + ur_usm_pool_handle_t Pool{}; + UR_CALL(ur::level_zero::urUSMDeviceAlloc( + UrContext, Device, &USMDesc, Pool, Size, + reinterpret_cast(&ZeHandle))); + } else { + Allocation.ReleaseAction = allocation_t::free_native; + UR_CALL(ZeDeviceMemAllocHelper(reinterpret_cast(&ZeHandle), + UrContext, Device, Size)); } Allocation.ZeHandle = ZeHandle; } else { diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index c237581016..0a830b7140 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -210,6 +210,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = ur::level_zero::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnUSMDeviceAllocExp = ur::level_zero::urEnqueueUSMDeviceAllocExp; + pDdiTable->pfnUSMSharedAllocExp = ur::level_zero::urEnqueueUSMSharedAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur::level_zero::urEnqueueUSMHostAllocExp; + 
pDdiTable->pfnUSMFreeExp = ur::level_zero::urEnqueueUSMFreeExp; pDdiTable->pfnCooperativeKernelLaunchExp = ur::level_zero::urEnqueueCooperativeKernelLaunchExp; pDdiTable->pfnTimestampRecordingExp = diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 8803b86b07..488e80f9db 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -466,6 +466,26 @@ ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t hQueue, + ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 19480e83c5..1308f10b69 100644 --- 
a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -21,13 +21,15 @@ #include "ur_level_zero.hpp" #include "ur_util.hpp" +#include #include namespace umf { ur_result_t getProviderNativeError(const char *providerName, int32_t nativeError) { if (strcmp(providerName, "Level Zero") == 0) { - return ze2urResult(static_cast(nativeError)); + // L0 provider stores native errors of ur_result_t type + return static_cast(nativeError); } return UR_RESULT_ERROR_UNKNOWN; @@ -332,66 +334,15 @@ ur_result_t urUSMHostAlloc( size_t Size, /// [out] pointer to USM host memory object void **RetMem) { - - uint32_t Align = USMDesc ? USMDesc->align : 0; - // L0 supports alignment up to 64KB and silently ignores higher values. - // We flag alignment > 64KB as an invalid value. - // L0 spec says that alignment values that are not powers of 2 are invalid. - // If alignment == 0, then we are allowing the L0 driver to choose the - // alignment so no need to check. - if (Align > 0) { - if (Align > 65536 || (Align & (Align - 1)) != 0) - return UR_RESULT_ERROR_INVALID_VALUE; - } - - ur_platform_handle_t Plt = Context->getPlatform(); - // If indirect access tracking is enabled then lock the mutex which is - // guarding contexts container in the platform. This prevents new kernels from - // being submitted in any context while we are in the process of allocating a - // memory, this is needed to properly capture allocations by kernels with - // indirect access. This lock also protects access to the context's data - // structures. If indirect access tracking is not enabled then lock context - // mutex to protect access to context's data structures. 
- std::shared_lock ContextLock(Context->Mutex, - std::defer_lock); - std::unique_lock IndirectAccessTrackingLock( - Plt->ContextsMutex, std::defer_lock); - if (IndirectAccessTrackingEnabled) { - IndirectAccessTrackingLock.lock(); - // We are going to defer memory release if there are kernels with indirect - // access, that is why explicitly retain context to be sure that it is - // released after all memory allocations in this context are released. - UR_CALL(ur::level_zero::urContextRetain(Context)); + ur_usm_pool_handle_t USMPool = nullptr; + if (Pool) { + USMPool = Pool; } else { - ContextLock.lock(); - } - - // There is a single allocator for Host USM allocations, so we don't need to - // find the allocator depending on context as we do for Shared and Device - // allocations. - umf_memory_pool_handle_t hPoolInternal = nullptr; - if (!UseUSMAllocator) { - hPoolInternal = Context->HostMemProxyPool.get(); - } else if (Pool) { - hPoolInternal = Pool->HostMemPool.get(); - } else { - hPoolInternal = Context->HostMemPool.get(); - } - - *RetMem = umfPoolAlignedMalloc(hPoolInternal, Size, Align); - if (*RetMem == nullptr) { - auto umfRet = umfPoolGetLastAllocationError(hPoolInternal); - return umf2urResult(umfRet); - } - - if (IndirectAccessTrackingEnabled) { - // Keep track of all memory allocations in the context - Context->MemAllocs.emplace(std::piecewise_construct, - std::forward_as_tuple(*RetMem), - std::forward_as_tuple(Context)); + USMPool = &Context->DefaultPool; } - return UR_RESULT_SUCCESS; + return USMPool->allocate(Context, nullptr, USMDesc, UR_USM_TYPE_HOST, Size, + RetMem); } ur_result_t urUSMDeviceAlloc( @@ -408,72 +359,15 @@ ur_result_t urUSMDeviceAlloc( /// [out] pointer to USM device memory object void **RetMem) { - uint32_t Alignment = USMDesc ? USMDesc->align : 0; - - // L0 supports alignment up to 64KB and silently ignores higher values. - // We flag alignment > 64KB as an invalid value. 
- // L0 spec says that alignment values that are not powers of 2 are invalid. - // If alignment == 0, then we are allowing the L0 driver to choose the - // alignment so no need to check. - if (Alignment > 0) { - if (Alignment > 65536 || (Alignment & (Alignment - 1)) != 0) - return UR_RESULT_ERROR_INVALID_VALUE; - } - - ur_platform_handle_t Plt = Device->Platform; - - // If indirect access tracking is enabled then lock the mutex which is - // guarding contexts container in the platform. This prevents new kernels from - // being submitted in any context while we are in the process of allocating a - // memory, this is needed to properly capture allocations by kernels with - // indirect access. This lock also protects access to the context's data - // structures. If indirect access tracking is not enabled then lock context - // mutex to protect access to context's data structures. - std::shared_lock ContextLock(Context->Mutex, - std::defer_lock); - std::unique_lock IndirectAccessTrackingLock( - Plt->ContextsMutex, std::defer_lock); - if (IndirectAccessTrackingEnabled) { - IndirectAccessTrackingLock.lock(); - // We are going to defer memory release if there are kernels with indirect - // access, that is why explicitly retain context to be sure that it is - // released after all memory allocations in this context are released. 
- UR_CALL(ur::level_zero::urContextRetain(Context)); - } else { - ContextLock.lock(); - } - - umf_memory_pool_handle_t hPoolInternal = nullptr; - if (!UseUSMAllocator) { - auto It = Context->DeviceMemProxyPools.find(Device->ZeDevice); - if (It == Context->DeviceMemProxyPools.end()) - return UR_RESULT_ERROR_INVALID_VALUE; - - hPoolInternal = It->second.get(); - } else if (Pool) { - hPoolInternal = Pool->DeviceMemPools[Device].get(); + ur_usm_pool_handle_t USMPool = nullptr; + if (Pool) { + USMPool = Pool; } else { - auto It = Context->DeviceMemPools.find(Device->ZeDevice); - if (It == Context->DeviceMemPools.end()) - return UR_RESULT_ERROR_INVALID_VALUE; - - hPoolInternal = It->second.get(); - } - - *RetMem = umfPoolAlignedMalloc(hPoolInternal, Size, Alignment); - if (*RetMem == nullptr) { - auto umfRet = umfPoolGetLastAllocationError(hPoolInternal); - return umf2urResult(umfRet); - } - - if (IndirectAccessTrackingEnabled) { - // Keep track of all memory allocations in the context - Context->MemAllocs.emplace(std::piecewise_construct, - std::forward_as_tuple(*RetMem), - std::forward_as_tuple(Context)); + USMPool = &Context->DefaultPool; } - return UR_RESULT_SUCCESS; + return USMPool->allocate(Context, Device, USMDesc, UR_USM_TYPE_DEVICE, Size, + RetMem); } ur_result_t urUSMSharedAlloc( @@ -489,100 +383,15 @@ ur_result_t urUSMSharedAlloc( size_t Size, /// [out] pointer to USM shared memory object void **RetMem) { - - uint32_t Alignment = USMDesc ? USMDesc->align : 0; - - ur_usm_host_mem_flags_t UsmHostFlags{}; - - // See if the memory is going to be read-only on the device. - bool DeviceReadOnly = false; - ur_usm_device_mem_flags_t UsmDeviceFlags{}; - - void *pNext = USMDesc ? 
const_cast(USMDesc->pNext) : nullptr; - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = - reinterpret_cast(pNext); - if (BaseDesc->stype == UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { - const ur_usm_device_desc_t *UsmDeviceDesc = - reinterpret_cast(pNext); - UsmDeviceFlags = UsmDeviceDesc->flags; - } - if (BaseDesc->stype == UR_STRUCTURE_TYPE_USM_HOST_DESC) { - const ur_usm_host_desc_t *UsmHostDesc = - reinterpret_cast(pNext); - UsmHostFlags = UsmHostDesc->flags; - std::ignore = UsmHostFlags; - } - pNext = const_cast(BaseDesc->pNext); - } - DeviceReadOnly = UsmDeviceFlags & UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY; - - // L0 supports alignment up to 64KB and silently ignores higher values. - // We flag alignment > 64KB as an invalid value. - // L0 spec says that alignment values that are not powers of 2 are invalid. - // If alignment == 0, then we are allowing the L0 driver to choose the - // alignment so no need to check. - if (Alignment > 0) { - if (Alignment > 65536 || (Alignment & (Alignment - 1)) != 0) - return UR_RESULT_ERROR_INVALID_VALUE; - } - - ur_platform_handle_t Plt = Device->Platform; - - // If indirect access tracking is enabled then lock the mutex which is - // guarding contexts container in the platform. This prevents new kernels from - // being submitted in any context while we are in the process of allocating a - // memory, this is needed to properly capture allocations by kernels with - // indirect access. This lock also protects access to the context's data - // structures. If indirect access tracking is not enabled then lock context - // mutex to protect access to context's data structures. - std::scoped_lock Lock( - IndirectAccessTrackingEnabled ? 
Plt->ContextsMutex : Context->Mutex); - - if (IndirectAccessTrackingEnabled) { - // We are going to defer memory release if there are kernels with indirect - // access, that is why explicitly retain context to be sure that it is - // released after all memory allocations in this context are released. - UR_CALL(ur::level_zero::urContextRetain(Context)); - } - - umf_memory_pool_handle_t hPoolInternal = nullptr; - if (!UseUSMAllocator) { - auto &Allocator = (DeviceReadOnly ? Context->SharedReadOnlyMemProxyPools - : Context->SharedMemProxyPools); - auto It = Allocator.find(Device->ZeDevice); - if (It == Allocator.end()) - return UR_RESULT_ERROR_INVALID_VALUE; - - hPoolInternal = It->second.get(); - } else if (Pool) { - hPoolInternal = (DeviceReadOnly) - ? Pool->SharedReadOnlyMemPools[Device].get() - : Pool->SharedMemPools[Device].get(); + ur_usm_pool_handle_t USMPool = nullptr; + if (Pool) { + USMPool = Pool; } else { - auto &Allocator = (DeviceReadOnly ? Context->SharedReadOnlyMemPools - : Context->SharedMemPools); - auto It = Allocator.find(Device->ZeDevice); - if (It == Allocator.end()) - return UR_RESULT_ERROR_INVALID_VALUE; - - hPoolInternal = It->second.get(); - } - - *RetMem = umfPoolAlignedMalloc(hPoolInternal, Size, Alignment); - if (*RetMem == nullptr) { - auto umfRet = umfPoolGetLastAllocationError(hPoolInternal); - return umf2urResult(umfRet); + USMPool = &Context->DefaultPool; } - if (IndirectAccessTrackingEnabled) { - // Keep track of all memory allocations in the context - Context->MemAllocs.emplace(std::piecewise_construct, - std::forward_as_tuple(*RetMem), - std::forward_as_tuple(Context)); - } - - return UR_RESULT_SUCCESS; + return USMPool->allocate(Context, Device, USMDesc, UR_USM_TYPE_SHARED, Size, + RetMem); } ur_result_t @@ -667,26 +476,8 @@ ur_result_t urUSMGetMemAllocInfo( std::shared_lock ContextLock(Context->Mutex); - auto SearchMatchingPool = - [](std::unordered_map - &PoolMap, - umf_memory_pool_handle_t UMFPool) { - for (auto &PoolPair : 
PoolMap) { - if (PoolPair.second.get() == UMFPool) { - return true; - } - } - return false; - }; - for (auto &Pool : Context->UsmPoolHandles) { - if (SearchMatchingPool(Pool->DeviceMemPools, UMFPool)) { - return ReturnValue(Pool); - } - if (SearchMatchingPool(Pool->SharedMemPools, UMFPool)) { - return ReturnValue(Pool); - } - if (Pool->HostMemPool.get() == UMFPool) { + if (Pool->hasPool(UMFPool)) { return ReturnValue(Pool); } } @@ -803,6 +594,126 @@ ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) { Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr); return UR_RESULT_SUCCESS; } + +ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ ur_event_handle_t *phEvent ///< [out][optional] return an event object that + ///< identifies the asynchronous USM deallocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} } // namespace ur::level_zero static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) { @@ -1034,86 +945,245 @@ ur_result_t L0HostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, return USMHostAllocImpl(ResultPtr, Context, /* flags */ 0, Size, Alignment); } -ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_usm_pool_desc_t *PoolDesc) { - - this->Context = Context; - zeroInit = static_cast(PoolDesc->flags & - UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK); - - void *pNext = const_cast(PoolDesc->pNext); - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = - reinterpret_cast(pNext); - switch (BaseDesc->stype) { - case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { - const ur_usm_pool_limits_desc_t *Limits = - reinterpret_cast(BaseDesc); - for (auto &config : DisjointPoolConfigs.Configs) { - config.MaxPoolableSize = Limits->maxPoolableSize; - config.SlabMinSize = Limits->minDriverAllocSize; +static usm::DisjointPoolMemType +DescToDisjointPoolMemType(const usm::pool_descriptor &desc) { + switch (desc.type) { + case UR_USM_TYPE_DEVICE: + return usm::DisjointPoolMemType::Device; + case UR_USM_TYPE_SHARED: + if (desc.deviceReadOnly) + return usm::DisjointPoolMemType::SharedReadOnly; + else + return usm::DisjointPoolMemType::Shared; + case UR_USM_TYPE_HOST: + return usm::DisjointPoolMemType::Host; + default: + throw UR_RESULT_ERROR_INVALID_ARGUMENT; + } +} + +typedef usm::pool_descriptor l0_memory_provider_params_t; + +template +static umf::provider_unique_handle_t +MakeProvider(ProviderParams *Params = nullptr) { + if constexpr (std::is_same_v) { + auto [Ret, FixedProvider] = + 
umf::providerMakeUniqueFromOps(umfFixedMemoryProviderOps(), *Params); + if (Ret) { + logger::error("urUSMPoolCreate: failed to create UMF fixed provider"); + throw UsmAllocationException(umf::umf2urResult(Ret)); + } + + return std::move(FixedProvider); + } else if constexpr (std::is_same_v) { + umf_result_t Ret = UMF_RESULT_SUCCESS; + umf::provider_unique_handle_t &&L0Provider = nullptr; + + switch (Params->type) { + case UR_USM_TYPE_HOST: + std::tie(Ret, L0Provider) = + umf::memoryProviderMakeUnique(Params->hContext, + Params->hDevice); + break; + case UR_USM_TYPE_DEVICE: + std::tie(Ret, L0Provider) = + umf::memoryProviderMakeUnique( + Params->hContext, Params->hDevice); + break; + case UR_USM_TYPE_SHARED: + if (Params->deviceReadOnly) { + std::tie(Ret, L0Provider) = + umf::memoryProviderMakeUnique( + Params->hContext, Params->hDevice); + } else { + std::tie(Ret, L0Provider) = + umf::memoryProviderMakeUnique( + Params->hContext, Params->hDevice); } break; + default: + logger::error("urUSMPoolCreate: invalid USM type found"); + Ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (Ret != UMF_RESULT_SUCCESS) { + logger::error("urUSMPoolCreate: failed to create UMF provider"); + throw UsmAllocationException(umf::umf2urResult(Ret)); } - default: { - logger::error("urUSMPoolCreate: unexpected chained stype"); - throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT); + + return std::move(L0Provider); + } + + return nullptr; +} + +template +static umf::pool_unique_handle_t +MakePool(umf::provider_unique_handle_t &Provider, + PoolParams *Params = nullptr) { + if constexpr (std::is_same_v) { + auto UmfParamsHandle = usm::getUmfParamsHandle(*Params); + auto [Ret, Pool] = umf::poolMakeUniqueFromOps( + umfDisjointPoolOps(), std::move(Provider), UmfParamsHandle.get()); + if (Ret != UMF_RESULT_SUCCESS) { + logger::error("urUSMPoolCreate: failed to create UMF pool"); + throw UsmAllocationException(umf::umf2urResult(Ret)); + } + + return std::move(Pool); + } else if 
constexpr (std::is_same_v) { + auto [Ret, Pool] = umf::poolMakeUnique(std::move(Provider)); + if (Ret != UMF_RESULT_SUCCESS) { + logger::error("urUSMPoolCreate: failed to create UMF pool"); + throw UsmAllocationException(umf::umf2urResult(Ret)); + } + + return std::move(Pool); + } + + return nullptr; +} + +ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, + ur_usm_pool_desc_t *PoolDesc, + bool IsProxy) + : Context(Context) { + // TODO: handle zero-init flag 'UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK' + auto DisjointPoolConfigs = InitializeDisjointPoolConfig(); + if (auto Limits = find_stype_node(PoolDesc)) { + for (auto &Config : DisjointPoolConfigs.Configs) { + Config.MaxPoolableSize = Limits->maxPoolableSize; + Config.SlabMinSize = Limits->minDriverAllocSize; + } + } + + // Create a native pool from user provided buffer. + if (auto NativeDesc = find_stype_node(PoolDesc)) { + + umf_fixed_memory_provider_params_handle_t FixedProviderParams = nullptr; + auto Ret = umfFixedMemoryProviderParamsCreate( + &FixedProviderParams, NativeDesc->pMem, NativeDesc->size); + if (Ret) { + logger::error( + "urUSMPoolCreate: failed to create UMF fixed provider params"); + throw UsmAllocationException(umf::umf2urResult(Ret)); + } + + auto Provider = MakeProvider(&FixedProviderParams); + auto Desc = usm::pool_descriptor{this, Context, NativeDesc->device, + NativeDesc->memType, false}; + PoolManager.addPool(Desc, MakePool(Provider)); + return; // Skip the default pool initialization. 
+ } + + auto [Ret, Descriptors] = usm::pool_descriptor::create(this, Context); + if (Ret) { + logger::error("urUSMPoolCreate: failed to create pool descriptors"); + throw UsmAllocationException(Ret); + } + + for (auto &Desc : Descriptors) { + umf::pool_unique_handle_t Pool = nullptr; + if (IsProxy) { + auto Provider = MakeProvider(&Desc); + Pool = MakePool(Provider); // stored once by the checked addPool below + } else { + auto Provider = MakeProvider(&Desc); + auto &PoolConfig = + DisjointPoolConfigs.Configs[DescToDisjointPoolMemType(Desc)]; + Pool = MakePool(Provider, &PoolConfig); } + + Ret = PoolManager.addPool(Desc, std::move(Pool)); + if (Ret) { + logger::error("urUSMPoolCreate: failed to store UMF pool"); + throw UsmAllocationException(Ret); } - pNext = const_cast(BaseDesc->pNext); - } - - auto MemProvider = - umf::memoryProviderMakeUnique(Context, nullptr) - .second; - - auto UmfHostParamsHandle = getUmfParamsHandle( - DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Host]); - HostMemPool = - umf::poolMakeUniqueFromOps(umfDisjointPoolOps(), std::move(MemProvider), - UmfHostParamsHandle.get()) - .second; - - for (auto device : Context->Devices) { - MemProvider = - umf::memoryProviderMakeUnique(Context, device) - .second; - auto UmfDeviceParamsHandle = getUmfParamsHandle( - DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Device]); - DeviceMemPools.emplace( - std::piecewise_construct, std::make_tuple(device), - std::make_tuple(umf::poolMakeUniqueFromOps(umfDisjointPoolOps(), - std::move(MemProvider), - UmfDeviceParamsHandle.get()) - .second)); - - MemProvider = - umf::memoryProviderMakeUnique(Context, device) - .second; - auto UmfSharedParamsHandle = getUmfParamsHandle( - DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Shared]); - SharedMemPools.emplace( - std::piecewise_construct, std::make_tuple(device), - std::make_tuple(umf::poolMakeUniqueFromOps(umfDisjointPoolOps(), - std::move(MemProvider), - 
UmfSharedParamsHandle.get()) - .second)); - - 
MemProvider = umf::memoryProviderMakeUnique( - Context, device) - .second; - auto UmfSharedROParamsHandle = getUmfParamsHandle( - DisjointPoolConfigInstance - .Configs[usm::DisjointPoolMemType::SharedReadOnly]); - SharedReadOnlyMemPools.emplace( - std::piecewise_construct, std::make_tuple(device), - std::make_tuple(umf::poolMakeUniqueFromOps( - umfDisjointPoolOps(), std::move(MemProvider), - UmfSharedROParamsHandle.get()) - .second)); } } +umf_memory_pool_handle_t +ur_usm_pool_handle_t_::getPool(const usm::pool_descriptor &Desc) { + auto PoolOpt = PoolManager.getPool(Desc); + return PoolOpt.has_value() ? PoolOpt.value() : nullptr; +} + +ur_result_t ur_usm_pool_handle_t_::allocate(ur_context_handle_t Context, + ur_device_handle_t Device, + const ur_usm_desc_t *USMDesc, + ur_usm_type_t Type, size_t Size, + void **RetMem) { + uint32_t Alignment = USMDesc ? USMDesc->align : 0; + // L0 supports alignment up to 64KB and silently ignores higher values. + // We flag alignment > 64KB as an invalid value. + // L0 spec says that alignment values that are not powers of 2 are invalid. + // If alignment == 0, then we are allowing the L0 driver to choose the + // alignment so no need to check. + if (Alignment > 0) { + if (Alignment > 65536 || (Alignment & (Alignment - 1)) != 0) + return UR_RESULT_ERROR_INVALID_VALUE; + } + + // Handle the extension structures for 'ur_usm_desc_t'. + if (auto UsmHostDesc = find_stype_node(USMDesc)) { + std::ignore = UsmHostDesc; // Unused + } + + bool DeviceReadOnly = false; + if (auto UsmDeviceDesc = find_stype_node(USMDesc)) { + DeviceReadOnly = + (Type == UR_USM_TYPE_SHARED) && + (UsmDeviceDesc->flags & UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY); + } + + ur_platform_handle_t Plt = + (Device) ? Device->Platform : Context->getPlatform(); + // If indirect access tracking is enabled then lock the mutex which is + // guarding contexts container in the platform. 
This prevents new kernels from + // being submitted in any context while we are in the process of allocating a + // memory, this is needed to properly capture allocations by kernels with + // indirect access. This lock also protects access to the context's data + // structures. If indirect access tracking is not enabled then lock context + // mutex to protect access to context's data structures. + std::scoped_lock Lock( + IndirectAccessTrackingEnabled ? Plt->ContextsMutex : Context->Mutex); + + if (IndirectAccessTrackingEnabled) { + // We are going to defer memory release if there are kernels with indirect + // access, that is why explicitly retain context to be sure that it is + // released after all memory allocations in this context are released. + UR_CALL(ur::level_zero::urContextRetain(Context)); + } + + auto umfPool = getPool( + usm::pool_descriptor{this, Context, Device, Type, DeviceReadOnly}); + if (!umfPool) { + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + + *RetMem = umfPoolAlignedMalloc(umfPool, Size, Alignment); + if (*RetMem == nullptr) { + auto umfRet = umfPoolGetLastAllocationError(umfPool); + return umf::umf2urResult(umfRet); + } + + if (IndirectAccessTrackingEnabled) { + // Keep track of all memory allocations in the context + Context->MemAllocs.emplace(std::piecewise_construct, + std::forward_as_tuple(*RetMem), + std::forward_as_tuple(Context)); + } + + return UR_RESULT_SUCCESS; +} + +bool ur_usm_pool_handle_t_::hasPool(const umf_memory_pool_handle_t Pool) { + return PoolManager.hasPool(Pool); +} + // If indirect access tracking is not enabled then this functions just performs // zeMemFree. If indirect access tracking is enabled then reference counting is // performed. 
diff --git a/source/adapters/level_zero/usm.hpp b/source/adapters/level_zero/usm.hpp index 2fe74a5ecf..f812d441f1 100644 --- a/source/adapters/level_zero/usm.hpp +++ b/source/adapters/level_zero/usm.hpp @@ -11,28 +11,25 @@ #include "common.hpp" +#include "ur_pool_manager.hpp" #include usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig(); struct ur_usm_pool_handle_t_ : _ur_object { - bool zeroInit; - - usm::DisjointPoolAllConfigs DisjointPoolConfigs = - InitializeDisjointPoolConfig(); + ur_usm_pool_handle_t_(ur_context_handle_t Context, + ur_usm_pool_desc_t *PoolDesc, bool IsProxy = false); - std::unordered_map - DeviceMemPools; - std::unordered_map - SharedMemPools; - std::unordered_map - SharedReadOnlyMemPools; - umf::pool_unique_handle_t HostMemPool; + ur_result_t allocate(ur_context_handle_t Context, ur_device_handle_t Device, + const ur_usm_desc_t *USMDesc, ur_usm_type_t Type, + size_t Size, void **RetMem); + bool hasPool(const umf_memory_pool_handle_t Pool); - ur_context_handle_t Context{}; + ur_context_handle_t Context; - ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_usm_pool_desc_t *PoolDesc); +private: + umf_memory_pool_handle_t getPool(const usm::pool_descriptor &Desc); + usm::pool_manager PoolManager; }; // Exception type to pass allocation errors @@ -125,7 +122,7 @@ class L0MemoryProvider : public USMMemoryProviderBase { umf_result_t free(void *Ptr, size_t Size) override; umf_result_t get_min_page_size(void *, size_t *) override; // TODO: Different name for each provider (Host/Shared/SharedRO/Device) - const char *get_name() override { return "L0"; }; + const char *get_name() override { return "Level Zero"; }; umf_result_t get_ipc_handle_size(size_t *) override; umf_result_t get_ipc_handle(const void *, size_t, void *) override; umf_result_t put_ipc_handle(void *) override; diff --git a/source/adapters/level_zero/v2/queue_api.cpp b/source/adapters/level_zero/v2/queue_api.cpp index aa7d27978a..9033ad3ff2 100644 --- 
a/source/adapters/level_zero/v2/queue_api.cpp +++ b/source/adapters/level_zero/v2/queue_api.cpp @@ -342,6 +342,49 @@ ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, } catch (...) { return exceptionToResult(std::current_exception()); } +ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) try { + return hQueue->get().enqueueUSMDeviceAllocExp( + pPool, size, pProperties, numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} +ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) try { + return hQueue->get().enqueueUSMSharedAllocExp( + pPool, size, pProperties, numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} +ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) try { + return hQueue->get().enqueueUSMHostAllocExp(pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} +ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t hQueue, + ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) try { + return hQueue->get().enqueueUSMFreeExp(pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} ur_result_t urBindlessImagesImageCopyExp( ur_queue_handle_t hQueue, const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, diff --git a/source/adapters/level_zero/v2/queue_api.hpp b/source/adapters/level_zero/v2/queue_api.hpp index 82a43e9768..3193c6f66b 100644 --- a/source/adapters/level_zero/v2/queue_api.hpp +++ b/source/adapters/level_zero/v2/queue_api.hpp @@ -128,6 +128,24 @@ struct ur_queue_t_ { bool, void *, size_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueUSMDeviceAllocExp(ur_usm_pool_handle_t, const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, + uint32_t, const ur_event_handle_t *, void **, + ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueUSMSharedAllocExp(ur_usm_pool_handle_t, const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, + uint32_t, const ur_event_handle_t *, void **, + ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueUSMHostAllocExp(ur_usm_pool_handle_t, const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, + uint32_t, const ur_event_handle_t *, void **, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueUSMFreeExp(ur_usm_pool_handle_t, void *, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; virtual ur_result_t bindlessImagesImageCopyExp( const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *, const ur_image_format_t *, const ur_image_format_t *, diff --git 
a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index d33ac12f7e..f268ead00a 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -802,6 +802,62 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueWriteHostPipe( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMDeviceAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMSharedAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMHostAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + 
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFreeExp( + ur_usm_pool_handle_t pPool, void *pMem, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + ur_result_t ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp( const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 2c51d8ac1f..5779eed05e 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -215,6 +215,25 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ { uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMDeviceAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMSharedAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMHostAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) override; + ur_result_t 
enqueueUSMFreeExp(ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; ur_result_t bindlessImagesImageCopyExp( const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 6d5034d07b..d21f381da9 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -7287,6 +7287,268 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM device allocation + ur_event_handle_t *phEvent) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_device_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMDeviceAllocExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urEnqueueUSMDeviceAllocExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urEnqueueUSMDeviceAllocExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM shared allocation + ur_event_handle_t *phEvent) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_shared_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMSharedAllocExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urEnqueueUSMSharedAllocExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return 
result; + } + + auto afterCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_after_callback("urEnqueueUSMSharedAllocExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM host allocation + ur_event_handle_t *phEvent) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_host_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + + auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_before_callback("urEnqueueUSMHostAllocExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_replace_callback("urEnqueueUSMHostAllocExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_after_callback("urEnqueueUSMHostAllocExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] pointer to USM memory object + void *pMem, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. 
+ /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out][optional] return an event object that identifies the + /// asynchronous USM deallocation + ur_event_handle_t *phEvent) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_free_exp_params_t params = { + &hQueue, &pPool, &pMem, &numEventsInWaitList, &phEventWaitList, &phEvent}; + + auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_before_callback("urEnqueueUSMFreeExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_replace_callback("urEnqueueUSMFreeExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_after_callback("urEnqueueUSMFreeExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -11219,6 +11481,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = driver::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnUSMDeviceAllocExp = driver::urEnqueueUSMDeviceAllocExp; + + pDdiTable->pfnUSMSharedAllocExp = driver::urEnqueueUSMSharedAllocExp; + + pDdiTable->pfnUSMHostAllocExp = driver::urEnqueueUSMHostAllocExp; + + pDdiTable->pfnUSMFreeExp = driver::urEnqueueUSMFreeExp; + pDdiTable->pfnCooperativeKernelLaunchExp = driver::urEnqueueCooperativeKernelLaunchExp; diff --git a/source/adapters/native_cpu/usm.cpp b/source/adapters/native_cpu/usm.cpp index 2fe0d551a8..2bea4c08a4 100644 --- a/source/adapters/native_cpu/usm.cpp +++ b/source/adapters/native_cpu/usm.cpp @@ -155,3 +155,64 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context, std::ignore = HostPtr; DIE_NO_IMPLEMENTATION; } + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + DIE_NO_IMPLEMENTATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, 
ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + DIE_NO_IMPLEMENTATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + DIE_NO_IMPLEMENTATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + DIE_NO_IMPLEMENTATION; +} diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 7961cb76ff..221b08a5d8 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -728,3 +728,48 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( [[maybe_unused]] size_t *pPropSizeRet) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_usm_pool_handle_t pPool, + [[maybe_unused]] const size_t size, + [[maybe_unused]] const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + 
[[maybe_unused]] void **ppMem, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_usm_pool_handle_t pPool, + [[maybe_unused]] const size_t size, + [[maybe_unused]] const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] void **ppMem, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_usm_pool_handle_t pPool, + [[maybe_unused]] const size_t size, + [[maybe_unused]] const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] void **ppMem, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_usm_pool_handle_t pPool, [[maybe_unused]] void *pMem, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 417016d630..63a0e0764c 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -36,15 +36,11 @@ target_include_directories(ur_common PUBLIC message(STATUS "Download Unified Memory Framework from github.com") if (NOT DEFINED UMF_REPO) - set(UMF_REPO "https://github.com/oneapi-src/unified-memory-framework.git") + set(UMF_REPO 
"https://github.com/ldorau/unified-memory-framework.git") endif() if (NOT DEFINED UMF_TAG) - # commit ace9f4a60b686463fdad15cd016c548237cb79e0 - # Author: RafaÅ‚ Rudnicki - # Date: Mon Feb 10 11:39:15 2025 +0100 - # Merge pull request #1088 from ldorau/Fix_remove_CUDA_ERROR_INVALID_RESOURCE_TYPE - set(UMF_TAG ace9f4a60b686463fdad15cd016c548237cb79e0) + set(UMF_TAG umf-for-async-api) endif() message(STATUS "Will fetch Unified Memory Framework from ${UMF_REPO}") diff --git a/source/common/logger/ur_sinks.hpp b/source/common/logger/ur_sinks.hpp index 8f580bc04d..fe2af9dde0 100644 --- a/source/common/logger/ur_sinks.hpp +++ b/source/common/logger/ur_sinks.hpp @@ -28,8 +28,8 @@ class Sink { void log(logger::Level level, const char *fmt, Args &&...args) { std::ostringstream buffer; if (!skip_prefix && level != logger::Level::QUIET) { - buffer << "<" << logger_name << ">" - << "[" << level_to_str(level) << "]: "; + buffer << "<" << logger_name << ">" << "[" << level_to_str(level) + << "]: "; } format(buffer, fmt, std::forward(args)...); diff --git a/source/common/stype_map_helpers.def b/source/common/stype_map_helpers.def index ec2856e60d..f745467150 100644 --- a/source/common/stype_map_helpers.def +++ b/source/common/stype_map_helpers.def @@ -72,6 +72,8 @@ struct stype_map : stype_map_impl struct stype_map : stype_map_impl {}; template <> +struct stype_map : stype_map_impl {}; +template <> struct stype_map : stype_map_impl {}; template <> struct stype_map : stype_map_impl {}; @@ -100,5 +102,7 @@ struct stype_map : stype_map_impl struct stype_map : stype_map_impl {}; template <> +struct stype_map : stype_map_impl {}; +template <> struct stype_map : stype_map_impl {}; diff --git a/source/common/ur_pool_manager.hpp b/source/common/ur_pool_manager.hpp index 5d2beda18d..45dc7e39fc 100644 --- a/source/common/ur_pool_manager.hpp +++ b/source/common/ur_pool_manager.hpp @@ -289,6 +289,13 @@ template struct pool_manager { return it->second.get(); } + + bool hasPool(const 
umf_memory_pool_handle_t hPool) noexcept { + return std::any_of(descToPoolMap.begin(), descToPoolMap.end(), + [hPool](const auto &descPoolPair) { + return descPoolPair.second.get() == hPool; + }); + } }; inline umf::pool_unique_handle_t diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index f4a7b7e60a..ae6773f1d3 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6008,6 +6008,232 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM device allocation + ur_event_handle_t *phEvent) { + auto pfnUSMDeviceAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMDeviceAllocExp; + + if (nullptr == pfnUSMDeviceAllocExp) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + ur_enqueue_usm_device_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP, + "urEnqueueUSMDeviceAllocExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMDeviceAllocExp\n"); + + ur_result_t result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP, + "urEnqueueUSMDeviceAllocExp", &params, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP, &params); + logger.info(" <--- urEnqueueUSMDeviceAllocExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait 
list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM shared allocation + ur_event_handle_t *phEvent) { + auto pfnUSMSharedAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMSharedAllocExp; + + if (nullptr == pfnUSMSharedAllocExp) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + ur_enqueue_usm_shared_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP, + "urEnqueueUSMSharedAllocExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMSharedAllocExp\n"); + + ur_result_t result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP, + "urEnqueueUSMSharedAllocExp", &params, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP, &params); + logger.info(" <--- urEnqueueUSMSharedAllocExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size 
in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM host allocation + ur_event_handle_t *phEvent) { + auto pfnUSMHostAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp; + + if (nullptr == pfnUSMHostAllocExp) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + ur_enqueue_usm_host_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP, + "urEnqueueUSMHostAllocExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMHostAllocExp\n"); + + ur_result_t result = + pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP, + "urEnqueueUSMHostAllocExp", &params, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP, &params); + logger.info(" <--- urEnqueueUSMHostAllocExp({}) -> {};\n", args_str.str(), + result); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp 
+__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] pointer to USM memory object + void *pMem, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out][optional] return an event object that identifies the + /// asynchronous USM deallocation + ur_event_handle_t *phEvent) { + auto pfnUSMFreeExp = getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp; + + if (nullptr == pfnUSMFreeExp) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + ur_enqueue_usm_free_exp_params_t params = { + &hQueue, &pPool, &pMem, &numEventsInWaitList, &phEventWaitList, &phEvent}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_ENQUEUE_USM_FREE_EXP, "urEnqueueUSMFreeExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMFreeExp\n"); + + ur_result_t result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_FREE_EXP, + "urEnqueueUSMFreeExp", &params, &result, instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams(args_str, UR_FUNCTION_ENQUEUE_USM_FREE_EXP, + &params); + logger.info(" <--- urEnqueueUSMFreeExp({}) -> {};\n", args_str.str(), + result); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -9543,6 +9769,20 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( 
pDdiTable->pfnKernelLaunchCustomExp = ur_tracing_layer::urEnqueueKernelLaunchCustomExp; + dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp; + pDdiTable->pfnUSMDeviceAllocExp = + ur_tracing_layer::urEnqueueUSMDeviceAllocExp; + + dditable.pfnUSMSharedAllocExp = pDdiTable->pfnUSMSharedAllocExp; + pDdiTable->pfnUSMSharedAllocExp = + ur_tracing_layer::urEnqueueUSMSharedAllocExp; + + dditable.pfnUSMHostAllocExp = pDdiTable->pfnUSMHostAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur_tracing_layer::urEnqueueUSMHostAllocExp; + + dditable.pfnUSMFreeExp = pDdiTable->pfnUSMFreeExp; + pDdiTable->pfnUSMFreeExp = ur_tracing_layer::urEnqueueUSMFreeExp; + dditable.pfnCooperativeKernelLaunchExp = pDdiTable->pfnCooperativeKernelLaunchExp; pDdiTable->pfnCooperativeKernelLaunchExp = diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index eb2bd4c353..f3ea96cf6e 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -496,7 +496,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( if (pPropValue == NULL && pPropSizeRet == NULL) return UR_RESULT_ERROR_INVALID_NULL_POINTER; - if (UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName) + if (UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP < propName) return UR_RESULT_ERROR_INVALID_ENUMERATION; if (propSize == 0 && pPropValue != NULL) @@ -6574,6 +6574,267 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the 
enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM device allocation + ur_event_handle_t *phEvent) { + auto pfnUSMDeviceAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMDeviceAllocExp; + + if (nullptr == pfnUSMDeviceAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + + if (NULL == ppMem) + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + + if (NULL != pProperties && + UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags) + return UR_RESULT_ERROR_INVALID_ENUMERATION; + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function 
for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM shared allocation + ur_event_handle_t *phEvent) { + auto pfnUSMSharedAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMSharedAllocExp; + + if (nullptr == pfnUSMSharedAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + + if (NULL == ppMem) + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + + if (NULL != pProperties && + UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags) + return UR_RESULT_ERROR_INVALID_ENUMERATION; + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + 
!getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM host allocation + ur_event_handle_t *phEvent) { + auto pfnUSMHostAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp; + + if (nullptr == pfnUSMHostAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + + if (NULL == ppMem) + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + + if (NULL != pProperties && + UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags) + return UR_RESULT_ERROR_INVALID_ENUMERATION; + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = + pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] pointer to USM memory object + void *pMem, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a 
list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out][optional] return an event object that identifies the + /// asynchronous USM deallocation + ur_event_handle_t *phEvent) { + auto pfnUSMFreeExp = getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp; + + if (nullptr == pfnUSMFreeExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + + if (NULL == pMem) + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -10199,6 +10460,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = ur_validation_layer::urEnqueueKernelLaunchCustomExp; + dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp; + pDdiTable->pfnUSMDeviceAllocExp = + ur_validation_layer::urEnqueueUSMDeviceAllocExp; + + dditable.pfnUSMSharedAllocExp = pDdiTable->pfnUSMSharedAllocExp; + 
pDdiTable->pfnUSMSharedAllocExp = + ur_validation_layer::urEnqueueUSMSharedAllocExp; + + dditable.pfnUSMHostAllocExp = pDdiTable->pfnUSMHostAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur_validation_layer::urEnqueueUSMHostAllocExp; + + dditable.pfnUSMFreeExp = pDdiTable->pfnUSMFreeExp; + pDdiTable->pfnUSMFreeExp = ur_validation_layer::urEnqueueUSMFreeExp; + dditable.pfnCooperativeKernelLaunchExp = pDdiTable->pfnCooperativeKernelLaunchExp; pDdiTable->pfnCooperativeKernelLaunchExp = diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index 1425c602d6..041594d4ae 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -85,11 +85,15 @@ EXPORTS urEnqueueReadHostPipe urEnqueueTimestampRecordingExp urEnqueueUSMAdvise + urEnqueueUSMDeviceAllocExp urEnqueueUSMFill urEnqueueUSMFill2D + urEnqueueUSMFreeExp + urEnqueueUSMHostAllocExp urEnqueueUSMMemcpy urEnqueueUSMMemcpy2D urEnqueueUSMPrefetch + urEnqueueUSMSharedAllocExp urEnqueueWriteHostPipe urEventCreateWithNativeHandle urEventGetInfo @@ -282,11 +286,15 @@ EXPORTS urPrintEnqueueReadHostPipeParams urPrintEnqueueTimestampRecordingExpParams urPrintEnqueueUsmAdviseParams + urPrintEnqueueUsmDeviceAllocExpParams urPrintEnqueueUsmFillParams urPrintEnqueueUsmFill_2dParams + urPrintEnqueueUsmFreeExpParams + urPrintEnqueueUsmHostAllocExpParams urPrintEnqueueUsmMemcpyParams urPrintEnqueueUsmMemcpy_2dParams urPrintEnqueueUsmPrefetchParams + urPrintEnqueueUsmSharedAllocExpParams urPrintEnqueueWriteHostPipeParams urPrintEventCreateWithNativeHandleParams urPrintEventGetInfoParams @@ -312,6 +320,8 @@ EXPORTS urPrintExpEnqueueExtProperties urPrintExpEnqueueNativeCommandFlags urPrintExpEnqueueNativeCommandProperties + urPrintExpEnqueueUsmAllocFlags + urPrintExpEnqueueUsmAllocProperties urPrintExpExternalMemDesc urPrintExpExternalMemType urPrintExpExternalSemaphoreDesc @@ -486,6 +496,7 @@ EXPORTS urPrintUsmPoolGetInfoParams urPrintUsmPoolInfo urPrintUsmPoolLimitsDesc + urPrintUsmPoolNativeDesc 
urPrintUsmPoolReleaseParams urPrintUsmPoolRetainParams urPrintUsmReleaseExpParams diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index ebb413c985..6ab962b102 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -85,11 +85,15 @@ urEnqueueReadHostPipe; urEnqueueTimestampRecordingExp; urEnqueueUSMAdvise; + urEnqueueUSMDeviceAllocExp; urEnqueueUSMFill; urEnqueueUSMFill2D; + urEnqueueUSMFreeExp; + urEnqueueUSMHostAllocExp; urEnqueueUSMMemcpy; urEnqueueUSMMemcpy2D; urEnqueueUSMPrefetch; + urEnqueueUSMSharedAllocExp; urEnqueueWriteHostPipe; urEventCreateWithNativeHandle; urEventGetInfo; @@ -282,11 +286,15 @@ urPrintEnqueueReadHostPipeParams; urPrintEnqueueTimestampRecordingExpParams; urPrintEnqueueUsmAdviseParams; + urPrintEnqueueUsmDeviceAllocExpParams; urPrintEnqueueUsmFillParams; urPrintEnqueueUsmFill_2dParams; + urPrintEnqueueUsmFreeExpParams; + urPrintEnqueueUsmHostAllocExpParams; urPrintEnqueueUsmMemcpyParams; urPrintEnqueueUsmMemcpy_2dParams; urPrintEnqueueUsmPrefetchParams; + urPrintEnqueueUsmSharedAllocExpParams; urPrintEnqueueWriteHostPipeParams; urPrintEventCreateWithNativeHandleParams; urPrintEventGetInfoParams; @@ -312,6 +320,8 @@ urPrintExpEnqueueExtProperties; urPrintExpEnqueueNativeCommandFlags; urPrintExpEnqueueNativeCommandProperties; + urPrintExpEnqueueUsmAllocFlags; + urPrintExpEnqueueUsmAllocProperties; urPrintExpExternalMemDesc; urPrintExpExternalMemType; urPrintExpExternalSemaphoreDesc; @@ -486,6 +496,7 @@ urPrintUsmPoolGetInfoParams; urPrintUsmPoolInfo; urPrintUsmPoolLimitsDesc; + urPrintUsmPoolNativeDesc; urPrintUsmPoolReleaseParams; urPrintUsmPoolRetainParams; urPrintUsmReleaseExpParams; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index ec6081509f..828c380d8e 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -6170,6 +6170,276 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM device allocation + ur_event_handle_t *phEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMDeviceAllocExp = dditable->ur.EnqueueExp.pfnUSMDeviceAllocExp; + if (nullptr == pfnUSMDeviceAllocExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? 
reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + + // forward to device-platform + result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitListLocal.data(), ppMem, phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any + // output handles below. + if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) + return result; + try { + // convert platform handle to loader handle + if (nullptr != phEvent) + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM shared allocation + ur_event_handle_t *phEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMSharedAllocExp = dditable->ur.EnqueueExp.pfnUSMSharedAllocExp; + if (nullptr == pfnUSMSharedAllocExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + + // forward to device-platform + result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitListLocal.data(), ppMem, phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any + // output handles below. 
+ if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) + return result; + try { + // convert platform handle to loader handle + if (nullptr != phEvent) + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM host allocation + ur_event_handle_t *phEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMHostAllocExp = dditable->ur.EnqueueExp.pfnUSMHostAllocExp; + if (nullptr == pfnUSMHostAllocExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + + // forward to device-platform + result = + pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, numEventsInWaitList, + phEventWaitListLocal.data(), ppMem, phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any + // output handles below. 
+ if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) + return result; + try { + // convert platform handle to loader handle + if (nullptr != phEvent) + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] pointer to USM memory object + void *pMem, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out][optional] return an event object that identifies the + /// asynchronous USM deallocation + ur_event_handle_t *phEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMFreeExp = dditable->ur.EnqueueExp.pfnUSMFreeExp; + if (nullptr == pfnUSMFreeExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? 
reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + + // forward to device-platform + result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitListLocal.data(), phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any + // output handles below. + if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) + return result; + try { + // convert platform handle to loader handle + if (nullptr != phEvent) + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -9697,6 +9967,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( // return pointers to loader's DDIs pDdiTable->pfnKernelLaunchCustomExp = ur_loader::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnUSMDeviceAllocExp = ur_loader::urEnqueueUSMDeviceAllocExp; + pDdiTable->pfnUSMSharedAllocExp = ur_loader::urEnqueueUSMSharedAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur_loader::urEnqueueUSMHostAllocExp; + pDdiTable->pfnUSMFreeExp = ur_loader::urEnqueueUSMFreeExp; pDdiTable->pfnCooperativeKernelLaunchExp = ur_loader::urEnqueueCooperativeKernelLaunchExp; pDdiTable->pfnTimestampRecordingExp = diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 5761dab3a4..190c3dcea0 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -869,7 +869,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - 
::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName` +/// + `::UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -6701,6 +6701,203 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM device allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & +/// pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM device allocation + ur_event_handle_t *phEvent) try { + auto pfnUSMDeviceAllocExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMDeviceAllocExp; + if (nullptr == pfnUSMDeviceAllocExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + return pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM shared allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & +/// pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. 
+ /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM shared allocation + ur_event_handle_t *phEvent) try { + auto pfnUSMSharedAllocExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMSharedAllocExp; + if (nullptr == pfnUSMSharedAllocExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + return pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM host allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & +/// pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a 
list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. + const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM host allocation + ur_event_handle_t *phEvent) try { + auto pfnUSMHostAllocExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp; + if (nullptr == pfnUSMHostAllocExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + return pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM deallocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMFreeExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] pointer to USM memory object + void *pMem, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out][optional] return an event object that identifies the + /// asynchronous USM deallocation + ur_event_handle_t *phEvent) try { + auto pfnUSMFreeExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp; + if (nullptr == pfnUSMFreeExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + return pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index d75272a1ae..c670ec59a2 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -897,6 +897,32 @@ ur_result_t urPrintExpDevice_2dBlockArrayCapabilityFlags( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintUsmPoolNativeDesc(const struct ur_usm_pool_native_desc_t params, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t +urPrintExpEnqueueUsmAllocFlags(enum ur_exp_enqueue_usm_alloc_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpEnqueueUsmAllocProperties( + const struct ur_exp_enqueue_usm_alloc_properties_t params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, char *buffer, const size_t buff_size, size_t *out_size) { @@ -1764,6 +1790,38 @@ ur_result_t urPrintEnqueueEventsWaitWithBarrierExtParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t 
urPrintEnqueueUsmDeviceAllocExpParams( + const struct ur_enqueue_usm_device_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintEnqueueUsmSharedAllocExpParams( + const struct ur_enqueue_usm_shared_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintEnqueueUsmHostAllocExpParams( + const struct ur_enqueue_usm_host_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintEnqueueUsmFreeExpParams( + const struct ur_enqueue_usm_free_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintEnqueueCooperativeKernelLaunchExpParams( const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 7023161cb1..f91e9312c3 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -783,7 +783,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName` +/// + `::UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -5889,6 +5889,172 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM device allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & +/// pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM device allocation + ur_event_handle_t *phEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM shared allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & +/// pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM shared allocation + ur_event_handle_t *phEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM host allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & +/// pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] minimum size in bytes of the USM memory object to be allocated + const size_t size, + /// [in][optional] pointer to the enqueue asynchronous USM allocation + /// properties + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out] pointer to USM memory object + void **ppMem, + /// [out][optional] return an event object that identifies the + /// asynchronous USM host allocation + ur_event_handle_t *phEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM deallocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMFreeExp( + /// [in] handle of the queue object + ur_queue_handle_t hQueue, + /// [in][optional] handle of the USM memory pool + ur_usm_pool_handle_t pPool, + /// [in] pointer to USM memory object + void *pMem, + /// [in] size of the event wait list + uint32_t numEventsInWaitList, + /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of + /// events that must be complete before the kernel execution. + /// If nullptr, the numEventsInWaitList must be 0, indicating no wait + /// events. 
+ const ur_event_handle_t *phEventWaitList, + /// [out][optional] return an event object that identifies the + /// asynchronous USM deallocation + ur_event_handle_t *phEvent) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt index fd58154d56..9505685143 100644 --- a/test/adapters/level_zero/CMakeLists.txt +++ b/test/adapters/level_zero/CMakeLists.txt @@ -85,6 +85,18 @@ function(add_adapter_tests adapter) target_link_libraries(test-adapter-${adapter}_ipc PRIVATE ur_umf ) + + add_adapter_test(${adapter}_native_pool + FIXTURE DEVICES + SOURCES + native_pool_tests.cpp + ENVIRONMENT + "UR_ADAPTERS_FORCE_LOAD=\"$\"" + ) + + target_link_libraries(test-adapter-${adapter}_native_pool PRIVATE + ur_umf + ) endif() add_adapter_test(${adapter}_mem_buffer_map diff --git a/test/adapters/level_zero/native_pool_tests.cpp b/test/adapters/level_zero/native_pool_tests.cpp new file mode 100644 index 0000000000..86d3c8994c --- /dev/null +++ b/test/adapters/level_zero/native_pool_tests.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT SPDX-License-Identifier: Apache-2.0 WITH +// LLVM-exception + +#include + +using urL0NativePoolTest = uur::urContextTest; +UUR_INSTANTIATE_DEVICE_TEST_SUITE(urL0NativePoolTest); + +TEST_P(urL0NativePoolTest, SuccessHost) { + ur_device_usm_access_capability_flags_t hostUSMSupport = 0; + ASSERT_SUCCESS(uur::GetDeviceUSMHostSupport(device, hostUSMSupport)); + if (!hostUSMSupport) { + GTEST_SKIP() << "Host USM is not supported."; + } + + void *ptr = nullptr; + size_t allocSize = sizeof(int) * 1024; + ASSERT_SUCCESS(urUSMHostAlloc(context, nullptr, nullptr, allocSize, &ptr)); + ASSERT_NE(ptr, nullptr); + + // Set native pool descriptor buffer to the USM allocation + ur_usm_pool_native_desc_t nativePoolDesc{}; + nativePoolDesc.stype = UR_STRUCTURE_TYPE_USM_POOL_NATIVE_DESC; + nativePoolDesc.pNext = nullptr; + nativePoolDesc.pMem = ptr; + nativePoolDesc.size = allocSize; + nativePoolDesc.memType = UR_USM_TYPE_HOST; + nativePoolDesc.device = nullptr; + + ur_usm_pool_desc_t poolDesc{}; + poolDesc.stype = UR_STRUCTURE_TYPE_USM_POOL_DESC; + poolDesc.pNext = &nativePoolDesc; + poolDesc.flags = 0; + + ur_usm_pool_handle_t pool = nullptr; + ASSERT_SUCCESS(urUSMPoolCreate(context, &poolDesc, &pool)); + + void *samePtr = nullptr; + ASSERT_SUCCESS(urUSMHostAlloc(context, nullptr, pool, allocSize, &samePtr)); + ASSERT_EQ(ptr, samePtr); + ASSERT_SUCCESS(urUSMFree(context, samePtr)); + + ASSERT_SUCCESS(urUSMPoolRelease(pool)); + ASSERT_SUCCESS(urUSMFree(context, ptr)); +} + +// TEST_P(urL0NativePoolTest, FailSize) { +// ur_device_usm_access_capability_flags_t hostUSMSupport = 0; +// ASSERT_SUCCESS(uur::GetDeviceUSMHostSupport(device, hostUSMSupport)); +// if (!hostUSMSupport) { +// GTEST_SKIP() << "Host USM is not supported."; +// } + +// void *ptr = nullptr; +// size_t allocSize = sizeof(int) * 1024; +// ASSERT_SUCCESS(urUSMHostAlloc(context, nullptr, nullptr, allocSize, &ptr)); +// ASSERT_NE(ptr, nullptr); + +// // Set native pool descriptor buffer to the USM 
allocation +// ur_usm_pool_native_desc_t nativePoolDesc{}; +// nativePoolDesc.stype = UR_STRUCTURE_TYPE_USM_POOL_NATIVE_DESC; +// nativePoolDesc.pNext = nullptr; +// nativePoolDesc.pMem = ptr; +// nativePoolDesc.size = allocSize; +// nativePoolDesc.memType = UR_USM_TYPE_HOST; +// nativePoolDesc.device = nullptr; + +// ur_usm_pool_desc_t poolDesc{}; +// poolDesc.stype = UR_STRUCTURE_TYPE_USM_POOL_DESC; +// poolDesc.pNext = &nativePoolDesc; +// poolDesc.flags = 0; + +// ur_usm_pool_handle_t pool = nullptr; +// ASSERT_SUCCESS(urUSMPoolCreate(context, &poolDesc, &pool)); + +// void *samePtr = nullptr; +// ASSERT_SUCCESS(urUSMHostAlloc(context, nullptr, pool, allocSize, &samePtr)); +// ASSERT_EQ(ptr, samePtr); +// ASSERT_SUCCESS(urUSMFree(context, samePtr)); + +// ASSERT_SUCCESS(urUSMPoolRelease(pool)); +// ASSERT_SUCCESS(urUSMFree(context, ptr)); +// } diff --git a/test/conformance/testing/include/uur/utils.h b/test/conformance/testing/include/uur/utils.h index c94ff56dbd..ae60f2ed07 100644 --- a/test/conformance/testing/include/uur/utils.h +++ b/test/conformance/testing/include/uur/utils.h @@ -475,10 +475,10 @@ getDriverVersion(ur_device_handle_t hDevice) { if (major < minMajor || (major == minMajor && minor < minMinor) || \ (major == minMajor && minor == minMinor && patch < minPatch)) { \ GTEST_SKIP() << "Skipping test because driver version is too old for " \ - << adapterName << ". " \ - << "Driver version: " << major << "." << minor << "." \ - << patch << " Minimum required version: " << minMajor \ - << "." << minMinor << "." << minPatch; \ + << adapterName << ". " << "Driver version: " << major \ + << "." << minor << "." << patch \ + << " Minimum required version: " << minMajor << "." \ + << minMinor << "." 
<< minPatch; \ } \ } \ } while (0) diff --git a/tools/urinfo/urinfo.cpp b/tools/urinfo/urinfo.cpp index c86d166034..293ff2bce0 100644 --- a/tools/urinfo/urinfo.cpp +++ b/tools/urinfo/urinfo.cpp @@ -145,15 +145,15 @@ devices which are currently visible in the local execution environment. << "]"; } else { std::cout << "[adapter(" << adapterIndex << "," << adapter_backend - << "):" - << "platform(" << platformIndex << "):" - << "device(" << deviceIndex << "," << device_type << ")]"; + << "):" << "platform(" << platformIndex + << "):" << "device(" << deviceIndex << "," << device_type + << ")]"; } std::cout << " " << urinfo::getPlatformName(platform) << ", " << urinfo::getDeviceName(device) << " " - << urinfo::getDeviceVersion(device) << " " - << "[" << urinfo::getDeviceDriverVersion(device) << "]\n"; + << urinfo::getDeviceVersion(device) << " " << "[" + << urinfo::getDeviceDriverVersion(device) << "]\n"; adapter_device_id++; } @@ -163,16 +163,14 @@ devices which are currently visible in the local execution environment. void printDetail() { std::cout << "\n" - << "[loader]:" - << "\n"; + << "[loader]:" << "\n"; urinfo::printLoaderConfigInfos(loaderConfig); for (size_t adapterIndex = 0; adapterIndex < adapters.size(); adapterIndex++) { auto adapter = adapters[adapterIndex]; std::cout << "\n" - << "[adapter(" << adapterIndex << ")]:" - << "\n"; + << "[adapter(" << adapterIndex << ")]:" << "\n"; urinfo::printAdapterInfos(adapter); size_t numPlatforms = adapterPlatformsMap[adapter].size(); @@ -180,19 +178,17 @@ devices which are currently visible in the local execution environment. 
platformIndex++) { auto platform = adapterPlatformsMap[adapter][platformIndex]; std::cout << "\n" - << "[adapter(" << adapterIndex << ")," - << "platform(" << platformIndex << ")]:" - << "\n"; + << "[adapter(" << adapterIndex << ")," << "platform(" + << platformIndex << ")]:" << "\n"; urinfo::printPlatformInfos(platform); size_t numDevices = platformDevicesMap[platform].size(); for (size_t deviceI = 0; deviceI < numDevices; deviceI++) { auto device = platformDevicesMap[platform][deviceI]; std::cout << "\n" - << "[adapter(" << adapterIndex << ")," - << "platform(" << platformIndex << ")," - << "device(" << deviceI << ")]:" - << "\n"; + << "[adapter(" << adapterIndex << ")," << "platform(" + << platformIndex << ")," << "device(" << deviceI + << ")]:" << "\n"; urinfo::printDeviceInfos(device); } } diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index d01245138f..d7e4e63021 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -420,5 +420,8 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo( hDevice, UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP); } } // namespace urinfo