Skip to content

[Bindless][SYCL][UR] Create a sampled image with a single UR API #18384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 5 additions & 9 deletions sycl/source/detail/bindless_images.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ __SYCL_EXPORT sampled_image_handle
create_image(image_mem_handle memHandle, const bindless_image_sampler &sampler,
const image_descriptor &desc, const sycl::device &syclDevice,
const sycl::context &syclContext) {
return create_image(reinterpret_cast<void*>(memHandle.raw_handle),
return create_image(reinterpret_cast<void *>(memHandle.raw_handle),
0 /*pitch*/, sampler, desc, syclDevice, syclContext);
}

Expand All @@ -280,14 +280,14 @@ __SYCL_EXPORT sampled_image_handle
create_image(image_mem &imgMem, const bindless_image_sampler &sampler,
const image_descriptor &desc, const sycl::device &syclDevice,
const sycl::context &syclContext) {
return create_image(reinterpret_cast<void*>(imgMem.get_handle().raw_handle),
return create_image(reinterpret_cast<void *>(imgMem.get_handle().raw_handle),
0 /*pitch*/, sampler, desc, syclDevice, syclContext);
}

__SYCL_EXPORT sampled_image_handle
create_image(image_mem &imgMem, const bindless_image_sampler &sampler,
const image_descriptor &desc, const sycl::queue &syclQueue) {
return create_image(reinterpret_cast<void*>(imgMem.get_handle().raw_handle),
return create_image(reinterpret_cast<void *>(imgMem.get_handle().raw_handle),
0 /*pitch*/, sampler, desc, syclQueue.get_device(),
syclQueue.get_context());
}
Expand Down Expand Up @@ -367,10 +367,6 @@ create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler,
translate_cubemap_filter_mode(sampler.cubemap_filtering)};
UrAddrModes.pNext = &UrCubemapProps;

ur_sampler_handle_t urSampler = nullptr;
Adapter->call<sycl::errc::runtime, sycl::detail::UrApiKind::urSamplerCreate>(
urCtx, &UrSamplerProps, &urSampler);

ur_image_desc_t urDesc;
ur_image_format_t urFormat;
populate_ur_structs(desc, urDesc, urFormat, pitch);
Expand All @@ -381,7 +377,7 @@ create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler,
sycl::detail::UrApiKind::urBindlessImagesSampledImageCreateExp>(
urCtx, urDevice,
reinterpret_cast<ur_exp_image_mem_native_handle_t>(devPtr), &urFormat,
&urDesc, urSampler, &urImageHandle);
&urDesc, &UrSamplerProps, &urImageHandle);

return sampled_image_handle{urImageHandle};
}
Expand Down Expand Up @@ -650,7 +646,7 @@ __SYCL_EXPORT external_semaphore import_external_semaphore(
urCtx, urDevice, urHandleType, &urExternalSemDesc, &urExternalSemaphore);

return external_semaphore{urExternalSemaphore,
externalSemaphoreDesc.handle_type};
externalSemaphoreDesc.handle_type};
}

template <>
Expand Down
13 changes: 9 additions & 4 deletions unified-runtime/include/ur_api.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions unified-runtime/include/ur_ddi.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions unified-runtime/include/ur_print.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions unified-runtime/scripts/core/EXP-BINDLESS-IMAGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,9 @@ Changelog
+----------+-------------------------------------------------------------+
| 23.0 | Added BindlessImagesFreeMappedLinearMemory function. |
+----------+-------------------------------------------------------------+
| 24.0 || Update the ${x}BindlessImagesSampledImageCreateExp API |
| || to take a sampler description instead of sampler handle. |
+----------+-------------------------------------------------------------+

Contributors
--------------------------------------------------------------------------------
Expand All @@ -300,3 +303,4 @@ Contributors
* Chedy Najjar `[email protected] <[email protected]>`_
* Sean Stirling `[email protected] <[email protected]>`_
* Peter Zuzek `[email protected] [email protected] <[email protected]>`_
* Georgi Mirazchiyski `[email protected] <[email protected]>`_
7 changes: 4 additions & 3 deletions unified-runtime/scripts/core/exp-bindless-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ name: SampledImageCreateExp
ordinal: "0"
analogue:
- "**cuTexObjectCreate**"
- "**hipTexObjectCreate**"
params:
- type: $x_context_handle_t
name: hContext
Expand All @@ -543,9 +544,9 @@ params:
- type: "const $x_image_desc_t*"
name: pImageDesc
desc: "[in] pointer to image description"
- type: $x_sampler_handle_t
name: hSampler
desc: "[in] sampler to be used"
- type: const $x_sampler_desc_t*
name: pSamplerDesc
desc: "[in] pointer to sampler description to be used"
- type: $x_exp_image_native_handle_t*
name: phImage
desc: "[out][alloc] pointer to handle of image object created"
Expand Down
185 changes: 112 additions & 73 deletions unified-runtime/source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,75 +154,127 @@ cudaToUrImageChannelFormat(CUarray_format cuda_format,
}
}

ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
ur_result_t urToCudaFilterMode(ur_sampler_filter_mode_t FilterMode,
CUfilter_mode &CudaFilterMode) {
switch (FilterMode) {
case UR_SAMPLER_FILTER_MODE_NEAREST:
CudaFilterMode = CU_TR_FILTER_MODE_POINT;
break;
case UR_SAMPLER_FILTER_MODE_LINEAR:
CudaFilterMode = CU_TR_FILTER_MODE_LINEAR;
break;
default:
setErrorMessage("Invalid filter mode was requested for CUDA.",
UR_RESULT_ERROR_INVALID_VALUE);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}

return UR_RESULT_SUCCESS;
}

ur_result_t urToCudaAddressingMode(ur_sampler_addressing_mode_t AddressMode,
CUaddress_mode &CudaAddressMode) {
switch (AddressMode) {
case UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
CudaAddressMode = CU_TR_ADDRESS_MODE_CLAMP;
break;
case UR_SAMPLER_ADDRESSING_MODE_CLAMP:
CudaAddressMode = CU_TR_ADDRESS_MODE_BORDER;
break;
case UR_SAMPLER_ADDRESSING_MODE_REPEAT:
CudaAddressMode = CU_TR_ADDRESS_MODE_WRAP;
break;
case UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
CudaAddressMode = CU_TR_ADDRESS_MODE_MIRROR;
break;
default:
setErrorMessage("Invalid addressing mode was requested for CUDA.",
UR_RESULT_ERROR_INVALID_VALUE);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}

return UR_RESULT_SUCCESS;
}

ur_result_t urTextureCreate(const ur_sampler_desc_t *pSamplerDesc,
const ur_image_desc_t *pImageDesc,
const CUDA_RESOURCE_DESC &ResourceDesc,
const unsigned int normalized_dtype_flag,
ur_exp_image_native_handle_t *phRetImage) {

try {
/// pi_sampler_properties
/// Layout of UR samplers for CUDA
///
/// Sampler property layout:
/// | <bits> | <usage>
/// -----------------------------------
/// | 31 30 ... 13 | N/A
/// | 12 | cubemap filter mode
/// | 11 | mip filter mode
/// | 10 9 8 | addressing mode 3
/// | 7 6 5 | addressing mode 2
/// | 4 3 2 | addressing mode 1
/// | 1 | filter mode
/// | 0 | normalize coords
CUDA_TEXTURE_DESC ImageTexDesc = {};
CUaddress_mode AddrMode[3] = {};
for (size_t i = 0; i < 3; i++) {
ur_sampler_addressing_mode_t AddrModeProp =
hSampler->getAddressingModeDim(i);
if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE -
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
AddrMode[i] = CU_TR_ADDRESS_MODE_CLAMP;
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP -
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
AddrMode[i] = CU_TR_ADDRESS_MODE_BORDER;
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_REPEAT -
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
AddrMode[i] = CU_TR_ADDRESS_MODE_WRAP;
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT -
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
AddrMode[i] = CU_TR_ADDRESS_MODE_MIRROR;
// Enumarate to linked properties (extension-specific structures).
void *pNext = const_cast<void *>(pSamplerDesc->pNext);
while (pNext != nullptr) {
const ur_base_desc_t *BaseDesc =
reinterpret_cast<const ur_base_desc_t *>(pNext);
if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES) {
// UR Mipmap properties
const ur_exp_sampler_mip_properties_t *SamplerMipProperties =
reinterpret_cast<const ur_exp_sampler_mip_properties_t *>(pNext);
ImageTexDesc.maxMipmapLevelClamp =
SamplerMipProperties->maxMipmapLevelClamp;
ImageTexDesc.minMipmapLevelClamp =
SamplerMipProperties->minMipmapLevelClamp;
ImageTexDesc.maxAnisotropy = SamplerMipProperties->maxAnisotropy;
// Cuda Mipmap attributes
CUfilter_mode MipFilterMode;
ur_sampler_filter_mode_t MipFilterModeProp =
SamplerMipProperties->mipFilterMode;
UR_CALL(urToCudaFilterMode(MipFilterModeProp, MipFilterMode));
ImageTexDesc.mipmapFilterMode = MipFilterMode;
} else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES) {
// UR Addressing modes
const ur_exp_sampler_addr_modes_t *SamplerAddrModes =
reinterpret_cast<const ur_exp_sampler_addr_modes_t *>(pNext);
// Cuda Addressing modes
CUaddress_mode AddrMode[3] = {};
for (size_t i = 0; i < 3; i++) {
ur_sampler_addressing_mode_t AddrModeProp =
SamplerAddrModes->addrModes[i];
UR_CALL(urToCudaAddressingMode(AddrModeProp, AddrMode[i]));
}
// The address modes can interfere with other dimensions
// e.g. 1D texture sampling can be interfered with when setting other
// dimension address modes despite their nonexistence.
ImageTexDesc.addressMode[0] = AddrMode[0]; // 1D
ImageTexDesc.addressMode[1] = pImageDesc->height > 0
? AddrMode[1]
: ImageTexDesc.addressMode[1]; // 2D
ImageTexDesc.addressMode[2] = pImageDesc->depth > 0
? AddrMode[2]
: ImageTexDesc.addressMode[2]; // 3D
} else if (BaseDesc->stype ==
UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES) {
// UR Cubemap properties
const ur_exp_sampler_cubemap_properties_t *SamplerCubemapProperties =
reinterpret_cast<const ur_exp_sampler_cubemap_properties_t *>(
pNext);
ur_exp_sampler_cubemap_filter_mode_t CubemapFilterModeProp =
SamplerCubemapProperties->cubemapFilterMode;
// Cuda Cubemap attributes
if (CubemapFilterModeProp ==
UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS) {
#if CUDA_VERSION >= 11060
ImageTexDesc.flags |= CU_TRSF_SEAMLESS_CUBEMAP;
#else
setErrorMessage("The UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS "
"feature requires cuda 11.6 or later.",
UR_RESULT_ERROR_UNSUPPORTED_FEATURE);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
#endif
}
}
pNext = const_cast<void *>(BaseDesc->pNext);
}

CUfilter_mode FilterMode;
ur_sampler_filter_mode_t FilterModeProp = hSampler->getFilterMode();
FilterMode =
FilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT;
CUfilter_mode FilterMode = pSamplerDesc->filterMode
? CU_TR_FILTER_MODE_LINEAR
: CU_TR_FILTER_MODE_POINT;
ImageTexDesc.filterMode = FilterMode;

// Mipmap attributes
CUfilter_mode MipFilterMode;
ur_sampler_filter_mode_t MipFilterModeProp = hSampler->getMipFilterMode();
MipFilterMode =
MipFilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT;
ImageTexDesc.mipmapFilterMode = MipFilterMode;
ImageTexDesc.maxMipmapLevelClamp = hSampler->MaxMipmapLevelClamp;
ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp;
ImageTexDesc.maxAnisotropy = static_cast<unsigned>(hSampler->MaxAnisotropy);

// The address modes can interfere with other dimensions
// e.g. 1D texture sampling can be interfered with when setting other
// dimension address modes despite their nonexistence.
ImageTexDesc.addressMode[0] = AddrMode[0]; // 1D
ImageTexDesc.addressMode[1] = pImageDesc->height > 0
? AddrMode[1]
: ImageTexDesc.addressMode[1]; // 2D
ImageTexDesc.addressMode[2] =
pImageDesc->depth > 0 ? AddrMode[2] : ImageTexDesc.addressMode[2]; // 3D

// flags takes the normalized coordinates setting -- unnormalized is default
ImageTexDesc.flags = (hSampler->isNormalizedCoords())
ImageTexDesc.flags = (pSamplerDesc->normalizedCoords)
? CU_TRSF_NORMALIZED_COORDINATES
: ImageTexDesc.flags;

Expand All @@ -231,20 +283,6 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
if (!normalized_dtype_flag) {
ImageTexDesc.flags |= CU_TRSF_READ_AS_INTEGER;
}
// Cubemap attributes
ur_exp_sampler_cubemap_filter_mode_t CubemapFilterModeProp =
hSampler->getCubemapFilterMode();
if (CubemapFilterModeProp == UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS) {
#if CUDA_VERSION >= 11060
ImageTexDesc.flags |= CU_TRSF_SEAMLESS_CUBEMAP;
#else
setErrorMessage("The UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS "
"feature requires cuda 11.6 or later.",
UR_RESULT_ERROR_UNSUPPORTED_FEATURE);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
#endif
}

CUtexObject Texture;
UR_CHECK_ERROR(
cuTexObjectCreate(&Texture, &ResourceDesc, &ImageTexDesc, nullptr));
Expand Down Expand Up @@ -506,7 +544,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_mem_native_handle_t hImageMem,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage) {
const ur_sampler_desc_t *pSamplerDesc,
ur_exp_image_native_handle_t *phImage) {
UR_ASSERT(std::find(hContext->getDevices().begin(),
hContext->getDevices().end(),
hDevice) != hContext->getDevices().end(),
Expand Down Expand Up @@ -573,7 +612,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
return UR_RESULT_ERROR_INVALID_VALUE;
}

UR_CHECK_ERROR(urTextureCreate(hSampler, pImageDesc, image_res_desc,
UR_CHECK_ERROR(urTextureCreate(pSamplerDesc, pImageDesc, image_res_desc,
normalized_dtype_flag, phImage));

} catch (ur_result_t Err) {
Expand Down
7 changes: 6 additions & 1 deletion unified-runtime/source/adapters/cuda/image.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@ ur_result_t
cudaToUrImageChannelFormat(CUarray_format cuda_format,
ur_image_channel_type_t *return_image_channel_type);

ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
ur_result_t urToCudaFilterMode(ur_sampler_filter_mode_t FilterMode,
CUfilter_mode &CudaFilterMode);
ur_result_t urToCudaAddressingMode(ur_sampler_addressing_mode_t AddressMode,
CUaddress_mode &CudaAddressMode);

ur_result_t urTextureCreate(const ur_sampler_desc_t *pSamplerDesc,
const ur_image_desc_t *pImageDesc,
const CUDA_RESOURCE_DESC &ResourceDesc,
const unsigned int normalized_dtype_flag,
Expand Down
Loading
Loading