Skip to content

Commit 6693ade

Browse files
committed
[Bindless][SYCL][UR] Create a sampled image with a single UR API
We no longer need to create a UR sampler object in order to create a sampled image. The backends can simply use the sampler description to create the sampled image.
1 parent 5512617 commit 6693ade

File tree

25 files changed

+405
-284
lines changed

25 files changed

+405
-284
lines changed

sycl/source/detail/bindless_images.cpp

+5-9
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ __SYCL_EXPORT sampled_image_handle
265265
create_image(image_mem_handle memHandle, const bindless_image_sampler &sampler,
266266
const image_descriptor &desc, const sycl::device &syclDevice,
267267
const sycl::context &syclContext) {
268-
return create_image(reinterpret_cast<void*>(memHandle.raw_handle),
268+
return create_image(reinterpret_cast<void *>(memHandle.raw_handle),
269269
0 /*pitch*/, sampler, desc, syclDevice, syclContext);
270270
}
271271

@@ -280,14 +280,14 @@ __SYCL_EXPORT sampled_image_handle
280280
create_image(image_mem &imgMem, const bindless_image_sampler &sampler,
281281
const image_descriptor &desc, const sycl::device &syclDevice,
282282
const sycl::context &syclContext) {
283-
return create_image(reinterpret_cast<void*>(imgMem.get_handle().raw_handle),
283+
return create_image(reinterpret_cast<void *>(imgMem.get_handle().raw_handle),
284284
0 /*pitch*/, sampler, desc, syclDevice, syclContext);
285285
}
286286

287287
__SYCL_EXPORT sampled_image_handle
288288
create_image(image_mem &imgMem, const bindless_image_sampler &sampler,
289289
const image_descriptor &desc, const sycl::queue &syclQueue) {
290-
return create_image(reinterpret_cast<void*>(imgMem.get_handle().raw_handle),
290+
return create_image(reinterpret_cast<void *>(imgMem.get_handle().raw_handle),
291291
0 /*pitch*/, sampler, desc, syclQueue.get_device(),
292292
syclQueue.get_context());
293293
}
@@ -367,10 +367,6 @@ create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler,
367367
translate_cubemap_filter_mode(sampler.cubemap_filtering)};
368368
UrAddrModes.pNext = &UrCubemapProps;
369369

370-
ur_sampler_handle_t urSampler = nullptr;
371-
Adapter->call<sycl::errc::runtime, sycl::detail::UrApiKind::urSamplerCreate>(
372-
urCtx, &UrSamplerProps, &urSampler);
373-
374370
ur_image_desc_t urDesc;
375371
ur_image_format_t urFormat;
376372
populate_ur_structs(desc, urDesc, urFormat, pitch);
@@ -381,7 +377,7 @@ create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler,
381377
sycl::detail::UrApiKind::urBindlessImagesSampledImageCreateExp>(
382378
urCtx, urDevice,
383379
reinterpret_cast<ur_exp_image_mem_native_handle_t>(devPtr), &urFormat,
384-
&urDesc, urSampler, &urImageHandle);
380+
&urDesc, &UrSamplerProps, &urImageHandle);
385381

386382
return sampled_image_handle{urImageHandle};
387383
}
@@ -632,7 +628,7 @@ __SYCL_EXPORT external_semaphore import_external_semaphore(
632628
urCtx, urDevice, urHandleType, &urExternalSemDesc, &urExternalSemaphore);
633629

634630
return external_semaphore{urExternalSemaphore,
635-
externalSemaphoreDesc.handle_type};
631+
externalSemaphoreDesc.handle_type};
636632
}
637633

638634
template <>

unified-runtime/include/ur_api.h

+10-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/include/ur_ddi.h

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/include/ur_print.hpp

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

unified-runtime/scripts/core/EXP-BINDLESS-IMAGES.rst

+4
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,9 @@ Changelog
286286
| || - GetImageUnsampledHandleSupportExp |
287287
| || - GetImageSampledHandleSupportExp |
288288
+----------+-------------------------------------------------------------+
289+
| 23.0 || Update the ${x}BindlessImagesSampledImageCreateExp API |
290+
| || to take a sampled description instead of sampler handle. |
291+
+----------+-------------------------------------------------------------+
289292

290293
Contributors
291294
--------------------------------------------------------------------------------
@@ -296,3 +299,4 @@ Contributors
296299
297300
298301
302+
* Georgi Mirazchiyski `[email protected] <[email protected]>`_

unified-runtime/scripts/core/exp-bindless-images.yml

+4-3
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ name: SampledImageCreateExp
527527
ordinal: "0"
528528
analogue:
529529
- "**cuTexObjectCreate**"
530+
- "**hipTexObjectCreate**"
530531
params:
531532
- type: $x_context_handle_t
532533
name: hContext
@@ -543,9 +544,9 @@ params:
543544
- type: "const $x_image_desc_t*"
544545
name: pImageDesc
545546
desc: "[in] pointer to image description"
546-
- type: $x_sampler_handle_t
547-
name: hSampler
548-
desc: "[in] sampler to be used"
547+
- type: const $x_sampler_desc_t*
548+
name: pSamplerDesc
549+
desc: "[in] pointer to sampler description to be used"
549550
- type: $x_exp_image_native_handle_t*
550551
name: phImage
551552
desc: "[out][alloc] pointer to handle of image object created"

unified-runtime/source/adapters/cuda/image.cpp

+112-73
Original file line numberDiff line numberDiff line change
@@ -154,75 +154,127 @@ cudaToUrImageChannelFormat(CUarray_format cuda_format,
154154
}
155155
}
156156

157-
ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
157+
ur_result_t urToCudaFilterMode(ur_sampler_filter_mode_t FilterMode,
158+
CUfilter_mode &CudaFilterMode) {
159+
switch (FilterMode) {
160+
case UR_SAMPLER_FILTER_MODE_NEAREST:
161+
CudaFilterMode = CU_TR_FILTER_MODE_POINT;
162+
break;
163+
case UR_SAMPLER_FILTER_MODE_LINEAR:
164+
CudaFilterMode = CU_TR_FILTER_MODE_LINEAR;
165+
break;
166+
default:
167+
setErrorMessage("Unsupported filter mode",
168+
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
169+
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
170+
}
171+
172+
return UR_RESULT_SUCCESS;
173+
}
174+
175+
ur_result_t urToCudaAddressingMode(ur_sampler_addressing_mode_t AddressMode,
176+
CUaddress_mode &CudaAddressMode) {
177+
switch (AddressMode) {
178+
case UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
179+
CudaAddressMode = CU_TR_ADDRESS_MODE_CLAMP;
180+
break;
181+
case UR_SAMPLER_ADDRESSING_MODE_CLAMP:
182+
CudaAddressMode = CU_TR_ADDRESS_MODE_BORDER;
183+
break;
184+
case UR_SAMPLER_ADDRESSING_MODE_REPEAT:
185+
CudaAddressMode = CU_TR_ADDRESS_MODE_WRAP;
186+
break;
187+
case UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
188+
CudaAddressMode = CU_TR_ADDRESS_MODE_MIRROR;
189+
break;
190+
default:
191+
setErrorMessage("Unsupported addressing mode",
192+
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
193+
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
194+
}
195+
196+
return UR_RESULT_SUCCESS;
197+
}
198+
199+
ur_result_t urTextureCreate(const ur_sampler_desc_t *pSamplerDesc,
158200
const ur_image_desc_t *pImageDesc,
159201
const CUDA_RESOURCE_DESC &ResourceDesc,
160202
const unsigned int normalized_dtype_flag,
161203
ur_exp_image_native_handle_t *phRetImage) {
162-
163204
try {
164-
/// pi_sampler_properties
165-
/// Layout of UR samplers for CUDA
166-
///
167-
/// Sampler property layout:
168-
/// | <bits> | <usage>
169-
/// -----------------------------------
170-
/// | 31 30 ... 13 | N/A
171-
/// | 12 | cubemap filter mode
172-
/// | 11 | mip filter mode
173-
/// | 10 9 8 | addressing mode 3
174-
/// | 7 6 5 | addressing mode 2
175-
/// | 4 3 2 | addressing mode 1
176-
/// | 1 | filter mode
177-
/// | 0 | normalize coords
178205
CUDA_TEXTURE_DESC ImageTexDesc = {};
179-
CUaddress_mode AddrMode[3] = {};
180-
for (size_t i = 0; i < 3; i++) {
181-
ur_sampler_addressing_mode_t AddrModeProp =
182-
hSampler->getAddressingModeDim(i);
183-
if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE -
184-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
185-
AddrMode[i] = CU_TR_ADDRESS_MODE_CLAMP;
186-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP -
187-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
188-
AddrMode[i] = CU_TR_ADDRESS_MODE_BORDER;
189-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_REPEAT -
190-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
191-
AddrMode[i] = CU_TR_ADDRESS_MODE_WRAP;
192-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT -
193-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
194-
AddrMode[i] = CU_TR_ADDRESS_MODE_MIRROR;
206+
// Enumarate to linked properties (extension-specific structures).
207+
void *pNext = const_cast<void *>(pSamplerDesc->pNext);
208+
while (pNext != nullptr) {
209+
const ur_base_desc_t *BaseDesc =
210+
reinterpret_cast<const ur_base_desc_t *>(pNext);
211+
if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES) {
212+
// UR Mipmap properties
213+
const ur_exp_sampler_mip_properties_t *SamplerMipProperties =
214+
reinterpret_cast<const ur_exp_sampler_mip_properties_t *>(pNext);
215+
ImageTexDesc.maxMipmapLevelClamp =
216+
SamplerMipProperties->maxMipmapLevelClamp;
217+
ImageTexDesc.minMipmapLevelClamp =
218+
SamplerMipProperties->minMipmapLevelClamp;
219+
ImageTexDesc.maxAnisotropy = SamplerMipProperties->maxAnisotropy;
220+
// Cuda Mipmap attributes
221+
CUfilter_mode MipFilterMode;
222+
ur_sampler_filter_mode_t MipFilterModeProp =
223+
SamplerMipProperties->mipFilterMode;
224+
UR_CALL(urToCudaFilterMode(MipFilterModeProp, MipFilterMode));
225+
ImageTexDesc.mipmapFilterMode = MipFilterMode;
226+
} else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES) {
227+
// UR Addressing modes
228+
const ur_exp_sampler_addr_modes_t *SamplerAddrModes =
229+
reinterpret_cast<const ur_exp_sampler_addr_modes_t *>(pNext);
230+
// Cuda Addressing modes
231+
CUaddress_mode AddrMode[3] = {};
232+
for (size_t i = 0; i < 3; i++) {
233+
ur_sampler_addressing_mode_t AddrModeProp =
234+
SamplerAddrModes->addrModes[i];
235+
UR_CALL(urToCudaAddressingMode(AddrModeProp, AddrMode[i]));
236+
}
237+
// The address modes can interfere with other dimensions
238+
// e.g. 1D texture sampling can be interfered with when setting other
239+
// dimension address modes despite their nonexistence.
240+
ImageTexDesc.addressMode[0] = AddrMode[0]; // 1D
241+
ImageTexDesc.addressMode[1] = pImageDesc->height > 0
242+
? AddrMode[1]
243+
: ImageTexDesc.addressMode[1]; // 2D
244+
ImageTexDesc.addressMode[2] = pImageDesc->depth > 0
245+
? AddrMode[2]
246+
: ImageTexDesc.addressMode[2]; // 3D
247+
} else if (BaseDesc->stype ==
248+
UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES) {
249+
// UR Cubemap properties
250+
const ur_exp_sampler_cubemap_properties_t *SamplerCubemapProperties =
251+
reinterpret_cast<const ur_exp_sampler_cubemap_properties_t *>(
252+
pNext);
253+
ur_exp_sampler_cubemap_filter_mode_t CubemapFilterModeProp =
254+
SamplerCubemapProperties->cubemapFilterMode;
255+
// Cuda Cubemap attributes
256+
if (CubemapFilterModeProp ==
257+
UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS) {
258+
#if CUDA_VERSION >= 11060
259+
ImageTexDesc.flags |= CU_TRSF_SEAMLESS_CUBEMAP;
260+
#else
261+
setErrorMessage("The UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS "
262+
"feature requires cuda 11.6 or later.",
263+
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
264+
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
265+
#endif
266+
}
195267
}
268+
pNext = const_cast<void *>(BaseDesc->pNext);
196269
}
197270

198-
CUfilter_mode FilterMode;
199-
ur_sampler_filter_mode_t FilterModeProp = hSampler->getFilterMode();
200-
FilterMode =
201-
FilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT;
271+
CUfilter_mode FilterMode = pSamplerDesc->filterMode
272+
? CU_TR_FILTER_MODE_LINEAR
273+
: CU_TR_FILTER_MODE_POINT;
202274
ImageTexDesc.filterMode = FilterMode;
203275

204-
// Mipmap attributes
205-
CUfilter_mode MipFilterMode;
206-
ur_sampler_filter_mode_t MipFilterModeProp = hSampler->getMipFilterMode();
207-
MipFilterMode =
208-
MipFilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT;
209-
ImageTexDesc.mipmapFilterMode = MipFilterMode;
210-
ImageTexDesc.maxMipmapLevelClamp = hSampler->MaxMipmapLevelClamp;
211-
ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp;
212-
ImageTexDesc.maxAnisotropy = static_cast<unsigned>(hSampler->MaxAnisotropy);
213-
214-
// The address modes can interfere with other dimensions
215-
// e.g. 1D texture sampling can be interfered with when setting other
216-
// dimension address modes despite their nonexistence.
217-
ImageTexDesc.addressMode[0] = AddrMode[0]; // 1D
218-
ImageTexDesc.addressMode[1] = pImageDesc->height > 0
219-
? AddrMode[1]
220-
: ImageTexDesc.addressMode[1]; // 2D
221-
ImageTexDesc.addressMode[2] =
222-
pImageDesc->depth > 0 ? AddrMode[2] : ImageTexDesc.addressMode[2]; // 3D
223-
224276
// flags takes the normalized coordinates setting -- unnormalized is default
225-
ImageTexDesc.flags = (hSampler->isNormalizedCoords())
277+
ImageTexDesc.flags = (pSamplerDesc->normalizedCoords)
226278
? CU_TRSF_NORMALIZED_COORDINATES
227279
: ImageTexDesc.flags;
228280

@@ -231,20 +283,6 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
231283
if (!normalized_dtype_flag) {
232284
ImageTexDesc.flags |= CU_TRSF_READ_AS_INTEGER;
233285
}
234-
// Cubemap attributes
235-
ur_exp_sampler_cubemap_filter_mode_t CubemapFilterModeProp =
236-
hSampler->getCubemapFilterMode();
237-
if (CubemapFilterModeProp == UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS) {
238-
#if CUDA_VERSION >= 11060
239-
ImageTexDesc.flags |= CU_TRSF_SEAMLESS_CUBEMAP;
240-
#else
241-
setErrorMessage("The UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS "
242-
"feature requires cuda 11.6 or later.",
243-
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
244-
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
245-
#endif
246-
}
247-
248286
CUtexObject Texture;
249287
UR_CHECK_ERROR(
250288
cuTexObjectCreate(&Texture, &ResourceDesc, &ImageTexDesc, nullptr));
@@ -506,7 +544,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
506544
ur_context_handle_t hContext, ur_device_handle_t hDevice,
507545
ur_exp_image_mem_native_handle_t hImageMem,
508546
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
509-
ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage) {
547+
const ur_sampler_desc_t *pSamplerDesc,
548+
ur_exp_image_native_handle_t *phImage) {
510549
UR_ASSERT(std::find(hContext->getDevices().begin(),
511550
hContext->getDevices().end(),
512551
hDevice) != hContext->getDevices().end(),
@@ -573,7 +612,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
573612
return UR_RESULT_ERROR_INVALID_VALUE;
574613
}
575614

576-
UR_CHECK_ERROR(urTextureCreate(hSampler, pImageDesc, image_res_desc,
615+
UR_CHECK_ERROR(urTextureCreate(pSamplerDesc, pImageDesc, image_res_desc,
577616
normalized_dtype_flag, phImage));
578617

579618
} catch (ur_result_t Err) {

0 commit comments

Comments
 (0)