Skip to content

Commit 313c4f0

Browse files
[SYCL] Optimize handler::StoreLambda implementation (#17669)
The current implementation creates descriptions of all kernel params, then processes them one by one. It's possible to process each param right away instead.
1 parent ef97d89 commit 313c4f0

File tree

4 files changed

+56
-15
lines changed

4 files changed

+56
-15
lines changed

Diff for: sycl/include/sycl/handler.hpp

+16-14
Original file line numberDiff line numberDiff line change
@@ -384,16 +384,6 @@ template <int Dims> bool range_size_fits_in_size_t(const range<Dims> &r) {
384384
return true;
385385
}
386386

387-
template <typename KernelNameType>
388-
std::vector<kernel_param_desc_t> getKernelParamDescs() {
389-
std::vector<kernel_param_desc_t> Result;
390-
int NumParams = getKernelNumParams<KernelNameType>();
391-
Result.reserve(NumParams);
392-
for (int I = 0; I < NumParams; ++I) {
393-
Result.push_back(getKernelParamDesc<KernelNameType>(I));
394-
}
395-
return Result;
396-
}
397387
} // namespace detail
398388

399389
/// Command group handler class.
@@ -485,20 +475,30 @@ class __SYCL_EXPORT handler {
485475
"a single kernel or explicit memory operation.");
486476
}
487477

488-
/// Extracts and prepares kernel arguments from the lambda using information
489-
/// from the built-ins or integration header.
478+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
479+
// TODO: These functions are not used anymore; remove them in the next
480+
// ABI-breaking window.
490481
void extractArgsAndReqsFromLambda(
491482
char *LambdaPtr,
492483
const std::vector<detail::kernel_param_desc_t> &ParamDescs, bool IsESIMD);
493-
// TODO Unused, remove during ABI breaking window
494484
void
495485
extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum,
496486
const detail::kernel_param_desc_t *KernelArgs,
497487
bool IsESIMD);
488+
#endif
489+
/// Extracts and prepares kernel arguments from the lambda using information
490+
/// from the built-ins or integration header.
491+
void extractArgsAndReqsFromLambda(
492+
char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int),
493+
size_t NumKernelParams, bool IsESIMD);
498494

499495
/// Extracts and prepares kernel arguments set via set_arg(s).
500496
void extractArgsAndReqs();
501497

498+
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES)
499+
// TODO: processArg need not be public
500+
__SYCL_DLL_LOCAL
501+
#endif
502502
void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
503503
const int Size, const size_t Index, size_t &IndexShift,
504504
bool IsKernelCreatedFromSource, bool IsESIMD);
@@ -770,9 +770,11 @@ class __SYCL_EXPORT handler {
770770
// header, so don't perform things that require it.
771771
if constexpr (KernelHasName) {
772772
// TODO support ESIMD in no-integration-header case too.
773+
773774
clearArgs();
774775
extractArgsAndReqsFromLambda(MHostKernel->getPtr(),
775-
detail::getKernelParamDescs<KernelName>(),
776+
&(detail::getKernelParamDesc<KernelName>),
777+
detail::getKernelNumParams<KernelName>(),
776778
detail::isKernelESIMD<KernelName>());
777779
MKernelName = detail::getKernelName<KernelName>();
778780
} else {

Diff for: sycl/source/handler.cpp

+38-1
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,43 @@ void handler::extractArgsAndReqs() {
11151115
}
11161116
}
11171117

1118+
void handler::extractArgsAndReqsFromLambda(
1119+
char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int),
1120+
size_t NumKernelParams, bool IsESIMD) {
1121+
size_t IndexShift = 0;
1122+
impl->MArgs.reserve(MaxNumAdditionalArgs * NumKernelParams);
1123+
1124+
for (size_t I = 0; I < NumKernelParams; ++I) {
1125+
detail::kernel_param_desc_t ParamDesc = ParamDescGetter(I);
1126+
void *Ptr = LambdaPtr + ParamDesc.offset;
1127+
const detail::kernel_param_kind_t &Kind = ParamDesc.kind;
1128+
const int &Size = ParamDesc.info;
1129+
if (Kind == detail::kernel_param_kind_t::kind_accessor) {
1130+
// For accessor-kind args, Size carries information about the accessor.
1131+
// The first 11 bits of Size encode the accessor target.
1132+
const access::target AccTarget =
1133+
static_cast<access::target>(Size & AccessTargetMask);
1134+
if ((AccTarget == access::target::device ||
1135+
AccTarget == access::target::constant_buffer) ||
1136+
(AccTarget == access::target::image ||
1137+
AccTarget == access::target::image_array)) {
1138+
detail::AccessorBaseHost *AccBase =
1139+
static_cast<detail::AccessorBaseHost *>(Ptr);
1140+
Ptr = detail::getSyclObjImpl(*AccBase).get();
1141+
} else if (AccTarget == access::target::local) {
1142+
detail::LocalAccessorBaseHost *LocalAccBase =
1143+
static_cast<detail::LocalAccessorBaseHost *>(Ptr);
1144+
Ptr = detail::getSyclObjImpl(*LocalAccBase).get();
1145+
}
1146+
}
1147+
processArg(Ptr, Kind, Size, I, IndexShift,
1148+
/*IsKernelCreatedFromSource=*/false, IsESIMD);
1149+
}
1150+
}
1151+
1152+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
1153+
// TODO: These functions are not used anymore; remove them in the next
1154+
// ABI-breaking window.
11181155
void handler::extractArgsAndReqsFromLambda(
11191156
char *LambdaPtr, const std::vector<detail::kernel_param_desc_t> &ParamDescs,
11201157
bool IsESIMD) {
@@ -1149,14 +1186,14 @@ void handler::extractArgsAndReqsFromLambda(
11491186
}
11501187
}
11511188

1152-
// TODO Unused, remove during ABI breaking window
11531189
void handler::extractArgsAndReqsFromLambda(
11541190
char *LambdaPtr, size_t KernelArgsNum,
11551191
const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) {
11561192
std::vector<detail::kernel_param_desc_t> ParamDescs(
11571193
KernelArgs, KernelArgs + KernelArgsNum);
11581194
extractArgsAndReqsFromLambda(LambdaPtr, ParamDescs, IsESIMD);
11591195
}
1196+
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
11601197

11611198
// Calling methods of kernel_impl requires knowledge of class layout.
11621199
// As this is impossible in header, there's a function that calls necessary

Diff for: sycl/test/abi/sycl_symbols_linux.dump

+1
Original file line numberDiff line numberDiff line change
@@ -3567,6 +3567,7 @@ _ZN4sycl3_V17handler27addLifetimeSharedPtrStorageESt10shared_ptrIKvE
35673567
_ZN4sycl3_V17handler27computeFallbackKernelBoundsEmm
35683568
_ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcRKSt6vectorINS0_6detail19kernel_param_desc_tESaIS5_EEb
35693569
_ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tEb
3570+
_ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcPFNS0_6detail19kernel_param_desc_tEiEmb
35703571
_ZN4sycl3_V17handler28memcpyToHostOnlyDeviceGlobalEPKvS3_mbmm
35713572
_ZN4sycl3_V17handler28setArgsToAssociatedAccessorsEv
35723573
_ZN4sycl3_V17handler28setStateExplicitKernelBundleEv

Diff for: sycl/test/abi/sycl_symbols_windows.dump

+1
Original file line numberDiff line numberDiff line change
@@ -3987,6 +3987,7 @@
39873987
?ext_oneapi_wait_external_semaphore@queue@_V1@sycl@@QEAA?AVevent@23@Uexternal_semaphore@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z
39883988
?extractArgsAndReqs@handler@_V1@sycl@@AEAAXXZ
39893989
?extractArgsAndReqsFromLambda@handler@_V1@sycl@@AEAAXPEADAEBV?$vector@Ukernel_param_desc_t@detail@_V1@sycl@@V?$allocator@Ukernel_param_desc_t@detail@_V1@sycl@@@std@@@std@@_N@Z
3990+
?extractArgsAndReqsFromLambda@handler@_V1@sycl@@AEAAXPEADP6A?AUkernel_param_desc_t@detail@23@H@Z_K_N@Z
39903991
?extractArgsAndReqsFromLambda@handler@_V1@sycl@@AEAAXPEAD_KPEBUkernel_param_desc_t@detail@23@_N@Z
39913992
?fill_impl@handler@_V1@sycl@@AEAAXPEAXPEBX_K2@Z
39923993
?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ

0 commit comments

Comments
 (0)