@@ -715,17 +715,17 @@ class __SYCL_EXPORT handler {
715
715
/// \param KernelFunc is a SYCL kernel function
716
716
/// \param ParamDescs is the vector of kernel parameter descriptors.
717
717
template <typename KernelName, typename KernelType, int Dims,
718
- typename LambdaArgType>
719
- void StoreLambda (KernelType KernelFunc) {
718
+ typename LambdaArgType, typename KernelTypeUniversalRef >
719
+ void StoreLambda (KernelTypeUniversalRef && KernelFunc) {
720
720
constexpr bool IsCallableWithKernelHandler =
721
721
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
722
722
LambdaArgType>::value;
723
723
724
724
// Not using `std::make_unique` to avoid unnecessary instantiations of
725
725
// `std::unique_ptr<HostKernel<...>>`. Only
726
726
// `std::unique_ptr<HostKernelBase>` is necessary.
727
- MHostKernel.reset (
728
- new detail::HostKernel<KernelType, LambdaArgType, Dims >(KernelFunc));
727
+ MHostKernel.reset (new detail::HostKernel<KernelType, LambdaArgType, Dims>(
728
+ std::forward<KernelTypeUniversalRef >(KernelFunc) ));
729
729
730
730
constexpr bool KernelHasName =
731
731
detail::getKernelName<KernelName>() != nullptr &&
@@ -739,7 +739,7 @@ class __SYCL_EXPORT handler {
739
739
#ifdef __INTEL_SYCL_USE_INTEGRATION_HEADERS
740
740
static_assert (
741
741
!KernelHasName ||
742
- sizeof (KernelFunc ) == detail::getKernelSize<KernelName>(),
742
+ sizeof (KernelType ) == detail::getKernelSize<KernelName>(),
743
743
" Unexpected kernel lambda size. This can be caused by an "
744
744
" external host compiler producing a lambda with an "
745
745
" unexpected layout. This is a limitation of the compiler."
@@ -1133,7 +1133,7 @@ class __SYCL_EXPORT handler {
1133
1133
typename KernelName, typename KernelType, int Dims,
1134
1134
typename PropertiesT = ext::oneapi::experimental::empty_properties_t >
1135
1135
void parallel_for_lambda_impl (range<Dims> UserRange, PropertiesT Props,
1136
- KernelType KernelFunc) {
1136
+ const KernelType & KernelFunc) {
1137
1137
#ifndef __SYCL_DEVICE_ONLY__
1138
1138
throwIfActionIsCreated ();
1139
1139
throwOnKernelParameterMisuse<KernelName, KernelType>();
@@ -1545,19 +1545,22 @@ class __SYCL_EXPORT handler {
1545
1545
// methods side.
1546
1546
1547
1547
// Static helper that calls `kernel_single_task` on the handler `h`, supplying
// `TypesToForward...` followed by `Props...` as explicit template arguments.
// `Props` is declared outside this view (presumably a parameter pack of the
// enclosing struct -- confirm against the full file).
//
// Diff hunk: the `-` lines take the argument pack by value (`ArgsTy... Args`)
// and pass it on as lvalues; the `+` lines take a forwarding-reference pack
// (`ArgsTy &&...Args`) and pass each element on with std::forward<ArgsTy>, so
// every argument keeps the value category it was called with.
template <typename ... TypesToForward, typename ... ArgsTy>
1548
- static void kernel_single_task_unpack (handler *h, ArgsTy... Args) {
1549
- h->kernel_single_task <TypesToForward..., Props...>(Args...);
1548
+ static void kernel_single_task_unpack (handler *h, ArgsTy &&...Args) {
1549
+ h->kernel_single_task <TypesToForward..., Props...>(
1550
+ std::forward<ArgsTy>(Args)...);
1550
1551
}
1551
1552
1552
1553
// Static helper that calls `kernel_parallel_for` on the handler `h`, supplying
// `TypesToForward...` followed by `Props...` as explicit template arguments.
// `Props` is declared outside this view (presumably a parameter pack of the
// enclosing struct -- confirm against the full file).
//
// Diff hunk: the by-value pack `ArgsTy... Args` (`-` lines) becomes a
// forwarding-reference pack `ArgsTy &&...Args` (`+` lines), and the call now
// passes each element through std::forward<ArgsTy> instead of as an lvalue.
template <typename ... TypesToForward, typename ... ArgsTy>
1553
- static void kernel_parallel_for_unpack (handler *h, ArgsTy... Args) {
1554
- h->kernel_parallel_for <TypesToForward..., Props...>(Args...);
1554
+ static void kernel_parallel_for_unpack (handler *h, ArgsTy &&...Args) {
1555
+ h->kernel_parallel_for <TypesToForward..., Props...>(
1556
+ std::forward<ArgsTy>(Args)...);
1555
1557
}
1556
1558
1557
1559
// Static helper that calls `kernel_parallel_for_work_group` on the handler
// `h`, supplying `TypesToForward...` followed by `Props...` as explicit
// template arguments. `Props` is declared outside this view (presumably a
// parameter pack of the enclosing struct -- confirm against the full file).
//
// Diff hunk: only the parameter pack and the call change. The by-value pack
// `ArgsTy... Args` (`-` lines) becomes the forwarding-reference pack
// `ArgsTy &&...Args` (`+` lines), forwarded with std::forward<ArgsTy>.
template <typename ... TypesToForward, typename ... ArgsTy>
1558
1560
static void kernel_parallel_for_work_group_unpack (handler *h,
1559
- ArgsTy... Args) {
1560
- h->kernel_parallel_for_work_group <TypesToForward..., Props...>(Args...);
1561
+ ArgsTy &&...Args) {
1562
+ h->kernel_parallel_for_work_group <TypesToForward..., Props...>(
1563
+ std::forward<ArgsTy>(Args)...);
1561
1564
}
1562
1565
};
1563
1566
@@ -1622,9 +1625,9 @@ class __SYCL_EXPORT handler {
1622
1625
void kernel_single_task_wrapper (const KernelType &KernelFunc) {
1623
1626
unpack<KernelName, KernelType, PropertiesT,
1624
1627
detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>(
1625
- KernelFunc, [&](auto Unpacker, auto ... args) {
1628
+ KernelFunc, [&](auto Unpacker, auto && ...args ) {
1626
1629
Unpacker.template kernel_single_task_unpack <KernelName, KernelType>(
1627
- args...);
1630
+ std::forward< decltype ( args)>(args) ...);
1628
1631
});
1629
1632
}
1630
1633
@@ -1635,9 +1638,10 @@ class __SYCL_EXPORT handler {
1635
1638
unpack<KernelName, KernelType, PropertiesT,
1636
1639
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1637
1640
ElementType>::value>(
1638
- KernelFunc, [&](auto Unpacker, auto ... args) {
1641
+ KernelFunc, [&](auto Unpacker, auto && ...args ) {
1639
1642
Unpacker.template kernel_parallel_for_unpack <KernelName, ElementType,
1640
- KernelType>(args...);
1643
+ KernelType>(
1644
+ std::forward<decltype (args)>(args)...);
1641
1645
});
1642
1646
}
1643
1647
@@ -1648,9 +1652,10 @@ class __SYCL_EXPORT handler {
1648
1652
unpack<KernelName, KernelType, PropertiesT,
1649
1653
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1650
1654
ElementType>::value>(
1651
- KernelFunc, [&](auto Unpacker, auto ... args) {
1655
+ KernelFunc, [&](auto Unpacker, auto && ...args ) {
1652
1656
Unpacker.template kernel_parallel_for_work_group_unpack <
1653
- KernelName, ElementType, KernelType>(args...);
1657
+ KernelName, ElementType, KernelType>(
1658
+ std::forward<decltype (args)>(args)...);
1654
1659
});
1655
1660
}
1656
1661
@@ -1900,21 +1905,21 @@ class __SYCL_EXPORT handler {
1900
1905
void parallel_for (range<1 > NumWorkItems, const KernelType &KernelFunc) {
1901
1906
parallel_for_lambda_impl<KernelName>(
1902
1907
NumWorkItems, ext::oneapi::experimental::empty_properties_t {},
1903
- std::move ( KernelFunc) );
1908
+ KernelFunc);
1904
1909
}
1905
1910
1906
1911
// parallel_for overload taking a `range<2>` and a kernel callable by const
// reference. It delegates to `parallel_for_lambda_impl<KernelName>`, passing
// the range, a value-initialized
// `ext::oneapi::experimental::empty_properties_t`, and the callable.
//
// Diff hunk: `std::move(KernelFunc)` (`-` line) is replaced by plain
// `KernelFunc` (`+` line). `KernelFunc` is a `const KernelType &`, so the
// std::move only produced a const rvalue, which cannot be moved from.
template <typename KernelName = detail::auto_name, typename KernelType>
1907
1912
void parallel_for (range<2 > NumWorkItems, const KernelType &KernelFunc) {
1908
1913
parallel_for_lambda_impl<KernelName>(
1909
1914
NumWorkItems, ext::oneapi::experimental::empty_properties_t {},
1910
- std::move ( KernelFunc) );
1915
+ KernelFunc);
1911
1916
}
1912
1917
1913
1918
// parallel_for overload taking a `range<3>` and a kernel callable by const
// reference. It delegates to `parallel_for_lambda_impl<KernelName>`, passing
// the range, a value-initialized
// `ext::oneapi::experimental::empty_properties_t`, and the callable.
//
// Diff hunk: `std::move(KernelFunc)` (`-` line) is replaced by plain
// `KernelFunc` (`+` line). `KernelFunc` is a `const KernelType &`, so the
// std::move only produced a const rvalue, which cannot be moved from.
template <typename KernelName = detail::auto_name, typename KernelType>
1914
1919
void parallel_for (range<3 > NumWorkItems, const KernelType &KernelFunc) {
1915
1920
parallel_for_lambda_impl<KernelName>(
1916
1921
NumWorkItems, ext::oneapi::experimental::empty_properties_t {},
1917
- std::move ( KernelFunc) );
1922
+ KernelFunc);
1918
1923
}
1919
1924
1920
1925
/// Enqueues a command to the SYCL runtime to invoke \p Func once.
0 commit comments