Skip to content

Commit f97ee7d

Browse files
lukasm91 authored and anstaf committed
Fix performance for CUDA >= 9.2 (GT v1.0-branch) (#1326)
- sets `GT_CONSTEXPR` to `constexpr` for nvcc
- introduces `const_ref` to fix performance for CUDA >= 9.2 (`T` for small data types, `T const&` for large data types)

Backport to GridTools 1.0. See also #1326 and 9bb2d64
1 parent 1105332 commit f97ee7d

File tree

13 files changed

+95
-48
lines changed

13 files changed

+95
-48
lines changed

include/gridtools/common/array.hpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "../meta/macros.hpp"
2121
#include "../meta/repeat.hpp"
2222
#include "defs.hpp"
23+
#include "generic_metafunctions/const_ref.hpp"
2324
#include "generic_metafunctions/utility.hpp"
2425
#include "gt_assert.hpp"
2526
#include "host_device.hpp"
@@ -117,13 +118,13 @@ namespace gridtools {
117118
}
118119

119120
template <size_t I, typename T, size_t D>
120-
static GT_FUNCTION GT_CONSTEXPR const T &get(const array<T, D> &arr) noexcept {
121+
static GT_FUNCTION GT_CONSTEXPR GT_META_CALL(const_ref, T) get(const array<T, D> &arr) noexcept {
121122
GT_STATIC_ASSERT(I < D, "index is out of bounds");
122123
return arr.m_array[I];
123124
}
124125

125126
template <size_t I, typename T, size_t D>
126-
static GT_FUNCTION GT_CONSTEXPR T &&get(array<T, D> &&arr) noexcept {
127+
static GT_FUNCTION GT_CONSTEXPR T get(array<T, D> &&arr) noexcept {
127128
GT_STATIC_ASSERT(I < D, "index is out of bounds");
128129
return wstd::move(arr.m_array[I]);
129130
}
@@ -188,13 +189,13 @@ namespace gridtools {
188189
}
189190

190191
template <size_t I, typename T, size_t D>
191-
GT_FUNCTION GT_CONSTEXPR const T &get(const array<T, D> &arr) noexcept {
192+
GT_FUNCTION GT_CONSTEXPR GT_META_CALL(const_ref, T) get(const array<T, D> &arr) noexcept {
192193
GT_STATIC_ASSERT(I < D, "index is out of bounds");
193194
return arr.m_array[I];
194195
}
195196

196197
template <size_t I, typename T, size_t D>
197-
GT_FUNCTION GT_CONSTEXPR T &&get(array<T, D> &&arr) noexcept {
198+
GT_FUNCTION GT_CONSTEXPR T get(array<T, D> &&arr) noexcept {
198199
GT_STATIC_ASSERT(I < D, "index is out of bounds");
199200
return wstd::move(get<I>(arr));
200201
}

include/gridtools/common/defs.hpp

-4
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,7 @@
4040
#endif
4141
#endif
4242

43-
#ifdef __CUDA_ARCH__
44-
#define GT_CONSTEXPR
45-
#else
4643
#define GT_CONSTEXPR constexpr
47-
#endif
4844

4945
/**
5046
* Macro to allow make functions constexpr in c++14 (in case they are not only a return statement)

include/gridtools/common/generic_metafunctions/const_ref.hpp

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* GridTools
3+
*
4+
* Copyright (c) 2014-2019, ETH Zurich
5+
* All rights reserved.
6+
*
7+
* Please, refer to the LICENSE file in the root directory.
8+
* SPDX-License-Identifier: BSD-3-Clause
9+
*/
10+
11+
#pragma once
12+
13+
#include <type_traits>
14+
15+
#include "../../meta/macros.hpp"
16+
#include "../../meta/type_traits.hpp"
17+
18+
namespace gridtools {
19+
GT_META_LAZY_NAMESPACE {
20+
template <class T, class = void>
21+
struct const_ref : std::add_lvalue_reference<add_const_t<T>> {};
22+
23+
template <class T>
24+
struct const_ref<T,
25+
enable_if_t<!std::is_reference<T>::value && std::is_trivially_copy_constructible<T>::value &&
26+
sizeof(T) <= sizeof(add_pointer_t<T>)>> : std::add_const<T> {};
27+
}
28+
29+
#if !GT_BROKEN_TEMPLATE_ALIASES
30+
template <class T>
31+
using const_ref = typename lazy::const_ref<T>::type;
32+
#endif
33+
} // namespace gridtools

include/gridtools/common/gt_assert.hpp

+6
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@
2929
#endif
3030

3131
#ifdef __CUDA_ARCH__
32+
#if __CUDACC_VER_MAJOR__ == 9 && __CUDACC_VER_MINOR__ == 2
33+
// we define this macro to an empty string for CUDA 9.2 because in certain cases, CUDA 9.2 tries to compile device
34+
// instantiations of certain constexpr function templates, which can lead to compile-time errors like "cannot use an
35+
// entity undefined in device code".
36+
#define __PRETTY_FUNCTION__ ""
37+
#endif
3238
#define GT_ASSERT_OR_THROW(cond, msg) assert(cond)
3339
#else
3440
#define GT_ASSERT_OR_THROW(cond, msg) \

include/gridtools/common/pair.hpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <utility>
1414

1515
#include "defs.hpp"
16+
#include "generic_metafunctions/const_ref.hpp"
1617
#include "generic_metafunctions/utility.hpp"
1718
#include "host_device.hpp"
1819

@@ -133,30 +134,30 @@ namespace gridtools {
133134
template <>
134135
struct pair_get<0> {
135136
template <typename T1, typename T2>
136-
static GT_CONSTEXPR GT_FUNCTION const T1 &const_get(const pair<T1, T2> &p) noexcept {
137+
static GT_CONSTEXPR GT_FUNCTION GT_META_CALL(const_ref, T1) const_get(const pair<T1, T2> &p) noexcept {
137138
return p.first;
138139
}
139140
template <typename T1, typename T2>
140141
static GT_CONSTEXPR GT_FUNCTION T1 &get(pair<T1, T2> &p) noexcept {
141142
return p.first;
142143
}
143144
template <typename T1, typename T2>
144-
static GT_CONSTEXPR GT_FUNCTION T1 &&move_get(pair<T1, T2> &&p) noexcept {
145+
static GT_CONSTEXPR GT_FUNCTION T1 move_get(pair<T1, T2> &&p) noexcept {
145146
return wstd::move(p.first);
146147
}
147148
};
148149
template <>
149150
struct pair_get<1> {
150151
template <typename T1, typename T2>
151-
static GT_CONSTEXPR GT_FUNCTION const T2 &const_get(const pair<T1, T2> &p) noexcept {
152+
static GT_CONSTEXPR GT_FUNCTION GT_META_CALL(const_ref, T2) const_get(const pair<T1, T2> &p) noexcept {
152153
return p.second;
153154
}
154155
template <typename T1, typename T2>
155156
static GT_CONSTEXPR GT_FUNCTION T2 &get(pair<T1, T2> &p) noexcept {
156157
return p.second;
157158
}
158159
template <typename T1, typename T2>
159-
static GT_CONSTEXPR GT_FUNCTION T2 &&move_get(pair<T1, T2> &&p) noexcept {
160+
static GT_CONSTEXPR GT_FUNCTION T2 move_get(pair<T1, T2> &&p) noexcept {
160161
return wstd::move(p.second);
161162
}
162163
};

include/gridtools/common/tuple.hpp

+10-8
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "../meta/type_traits.hpp"
1616
#include "../meta/utility.hpp"
1717
#include "defs.hpp"
18+
#include "generic_metafunctions/const_ref.hpp"
1819
#include "generic_metafunctions/utility.hpp"
1920
#include "host_device.hpp"
2021

@@ -50,7 +51,8 @@ namespace gridtools {
5051

5152
struct tuple_leaf_getter {
5253
template <size_t I, class T>
53-
static GT_CONSTEXPR GT_FUNCTION T const &get(tuple_leaf<I, T, false> const &obj) noexcept {
54+
static GT_CONSTEXPR GT_FUNCTION GT_META_CALL(const_ref, T)
55+
get(tuple_leaf<I, T, false> const &obj) noexcept {
5456
return obj.m_value;
5557
}
5658

@@ -60,12 +62,12 @@ namespace gridtools {
6062
}
6163

6264
template <size_t I, class T>
63-
static GT_CONSTEXPR GT_FUNCTION T &&get(tuple_leaf<I, T, false> &&obj) noexcept {
65+
static GT_CONSTEXPR GT_FUNCTION T get(tuple_leaf<I, T, false> &&obj) noexcept {
6466
return static_cast<T &&>(get<I>(obj));
6567
}
6668

6769
template <size_t I, class T>
68-
static GT_CONSTEXPR GT_FUNCTION T const &get(tuple_leaf<I, T, true> const &obj) noexcept {
70+
static GT_CONSTEXPR GT_FUNCTION GT_META_CALL(const_ref, T) get(tuple_leaf<I, T, true> const &obj) noexcept {
6971
return obj;
7072
}
7173

@@ -75,7 +77,7 @@ namespace gridtools {
7577
}
7678

7779
template <size_t I, class T>
78-
static GT_CONSTEXPR GT_FUNCTION T &&get(tuple_leaf<I, T, true> &&obj) noexcept {
80+
static GT_CONSTEXPR GT_FUNCTION T get(tuple_leaf<I, T, true> &&obj) noexcept {
7981
return static_cast<T &&>(obj);
8082
}
8183
};
@@ -168,7 +170,7 @@ namespace gridtools {
168170
tuple &operator=(tuple const &) = default;
169171
tuple &operator=(tuple &&) = default;
170172

171-
GT_CONSTEXPR GT_FUNCTION tuple(Ts const &... args) noexcept : m_impl(args...) {}
173+
GT_CONSTEXPR GT_FUNCTION tuple(GT_META_CALL(const_ref, Ts)... args) noexcept : m_impl(args...) {}
172174

173175
template <class... Args,
174176
enable_if_t<sizeof...(Ts) == sizeof...(Args) && conjunction<std::is_constructible<Ts, Args &&>...>::value,
@@ -198,7 +200,7 @@ namespace gridtools {
198200
T m_value;
199201
struct getter {
200202
template <size_t I, enable_if_t<I == 0, int> = 0>
201-
static GT_CONSTEXPR GT_FUNCTION T const &get(tuple const &obj) noexcept {
203+
static GT_CONSTEXPR GT_FUNCTION GT_META_CALL(const_ref, T) get(tuple const &obj) noexcept {
202204
return obj.m_value;
203205
}
204206

@@ -208,7 +210,7 @@ namespace gridtools {
208210
}
209211

210212
template <size_t I, enable_if_t<I == 0, int> = 0>
211-
static GT_CONSTEXPR GT_FUNCTION T &&get(tuple &&obj) noexcept {
213+
static GT_CONSTEXPR GT_FUNCTION T get(tuple &&obj) noexcept {
212214
return static_cast<T &&>(obj.m_value);
213215
}
214216
};
@@ -225,7 +227,7 @@ namespace gridtools {
225227
tuple &operator=(tuple const &) = default;
226228
tuple &operator=(tuple &&) = default;
227229

228-
GT_CONSTEXPR GT_FUNCTION tuple(T const &arg) noexcept : m_value(arg) {}
230+
GT_CONSTEXPR GT_FUNCTION tuple(GT_META_CALL(const_ref, T) arg) noexcept : m_value(arg) {}
229231

230232
template <class Arg, enable_if_t<std::is_constructible<T, Arg &&>::value, int> = 0>
231233
GT_CONSTEXPR GT_FUNCTION tuple(Arg &&arg) noexcept : m_value(wstd::forward<Arg>(arg)) {}

include/gridtools/common/tuple_util.hpp

+15-16
Original file line numberDiff line numberDiff line change
@@ -263,10 +263,7 @@ namespace gridtools {
263263
enum class ref_kind { rvalue, lvalue, const_lvalue };
264264

265265
template <class>
266-
struct get_ref_kind;
267-
268-
template <class T>
269-
struct get_ref_kind<T &&> : std::integral_constant<ref_kind, ref_kind::rvalue> {};
266+
struct get_ref_kind : std::integral_constant<ref_kind, ref_kind::rvalue> {};
270267

271268
template <class T>
272269
struct get_ref_kind<T &> : std::integral_constant<ref_kind, ref_kind::lvalue> {};
@@ -279,7 +276,9 @@ namespace gridtools {
279276
struct add_ref;
280277

281278
template <class T>
282-
struct add_ref<ref_kind::rvalue, T> : std::add_rvalue_reference<T> {};
279+
struct add_ref<ref_kind::rvalue, T> {
280+
using type = T;
281+
};
283282

284283
template <class T>
285284
struct add_ref<ref_kind::lvalue, T> : std::add_lvalue_reference<T> {};
@@ -432,7 +431,7 @@ namespace gridtools {
432431
class Res = GT_META_CALL(from_types,
433432
(Tup,
434433
GT_META_CALL(get_results_t,
435-
(GT_META_CALL(get_accessors, Tup &&), GT_META_CALL(get_accessors, Tups &&)...))))>
434+
(GT_META_CALL(get_accessors, Tup), GT_META_CALL(get_accessors, Tups)...))))>
436435
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup, Tups &&... tups) const {
437436
using generators = GT_META_CALL(meta::transform,
438437
(get_transform_generator, GT_META_CALL(meta::make_indices_c, size<decay_t<Tup>>::value)));
@@ -506,7 +505,7 @@ namespace gridtools {
506505

507506
template <class Tup,
508507
class Accessors = GT_META_CALL(
509-
meta::transform, (get_accessors, GT_META_CALL(get_accessors, Tup &&))),
508+
meta::transform, (get_accessors, GT_META_CALL(get_accessors, Tup))),
510509
class First = GT_META_CALL(meta::first, GT_META_CALL(to_types, Tup)),
511510
class Res = GT_META_CALL(from_types, (First, GT_META_CALL(meta::flatten, Accessors)))>
512511
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup) const {
@@ -524,7 +523,7 @@ namespace gridtools {
524523
GT_META_DEFINE_ALIAS(get_drop_front_generator, meta::id, get_nth_f<N + I::value>);
525524

526525
template <class Tup,
527-
class Accessors = GT_META_CALL(get_accessors, Tup &&),
526+
class Accessors = GT_META_CALL(get_accessors, Tup),
528527
class Res = GT_META_CALL(from_types, (Tup, GT_META_CALL(meta::drop_front_c, (N, Accessors))))>
529528
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup) const {
530529
using generators = GT_META_CALL(meta::transform,
@@ -548,7 +547,7 @@ namespace gridtools {
548547
struct push_back_f {
549548
template <class Tup,
550549
class... Args,
551-
class Accessors = GT_META_CALL(get_accessors, Tup &&),
550+
class Accessors = GT_META_CALL(get_accessors, Tup),
552551
class Res = GT_META_CALL(
553552
from_types, (Tup, GT_META_CALL(meta::push_back, (Accessors, Args &&...))))>
554553
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup, Args &&... args) const {
@@ -572,7 +571,7 @@ namespace gridtools {
572571
struct push_front_f {
573572
template <class Tup,
574573
class... Args,
575-
class Accessors = GT_META_CALL(get_accessors, Tup &&),
574+
class Accessors = GT_META_CALL(get_accessors, Tup),
576575
class Res = GT_META_CALL(
577576
from_types, (Tup, GT_META_CALL(meta::push_front, (Accessors, Args &&...))))>
578577
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup, Args &&... args) const {
@@ -626,7 +625,7 @@ namespace gridtools {
626625
size_t N,
627626
class State,
628627
class Tup,
629-
class AllAccessors = GT_META_CALL(get_accessors, Tup &&),
628+
class AllAccessors = GT_META_CALL(get_accessors, Tup),
630629
class Accessors = GT_META_CALL(meta::drop_front_c, (I, AllAccessors)),
631630
class Res = GT_META_CALL(meta::lfold, (meta_fun, State &&, Accessors)),
632631
enable_if_t<(I + 4 < N), int> = 0>
@@ -643,14 +642,14 @@ namespace gridtools {
643642

644643
template <class State,
645644
class Tup,
646-
class Accessors = GT_META_CALL(get_accessors, Tup &&),
645+
class Accessors = GT_META_CALL(get_accessors, Tup),
647646
class Res = GT_META_CALL(meta::lfold, (meta_fun, State &&, Accessors))>
648647
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(State &&state, Tup &&tup) const {
649648
return impl<0, size<decay_t<Tup>>::value>(wstd::forward<State>(state), wstd::forward<Tup>(tup));
650649
}
651650

652651
template <class Tup,
653-
class AllAccessors = GT_META_CALL(get_accessors, Tup &&),
652+
class AllAccessors = GT_META_CALL(get_accessors, Tup),
654653
class StateAccessor = GT_META_CALL(meta::first, AllAccessors),
655654
class Accessors = GT_META_CALL(meta::drop_front_c, (1, AllAccessors)),
656655
class Res = GT_META_CALL(meta::lfold, (meta_fun, StateAccessor, Accessors))>
@@ -747,7 +746,7 @@ namespace gridtools {
747746
template <class Tup,
748747
class First = GT_META_CALL(meta::first, GT_META_CALL(to_types, Tup)),
749748
class Accessors = GT_META_CALL(
750-
meta::transform, (get_accessors, GT_META_CALL(get_accessors, Tup &&))),
749+
meta::transform, (get_accessors, GT_META_CALL(get_accessors, Tup))),
751750
class Types = GT_META_CALL(meta::transpose, Accessors),
752751
class InnerTuples = GT_META_CALL(
753752
meta::transform, (get_inner_tuple_f<Tup>::template apply, Types)),
@@ -769,7 +768,7 @@ namespace gridtools {
769768
};
770769

771770
template <class Tup,
772-
class Accessors = GT_META_CALL(get_accessors, Tup &&),
771+
class Accessors = GT_META_CALL(get_accessors, Tup),
773772
class Res = GT_META_CALL(from_types, (Tup, GT_META_CALL(meta::reverse, Accessors)))>
774773
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup) const {
775774
using n_t = size<decay_t<Tup>>;
@@ -809,7 +808,7 @@ namespace gridtools {
809808
(I::value == N, insert_val_generator_f, insert_tup_generator_f<I::value - 1>))));
810809

811810
template <class Tup,
812-
class Accessors = GT_META_CALL(get_accessors, Tup &&),
811+
class Accessors = GT_META_CALL(get_accessors, Tup),
813812
class Types = GT_META_CALL(meta::insert_c, (N, Accessors, Val)),
814813
class Res = GT_META_CALL(from_types, (Tup, Types))>
815814
GT_TARGET GT_FORCE_INLINE GT_CONSTEXPR Res operator()(Tup &&tup) const {

include/gridtools/stencil_composition/expressions/expr_base.hpp

+7
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,15 @@ namespace gridtools {
6363
return arg;
6464
}
6565

66+
// intel compiler 18.0 segfaults if this is a value. On the other hand, nvcc performs much worse in the
67+
// dycore if it is a lvalue reference
68+
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER <= 1800)
6669
template <class Eval, class Arg, enable_if_t<!std::is_arithmetic<Arg>::value, int> = 0>
6770
GT_FUNCTION GT_CONSTEXPR auto apply_eval(Eval &eval, Arg const &arg) GT_AUTO_RETURN(eval(arg));
71+
#else
72+
template <class Eval, class Arg, enable_if_t<!std::is_arithmetic<Arg>::value, int> = 0>
73+
GT_FUNCTION GT_CONSTEXPR auto apply_eval(Eval &eval, Arg arg) GT_AUTO_RETURN(eval(wstd::move(arg)));
74+
#endif
6875

6976
template <class Eval, class Op, class Arg>
7077
GT_FUNCTION GT_CONSTEXPR auto value(Eval &eval, expr<Op, Arg> const &arg)

include/gridtools/stencil_composition/sid/composite.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ namespace gridtools {
9595
using type = item_generator;
9696

9797
template <class Args, class Res = GT_META_CALL(tuple_util::element, (PrimaryIndex::value, Args))>
98-
Res const &operator()(Args const &args) const noexcept {
98+
GT_META_CALL(const_ref, Res)
99+
operator()(Args const &args) const noexcept {
99100
GT_STATIC_ASSERT(
100101
(conjunction<
101102
std::is_same<GT_META_CALL(tuple_util::element, (SecondaryIndices::value, Args)),

include/gridtools/stencil_composition/stencil_functions.hpp

+6-7
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,8 @@ namespace gridtools {
135135
Transforms m_transforms;
136136

137137
template <class Accessor>
138-
GT_FUNCTION auto operator()(Accessor &&acc) const
139-
GT_AUTO_RETURN(tuple_util::host_device::get<decay_t<Accessor>::index_t::value>(m_transforms)(
140-
m_eval, std::forward<Accessor>(acc)));
138+
GT_FUNCTION auto operator()(Accessor acc) const GT_AUTO_RETURN(
139+
tuple_util::host_device::get<decay_t<Accessor>::index_t::value>(m_transforms)(m_eval, wstd::move(acc)));
141140

142141
template <class Op, class... Ts>
143142
GT_FUNCTION auto operator()(expr<Op, Ts...> const &arg) const
@@ -227,12 +226,12 @@ namespace gridtools {
227226
*/
228227
template <class Eval,
229228
class... Args,
230-
class Res = typename call_interfaces_impl_::get_result_type<Eval, ReturnType, decay_t<Args>...>::type,
229+
class Res = typename call_interfaces_impl_::get_result_type<Eval, ReturnType, Args...>::type,
231230
enable_if_t<sizeof...(Args) + 1 == meta::length<params_t>::value, int> = 0>
232-
GT_FUNCTION static Res with(Eval &eval, Args &&... args) {
231+
GT_FUNCTION static Res with(Eval &eval, Args... args) {
233232
Res res;
234-
call_interfaces_impl_::evaluate_bound_functor<Functor, Region, OffI, OffJ, OffK>(eval,
235-
tuple_util::host_device::insert<out_param_index>(res, tuple<Args &&...>{std::forward<Args>(args)...}));
233+
call_interfaces_impl_::evaluate_bound_functor<Functor, Region, OffI, OffJ, OffK>(
234+
eval, tuple_util::host_device::insert<out_param_index>(res, tuple<Args &&...>{wstd::move(args)...}));
236235
return res;
237236
}
238237
};

0 commit comments

Comments
 (0)