Skip to content

Commit f9d3b5d

Browse files
Mark all functions inline
- it helps the optimizer
- it prevents regressions like #617
1 parent b103d28 commit f9d3b5d

30 files changed: +2062 / -1636 lines changed

include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,61 +25,69 @@ namespace xsimd {
2525

2626
// bitwise_lshift
2727
template<class A, class T, class/*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
28-
batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
28+
inline batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
2929
return detail::apply([](T x, T y) { return x << y;}, self, other);
3030
}
3131

3232
// bitwise_rshift
3333
template<class A, class T, class/*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
34-
batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
34+
inline batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
3535
return detail::apply([](T x, T y) { return x >> y;}, self, other);
3636
}
3737

3838
// div
3939
template<class A, class T, class=typename std::enable_if<std::is_integral<T>::value, void>::type>
40-
batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
40+
inline batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
4141
return detail::apply([](T x, T y) -> T { return x / y;}, self, other);
4242
}
4343

4444
// fma
45-
template<class A, class T> batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) {
45+
template<class A, class T>
46+
inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) {
4647
return x * y + z;
4748
}
4849

49-
template<class A, class T> batch<std::complex<T>, A> fma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) {
50+
template<class A, class T>
51+
inline batch<std::complex<T>, A> fma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) {
5052
auto res_r = fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real()));
5153
auto res_i = fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag()));
5254
return {res_r, res_i};
5355
}
5456

5557
// fms
56-
template<class A, class T> batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) {
58+
template<class A, class T>
59+
inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) {
5760
return x * y - z;
5861
}
5962

60-
template<class A, class T> batch<std::complex<T>, A> fms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) {
63+
template<class A, class T>
64+
inline batch<std::complex<T>, A> fms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) {
6165
auto res_r = fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real()));
6266
auto res_i = fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag()));
6367
return {res_r, res_i};
6468
}
6569

6670
// fnma
67-
template<class A, class T> batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) {
71+
template<class A, class T>
72+
inline batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) {
6873
return -x * y + z;
6974
}
7075

71-
template<class A, class T> batch<std::complex<T>, A> fnma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) {
76+
template<class A, class T>
77+
inline batch<std::complex<T>, A> fnma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) {
7278
auto res_r = - fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real()));
7379
auto res_i = - fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag()));
7480
return {res_r, res_i};
7581
}
7682

7783
// fnms
78-
template<class A, class T> batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) {
84+
template<class A, class T>
85+
inline batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) {
7986
return -x * y - z;
8087
}
8188

82-
template<class A, class T> batch<std::complex<T>, A> fnms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) {
89+
template<class A, class T>
90+
inline batch<std::complex<T>, A> fnms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) {
8391
auto res_r = - fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real()));
8492
auto res_i = - fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag()));
8593
return {res_r, res_i};
@@ -89,7 +97,7 @@ namespace xsimd {
8997

9098
// mul
9199
template<class A, class T, class/*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
92-
batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
100+
inline batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
93101
return detail::apply([](T x, T y) -> T { return x * y;}, self, other);
94102
}
95103

include/xsimd/arch/generic/xsimd_generic_complex.hpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,47 +24,47 @@ namespace xsimd {
2424

2525
// real
2626
template <class A, class T>
27-
batch<T, A> real(batch<T, A> const& self, requires_arch<generic>) {
27+
inline batch<T, A> real(batch<T, A> const& self, requires_arch<generic>) {
2828
return self;
2929
}
3030

3131
template <class A, class T>
32-
batch<T, A> real(batch<std::complex<T>, A> const& self, requires_arch<generic>) {
32+
inline batch<T, A> real(batch<std::complex<T>, A> const& self, requires_arch<generic>) {
3333
return self.real();
3434
}
3535

3636
// imag
3737
template <class A, class T>
38-
batch<T, A> imag(batch<T, A> const& /*self*/, requires_arch<generic>) {
38+
inline batch<T, A> imag(batch<T, A> const& /*self*/, requires_arch<generic>) {
3939
return batch<T, A>(T(0));
4040
}
4141

4242
template <class A, class T>
43-
batch<T, A> imag(batch<std::complex<T>, A> const& self, requires_arch<generic>) {
43+
inline batch<T, A> imag(batch<std::complex<T>, A> const& self, requires_arch<generic>) {
4444
return self.imag();
4545
}
46-
46+
4747
// arg
4848
template<class A, class T>
49-
real_batch_type_t<batch<T, A>> arg(batch<T, A> const& self, requires_arch<generic>) {
49+
inline real_batch_type_t<batch<T, A>> arg(batch<T, A> const& self, requires_arch<generic>) {
5050
return atan2(imag(self), real(self));
5151
}
5252

5353
// conj
5454
template<class A, class T>
55-
complex_batch_type_t<batch<T, A>> conj(batch<T, A> const& self, requires_arch<generic>) {
55+
inline complex_batch_type_t<batch<T, A>> conj(batch<T, A> const& self, requires_arch<generic>) {
5656
return {real(self), - imag(self)};
5757
}
5858

5959
// norm
6060
template<class A, class T>
61-
real_batch_type_t<batch<T, A>> norm(batch<T, A> const& self, requires_arch<generic>) {
61+
inline real_batch_type_t<batch<T, A>> norm(batch<T, A> const& self, requires_arch<generic>) {
6262
return {fma(real(self), real(self), imag(self) * imag(self))};
6363
}
6464

6565
// proj
6666
template<class A, class T>
67-
complex_batch_type_t<batch<T, A>> proj(batch<T, A> const& self, requires_arch<generic>) {
67+
inline complex_batch_type_t<batch<T, A>> proj(batch<T, A> const& self, requires_arch<generic>) {
6868
using batch_type = complex_batch_type_t<batch<T, A>>;
6969
using real_batch = typename batch_type::real_batch;
7070
using real_value_type = typename real_batch::value_type;
@@ -76,11 +76,10 @@ namespace xsimd {
7676
}
7777

7878
template <class A, class T>
79-
batch_bool<T, A> isnan(batch<std::complex<T>, A> const& self, requires_arch<generic>) {
79+
inline batch_bool<T, A> isnan(batch<std::complex<T>, A> const& self, requires_arch<generic>) {
8080
return batch_bool<T, A>(isnan(self.real()) || isnan(self.imag()));
8181
}
8282
}
8383
}
8484

8585
#endif
86-

include/xsimd/arch/generic/xsimd_generic_details.hpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ namespace xsimd {
105105

106106
namespace detail {
107107
template<class F, class A, class T, class... Batches>
108-
batch<T, A> apply(F&& func, batch<T, A> const& self, batch<T, A> const& other) {
108+
inline batch<T, A> apply(F&& func, batch<T, A> const& self, batch<T, A> const& other) {
109109
constexpr std::size_t size = batch<T, A>::size;
110110
alignas(A::alignment()) T self_buffer[size];
111111
alignas(A::alignment()) T other_buffer[size];
@@ -207,8 +207,6 @@ namespace xsimd {
207207
}
208208
}
209209

210-
211-
212210
}
213211

214212
}

include/xsimd/arch/generic/xsimd_generic_logical.hpp

Lines changed: 26 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,82 +22,92 @@ namespace xsimd {
2222
using namespace types;
2323

2424
// ge
25-
template<class A, class T> batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
25+
template<class A, class T>
26+
inline batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
2627
return other <= self;
2728
}
2829

2930
// gt
30-
template<class A, class T> batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
31+
template<class A, class T>
32+
inline batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
3133
return other < self;
3234
}
3335

3436
// is_even
35-
template<class A, class T> batch_bool<T, A> is_even(batch<T, A> const& self, requires_arch<generic>) {
37+
template<class A, class T>
38+
inline batch_bool<T, A> is_even(batch<T, A> const& self, requires_arch<generic>) {
3639
return is_flint(self * T(0.5));
3740
}
3841

3942
// is_flint
40-
template<class A, class T> batch_bool<T, A> is_flint(batch<T, A> const& self, requires_arch<generic>) {
43+
template<class A, class T>
44+
inline batch_bool<T, A> is_flint(batch<T, A> const& self, requires_arch<generic>) {
4145
auto frac = select(isnan(self - self), constants::nan<batch<T, A>>(), self - trunc(self));
4246
return frac == T(0.);
4347
}
4448

4549
// is_odd
46-
template<class A, class T> batch_bool<T, A> is_odd(batch<T, A> const& self, requires_arch<generic>) {
50+
template<class A, class T>
51+
inline batch_bool<T, A> is_odd(batch<T, A> const& self, requires_arch<generic>) {
4752
return is_even(self - T(1.));
4853
}
4954

5055
// isinf
5156
template<class A, class T, class=typename std::enable_if<std::is_integral<T>::value, void>::type>
52-
batch_bool<T, A> isinf(batch<T, A> const& , requires_arch<generic>) {
57+
inline batch_bool<T, A> isinf(batch<T, A> const& , requires_arch<generic>) {
5358
return batch_bool<T, A>(false);
5459
}
55-
template<class A> batch_bool<float, A> isinf(batch<float, A> const& self, requires_arch<generic>) {
60+
template<class A>
61+
inline batch_bool<float, A> isinf(batch<float, A> const& self, requires_arch<generic>) {
5662
return abs(self) == std::numeric_limits<float>::infinity();
5763
}
58-
template<class A> batch_bool<double, A> isinf(batch<double, A> const& self, requires_arch<generic>) {
64+
template<class A>
65+
inline batch_bool<double, A> isinf(batch<double, A> const& self, requires_arch<generic>) {
5966
return abs(self) == std::numeric_limits<double>::infinity();
6067
}
6168

6269
// isfinite
6370
template<class A, class T, class=typename std::enable_if<std::is_integral<T>::value, void>::type>
64-
batch_bool<T, A> isfinite(batch<T, A> const& , requires_arch<generic>) {
71+
inline batch_bool<T, A> isfinite(batch<T, A> const& , requires_arch<generic>) {
6572
return batch_bool<T, A>(true);
6673
}
67-
template<class A> batch_bool<float, A> isfinite(batch<float, A> const& self, requires_arch<generic>) {
74+
template<class A>
75+
inline batch_bool<float, A> isfinite(batch<float, A> const& self, requires_arch<generic>) {
6876
return (self - self) == 0;
6977
}
70-
template<class A> batch_bool<double, A> isfinite(batch<double, A> const& self, requires_arch<generic>) {
78+
template<class A>
79+
inline batch_bool<double, A> isfinite(batch<double, A> const& self, requires_arch<generic>) {
7180
return (self - self) == 0;
7281
}
7382

7483
// isnan
7584
template<class A, class T, class=typename std::enable_if<std::is_integral<T>::value, void>::type>
76-
batch_bool<T, A> isnan(batch<T, A> const& , requires_arch<generic>) {
85+
inline batch_bool<T, A> isnan(batch<T, A> const& , requires_arch<generic>) {
7786
return batch_bool<T, A>(false);
7887
}
7988

8089
// le
8190
template<class A, class T, class=typename std::enable_if<std::is_integral<T>::value, void>::type>
82-
batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
91+
inline batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
8392
return (self < other) || (self == other);
8493
}
8594

8695

8796
// neq
88-
template<class A, class T> batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
97+
template<class A, class T>
98+
inline batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
8999
return !(other == self);
90100
}
91101

92102
// logical_and
93103
template <class A, class T>
94-
batch<T, A> logical_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
104+
inline batch<T, A> logical_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
95105
return detail::apply([](T x, T y) { return x && y;}, self, other);
96106
}
97107

98108
// logical_or
99109
template <class A, class T>
100-
batch<T, A> logical_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
110+
inline batch<T, A> logical_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) {
101111
return detail::apply([](T x, T y) { return x || y;}, self, other);
102112
}
103113
}

0 commit comments

Comments
 (0)