1
1
/* **************************************************************************
2
- * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3
- * Martin Renou *
4
- * Copyright (c) QuantStack *
5
- * Copyright (c) Serge Guelton *
6
- * *
7
- * Distributed under the terms of the BSD 3-Clause License. *
8
- * *
9
- * The full license is in the file LICENSE, distributed with this software. *
10
- ****************************************************************************/
2
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3
+ * Martin Renou *
4
+ * Copyright (c) QuantStack *
5
+ * Copyright (c) Serge Guelton *
6
+ * *
7
+ * Distributed under the terms of the BSD 3-Clause License. *
8
+ * *
9
+ * The full license is in the file LICENSE, distributed with this software. *
10
+ ****************************************************************************/
11
11
12
12
#ifndef XSIMD_BENCHMARK_HPP
13
13
#define XSIMD_BENCHMARK_HPP
14
14
15
+ #include " xsimd/xsimd.hpp"
15
16
#include < chrono>
17
+ #include < iostream>
16
18
#include < string>
17
19
#include < vector>
18
- #include < iostream>
19
- #include " xsimd/xsimd.hpp"
20
20
21
21
namespace xsimd
22
22
{
23
23
template <class T >
24
24
std::string batch_name ();
25
25
26
- template <> inline std::string batch_name<batch<float , 4 >>() { return " sse/neon float" ; }
27
- template <> inline std::string batch_name<batch<double , 2 >>() { return " sse/neon double" ; }
28
- template <> inline std::string batch_name<batch<float , 8 >>() { return " avx float" ; }
29
- template <> inline std::string batch_name<batch<double , 4 >>() { return " avx double" ; }
30
- template <> inline std::string batch_name<batch<float , 7 >>() { return " fallback float" ; }
31
- template <> inline std::string batch_name<batch<double , 3 >>() { return " fallback double" ; }
26
+ template <>
27
+ inline std::string batch_name<batch<float , 4 >>() { return " sse/neon float" ; }
28
+ template <>
29
+ inline std::string batch_name<batch<double , 2 >>() { return " sse/neon double" ; }
30
+ template <>
31
+ inline std::string batch_name<batch<float , 8 >>() { return " avx float" ; }
32
+ template <>
33
+ inline std::string batch_name<batch<double , 4 >>() { return " avx double" ; }
34
+ template <>
35
+ inline std::string batch_name<batch<float , 7 >>() { return " fallback float" ; }
36
+ template <>
37
+ inline std::string batch_name<batch<double , 3 >>() { return " fallback double" ; }
32
38
33
39
using duration_type = std::chrono::duration<double , std::milli>;
34
40
@@ -111,7 +117,7 @@ namespace xsimd
111
117
auto start = std::chrono::steady_clock::now ();
112
118
for (size_t i = 0 ; i < s; ++i)
113
119
{
114
- res[i] = f (lhs[i], rhs[i]);
120
+ res[i] = f (lhs[i], rhs[i]);
115
121
}
116
122
auto end = std::chrono::steady_clock::now ();
117
123
auto tmp = end - start;
@@ -130,7 +136,7 @@ namespace xsimd
130
136
auto start = std::chrono::steady_clock::now ();
131
137
for (size_t i = 0 ; i < s; ++i)
132
138
{
133
- res[i] = f (op0[i], op1[i], op2[i]);
139
+ res[i] = f (op0[i], op1[i], op2[i]);
134
140
}
135
141
auto end = std::chrono::steady_clock::now ();
136
142
auto tmp = end - start;
@@ -175,7 +181,7 @@ namespace xsimd
175
181
size_t k = j + B::size;
176
182
size_t l = k + B::size;
177
183
B blhs (&lhs[i], aligned_mode ()), blhs2 (&lhs[j], aligned_mode ()),
178
- blhs3 (&lhs[k], aligned_mode ()), blhs4 (&lhs[l], aligned_mode ());
184
+ blhs3 (&lhs[k], aligned_mode ()), blhs4 (&lhs[l], aligned_mode ());
179
185
B bres = f (blhs);
180
186
B bres2 = f (blhs2);
181
187
B bres3 = f (blhs3);
@@ -228,9 +234,9 @@ namespace xsimd
228
234
size_t k = j + B::size;
229
235
size_t l = k + B::size;
230
236
B blhs (&lhs[i], aligned_mode ()), brhs (&rhs[i], aligned_mode ()),
231
- blhs2 (&lhs[j], aligned_mode ()), brhs2 (&rhs[j], aligned_mode ());
237
+ blhs2 (&lhs[j], aligned_mode ()), brhs2 (&rhs[j], aligned_mode ());
232
238
B blhs3 (&lhs[k], aligned_mode ()), brhs3 (&rhs[k], aligned_mode ()),
233
- blhs4 (&lhs[l], aligned_mode ()), brhs4 (&rhs[l], aligned_mode ());
239
+ blhs4 (&lhs[l], aligned_mode ()), brhs4 (&rhs[l], aligned_mode ());
234
240
B bres = f (blhs, brhs);
235
241
B bres2 = f (blhs2, brhs2);
236
242
B bres3 = f (blhs3, brhs3);
@@ -247,7 +253,6 @@ namespace xsimd
247
253
return t_res;
248
254
}
249
255
250
-
251
256
template <class B , class F , class V >
252
257
duration_type benchmark_simd (F f, V& op0, V& op1, V& op2, V& res, std::size_t number)
253
258
{
@@ -259,8 +264,8 @@ namespace xsimd
259
264
for (std::size_t i = 0 ; i <= (s - B::size); i += B::size)
260
265
{
261
266
B bop0 (&op0[i], aligned_mode ()),
262
- bop1 (&op1[i], aligned_mode ()),
263
- bop2 (&op2[i], aligned_mode ());
267
+ bop1 (&op1[i], aligned_mode ()),
268
+ bop2 (&op2[i], aligned_mode ());
264
269
B bres = f (bop0, bop1, bop2);
265
270
bres.store_aligned (&res[i]);
266
271
}
@@ -558,85 +563,105 @@ namespace xsimd
558
563
out << " ============================" << std::endl;
559
564
}
560
565
561
-
562
- # define DEFINE_OP_FUNCTOR_2OP ( OP, NAME) \
563
- struct NAME ##_fn { \
564
- template <class T >\
565
- inline T operator ()(const T& lhs, const T& rhs) const { return lhs OP rhs; }\
566
- inline std::string name () const { return #NAME; }\
566
+ # define DEFINE_OP_FUNCTOR_2OP ( OP, NAME ) \
567
+ struct NAME ##_fn \
568
+ { \
569
+ template <class T > \
570
+ inline T operator ()(const T& lhs, const T& rhs) const { return lhs OP rhs; } \
571
+ inline std::string name () const { return #NAME; } \
567
572
}
568
573
569
- #define DEFINE_FUNCTOR_1OP (FN )\
570
- struct FN ##_fn {\
571
- template <class T >\
572
- inline T operator ()(const T& x) const { using xsimd::FN; return FN (x); }\
573
- inline std::string name () const { return #FN; }\
574
+ #define DEFINE_FUNCTOR_1OP (FN ) \
575
+ struct FN ##_fn \
576
+ { \
577
+ template <class T > \
578
+ inline T operator ()(const T& x) const \
579
+ { \
580
+ using xsimd::FN; \
581
+ return FN (x); \
582
+ } \
583
+ inline std::string name () const { return #FN; } \
574
584
}
575
585
576
- #define DEFINE_FUNCTOR_1OP_TEMPLATE (FN, N, ...)\
577
- struct FN ##_##N##_fn {\
578
- template <class T >\
579
- inline T operator ()(const T& x) const { using xsimd::FN; return FN<T, __VA_ARGS__>(x); }\
580
- inline std::string name () const { return #FN " " #N ; }\
586
+ #define DEFINE_FUNCTOR_1OP_TEMPLATE (FN, N, ...) \
587
+ struct FN ##_##N##_fn \
588
+ { \
589
+ template <class T > \
590
+ inline T operator ()(const T& x) const \
591
+ { \
592
+ using xsimd::FN; \
593
+ return FN<T, __VA_ARGS__>(x); \
594
+ } \
595
+ inline std::string name () const { return #FN " " #N; } \
581
596
}
582
597
583
- #define DEFINE_FUNCTOR_2OP (FN )\
584
- struct FN ##_fn{\
585
- template <class T >\
586
- inline T operator ()(const T&lhs, const T& rhs) const { using xsimd::FN; return FN (lhs, rhs); }\
587
- inline std::string name () const { return #FN; }\
598
+ #define DEFINE_FUNCTOR_2OP (FN ) \
599
+ struct FN ##_fn \
600
+ { \
601
+ template <class T > \
602
+ inline T operator ()(const T& lhs, const T& rhs) const \
603
+ { \
604
+ using xsimd::FN; \
605
+ return FN (lhs, rhs); \
606
+ } \
607
+ inline std::string name () const { return #FN; } \
588
608
}
589
609
590
- #define DEFINE_FUNCTOR_3OP (FN )\
591
- struct FN ##_fn{\
592
- template <class T >\
593
- inline T operator ()(const T& op0, const T& op1, const T& op2) const { using xsimd::FN; return FN (op0, op1, op2); }\
594
- inline std::string name () const { return #FN; }\
610
+ #define DEFINE_FUNCTOR_3OP (FN ) \
611
+ struct FN ##_fn \
612
+ { \
613
+ template <class T > \
614
+ inline T operator ()(const T& op0, const T& op1, const T& op2) const \
615
+ { \
616
+ using xsimd::FN; \
617
+ return FN (op0, op1, op2); \
618
+ } \
619
+ inline std::string name () const { return #FN; } \
595
620
}
596
621
597
- DEFINE_OP_FUNCTOR_2OP (+, add);
598
- DEFINE_OP_FUNCTOR_2OP (-, sub);
599
- DEFINE_OP_FUNCTOR_2OP (*, mul);
600
- DEFINE_OP_FUNCTOR_2OP (/, div);
601
-
602
- DEFINE_FUNCTOR_1OP (exp);
603
- DEFINE_FUNCTOR_1OP (exp2);
604
- DEFINE_FUNCTOR_1OP (expm1);
605
- DEFINE_FUNCTOR_1OP (log);
606
- DEFINE_FUNCTOR_1OP (log10);
607
- DEFINE_FUNCTOR_1OP (log2);
608
- DEFINE_FUNCTOR_1OP (log1p);
609
-
610
- DEFINE_FUNCTOR_1OP (sin);
611
- DEFINE_FUNCTOR_1OP (cos);
612
- DEFINE_FUNCTOR_1OP (tan);
613
- DEFINE_FUNCTOR_1OP (asin);
614
- DEFINE_FUNCTOR_1OP (acos);
615
- DEFINE_FUNCTOR_1OP (atan);
616
-
617
- DEFINE_FUNCTOR_1OP (sinh);
618
- DEFINE_FUNCTOR_1OP (cosh);
619
- DEFINE_FUNCTOR_1OP (tanh);
620
- DEFINE_FUNCTOR_1OP (asinh);
621
- DEFINE_FUNCTOR_1OP (acosh);
622
- DEFINE_FUNCTOR_1OP (atanh);
623
-
624
- DEFINE_FUNCTOR_2OP (pow);
625
- DEFINE_FUNCTOR_1OP (sqrt);
626
- DEFINE_FUNCTOR_1OP (cbrt);
627
- DEFINE_FUNCTOR_2OP (hypot);
628
-
629
- DEFINE_FUNCTOR_1OP (ceil);
630
- DEFINE_FUNCTOR_1OP (floor);
631
- DEFINE_FUNCTOR_1OP (trunc);
632
- DEFINE_FUNCTOR_1OP (round);
633
- DEFINE_FUNCTOR_1OP (nearbyint);
634
- DEFINE_FUNCTOR_1OP (rint);
635
-
636
- DEFINE_FUNCTOR_2OP (fmod);
637
- DEFINE_FUNCTOR_2OP (remainder);
638
- DEFINE_FUNCTOR_2OP (fdim);
639
- DEFINE_FUNCTOR_3OP (clip);
622
+ DEFINE_OP_FUNCTOR_2OP (+, add);
623
+ DEFINE_OP_FUNCTOR_2OP (-, sub);
624
+ DEFINE_OP_FUNCTOR_2OP (*, mul);
625
+ DEFINE_OP_FUNCTOR_2OP (/, div);
626
+
627
+ DEFINE_FUNCTOR_1OP (exp);
628
+ DEFINE_FUNCTOR_1OP (exp2);
629
+ DEFINE_FUNCTOR_1OP (expm1);
630
+ DEFINE_FUNCTOR_1OP (log);
631
+ DEFINE_FUNCTOR_1OP (log10);
632
+ DEFINE_FUNCTOR_1OP (log2);
633
+ DEFINE_FUNCTOR_1OP (log1p);
634
+
635
+ DEFINE_FUNCTOR_1OP (sin);
636
+ DEFINE_FUNCTOR_1OP (cos);
637
+ DEFINE_FUNCTOR_1OP (tan);
638
+ DEFINE_FUNCTOR_1OP (asin);
639
+ DEFINE_FUNCTOR_1OP (acos);
640
+ DEFINE_FUNCTOR_1OP (atan);
641
+
642
+ DEFINE_FUNCTOR_1OP (sinh);
643
+ DEFINE_FUNCTOR_1OP (cosh);
644
+ DEFINE_FUNCTOR_1OP (tanh);
645
+ DEFINE_FUNCTOR_1OP (asinh);
646
+ DEFINE_FUNCTOR_1OP (acosh);
647
+ DEFINE_FUNCTOR_1OP (atanh);
648
+
649
+ DEFINE_FUNCTOR_2OP (pow);
650
+ DEFINE_FUNCTOR_1OP (sqrt);
651
+ DEFINE_FUNCTOR_1OP (cbrt);
652
+ DEFINE_FUNCTOR_2OP (hypot);
653
+
654
+ DEFINE_FUNCTOR_1OP (ceil);
655
+ DEFINE_FUNCTOR_1OP (floor);
656
+ DEFINE_FUNCTOR_1OP (trunc);
657
+ DEFINE_FUNCTOR_1OP (round);
658
+ DEFINE_FUNCTOR_1OP (nearbyint);
659
+ DEFINE_FUNCTOR_1OP (rint);
660
+
661
+ DEFINE_FUNCTOR_2OP (fmod);
662
+ DEFINE_FUNCTOR_2OP (remainder);
663
+ DEFINE_FUNCTOR_2OP (fdim);
664
+ DEFINE_FUNCTOR_3OP (clip);
640
665
#if 0
641
666
DEFINE_FUNCTOR_1OP(isfinite);
642
667
DEFINE_FUNCTOR_1OP(isinf);
@@ -646,16 +671,16 @@ DEFINE_FUNCTOR_1OP(is_even);
646
671
#endif
647
672
648
673
#ifdef XSIMD_POLY_BENCHMARKS
649
- DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 5 , 1 , 2 , 3 , 4 , 5 );
650
- DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 5 , 1 , 2 , 3 , 4 , 5 );
651
- DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 10 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 );
652
- DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 10 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 );
653
- DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 12 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 );
654
- DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 12 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 );
655
- DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 14 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 );
656
- DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 14 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 );
657
- DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 16 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 );
658
- DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 16 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 );
674
+ DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 5 , 1 , 2 , 3 , 4 , 5 );
675
+ DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 5 , 1 , 2 , 3 , 4 , 5 );
676
+ DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 10 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 );
677
+ DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 10 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 );
678
+ DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 12 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 );
679
+ DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 12 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 );
680
+ DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 14 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 );
681
+ DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 14 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 );
682
+ DEFINE_FUNCTOR_1OP_TEMPLATE (horner, 16 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 );
683
+ DEFINE_FUNCTOR_1OP_TEMPLATE (estrin, 16 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 );
659
684
#endif
660
685
661
686
}
0 commit comments