-
Notifications
You must be signed in to change notification settings - Fork 722
/
Copy pathvector-addition-examples.cpp
130 lines (111 loc) · 3.52 KB
/
vector-addition-examples.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/* Demonstrate conditionals using vector addition */
#include <cmath>
#include <iostream>

#include <CL/sycl.hpp>
using namespace cl;
/* Unconditional element-wise addition: stores a[id] + b[id] into c[id].
 * This is the baseline that the conditional variants are compared with. */
void vecAdd(const float* a, const float* b, float* c, size_t id) {
  const float lhs = a[id];
  const float rhs = b[id];
  c[id] = lhs + rhs;
}
/* Masked addition: the sum a[id] + b[id] is stored to c[id] only when it is
 * negative; otherwise c[id] is left exactly as it was. */
void vecAddMasked(const float* a, const float* b, float* c, size_t id) {
  const float sum = a[id] + b[id];
  // Guard clause: anything that is not strictly below zero skips the store.
  if (!(sum < 0.0f)) {
    return;
  }
  c[id] = sum;
}
/* Predicated addition: c[id] is always written; a negative sum
 * a[id] + b[id] is replaced by zero before the store. */
void vecAddPredicated(const float* a, const float* b, float* c, size_t id) {
  const float sum = a[id] + b[id];
  const bool isNegative = sum < 0.0f;
  c[id] = isNegative ? 0.0f : sum;
}
/* Kernel name types. They are only forward-declared here; each one is passed
 * as the template argument that names the corresponding parallel_for launch
 * in main(). */
class VecAddKernel;
class VecAddKernelMasked;
class VecAddKernelPredicated;
/* Sets every element of `b` to 0.0f through a host accessor.
 *
 * The accessor is requested in discard_write mode and every element in the
 * buffer's range is overwritten. The number of elements comes from the
 * buffer's own range. */
void zeroBuffer(sycl::buffer<float, 1> b) {
  static constexpr auto dwrite = sycl::access::mode::discard_write;
  auto h = b.get_access<dwrite>();
  // Use a size_t counter so it is as wide as the extent it is compared with;
  // a narrower unsigned counter could wrap before reaching a large extent.
  const size_t count = b.get_range()[0];
  for (size_t i = 0; i < count; ++i) {
    h[i] = 0.f;
  }
}
/* Returns the sum of all elements of `b`, read through a host accessor.
 *
 * The elements are floats, but the total is accumulated in double to match
 * the return type, so rounding error does not build up in a single-precision
 * running sum before the result is widened. */
double sumBuffer(sycl::buffer<float, 1> b) {
  static constexpr auto read = sycl::access::mode::read;
  auto h = b.get_access<read>();
  double sum = 0.0;
  // Use a size_t counter so it is as wide as the extent it is compared with.
  const size_t count = b.get_range()[0];
  for (size_t i = 0; i < count; ++i) {
    sum += h[i];
  }
  return sum;
}
/* Entry point. Runs the three vector-addition variants over the same inputs
 * and prints the sum of each result.
 *
 * bufA and bufB are filled on the host with sin(x) and cos(x) for N values
 * of x that start at -PI/2 and advance by PI/N. For each variant the output
 * buffer bufC is zeroed, the kernel is submitted, and the contents of bufC
 * are summed on the host. The final line also prints
 * sumall - (sumneg + sumpos) as a checksum.
 *
 * It is possible to inspect the IR generated for the three kernels using
 * LLVM tooling to compare the different approaches.
 *
 * argc and argv are not used. Always returns 0. */
int main(int argc, char* argv[]) {
  static constexpr auto read = sycl::access::mode::read;
  static constexpr auto write = sycl::access::mode::write;
  static constexpr auto dwrite = sycl::access::mode::discard_write;

  constexpr size_t N = 100000;
  const double PI = 3.14159;
  const double ival = PI / N;
  const sycl::range<1> VecSize{N};

  double sumall = 0.0;
  double sumneg = 0.0;
  double sumpos = 0.0;

  sycl::buffer<float> bufA{VecSize};
  sycl::buffer<float> bufB{VecSize};
  sycl::buffer<float> bufC{VecSize};

  {
    // Host-side initialisation; the accessors go out of scope at the closing
    // brace, before any kernel is submitted.
    auto h_a = bufA.get_access<dwrite>();
    auto h_b = bufB.get_access<dwrite>();
    for (size_t i = 0; i < N; ++i) {
      const double val = i * ival - (PI / 2);
      // The buffers hold float, so the double results are narrowed explicitly.
      h_a[i] = static_cast<float>(std::sin(val));
      h_b[i] = static_cast<float>(std::cos(val));
    }
  }

  sycl::queue myQueue;

  // Plain addition: every element of bufC is written.
  // No explicit queue wait is issued in any of the three stages; the code
  // relies on the host access inside sumBuffer() being ordered after the
  // kernel that writes bufC.
  {
    zeroBuffer(bufC);
    auto cg = [&](sycl::handler& h) {
      auto a = bufA.get_access<read>(h);
      auto b = bufB.get_access<read>(h);
      auto c = bufC.get_access<write>(h);
      h.parallel_for<VecAddKernel>(
          VecSize, [=](sycl::id<1> i) { vecAdd(&a[0], &b[0], &c[0], i[0]); });
    };
    myQueue.submit(cg);
    sumall = sumBuffer(bufC);
  }

  // Masked addition: only negative sums are stored, so the remaining
  // elements keep the zeros written by zeroBuffer().
  {
    zeroBuffer(bufC);
    auto cg = [&](sycl::handler& h) {
      auto a = bufA.get_access<read>(h);
      auto b = bufB.get_access<read>(h);
      auto c = bufC.get_access<write>(h);
      h.parallel_for<VecAddKernelMasked>(VecSize, [=](sycl::id<1> i) {
        vecAddMasked(&a[0], &b[0], &c[0], i[0]);
      });
    };
    myQueue.submit(cg);
    sumneg = sumBuffer(bufC);
  }

  // Predicated addition: negative sums are stored as zero, so only the
  // non-negative sums contribute to the total.
  {
    zeroBuffer(bufC);
    auto cg = [&](sycl::handler& h) {
      auto a = bufA.get_access<read>(h);
      auto b = bufB.get_access<read>(h);
      auto c = bufC.get_access<write>(h);
      h.parallel_for<VecAddKernelPredicated>(VecSize, [=](sycl::id<1> i) {
        vecAddPredicated(&a[0], &b[0], &c[0], i[0]);
      });
    };
    myQueue.submit(cg);
    sumpos = sumBuffer(bufC);
  }

  std::cout << "Sum: " << sumall << "; Sum neg: " << sumneg << "; Sum pos: " <<
      sumpos << "; checksum: " << sumall - (sumneg + sumpos) << "\n";
  return 0;
}