/**
* Copyright (c) Glow Contributors. See CONTRIBUTORS file.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "BackendTestUtils.h"

// Silence Apple's warning about the deprecation of OpenCL.
#define CL_SILENCE_DEPRECATION
// Silence warnings about using deprecated OpenCL 1.2 functions.
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include "../../lib/Backends/OpenCL/OpenCLDeviceManager.h"
#include "glow/Backends/DeviceManager.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Graph/Graph.h"
#include "glow/IR/IR.h"
#include "glow/IR/IRBuilder.h"
#include "glow/IR/Instrs.h"
#include "gtest/gtest.h"

/// Takes an Expected<T> \p rhsOrErrV, asserts that it is not an error,
/// and takes the value from rhsOrErrV and assigns it to \p lhs.
#define ASSERT_AND_ASSIGN_VALUE(lhs, rhsOrErrV) \
do { \
if (rhsOrErrV) { \
lhs = std::move(rhsOrErrV.get()); \
} else { \
ASSERT_TRUE(false); \
} \
} while (0)
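// A minimal usage sketch for the macro above: unwrap an Expected<T> inside a
// test body, failing the test if it carries an Error instead of a value. The
// `computeAnswer()` helper is hypothetical and only for illustration; the
// TEST_F cases below use the macro the same way with
// runtime::OpenCLCommandQueue values.
//
//   Expected<int> answerOrErr = computeAnswer();
//   int answer = 0;
//   ASSERT_AND_ASSIGN_VALUE(answer, answerOrErr);
//   // `answer` now holds the unwrapped value.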

using namespace glow;
using llvm::cast;

TEST(OpenCLCorrectnessTest, convOps) {
PseudoRNG PRNG;
Tensor inputs(ElemKind::FloatTy, {2, 3, 16, 16});
inputs.getHandle().initXavier(1, PRNG);
Tensor out1;
Tensor out2;
inferBasicConvNet(&inputs, &out1, "OpenCL", 8);
inferBasicConvNet(&inputs, &out2, "Interpreter", 8);
EXPECT_TRUE(out1.isEqual(out2));
}

TEST(OpenCLCorrectnessTest, inferMixedNet) {
PseudoRNG PRNG;
Tensor inputs(ElemKind::FloatTy, {2, 3, 16, 16});
inputs.getHandle().initXavier(1, PRNG);
Tensor out1;
Tensor out2;
inferMixedNet(&inputs, &out1, "OpenCL");
inferMixedNet(&inputs, &out2, "Interpreter");
EXPECT_TRUE(out1.isEqual(out2));
}

TEST(OpenCLCorrectnessTest, softmaxGradTest) {
PseudoRNG PRNG;
std::array<dim_t, 2> S{{8, 23}};
llvm::ArrayRef<dim_t> shape(S);
Tensor inputs(ElemKind::FloatTy, shape);
Tensor weights(ElemKind::FloatTy, {23, 23});
Tensor bias(ElemKind::FloatTy, {23});
Tensor selected(ElemKind::Int64ITy, {8, 1});
inputs.getHandle().initXavier(1, PRNG);
weights.getHandle().randomize(0.0, 0.5, PRNG);
bias.getHandle().randomize(-0.2, 0.0, PRNG);
auto selectedH = selected.getHandle<int64_t>();
for (size_t i = 0; i < 8; i++) {
selectedH.raw(i) = PRNG.nextRandInt(0, 22);
}
Tensor out1(ElemKind::FloatTy, shape);
Tensor out2(ElemKind::FloatTy, shape);
trainSoftMaxNet(&inputs, &weights, &bias, &selected, &out1, "OpenCL");
trainSoftMaxNet(&inputs, &weights, &bias, &selected, &out2, "Interpreter");
EXPECT_TRUE(out1.isEqual(out2));
}

TEST(OpenCLCorrectnessTest, tanhConcatTest) {
Tensor I1(ElemKind::FloatTy, {10, 5});
Tensor I2(ElemKind::FloatTy, {20, 5});
Tensor I3(ElemKind::FloatTy, {30, 5});
for (dim_t i = 0; i < 10; i++) {
for (dim_t j = 0; j < 5; j++) {
I1.getHandle<float>().at({i, j}) = 0.05 * (i + j * 10 + 1);
I2.getHandle<float>().at({i, j}) = 0.10 * (i + j * 10 + 1);
I2.getHandle<float>().at({i + 10, j}) = 0.15 * (i + j * 10 + 1);
I3.getHandle<float>().at({i, j}) = 0.20 * (i + j * 10 + 1);
I3.getHandle<float>().at({i + 10, j}) = 0.25 * (i + j * 10 + 1);
I3.getHandle<float>().at({i + 20, j}) = 0.30 * (i + j * 10 + 1);
}
}
Tensor out1(ElemKind::FloatTy, {100, 5});
Tensor out2(ElemKind::FloatTy, {100, 5});
inferTanhConcatNet(&I1, &I2, &I3, &out1, "OpenCL");
inferTanhConcatNet(&I1, &I2, &I3, &out2, "Interpreter");
EXPECT_TRUE(out1.isEqual(out2));
}

TEST(OpenCLCorrectnessTest, SetDeviceMemory) {
using namespace glow;
using namespace runtime;
auto openCLConfigEmpty = DeviceConfig("OpenCL");
auto openCLConfigFull = DeviceConfig("OpenCL");
openCLConfigFull.setDeviceMemory(32768);
// The default device memory size comes from the OpenCL device info; a
// deviceConfig can limit it further.
// With no setting at all, use the default memory size from the OpenCL
// device info.
OpenCLDeviceManager openCLDeviceDefault(openCLConfigEmpty);
Error err1 = openCLDeviceDefault.init();
ASSERT_FALSE(ERR_TO_BOOL(std::move(err1)));
uint64_t memSize = openCLDeviceDefault.getMaximumMemory();
// If limited by deviceConfig.
OpenCLDeviceManager openCLDeviceSetByDeviceConfig(openCLConfigFull);
Error err2 = openCLDeviceSetByDeviceConfig.init();
ASSERT_FALSE(ERR_TO_BOOL(std::move(err2)));
EXPECT_EQ(openCLDeviceSetByDeviceConfig.getMaximumMemory(), 32768);
// If the deviceConfig defines a larger memory size than the OpenCL device
// info reports, fall back to the default.
auto openCLConfigLarger = DeviceConfig("OpenCL");
openCLConfigLarger.setDeviceMemory(memSize + 10000);
OpenCLDeviceManager openCLDeviceLarger(openCLConfigLarger);
Error err3 = openCLDeviceLarger.init();
ASSERT_FALSE(ERR_TO_BOOL(std::move(err3)));
EXPECT_EQ(openCLDeviceLarger.getMaximumMemory(), memSize);
}

class OpenCLCommandQueuePoolTest : public ::testing::Test {
protected:
void SetUp() override {
// Get an OpenCL platform ID.
std::vector<cl_platform_id> platforms(1);
cl_int err = clGetPlatformIDs(1, platforms.data(), nullptr);
ASSERT_EQ(err, CL_SUCCESS) << "clGetPlatformIDs failed.";
// Get an OpenCL device ID.
cl_platform_id platform_id_used = platforms[0];
std::vector<cl_device_id> devices(1);
err = clGetDeviceIDs(platform_id_used, CL_DEVICE_TYPE_ALL, 1,
devices.data(), nullptr);
ASSERT_EQ(err, CL_SUCCESS) << "clGetDeviceIDs failed";
// Create an OpenCL context.
device_ = devices[0];
context_ = clCreateContext(nullptr, 1, &device_, nullptr, nullptr, nullptr);
ASSERT_TRUE(context_) << "clCreateContext failed";
// Set the context and device on the pool to prepare for the tests.
pool_.setContext(context_);
pool_.setDevice(device_);
}

void TearDown() override {
// Release the context.
cl_int err = clReleaseContext(context_);
ASSERT_EQ(err, CL_SUCCESS) << "clReleaseContext failed";
}
cl_context context_{nullptr};
cl_device_id device_{0};
runtime::OpenCLCommandQueuePool pool_;
};

/// Tests that the command queue pool returns an error when a queue is requested
/// but the pool is not correctly initialized.
TEST_F(OpenCLCommandQueuePoolTest, ErrorWhenNotInitialized) {
// Set the context and device to nonsensical values.
pool_.setContext(nullptr);
pool_.setDevice(0);
// A request for a command queue should return an Error.
auto err = pool_.requestCommandQueue().takeError();
ASSERT_TRUE(ERR_TO_BOOL(std::move(err)));
}

/// Tests that the pool reuses queues.
TEST_F(OpenCLCommandQueuePoolTest, QueueReuse) {
cl_command_queue backingQueue1, backingQueue2;
runtime::OpenCLCommandQueue queue;
// Request a queue.
Expected<runtime::OpenCLCommandQueue> queueOrError =
pool_.requestCommandQueue(0);
ASSERT_AND_ASSIGN_VALUE(queue, queueOrError);
backingQueue1 = queue.backingQueue;
// Put it back and request another.
pool_.returnCommandQueue(queue);
queueOrError = pool_.requestCommandQueue(0);
ASSERT_AND_ASSIGN_VALUE(queue, queueOrError);
backingQueue2 = queue.backingQueue;
// The returned queues should be the same and only one should have been
// allocated.
EXPECT_EQ(backingQueue1, backingQueue2);
EXPECT_EQ(pool_.getNumAllocatedQueues(), 1);
EXPECT_EQ(pool_.getNumAllocatedQueuesForProperties(0), 1);
pool_.returnCommandQueue(queue);
}

/// Tests that the pool does not reuse queues if the requested properties are
/// different.
TEST_F(OpenCLCommandQueuePoolTest, NoQueueReuseWithDifferentProps) {
cl_command_queue backingQueue1, backingQueue2;
runtime::OpenCLCommandQueue queue;
// Request a queue.
Expected<runtime::OpenCLCommandQueue> queueOrError =
pool_.requestCommandQueue(0);
ASSERT_AND_ASSIGN_VALUE(queue, queueOrError);
backingQueue1 = queue.backingQueue;
// Put it back and request another with profiling enabled.
pool_.returnCommandQueue(queue);
queueOrError = pool_.requestCommandQueue(CL_QUEUE_PROFILING_ENABLE);
ASSERT_AND_ASSIGN_VALUE(queue, queueOrError);
backingQueue2 = queue.backingQueue;
// The returned queues should not be the same and two should have been
// allocated: one with profiling enabled and one without.
EXPECT_NE(backingQueue1, backingQueue2);
EXPECT_EQ(pool_.getNumAllocatedQueues(), 2);
EXPECT_EQ(pool_.getNumAllocatedQueuesForProperties(0), 1);
EXPECT_EQ(pool_.getNumAllocatedQueuesForProperties(CL_QUEUE_PROFILING_ENABLE),
1);
pool_.returnCommandQueue(queue);
}