Skip to content

Commit b2b3c42

Browse files
Plukiche/vonmisses random (#998)
* Fix race condition in dpnp_rng_vonmises_small_kappa_c and dpnp_rng_vonmises_large_kappa_c
* Rename arrays and change the if condition in the kernels of dpnp_rng_vonmises_large_kappa_c and dpnp_rng_vonmises_small_kappa_c
* Add space
* Fix indices in dpnp_rng_vonmises_small_kappa_c and dpnp_rng_vonmises_large_kappa_c
1 parent 2c3eeb2 commit b2b3c42

File tree

1 file changed

+65
-46
lines changed

1 file changed

+65
-46
lines changed

dpnp/backend/kernels/dpnp_krnl_random.cpp

+65-46
Original file line numberDiff line numberDiff line change
@@ -1261,7 +1261,8 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da
12611261
_DataType s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one;
12621262
_DataType* Uvec = nullptr;
12631263
_DataType* Vvec = nullptr;
1264-
size_t* n = nullptr;
1264+
bool* result_ready = nullptr;
1265+
bool* result_mask = nullptr;
12651266
const _DataType d_zero = 0.0, d_one = 1.0;
12661267

12671268
assert(kappa > 1.0);
@@ -1277,50 +1278,59 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da
12771278

12781279
Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
12791280
Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
1280-
n = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(sizeof(size_t)));
1281-
for (*n = 0; *n < size;)
1281+
1282+
result_ready = reinterpret_cast<bool*>(dpnp_memory_alloc_c(1 * sizeof(bool)));
1283+
result_ready[0] = false;
1284+
result_mask = reinterpret_cast<bool*>(dpnp_memory_alloc_c(size * sizeof(bool)));
1285+
dpnp_full_c<bool>(result_ready, result_mask, size);
1286+
1287+
while(!result_ready[0])
12821288
{
1283-
size_t diff_size = size - *n;
12841289
mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, 0.5 * M_PI);
1285-
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec);
1290+
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec);
12861291
mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one);
1287-
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec);
1292+
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec);
12881293

1289-
cl::sycl::range<1> diff_gws(diff_size);
1294+
cl::sycl::range<1> gws(size);
12901295
auto paral_kernel_some = [&](cl::sycl::handler& cgh) {
12911296
cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event});
1292-
cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) {
1297+
cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) {
12931298
size_t i = global_id[0];
1299+
if (!result_mask[i]) {
1300+
_DataType sn, cn, sn2, cn2;
1301+
_DataType neg_W_minus_one, V, Y;
12941302

1295-
_DataType sn, cn, sn2, cn2;
1296-
_DataType neg_W_minus_one, V, Y;
1297-
1298-
sn = cl::sycl::sin(Uvec[i]);
1299-
cn = cl::sycl::cos(Uvec[i]);
1300-
V = Vvec[i];
1301-
sn2 = sn * sn;
1302-
cn2 = cn * cn;
1303+
sn = cl::sycl::sin(Uvec[i]);
1304+
cn = cl::sycl::cos(Uvec[i]);
1305+
V = Vvec[i];
1306+
sn2 = sn * sn;
1307+
cn2 = cn * cn;
13031308

1304-
neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
1305-
Y = kappa * (s_minus_one + neg_W_minus_one);
1309+
neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
1310+
Y = kappa * (s_minus_one + neg_W_minus_one);
13061311

1307-
if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
1308-
{
1309-
Y = neg_W_minus_one * (2 - neg_W_minus_one);
1310-
if (Y < 0)
1311-
Y = 0.0;
1312-
else if (Y > 1.0)
1313-
Y = 1.0;
1314-
*n = *n + 1;
1315-
result1[*n] = cl::sycl::asin(cl::sycl::sqrt(Y));
1312+
if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
1313+
{
1314+
Y = neg_W_minus_one * (2 - neg_W_minus_one);
1315+
if (Y < 0)
1316+
Y = 0.0;
1317+
else if (Y > 1.0)
1318+
Y = 1.0;
1319+
1320+
result1[i] = cl::sycl::asin(cl::sycl::sqrt(Y));
1321+
result_mask[i] = true;
1322+
}
13161323
}
13171324
});
13181325
};
13191326
auto some_event = DPNP_QUEUE.submit(paral_kernel_some);
13201327
some_event.wait();
1328+
1329+
dpnp_all_c<bool, bool>(result_mask, result_ready, size);
13211330
}
13221331
dpnp_memory_free_c(Uvec);
1323-
dpnp_memory_free_c(n);
1332+
dpnp_memory_free_c(result_ready);
1333+
dpnp_memory_free_c(result_mask);
13241334

13251335
mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one);
13261336
auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec);
@@ -1359,7 +1369,8 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da
13591369
_DataType rho_over_kappa, rho, r, s_kappa;
13601370
_DataType* Uvec = nullptr;
13611371
_DataType* Vvec = nullptr;
1362-
size_t* n = nullptr;
1372+
bool* result_ready = nullptr;
1373+
bool* result_mask = nullptr;
13631374

13641375
const _DataType d_zero = 0.0, d_one = 1.0;
13651376

@@ -1374,39 +1385,47 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da
13741385

13751386
Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
13761387
Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
1377-
n = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(sizeof(size_t)));
13781388

1379-
for (*n = 0; *n < size;)
1389+
result_ready = reinterpret_cast<bool*>(dpnp_memory_alloc_c(1 * sizeof(bool)));
1390+
result_ready[0] = false;
1391+
result_mask = reinterpret_cast<bool*>(dpnp_memory_alloc_c(size * sizeof(bool)));
1392+
dpnp_full_c<bool>(result_ready, result_mask, size);
1393+
1394+
while (!result_ready[0])
13801395
{
1381-
size_t diff_size = size - *n;
13821396
mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, M_PI);
1383-
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec);
1397+
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec);
13841398
mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one);
1385-
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec);
1399+
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec);
13861400

1387-
cl::sycl::range<1> diff_gws((diff_size));
1401+
cl::sycl::range<1> gws((size));
13881402

13891403
auto paral_kernel_some = [&](cl::sycl::handler& cgh) {
13901404
cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event});
1391-
cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) {
1405+
cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) {
13921406
size_t i = global_id[0];
1393-
_DataType Z, W, Y, V;
1394-
Z = cl::sycl::cos(Uvec[i]);
1395-
V = Vvec[i];
1396-
W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
1397-
Y = s_kappa - kappa * W;
1398-
if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
1399-
{
1400-
*n = *n + 1;
1401-
result1[*n] = cl::sycl::acos(W);
1407+
if (!result_mask[i]) {
1408+
_DataType Z, W, Y, V;
1409+
Z = cl::sycl::cos(Uvec[i]);
1410+
V = Vvec[i];
1411+
W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
1412+
Y = s_kappa - kappa * W;
1413+
if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
1414+
{
1415+
result1[i] = cl::sycl::acos(W);
1416+
result_mask[i] = true;
1417+
}
14021418
}
14031419
});
14041420
};
14051421
auto some_event = DPNP_QUEUE.submit(paral_kernel_some);
14061422
some_event.wait();
1423+
1424+
dpnp_all_c<bool, bool>(result_mask, result_ready, size);
14071425
}
14081426
dpnp_memory_free_c(Uvec);
1409-
dpnp_memory_free_c(n);
1427+
dpnp_memory_free_c(result_ready);
1428+
dpnp_memory_free_c(result_mask);
14101429

14111430
mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one);
14121431
auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec);

0 commit comments

Comments
 (0)