@@ -1261,7 +1261,8 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da
12611261 _DataType s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one;
12621262 _DataType* Uvec = nullptr ;
12631263 _DataType* Vvec = nullptr ;
1264- size_t * n = nullptr ;
1264+ bool * result_ready = nullptr ;
1265+ bool * result_mask = nullptr ;
12651266 const _DataType d_zero = 0.0 , d_one = 1.0 ;
12661267
12671268 assert (kappa > 1.0 );
@@ -1277,50 +1278,59 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da
12771278
12781279 Uvec = reinterpret_cast <_DataType*>(dpnp_memory_alloc_c (size * sizeof (_DataType)));
12791280 Vvec = reinterpret_cast <_DataType*>(dpnp_memory_alloc_c (size * sizeof (_DataType)));
1280- n = reinterpret_cast <size_t *>(dpnp_memory_alloc_c (sizeof (size_t )));
1281- for (*n = 0 ; *n < size;)
1281+
1282+ result_ready = reinterpret_cast <bool *>(dpnp_memory_alloc_c (1 * sizeof (bool )));
1283+ result_ready[0 ] = false ;
1284+ result_mask = reinterpret_cast <bool *>(dpnp_memory_alloc_c (size * sizeof (bool )));
1285+ dpnp_full_c<bool >(result_ready, result_mask, size);
1286+
1287+ while (!result_ready[0 ])
12821288 {
1283- size_t diff_size = size - *n;
12841289 mkl_rng::uniform<_DataType> uniform_distribution_u (d_zero, 0.5 * M_PI);
1285- auto uniform_distr_u_event = mkl_rng::generate (uniform_distribution_u, DPNP_RNG_ENGINE, diff_size , Uvec);
1290+ auto uniform_distr_u_event = mkl_rng::generate (uniform_distribution_u, DPNP_RNG_ENGINE, size , Uvec);
12861291 mkl_rng::uniform<_DataType> uniform_distribution_v (d_zero, d_one);
1287- auto uniform_distr_v_event = mkl_rng::generate (uniform_distribution_v, DPNP_RNG_ENGINE, diff_size , Vvec);
1292+ auto uniform_distr_v_event = mkl_rng::generate (uniform_distribution_v, DPNP_RNG_ENGINE, size , Vvec);
12881293
1289- cl::sycl::range<1 > diff_gws (diff_size );
1294+ cl::sycl::range<1 > gws (size );
12901295 auto paral_kernel_some = [&](cl::sycl::handler& cgh) {
12911296 cgh.depends_on ({uniform_distr_u_event, uniform_distr_v_event});
1292- cgh.parallel_for (diff_gws , [=](cl::sycl::id<1 > global_id) {
1297+ cgh.parallel_for (gws , [=](cl::sycl::id<1 > global_id) {
12931298 size_t i = global_id[0 ];
1299+ if (!result_mask[i]) {
1300+ _DataType sn, cn, sn2, cn2;
1301+ _DataType neg_W_minus_one, V, Y;
12941302
1295- _DataType sn, cn, sn2, cn2;
1296- _DataType neg_W_minus_one, V, Y;
1297-
1298- sn = cl::sycl::sin (Uvec[i]);
1299- cn = cl::sycl::cos (Uvec[i]);
1300- V = Vvec[i];
1301- sn2 = sn * sn;
1302- cn2 = cn * cn;
1303+ sn = cl::sycl::sin (Uvec[i]);
1304+ cn = cl::sycl::cos (Uvec[i]);
1305+ V = Vvec[i];
1306+ sn2 = sn * sn;
1307+ cn2 = cn * cn;
13031308
1304- neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
1305- Y = kappa * (s_minus_one + neg_W_minus_one);
1309+ neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
1310+ Y = kappa * (s_minus_one + neg_W_minus_one);
13061311
1307- if ((Y * (2 - Y) >= V) || (cl::sycl::log (Y / V) + 1 >= Y))
1308- {
1309- Y = neg_W_minus_one * (2 - neg_W_minus_one);
1310- if (Y < 0 )
1311- Y = 0.0 ;
1312- else if (Y > 1.0 )
1313- Y = 1.0 ;
1314- *n = *n + 1 ;
1315- result1[*n] = cl::sycl::asin (cl::sycl::sqrt (Y));
1312+ if ((Y * (2 - Y) >= V) || (cl::sycl::log (Y / V) + 1 >= Y))
1313+ {
1314+ Y = neg_W_minus_one * (2 - neg_W_minus_one);
1315+ if (Y < 0 )
1316+ Y = 0.0 ;
1317+ else if (Y > 1.0 )
1318+ Y = 1.0 ;
1319+
1320+ result1[i] = cl::sycl::asin (cl::sycl::sqrt (Y));
1321+ result_mask[i] = true ;
1322+ }
13161323 }
13171324 });
13181325 };
13191326 auto some_event = DPNP_QUEUE.submit (paral_kernel_some);
13201327 some_event.wait ();
1328+
1329+ dpnp_all_c<bool , bool >(result_mask, result_ready, size);
13211330 }
13221331 dpnp_memory_free_c (Uvec);
1323- dpnp_memory_free_c (n);
1332+ dpnp_memory_free_c (result_ready);
1333+ dpnp_memory_free_c (result_mask);
13241334
13251335 mkl_rng::uniform<_DataType> uniform_distribution (d_zero, d_one);
13261336 auto uniform_distr_event = mkl_rng::generate (uniform_distribution, DPNP_RNG_ENGINE, size, Vvec);
@@ -1359,7 +1369,8 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da
13591369 _DataType rho_over_kappa, rho, r, s_kappa;
13601370 _DataType* Uvec = nullptr ;
13611371 _DataType* Vvec = nullptr ;
1362- size_t * n = nullptr ;
1372+ bool * result_ready = nullptr ;
1373+ bool * result_mask = nullptr ;
13631374
13641375 const _DataType d_zero = 0.0 , d_one = 1.0 ;
13651376
@@ -1374,39 +1385,47 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da
13741385
13751386 Uvec = reinterpret_cast <_DataType*>(dpnp_memory_alloc_c (size * sizeof (_DataType)));
13761387 Vvec = reinterpret_cast <_DataType*>(dpnp_memory_alloc_c (size * sizeof (_DataType)));
1377- n = reinterpret_cast <size_t *>(dpnp_memory_alloc_c (sizeof (size_t )));
13781388
1379- for (*n = 0 ; *n < size;)
1389+ result_ready = reinterpret_cast <bool *>(dpnp_memory_alloc_c (1 * sizeof (bool )));
1390+ result_ready[0 ] = false ;
1391+ result_mask = reinterpret_cast <bool *>(dpnp_memory_alloc_c (size * sizeof (bool )));
1392+ dpnp_full_c<bool >(result_ready, result_mask, size);
1393+
1394+ while (!result_ready[0 ])
13801395 {
1381- size_t diff_size = size - *n;
13821396 mkl_rng::uniform<_DataType> uniform_distribution_u (d_zero, M_PI);
1383- auto uniform_distr_u_event = mkl_rng::generate (uniform_distribution_u, DPNP_RNG_ENGINE, diff_size , Uvec);
1397+ auto uniform_distr_u_event = mkl_rng::generate (uniform_distribution_u, DPNP_RNG_ENGINE, size , Uvec);
13841398 mkl_rng::uniform<_DataType> uniform_distribution_v (d_zero, d_one);
1385- auto uniform_distr_v_event = mkl_rng::generate (uniform_distribution_v, DPNP_RNG_ENGINE, diff_size , Vvec);
1399+ auto uniform_distr_v_event = mkl_rng::generate (uniform_distribution_v, DPNP_RNG_ENGINE, size , Vvec);
13861400
1387- cl::sycl::range<1 > diff_gws ((diff_size ));
1401+ cl::sycl::range<1 > gws ((size ));
13881402
13891403 auto paral_kernel_some = [&](cl::sycl::handler& cgh) {
13901404 cgh.depends_on ({uniform_distr_u_event, uniform_distr_v_event});
1391- cgh.parallel_for (diff_gws , [=](cl::sycl::id<1 > global_id) {
1405+ cgh.parallel_for (gws , [=](cl::sycl::id<1 > global_id) {
13921406 size_t i = global_id[0 ];
1393- _DataType Z, W, Y, V;
1394- Z = cl::sycl::cos (Uvec[i]);
1395- V = Vvec[i];
1396- W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
1397- Y = s_kappa - kappa * W;
1398- if ((Y * (2 - Y) >= V) || (cl::sycl::log (Y / V) + 1 >= Y))
1399- {
1400- *n = *n + 1 ;
1401- result1[*n] = cl::sycl::acos (W);
1407+ if (!result_mask[i]) {
1408+ _DataType Z, W, Y, V;
1409+ Z = cl::sycl::cos (Uvec[i]);
1410+ V = Vvec[i];
1411+ W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
1412+ Y = s_kappa - kappa * W;
1413+ if ((Y * (2 - Y) >= V) || (cl::sycl::log (Y / V) + 1 >= Y))
1414+ {
1415+ result1[i] = cl::sycl::acos (W);
1416+ result_mask[i] = true ;
1417+ }
14021418 }
14031419 });
14041420 };
14051421 auto some_event = DPNP_QUEUE.submit (paral_kernel_some);
14061422 some_event.wait ();
1423+
1424+ dpnp_all_c<bool , bool >(result_mask, result_ready, size);
14071425 }
14081426 dpnp_memory_free_c (Uvec);
1409- dpnp_memory_free_c (n);
1427+ dpnp_memory_free_c (result_ready);
1428+ dpnp_memory_free_c (result_mask);
14101429
14111430 mkl_rng::uniform<_DataType> uniform_distribution (d_zero, d_one);
14121431 auto uniform_distr_event = mkl_rng::generate (uniform_distribution, DPNP_RNG_ENGINE, size, Vvec);
0 commit comments