Merge pull request #1794 from arcaneframework/dev/gg-return-old-value-for-atomics

grospelliergilles · web-flow · commit 5a907cf7db03 · 2024-11-25T19:30:09.000+01:00
Return old value instead of void for atomic operations
diff --git a/arcane/src/arcane/accelerator/Atomic.h b/arcane/src/arcane/accelerator/Atomic.h
@@ -46,11 +46,11 @@ class HostAtomic<eAtomicOperation::Add>
 {
  public:
 
-  template <AcceleratorAtomicConcept DataType> static void
+  template <AcceleratorAtomicConcept DataType> static DataType
   apply(DataType* ptr, DataType value)
   {
     std::atomic_ref<DataType> v(*ptr);
-    v.fetch_add(value);
+    return v.fetch_add(value);
   }
 };
 
@@ -59,13 +59,14 @@ class HostAtomic<eAtomicOperation::Max>
 {
  public:
 
-  template <AcceleratorAtomicConcept DataType> static void
+  template <AcceleratorAtomicConcept DataType> static DataType
   apply(DataType* ptr, DataType value)
   {
     std::atomic_ref<DataType> v(*ptr);
     DataType prev_value = v;
     while (prev_value < value && !v.compare_exchange_weak(prev_value, value)) {
     }
+    return prev_value;
   }
 };
 
@@ -74,13 +75,14 @@ class HostAtomic<eAtomicOperation::Min>
 {
  public:
 
-  template <AcceleratorAtomicConcept DataType> static void
+  template <AcceleratorAtomicConcept DataType> static DataType
   apply(DataType* ptr, DataType value)
   {
     std::atomic_ref<DataType> v(*ptr);
     DataType prev_value = v;
     while (prev_value > value && !v.compare_exchange_weak(prev_value, value)) {
     }
+    return prev_value;
   }
 };
 
@@ -94,11 +96,11 @@ class SyclAtomic<eAtomicOperation::Add>
 {
  public:
 
-  template <AcceleratorAtomicConcept DataType> static void
+  template <AcceleratorAtomicConcept DataType> static DataType
   apply(DataType* ptr, DataType value)
   {
     sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);
-    v.fetch_add(value);
+    return v.fetch_add(value);
   }
 };
 
@@ -107,11 +109,11 @@ class SyclAtomic<eAtomicOperation::Max>
 {
  public:
 
-  template <AcceleratorAtomicConcept DataType> static void
+  template <AcceleratorAtomicConcept DataType> static DataType
   apply(DataType* ptr, DataType value)
   {
     sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);
-    v.fetch_max(value);
+    return v.fetch_max(value);
   }
 };
 
@@ -120,11 +122,11 @@ class SyclAtomic<eAtomicOperation::Min>
 {
  public:
 
-  template <AcceleratorAtomicConcept DataType> static void
+  template <AcceleratorAtomicConcept DataType> static DataType
   apply(DataType* ptr, DataType value)
   {
     sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);
-    v.fetch_min(value);
+    return v.fetch_min(value);
   }
 };
 
@@ -138,23 +140,23 @@ class AtomicImpl
  public:
 
   template <AcceleratorAtomicConcept DataType, enum eAtomicOperation Operation>
-  ARCCORE_HOST_DEVICE static inline void
+  ARCCORE_HOST_DEVICE static inline DataType
   doAtomic(DataType* ptr, DataType value)
   {
 #if defined(ARCCORE_DEVICE_TARGET_CUDA) || defined(ARCCORE_DEVICE_TARGET_HIP)
-    impl::CommonCudaHipAtomic<DataType, Operation>::apply(ptr, value);
+    return impl::CommonCudaHipAtomic<DataType, Operation>::apply(ptr, value);
 #elif defined(ARCCORE_DEVICE_TARGET_SYCL)
-    SyclAtomic<Operation>::apply(ptr, value);
+    return SyclAtomic<Operation>::apply(ptr, value);
 #else
-    HostAtomic<Operation>::apply(ptr, value);
+    return HostAtomic<Operation>::apply(ptr, value);
 #endif
   }
 
   template <AcceleratorAtomicConcept DataType, enum eAtomicOperation Operation>
-  ARCCORE_HOST_DEVICE static inline void
+  ARCCORE_HOST_DEVICE static inline DataType
   doAtomic(const DataViewGetterSetter<DataType>& view, DataType value)
   {
-    doAtomic<DataType, Operation>(view._address(), value);
+    return doAtomic<DataType, Operation>(view._address(), value);
   }
 };
 
@@ -168,25 +170,34 @@ namespace Arcane::Accelerator
 
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
-
-//! Applique l'opération atomique \a Operation à la valeur à l'adresse \a ptr avec la valeur \a value
+/*!
+ * \brief Applies the atomic operation \a Operation to the value at address \a ptr using the value \a value.
+ *
+ * \return the value stored at \a ptr before the operation was applied.
+ */
 template <enum eAtomicOperation Operation, AcceleratorAtomicConcept DataType, typename ValueType>
-ARCCORE_HOST_DEVICE inline void
+ARCCORE_HOST_DEVICE inline DataType
 doAtomic(DataType* ptr, ValueType value)
 requires(std::convertible_to<ValueType, DataType>)
 {
   DataType v = value;
-  impl::AtomicImpl::doAtomic<DataType, Operation>(ptr, v);
+  return impl::AtomicImpl::doAtomic<DataType, Operation>(ptr, v);
 }
 
-//! Applique l'opération atomique \a Operation à la vue \a view avec la valeur \a value
+/*---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------*/
+/*!
+ * \brief Applies the atomic operation \a Operation to the view \a view using the value \a value.
+ *
+ * \return the value referenced by \a view before the operation was applied.
+ */
 template <enum eAtomicOperation Operation, AcceleratorAtomicConcept DataType, typename ValueType>
-ARCCORE_HOST_DEVICE inline void
+ARCCORE_HOST_DEVICE inline DataType
 doAtomic(const DataViewGetterSetter<DataType>& view, ValueType value)
 requires(std::convertible_to<ValueType, DataType>)
 {
   DataType v = value;
-  impl::AtomicImpl::doAtomic<DataType, Operation>(view, v);
+  return impl::AtomicImpl::doAtomic<DataType, Operation>(view, v);
 }
 
 /*---------------------------------------------------------------------------*/
diff --git a/arcane/src/arcane/accelerator/CommonCudaHipAtomicImpl.h b/arcane/src/arcane/accelerator/CommonCudaHipAtomicImpl.h
@@ -1,11 +1,11 @@
 ﻿// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
 //-----------------------------------------------------------------------------
-// Copyright 2000-2023 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
+// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
 // See the top-level COPYRIGHT file for details.
 // SPDX-License-Identifier: Apache-2.0
 //-----------------------------------------------------------------------------
 /*---------------------------------------------------------------------------*/
-/* CommonCudaHipAtomicImpl.h                                   (C) 2000-2023 */
+/* CommonCudaHipAtomicImpl.h                                   (C) 2000-2024 */
 /*                                                                           */
 /* Implémentation CUDA et HIP des opérations atomiques.                      */
 /*---------------------------------------------------------------------------*/
@@ -24,7 +24,7 @@
 // méthodes atomiques ne fonctionnent pas si le pointeur est allouée
 // en mémoire unifiée. A priori le problème se pose avec atomicMin, atomicMax,
 // atomicInc. Par contre atomicAdd a l'air de fonctionner si les accès
-// concurrents ne sont pas trop nombreux
+// concurrents ne sont pas trop nombreux.
 
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
@@ -50,9 +50,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Add>
 {
  public:
 
-  static ARCCORE_DEVICE void apply(int* ptr, int v)
+  static ARCCORE_DEVICE int apply(int* ptr, int v)
   {
-    ::atomicAdd(ptr, v);
+    return ::atomicAdd(ptr, v);
   }
 };
 
@@ -61,9 +61,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Max>
 {
  public:
 
-  static ARCCORE_DEVICE void apply(int* ptr, int v)
+  static ARCCORE_DEVICE int apply(int* ptr, int v)
   {
-    ::atomicMax(ptr, v);
+    return ::atomicMax(ptr, v);
   }
 };
 
@@ -72,9 +72,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Min>
 {
  public:
 
-  static ARCCORE_DEVICE void apply(int* ptr, int v)
+  static ARCCORE_DEVICE int apply(int* ptr, int v)
   {
-    ::atomicMin(ptr, v);
+    return ::atomicMin(ptr, v);
   }
 };
 
@@ -83,10 +83,10 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Add>
 {
  public:
 
-  static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
+  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
   {
     static_assert(sizeof(Int64) == sizeof(long long int), "Bad pointer size");
-    ::atomicAdd((unsigned long long int*)ptr, v);
+    return static_cast<Int64>(::atomicAdd((unsigned long long int*)ptr, v));
   }
 };
 
@@ -96,7 +96,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>
  public:
 
 #if defined(__HIP__)
-  static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
+  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
   {
     unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);
     unsigned long long int old = *address_as_ull, assumed;
@@ -107,11 +107,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>
       old = atomicCAS(address_as_ull, assumed,
                       static_cast<unsigned long long int>(v > assumed_as_int64 ? v : assumed_as_int64));
     } while (assumed != old);
+    return static_cast<Int64>(old);
   }
 #else
-  static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
+  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
   {
-    ::atomicMax((long long int*)ptr, v);
+    return static_cast<Int64>(::atomicMax((long long int*)ptr, v));
   }
 #endif
 };
@@ -122,7 +123,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>
  public:
 
 #if defined(__HIP__)
-  static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
+  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
   {
     unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);
     unsigned long long int old = *address_as_ull, assumed;
@@ -133,11 +134,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>
       old = atomicCAS(address_as_ull, assumed,
                       static_cast<unsigned long long int>(v < assumed_as_int64 ? v : assumed_as_int64));
     } while (assumed != old);
+    return static_cast<Int64>(old);
   }
 #else
-  static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
+  static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
   {
-    ::atomicMin((long long int*)ptr, v);
+    return static_cast<Int64>(::atomicMin((long long int*)ptr, v));
   }
 #endif
 };
@@ -200,12 +202,12 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Add>
 {
  public:
 
-  static ARCCORE_DEVICE void apply(double* ptr, double v)
+  static ARCCORE_DEVICE double apply(double* ptr, double v)
   {
 #if __CUDA_ARCH__ >= 600
-    ::atomicAdd(ptr, v);
+    return ::atomicAdd(ptr, v);
 #else
-    preArch60atomicAdd(ptr, v);
+    return preArch60atomicAdd(ptr, v);
 #endif
   }
 };
@@ -215,9 +217,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Max>
 {
  public:
 
-  static ARCCORE_DEVICE void apply(double* ptr, double v)
+  static ARCCORE_DEVICE double apply(double* ptr, double v)
   {
-    atomicMaxDouble(ptr, v);
+    return atomicMaxDouble(ptr, v);
   }
 };
 
@@ -226,9 +228,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Min>
 {
  public:
 
-  static ARCCORE_DEVICE void apply(double* ptr, double v)
+  static ARCCORE_DEVICE double apply(double* ptr, double v)
   {
-    atomicMinDouble(ptr, v);
+    return atomicMinDouble(ptr, v);
   }
 };
 
diff --git a/arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc b/arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc
@@ -15,6 +15,7 @@
 #include "arcane/utils/PlatformUtils.h"
 #include "arcane/utils/ValueChecker.h"
 #include "arcane/utils/IMemoryRessourceMng.h"
+#include "arcane/utils/ITraceMng.h"
 
 #include "arcane/core/BasicUnitTest.h"
 #include "arcane/core/ServiceFactory.h"
@@ -224,29 +225,40 @@ _executeTest1(eMemoryRessource mem_ressource)
 
   auto queue = makeQueue(m_runner);
   NumArray<DataType, MDDim1> v_sum(1, mem_ressource);
+  NumArray<bool, MDDim1> is_ok_array(nb_value);
   v_sum.fill(init_value, &queue);
   DataType* device_sum_ptr = &v_sum[0];
   {
     auto command = makeCommand(queue);
     auto inout_a = viewInOut(command, v0);
-
+    auto out_is_ok = viewOut(command, is_ok_array);
     command << RUNCOMMAND_LOOP1(iter, nb_value)
     {
       auto [i] = iter();
       DataType x = static_cast<DataType>(i % (nb_value / 4));
       if ((i % 2) == 0)
         x = -x;
       DataType v = x + add0;
-      ax::doAtomic<Operation>(inout_a(iter), v);
+      DataType old_v = ax::doAtomic<Operation>(inout_a(iter), v);
+      DataType new_v = inout_a(iter);
+      // Si l'opération est l'ajout, teste que l'ancienne valeur plus
+      // la valeur ajoutée vaut la nouvelle
+      if (Operation == ax::eAtomicOperation::Add) {
+        out_is_ok[i] = (new_v == (old_v + v));
+      }
+      else
+        out_is_ok[i] = true;
       ax::doAtomic<Operation>(device_sum_ptr, inout_a(iter));
     };
   }
 
   DataType cumulative = init_value;
   for (Int32 i = 0; i < nb_value; ++i) {
     if (i < 10)
-      info() << "V[" << i << "] = " << v0[i];
+      info() << "V[" << i << "] = " << v0[i] << " is_ok=" << is_ok_array[i];
     ax::doAtomic<Operation>(&cumulative, v0[i]);
+    if (!is_ok_array[i])
+      ARCANE_FATAL("Bad old value for index '{0}'", i);
   }
   NumArray<DataType, MDDim1> host_cumulative(1);
   host_cumulative.copy(v_sum);

Original file line number	Diff line number	Diff line change
`@@ -46,11 +46,11 @@ class HostAtomic<eAtomicOperation::Add>`
`46`	`46`	`{`
`47`	`47`	`public:`
`48`	`48`
`49`		`- template <AcceleratorAtomicConcept DataType> static void`
	`49`	`+ template <AcceleratorAtomicConcept DataType> static DataType`
`50`	`50`	`apply(DataType* ptr, DataType value)`
`51`	`51`	`{`
`52`	`52`	`std::atomic_ref<DataType> v(*ptr);`
`53`		`- v.fetch_add(value);`
	`53`	`+ return v.fetch_add(value);`
`54`	`54`	`}`
`55`	`55`	`};`
`56`	`56`
`@@ -59,13 +59,14 @@ class HostAtomic<eAtomicOperation::Max>`
`59`	`59`	`{`
`60`	`60`	`public:`
`61`	`61`
`62`		`- template <AcceleratorAtomicConcept DataType> static void`
	`62`	`+ template <AcceleratorAtomicConcept DataType> static DataType`
`63`	`63`	`apply(DataType* ptr, DataType value)`
`64`	`64`	`{`
`65`	`65`	`std::atomic_ref<DataType> v(*ptr);`
`66`	`66`	`DataType prev_value = v;`
`67`	`67`	`while (prev_value < value && !v.compare_exchange_weak(prev_value, value)) {`
`68`	`68`	`}`
	`69`	`+ return prev_value;`
`69`	`70`	`}`
`70`	`71`	`};`
`71`	`72`
`@@ -74,13 +75,14 @@ class HostAtomic<eAtomicOperation::Min>`
`74`	`75`	`{`
`75`	`76`	`public:`
`76`	`77`
`77`		`- template <AcceleratorAtomicConcept DataType> static void`
	`78`	`+ template <AcceleratorAtomicConcept DataType> static DataType`
`78`	`79`	`apply(DataType* ptr, DataType value)`
`79`	`80`	`{`
`80`	`81`	`std::atomic_ref<DataType> v(*ptr);`
`81`	`82`	`DataType prev_value = v;`
`82`	`83`	`while (prev_value > value && !v.compare_exchange_weak(prev_value, value)) {`
`83`	`84`	`}`
	`85`	`+ return prev_value;`
`84`	`86`	`}`
`85`	`87`	`};`
`86`	`88`
`@@ -94,11 +96,11 @@ class SyclAtomic<eAtomicOperation::Add>`
`94`	`96`	`{`
`95`	`97`	`public:`
`96`	`98`
`97`		`- template <AcceleratorAtomicConcept DataType> static void`
	`99`	`+ template <AcceleratorAtomicConcept DataType> static DataType`
`98`	`100`	`apply(DataType* ptr, DataType value)`
`99`	`101`	`{`
`100`	`102`	`sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);`
`101`		`- v.fetch_add(value);`
	`103`	`+ return v.fetch_add(value);`
`102`	`104`	`}`
`103`	`105`	`};`
`104`	`106`
`@@ -107,11 +109,11 @@ class SyclAtomic<eAtomicOperation::Max>`
`107`	`109`	`{`
`108`	`110`	`public:`
`109`	`111`
`110`		`- template <AcceleratorAtomicConcept DataType> static void`
	`112`	`+ template <AcceleratorAtomicConcept DataType> static DataType`
`111`	`113`	`apply(DataType* ptr, DataType value)`
`112`	`114`	`{`
`113`	`115`	`sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);`
`114`		`- v.fetch_max(value);`
	`116`	`+ return v.fetch_max(value);`
`115`	`117`	`}`
`116`	`118`	`};`
`117`	`119`
`@@ -120,11 +122,11 @@ class SyclAtomic<eAtomicOperation::Min>`
`120`	`122`	`{`
`121`	`123`	`public:`
`122`	`124`
`123`		`- template <AcceleratorAtomicConcept DataType> static void`
	`125`	`+ template <AcceleratorAtomicConcept DataType> static DataType`
`124`	`126`	`apply(DataType* ptr, DataType value)`
`125`	`127`	`{`
`126`	`128`	`sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);`
`127`		`- v.fetch_min(value);`
	`129`	`+ return v.fetch_min(value);`
`128`	`130`	`}`
`129`	`131`	`};`
`130`	`132`
`@@ -138,23 +140,23 @@ class AtomicImpl`
`138`	`140`	`public:`
`139`	`141`
`140`	`142`	`template <AcceleratorAtomicConcept DataType, enum eAtomicOperation Operation>`
`141`		`- ARCCORE_HOST_DEVICE static inline void`
	`143`	`+ ARCCORE_HOST_DEVICE static inline DataType`
`142`	`144`	`doAtomic(DataType* ptr, DataType value)`
`143`	`145`	`{`
`144`	`146`	`#if defined(ARCCORE_DEVICE_TARGET_CUDA) || defined(ARCCORE_DEVICE_TARGET_HIP)`
`145`		`- impl::CommonCudaHipAtomic<DataType, Operation>::apply(ptr, value);`
	`147`	`+ return impl::CommonCudaHipAtomic<DataType, Operation>::apply(ptr, value);`
`146`	`148`	`#elif defined(ARCCORE_DEVICE_TARGET_SYCL)`
`147`		`- SyclAtomic<Operation>::apply(ptr, value);`
	`149`	`+ return SyclAtomic<Operation>::apply(ptr, value);`
`148`	`150`	`#else`
`149`		`- HostAtomic<Operation>::apply(ptr, value);`
	`151`	`+ return HostAtomic<Operation>::apply(ptr, value);`
`150`	`152`	`#endif`
`151`	`153`	`}`
`152`	`154`
`153`	`155`	`template <AcceleratorAtomicConcept DataType, enum eAtomicOperation Operation>`
`154`		`- ARCCORE_HOST_DEVICE static inline void`
	`156`	`+ ARCCORE_HOST_DEVICE static inline DataType`
`155`	`157`	`doAtomic(const DataViewGetterSetter<DataType>& view, DataType value)`
`156`	`158`	`{`
`157`		`- doAtomic<DataType, Operation>(view._address(), value);`
	`159`	`+ return doAtomic<DataType, Operation>(view._address(), value);`
`158`	`160`	`}`
`159`	`161`	`};`
`160`	`162`
`@@ -168,25 +170,34 @@ namespace Arcane::Accelerator`
`168`	`170`
`169`	`171`	`/*---------------------------------------------------------------------------*/`
`170`	`172`	`/*---------------------------------------------------------------------------*/`
`171`		`-`
`172`		`-//! Applique l'opération atomique \a Operation à la valeur à l'adresse \a ptr avec la valeur \a value`
	`173`	`+/*!`
	`174`	`+ * \brief Applies the atomic operation \a Operation to the value at address \a ptr using the value \a value.`
	`175`	`+ *`
	`176`	`+ * \return the value stored at \a ptr before the operation was applied.`
	`177`	`+ */`
`173`	`178`	`template <enum eAtomicOperation Operation, AcceleratorAtomicConcept DataType, typename ValueType>`
`174`		`-ARCCORE_HOST_DEVICE inline void`
	`179`	`+ARCCORE_HOST_DEVICE inline DataType`
`175`	`180`	`doAtomic(DataType* ptr, ValueType value)`
`176`	`181`	`requires(std::convertible_to<ValueType, DataType>)`
`177`	`182`	`{`
`178`	`183`	`DataType v = value;`
`179`		`- impl::AtomicImpl::doAtomic<DataType, Operation>(ptr, v);`
	`184`	`+ return impl::AtomicImpl::doAtomic<DataType, Operation>(ptr, v);`
`180`	`185`	`}`
`181`	`186`
`182`		`-//! Applique l'opération atomique \a Operation à la vue \a view avec la valeur \a value`
	`187`	`+/*---------------------------------------------------------------------------*/`
	`188`	`+/*---------------------------------------------------------------------------*/`
	`189`	`+/*!`
	`190`	`+ * \brief Applies the atomic operation \a Operation to the view \a view using the value \a value.`
	`191`	`+ *`
	`192`	`+ * \return the value referenced by \a view before the operation was applied.`
	`193`	`+ */`
`183`	`194`	`template <enum eAtomicOperation Operation, AcceleratorAtomicConcept DataType, typename ValueType>`
`184`		`-ARCCORE_HOST_DEVICE inline void`
	`195`	`+ARCCORE_HOST_DEVICE inline DataType`
`185`	`196`	`doAtomic(const DataViewGetterSetter<DataType>& view, ValueType value)`
`186`	`197`	`requires(std::convertible_to<ValueType, DataType>)`
`187`	`198`	`{`
`188`	`199`	`DataType v = value;`
`189`		`- impl::AtomicImpl::doAtomic<DataType, Operation>(view, v);`
	`200`	`+ return impl::AtomicImpl::doAtomic<DataType, Operation>(view, v);`
`190`	`201`	`}`
`191`	`202`
`192`	`203`	`/*---------------------------------------------------------------------------*/`
Original file line number	Diff line number	Diff line change
`@@ -1,11 +1,11 @@`
`1`	`1`	`// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-`
`2`	`2`	`//-----------------------------------------------------------------------------`
`3`		`-// Copyright 2000-2023 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)`
	`3`	`+// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)`
`4`	`4`	`// See the top-level COPYRIGHT file for details.`
`5`	`5`	`// SPDX-License-Identifier: Apache-2.0`
`6`	`6`	`//-----------------------------------------------------------------------------`
`7`	`7`	`/*---------------------------------------------------------------------------*/`
`8`		`-/* CommonCudaHipAtomicImpl.h (C) 2000-2023 */`
	`8`	`+/* CommonCudaHipAtomicImpl.h (C) 2000-2024 */`
`9`	`9`	`/* */`
`10`	`10`	`/* Implémentation CUDA et HIP des opérations atomiques. */`
`11`	`11`	`/*---------------------------------------------------------------------------*/`
`@@ -24,7 +24,7 @@`
`24`	`24`	`// méthodes atomiques ne fonctionnent pas si le pointeur est allouée`
`25`	`25`	`// en mémoire unifiée. A priori le problème se pose avec atomicMin, atomicMax,`
`26`	`26`	`// atomicInc. Par contre atomicAdd a l'air de fonctionner si les accès`
`27`		`-// concurrents ne sont pas trop nombreux`
	`27`	`+// concurrents ne sont pas trop nombreux.`
`28`	`28`
`29`	`29`	`/*---------------------------------------------------------------------------*/`
`30`	`30`	`/*---------------------------------------------------------------------------*/`
`@@ -50,9 +50,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Add>`
`50`	`50`	`{`
`51`	`51`	`public:`
`52`	`52`
`53`		`- static ARCCORE_DEVICE void apply(int* ptr, int v)`
	`53`	`+ static ARCCORE_DEVICE int apply(int* ptr, int v)`
`54`	`54`	`{`
`55`		`- ::atomicAdd(ptr, v);`
	`55`	`+ return ::atomicAdd(ptr, v);`
`56`	`56`	`}`
`57`	`57`	`};`
`58`	`58`
`@@ -61,9 +61,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Max>`
`61`	`61`	`{`
`62`	`62`	`public:`
`63`	`63`
`64`		`- static ARCCORE_DEVICE void apply(int* ptr, int v)`
	`64`	`+ static ARCCORE_DEVICE int apply(int* ptr, int v)`
`65`	`65`	`{`
`66`		`- ::atomicMax(ptr, v);`
	`66`	`+ return ::atomicMax(ptr, v);`
`67`	`67`	`}`
`68`	`68`	`};`
`69`	`69`
`@@ -72,9 +72,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Min>`
`72`	`72`	`{`
`73`	`73`	`public:`
`74`	`74`
`75`		`- static ARCCORE_DEVICE void apply(int* ptr, int v)`
	`75`	`+ static ARCCORE_DEVICE int apply(int* ptr, int v)`
`76`	`76`	`{`
`77`		`- ::atomicMin(ptr, v);`
	`77`	`+ return ::atomicMin(ptr, v);`
`78`	`78`	`}`
`79`	`79`	`};`
`80`	`80`
`@@ -83,10 +83,10 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Add>`
`83`	`83`	`{`
`84`	`84`	`public:`
`85`	`85`
`86`		`- static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)`
	`86`	`+ static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)`
`87`	`87`	`{`
`88`	`88`	`static_assert(sizeof(Int64) == sizeof(long long int), "Bad pointer size");`
`89`		`- ::atomicAdd((unsigned long long int*)ptr, v);`
	`89`	`+ return static_cast<Int64>(::atomicAdd((unsigned long long int*)ptr, v));`
`90`	`90`	`}`
`91`	`91`	`};`
`92`	`92`
`@@ -96,7 +96,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>`
`96`	`96`	`public:`
`97`	`97`
`98`	`98`	`#if defined(__HIP__)`
`99`		`- static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)`
	`99`	`+ static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)`
`100`	`100`	`{`
`101`	`101`	`unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);`
`102`	`102`	`unsigned long long int old = *address_as_ull, assumed;`
`@@ -107,11 +107,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>`
`107`	`107`	`old = atomicCAS(address_as_ull, assumed,`
`108`	`108`	`static_cast<unsigned long long int>(v > assumed_as_int64 ? v : assumed_as_int64));`
`109`	`109`	`} while (assumed != old);`
	`110`	`+ return static_cast<Int64>(old);`
`110`	`111`	`}`
`111`	`112`	`#else`
`112`		`- static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)`
	`113`	`+ static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)`
`113`	`114`	`{`
`114`		`- ::atomicMax((long long int*)ptr, v);`
	`115`	`+ return static_cast<Int64>(::atomicMax((long long int*)ptr, v));`
`115`	`116`	`}`
`116`	`117`	`#endif`
`117`	`118`	`};`
`@@ -122,7 +123,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>`
`122`	`123`	`public:`
`123`	`124`
`124`	`125`	`#if defined(__HIP__)`
`125`		`- static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)`
	`126`	`+ static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)`
`126`	`127`	`{`
`127`	`128`	`unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);`
`128`	`129`	`unsigned long long int old = *address_as_ull, assumed;`
`@@ -133,11 +134,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>`
`133`	`134`	`old = atomicCAS(address_as_ull, assumed,`
`134`	`135`	`static_cast<unsigned long long int>(v < assumed_as_int64 ? v : assumed_as_int64));`
`135`	`136`	`} while (assumed != old);`
	`137`	`+ return static_cast<Int64>(old);`
`136`	`138`	`}`
`137`	`139`	`#else`
`138`		`- static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)`
	`140`	`+ static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)`
`139`	`141`	`{`
`140`		`- ::atomicMin((long long int*)ptr, v);`
	`142`	`+ return static_cast<Int64>(::atomicMin((long long int*)ptr, v));`
`141`	`143`	`}`
`142`	`144`	`#endif`
`143`	`145`	`};`
`@@ -200,12 +202,12 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Add>`
`200`	`202`	`{`
`201`	`203`	`public:`
`202`	`204`
`203`		`- static ARCCORE_DEVICE void apply(double* ptr, double v)`
	`205`	`+ static ARCCORE_DEVICE double apply(double* ptr, double v)`
`204`	`206`	`{`
`205`	`207`	`#if __CUDA_ARCH__ >= 600`
`206`		`- ::atomicAdd(ptr, v);`
	`208`	`+ return ::atomicAdd(ptr, v);`
`207`	`209`	`#else`
`208`		`- preArch60atomicAdd(ptr, v);`
	`210`	`+ return preArch60atomicAdd(ptr, v);`
`209`	`211`	`#endif`
`210`	`212`	`}`
`211`	`213`	`};`
`@@ -215,9 +217,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Max>`
`215`	`217`	`{`
`216`	`218`	`public:`
`217`	`219`
`218`		`- static ARCCORE_DEVICE void apply(double* ptr, double v)`
	`220`	`+ static ARCCORE_DEVICE double apply(double* ptr, double v)`
`219`	`221`	`{`
`220`		`- atomicMaxDouble(ptr, v);`
	`222`	`+ return atomicMaxDouble(ptr, v);`
`221`	`223`	`}`
`222`	`224`	`};`
`223`	`225`
`@@ -226,9 +228,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Min>`
`226`	`228`	`{`
`227`	`229`	`public:`
`228`	`230`
`229`		`- static ARCCORE_DEVICE void apply(double* ptr, double v)`
	`231`	`+ static ARCCORE_DEVICE double apply(double* ptr, double v)`
`230`	`232`	`{`
`231`		`- atomicMinDouble(ptr, v);`
	`233`	`+ return atomicMinDouble(ptr, v);`
`232`	`234`	`}`
`233`	`235`	`};`
`234`	`236`