Skip to content

Commit 5a907cf

Browse files
Merge pull request #1794 from arcaneframework/dev/gg-return-old-value-for-atomics
Return old value instead of void for atomic operations
2 parents 04326f1 + 6501665 commit 5a907cf

File tree

3 files changed

+75
-50
lines changed

3 files changed

+75
-50
lines changed

arcane/src/arcane/accelerator/Atomic.h

+34-23
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,11 @@ class HostAtomic<eAtomicOperation::Add>
4646
{
4747
public:
4848

49-
template <AcceleratorAtomicConcept DataType> static void
49+
template <AcceleratorAtomicConcept DataType> static DataType
5050
apply(DataType* ptr, DataType value)
5151
{
5252
std::atomic_ref<DataType> v(*ptr);
53-
v.fetch_add(value);
53+
return v.fetch_add(value);
5454
}
5555
};
5656

@@ -59,13 +59,14 @@ class HostAtomic<eAtomicOperation::Max>
5959
{
6060
public:
6161

62-
template <AcceleratorAtomicConcept DataType> static void
62+
template <AcceleratorAtomicConcept DataType> static DataType
6363
apply(DataType* ptr, DataType value)
6464
{
6565
std::atomic_ref<DataType> v(*ptr);
6666
DataType prev_value = v;
6767
while (prev_value < value && !v.compare_exchange_weak(prev_value, value)) {
6868
}
69+
return prev_value;
6970
}
7071
};
7172

@@ -74,13 +75,14 @@ class HostAtomic<eAtomicOperation::Min>
7475
{
7576
public:
7677

77-
template <AcceleratorAtomicConcept DataType> static void
78+
template <AcceleratorAtomicConcept DataType> static DataType
7879
apply(DataType* ptr, DataType value)
7980
{
8081
std::atomic_ref<DataType> v(*ptr);
8182
DataType prev_value = v;
8283
while (prev_value > value && !v.compare_exchange_weak(prev_value, value)) {
8384
}
85+
return prev_value;
8486
}
8587
};
8688

@@ -94,11 +96,11 @@ class SyclAtomic<eAtomicOperation::Add>
9496
{
9597
public:
9698

97-
template <AcceleratorAtomicConcept DataType> static void
99+
template <AcceleratorAtomicConcept DataType> static DataType
98100
apply(DataType* ptr, DataType value)
99101
{
100102
sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);
101-
v.fetch_add(value);
103+
return v.fetch_add(value);
102104
}
103105
};
104106

@@ -107,11 +109,11 @@ class SyclAtomic<eAtomicOperation::Max>
107109
{
108110
public:
109111

110-
template <AcceleratorAtomicConcept DataType> static void
112+
template <AcceleratorAtomicConcept DataType> static DataType
111113
apply(DataType* ptr, DataType value)
112114
{
113115
sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);
114-
v.fetch_max(value);
116+
return v.fetch_max(value);
115117
}
116118
};
117119

@@ -120,11 +122,11 @@ class SyclAtomic<eAtomicOperation::Min>
120122
{
121123
public:
122124

123-
template <AcceleratorAtomicConcept DataType> static void
125+
template <AcceleratorAtomicConcept DataType> static DataType
124126
apply(DataType* ptr, DataType value)
125127
{
126128
sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);
127-
v.fetch_min(value);
129+
return v.fetch_min(value);
128130
}
129131
};
130132

@@ -138,23 +140,23 @@ class AtomicImpl
138140
public:
139141

140142
template <AcceleratorAtomicConcept DataType, enum eAtomicOperation Operation>
141-
ARCCORE_HOST_DEVICE static inline void
143+
ARCCORE_HOST_DEVICE static inline DataType
142144
doAtomic(DataType* ptr, DataType value)
143145
{
144146
#if defined(ARCCORE_DEVICE_TARGET_CUDA) || defined(ARCCORE_DEVICE_TARGET_HIP)
145-
impl::CommonCudaHipAtomic<DataType, Operation>::apply(ptr, value);
147+
return impl::CommonCudaHipAtomic<DataType, Operation>::apply(ptr, value);
146148
#elif defined(ARCCORE_DEVICE_TARGET_SYCL)
147-
SyclAtomic<Operation>::apply(ptr, value);
149+
return SyclAtomic<Operation>::apply(ptr, value);
148150
#else
149-
HostAtomic<Operation>::apply(ptr, value);
151+
return HostAtomic<Operation>::apply(ptr, value);
150152
#endif
151153
}
152154

153155
template <AcceleratorAtomicConcept DataType, enum eAtomicOperation Operation>
154-
ARCCORE_HOST_DEVICE static inline void
156+
ARCCORE_HOST_DEVICE static inline DataType
155157
doAtomic(const DataViewGetterSetter<DataType>& view, DataType value)
156158
{
157-
doAtomic<DataType, Operation>(view._address(), value);
159+
return doAtomic<DataType, Operation>(view._address(), value);
158160
}
159161
};
160162

@@ -168,25 +170,34 @@ namespace Arcane::Accelerator
168170

169171
/*---------------------------------------------------------------------------*/
170172
/*---------------------------------------------------------------------------*/
171-
172-
//! Applique l'opération atomique \a Operation à la valeur à l'adresse \a ptr avec la valeur \a value
173+
/*!
174+
* \brief Applique l'opération atomique \a Operation à la valeur à l'adresse \a ptr avec la valeur \a value.
175+
*
176+
* \retval l'ancienne valeur avant ajout.
177+
*/
173178
template <enum eAtomicOperation Operation, AcceleratorAtomicConcept DataType, typename ValueType>
174-
ARCCORE_HOST_DEVICE inline void
179+
ARCCORE_HOST_DEVICE inline DataType
175180
doAtomic(DataType* ptr, ValueType value)
176181
requires(std::convertible_to<ValueType, DataType>)
177182
{
178183
DataType v = value;
179-
impl::AtomicImpl::doAtomic<DataType, Operation>(ptr, v);
184+
return impl::AtomicImpl::doAtomic<DataType, Operation>(ptr, v);
180185
}
181186

182-
//! Applique l'opération atomique \a Operation à la vue \a view avec la valeur \a value
187+
/*---------------------------------------------------------------------------*/
188+
/*---------------------------------------------------------------------------*/
189+
/*!
190+
* \brief Applique l'opération atomique \a Operation à la vue \a view avec la valeur \a value.
191+
*
192+
* \retval l'ancienne valeur avant ajout.
193+
*/
183194
template <enum eAtomicOperation Operation, AcceleratorAtomicConcept DataType, typename ValueType>
184-
ARCCORE_HOST_DEVICE inline void
195+
ARCCORE_HOST_DEVICE inline DataType
185196
doAtomic(const DataViewGetterSetter<DataType>& view, ValueType value)
186197
requires(std::convertible_to<ValueType, DataType>)
187198
{
188199
DataType v = value;
189-
impl::AtomicImpl::doAtomic<DataType, Operation>(view, v);
200+
return impl::AtomicImpl::doAtomic<DataType, Operation>(view, v);
190201
}
191202

192203
/*---------------------------------------------------------------------------*/

arcane/src/arcane/accelerator/CommonCudaHipAtomicImpl.h

+26-24
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
22
//-----------------------------------------------------------------------------
3-
// Copyright 2000-2023 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
3+
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
44
// See the top-level COPYRIGHT file for details.
55
// SPDX-License-Identifier: Apache-2.0
66
//-----------------------------------------------------------------------------
77
/*---------------------------------------------------------------------------*/
8-
/* CommonCudaHipAtomicImpl.h (C) 2000-2023 */
8+
/* CommonCudaHipAtomicImpl.h (C) 2000-2024 */
99
/* */
1010
/* Implémentation CUDA et HIP des opérations atomiques. */
1111
/*---------------------------------------------------------------------------*/
@@ -24,7 +24,7 @@
2424
// méthodes atomiques ne fonctionnent pas si le pointeur est allouée
2525
// en mémoire unifiée. A priori le problème se pose avec atomicMin, atomicMax,
2626
// atomicInc. Par contre atomicAdd a l'air de fonctionner si les accès
27-
// concurrents ne sont pas trop nombreux
27+
// concurrents ne sont pas trop nombreux.
2828

2929
/*---------------------------------------------------------------------------*/
3030
/*---------------------------------------------------------------------------*/
@@ -50,9 +50,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Add>
5050
{
5151
public:
5252

53-
static ARCCORE_DEVICE void apply(int* ptr, int v)
53+
static ARCCORE_DEVICE int apply(int* ptr, int v)
5454
{
55-
::atomicAdd(ptr, v);
55+
return ::atomicAdd(ptr, v);
5656
}
5757
};
5858

@@ -61,9 +61,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Max>
6161
{
6262
public:
6363

64-
static ARCCORE_DEVICE void apply(int* ptr, int v)
64+
static ARCCORE_DEVICE int apply(int* ptr, int v)
6565
{
66-
::atomicMax(ptr, v);
66+
return ::atomicMax(ptr, v);
6767
}
6868
};
6969

@@ -72,9 +72,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Min>
7272
{
7373
public:
7474

75-
static ARCCORE_DEVICE void apply(int* ptr, int v)
75+
static ARCCORE_DEVICE int apply(int* ptr, int v)
7676
{
77-
::atomicMin(ptr, v);
77+
return ::atomicMin(ptr, v);
7878
}
7979
};
8080

@@ -83,10 +83,10 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Add>
8383
{
8484
public:
8585

86-
static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
86+
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
8787
{
8888
static_assert(sizeof(Int64) == sizeof(long long int), "Bad pointer size");
89-
::atomicAdd((unsigned long long int*)ptr, v);
89+
return static_cast<Int64>(::atomicAdd((unsigned long long int*)ptr, v));
9090
}
9191
};
9292

@@ -96,7 +96,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>
9696
public:
9797

9898
#if defined(__HIP__)
99-
static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
99+
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
100100
{
101101
unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);
102102
unsigned long long int old = *address_as_ull, assumed;
@@ -107,11 +107,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>
107107
old = atomicCAS(address_as_ull, assumed,
108108
static_cast<unsigned long long int>(v > assumed_as_int64 ? v : assumed_as_int64));
109109
} while (assumed != old);
110+
return static_cast<Int64>(old);
110111
}
111112
#else
112-
static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
113+
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
113114
{
114-
::atomicMax((long long int*)ptr, v);
115+
return static_cast<Int64>(::atomicMax((long long int*)ptr, v));
115116
}
116117
#endif
117118
};
@@ -122,7 +123,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>
122123
public:
123124

124125
#if defined(__HIP__)
125-
static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
126+
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
126127
{
127128
unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);
128129
unsigned long long int old = *address_as_ull, assumed;
@@ -133,11 +134,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>
133134
old = atomicCAS(address_as_ull, assumed,
134135
static_cast<unsigned long long int>(v < assumed_as_int64 ? v : assumed_as_int64));
135136
} while (assumed != old);
137+
return static_cast<Int64>(old);
136138
}
137139
#else
138-
static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
140+
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
139141
{
140-
::atomicMin((long long int*)ptr, v);
142+
return static_cast<Int64>(::atomicMin((long long int*)ptr, v));
141143
}
142144
#endif
143145
};
@@ -200,12 +202,12 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Add>
200202
{
201203
public:
202204

203-
static ARCCORE_DEVICE void apply(double* ptr, double v)
205+
static ARCCORE_DEVICE double apply(double* ptr, double v)
204206
{
205207
#if __CUDA_ARCH__ >= 600
206-
::atomicAdd(ptr, v);
208+
return ::atomicAdd(ptr, v);
207209
#else
208-
preArch60atomicAdd(ptr, v);
210+
return preArch60atomicAdd(ptr, v);
209211
#endif
210212
}
211213
};
@@ -215,9 +217,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Max>
215217
{
216218
public:
217219

218-
static ARCCORE_DEVICE void apply(double* ptr, double v)
220+
static ARCCORE_DEVICE double apply(double* ptr, double v)
219221
{
220-
atomicMaxDouble(ptr, v);
222+
return atomicMaxDouble(ptr, v);
221223
}
222224
};
223225

@@ -226,9 +228,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Min>
226228
{
227229
public:
228230

229-
static ARCCORE_DEVICE void apply(double* ptr, double v)
231+
static ARCCORE_DEVICE double apply(double* ptr, double v)
230232
{
231-
atomicMinDouble(ptr, v);
233+
return atomicMinDouble(ptr, v);
232234
}
233235
};
234236

arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc

+15-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "arcane/utils/PlatformUtils.h"
1616
#include "arcane/utils/ValueChecker.h"
1717
#include "arcane/utils/IMemoryRessourceMng.h"
18+
#include "arcane/utils/ITraceMng.h"
1819

1920
#include "arcane/core/BasicUnitTest.h"
2021
#include "arcane/core/ServiceFactory.h"
@@ -224,29 +225,40 @@ _executeTest1(eMemoryRessource mem_ressource)
224225

225226
auto queue = makeQueue(m_runner);
226227
NumArray<DataType, MDDim1> v_sum(1, mem_ressource);
228+
NumArray<bool, MDDim1> is_ok_array(nb_value);
227229
v_sum.fill(init_value, &queue);
228230
DataType* device_sum_ptr = &v_sum[0];
229231
{
230232
auto command = makeCommand(queue);
231233
auto inout_a = viewInOut(command, v0);
232-
234+
auto out_is_ok = viewOut(command, is_ok_array);
233235
command << RUNCOMMAND_LOOP1(iter, nb_value)
234236
{
235237
auto [i] = iter();
236238
DataType x = static_cast<DataType>(i % (nb_value / 4));
237239
if ((i % 2) == 0)
238240
x = -x;
239241
DataType v = x + add0;
240-
ax::doAtomic<Operation>(inout_a(iter), v);
242+
DataType old_v = ax::doAtomic<Operation>(inout_a(iter), v);
243+
DataType new_v = inout_a(iter);
244+
// Si l'opération est l'ajout, teste que l'ancienne valeur plus
245+
// la valeur ajoutée vaut la nouvelle
246+
if (Operation == ax::eAtomicOperation::Add) {
247+
out_is_ok[i] = (new_v == (old_v + v));
248+
}
249+
else
250+
out_is_ok[i] = true;
241251
ax::doAtomic<Operation>(device_sum_ptr, inout_a(iter));
242252
};
243253
}
244254

245255
DataType cumulative = init_value;
246256
for (Int32 i = 0; i < nb_value; ++i) {
247257
if (i < 10)
248-
info() << "V[" << i << "] = " << v0[i];
258+
info() << "V[" << i << "] = " << v0[i] << " is_ok=" << is_ok_array[i];
249259
ax::doAtomic<Operation>(&cumulative, v0[i]);
260+
if (!is_ok_array[i])
261+
ARCANE_FATAL("Bad old value for index '{0}'", i);
250262
}
251263
NumArray<DataType, MDDim1> host_cumulative(1);
252264
host_cumulative.copy(v_sum);

0 commit comments

Comments
 (0)