1
1
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2
2
// -----------------------------------------------------------------------------
3
- // Copyright 2000-2023 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
3
+ // Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4
4
// See the top-level COPYRIGHT file for details.
5
5
// SPDX-License-Identifier: Apache-2.0
6
6
// -----------------------------------------------------------------------------
7
7
/* ---------------------------------------------------------------------------*/
8
- /* CommonCudaHipAtomicImpl.h (C) 2000-2023 */
8
+ /* CommonCudaHipAtomicImpl.h (C) 2000-2024 */
9
9
/* */
10
10
/* Implémentation CUDA et HIP des opérations atomiques. */
11
11
/* ---------------------------------------------------------------------------*/
24
24
// méthodes atomiques ne fonctionnent pas si le pointeur est alloué
25
25
// en mémoire unifiée. A priori le problème se pose avec atomicMin, atomicMax,
26
26
// atomicInc. Par contre atomicAdd a l'air de fonctionner si les accès
27
- // concurrents ne sont pas trop nombreux
27
+ // concurrents ne sont pas trop nombreux.
28
28
29
29
/* ---------------------------------------------------------------------------*/
30
30
/* ---------------------------------------------------------------------------*/
@@ -50,9 +50,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Add>
50
50
{
51
51
public:
52
52
53
- static ARCCORE_DEVICE void apply (int * ptr, int v)
53
+ static ARCCORE_DEVICE int apply (int * ptr, int v)
54
54
{
55
- ::atomicAdd (ptr, v);
55
+ return ::atomicAdd (ptr, v);
56
56
}
57
57
};
58
58
@@ -61,9 +61,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Max>
61
61
{
62
62
public:
63
63
64
- static ARCCORE_DEVICE void apply (int * ptr, int v)
64
+ static ARCCORE_DEVICE int apply (int * ptr, int v)
65
65
{
66
- ::atomicMax (ptr, v);
66
+ return ::atomicMax (ptr, v);
67
67
}
68
68
};
69
69
@@ -72,9 +72,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Min>
72
72
{
73
73
public:
74
74
75
- static ARCCORE_DEVICE void apply (int * ptr, int v)
75
+ static ARCCORE_DEVICE int apply (int * ptr, int v)
76
76
{
77
- ::atomicMin (ptr, v);
77
+ return ::atomicMin (ptr, v);
78
78
}
79
79
};
80
80
@@ -83,10 +83,10 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Add>
83
83
{
84
84
public:
85
85
86
- static ARCCORE_DEVICE void apply (Int64* ptr, Int64 v)
86
+ static ARCCORE_DEVICE Int64 apply (Int64* ptr, Int64 v)
87
87
{
88
88
static_assert (sizeof (Int64) == sizeof (long long int ), " Bad pointer size" );
89
- ::atomicAdd ((unsigned long long int *)ptr, v);
89
+ return static_cast <Int64>( ::atomicAdd ((unsigned long long int *)ptr, v) );
90
90
}
91
91
};
92
92
@@ -96,7 +96,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>
96
96
public:
97
97
98
98
#if defined(__HIP__)
99
- static ARCCORE_DEVICE void apply (Int64* ptr, Int64 v)
99
+ static ARCCORE_DEVICE Int64 apply (Int64* ptr, Int64 v)
100
100
{
101
101
unsigned long long int * address_as_ull = reinterpret_cast <unsigned long long int *>(ptr);
102
102
unsigned long long int old = *address_as_ull, assumed;
@@ -107,11 +107,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>
107
107
old = atomicCAS (address_as_ull, assumed,
108
108
static_cast <unsigned long long int >(v > assumed_as_int64 ? v : assumed_as_int64));
109
109
} while (assumed != old);
110
+ return static_cast <Int64>(old);
110
111
}
111
112
#else
112
- static ARCCORE_DEVICE void apply (Int64* ptr, Int64 v)
113
+ static ARCCORE_DEVICE Int64 apply (Int64* ptr, Int64 v)
113
114
{
114
- ::atomicMax ((long long int *)ptr, v);
115
+ return static_cast <Int64>( ::atomicMax ((long long int *)ptr, v) );
115
116
}
116
117
#endif
117
118
};
@@ -122,7 +123,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>
122
123
public:
123
124
124
125
#if defined(__HIP__)
125
- static ARCCORE_DEVICE void apply (Int64* ptr, Int64 v)
126
+ static ARCCORE_DEVICE Int64 apply (Int64* ptr, Int64 v)
126
127
{
127
128
unsigned long long int * address_as_ull = reinterpret_cast <unsigned long long int *>(ptr);
128
129
unsigned long long int old = *address_as_ull, assumed;
@@ -133,11 +134,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>
133
134
old = atomicCAS (address_as_ull, assumed,
134
135
static_cast <unsigned long long int >(v < assumed_as_int64 ? v : assumed_as_int64));
135
136
} while (assumed != old);
137
+ return static_cast <Int64>(old);
136
138
}
137
139
#else
138
- static ARCCORE_DEVICE void apply (Int64* ptr, Int64 v)
140
+ static ARCCORE_DEVICE Int64 apply (Int64* ptr, Int64 v)
139
141
{
140
- ::atomicMin ((long long int *)ptr, v);
142
+ return static_cast <Int64>( ::atomicMin ((long long int *)ptr, v) );
141
143
}
142
144
#endif
143
145
};
@@ -200,12 +202,12 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Add>
200
202
{
201
203
public:
202
204
203
- static ARCCORE_DEVICE void apply (double * ptr, double v)
205
+ static ARCCORE_DEVICE double apply (double * ptr, double v)
204
206
{
205
207
#if __CUDA_ARCH__ >= 600
206
- ::atomicAdd (ptr, v);
208
+ return ::atomicAdd (ptr, v);
207
209
#else
208
- preArch60atomicAdd (ptr, v);
210
+ return preArch60atomicAdd (ptr, v);
209
211
#endif
210
212
}
211
213
};
@@ -215,9 +217,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Max>
215
217
{
216
218
public:
217
219
218
- static ARCCORE_DEVICE void apply (double * ptr, double v)
220
+ static ARCCORE_DEVICE double apply (double * ptr, double v)
219
221
{
220
- atomicMaxDouble (ptr, v);
222
+ return atomicMaxDouble (ptr, v);
221
223
}
222
224
};
223
225
@@ -226,9 +228,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Min>
226
228
{
227
229
public:
228
230
229
- static ARCCORE_DEVICE void apply (double * ptr, double v)
231
+ static ARCCORE_DEVICE double apply (double * ptr, double v)
230
232
{
231
- atomicMinDouble (ptr, v);
233
+ return atomicMinDouble (ptr, v);
232
234
}
233
235
};
234
236
0 commit comments