Skip to content

Return old value instead of void for atomic operations #1794

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 34 additions & 23 deletions arcane/src/arcane/accelerator/Atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ class HostAtomic<eAtomicOperation::Add>
{
public:

template <AcceleratorAtomicConcept DataType> static void
template <AcceleratorAtomicConcept DataType> static DataType
apply(DataType* ptr, DataType value)
{
std::atomic_ref<DataType> v(*ptr);
v.fetch_add(value);
return v.fetch_add(value);
}
};

Expand All @@ -59,13 +59,14 @@ class HostAtomic<eAtomicOperation::Max>
{
public:

template <AcceleratorAtomicConcept DataType> static void
template <AcceleratorAtomicConcept DataType> static DataType
apply(DataType* ptr, DataType value)
{
std::atomic_ref<DataType> v(*ptr);
DataType prev_value = v;
while (prev_value < value && !v.compare_exchange_weak(prev_value, value)) {
}
return prev_value;
}
};

Expand All @@ -74,13 +75,14 @@ class HostAtomic<eAtomicOperation::Min>
{
public:

template <AcceleratorAtomicConcept DataType> static void
template <AcceleratorAtomicConcept DataType> static DataType
apply(DataType* ptr, DataType value)
{
std::atomic_ref<DataType> v(*ptr);
DataType prev_value = v;
while (prev_value > value && !v.compare_exchange_weak(prev_value, value)) {
}
return prev_value;
}
};

Expand All @@ -94,11 +96,11 @@ class SyclAtomic<eAtomicOperation::Add>
{
public:

template <AcceleratorAtomicConcept DataType> static void
template <AcceleratorAtomicConcept DataType> static DataType
apply(DataType* ptr, DataType value)
{
sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);
v.fetch_add(value);
return v.fetch_add(value);
}
};

Expand All @@ -107,11 +109,11 @@ class SyclAtomic<eAtomicOperation::Max>
{
public:

template <AcceleratorAtomicConcept DataType> static void
template <AcceleratorAtomicConcept DataType> static DataType
apply(DataType* ptr, DataType value)
{
sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);
v.fetch_max(value);
return v.fetch_max(value);
}
};

Expand All @@ -120,11 +122,11 @@ class SyclAtomic<eAtomicOperation::Min>
{
public:

template <AcceleratorAtomicConcept DataType> static void
template <AcceleratorAtomicConcept DataType> static DataType
apply(DataType* ptr, DataType value)
{
sycl::atomic_ref<DataType, sycl::memory_order::relaxed, sycl::memory_scope::device> v(*ptr);
v.fetch_min(value);
return v.fetch_min(value);
}
};

Expand All @@ -138,23 +140,23 @@ class AtomicImpl
public:

template <AcceleratorAtomicConcept DataType, enum eAtomicOperation Operation>
ARCCORE_HOST_DEVICE static inline void
ARCCORE_HOST_DEVICE static inline DataType
doAtomic(DataType* ptr, DataType value)
{
#if defined(ARCCORE_DEVICE_TARGET_CUDA) || defined(ARCCORE_DEVICE_TARGET_HIP)
impl::CommonCudaHipAtomic<DataType, Operation>::apply(ptr, value);
return impl::CommonCudaHipAtomic<DataType, Operation>::apply(ptr, value);
#elif defined(ARCCORE_DEVICE_TARGET_SYCL)
SyclAtomic<Operation>::apply(ptr, value);
return SyclAtomic<Operation>::apply(ptr, value);
#else
HostAtomic<Operation>::apply(ptr, value);
return HostAtomic<Operation>::apply(ptr, value);
#endif
}

template <AcceleratorAtomicConcept DataType, enum eAtomicOperation Operation>
ARCCORE_HOST_DEVICE static inline void
ARCCORE_HOST_DEVICE static inline DataType
doAtomic(const DataViewGetterSetter<DataType>& view, DataType value)
{
doAtomic<DataType, Operation>(view._address(), value);
return doAtomic<DataType, Operation>(view._address(), value);
}
};

Expand All @@ -168,25 +170,34 @@ namespace Arcane::Accelerator

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

//! Applique l'opération atomique \a Operation à la valeur à l'adresse \a ptr avec la valeur \a value
/*!
* \brief Applique l'opération atomique \a Operation à la valeur à l'adresse \a ptr avec la valeur \a value.
*
 * \return l'ancienne valeur (avant application de l'opération).
*/
template <enum eAtomicOperation Operation, AcceleratorAtomicConcept DataType, typename ValueType>
ARCCORE_HOST_DEVICE inline void
ARCCORE_HOST_DEVICE inline DataType
doAtomic(DataType* ptr, ValueType value)
requires(std::convertible_to<ValueType, DataType>)
{
DataType v = value;
impl::AtomicImpl::doAtomic<DataType, Operation>(ptr, v);
return impl::AtomicImpl::doAtomic<DataType, Operation>(ptr, v);
}

//! Applique l'opération atomique \a Operation à la vue \a view avec la valeur \a value
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/*!
* \brief Applique l'opération atomique \a Operation à la vue \a view avec la valeur \a value.
*
 * \return l'ancienne valeur (avant application de l'opération).
*/
template <enum eAtomicOperation Operation, AcceleratorAtomicConcept DataType, typename ValueType>
ARCCORE_HOST_DEVICE inline void
ARCCORE_HOST_DEVICE inline DataType
doAtomic(const DataViewGetterSetter<DataType>& view, ValueType value)
requires(std::convertible_to<ValueType, DataType>)
{
DataType v = value;
impl::AtomicImpl::doAtomic<DataType, Operation>(view, v);
return impl::AtomicImpl::doAtomic<DataType, Operation>(view, v);
}

/*---------------------------------------------------------------------------*/
Expand Down
50 changes: 26 additions & 24 deletions arcane/src/arcane/accelerator/CommonCudaHipAtomicImpl.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
//-----------------------------------------------------------------------------
// Copyright 2000-2023 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
// See the top-level COPYRIGHT file for details.
// SPDX-License-Identifier: Apache-2.0
//-----------------------------------------------------------------------------
/*---------------------------------------------------------------------------*/
/* CommonCudaHipAtomicImpl.h (C) 2000-2023 */
/* CommonCudaHipAtomicImpl.h (C) 2000-2024 */
/* */
/* Implémentation CUDA et HIP des opérations atomiques. */
/*---------------------------------------------------------------------------*/
Expand All @@ -24,7 +24,7 @@
// méthodes atomiques ne fonctionnent pas si le pointeur est alloué
// en mémoire unifiée. A priori le problème se pose avec atomicMin, atomicMax,
// atomicInc. Par contre atomicAdd a l'air de fonctionner si les accès
// concurrents ne sont pas trop nombreux.

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
Expand All @@ -50,9 +50,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Add>
{
public:

static ARCCORE_DEVICE void apply(int* ptr, int v)
static ARCCORE_DEVICE int apply(int* ptr, int v)
{
::atomicAdd(ptr, v);
return ::atomicAdd(ptr, v);
}
};

Expand All @@ -61,9 +61,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Max>
{
public:

static ARCCORE_DEVICE void apply(int* ptr, int v)
static ARCCORE_DEVICE int apply(int* ptr, int v)
{
::atomicMax(ptr, v);
return ::atomicMax(ptr, v);
}
};

Expand All @@ -72,9 +72,9 @@ class CommonCudaHipAtomic<int, eAtomicOperation::Min>
{
public:

static ARCCORE_DEVICE void apply(int* ptr, int v)
static ARCCORE_DEVICE int apply(int* ptr, int v)
{
::atomicMin(ptr, v);
return ::atomicMin(ptr, v);
}
};

Expand All @@ -83,10 +83,10 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Add>
{
public:

static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
{
static_assert(sizeof(Int64) == sizeof(long long int), "Bad pointer size");
::atomicAdd((unsigned long long int*)ptr, v);
return static_cast<Int64>(::atomicAdd((unsigned long long int*)ptr, v));
}
};

Expand All @@ -96,7 +96,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>
public:

#if defined(__HIP__)
static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
{
unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);
unsigned long long int old = *address_as_ull, assumed;
Expand All @@ -107,11 +107,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Max>
old = atomicCAS(address_as_ull, assumed,
static_cast<unsigned long long int>(v > assumed_as_int64 ? v : assumed_as_int64));
} while (assumed != old);
return static_cast<Int64>(old);
}
#else
static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
{
::atomicMax((long long int*)ptr, v);
return static_cast<Int64>(::atomicMax((long long int*)ptr, v));
}
#endif
};
Expand All @@ -122,7 +123,7 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>
public:

#if defined(__HIP__)
static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
{
unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(ptr);
unsigned long long int old = *address_as_ull, assumed;
Expand All @@ -133,11 +134,12 @@ class CommonCudaHipAtomic<Int64, eAtomicOperation::Min>
old = atomicCAS(address_as_ull, assumed,
static_cast<unsigned long long int>(v < assumed_as_int64 ? v : assumed_as_int64));
} while (assumed != old);
return static_cast<Int64>(old);
}
#else
static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v)
static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v)
{
::atomicMin((long long int*)ptr, v);
return static_cast<Int64>(::atomicMin((long long int*)ptr, v));
}
#endif
};
Expand Down Expand Up @@ -200,12 +202,12 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Add>
{
public:

static ARCCORE_DEVICE void apply(double* ptr, double v)
static ARCCORE_DEVICE double apply(double* ptr, double v)
{
#if __CUDA_ARCH__ >= 600
::atomicAdd(ptr, v);
return ::atomicAdd(ptr, v);
#else
preArch60atomicAdd(ptr, v);
return preArch60atomicAdd(ptr, v);
#endif
}
};
Expand All @@ -215,9 +217,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Max>
{
public:

static ARCCORE_DEVICE void apply(double* ptr, double v)
static ARCCORE_DEVICE double apply(double* ptr, double v)
{
atomicMaxDouble(ptr, v);
return atomicMaxDouble(ptr, v);
}
};

Expand All @@ -226,9 +228,9 @@ class CommonCudaHipAtomic<double, eAtomicOperation::Min>
{
public:

static ARCCORE_DEVICE void apply(double* ptr, double v)
static ARCCORE_DEVICE double apply(double* ptr, double v)
{
atomicMinDouble(ptr, v);
return atomicMinDouble(ptr, v);
}
};

Expand Down
18 changes: 15 additions & 3 deletions arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "arcane/utils/PlatformUtils.h"
#include "arcane/utils/ValueChecker.h"
#include "arcane/utils/IMemoryRessourceMng.h"
#include "arcane/utils/ITraceMng.h"

#include "arcane/core/BasicUnitTest.h"
#include "arcane/core/ServiceFactory.h"
Expand Down Expand Up @@ -224,29 +225,40 @@

auto queue = makeQueue(m_runner);
NumArray<DataType, MDDim1> v_sum(1, mem_ressource);
NumArray<bool, MDDim1> is_ok_array(nb_value);
v_sum.fill(init_value, &queue);
DataType* device_sum_ptr = &v_sum[0];
{
auto command = makeCommand(queue);
auto inout_a = viewInOut(command, v0);

auto out_is_ok = viewOut(command, is_ok_array);
command << RUNCOMMAND_LOOP1(iter, nb_value)
{
auto [i] = iter();
DataType x = static_cast<DataType>(i % (nb_value / 4));
if ((i % 2) == 0)
x = -x;
DataType v = x + add0;
ax::doAtomic<Operation>(inout_a(iter), v);
DataType old_v = ax::doAtomic<Operation>(inout_a(iter), v);
DataType new_v = inout_a(iter);
// Si l'opération est l'ajout, teste que l'ancienne valeur plus
// la valeur ajoutée vaut la nouvelle
if (Operation == ax::eAtomicOperation::Add) {
out_is_ok[i] = (new_v == (old_v + v));
}
else
out_is_ok[i] = true;
ax::doAtomic<Operation>(device_sum_ptr, inout_a(iter));
};
}

DataType cumulative = init_value;
for (Int32 i = 0; i < nb_value; ++i) {
if (i < 10)
info() << "V[" << i << "] = " << v0[i];
info() << "V[" << i << "] = " << v0[i] << " is_ok=" << is_ok_array[i];
ax::doAtomic<Operation>(&cumulative, v0[i]);
if (!is_ok_array[i])
ARCANE_FATAL("Bad old value for index '{0}'", i);

Check warning on line 261 in arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc

View check run for this annotation

Codecov / codecov/patch

arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc#L261

Added line #L261 was not covered by tests
}
NumArray<DataType, MDDim1> host_cumulative(1);
host_cumulative.copy(v_sum);
Expand Down
Loading