From 22f05d25c958d82a46d18c54eebf45ac41bd88b4 Mon Sep 17 00:00:00 2001 From: Gilles Grospellier Date: Mon, 25 Nov 2024 17:58:48 +0100 Subject: [PATCH 1/2] =?UTF-8?q?[arcane,accelerator]=20Retourne=20l'ancienn?= =?UTF-8?q?e=20valeur=20lors=20des=20op=C3=A9rations=20atomiques.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arcane/src/arcane/accelerator/Atomic.h | 57 +++++++++++-------- .../accelerator/CommonCudaHipAtomicImpl.h | 50 ++++++++-------- 2 files changed, 60 insertions(+), 47 deletions(-) diff --git a/arcane/src/arcane/accelerator/Atomic.h b/arcane/src/arcane/accelerator/Atomic.h index c987e9f6e..967ce82e0 100644 --- a/arcane/src/arcane/accelerator/Atomic.h +++ b/arcane/src/arcane/accelerator/Atomic.h @@ -46,11 +46,11 @@ class HostAtomic { public: - template static void + template static DataType apply(DataType* ptr, DataType value) { std::atomic_ref v(*ptr); - v.fetch_add(value); + return v.fetch_add(value); } }; @@ -59,13 +59,14 @@ class HostAtomic { public: - template static void + template static DataType apply(DataType* ptr, DataType value) { std::atomic_ref v(*ptr); DataType prev_value = v; while (prev_value < value && !v.compare_exchange_weak(prev_value, value)) { } + return prev_value; } }; @@ -74,13 +75,14 @@ class HostAtomic { public: - template static void + template static DataType apply(DataType* ptr, DataType value) { std::atomic_ref v(*ptr); DataType prev_value = v; while (prev_value > value && !v.compare_exchange_weak(prev_value, value)) { } + return prev_value; } }; @@ -94,11 +96,11 @@ class SyclAtomic { public: - template static void + template static DataType apply(DataType* ptr, DataType value) { sycl::atomic_ref v(*ptr); - v.fetch_add(value); + return v.fetch_add(value); } }; @@ -107,11 +109,11 @@ class SyclAtomic { public: - template static void + template static DataType apply(DataType* ptr, DataType value) { sycl::atomic_ref v(*ptr); - v.fetch_max(value); + return v.fetch_max(value); } }; @@ -120,11 +122,11 @@ class SyclAtomic { public: - template static void + template static DataType apply(DataType* ptr, DataType value) { sycl::atomic_ref v(*ptr); - v.fetch_min(value); + return v.fetch_min(value); } }; @@ -138,23 +140,23 @@ class AtomicImpl public: template - ARCCORE_HOST_DEVICE static inline void + ARCCORE_HOST_DEVICE static inline DataType doAtomic(DataType* ptr, DataType value) { #if defined(ARCCORE_DEVICE_TARGET_CUDA) || defined(ARCCORE_DEVICE_TARGET_HIP) - impl::CommonCudaHipAtomic::apply(ptr, value); + return impl::CommonCudaHipAtomic::apply(ptr, value); #elif defined(ARCCORE_DEVICE_TARGET_SYCL) - SyclAtomic::apply(ptr, value); + return SyclAtomic::apply(ptr, value); #else - HostAtomic::apply(ptr, value); + return HostAtomic::apply(ptr, value); #endif } template - ARCCORE_HOST_DEVICE static inline void + ARCCORE_HOST_DEVICE static inline DataType doAtomic(const DataViewGetterSetter& view, DataType value) { - doAtomic(view._address(), value); + return doAtomic(view._address(), value); } }; @@ -168,25 +170,34 @@ namespace Arcane::Accelerator /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ - -//! Applique l'opération atomique \a Operation à la valeur à l'adresse \a ptr avec la valeur \a value +/*! + * \brief Applique l'opération atomique \a Operation à la valeur à l'adresse \a ptr avec la valeur \a value. + * + * \retval l'ancienne valeur avant ajout. + */ template -ARCCORE_HOST_DEVICE inline void +ARCCORE_HOST_DEVICE inline DataType doAtomic(DataType* ptr, ValueType value) requires(std::convertible_to) { DataType v = value; - impl::AtomicImpl::doAtomic(ptr, v); + return impl::AtomicImpl::doAtomic(ptr, v); } -//! Applique l'opération atomique \a Operation à la vue \a view avec la valeur \a value +/*---------------------------------------------------------------------------*/ +/*---------------------------------------------------------------------------*/ +/*! + * \brief Applique l'opération atomique \a Operation à la vue \a view avec la valeur \a value. + * + * \retval l'ancienne valeur avant ajout. + */ template -ARCCORE_HOST_DEVICE inline void +ARCCORE_HOST_DEVICE inline DataType doAtomic(const DataViewGetterSetter& view, ValueType value) requires(std::convertible_to) { DataType v = value; - impl::AtomicImpl::doAtomic(view, v); + return impl::AtomicImpl::doAtomic(view, v); } /*---------------------------------------------------------------------------*/ diff --git a/arcane/src/arcane/accelerator/CommonCudaHipAtomicImpl.h b/arcane/src/arcane/accelerator/CommonCudaHipAtomicImpl.h index d543003c9..632aad0d6 100644 --- a/arcane/src/arcane/accelerator/CommonCudaHipAtomicImpl.h +++ b/arcane/src/arcane/accelerator/CommonCudaHipAtomicImpl.h @@ -1,11 +1,11 @@ // -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*- //----------------------------------------------------------------------------- -// Copyright 2000-2023 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) +// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) // See the top-level COPYRIGHT file for details. // SPDX-License-Identifier: Apache-2.0 //----------------------------------------------------------------------------- /*---------------------------------------------------------------------------*/ -/* CommonCudaHipAtomicImpl.h (C) 2000-2023 */ +/* CommonCudaHipAtomicImpl.h (C) 2000-2024 */ /* */ /* Implémentation CUDA et HIP des opérations atomiques. */ /*---------------------------------------------------------------------------*/ @@ -24,7 +24,7 @@ // méthodes atomiques ne fonctionnent pas si le pointeur est allouée // en mémoire unifiée. A priori le problème se pose avec atomicMin, atomicMax, // atomicInc. Par contre atomicAdd a l'air de fonctionner si les accès -// concurrents ne sont pas trop nombreux +// concurrents ne sont pas trop nombreux. /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ @@ -50,9 +50,9 @@ class CommonCudaHipAtomic { public: - static ARCCORE_DEVICE void apply(int* ptr, int v) + static ARCCORE_DEVICE int apply(int* ptr, int v) { - ::atomicAdd(ptr, v); + return ::atomicAdd(ptr, v); } }; @@ -61,9 +61,9 @@ class CommonCudaHipAtomic { public: - static ARCCORE_DEVICE void apply(int* ptr, int v) + static ARCCORE_DEVICE int apply(int* ptr, int v) { - ::atomicMax(ptr, v); + return ::atomicMax(ptr, v); } }; @@ -72,9 +72,9 @@ class CommonCudaHipAtomic { public: - static ARCCORE_DEVICE void apply(int* ptr, int v) + static ARCCORE_DEVICE int apply(int* ptr, int v) { - ::atomicMin(ptr, v); + return ::atomicMin(ptr, v); } }; @@ -83,10 +83,10 @@ class CommonCudaHipAtomic { public: - static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v) + static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v) { static_assert(sizeof(Int64) == sizeof(long long int), "Bad pointer size"); - ::atomicAdd((unsigned long long int*)ptr, v); + return static_cast(::atomicAdd((unsigned long long int*)ptr, v)); } }; @@ -96,7 +96,7 @@ class CommonCudaHipAtomic public: #if defined(__HIP__) - static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v) + static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v) { unsigned long long int* address_as_ull = reinterpret_cast(ptr); unsigned long long int old = *address_as_ull, assumed; @@ -107,11 +107,12 @@ class CommonCudaHipAtomic old = atomicCAS(address_as_ull, assumed, static_cast(v > assumed_as_int64 ? v : assumed_as_int64)); } while (assumed != old); + return static_cast(old); } #else - static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v) + static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v) { - ::atomicMax((long long int*)ptr, v); + return static_cast(::atomicMax((long long int*)ptr, v)); } #endif }; @@ -122,7 +123,7 @@ class CommonCudaHipAtomic public: #if defined(__HIP__) - static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v) + static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v) { unsigned long long int* address_as_ull = reinterpret_cast(ptr); unsigned long long int old = *address_as_ull, assumed; @@ -133,11 +134,12 @@ class CommonCudaHipAtomic old = atomicCAS(address_as_ull, assumed, static_cast(v < assumed_as_int64 ? v : assumed_as_int64)); } while (assumed != old); + return static_cast(old); } #else - static ARCCORE_DEVICE void apply(Int64* ptr, Int64 v) + static ARCCORE_DEVICE Int64 apply(Int64* ptr, Int64 v) { - ::atomicMin((long long int*)ptr, v); + return static_cast(::atomicMin((long long int*)ptr, v)); } #endif }; @@ -200,12 +202,12 @@ class CommonCudaHipAtomic { public: - static ARCCORE_DEVICE void apply(double* ptr, double v) + static ARCCORE_DEVICE double apply(double* ptr, double v) { #if __CUDA_ARCH__ >= 600 - ::atomicAdd(ptr, v); + return ::atomicAdd(ptr, v); #else - preArch60atomicAdd(ptr, v); + return preArch60atomicAdd(ptr, v); #endif } }; @@ -215,9 +217,9 @@ class CommonCudaHipAtomic { public: - static ARCCORE_DEVICE void apply(double* ptr, double v) + static ARCCORE_DEVICE double apply(double* ptr, double v) { - atomicMaxDouble(ptr, v); + return atomicMaxDouble(ptr, v); } }; @@ -226,9 +228,9 @@ class CommonCudaHipAtomic { public: - static ARCCORE_DEVICE void apply(double* ptr, double v) + static ARCCORE_DEVICE double apply(double* ptr, double v) { - atomicMinDouble(ptr, v); + return atomicMinDouble(ptr, v); } }; From 6501665f6aea5f3659ae867d6c83af62ed9adaf4 Mon Sep 17 00:00:00 2001 From: Gilles Grospellier Date: Mon, 25 Nov 2024 18:18:57 +0100 Subject: [PATCH 2/2] =?UTF-8?q?[arcane,tests]=20Ajoute=20test=20pour=20la?= =?UTF-8?q?=20valeur=20de=20retour=20des=20op=C3=A9rations=20atomiques.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../arcane/tests/accelerator/AtomicUnitTest.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc b/arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc index d5cb6d945..278547cb1 100644 --- a/arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc +++ b/arcane/src/arcane/tests/accelerator/AtomicUnitTest.cc @@ -15,6 +15,7 @@ #include "arcane/utils/PlatformUtils.h" #include "arcane/utils/ValueChecker.h" #include "arcane/utils/IMemoryRessourceMng.h" +#include "arcane/utils/ITraceMng.h" #include "arcane/core/BasicUnitTest.h" #include "arcane/core/ServiceFactory.h" @@ -224,12 +225,13 @@ _executeTest1(eMemoryRessource mem_ressource) auto queue = makeQueue(m_runner); NumArray v_sum(1, mem_ressource); + NumArray is_ok_array(nb_value); v_sum.fill(init_value, &queue); DataType* device_sum_ptr = &v_sum[0]; { auto command = makeCommand(queue); auto inout_a = viewInOut(command, v0); - + auto out_is_ok = viewOut(command, is_ok_array); command << RUNCOMMAND_LOOP1(iter, nb_value) { auto [i] = iter(); @@ -237,7 +239,15 @@ _executeTest1(eMemoryRessource mem_ressource) if ((i % 2) == 0) x = -x; DataType v = x + add0; - ax::doAtomic(inout_a(iter), v); + DataType old_v = ax::doAtomic(inout_a(iter), v); + DataType new_v = inout_a(iter); + // Si l'opération est l'ajout, teste que l'ancienne valeur plus + // la valeur ajoutée vaut la nouvelle + if (Operation == ax::eAtomicOperation::Add) { + out_is_ok[i] = (new_v == (old_v + v)); + } + else + out_is_ok[i] = true; ax::doAtomic(device_sum_ptr, inout_a(iter)); }; } @@ -245,8 +255,10 @@ _executeTest1(eMemoryRessource mem_ressource) DataType cumulative = init_value; for (Int32 i = 0; i < nb_value; ++i) { if (i < 10) - info() << "V[" << i << "] = " << v0[i]; + info() << "V[" << i << "] = " << v0[i] << " is_ok=" << is_ok_array[i]; ax::doAtomic(&cumulative, v0[i]); + if (!is_ok_array[i]) + ARCANE_FATAL("Bad old value for index '{0}'", i); } NumArray host_cumulative(1); host_cumulative.copy(v_sum);