Skip to content

Commit 4048536

Browse files
[arcane,accelerator] Renomme la classe 'ThreadBlockInfo' en 'KernelLaunchArgs' et la déplace dans son propre fichier.
1 parent 8a16ff8 commit 4048536

File tree

6 files changed

+84
-22
lines changed

6 files changed

+84
-22
lines changed

arcane/src/arcane/accelerator/RunQueueInternal.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -322,10 +322,10 @@ _applyKernelCUDA(impl::RunCommandLaunchInfo& launch_info, const CudaKernel& kern
322322
{
323323
#if defined(ARCANE_COMPILING_CUDA)
324324
Int32 wanted_shared_memory = 0;
325-
auto [b, t] = launch_info._threadBlockInfo(reinterpret_cast<const void*>(kernel), wanted_shared_memory);
325+
auto tbi = launch_info._threadBlockInfo(reinterpret_cast<const void*>(kernel), wanted_shared_memory);
326326
cudaStream_t* s = reinterpret_cast<cudaStream_t*>(launch_info._internalStreamImpl());
327327
// TODO: utiliser cudaLaunchKernel() à la place.
328-
kernel<<<b, t, wanted_shared_memory, *s>>>(args, func, other_args...);
328+
kernel<<<tbi.nbBlockPerGrid(), tbi.nbThreadPerBlock(), wanted_shared_memory, *s>>>(args, func, other_args...);
329329
#else
330330
ARCANE_UNUSED(launch_info);
331331
ARCANE_UNUSED(kernel);
@@ -350,9 +350,9 @@ _applyKernelHIP(impl::RunCommandLaunchInfo& launch_info, const HipKernel& kernel
350350
{
351351
#if defined(ARCANE_COMPILING_HIP)
352352
Int32 wanted_shared_memory = 0;
353-
auto [b, t] = launch_info._threadBlockInfo(reinterpret_cast<const void*>(kernel), wanted_shared_memory);
353+
auto tbi = launch_info._threadBlockInfo(reinterpret_cast<const void*>(kernel), wanted_shared_memory);
354354
hipStream_t* s = reinterpret_cast<hipStream_t*>(launch_info._internalStreamImpl());
355-
hipLaunchKernelGGL(kernel, b, t, wanted_shared_memory, *s, args, func, other_args...);
355+
hipLaunchKernelGGL(kernel, tbi.nbBlockPerGrid(), tbi.nbThreadPerBlock(), wanted_shared_memory, *s, args, func, other_args...);
356356
#else
357357
ARCANE_UNUSED(launch_info);
358358
ARCANE_UNUSED(kernel);
@@ -379,7 +379,9 @@ void _applyKernelSYCL(impl::RunCommandLaunchInfo& launch_info, SyclKernel kernel
379379
sycl::queue* s = reinterpret_cast<sycl::queue*>(launch_info._internalStreamImpl());
380380
sycl::event event;
381381
if constexpr (sizeof...(ReducerArgs) > 0) {
382-
auto [b, t] = launch_info.threadBlockInfo();
382+
auto tbi = launch_info.kernelLaunchArgs();
383+
Int32 b = tbi.nbBlockPerGrid();
384+
Int32 t = tbi.nbThreadPerBlock();
383385
sycl::nd_range<1> loop_size(b * t, t);
384386
event = s->parallel_for(loop_size, [=](sycl::nd_item<1> i) { kernel(i, args, func, reducer_args...); });
385387
}

arcane/src/arcane/accelerator/core/AcceleratorCoreGlobal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ enum class eMemoryAdvice;
6363

6464
namespace impl
6565
{
66+
class KernelLaunchArgs;
6667
class RuntimeStaticInfo;
6768
class IRunnerRuntime;
6869
// typedef pour compatibilité avec anciennes versions (octobre 2022)
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2+
//-----------------------------------------------------------------------------
3+
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4+
// See the top-level COPYRIGHT file for details.
5+
// SPDX-License-Identifier: Apache-2.0
6+
//-----------------------------------------------------------------------------
7+
/*---------------------------------------------------------------------------*/
8+
/* KernelLaunchArgs.h (C) 2000-2024 */
9+
/* */
10+
/* Arguments pour lancer un kernel. */
11+
/*---------------------------------------------------------------------------*/
12+
#ifndef ARCANE_ACCELERATOR_CORE_KERNELLAUNCHARGS_H
13+
#define ARCANE_ACCELERATOR_CORE_KERNELLAUNCHARGS_H
14+
/*---------------------------------------------------------------------------*/
15+
/*---------------------------------------------------------------------------*/
16+
17+
#include "arcane/accelerator/core/AcceleratorCoreGlobal.h"
18+
19+
/*---------------------------------------------------------------------------*/
20+
/*---------------------------------------------------------------------------*/
21+
22+
namespace Arcane::Accelerator::impl
23+
{
24+
25+
/*---------------------------------------------------------------------------*/
26+
/*---------------------------------------------------------------------------*/
27+
/*!
28+
* \internal
29+
* \brief Arguments used to launch a kernel: the number of blocks per grid and the number of threads per block.
30+
*/
31+
class ARCANE_ACCELERATOR_CORE_EXPORT KernelLaunchArgs
32+
{
33+
friend RunCommandLaunchInfo; // RunCommandLaunchInfo is granted access to the private members.
34+
35+
public:
36+
37+
KernelLaunchArgs() = default; // Default construction leaves both counts at 0 (see member initializers).
38+
KernelLaunchArgs(Int32 nb_block_per_grid, Int32 nb_thread_per_block) // Stores both counts as given, without validation.
39+
: m_nb_block_per_grid(nb_block_per_grid)
40+
, m_nb_thread_per_block(nb_thread_per_block)
41+
{
42+
}
43+
44+
public:
45+
46+
int nbBlockPerGrid() const { return m_nb_block_per_grid; } //!< Number of blocks per grid.
47+
int nbThreadPerBlock() const { return m_nb_thread_per_block; } //!< Number of threads per block.
48+
49+
private:
50+
51+
int m_nb_block_per_grid = 0; //!< Number of blocks per grid (0 until set by the constructor).
52+
int m_nb_thread_per_block = 0; //!< Number of threads per block (0 until set by the constructor).
53+
};
54+
55+
/*---------------------------------------------------------------------------*/
56+
/*---------------------------------------------------------------------------*/
57+
58+
} // End namespace Arcane::Accelerator::impl
59+
60+
/*---------------------------------------------------------------------------*/
61+
/*---------------------------------------------------------------------------*/
62+
63+
#endif

arcane/src/arcane/accelerator/core/RunCommandLaunchInfo.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ RunCommandLaunchInfo(RunCommand& command, Int64 total_loop_size)
3333
: m_command(command)
3434
, m_total_loop_size(total_loop_size)
3535
{
36-
m_thread_block_info = _computeThreadBlockInfo();
36+
m_kernel_launch_args = _computeKernelLaunchArgs();
3737
_begin();
3838
}
3939

@@ -58,7 +58,7 @@ _begin()
5858
m_exec_policy = queue.executionPolicy();
5959
m_queue_stream = queue._internalStream();
6060
m_runtime = queue._internalRuntime();
61-
m_command._allocateReduceMemory(m_thread_block_info.nb_block_per_grid);
61+
m_command._allocateReduceMemory(m_kernel_launch_args.nbBlockPerGrid());
6262
}
6363

6464
/*---------------------------------------------------------------------------*/
@@ -118,8 +118,8 @@ _internalStreamImpl()
118118
/*---------------------------------------------------------------------------*/
119119

120120
//! Calcule le nombre de block/thread/grille du noyau en fonction de \a full_size
121-
auto RunCommandLaunchInfo::
122-
_computeThreadBlockInfo() const -> ThreadBlockInfo
121+
KernelLaunchArgs RunCommandLaunchInfo::
122+
_computeKernelLaunchArgs() const
123123
{
124124
int threads_per_block = m_command.nbThreadPerBlock();
125125
if (threads_per_block<=0)
@@ -167,10 +167,10 @@ computeLoopRunInfo()
167167
/*---------------------------------------------------------------------------*/
168168
/*---------------------------------------------------------------------------*/
169169

170-
RunCommandLaunchInfo::ThreadBlockInfo RunCommandLaunchInfo::
170+
KernelLaunchArgs RunCommandLaunchInfo::
171171
_threadBlockInfo([[maybe_unused]] const void* func,[[maybe_unused]] Int64 shared_memory_size) const
172172
{
173-
return m_thread_block_info;
173+
return m_kernel_launch_args;
174174
}
175175

176176
/*---------------------------------------------------------------------------*/

arcane/src/arcane/accelerator/core/RunCommandLaunchInfo.h

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,10 @@
1414
/*---------------------------------------------------------------------------*/
1515
/*---------------------------------------------------------------------------*/
1616

17-
#include "arcane/utils/CheckedConvert.h"
1817
#include "arcane/utils/ConcurrencyUtils.h"
1918
#include "arcane/utils/Profiling.h"
2019

21-
#include "arcane/accelerator/core/AcceleratorCoreGlobal.h"
20+
#include "arcane/accelerator/core/KernelLaunchArgs.h"
2221

2322
/*---------------------------------------------------------------------------*/
2423
/*---------------------------------------------------------------------------*/
@@ -48,11 +47,7 @@ class ARCANE_ACCELERATOR_CORE_EXPORT RunCommandLaunchInfo
4847

4948
public:
5049

51-
struct ThreadBlockInfo
52-
{
53-
int nb_block_per_grid = 0;
54-
int nb_thread_per_block = 0;
55-
};
50+
using ThreadBlockInfo = KernelLaunchArgs;
5651

5752
public:
5853

@@ -82,7 +77,7 @@ class ARCANE_ACCELERATOR_CORE_EXPORT RunCommandLaunchInfo
8277
void endExecute();
8378

8479
//! Informations sur le nombre de block/thread/grille du noyau à lancer.
85-
ThreadBlockInfo threadBlockInfo() const { return m_thread_block_info; }
80+
KernelLaunchArgs kernelLaunchArgs() const { return m_kernel_launch_args; }
8681

8782
//! Calcule les informations pour les boucles multi-thread
8883
ParallelLoopOptions computeParallelLoopOptions() const;
@@ -104,7 +99,7 @@ class ARCANE_ACCELERATOR_CORE_EXPORT RunCommandLaunchInfo
10499
IRunnerRuntime* m_runtime = nullptr;
105100
IRunQueueStream* m_queue_stream = nullptr;
106101
eExecutionPolicy m_exec_policy = eExecutionPolicy::Sequential;
107-
ThreadBlockInfo m_thread_block_info;
102+
KernelLaunchArgs m_kernel_launch_args;
108103
ForLoopRunInfo m_loop_run_info;
109104
Int64 m_total_loop_size = 0;
110105

@@ -116,11 +111,11 @@ class ARCANE_ACCELERATOR_CORE_EXPORT RunCommandLaunchInfo
116111
* Ces informations sont calculées à partir de méthodes fournies par le runtime accélérateur
117112
* sous-jacent.
118113
*/
119-
ThreadBlockInfo _threadBlockInfo(const void* func, Int64 shared_memory_size) const;
114+
KernelLaunchArgs _threadBlockInfo(const void* func, Int64 shared_memory_size) const;
120115
void* _internalStreamImpl();
121116
void _begin();
122117
void _doEndKernelLaunch();
123-
ThreadBlockInfo _computeThreadBlockInfo() const;
118+
KernelLaunchArgs _computeKernelLaunchArgs() const;
124119

125120
private:
126121

arcane/src/arcane/accelerator/core/srcs.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ set( ARCANE_SOURCES
1010
DeviceInfoList.h
1111
IReduceMemoryImpl.h
1212
IDeviceInfoList.h
13+
KernelLaunchArgs.h
1314
Memory.h
1415
Memory.cc
1516
MemoryTracer.cc

0 commit comments

Comments
 (0)