Skip to content

Commit b2c106c

Browse files
Merge pull request #1878 from arcaneframework/dev/gg-add-scan-algo-in-multi-thread
Add basic implementation of 'Scan' algorithm with multi-thread accelerator policy
2 parents 3ca87c2 + f146d53 commit b2c106c

File tree

4 files changed

+197
-1
lines changed

4 files changed

+197
-1
lines changed

arcane/src/arcane/accelerator/GenericScanner.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "arcane/accelerator/RunCommandLaunchInfo.h"
2727
#include "arcane/accelerator/RunCommandLoop.h"
2828
#include "arcane/accelerator/ScanImpl.h"
29+
#include "arcane/accelerator/MultiThreadAlgo.h"
2930

3031
/*---------------------------------------------------------------------------*/
3132
/*---------------------------------------------------------------------------*/
@@ -141,7 +142,14 @@ class ScannerImpl
141142
} break;
142143
#endif
143144
case eExecutionPolicy::Thread:
144-
// Pas encore implémenté en multi-thread
145+
// Si le nombre de valeurs est 1 on utilise la version séquentielle.
146+
// TODO: il serait judicieux de faire cela aussi pour des valeurs plus importantes
147+
// car en général sur les petites boucles le multi-threading est contre productif.
148+
if (nb_item > 1) {
149+
MultiThreadAlgo scanner;
150+
scanner.doScan<IsExclusive, DataType>(launch_info.loopRunInfo(), nb_item, input_data, output_data, init_value, op);
151+
break;
152+
}
145153
[[fallthrough]];
146154
case eExecutionPolicy::Sequential: {
147155
DataType sum = init_value;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2+
//-----------------------------------------------------------------------------
3+
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4+
// See the top-level COPYRIGHT file for details.
5+
// SPDX-License-Identifier: Apache-2.0
6+
//-----------------------------------------------------------------------------
7+
/*---------------------------------------------------------------------------*/
8+
/* MultiThreadAlgo.cc (C) 2000-2024 */
9+
/* */
10+
/* Implémentation des algorithmes accélérateurs en mode multi-thread. */
11+
/*---------------------------------------------------------------------------*/
12+
/*---------------------------------------------------------------------------*/
13+
14+
#include "arcane/accelerator/MultiThreadAlgo.h"
15+
16+
/*---------------------------------------------------------------------------*/
17+
/*---------------------------------------------------------------------------*/
18+
19+
/*---------------------------------------------------------------------------*/
20+
/*---------------------------------------------------------------------------*/
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2+
//-----------------------------------------------------------------------------
3+
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4+
// See the top-level COPYRIGHT file for details.
5+
// SPDX-License-Identifier: Apache-2.0
6+
//-----------------------------------------------------------------------------
7+
/*---------------------------------------------------------------------------*/
8+
/* MultiThreadAlgo.h (C) 2000-2024 */
9+
/* */
10+
/* Implémentation des algorithmes accélérateurs en mode multi-thread. */
11+
/*---------------------------------------------------------------------------*/
12+
#ifndef ARCANE_ACCELERATOR_MULTITHREADALGO_H
13+
#define ARCANE_ACCELERATOR_MULTITHREADALGO_H
14+
/*---------------------------------------------------------------------------*/
15+
/*---------------------------------------------------------------------------*/
16+
17+
#include "arcane/utils/SmallArray.h"
18+
19+
#include "arcane/core/Concurrency.h"
20+
21+
#include "arcane/accelerator/AcceleratorGlobal.h"
22+
23+
/*---------------------------------------------------------------------------*/
24+
/*---------------------------------------------------------------------------*/
25+
26+
namespace Arcane::Accelerator::impl
27+
{
28+
29+
/*---------------------------------------------------------------------------*/
30+
/*---------------------------------------------------------------------------*/
31+
/*!
32+
* \brief Algorithmes avancés en mode multi-thread.
33+
*
34+
* Pour l'instant seule l'opération de Scan est implémentée.
35+
*/
36+
class MultiThreadAlgo
37+
{
38+
public:
39+
40+
/*!
41+
* \brief Algorithme de scan multi-thread.
42+
*
43+
* \note Cette classe est interne à Arcane. La version de l'API publique
44+
* est accessible via la classe GenericScanner.
45+
*
46+
* Cet algorithme basique utilise deux passes pour le calcul.
47+
* L'interval d'itération est divisé en N blocs. On prend N = 2*nb_thread.
48+
* - la première passe calcule en parallèle le résultat du scan pour tous
49+
* les éléments d'un bloc.
50+
* - la deuxième passe calcule la valeurs finale.
51+
*
52+
* Le calcul donne toujours la même valeur pour un nombre de bloc donné.
53+
*
54+
* TODO: Utilise du padding pour conserver les valeurs partielles par bloc.
55+
* TODO: Faire des versions spécialisées si DataType est un type
56+
* de base tel que 'Int32', 'Int64', 'float' ou 'double'.
57+
*/
58+
template <bool IsExclusive, typename DataType, typename Operator,
59+
typename InputIterator, typename OutputIterator>
60+
void doScan(ForLoopRunInfo run_info, Int32 nb_value,
61+
InputIterator input, OutputIterator output,
62+
DataType init_value, Operator op)
63+
{
64+
//std::cout << "DO_SCAN MULTI_THREAD nb_value=" << nb_value << " init_value=" << init_value << "\n";
65+
auto multiple_getter_func = [=](Int32 input_index, Int32 nb_value) -> DataType {
66+
DataType partial_value = Operator::defaultValue();
67+
for (Int32 x = 0; x < nb_value; ++x)
68+
partial_value = op(input[x + input_index], partial_value);
69+
return partial_value;
70+
};
71+
72+
auto multiple_setter_func = [=](DataType previous_sum, Int32 input_index, Int32 nb_value) {
73+
for (Int32 x = 0; x < nb_value; ++x) {
74+
if constexpr (IsExclusive) {
75+
output[x + input_index] = previous_sum;
76+
previous_sum = op(input[x + input_index], previous_sum);
77+
}
78+
else {
79+
previous_sum = op(input[x + input_index], previous_sum);
80+
output[x + input_index] = previous_sum;
81+
}
82+
}
83+
};
84+
// TODO: calculer automatiquement cette valeur.
85+
const Int32 nb_block = 10;
86+
87+
// Tableau pour conserver les valeurs partielles des blocs.
88+
// TODO: Utiliser un padding pour éviter des conflits de cache entre les threads.
89+
SmallArray<DataType> partial_values(nb_block);
90+
Span<DataType> out_partial_values = partial_values;
91+
92+
auto partial_value_func = [=](Int32 a, Int32 n) {
93+
for (Int32 i = 0; i < n; ++i) {
94+
Int32 interval_index = i + a;
95+
96+
Int32 input_index = 0;
97+
Int32 nb_value_in_interval = 0;
98+
_subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
99+
100+
DataType partial_value = multiple_getter_func(input_index, nb_value_in_interval);
101+
102+
out_partial_values[interval_index] = partial_value;
103+
}
104+
};
105+
106+
ParallelLoopOptions loop_options(run_info.options().value_or(ParallelLoopOptions{}));
107+
loop_options.setGrainSize(1);
108+
run_info.addOptions(loop_options);
109+
110+
// Calcule les sommes partielles pour nb_block
111+
Arcane::arcaneParallelFor(0, nb_block, run_info, partial_value_func);
112+
113+
auto final_sum_func = [=](Int32 a, Int32 n) {
114+
for (Int32 i = 0; i < n; ++i) {
115+
Int32 interval_index = i + a;
116+
117+
DataType previous_sum = init_value;
118+
for (Int32 z = 0; z < interval_index; ++z)
119+
previous_sum = op(out_partial_values[z], previous_sum);
120+
121+
Int32 input_index = 0;
122+
Int32 nb_value_in_interval = 0;
123+
_subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
124+
125+
multiple_setter_func(previous_sum, input_index, nb_value_in_interval);
126+
}
127+
};
128+
129+
// Calcule les valeurs finales
130+
Arcane::arcaneParallelFor(0, nb_block, run_info, final_sum_func);
131+
}
132+
133+
private:
134+
135+
/*!
 * \brief Computes the sub-range handled by one interval of a split range.
 *
 * The range [0, size) is split into \a nb_interval consecutive intervals
 * of (size / nb_interval) elements each; the last interval additionally
 * takes all the remaining elements.
 *
 * \param size total number of elements of the range.
 * \param interval_index index of the requested interval.
 * \param nb_interval number of intervals the range is split into.
 * \param out_begin_index receives the index of the first element of the interval.
 * \param out_interval_size receives the number of elements of the interval.
 *
 * Both outputs are set to 0 (empty interval) when \a size or \a nb_interval
 * is not strictly positive, or when \a interval_index is outside
 * [0, nb_interval).
 */
template <typename SizeType>
static void _subInterval(SizeType size, SizeType interval_index, SizeType nb_interval,
                         SizeType* out_begin_index, SizeType* out_interval_size)
{
  *out_begin_index = 0;
  *out_interval_size = 0;

  // Nothing to split: also prevents negative begin indexes or sizes
  // from being reported for a negative 'size'.
  if (size <= 0 || nb_interval <= 0)
    return;
  if (interval_index < 0 || interval_index >= nb_interval)
    return;

  const SizeType base_size = size / nb_interval;
  const SizeType begin_index = interval_index * base_size;
  const bool is_last_interval = (interval_index + 1) == nb_interval;

  *out_begin_index = begin_index;
  // The last interval takes the remaining elements.
  *out_interval_size = is_last_interval ? (size - begin_index) : base_size;
}
153+
};
154+
155+
/*---------------------------------------------------------------------------*/
156+
/*---------------------------------------------------------------------------*/
157+
158+
} // namespace Arcane::Accelerator::impl
159+
160+
/*---------------------------------------------------------------------------*/
161+
/*---------------------------------------------------------------------------*/
162+
163+
#endif
164+
165+
/*---------------------------------------------------------------------------*/
166+
/*---------------------------------------------------------------------------*/

arcane/src/arcane/accelerator/srcs.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ set( ARCANE_SOURCES
1919
MaterialVariableViews.h
2020
MaterialVariableViews.cc
2121
MemoryCopier.cc
22+
MultiThreadAlgo.h
23+
MultiThreadAlgo.cc
2224
NumArray.h
2325
NumArrayViews.h
2426
NumArrayViews.cc

0 commit comments

Comments
 (0)