Skip to content

Commit 2a2432e

Browse files
[arcane,accelerator] Ajoute implémentation multi-thread pour 'GenericFiltererIf'.
1 parent c140744 commit 2a2432e

File tree

2 files changed

+103
-1
lines changed

2 files changed

+103
-1
lines changed

arcane/src/arcane/accelerator/GenericFilterer.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "arcane/accelerator/RunCommandLaunchInfo.h"
2727
#include "arcane/accelerator/RunCommandLoop.h"
2828
#include "arcane/accelerator/ScanImpl.h"
29+
#include "arcane/accelerator/MultiThreadAlgo.h"
2930

3031
/*---------------------------------------------------------------------------*/
3132
/*---------------------------------------------------------------------------*/
@@ -315,7 +316,12 @@ class GenericFilteringIf
315316
} break;
316317
#endif
317318
case eExecutionPolicy::Thread:
318-
// Pas encore implémenté en multi-thread
319+
if (nb_item > 500) {
320+
MultiThreadAlgo scanner;
321+
Int32 v = scanner.doFilter(launch_info.loopRunInfo(), nb_item, input_iter, output_iter, select_lambda);
322+
s.m_host_nb_out_storage[0] = v;
323+
break;
324+
}
319325
[[fallthrough]];
320326
case eExecutionPolicy::Sequential: {
321327
Int32 index = 0;

arcane/src/arcane/accelerator/MultiThreadAlgo.h

+96
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,102 @@ class MultiThreadAlgo
130130
Arcane::arcaneParallelFor(0, nb_block, run_info, final_sum_func);
131131
}
132132

133+
// Filters 'nb_value' elements read from 'input': every element for which
// 'select_lambda' returns true is copied to 'output', in input order.
//
// The work is done in two passes over 'nb_block' sub-intervals of the input:
//  1. each sub-interval evaluates 'select_lambda' on its elements, records the
//     result in a per-element flag array and counts the selected elements
//     (this pass is launched through Arcane::arcaneParallelFor);
//  2. after an inclusive running sum of the per-block counts, each sub-interval
//     copies its selected elements to 'output' starting at the offset given by
//     the total of the preceding blocks.
//
// 'run_info' is taken by value; its loop options are overridden locally
// (grain size set to 1) before being used for the parallel loops.
//
// Returns the number of elements written to 'output'.
template <typename InputIterator, typename OutputIterator, typename SelectLambda>
134+
Int32 doFilter(ForLoopRunInfo run_info, Int32 nb_value,
135+
InputIterator input, OutputIterator output,
136+
SelectLambda select_lambda)
137+
{
138+
// Index type
139+
using IndexType = Int32;
140+
141+
// One flag per input element, so the predicate is evaluated only once per element.
UniqueArray<bool> select_flags(nb_value);
142+
Span<bool> select_flags_view = select_flags;
143+
//std::cout << "DO_FILTER MULTI_THREAD nb_value=" << nb_value << "\n";
144+
// First pass over one sub-interval: stores the predicate result of each element
// in 'select_flags_view' and returns how many elements were selected.
// Note: the lambda parameter 'nb_value' shadows the function parameter of the same name.
auto multiple_getter_func = [=](Int32 input_index, Int32 nb_value) -> IndexType {
145+
IndexType partial_value = 0;
146+
for (Int32 x = 0; x < nb_value; ++x) {
147+
const Int32 index = x + input_index;
148+
bool is_select = select_lambda(input[index]);
149+
select_flags_view[index] = is_select;
150+
if (is_select)
151+
++partial_value;
152+
}
153+
return partial_value;
154+
};
155+
156+
// Second pass over one sub-interval: copies the flagged elements to 'output',
// starting at output index 'partial_value'.
auto multiple_setter_func = [=](IndexType partial_value, Int32 input_index, Int32 nb_value) {
157+
for (Int32 x = 0; x < nb_value; ++x) {
158+
const Int32 index = x + input_index;
159+
if (select_flags_view[index]) {
160+
output[partial_value] = input[index];
161+
++partial_value;
162+
}
163+
}
164+
};
165+
166+
// TODO: compute this value automatically.
167+
const Int32 nb_block = 10;
168+
169+
// Array holding the partial values (selected-element counts) of the blocks.
170+
// TODO: use padding to avoid cache conflicts between threads.
171+
SmallArray<Int32> partial_values(nb_block, 0);
172+
Span<Int32> out_partial_values = partial_values;
173+
174+
// Loop body for blocks [a, a+n): stores the selected count of each block.
// NOTE(review): '_subInterval' is defined outside this block; it presumably returns
// the start index and size of block 'interval_index' among 'nb_block' blocks
// covering 'nb_value' elements -- confirm against its definition.
auto partial_value_func = [=](Int32 a, Int32 n) {
175+
for (Int32 i = 0; i < n; ++i) {
176+
Int32 interval_index = i + a;
177+
178+
Int32 input_index = 0;
179+
Int32 nb_value_in_interval = 0;
180+
_subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
181+
182+
out_partial_values[interval_index] = multiple_getter_func(input_index, nb_value_in_interval);
183+
}
184+
};
185+
186+
// Start from the caller's loop options (or defaults when none are set) and set the grain size to 1.
ParallelLoopOptions loop_options(run_info.options().value_or(ParallelLoopOptions{}));
187+
loop_options.setGrainSize(1);
188+
run_info.addOptions(loop_options);
189+
190+
// Computes the partial sums for nb_block
191+
Arcane::arcaneParallelFor(0, nb_block, run_info, partial_value_func);
192+
193+
// Computes the number of filtered values
194+
// Also computes the accumulated value of partial_values
195+
Int32 nb_filter = 0;
196+
for (Int32 i = 0; i < nb_block; ++i) {
197+
Int32 x = partial_values[i];
198+
nb_filter += x;
199+
partial_values[i] = nb_filter;
200+
}
201+
202+
// Loop body for blocks [a, a+n): copies the selected elements of each block to 'output'.
// The output offset of a block is the accumulated count of the previous block (0 for the first one).
// NOTE(review): the accumulated counts are written through 'partial_values' and read here
// through 'out_partial_values'; this assumes Span is a non-owning view on the array -- confirm.
auto filter_func = [=](Int32 a, Int32 n) {
203+
for (Int32 i = 0; i < n; ++i) {
204+
Int32 interval_index = i + a;
205+
206+
IndexType partial_value = 0;
207+
if (interval_index > 0)
208+
partial_value = out_partial_values[interval_index - 1];
209+
210+
Int32 input_index = 0;
211+
Int32 nb_value_in_interval = 0;
212+
_subInterval<Int32>(nb_value, interval_index, nb_block, &input_index, &nb_value_in_interval);
213+
214+
multiple_setter_func(partial_value, input_index, nb_value_in_interval);
215+
}
216+
};
217+
218+
// For now the input and the output may overlap.
219+
// In that case the filling is done sequentially.
220+
// The flag is hard-coded to true, so the parallel branch below is currently never taken.
const bool may_input_and_output_overlap = true;
221+
if (may_input_and_output_overlap)
222+
filter_func(0, nb_block);
223+
else
224+
Arcane::arcaneParallelFor(0, nb_block, run_info, filter_func);
225+
226+
return nb_filter;
227+
}
228+
133229
private:
134230

135231
template <typename SizeType>

0 commit comments

Comments
 (0)