Skip to content

Commit a780c35

Browse files
Merge pull request #1790 from arcaneframework/dev/gg-disallow-reusing-command
Disallow reusing the command to launch two kernels
2 parents a5c7b72 + 2e23286 commit a780c35

File tree

7 files changed

+157
-107
lines changed

7 files changed

+157
-107
lines changed

arcane/ceapart/src/arcane/tests/CartesianMeshTestUtils.cc

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
22
//-----------------------------------------------------------------------------
3-
// Copyright 2000-2023 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
3+
// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
44
// See the top-level COPYRIGHT file for details.
55
// SPDX-License-Identifier: Apache-2.0
66
//-----------------------------------------------------------------------------
77
/*---------------------------------------------------------------------------*/
8-
/* CartesianMeshTestUtils.cc (C) 2000-2023 */
8+
/* CartesianMeshTestUtils.cc (C) 2000-2024 */
99
/* */
1010
/* Fonctions utilitaires pour les tests de 'CartesianMesh'. */
1111
/*---------------------------------------------------------------------------*/
@@ -245,19 +245,19 @@ _testDirCellAccelerator()
245245
CellDirectionMng cdm2;
246246
CellDirectionMng cdm3;
247247

248-
auto queue = m_accelerator_mng->defaultQueue();
249-
auto command = makeCommand(*queue);
248+
auto queue = m_accelerator_mng->queue();
250249

251250
VariableCellInt32 dummy_var(VariableBuildInfo(mesh, "DummyCellVariable"));
252251
dummy_var.fill(0);
253-
auto inout_dummy_var = viewInOut(command, dummy_var);
254252

255253
for (Integer idir = 0; idir < nb_dir; ++idir) {
256254
CellDirectionMng cdm(m_cartesian_mesh->cellDirection(idir));
257255
cdm2 = m_cartesian_mesh->cellDirection(idir);
258256
cdm3 = cdm;
259257
info() << "ACCELERATOR_DIRECTION=" << idir << " Cells=" << cdm.allCells().name();
260258
_checkItemGroupIsSorted(cdm.allCells());
259+
auto command = makeCommand(queue);
260+
auto inout_dummy_var = viewInOut(command, dummy_var);
261261
command << RUNCOMMAND_ENUMERATE(Cell, icell, cdm.allCells())
262262
{
263263
DirCellLocalId dir_cell(cdm.dirCellId(icell));

arcane/src/arcane/accelerator/core/RunCommandImpl.cc

+13-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "arcane/utils/ForLoopTraceInfo.h"
1818
#include "arcane/utils/ConcurrencyUtils.h"
1919
#include "arcane/utils/PlatformUtils.h"
20+
#include "arcane/utils/ValueConvert.h"
2021

2122
#include "arcane/accelerator/core/Runner.h"
2223
#include "arcane/accelerator/core/internal/IRunQueueEventImpl.h"
@@ -99,6 +100,9 @@ _init()
99100

100101
m_start_event = _createEvent();
101102
m_stop_event = _createEvent();
103+
104+
if (auto v = Convert::Type<Int32>::tryParseFromEnvironment("ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND", true))
105+
m_is_allow_reuse_command = (v.value() != 0);
102106
}
103107

104108
/*---------------------------------------------------------------------------*/
@@ -107,7 +111,9 @@ _init()
107111
RunCommandImpl* RunCommandImpl::
108112
create(RunQueueImpl* r)
109113
{
110-
return r->_internalCreateOrGetRunCommandImpl();
114+
RunCommandImpl* c = r->_internalCreateOrGetRunCommandImpl();
115+
c->_reset();
116+
return c;
111117
}
112118

113119
/*---------------------------------------------------------------------------*/
@@ -118,6 +124,12 @@ create(RunQueueImpl* r)
118124
void RunCommandImpl::
119125
notifyBeginLaunchKernel()
120126
{
127+
if (m_has_been_launched) {
128+
if (!m_is_allow_reuse_command)
129+
ARCANE_FATAL("Command has already been launched. You can not re-use the same command.\n"
130+
" You can temporarily allow it if you set environment variable\n"
131+
" ARCANE_ACCELERATOR_ALLOW_REUSE_COMMAND to 1\n");
132+
}
121133
IRunQueueStream* stream = internalStream();
122134
stream->notifyBeginLaunchKernel(*this);
123135
// TODO: utiliser la bonne stream en séquentiel
@@ -193,8 +205,6 @@ notifyEndExecuteKernel()
193205
ForLoopTraceInfo flti(traceInfo(), kernelName());
194206
ProfilingRegistry::_threadLocalForLoopInstance()->merge(*exec_info, flti);
195207
}
196-
197-
_reset();
198208
}
199209

200210
/*---------------------------------------------------------------------------*/

arcane/src/arcane/accelerator/core/RunQueueImpl.cc

+1
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ _internalCreateOrGetRunCommandImpl()
191191
else {
192192
p = RunCommand::_internalCreateImpl(this);
193193
}
194+
p->_reset();
194195
m_active_run_command_list.add(p);
195196
return p;
196197
}

arcane/src/arcane/accelerator/core/internal/RunCommandImpl.h

+10
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ namespace Arcane::Accelerator::impl
3838
class RunCommandImpl
3939
{
4040
friend RunCommand;
41+
friend RunQueueImpl;
4142

4243
public:
4344

@@ -110,6 +111,15 @@ class RunCommandImpl
110111
//! Indique si la commande s'exécute sur accélérateur
111112
const bool m_use_accelerator = false;
112113

114+
/*!
115+
* \brief Indique si on autorise à utiliser plusieurs fois la même commande.
116+
*
117+
* Normalement cela est interdit mais avant novembre 2024, il n'y avait pas
118+
* de mécanisme pour détecter cela. On peut donc temporairement autoriser
119+
* cela et dans un second temps on supprimera cette possibilité.
120+
*/
121+
bool m_is_allow_reuse_command = false;
122+
113123
private:
114124

115125
void _freePools();

arcane/src/arcane/tests/accelerator/AcceleratorViewsUnitTest.cc

+65-49
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ class AcceleratorViewsUnitTest
5555

5656
private:
5757

58-
ax::Runner* m_runner = nullptr;
58+
ax::Runner m_runner;
59+
ax::RunQueue m_queue;
5960
VariableCellArrayReal m_cell_array1;
6061
VariableCellArrayReal m_cell_array2;
6162
VariableCellReal2 m_cell1_real2;
@@ -136,7 +137,8 @@ AcceleratorViewsUnitTest::
136137
void AcceleratorViewsUnitTest::
137138
initializeTest()
138139
{
139-
m_runner = subDomain()->acceleratorMng()->defaultRunner();
140+
m_runner = subDomain()->acceleratorMng()->runner();
141+
m_queue = subDomain()->acceleratorMng()->queue();
140142

141143
m_cell_array1.resize(12);
142144
m_cell_array2.resize(12);
@@ -217,64 +219,70 @@ _executeTest1()
217219
{
218220
info() << "Test1";
219221
auto queue = makeQueue(m_runner);
220-
auto command = makeCommand(queue);
221222

222223
Integer dim2_size = m_cell_array1.arraySize();
223224

224225
{
225226
int seed = 37;
226227
_setCellArrayValue(seed);
227228

228-
auto in_cell_array1 = ax::viewIn(command, m_cell_array1);
229-
auto out_cell_array2 = ax::viewOut(command, m_cell_array2);
230-
auto in_partial_cell_array1 = ax::viewIn(command, m_partial_cell_array1);
231-
auto out_partial_cell_array2 = ax::viewOut(command, m_partial_cell_array2);
232-
auto inout_partial_cell_array2 = ax::viewInOut(command, m_partial_cell_array2);
233-
234-
auto in_partial_cell1 = ax::viewIn(command, m_partial_cell1);
235-
auto out_partial_cell2 = ax::viewOut(command, m_partial_cell2);
236-
auto inout_partial_cell2 = ax::viewOut(command, m_partial_cell2);
237-
238-
auto in_partial_cell1_real3 = ax::viewIn(command, m_partial_cell1_real3);
239-
auto out_partial_cell2_real3 = ax::viewOut(command, m_partial_cell2_real3);
240-
auto inout_partial_cell2_real3 = ax::viewInOut(command, m_partial_cell2_real3);
241-
242-
command << RUNCOMMAND_ENUMERATE (CellLocalId, vi, allCells())
243229
{
244-
out_cell_array2[vi].copy(in_cell_array1[vi]);
245-
};
230+
auto command = makeCommand(m_queue);
231+
auto in_cell_array1 = viewIn(command, m_cell_array1);
232+
auto out_cell_array2 = viewOut(command, m_cell_array2);
233+
234+
command << RUNCOMMAND_ENUMERATE (CellLocalId, vi, allCells())
235+
{
236+
out_cell_array2[vi].copy(in_cell_array1[vi]);
237+
};
238+
}
246239

247240
_checkCellArrayValue("View1");
248241

249-
command << RUNCOMMAND_ENUMERATE (IteratorWithIndex<CellLocalId>, vi, m_partial_cell_array1.itemGroup())
250242
{
251-
CellEnumeratorIndex iter_index(vi.index());
252-
CellLocalId cell_lid(vi.value());
253-
out_partial_cell_array2[iter_index].copy(in_cell_array1[cell_lid]);
254-
out_partial_cell_array2[iter_index][0] = in_partial_cell_array1[cell_lid][1];
255-
Real3 xyz(in_partial_cell1_real3[iter_index].y, in_partial_cell1_real3[iter_index].z, in_partial_cell1_real3[iter_index].x);
256-
Int32 modulo = vi.index() % 4;
257-
if (modulo == 3) {
258-
out_partial_cell_array2[iter_index][1] = in_partial_cell1[iter_index];
259-
out_partial_cell2[iter_index] = in_partial_cell1[iter_index];
260-
out_partial_cell2_real3[iter_index] = xyz;
261-
}
262-
else if (modulo == 2) {
263-
inout_partial_cell_array2[cell_lid][1] = in_partial_cell1[cell_lid];
264-
inout_partial_cell2[cell_lid] = in_partial_cell1[iter_index];
265-
inout_partial_cell2_real3[cell_lid] = xyz;
266-
}
267-
else if (modulo == 1) {
268-
out_partial_cell_array2[cell_lid][1] = in_partial_cell1[iter_index];
269-
out_partial_cell2[cell_lid] = in_partial_cell1[iter_index];
270-
out_partial_cell2_real3[cell_lid] = xyz;
271-
}
272-
else {
273-
inout_partial_cell_array2[iter_index][1] = in_partial_cell1[cell_lid];
274-
inout_partial_cell2[iter_index] = in_partial_cell1[iter_index];
275-
inout_partial_cell2_real3[iter_index] = xyz;
276-
}
277-
};
243+
auto command = makeCommand(m_queue);
244+
auto in_cell_array1 = viewIn(command, m_cell_array1);
245+
auto in_partial_cell_array1 = viewIn(command, m_partial_cell_array1);
246+
auto out_partial_cell_array2 = viewOut(command, m_partial_cell_array2);
247+
auto inout_partial_cell_array2 = viewInOut(command, m_partial_cell_array2);
248+
249+
auto in_partial_cell1 = viewIn(command, m_partial_cell1);
250+
auto out_partial_cell2 = viewOut(command, m_partial_cell2);
251+
auto inout_partial_cell2 = viewOut(command, m_partial_cell2);
252+
253+
auto in_partial_cell1_real3 = viewIn(command, m_partial_cell1_real3);
254+
auto out_partial_cell2_real3 = viewOut(command, m_partial_cell2_real3);
255+
auto inout_partial_cell2_real3 = viewInOut(command, m_partial_cell2_real3);
256+
command << RUNCOMMAND_ENUMERATE (IteratorWithIndex<CellLocalId>, vi, m_partial_cell_array1.itemGroup())
257+
{
258+
CellEnumeratorIndex iter_index(vi.index());
259+
CellLocalId cell_lid(vi.value());
260+
out_partial_cell_array2[iter_index].copy(in_cell_array1[cell_lid]);
261+
out_partial_cell_array2[iter_index][0] = in_partial_cell_array1[cell_lid][1];
262+
Real3 xyz(in_partial_cell1_real3[iter_index].y, in_partial_cell1_real3[iter_index].z, in_partial_cell1_real3[iter_index].x);
263+
Int32 modulo = vi.index() % 4;
264+
if (modulo == 3) {
265+
out_partial_cell_array2[iter_index][1] = in_partial_cell1[iter_index];
266+
out_partial_cell2[iter_index] = in_partial_cell1[iter_index];
267+
out_partial_cell2_real3[iter_index] = xyz;
268+
}
269+
else if (modulo == 2) {
270+
inout_partial_cell_array2[cell_lid][1] = in_partial_cell1[cell_lid];
271+
inout_partial_cell2[cell_lid] = in_partial_cell1[iter_index];
272+
inout_partial_cell2_real3[cell_lid] = xyz;
273+
}
274+
else if (modulo == 1) {
275+
out_partial_cell_array2[cell_lid][1] = in_partial_cell1[iter_index];
276+
out_partial_cell2[cell_lid] = in_partial_cell1[iter_index];
277+
out_partial_cell2_real3[cell_lid] = xyz;
278+
}
279+
else {
280+
inout_partial_cell_array2[iter_index][1] = in_partial_cell1[cell_lid];
281+
inout_partial_cell2[iter_index] = in_partial_cell1[iter_index];
282+
inout_partial_cell2_real3[iter_index] = xyz;
283+
}
284+
};
285+
}
278286
info() << "Check Partial values";
279287
ENUMERATE_ (Cell, iter, m_partial_cell_array1.itemGroup()) {
280288
CellEnumeratorIndex iter_index(iter.index());
@@ -290,6 +298,8 @@ _executeTest1()
290298
int seed = 23;
291299
_setCellArrayValue(seed);
292300

301+
auto command = makeCommand(m_queue);
302+
293303
auto in_cell_array1 = viewIn(command, m_cell_array1);
294304
auto out_cell_array2 = viewOut(command, m_cell_array2);
295305

@@ -305,6 +315,8 @@ _executeTest1()
305315
int seed = 53;
306316
_setCellArrayValue(seed);
307317

318+
auto command = makeCommand(m_queue);
319+
308320
auto in_cell_array1 = viewInOut(command, m_cell_array1);
309321
auto out_cell_array2 = viewOut(command, m_cell_array2);
310322

@@ -320,6 +332,8 @@ _executeTest1()
320332
int seed = 93;
321333
_setCellArrayValue(seed);
322334

335+
auto command = makeCommand(m_queue);
336+
323337
auto in_cell_array1 = ax::viewIn(command, m_cell_array1);
324338
auto out_cell_array2 = ax::viewInOut(command, m_cell_array2);
325339

@@ -335,6 +349,8 @@ _executeTest1()
335349
int seed = 43;
336350
_setCellArrayValue(seed);
337351

352+
auto command = makeCommand(m_queue);
353+
338354
auto inout_cell_array1 = ax::viewInOut(command, m_cell_array1);
339355
auto out_cell_array2 = ax::viewInOut(command, m_cell_array2);
340356

@@ -686,7 +702,7 @@ _executeTestMemoryCopy()
686702
info() << "Execute Test MemoryCopy";
687703
eMemoryRessource source_mem = eMemoryRessource::Host;
688704
eMemoryRessource dest_mem = eMemoryRessource::Host;
689-
if (ax::impl::isAcceleratorPolicy(m_runner->executionPolicy()))
705+
if (ax::impl::isAcceleratorPolicy(m_runner.executionPolicy()))
690706
dest_mem = eMemoryRessource::Device;
691707

692708
const int nb_value = 100000;

0 commit comments

Comments
 (0)