Skip to content

Commit c149943

Browse files
Babis Chaliosjmperez-bsc
Babis Chalios
authored and committed
Creating the infrastructure for Cluster support
Adds the first bits and pieces of the infrastructure needed to run on a cluster. At the moment we have defined what a message and a messenger are, and we have a basic MPI messenger implementation. This commit also adds a "hardware" representation of a cluster node and a basic cluster manager. Cluster support is conditional and must be enabled at configuration time. Even then, Cluster support is not enabled at runtime unless the environment variable NANOS6_COMMUNICATION is set; the value of that variable determines the communication layer to be used. The only meaningful value at the moment is 'mpi-2sided'.
1 parent 63b8f75 commit c149943

20 files changed

+1079
-5
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,4 @@ configure.scan
2929
libtool
3030
stamp-h1
3131
*.swp
32+
**cscope.out

Makefile.am

+20-5
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ libnanos6_main_wrapper_a_SOURCES = \
121121
loader/main-wrapper.c \
122122
loader/main-wrapper.h
123123

124-
libnanos6_main_wrapper_a_CPPFLAGS = $(CUDA_CFLAGS) $(MCMODEL_FLAGS)
124+
libnanos6_main_wrapper_a_CPPFLAGS = $(CUDA_CFLAGS) $(MPI_CXXFLAGS) $(MCMODEL_FLAGS)
125125

126126

127127
nanos6-main-wrapper.o: libnanos6-main-wrapper.a
@@ -135,7 +135,7 @@ libnanos6_library_mode_a_SOURCES = \
135135
loader/device_strings.c \
136136
loader/library-mode-init.c
137137

138-
libnanos6_library_mode_a_CPPFLAGS = $(CUDA_CFLAGS) $(MCMODEL_FLAGS)
138+
libnanos6_library_mode_a_CPPFLAGS = $(CUDA_CFLAGS) $(MPI_CXXFLAGS) $(MCMODEL_FLAGS)
139139

140140

141141
nanos6-library-mode.o: libnanos6-library-mode.a
@@ -215,7 +215,7 @@ libnanos6_la_SOURCES = \
215215
loader/loader.c \
216216
loader/loader.h
217217

218-
libnanos6_la_CPPFLAGS = -DSONAME_MAJOR=\"$(lib_major)\" -DSONAME_SUFFIX=\"$(lib_suffix)\" $(CUDA_CFLAGS)
218+
libnanos6_la_CPPFLAGS = -DSONAME_MAJOR=\"$(lib_major)\" -DSONAME_SUFFIX=\"$(lib_suffix)\" $(CUDA_CFLAGS) $(MPI_CXXFLAGS)
219219
libnanos6_la_LDFLAGS = $(AM_LDFLAGS) $(PTHREAD_CFLAGS) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(LDFLAGS_NOUNDEFINED)
220220

221221

@@ -371,7 +371,16 @@ if USE_CUDA
371371
common_sources += $(cuda_sources)
372372
endif
373373

374+
cluster_sources = \
375+
src/cluster/ClusterManager.cpp \
376+
src/cluster/messages/Message.cpp \
377+
src/cluster/messages/MessageSysFinish.cpp \
378+
src/cluster/messages/MessageType.cpp \
379+
src/cluster/messenger/MPIMessenger.cpp
374380

381+
if USE_CLUSTER
382+
common_sources += $(cluster_sources)
383+
endif
375384

376385
discrete_dependency_sources = \
377386
src/dependencies/discrete/Reductions.cpp \
@@ -549,6 +558,10 @@ memory_allocator_debug_cppflags = -I$(srcdir)/src/memory/allocator/malloc
549558
# src/null/NullStaticBlock.cpp
550559

551560
noinst_HEADERS = \
561+
src/cluster/ClusterManager.hpp \
562+
src/cluster/messages/Message.hpp \
563+
src/cluster/messages/MessageSysFinish.hpp \
564+
src/cluster/messages/MessageType.hpp \
552565
src/dependencies/DataAccessBase.hpp \
553566
src/dependencies/DataAccessType.hpp \
554567
src/dependencies/MultidimensionalAPITraversal.hpp \
@@ -628,6 +641,7 @@ noinst_HEADERS = \
628641
src/executors/threads/WorkerThreadImplementation.hpp \
629642
src/executors/threads/kernel-level/CPUThreadingModelData.hpp \
630643
src/executors/threads/kernel-level/WorkerThreadBase.hpp \
644+
src/hardware/cluster/ClusterNode.hpp \
631645
src/hardware/cuda/CUDAManager.hpp \
632646
src/hardware/cuda/compute/CUDAComputePlace.hpp \
633647
src/hardware/cuda/compute/stream/CUDAStream.hpp \
@@ -909,6 +923,7 @@ noinst_HEADERS = \
909923
src/scheduling/schedulers/tree-scheduler/queue/LIFOQueue.hpp \
910924
src/support/ConcurrentUnorderedList.hpp \
911925
src/support/ConstPropagator.hpp \
926+
src/support/GenericFactory.hpp \
912927
src/support/GlobalLock.hpp \
913928
src/support/InlineDoublyLinkedList.hpp \
914929
src/support/InstrumentedThread.hpp \
@@ -944,8 +959,8 @@ EXTRA_DIST += \
944959
tests/tap-driver.pl \
945960
tests/tap-driver.sh
946961

947-
common_libnanos6_cppflags = $(BOOST_CPPFLAGS) -DBOOST_ENABLE_ASSERT_DEBUG_HANDLER $(PTHREAD_CFLAGS) $(hwloc_CFLAGS) $(libnuma_CPPFLAGS) $(CUDA_CFLAGS) $(memkind_CPPFLAGS)
948-
common_libnanos6_ldflags = $(AM_LDFLAGS) $(BOOST_LDFLAGS) -version-info $(lib_current):$(lib_revision):$(lib_age) $(PTHREAD_CFLAGS) $(PTHREAD_LIBS) $(LDFLAGS_NOUNDEFINED) $(hwloc_LIBS) $(libnuma_LIBS) $(DLOPEN_LIBS) $(CUDA_LIBS) $(memkind_LIBS)
962+
common_libnanos6_cppflags = $(BOOST_CPPFLAGS) -DBOOST_ENABLE_ASSERT_DEBUG_HANDLER $(PTHREAD_CFLAGS) $(hwloc_CFLAGS) $(libnuma_CPPFLAGS) $(CUDA_CFLAGS) $(MPI_CXXFLAGS) $(memkind_CPPFLAGS)
963+
common_libnanos6_ldflags = $(AM_LDFLAGS) $(BOOST_LDFLAGS) -version-info $(lib_current):$(lib_revision):$(lib_age) $(PTHREAD_CFLAGS) $(PTHREAD_LIBS) $(LDFLAGS_NOUNDEFINED) $(hwloc_LIBS) $(libnuma_LIBS) $(DLOPEN_LIBS) $(CUDA_LIBS) $(MPI_CXXLDFLAGS) $(memkind_LIBS)
949964

950965

951966
# This is needed for debuggable spinlocks

configure.ac

+25
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,21 @@ if test x"${ac_use_cuda}" = x"yes" ; then
280280
AC_DEFINE([USE_CUDA], [1], [Define if CUDA is enabled.])
281281
fi
282282

283+
# Check support for Cluster
284+
AC_ARG_ENABLE([cluster], [AS_HELP_STRING([--enable-cluster], [Enable OmpSs@Cluster support])])
285+
286+
if test x"${enable_cluster}" = x"yes" ; then
287+
ac_use_cluster=yes
288+
AC_DEMAND_MPI
289+
else
290+
ac_use_cluster=no
291+
fi
292+
AM_CONDITIONAL([USE_CLUSTER], [test x${ac_use_cluster} = x"yes"])
293+
AM_CONDITIONAL([HAVE_MPI], [test x${ac_use_cluster} = x"yes"])
294+
295+
if test x"${ac_use_cluster}" = x"yes" ; then
296+
AC_DEFINE([USE_CLUSTER], [1], [Define if Cluster is enabled.])
297+
fi
283298

284299
AC_CHECK_MAIN_WRAPPER_TYPE
285300
AC_CHECK_SYMBOL_RESOLUTION_STRATEGY
@@ -363,6 +378,16 @@ else
363378
AC_MSG_RESULT([no])
364379
fi
365380

381+
_AS_ECHO([])
382+
_AS_ECHO_N([ Cluster support is enabled... ])
383+
if test x"${ac_use_cluster}" = x"yes" ; then
384+
AC_MSG_RESULT([yes])
385+
_AS_ECHO([ MPI CXXFLAGS... ${MPI_CXXFLAGS}])
386+
_AS_ECHO([ MPI CXXLDFLAGS... ${MPI_CXXLDFLAGS}])
387+
else
388+
AC_MSG_RESULT([no])
389+
fi
390+
366391
_AS_ECHO([])
367392
_AS_ECHO_N([ Memkind is enabled... ])
368393
if test x"${ac_use_memkind}" = x"yes" ; then

m4/lx_find_mpi.m4

+203
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#################################################################################################
2+
# Copyright (c) 2010, Lawrence Livermore National Security, LLC.
3+
# Produced at the Lawrence Livermore National Laboratory
4+
# Written by Todd Gamblin, [email protected].
5+
# LLNL-CODE-417602
6+
# All rights reserved.
7+
#
8+
# This file is part of Libra. For details, see http://github.com/tgamblin/libra.
9+
# Please also read the LICENSE file for further information.
10+
#
11+
# Redistribution and use in source and binary forms, with or without modification, are
12+
# permitted provided that the following conditions are met:
13+
#
14+
# * Redistributions of source code must retain the above copyright notice, this list of
15+
# conditions and the disclaimer below.
16+
# * Redistributions in binary form must reproduce the above copyright notice, this list of
17+
# conditions and the disclaimer (as noted below) in the documentation and/or other materials
18+
# provided with the distribution.
19+
# * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse
20+
# or promote products derived from this software without specific prior written permission.
21+
#
22+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
23+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
25+
# LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE
26+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27+
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31+
#################################################################################################
32+
33+
#
34+
# LX_FIND_MPI()
35+
# ------------------------------------------------------------------------
36+
# This macro finds an MPI compiler and extracts includes and libraries from
37+
# it for use in automake projects. The script exports the following variables:
38+
#
39+
# AC_DEFINE variables:
40+
# HAVE_MPI AC_DEFINE'd to 1 if we found MPI
41+
#
42+
# AC_SUBST variables:
43+
# MPICC Name of MPI compiler
44+
# MPI_CFLAGS Includes and defines for MPI C compilation
45+
# MPI_CLDFLAGS Libraries and library paths for linking MPI C programs
46+
#
47+
# MPICXX Name of MPI C++ compiler
48+
# MPI_CXXFLAGS Includes and defines for MPI C++ compilation
49+
# MPI_CXXLDFLAGS Libraries and library paths for linking MPI C++ programs
50+
#
51+
# MPIF77 Name of MPI Fortran 77 compiler
52+
# MPI_F77FLAGS Includes and defines for MPI Fortran 77 compilation
53+
# MPI_F77LDFLAGS Libraries and library paths for linking MPI Fortran 77 programs
54+
#
55+
# MPIFC Name of MPI Fortran compiler
56+
# MPI_FFLAGS Includes and defines for MPI Fortran compilation
57+
# MPI_FLDFLAGS Libraries and library paths for linking MPI Fortran programs
58+
#
59+
# Shell variables output by this macro:
60+
# have_C_mpi 'yes' if we found MPI for C, 'no' otherwise
61+
# have_CXX_mpi 'yes' if we found MPI for C++, 'no' otherwise
62+
# have_F77_mpi 'yes' if we found MPI for F77, 'no' otherwise
63+
# have_F_mpi 'yes' if we found MPI for Fortran, 'no' otherwise
64+
#
65+
AC_DEFUN([LX_FIND_MPI],
[
     # Dispatch on the language that is currently selected. Every branch first
     # honours the matching user-supplied compiler variable when it is
     # non-empty, and otherwise searches a list of well-known MPI wrapper
     # names for that language.
     AC_LANG_CASE(
     [C], [
         AC_REQUIRE([AC_PROG_CC])
         if test -n "$MPICC"; then
             LX_QUERY_MPI_COMPILER([MPICC], [$MPICC], [C])
         else
             LX_QUERY_MPI_COMPILER([MPICC], [mpicc mpiicc mpixlc mpipgcc], [C])
         fi
     ],
     [C++], [
         AC_REQUIRE([AC_PROG_CXX])
         if test -n "$MPICXX"; then
             LX_QUERY_MPI_COMPILER([MPICXX], [$MPICXX], [CXX])
         else
             LX_QUERY_MPI_COMPILER([MPICXX], [mpicxx mpiCC mpic++ mpig++ mpiicpc mpipgCC mpixlC], [CXX])
         fi
     ],
     [F77], [
         AC_REQUIRE([AC_PROG_F77])
         if test -n "$MPIF77"; then
             LX_QUERY_MPI_COMPILER([MPIF77], [$MPIF77], [F77])
         else
             LX_QUERY_MPI_COMPILER([MPIF77], [mpif77 mpiifort mpixlf77 mpixlf77_r], [F77])
         fi
     ],
     [Fortran], [
         AC_REQUIRE([AC_PROG_FC])
         if test -n "$MPIFC"; then
             LX_QUERY_MPI_COMPILER([MPIFC], [$MPIFC], [F])
         else
             # Candidate wrapper names, grouped by vendor. They are kept in
             # shell variables and expanded when configure runs.
             mpi_default_fc="mpif95 mpif90 mpigfortran mpif2003"
             mpi_intel_fc="mpiifort"
             mpi_xl_fc="mpixlf95 mpixlf95_r mpixlf90 mpixlf90_r mpixlf2003 mpixlf2003_r"
             mpi_pg_fc="mpipgf95 mpipgf90"
             LX_QUERY_MPI_COMPILER([MPIFC], [$mpi_default_fc $mpi_intel_fc $mpi_xl_fc $mpi_pg_fc], [F])
         fi
     ])
])
105+
106+
107+
#
108+
# LX_QUERY_MPI_COMPILER([compiler-var-name], [compiler-names], [output-var-prefix])
109+
# ------------------------------------------------------------------------
110+
# AC_SUBST variables:
111+
# MPI_<prefix>FLAGS Includes and defines for MPI compilation
112+
# MPI_<prefix>LDFLAGS Libraries and library paths for linking MPI C programs
113+
#
114+
# Shell variables output by this macro:
115+
# found_mpi_flags 'yes' if we were able to get flags, 'no' otherwise
116+
#
117+
AC_DEFUN([LX_QUERY_MPI_COMPILER],
[
     # Overview: locate an MPI compiler wrapper, ask it for the compile and link
     # command lines it would use, split those into preprocessor and linker
     # flags, and finally verify the flags by linking a tiny MPI program with
     # the regular compiler of the current language.

     # Try to find a working MPI compiler from the supplied names
     AC_PATH_PROGS($1, [$2], [not-found])

     # Start from a clean slate so that values left behind by an earlier
     # invocation of this macro, for another language, are never reused.
     lx_mpi_compile_line=
     lx_mpi_link_line=
     lx_mpi_command_line=

     # Figure out which option makes the wrapper show its compile and link
     # lines. The probes are tried in this order: -showme:compile with
     # -showme:link, then -showme, then -compile-info with -link-info, and
     # finally -show. After this part of the macro we either have a compile
     # line plus a link line, or a single combined command line.
     AC_MSG_CHECKING([whether $$1 responds to '-showme:compile'])
     lx_mpi_compile_line=`$$1 -showme:compile 2>/dev/null`
     if test "$?" -eq 0; then
         AC_MSG_RESULT([yes])
         lx_mpi_link_line=`$$1 -showme:link 2>/dev/null`
     else
         AC_MSG_RESULT([no])
         AC_MSG_CHECKING([whether $$1 responds to '-showme'])
         lx_mpi_command_line=`$$1 -showme 2>/dev/null`
         if test "$?" -ne 0; then
             AC_MSG_RESULT([no])
             AC_MSG_CHECKING([whether $$1 responds to '-compile-info'])
             lx_mpi_compile_line=`$$1 -compile-info 2>/dev/null`
             if test "$?" -eq 0; then
                 AC_MSG_RESULT([yes])
                 lx_mpi_link_line=`$$1 -link-info 2>/dev/null`
             else
                 AC_MSG_RESULT([no])
                 AC_MSG_CHECKING([whether $$1 responds to '-show'])
                 lx_mpi_command_line=`$$1 -show 2>/dev/null`
                 if test "$?" -eq 0; then
                     AC_MSG_RESULT([yes])
                 else
                     AC_MSG_RESULT([no])
                 fi
             fi
         else
             AC_MSG_RESULT([yes])
         fi
     fi

     # When the wrapper reported separate compile and link lines, merge them
     # into the single command line that is parsed below.
     if test -n "$lx_mpi_compile_line" && test -n "$lx_mpi_link_line"; then
         lx_mpi_command_line="$lx_mpi_compile_line $lx_mpi_link_line"
     fi

     if test -n "$lx_mpi_command_line"; then
         # Now extract the different parts of the MPI command line.  Do these separately in case we need to
         # parse them all out in future versions of this macro.
         lx_mpi_defines=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-D\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
         lx_mpi_includes=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-I\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
         lx_mpi_link_paths=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-L\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
         lx_mpi_libs=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-l\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
         lx_mpi_link_args=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-Wl,\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`

         # Create variables and clean up newlines and multiple spaces.
         # The last sed uses a basic regular expression, where a run of
         # spaces has to be spelled as one space followed by a starred space.
         MPI_$3FLAGS="$lx_mpi_defines $lx_mpi_includes"
         MPI_$3LDFLAGS="$lx_mpi_link_paths $lx_mpi_libs $lx_mpi_link_args"
         MPI_$3FLAGS=`  echo "$MPI_$3FLAGS"   | tr '\n' ' ' | sed 's/^[[ \t]]*//;s/[[ \t]]*$//' | sed 's/  */ /g'`
         MPI_$3LDFLAGS=`echo "$MPI_$3LDFLAGS" | tr '\n' ' ' | sed 's/^[[ \t]]*//;s/[[ \t]]*$//' | sed 's/  */ /g'`

         # Temporarily replace the preprocessor flags and libraries with the
         # extracted MPI ones for the link test. Both are restored afterwards.
         OLD_CPPFLAGS=$CPPFLAGS
         OLD_LIBS=$LIBS
         CPPFLAGS=$MPI_$3FLAGS
         LIBS=$MPI_$3LDFLAGS

         AC_LINK_IFELSE(
             [AC_LANG_PROGRAM(
                 [[#include <mpi.h>]],
                 [[int rank, size;
                   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
                   MPI_Comm_size(MPI_COMM_WORLD, &size);]])],
             [# Add a define for testing at compile time.
              AC_DEFINE([HAVE_MPI], [1], [Define to 1 if you have MPI libs and headers.])
              have_$3_mpi='yes'],
             [# Zero out mpi flags so we do not link against the faulty library.
              MPI_$3FLAGS=""
              MPI_$3LDFLAGS=""
              have_$3_mpi='no'])

         # AC_SUBST everything.
         AC_SUBST($1)
         AC_SUBST(MPI_$3FLAGS)
         AC_SUBST(MPI_$3LDFLAGS)

         LIBS=$OLD_LIBS
         CPPFLAGS=$OLD_CPPFLAGS
     else
         echo Unable to find suitable MPI Compiler. Try setting $1.
         have_$3_mpi='no'
     fi
])

m4/mpi.m4

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
AC_DEFUN([AC_ARG_MPICC],
[
	# Declare the MPI compiler variables as precious so that they are listed
	# in the configure help output. MPICXX is declared as well because the MPI
	# probe of this project runs with C++ selected, and the C++ branch of
	# LX_FIND_MPI reads MPICXX rather than MPICC.
	AC_ARG_VAR([MPICC], [The MPI C compiler])
	AC_ARG_VAR([MPICXX], [The MPI C++ compiler])
]
)
6+
7+
# Just check
8+
AC_DEFUN([AC_PREPARE_MPI],
[
	AC_REQUIRE([AC_ARG_MPICC])

	# Run the MPI probe with C++ as the selected language, then go back to
	# the language that was selected before.
	AC_LANG_PUSH([C++])
	LX_FIND_MPI
	AC_LANG_POP([C++])

	# Publish the outcome: an automake conditional that is true when the C++
	# probe succeeded, plus the compile and link flags it extracted.
	AM_CONDITIONAL([HAVE_MPI], [test x"${have_CXX_mpi}" = x"yes"])
	AC_SUBST([MPI_CXXFLAGS])
	AC_SUBST([MPI_CXXLDFLAGS])
]
)
21+
22+
# Check, set compiler and linker parameters, and fail if not found
23+
AC_DEFUN([AC_DEMAND_MPI],
[
	AC_REQUIRE([AC_PREPARE_MPI])

	# Abort configuration when the C++ MPI probe did not succeed. The probe
	# runs with C++ selected, so the variable a user can set to point it at a
	# specific wrapper is MPICXX, and the error message says so.
	if test "${have_CXX_mpi}" != "yes" ; then
		AC_MSG_ERROR([Could not find a working MPI C++ compiler. Please try setting the MPICXX environment variable.])
	fi
]
)
31+

0 commit comments

Comments
 (0)