Skip to content

Commit 1bc0dce

Browse files
committed
Merge branch '2023.06-software.eessi.io' of github.com:EESSI/software-layer into ci_enable_sapphire_rapids
2 parents 97834f7 + ea73ab0 commit 1bc0dce

File tree

55 files changed

+884
-421
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

55 files changed

+884
-421
lines changed

Diff for: .github/workflows/test-software.eessi.io.yml

-14
Original file line numberDiff line numberDiff line change
@@ -60,20 +60,6 @@ jobs:
6060
# first check the CPU-only builds for this CPU target
6161
echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml with latest EasyBuild release)"
6262
for easystack_file in $(EESSI_VERSION=${{matrix.EESSI_VERSION}} .github/workflows/scripts/only_latest_easystacks.sh); do
63-
if [ ${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} = "x86_64/amd/zen4" ]; then
64-
if grep -q 2022b <<<"${easystack_file}"; then
65-
# skip the check of installed software on zen4 for foss/2022b builds
66-
continue
67-
fi
68-
if [[ $easystack_file == *"rebuilds"* ]]; then
69-
# Also handle rebuilds, make a temporary EasyStack file where we clean out all 2022b stuff and use that
70-
new_easystack=$(mktemp pruned_easystackXXX --suffix=.yml)
71-
# first clean out the options then clean out the .eb name
72-
sed '/2022b\|12\.2\.0/,/\.eb/{/\.eb/!d}' "${easystack_file}" | sed '/2022b\|12\.2\.0/d' > $new_easystack
73-
diff --unified=0 "$easystack_file" "$new_easystack" || :
74-
easystack_file="$new_easystack"
75-
fi
76-
fi
7763
echo "check missing installations for ${easystack_file}..."
7864
./check_missing_installations.sh ${easystack_file}
7965
ec=$?

Diff for: EESSI-install-software.sh

+10-3
Original file line numberDiff line numberDiff line change
@@ -271,11 +271,18 @@ fi
271271

272272
# Install NVIDIA drivers in host_injections (if they exist)
273273
if command_exists "nvidia-smi"; then
274-
echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
275-
${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
274+
nvidia-smi --version
275+
ec=$?
276+
if [ ${ec} -eq 0 ]; then
277+
echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
278+
${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
279+
else
280+
echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run succesfully."
281+
echo "This script now assumes this is NOT a GPU node."
282+
echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error."
283+
fi
276284
fi
277285

278-
279286
if [ ! -z "${shared_fs_path}" ]; then
280287
shared_eb_sourcepath=${shared_fs_path}/easybuild/sources
281288
echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path"

Diff for: EESSI-remove-software.sh

+10-7
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,12 @@ if [ $EUID -eq 0 ]; then
116116
if [ -f ${easystack_file} ]; then
117117
echo_green "Software rebuild(s) requested in ${easystack_file}, so determining which existing installation have to be removed..."
118118
# we need to remove existing installation directories first,
119-
# so let's figure out which modules have to be rebuilt by doing a dry-run and grepping "someapp/someversion" for the relevant lines (with [R])
119+
# so let's figure out which modules have to be rebuilt by doing a
120+
# dry-run and grepping "someapp/someversion" for the relevant
121+
# lines (with [R] or [F])
122+
# * [F] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion)
120123
# * [R] $CFGS/s/someapp/someapp-someversion.eb (module: someapp/someversion)
121-
rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[R\]" | grep -o "module: .*[^)]" | awk '{print $2}')
124+
rebuild_apps=$(eb --allow-use-as-root-and-accept-consequences --dry-run-short --rebuild --easystack ${easystack_file} | grep "^ \* \[[FR]\]" | grep -o "module: .*[^)]" | awk '{print $2}')
122125
for app in ${rebuild_apps}; do
123126
# Returns e.g. /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2/modules/all:
124127
app_modulepath=$(module --terse av ${app} 2>&1 | head -n 1 | sed 's/://')
@@ -130,13 +133,13 @@ if [ $EUID -eq 0 ]; then
130133
echo_yellow "Removing ${app_dir} and ${app_module}..."
131134
rm -rf ${app_dir}
132135
rm -rf ${app_module}
133-
# recreate the installation directories and first-level subdirectories to work around permission denied
134-
# issues when rebuilding the package (see https://github.com/EESSI/software-layer/issues/556)
136+
# recreate the installation directory and do an ls on the first-level subdirectories to work around
137+
# permission issues when reinstalling the application (see https://github.com/EESSI/software-layer/issues/556)
135138
echo_yellow "Recreating an empty ${app_dir}..."
136139
mkdir -p ${app_dir}
137-
for app_subdir in ${app_subdirs}; do
138-
mkdir -p ${app_subdir}
139-
done
140+
# these subdirs don't (and shouldn't) exist, but we need to do the ls anyway as a workaround,
141+
# so redirect to /dev/null and ignore the exit code
142+
ls ${app_subdirs} >& /dev/null || true
140143
done
141144
else
142145
fatal_error "Easystack file ${easystack_file} not found!"

Diff for: bot/build.sh

+27-8
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,15 @@ if [[ ! -z ${SINGULARITY_CACHEDIR} ]]; then
9090
export SINGULARITY_CACHEDIR
9191
fi
9292

93-
echo -n "setting \$STORAGE by replacing any var in '${LOCAL_TMP}' -> "
94-
# replace any env variable in ${LOCAL_TMP} with its
95-
# current value (e.g., a value that is local to the job)
96-
STORAGE=$(envsubst <<< ${LOCAL_TMP})
97-
echo "'${STORAGE}'"
93+
if [[ -z "${TMPDIR}" ]]; then
94+
echo -n "setting \$STORAGE by replacing any var in '${LOCAL_TMP}' -> "
95+
# replace any env variable in ${LOCAL_TMP} with its
96+
# current value (e.g., a value that is local to the job)
97+
STORAGE=$(envsubst <<< ${LOCAL_TMP})
98+
else
99+
STORAGE=${TMPDIR}
100+
fi
101+
echo "bot/build.sh: STORAGE='${STORAGE}'"
98102

99103
# make sure ${STORAGE} exists
100104
mkdir -p ${STORAGE}
@@ -115,7 +119,8 @@ mkdir -p ${SINGULARITY_TMPDIR}
115119

116120
# load modules if LOAD_MODULES is not empty
117121
if [[ ! -z ${LOAD_MODULES} ]]; then
118-
for mod in $(echo ${LOAD_MODULES} | tr ',' '\n')
122+
IFS=',' read -r -a modules <<< "$(echo "${LOAD_MODULES}")"
123+
for mod in "${modules[@]}";
119124
do
120125
echo "bot/build.sh: loading module '${mod}'"
121126
module load ${mod}
@@ -243,14 +248,28 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR}
243248
# prepare arguments to eessi_container.sh specific to build step
244249
BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
245250
BUILD_STEP_ARGS+=("--storage" "${STORAGE}")
251+
246252
# add options required to handle NVIDIA support
247253
if command_exists "nvidia-smi"; then
248-
echo "Command 'nvidia-smi' found, using available GPU"
249-
BUILD_STEP_ARGS+=("--nvidia" "all")
254+
# Accept that this may fail
255+
set +e
256+
nvidia-smi --version
257+
ec=$?
258+
set -e
259+
if [ ${ec} -eq 0 ]; then
260+
echo "Command 'nvidia-smi' found, using available GPU"
261+
BUILD_STEP_ARGS+=("--nvidia" "all")
262+
else
263+
echo "Warning: command 'nvidia-smi' found, but 'nvidia-smi --version' did not run succesfully."
264+
echo "This script now assumes this is NOT a GPU node."
265+
echo "If, and only if, the current node actually does contain Nvidia GPUs, this should be considered an error."
266+
BUILD_STEP_ARGS+=("--nvidia" "install")
267+
fi
250268
else
251269
echo "No 'nvidia-smi' found, no available GPU but allowing overriding this check"
252270
BUILD_STEP_ARGS+=("--nvidia" "install")
253271
fi
272+
254273
# Retain location for host injections so we don't reinstall CUDA
255274
# (Always need to run the driver installation as available driver may change)
256275
if [[ ! -z ${SHARED_FS_PATH} ]]; then

0 commit comments

Comments
 (0)