diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 44f524ebbc..c2900b9a30 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -161,19 +161,21 @@ _eessi_software_path=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_ _lmod_cfg_dir=${_eessi_software_path}/.lmod _lmod_rc_file=${_lmod_cfg_dir}/lmodrc.lua if [ ! -f ${_lmod_rc_file} ]; then + echo "Lmod file '${_lmod_rc_file}' does not exist yet; creating it..." command -V python3 python3 ${TOPDIR}/create_lmodrc.py ${_eessi_software_path} fi _lmod_sitepackage_file=${_lmod_cfg_dir}/SitePackage.lua if [ ! -f ${_lmod_sitepackage_file} ]; then + echo "Lmod file '${_lmod_sitepackage_file}' does not exist yet; creating it..." command -V python3 python3 ${TOPDIR}/create_lmodsitepackage.py ${_eessi_software_path} fi # Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE) -# $EESSI_SILENT - don't print any messages -# $EESSI_BASIC_ENV - give a basic set of environment variables -EESSI_SILENT=1 EESSI_BASIC_ENV=1 source $TOPDIR/init/eessi_environment_variables +# $EESSI_SILENT - don't print any messages if set (use 'unset EESSI_SILENT' to let script show messages) +# $EESSI_BASIC_ENV - give a basic set of environment variables if set (use 'EESSI_BASIC_ENV=' to let script initialise a full environment) +EESSI_SILENT=1 EESSI_BASIC_ENV= source $TOPDIR/init/eessi_environment_variables if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then fatal_error "Failed to determine software subdirectory?!" @@ -243,12 +245,13 @@ if [[ "${EESSI_CVMFS_REPO}" != /cvmfs/dev.eessi.io ]]; then ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} fi -# Install full CUDA SDK in host_injections +# Install full CUDA SDK and cu* libraries in host_injections # Hardcode this for now, see if it works # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install # Allow skipping CUDA SDK install in e.g. CI environments # The install_cuda... 
script uses EasyBuild. So, we need to check if we have EB # or skip this step. +echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary" module_avail_out=$TMPDIR/ml.out module avail 2>&1 | grep EasyBuild &> ${module_avail_out} if [[ $? -eq 0 ]]; then @@ -258,10 +261,15 @@ else export skip_cuda_install=True fi +temp_install_storage=${TMPDIR}/temp_install_storage +mkdir -p ${temp_install_storage} if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ + -t ${temp_install_storage} \ + --accept-cuda-eula \ + --accept-cudnn-eula else - echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found" + echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found" fi # Install NVIDIA drivers in host_injections (if they exist) @@ -318,20 +326,30 @@ else done fi -echo ">> Creating/updating Lmod RC file..." export LMOD_CONFIG_DIR="${EASYBUILD_INSTALLPATH}/.lmod" lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua" +if [[ ! -z ${EESSI_ACCELERATOR_TARGET} ]]; then + # EESSI_ACCELERATOR_TARGET is set, so let's remove the accelerator path from $lmod_rc_file + lmod_rc_file=$(echo ${lmod_rc_file} | sed "s@/accel/${EESSI_ACCELERATOR_TARGET}@@") + echo "Path to lmodrc.lua changed to '${lmod_rc_file}'" +fi lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?) if [ ! -f $lmod_rc_file ] || [ ${lmodrc_changed} == '0' ]; then + echo ">> Creating/updating Lmod RC file (${lmod_rc_file})..." python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH} check_exit_code $? 
"$lmod_rc_file created" "Failed to create $lmod_rc_file" fi -echo ">> Creating/updating Lmod SitePackage.lua ..." export LMOD_PACKAGE_PATH="${EASYBUILD_INSTALLPATH}/.lmod" lmod_sitepackage_file="$LMOD_PACKAGE_PATH/SitePackage.lua" +if [[ ! -z ${EESSI_ACCELERATOR_TARGET} ]]; then + # EESSI_ACCELERATOR_TARGET is set, so let's remove the accelerator path from $lmod_sitepackage_file + lmod_sitepackage_file=$(echo ${lmod_sitepackage_file} | sed "s@/accel/${EESSI_ACCELERATOR_TARGET}@@") + echo "Path to SitePackage.lua changed to '${lmod_sitepackage_file}'" +fi sitepackage_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodsitepackage.py$' > /dev/null; echo $?) if [ ! -f "$lmod_sitepackage_file" ] || [ "${sitepackage_changed}" == '0' ]; then + echo ">> Creating/updating Lmod SitePackage.lua (${lmod_sitepackage_file})..." python3 $TOPDIR/create_lmodsitepackage.py ${EASYBUILD_INSTALLPATH} check_exit_code $? "$lmod_sitepackage_file created" "Failed to create $lmod_sitepackage_file" fi diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml new file mode 100644 index 0000000000..873c19aa33 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml @@ -0,0 +1,3 @@ +easyconfigs: + - CUDA-12.1.1.eb + - cuDNN-8.9.2.26-CUDA-12.1.1.eb diff --git a/eb_hooks.py b/eb_hooks.py index 79bdeeee0d..3179ac170f 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -756,64 +756,170 @@ def post_postproc_cuda(self, *args, **kwargs): if 'libcudart' not in allowlist: raise EasyBuildError("Did not find 'libcudart' in allowlist: %s" % allowlist) - # iterate over all files in the CUDA installation directory - for dir_path, _, files in os.walk(self.installdir): - for filename in files: - full_path = os.path.join(dir_path, filename) - # we only really care about real files, i.e. 
not symlinks - if not os.path.islink(full_path): - # check if the current file name stub is part of the allowlist - basename = filename.split('.')[0] - if basename in allowlist: - self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) - else: - self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", - basename, full_path) - # if it is not in the allowlist, delete the file and create a symlink to host_injections - - # the host_injections path is under a fixed repo/location for CUDA - host_inj_path = re.sub(EESSI_INSTALLATION_REGEX, HOST_INJECTIONS_LOCATION, full_path) - # CUDA itself doesn't care about compute capability so remove this duplication from - # under host_injections (symlink to a single CUDA installation for all compute - # capabilities) - accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") - if accel_subdir: - host_inj_path = host_inj_path.replace("/accel/%s" % accel_subdir, '') - # make sure source and target of symlink are not the same - if full_path == host_inj_path: - raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " - "are using this hook for an EESSI installation?", - full_path, host_inj_path) - remove_file(full_path) - symlink(host_inj_path, full_path) + # replace files that are not distributable with symlinks into + # host_injections + replace_non_distributable_files_with_symlinks(self.log, self.installdir, self.name, allowlist) else: raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!") +def post_postproc_cudnn(self, *args, **kwargs): + """ + Remove files from cuDNN installation that we are not allowed to ship, + and replace them with a symlink to a corresponding installation under host_injections. 
+ """ + + # We need to check if we are doing an EESSI-distributed installation + eessi_installation = bool(re.search(EESSI_INSTALLATION_REGEX, self.installdir)) + + if self.name == 'cuDNN' and eessi_installation: + print_msg("Replacing files in cuDNN installation that we can not ship with symlinks to host_injections...") + + allowlist = ['LICENSE'] + + # read cuDNN LICENSE, construct allowlist based on section "2. Distribution" that specifies list of files that can be shipped + license_path = os.path.join(self.installdir, 'LICENSE') + search_string = "2. Distribution. The following portions of the SDK are distributable under the Agreement:" + found_search_string = False + with open(license_path) as infile: + for line in infile: + if line.strip().startswith(search_string): + found_search_string = True + # remove search string, split into words, remove trailing + # dots '.' and only retain words starting with a dot '.' + distributable = line[len(search_string):] + # distributable looks like ' the runtime files .so and .dll.' + # note the '.' after '.dll' + for word in distributable.split(): + if word[0] == '.': + # rstrip is used to remove the '.' 
after '.dll' + allowlist.append(word.rstrip('.')) + if not found_search_string: + # search string wasn't found in LICENSE file + raise EasyBuildError("search string '%s' was not found in license file '%s';" + "hence installation may be replaced by symlinks only", + search_string, license_path) + + allowlist = sorted(set(allowlist)) + self.log.info("Allowlist for files in cuDNN installation that can be redistributed: " + ', '.join(allowlist)) + + # replace files that are not distributable with symlinks into + # host_injections + replace_non_distributable_files_with_symlinks(self.log, self.installdir, self.name, allowlist) + else: + raise EasyBuildError("cuDNN-specific hook triggered for non-cuDNN easyconfig?!") + + +def replace_non_distributable_files_with_symlinks(log, install_dir, pkg_name, allowlist): + """ + Replace files that cannot be distributed with symlinks into host_injections + """ + # Different packages use different ways to specify which files or file + # 'types' may be redistributed. For CUDA, the 'EULA.txt' lists full file + # names. For cuDNN, the 'LICENSE' lists file endings/suffixes (e.g., '.so') + # that can be redistributed. + # The map 'extension_based' defines which of these two ways are employed. If + # full file names are used it maps a package name (key) to False (value). If + # endings/suffixes are used, it maps a package name to True. Later we can + # easily use this data structure to employ the correct method for + # postprocessing an installation. + extension_based = { + "CUDA": False, + "cuDNN": True, + } + if not pkg_name in extension_based: + raise EasyBuildError("Don't know how to strip non-distributable files from package %s.", pkg_name) + + # iterate over all files in the package installation directory + for dir_path, _, files in os.walk(install_dir): + for filename in files: + full_path = os.path.join(dir_path, filename) + # we only really care about real files, i.e. 
not symlinks + if not os.path.islink(full_path): + check_by_extension = extension_based[pkg_name] and '.' in filename + if check_by_extension: + # if the allowlist only contains extensions, we have to + # determine that from filename. we assume the extension is + # the second element when splitting the filename at dots + # (e.g., for 'libcudnn_adv_infer.so.8.9.2' the extension + # would be '.so') + extension = '.' + filename.split('.')[1] + # check if the current file name stub or its extension is part of the allowlist + basename = filename.split('.')[0] + if basename in allowlist: + log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) + elif check_by_extension and extension in allowlist: + log.debug("%s is found in allowlist, so keeping it: %s", extension, full_path) + else: + print_name = filename if extension_based[pkg_name] else basename + log.debug("%s is not found in allowlist, so replacing it with symlink: %s", + print_name, full_path) + # the host_injections path is under a fixed repo/location for CUDA or cuDNN + host_inj_path = re.sub(EESSI_INSTALLATION_REGEX, HOST_INJECTIONS_LOCATION, full_path) + # CUDA and cu* libraries themselves don't care about compute capability so remove this + # duplication from under host_injections (symlink to a single CUDA or cu* library + # installation for all compute capabilities) + accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") + if accel_subdir: + host_inj_path = host_inj_path.replace("/accel/%s" % accel_subdir, '') + # make sure source and target of symlink are not the same + if full_path == host_inj_path: + raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " + "are using this hook for an EESSI installation?", + full_path, host_inj_path) + remove_file(full_path) + symlink(host_inj_path, full_path) + + def inject_gpu_property(ec): """ - Add 'gpu' property, via modluafooter easyconfig parameter + Add 'gpu' property and EESSIVERSION envvars via 
modluafooter + easyconfig parameter, and drop dependencies to build dependencies """ ec_dict = ec.asdict() - # Check if CUDA is in the dependencies, if so add the 'gpu' Lmod property - if ('CUDA' in [dep[0] for dep in iter(ec_dict['dependencies'])]): - ec.log.info("Injecting gpu as Lmod arch property and envvar with CUDA version") - key = 'modluafooter' - value = 'add_property("arch","gpu")' - cuda_version = 0 - for dep in iter(ec_dict['dependencies']): - # Make CUDA a build dependency only (rpathing saves us from link errors) - if 'CUDA' in dep[0]: - cuda_version = dep[1] - ec_dict['dependencies'].remove(dep) - if dep not in ec_dict['builddependencies']: - ec_dict['builddependencies'].append(dep) - value = '\n'.join([value, 'setenv("EESSICUDAVERSION","%s")' % cuda_version]) - if key in ec_dict: - if value not in ec_dict[key]: - ec[key] = '\n'.join([ec_dict[key], value]) + # Check if CUDA, cuDNN, you-name-it is in the dependencies, if so + # - drop dependency to build dependency + # - add 'gpu' Lmod property + # - add envvar with package version + pkg_names = ( "CUDA", "cuDNN" ) + pkg_versions = { } + add_gpu_property = '' + + for pkg_name in pkg_names: + # Check if pkg_name is in the dependencies, if so drop dependency to build + # dependency and set variable for later adding the 'gpu' Lmod property + # to '.remove' dependencies from ec_dict['dependencies'] we make a copy, + # iterate over the copy and can then savely use '.remove' on the original + # ec_dict['dependencies']. 
+ deps = ec_dict['dependencies'][:] + if (pkg_name in [dep[0] for dep in deps]): + add_gpu_property = 'add_property("arch","gpu")' + for dep in deps: + if pkg_name == dep[0]: + # make pkg_name a build dependency only (rpathing saves us from link errors) + ec.log.info("Dropping dependency on %s to build dependency" % pkg_name) + ec_dict['dependencies'].remove(dep) + if dep not in ec_dict['builddependencies']: + ec_dict['builddependencies'].append(dep) + # take note of version for creating the modluafooter + pkg_versions[pkg_name] = dep[1] + if add_gpu_property: + ec.log.info("Injecting gpu as Lmod arch property and envvars for dependencies with their version") + modluafooter = 'modluafooter' + extra_mod_footer_lines = [add_gpu_property] + for pkg_name, version in pkg_versions.items(): + envvar = "EESSI%sVERSION" % pkg_name.upper() + extra_mod_footer_lines.append('setenv("%s","%s")' % (envvar, version)) + # take into account that modluafooter may already be set + if modluafooter in ec_dict: + value = ec_dict[modluafooter] + for line in extra_mod_footer_lines: + if not line in value: + value = '\n'.join([value, line]) + ec[modluafooter] = value else: - ec[key] = value + ec[modluafooter] = '\n'.join(extra_mod_footer_lines) + return ec @@ -873,4 +979,5 @@ def inject_gpu_property(ec): POST_POSTPROC_HOOKS = { 'CUDA': post_postproc_cuda, + 'cuDNN': post_postproc_cudnn, } diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables index d2daf40ace..60d69cc198 100644 --- a/init/eessi_environment_variables +++ b/init/eessi_environment_variables @@ -153,10 +153,10 @@ if [ -d $EESSI_PREFIX ]; then fi # Fix wrong path for RHEL >=8 libcurl - # This is required here because we ship curl in our compat layer. 
If we only provided - # curl as a module file we could instead do this via a `modluafooter` in an EasyBuild - # hook (or via an Lmod hook) - rhel_libcurl_file="/etc/pki/tls/certs/ca-bundle.crt" + # This is required here because we ship curl in our compat layer. If we only provided + # curl as a module file we could instead do this via a `modluafooter` in an EasyBuild + # hook (or via an Lmod hook) + rhel_libcurl_file="/etc/pki/tls/certs/ca-bundle.crt" if [ -f $rhel_libcurl_file ]; then show_msg "Found libcurl CAs file at RHEL location, setting CURL_CA_BUNDLE" export CURL_CA_BUNDLE=$rhel_libcurl_file diff --git a/install_scripts.sh b/install_scripts.sh index 11c7fc2a9f..b6b5ac92b0 100755 --- a/install_scripts.sh +++ b/install_scripts.sh @@ -122,10 +122,19 @@ copy_files_by_list ${TOPDIR}/scripts ${INSTALL_PREFIX}/scripts "${script_files[@ # Copy files for the scripts/gpu_support/nvidia directory nvidia_files=( - install_cuda_host_injections.sh link_nvidia_host_libraries.sh + install_cuda_and_libraries.sh + install_cuda_host_injections.sh + link_nvidia_host_libraries.sh ) copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia ${INSTALL_PREFIX}/scripts/gpu_support/nvidia "${nvidia_files[@]}" +# Easystacks to be used to install software in host injections +host_injections_easystacks=( + eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml +) +copy_files_by_list ${TOPDIR}/scripts/gpu_support/nvidia/easystacks \ +${INSTALL_PREFIX}/scripts/gpu_support/nvidia/easystacks "${host_injections_easystacks[@]}" + # Copy over EasyBuild hooks file used for installations hook_files=( eb_hooks.py diff --git a/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml new file mode 100644 index 0000000000..83e68077a2 --- /dev/null +++ b/scripts/gpu_support/nvidia/easystacks/eessi-2023.06-eb-4.9.4-2023a-CUDA-host-injections.yml @@ -0,0 +1,9 @@ +# This 
EasyStack provides a list of all the EasyConfigs that should be installed in host_injections
+# for nvidia GPU support, because they cannot (fully) be shipped as part of EESSI due to license constraints
+easyconfigs:
+  - CUDA-12.1.1.eb
+  - cuDNN-8.9.2.26-CUDA-12.1.1.eb:
+      options:
+        # needed to enforce acceptance of EULA in cuDNN easyblock,
+        # see https://github.com/easybuilders/easybuild-easyblocks/pull/3473
+        include-easyblocks-from-commit: 11afb88ec55e0ca431cbe823696aa43e2a9bfca8
diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh
new file mode 100755
index 0000000000..f9d889c1a1
--- /dev/null
+++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh
@@ -0,0 +1,255 @@
+#!/usr/bin/env bash
+
+# This script can be used to install CUDA and other libraries by NVIDIA under
+# the `.../host_injections` directory.
+#
+# This provides the parts of the CUDA installation and other libraries that
+# cannot be redistributed as part of EESSI due to license limitations. While
+# GPU-based software from EESSI will _run_ without these, installation of
+# additional software that builds upon CUDA or other libraries requires that
+# these installations are present under `host_injections`.
+#
+# The `host_injections` directory is a variant symlink that by default points to
+# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see
+# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the
+# installation to be successful, this directory needs to be writeable by the user
+# executing this script.
+#
+# Every easystack file matching `easystacks/eessi-*CUDA*.yml` next to this script
+# is processed in turn: the EasyBuild version encoded in the file name is loaded
+# together with EESSI-extend, and the packages that 'eb --dry-run-short' reports
+# as missing are installed without any EasyBuild hooks.
+
+# Initialise our bash functions (fatal_error, check_eessi_initialised and
+# echo_green used below are expected to come from utils.sh)
+TOPDIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
+source "$TOPDIR"/../../utils.sh
+
+# Function to display help message
+show_help() {
+    echo "Usage: $0 [OPTIONS]"
+    echo "Options:"
+    echo " --help Display this help message"
+    echo " --accept-cuda-eula You _must_ accept the CUDA EULA to install"
+    echo " CUDA, see the EULA at"
+    echo " https://docs.nvidia.com/cuda/eula/index.html"
+    echo " --accept-cudnn-eula You _must_ accept the cuDNN EULA to install"
+    echo " cuDNN, see the EULA at"
+    echo " https://docs.nvidia.com/deeplearning/cudnn/latest/reference/eula.html"
+    echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary"
+    echo " storage during the installation of CUDA"
+    echo " and/or other libraries (must have"
+    echo " several GB available; depends on the number of installations)"
+}
+
+# Initialize variables
+cuda_eula_accepted=0
+cudnn_eula_accepted=0
+EASYSTACK_FILE=
+TEMP_DIR=
+
+# Parse command-line options
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --help)
+            show_help
+            exit 0
+            ;;
+        --accept-cuda-eula)
+            cuda_eula_accepted=1
+            shift 1
+            ;;
+        --accept-cudnn-eula)
+            cudnn_eula_accepted=1
+            shift 1
+            ;;
+        -t|--temp-dir)
+            if [ -n "$2" ]; then
+                TEMP_DIR="$2"
+                shift 2
+            else
+                echo "Error: Argument required for $1"
+                show_help
+                exit 1
+            fi
+            ;;
+        *)
+            show_help
+            fatal_error "Error: Unknown option: $1"
+            ;;
+    esac
+done
+
+# Make sure EESSI is initialised
+check_eessi_initialised
+
+# Make sure that `EESSI-extend` will install in the site installation path EESSI_SITE_SOFTWARE_PATH
+export EESSI_SITE_INSTALL=1
+echo "EESSI_SITE_SOFTWARE_PATH=${EESSI_SITE_SOFTWARE_PATH}"
+
+# we need a directory we can use for temporary storage
+if [[ -z "${TEMP_DIR}" ]]; then
+    tmpdir=$(mktemp -d)
+else
+    mkdir -p "${TEMP_DIR}"
+    tmpdir=$(mktemp -d --tmpdir="${TEMP_DIR}" cuda_n_co.XXX)
+fi
+# mktemp can fail in either branch, so verify the result in both cases
+if [[ -z "${tmpdir}" ]] || [[ ! -d "${tmpdir}" ]]; then
+    fatal_error "Could not create temporary directory under '${TEMP_DIR:-${TMPDIR:-/tmp}}'"
+fi
+echo "Created temporary directory '${tmpdir}'"
+
+# use EESSI_SITE_SOFTWARE_PATH/.modules/all as MODULEPATH
+SAVE_MODULEPATH=${MODULEPATH}
+
+for EASYSTACK_FILE in "${TOPDIR}"/easystacks/eessi-*CUDA*.yml; do
+    # without a match the glob stays unexpanded, so bail out with a clear message
+    if [[ ! -f "${EASYSTACK_FILE}" ]]; then
+        fatal_error "No easystack file found matching ${EASYSTACK_FILE}"
+    fi
+    echo -e "Processing easystack file ${EASYSTACK_FILE}...\n\n"
+
+    # We don't want hooks used in this install, we need vanilla installations
+    touch "${tmpdir}"/none.py
+    export EASYBUILD_HOOKS="${tmpdir}/none.py"
+
+    # determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
+    eb_version=$(echo "${EASYSTACK_FILE}" | sed 's/.*eb-\([0-9.]*\).*/\1/g')
+
+    # Load EasyBuild version for this easystack file _before_ loading EESSI-extend
+    module avail EasyBuild
+    module load EasyBuild/${eb_version}
+    module load EESSI-extend/${EESSI_VERSION}-easybuild
+
+    # Install modules in hidden .modules dir to keep track of what was installed before
+    # (this action is temporary, and we do not call Lmod again within the current shell context, but in EasyBuild
+    # subshells, so loaded modules are not automatically unloaded)
+    MODULEPATH=${EESSI_SITE_SOFTWARE_PATH}/.modules/all
+    echo "set MODULEPATH=${MODULEPATH}"
+
+    # show EasyBuild configuration
+    echo "Show EasyBuild configuration"
+    eb --show-config
+
+    # do a 'eb --dry-run-short' with the EASYSTACK_FILE and determine list of packages
+    # to be installed
+    echo ">> Determining if packages specified in ${EASYSTACK_FILE} are missing under ${EESSI_SITE_SOFTWARE_PATH}"
+    eb_dry_run_short_out=${tmpdir}/eb_dry_run_short.out
+    eb --dry-run-short --easystack "${EASYSTACK_FILE}" 2>&1 | tee "${eb_dry_run_short_out}"
+    # PIPESTATUS[0] is the exit code of 'eb'; plain $? would be that of 'tee'
+    ret=${PIPESTATUS[0]}
+    if [ "${ret}" -ne 0 ]; then
+        fatal_error "'eb --dry-run-short' failed for ${EASYSTACK_FILE} (exit code ${ret})"
+    fi
+
+    # Check if CUDA shall be installed (lines starting with ' * [ ]' list missing modules)
+    cuda_install_needed=0
+    if grep "^ \* \[[ ]\]" "${eb_dry_run_short_out}" | grep -q "module: CUDA/"; then
+        cuda_install_needed=1
+    fi
+
+    # Make sure the CUDA EULA is accepted if it shall be installed
+    if [ "${cuda_install_needed}" -eq 1 ] && [ "${cuda_eula_accepted}" -ne 1 ]; then
+        show_help
+        error="\nCUDA shall be installed. However, the CUDA EULA has not been accepted\nYou _must_ accept the CUDA EULA via the appropriate command line option.\n"
+        fatal_error "${error}"
+    fi
+
+    # Check if cuDNN shall be installed
+    cudnn_install_needed=0
+    if grep "^ \* \[[ ]\]" "${eb_dry_run_short_out}" | grep -q "module: cuDNN/"; then
+        cudnn_install_needed=1
+    fi
+
+    # Make sure the cuDNN EULA is accepted if it shall be installed
+    if [ "${cudnn_install_needed}" -eq 1 ] && [ "${cudnn_eula_accepted}" -ne 1 ]; then
+        show_help
+        error="\ncuDNN shall be installed. However, the cuDNN EULA has not been accepted\nYou _must_ accept the cuDNN EULA via the appropriate command line option.\n"
+        fatal_error "${error}"
+    fi
+
+    # determine the number of packages to be installed (assume 5 GB + num_packages *
+    # 3GB space needed). Both CUDA and cuDNN are about this size
+    # (all sizes below are in kB, the unit reported by 'df --output=avail')
+    number_of_packages=$(grep "^ \* \[[ ]\]" "${eb_dry_run_short_out}" | sed -e 's/^.*module: //' | sort -u | wc -l)
+    echo "number of packages to be (re-)installed: '${number_of_packages}'"
+    base_storage_space=$((5000000 + number_of_packages * 3000000))
+
+    required_space_in_tmpdir=${base_storage_space}
+    # Let's see if we have sources and build locations defined; if not, we use the temporary space
+    if [[ -z "${EASYBUILD_BUILDPATH}" ]]; then
+        export EASYBUILD_BUILDPATH=${tmpdir}/build
+        required_space_in_tmpdir=$((required_space_in_tmpdir + base_storage_space))
+    fi
+    if [[ -z "${EASYBUILD_SOURCEPATH}" ]]; then
+        export EASYBUILD_SOURCEPATH=${tmpdir}/sources
+        required_space_in_tmpdir=$((required_space_in_tmpdir + base_storage_space))
+    fi
+
+    # The install is pretty fat, you need lots of space for download/unpack/install
+    # (~3*${base_storage_space}*1000 Bytes),
+    # need to do a space check before we proceed
+    avail_space=$(df --output=avail "${EESSI_SITE_SOFTWARE_PATH}"/ | tail -n 1 | awk '{print $1}')
+    min_disk_storage=$((3 * base_storage_space))
+    if (( avail_space < min_disk_storage )); then
+        fatal_error "Need at least $((min_disk_storage / 1000000)) GB disk space to install CUDA and other libraries under ${EESSI_SITE_SOFTWARE_PATH}, exiting now..."
+    fi
+    avail_space=$(df --output=avail "${tmpdir}"/ | tail -n 1 | awk '{print $1}')
+    if (( avail_space < required_space_in_tmpdir )); then
+        error="Need at least $((required_space_in_tmpdir / 1000000)) GB temporary disk space under ${tmpdir}.\n"
+        error="${error}Use the command line option --temp-dir to point to a location with adequate space to pass this check. "
+        error="${error}You can alternatively set EASYBUILD_BUILDPATH and/or EASYBUILD_SOURCEPATH "
+        error="${error}to reduce this requirement. Exiting now..."
+        fatal_error "${error}"
+    fi
+
+    # Brief explanation of parameters:
+    #  - prefix: using $tmpdir as default base directory for several EB settings
+    #  - installpath-modules: We install the module in a hidden .modules, so that next time this script
+    #                         is run, it is not reinstalled.
+    #  - accept-eula-for: only passed for the packages (CUDA and/or cuDNN) whose EULA was accepted
+    #                     via --accept-cuda-eula and/or --accept-cudnn-eula.
+    #  - hooks: We don't want hooks used in this install, we need vanilla
+    #           installations of CUDA and/or other libraries
+    #  - easystack: Path to easystack file that defines which packages shall be
+    #               installed
+    accept_eula_opt=
+    if [[ ${cuda_eula_accepted} -eq 1 ]]; then
+        accept_eula_opt="CUDA"
+    fi
+    if [[ ${cudnn_eula_accepted} -eq 1 ]]; then
+        if [[ -z ${accept_eula_opt} ]]; then
+            accept_eula_opt="cuDNN"
+        else
+            accept_eula_opt="${accept_eula_opt},cuDNN"
+        fi
+    fi
+    # build the argument list as an array so that paths with whitespace survive intact
+    eb_args=(
+        "--prefix=${tmpdir}"
+        "--installpath-modules=${EASYBUILD_INSTALLPATH}/.modules"
+        "--hooks=${tmpdir}/none.py"
+        "--easystack" "${EASYSTACK_FILE}"
+    )
+    if [[ -n "${accept_eula_opt}" ]]; then
+        eb_args+=("--accept-eula-for=${accept_eula_opt}")
+    fi
+    echo "Running eb ${eb_args[*]}"
+    eb "${eb_args[@]}"
+    ret=$?
+    if [ "${ret}" -ne 0 ]; then
+        eb_last_log=$(unset EB_VERBOSE; eb --last-log)
+        cp -a "${eb_last_log}" .
+        fatal_error "some installation failed, please check EasyBuild logs ${PWD}/$(basename "${eb_last_log}")..."
+    else
+        echo_green "all installations at ${EESSI_SITE_SOFTWARE_PATH}/software/... succeeded!"
+    fi
+
+    # Restore MODULEPATH for next loop iteration
+    MODULEPATH=${SAVE_MODULEPATH}
+done
+
+# clean up tmpdir only once all easystack files have been processed: it holds
+# the hooks file (and possibly build/source directories) shared by all iterations
+rm -rf "${tmpdir:?}"