Skip to content

Commit

Permalink
Santis aarch64 (#70)
Browse files Browse the repository at this point in the history
Update to support Grace-Hopper and Santis

- add gh200 as a target uarch
- add santis runner and deployment information
- add weather-climate platform recipes for building ICON
- update the netcdf-tools recipe for arm64
  • Loading branch information
bcumming authored Apr 5, 2024
1 parent 435db16 commit 2b114af
Show file tree
Hide file tree
Showing 38 changed files with 453 additions and 34 deletions.
3 changes: 2 additions & 1 deletion ci/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ stages:

pipeline-configure:
stage: configure
tags: [clariden-login-baremetal]
# temporary workaround: we need somewhere to run the initial configuration stage
tags: [santis-login-baremetal]
script:
- ./ci/configure-pipeline
artifacts:
Expand Down
7 changes: 2 additions & 5 deletions ci/config/ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,8 @@ def readenv(config):
# - JOB_ID (if needed?)
if os.getenv("UENVCITEST", default=None) is not None:
os.environ["system"] = "santis"
os.environ["uarch"] = "zen2"
os.environ["uenv"] = "gromacs:2023"
#os.environ["system"] = "clariden"
#os.environ["uarch"] = "a100"
#os.environ["uenv"] = "gromacs:2023"
os.environ["uarch"] = "gh200"
os.environ["uenv"] = "netcdf-tools:2024"

# read and validate the configuration
print(recipe_path)
Expand Down
21 changes: 15 additions & 6 deletions ci/config/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import schema

valid_uarch = ["zen2", "zen3", "a100", "mi200"]
valid_uarch = ["zen2", "zen3", "a100", "mi200", "gh200"]

class ConfigError(Exception):
"""ConfigError when an invalid configuration is entered.
Expand All @@ -25,8 +25,9 @@ def __init__(self, message):
super().__init__(self.message)

class Version:
def __init__(self, name, desc, recipe_path):
def __init__(self, name, uenv_name, desc, recipe_path):
self._name = name
self._uenv_name = uenv_name
self._recipes = desc["recipes"]
self._deploy = desc["deploy"]
self._use_spack_develop = desc["develop"]
Expand All @@ -52,8 +53,16 @@ def uarch(self):
# list of the uarchs that this version has a recipe for (the keys of its recipes table)
return [n for n in self._recipes.keys()]

def recipe_path(self, uarch):
return self._recipe_path / self._recipes[uarch]
def recipe_path(self, uarch, relative=False):
# search first for self._recipe_path / uenv_name / arch-specific-recipe,
# then fall back to self._recipe_path / arch-specific-recipe
relpaths = [self._uenv_name + "/" + self._recipes[uarch], self._recipes[uarch]]
for relpath in relpaths:
fullpath = self._recipe_path / relpath
if fullpath.is_dir():
return pathlib.Path("recipes")/relpath if relative else fullpath

raise FileNotFoundError(f"the path for {self._name}@{uarch} ({self._recipes[uarch]}) does not exist")

def recipe(self, uarch):
return self._recipes[uarch]
Expand All @@ -65,7 +74,7 @@ def deployments(self):
class Uenv:
def __init__(self, name, desc, recipe_path):
self._name = name
self._versions = [Version(v, desc[v], recipe_path / name) for v in desc.keys()]
self._versions = [Version(v, name, desc[v], recipe_path) for v in desc.keys()]

@property
def name(self):
Expand Down Expand Up @@ -163,7 +172,7 @@ def recipe(self, name, version, uarch):
if u is not None:
for v in u.versions:
if v.name==version and uarch in v.uarch:
return "recipes/" + name + "/" + v.recipe(uarch)
return v.recipe_path(uarch, relative=True)

return None

Expand Down
6 changes: 3 additions & 3 deletions ci/config/schema/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"properties": {
"uarch": {
"type": "string",
"enum": ["zen2", "zen3", "a100", "mi200"]
"enum": ["zen2", "zen3", "a100", "mi200", "gh200"]
},
"partition": {"type": "string"},
"variables": {
Expand Down Expand Up @@ -81,7 +81,7 @@
"type":"object",
"additionalProperties": false,
"patternProperties": {
"^(zen2|zen3|a100|mi200)$": {"type": "string"}
"^(zen2|zen3|a100|mi200|gh200)$": {"type": "string"}
}
},
"deploy": {
Expand All @@ -93,7 +93,7 @@
"type": "array",
"items": {
"type": "string",
"enum": ["zen2", "zen3", "a100", "mi200"]
"enum": ["zen2", "zen3", "a100", "mi200", "gh200"]
}
}
}
Expand Down
1 change: 1 addition & 0 deletions ci/config/templates/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ stages:
variables:
SLURM_TIMELIMIT: 180
script:
- echo "==== RECIPE $STACK_RECIPE"
- ./stack-build -n $STACK_NAME -s $STACK_SYSTEM -r $STACK_RECIPE -b /dev/shm/jenkssl $SPACK_DEVELOP -m $STACK_MOUNT -u $STACK_UARCH
after_script:
- rm -Rf /dev/shm/jenkssl
Expand Down
26 changes: 19 additions & 7 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,10 @@ clusters:
santis:
targets:
-
uarch: 'zen2'
uarch: 'gh200'
partition: 'normal'
runner:
slurm-tag: santis-spack-stack-builder
# there is no baremetal tag as of now.
#baremetal-tag: null
slurm-tag: santis-spack-stack-builder
baremetal-tag: santis-login-baremetal
uenvs:
gromacs:
Expand All @@ -56,6 +54,14 @@ uenvs:
eiger: [zen2]
clariden: [a100]
develop: False
icon-wcp:
"v1":
recipes:
a100: wcp/icon/v1/a100
gh200: wcp/icon/v1/gh200
deploy:
santis: [gh200]
develop: False
lammps:
"2023":
recipes:
Expand Down Expand Up @@ -87,9 +93,10 @@ uenvs:
netcdf-tools:
"2024":
recipes:
zen2: '2024'
zen3: '2024'
a100: '2024'
zen2: '2024/x86'
zen3: '2024/x86'
a100: '2024/x86'
gh200: '2024/arm64'
deploy:
eiger: [zen2]
develop: False
Expand All @@ -111,6 +118,11 @@ uenvs:
# deploy to both the production and test clusters
eiger: [zen2]
clariden: [a100]
"24.2":
recipes:
gh200: 24.2/gh200
deploy:
santis: [gh200]
quantumespresso:
"v7.1":
recipes:
Expand Down
5 changes: 5 additions & 0 deletions recipes/netcdf-tools/2024/arm64/compilers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bootstrap:
spec: [email protected]
gcc:
specs:
- [email protected]
File renamed without changes.
24 changes: 24 additions & 0 deletions recipes/netcdf-tools/2024/arm64/environments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
gcc-env:
compiler:
- toolchain: gcc
spec: gcc@12
mpi:
spec: cray-mpich
gpu: false
unify: true
specs:
- hdf5
- netcdf-c
- netcdf-cxx4
- netcdf-fortran
- ncview
- cdo
- nco
#- ferret
views:
default:
link: roots
packages:
- git
- perl

File renamed without changes.
5 changes: 0 additions & 5 deletions recipes/netcdf-tools/2024/compilers.yaml

This file was deleted.

5 changes: 5 additions & 0 deletions recipes/netcdf-tools/2024/x86/compilers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bootstrap:
spec: [email protected]
gcc:
specs:
- [email protected]
6 changes: 6 additions & 0 deletions recipes/netcdf-tools/2024/x86/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: netcdf-tools
store: /user-environment
description: utilities used for climate/weather analysis and file manipulation
spack:
repo: https://github.com/spack/spack.git
commit: releases/v0.21
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
gcc-env:
compiler:
- toolchain: gcc
spec: gcc@11
spec: gcc@12
mpi:
spec: cray-mpich@8.1.25
spec: cray-mpich
gpu: false
unify: true
specs:
Expand Down
23 changes: 23 additions & 0 deletions recipes/netcdf-tools/2024/x86/modules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
modules:
# Paths to check when creating modules for all module sets
prefix_inspections:
bin:
- PATH
lib:
- LD_LIBRARY_PATH
lib64:
- LD_LIBRARY_PATH

default:
arch_folder: false
# Where to install modules
roots:
tcl: /user-environment/modules
tcl:
all:
autoload: none
hash_length: 0
exclude_implicits: true
exclude: ['%[email protected]', 'gcc %[email protected]']
projections:
all: '{name}/{version}'
5 changes: 5 additions & 0 deletions recipes/prgenv-gnu/24.2/gh200/compilers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bootstrap:
spec: [email protected]
gcc:
specs:
- [email protected]
7 changes: 7 additions & 0 deletions recipes/prgenv-gnu/24.2/gh200/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: prgenv-gnu
spack:
commit: releases/v0.21
repo: https://github.com/spack/spack.git
store: /user-environment
description: GNU Compiler toolchain with cray-mpich, Python, CMake and other development tools.

28 changes: 28 additions & 0 deletions recipes/prgenv-gnu/24.2/gh200/environments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
gcc-env:
compiler:
- toolchain: gcc
spec: gcc
mpi:
spec: [email protected]
gpu: cuda
unify: true
specs:
- aws-ofi-nccl@master
- cmake
- [email protected]
- fftw
- fmt
- hdf5
- nccl
- nccl-tests
- ninja
- openblas threads=openmp
- [email protected]
- [email protected]
variants:
- +mpi
- +cuda
- cuda_arch=90
views:
default:
link: roots
11 changes: 11 additions & 0 deletions recipes/prgenv-gnu/24.2/gh200/extra/reframe.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
default:
features:
- cuda
- mpi
- osu-micro-benchmarks
- openmp
- serial
cc: mpicc
cxx: mpic++
ftn: mpifort
activation: /user-environment/env/default/activate.sh
23 changes: 23 additions & 0 deletions recipes/prgenv-gnu/24.2/gh200/modules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
modules:
# Paths to check when creating modules for all module sets
prefix_inspections:
bin:
- PATH
lib:
- LD_LIBRARY_PATH
lib64:
- LD_LIBRARY_PATH

default:
arch_folder: false
# Where to install modules
roots:
tcl: /user-environment/modules
tcl:
all:
autoload: none
hash_length: 0
exclude_implicits: true
exclude: ['%[email protected]', 'gcc %[email protected]']
projections:
all: '{name}/{version}'
9 changes: 9 additions & 0 deletions recipes/wcp/icon/v1/a100/compilers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
bootstrap:
spec: [email protected]
gcc:
specs:
- [email protected]
llvm:
requires: gcc@11
specs:
- [email protected]
6 changes: 6 additions & 0 deletions recipes/wcp/icon/v1/a100/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: icon-wcp
store: /user-environment
spack:
commit: releases/v0.21
repo: https://github.com/spack/spack.git
description: The tools required to build ICON
Loading

0 comments on commit 2b114af

Please sign in to comment.