Skip to content

Commit

Permalink
Merge pull request #69 from itkovian/24.05.ug-before-reduce-patches
Browse files Browse the repository at this point in the history
24.05.ug before reduce patches
  • Loading branch information
hajgato authored Dec 19, 2024
2 parents a5cab9d + 30a0b7d commit 7a6ac98
Show file tree
Hide file tree
Showing 44 changed files with 1,302 additions and 680 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CI workflow: configure, build, test and install Slurm on every push or
# pull request that targets one of the maintained .ug branches.
name: slurm C/C++ build

on:
  push:
    branches: [20.11.ug, 22.05.ug, 24.05.ug]
  pull_request:
    branches: [20.11.ug, 22.05.ug, 24.05.ug]

jobs:
  build:
    # The ubuntu-20.04 hosted runner image has been retired by GitHub.
    runs-on: ubuntu-22.04

    steps:
      - uses: actions/checkout@v4
      - name: Install deps
        # Refresh the package index first so the install does not fail on a
        # stale index baked into the runner image.
        run: |
          sudo apt-get update
          sudo apt-get install -y libmunge-dev
      - name: configure
        run: ./configure --enable-multiple-slurmd --prefix=/tmp/slurm/
      - name: make
        # Bound parallelism to the runner's CPU count; a bare -j is unlimited.
        run: make -j"$(nproc)"
      - name: make check
        run: make -j"$(nproc)" check
      - name: make install
        run: make -j"$(nproc)" install
4 changes: 2 additions & 2 deletions META
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
Name: slurm
Major: 24
Minor: 05
Micro: 4
Version: 24.05.4
Micro: 5
Version: 24.05.5
Release: 1

##
Expand Down
22 changes: 22 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
This file describes changes in recent versions of Slurm. It primarily
documents those changes that are of interest to users and administrators.

* Changes in Slurm 24.05.6
==========================

* Changes in Slurm 24.05.5
==========================
-- Fix issue signaling cron jobs resulting in unintended requeues.
Expand All @@ -14,6 +17,25 @@ documents those changes that are of interest to users and administrators.
removal of a dynamic node.
-- gpu/nvml - Attempt loading libnvidia-ml.so.1 as a fallback for failure in
loading libnvidia-ml.so.
-- slurmrestd - Fix populating non-required object fields of objects as '{}' in
JSON/YAML instead of 'null' causing compiled OpenAPI clients to reject
the response to 'GET /slurm/v0.0.40/jobs' due to validation failure of
'.jobs[].job_resources'.
-- Fix sstat/sattach protocol errors for steps on higher version slurmd's
(regressions since 20.11.0rc1 and 16.05.1rc1 respectively).
-- slurmd - Avoid a crash when starting slurmd version 24.05 with
SlurmdSpoolDir files that have been upgraded to a newer major version of
Slurm. Log warnings instead.
-- Fix race condition in stepmgr step completion handling.
-- Fix slurmctld segfault with stepmgr and MpiParams when running a job array.
-- Fix requeued jobs keeping their priority until the decay thread happens.
-- slurmctld - Fix crash and possible split brain issue if the
backup controller handles an scontrol reconfigure while in control
before the primary resumes operation.
-- Fix stepmgr not getting dynamic node addrs from the controller.
-- stepmgr - avoid "Unexpected missing socket" errors.
-- Fix `scontrol show steps` with dynamic stepmgr.
-- Support IPv6 in configless mode.

* Changes in Slurm 24.05.4
==========================
Expand Down
7 changes: 7 additions & 0 deletions bdist_rpm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# Thin entry point for building the RPMs: all work is delegated to build.sh,
# which is expected in the current working directory.

# -e: stop at the first failing command; -x: trace commands as they run.
set -ex

./build.sh
150 changes: 150 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/bin/bash
#
# Build Slurm RPMs from the git checkout in the current directory.
# This first section derives the version/release identifiers that the rest
# of the script uses.

set -e  # abort on the first failing command (for a pipeline, only the last command's status counts)
set -x  # trace each command before it is executed

# Absolute location of this script; build output directories live beneath it.
SCRIPT=$(readlink -f "${BASH_SOURCE[0]}")
ORIGIN=$(dirname "$SCRIPT")

# Version and release as declared in slurm.spec (read from the current directory).
VERSION=$(grep "Version:.*[0-9]" slurm.spec | tr -s " " | awk '{print $2;}')
RELEASE=$(grep "%define rel.*[-1-9]" slurm.spec | tr -s " " | awk '{print $3}')

# Naming suffix: the bare version for release 1, "<version>-<release>" otherwise.
SUFFIX=${VERSION}
if [ "${RELEASE}" != "1" ]; then
  SUFFIX=${SUFFIX}-${RELEASE}
fi

# Commit timestamp and abbreviated hash of HEAD, joined by a dot.
GITTAG=$(git log --format=%ct.%h -1)

# which version to download from github
# NOTE(review): SLURM_VERSION and UPSTREAM_REL are not referenced again in
# this script; confirm whether anything else still consumes them.
SLURM_VERSION=${VERSION:-24.05.3}
UPSTREAM_REL=${UPSTREAM_REL:-1}

# which release should be used for our RPMs (falls back to 1 when the spec
# lookup above produced nothing)
OUR_RELEASE=${RELEASE:-1}

# NVML
# NVIDIA_DRIVER may deliberately be set to the _empty_ string (this is used in
# the pipeline); in that case the unversioned driver package is requested.

#######################################
# Choose the NVML-related package names for the running OS release.
# Releases 8.8, 9.2 and 9.4 are recognised, matching the releases accepted by
# the dependency-version table further down in this script. On any other
# release the variables are left untouched.
# Globals:
#   NVIDIA_DRIVER  (read; defaults to 555.42.06 only when unset)
#   CUDA_VERSION   (read; defaults to 12.6 when unset or empty)
#   NVDRV_NVML_PKG (written)
#   CUDA_NVML_PKG  (written)
# Arguments:
#   $1 - release file to inspect (default: /etc/redhat-release)
# Returns:
#   0 always; an unreadable release file only produces grep's error message.
#######################################
select_nvml_packages() {
  local release_file=${1:-/etc/redhat-release}

  if grep -Eq 'release (8\.8|9\.2|9\.4)' "$release_file"; then
    # "-" (not ":-") keeps an explicitly empty NVIDIA_DRIVER empty.
    NVIDIA_DRIVER=${NVIDIA_DRIVER-555.42.06}
    # Append "-<version>" only when a driver version is actually given.
    NVDRV_NVML_PKG="nvidia-driver-NVML${NVIDIA_DRIVER:+-$NVIDIA_DRIVER}"
    CUDA_VERSION=${CUDA_VERSION:-12.6}
    # Package names use dashes instead of dots: 12.6 -> cuda-nvml-devel-12-6
    CUDA_NVML_PKG="cuda-nvml-devel-${CUDA_VERSION//./-}"
  fi
}

select_nvml_packages

# Prepare directory structure
# ${ORIGIN:?} aborts the script instead of wiping /rpmbuild/ and /dist/ should
# ORIGIN ever be empty.
rm -Rf -- "${ORIGIN:?}/rpmbuild/" "${ORIGIN:?}/dist/"
# SPECS is created as well so the spec file copy below lands in the build tree.
mkdir -p -- "$ORIGIN"/rpmbuild/{BUILD,RPMS,SRPMS,SOURCES,SPECS} "$ORIGIN/dist"
echo "Building source tarball"

# archive git repo
echo "archive suffix: ${SUFFIX}"
# The tarball goes into the tree created above (anchored at $ORIGIN, not at the
# current directory), which is also the _topdir later handed to rpmbuild.
git archive --format=tar.gz -o "$ORIGIN/rpmbuild/SOURCES/slurm-${SUFFIX}.tar.gz" --prefix="slurm-${SUFFIX}/" HEAD

# Keep a copy of the spec next to the sources; previously this created a stray
# file called "SPECS" in the working directory because no such directory existed.
cp -- slurm.spec "$ORIGIN/rpmbuild/SPECS/"

# Patch sources
#for src_patch in $ORIGIN/src-patches/*.patch; do
#echo "Patching $src_patch"
#git am $src_patch
#done

# Patch spec file
#for spec_patch in $ORIGIN/spec-patches/*.patch; do
#echo "Patching $spec_patch"
#patch -p1 -b -i $spec_patch
#done

# Install dependencies
# see https://slurm.schedmd.com/quickstart_admin.html#build_install

echo "Installing specfile requires"
# builddep runs before the explicit installs further down because it might
# install undesired stuff related to `--with` options
sudo dnf -y builddep slurm.spec

# dependency versions

# Succeeds when /etc/redhat-release mentions the given release number; like a
# plain grep, the matching line is printed.
on_release() {
  grep "release $1" /etc/redhat-release
}

# Pin UCX to an exact build and require minimum PMIx/hwloc versions per
# supported OS release; any other release aborts the build.
if on_release "8.8"; then
  UCX_VERSION="1.13.1-2.el8.x86_64"
  PMIX_VERSION=">= 4.2.6"
  HWLOC_VERSION=">= 2.2.0-3"
elif on_release "9.2"; then
  UCX_VERSION="1.13.1-2.el9.x86_64"
  PMIX_VERSION=">= 4.2.7"
  HWLOC_VERSION=">= 2.4.1-5"
elif on_release "9.4"; then
  UCX_VERSION="1.15.0-2.el9.x86_64"
  PMIX_VERSION=">= 4.2.7"
  HWLOC_VERSION=">= 2.4.1-5"
else
  echo "unsupported OS release"
  exit 1
fi

# Install the development packages needed for the enabled features/plugins.
echo "Installing dependencies"
# - features: basic
sudo dnf -y install lua-devel mariadb-devel lz4-devel
# - features: authentication (MUNGE: yes, JWT: yes, PAM: yes)
sudo dnf -y install munge-devel libjwt-devel pam-devel
# - features: slurmrestd
sudo dnf -y install http-parser-devel json-c-devel libyaml-devel
# - features: Nvidia NVML
# The patterns are quoted so that dnf, not the shell, expands them; unquoted
# they could be replaced by matching file names from the working directory.
sudo dnf -y autoremove 'cuda-nvml-*' 'nvidia-driver-NVML-*' 'nvidia-driver*' 'libnvidia-ml*'
# Fail with a clear message rather than handing dnf an empty package name when
# no NVML packages were selected for this OS release.
sudo dnf -y install \
  "${CUDA_NVML_PKG:?no CUDA NVML package selected for this OS release}" \
  "${NVDRV_NVML_PKG:?no NVIDIA driver NVML package selected for this OS release}" \
  "nvidia-driver-devel"
# - plugins: MPI
# "name >= version" is passed as a single argument so dnf sees one package spec.
sudo dnf -y install pmix "pmix-devel ${PMIX_VERSION}" "ucx-devel-${UCX_VERSION}"
# - plugins: cgroup/v2
# see https://slurm.schedmd.com/cgroup_v2.html
sudo dnf -y install kernel-headers dbus-devel
# - plugins: task/cgroup, task/affinity
sudo dnf -y install "hwloc-devel ${HWLOC_VERSION}" numactl-devel
# - plugins: acct_gather_profile/hdf5
sudo dnf -y install hdf5-devel

# Macro definitions handed to every rpmbuild call: the git tag of the build
# and the rpmbuild top directory below $ORIGIN.
RPM_DEFINES=(
  --define "gittag ${GITTAG}"
  --define "_topdir ${ORIGIN}/rpmbuild"
)

# Conditional build switches shared by all rpmbuild calls, grouped by purpose.
SLURM_BUILDOPTS=(
  # REST daemon on, debug build off
  --with slurmrestd
  --without debug
  # basic features
  --with lua
  --with mysql
  --with x11
  # plugins
  --with numa
  --with hwloc
  --with pmix
  --with ucx
  # authentication
  --with pam
  --with jwt
)

# rpmbuild output is piped through tee. Without pipefail the pipeline reports
# tee's exit status, so a failed rpmbuild would not stop the script under
# `set -e`. The option stays enabled for the remainder of the script.
set -o pipefail

# Distribution tag (e.g. ".el9"), evaluated once instead of per package.
DIST_TAG=$(rpm -E '%dist')

#######################################
# Re-release every freshly built slurm RPM into $ORIGIN/dist.
# Globals:   ORIGIN, SUFFIX, OUR_RELEASE, GITTAG, DIST_TAG (read)
# Arguments: $1 - trailing part of the new release string ("nogpu.ug" or "ug")
#######################################
rebuild_rpms() {
  local release_tail=$1
  local rpm_file

  for rpm_file in "$ORIGIN"/rpmbuild/RPMS/x86_64/slurm-*"$SUFFIX"*.rpm; do
    rpmrebuild --release="${OUR_RELEASE}.${GITTAG}${DIST_TAG}.${release_tail}" \
      -d "$ORIGIN/dist" -p "$rpm_file"
  done
}

echo "Running rpmbuild (without nvml)"
rpmbuild -ba "${RPM_DEFINES[@]}" "${SLURM_BUILDOPTS[@]}" --without nvml \
  slurm.spec 2>&1 | tee rpmbuild-without-nvml.out

echo "Doing rpm rebuild (without nvml)"
rebuild_rpms "nogpu.ug"

echo "Running rpmbuild (with nvml)"
# The NVML-enabled build additionally passes the CUDA version as a macro.
RPM_DEFINES+=( --define "_cuda_version $CUDA_VERSION" )
rpmbuild -ba "${RPM_DEFINES[@]}" "${SLURM_BUILDOPTS[@]}" --with nvml \
  slurm.spec 2>&1 | tee rpmbuild-with-nvml.out

echo "Doing rpm rebuild (with nvml)"
rebuild_rpms "ug"

# strip out torque binaries/wrapper from slurm-torque
# The sed filter drops the pbsnodes, mpiexec and bin/q* entries from the
# package's spec file list; it is applied to both the nogpu and the regular
# slurm-torque package.
torque_filter="sed '/\(pbsnodes\|mpiexec\|bin\/q.\+\)/d'"
torque_release="${OUR_RELEASE}.${GITTAG}$(rpm -E '%dist')"
rpmrebuild -d "$ORIGIN/dist" --change-spec-files="$torque_filter" \
  -p "$ORIGIN"/dist/x86_64/slurm-torque-*-"${torque_release}".nogpu.ug*.rpm
rpmrebuild -d "$ORIGIN/dist" --change-spec-files="$torque_filter" \
  -p "$ORIGIN"/dist/x86_64/slurm-torque-*-"${torque_release}".ug.*.rpm

# get the RPMs out of the subdirectories
# First discard the intermediate RPMs left in the rpmbuild tree, then move the
# rebuilt ones from dist/ into rpmbuild/RPMS/x86_64/. Both paths are anchored
# at $ORIGIN, and the "+" form makes find report a failing rm/mv.
find "$ORIGIN/rpmbuild" -type f -name '*.rpm' -exec rm -- {} +
find "$ORIGIN/dist/" -type f -name '*.rpm' \
  -exec mv -t "$ORIGIN/rpmbuild/RPMS/x86_64/" -- {} +
Loading

0 comments on commit 7a6ac98

Please sign in to comment.