Skip to content

Commit d25970b

Browse files
OFED builder workflow (#1132)
Add OFED workflow
1 parent d243298 commit d25970b

File tree

9 files changed

+460
-1
lines changed

9 files changed

+460
-1
lines changed
+254
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
---
# Manually triggered workflow that provisions a builder VM with Terraform,
# configures it with Kayobe, builds the OFED packages on it, uploads them and
# finally destroys the VM (the Destroy step runs even when earlier steps fail).
name: Build OFED packages
on:
  workflow_dispatch:
    inputs:
      # NOTE(review): this input is not referenced by any step below; the
      # workflow always builds for Rocky Linux 9.
      rocky9:
        description: Build Rocky Linux 9
        type: boolean
        default: true
    # NOTE(review): `secrets` is documented for `workflow_call` triggers only;
    # confirm it is intended (and accepted) under `workflow_dispatch`.
    secrets:
      KAYOBE_VAULT_PASSWORD:
        required: true
      CLOUDS_YAML:
        required: true
      OS_APPLICATION_CREDENTIAL_ID:
        required: true
      OS_APPLICATION_CREDENTIAL_SECRET:
        required: true

env:
  # Quoted so the environment variable is an explicit string rather than a
  # YAML boolean.
  ANSIBLE_FORCE_COLOR: "True"
  KAYOBE_ENVIRONMENT: ci-builder
  KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}
jobs:
  overcloud-ofed-packages:
    name: Build OFED packages
    if: github.repository == 'stackhpc/stackhpc-kayobe-config'
    runs-on: arc-skc-host-image-builder-runner
    permissions: {}
    steps:
      # Pinned to a release tag instead of the mutable `main` branch: this job
      # exposes cloud credentials and the vault password to its steps.
      - name: Install Package
        uses: ConorMacBride/install-package@v1
        with:
          apt: git unzip nodejs python3-pip python3-venv openssh-server openssh-client jq

      - name: Start the SSH service
        run: |
          sudo /etc/init.d/ssh start

      # The configuration repository lives under src/kayobe-config; every
      # later path is relative to the workspace root unless a step sets
      # working-directory.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          path: src/kayobe-config

      # Reads defaultbranch from .gitreview and strips the leading
      # "stable/" or "unmaintained/" prefix.
      - name: Determine OpenStack release
        id: openstack_release
        run: |
          BRANCH=$(awk -F'=' '/defaultbranch/ {print $2}' src/kayobe-config/.gitreview)
          echo "openstack_release=${BRANCH}" | sed -E "s,(stable|unmaintained)/,," >> $GITHUB_OUTPUT

      - name: Clone StackHPC Kayobe repository
        uses: actions/checkout@v4
        with:
          repository: stackhpc/kayobe
          ref: refs/heads/stackhpc/${{ steps.openstack_release.outputs.openstack_release }}
          path: src/kayobe

      - name: Install Kayobe
        run: |
          mkdir -p venvs &&
          pushd venvs &&
          python3 -m venv kayobe &&
          source kayobe/bin/activate &&
          pip install -U pip &&
          pip install ../src/kayobe

      - name: Install terraform
        uses: hashicorp/setup-terraform@v2

      - name: Initialise terraform
        run: terraform init
        working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio

      - name: Generate SSH keypair
        run: ssh-keygen -f id_rsa -N ''
        working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio

      # The secret is passed through the environment rather than templated
      # into the script, so the shell never expands `$`, backticks or
      # backslashes that the credentials file may contain.
      - name: Generate clouds.yaml
        run: printf '%s\n' "$CLOUDS_YAML" > clouds.yaml
        working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio
        env:
          CLOUDS_YAML: ${{ secrets.CLOUDS_YAML }}

      # This step runs from the workspace root, so the path must include the
      # src/kayobe-config checkout directory.
      - name: Output image tag
        id: image_tag
        run: |
          echo image_tag=$(grep stackhpc_rocky_9_overcloud_host_image_version: src/kayobe-config/etc/kayobe/pulp-host-image-versions.yml | awk '{print $2}') >> $GITHUB_OUTPUT

      # The builder image is always named overcloud-rocky-9-<image tag>.
      - name: Output image name
        id: image_name
        run: |
          echo image_name=overcloud-rocky-9-${{ steps.image_tag.outputs.image_tag }} >> $GITHUB_OUTPUT

      - name: Generate terraform.tfvars
        run: |
          cat << EOF > terraform.tfvars
          ssh_public_key = "id_rsa.pub"
          ssh_username = "cloud-user"
          aio_vm_name = "skc-ofed-builder"
          aio_vm_image = "${{ env.VM_IMAGE }}"
          aio_vm_flavor = "en1.medium"
          aio_vm_network = "stackhpc-ci"
          aio_vm_subnet = "stackhpc-ci"
          aio_vm_interface = "ens3"
          EOF
        working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio
        env:
          VM_IMAGE: ${{ steps.image_name.outputs.image_name }}

      - name: Terraform Plan
        run: terraform plan
        working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio
        env:
          OS_CLOUD: "openstack"
          OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
          OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}

      # Up to five attempts; a failed attempt is torn down before retrying.
      - name: Terraform Apply
        run: |
          for attempt in $(seq 5); do
            if terraform apply -auto-approve; then
              echo "Created infrastructure on attempt $attempt"
              exit 0
            fi
            echo "Failed to create infrastructure on attempt $attempt"
            sleep 10
            terraform destroy -auto-approve
            sleep 60
          done
          echo "Failed to create infrastructure after $attempt attempts"
          exit 1
        working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio
        env:
          OS_CLOUD: "openstack"
          OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
          OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}

      # NOTE(review): the next step reads steps.tf_outputs.outputs.stdout,
      # which presumably comes from the setup-terraform wrapper script;
      # confirm the wrapper stays enabled.
      - name: Get Terraform outputs
        id: tf_outputs
        run: |
          terraform output -json
        working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio

      - name: Write Terraform outputs
        run: |
          cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-builder/tf-outputs.yml
          ${{ steps.tf_outputs.outputs.stdout }}
          EOF

      - name: Write Terraform network config
        run: |
          cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-builder/tf-network-allocation.yml
          ---
          aio_ips:
            builder: "{{ access_ip_v4.value }}"
          EOF

      - name: Write Terraform network interface config
        run: |
          mkdir -p src/kayobe-config/etc/kayobe/environments/$KAYOBE_ENVIRONMENT/inventory/group_vars/seed
          rm -f src/kayobe-config/etc/kayobe/environments/$KAYOBE_ENVIRONMENT/inventory/group_vars/seed/network-interfaces
          cat << EOF > src/kayobe-config/etc/kayobe/environments/$KAYOBE_ENVIRONMENT/inventory/group_vars/seed/network-interfaces
          admin_interface: "{{ access_interface.value }}"
          aio_interface: "{{ access_interface.value }}"
          EOF

      # Authorises the generated key on the runner and copies the keypair
      # into ~/.ssh.
      - name: Manage SSH keys
        run: |
          mkdir -p ~/.ssh
          touch ~/.ssh/authorized_keys
          cat src/kayobe-config/terraform/aio/id_rsa.pub >> ~/.ssh/authorized_keys
          cp src/kayobe-config/terraform/aio/id_rsa* ~/.ssh/

      - name: Bootstrap the control host
        run: |
          source venvs/kayobe/bin/activate &&
          source src/kayobe-config/kayobe-env --environment ci-builder &&
          kayobe control host bootstrap

      - name: Run growroot playbook
        run: |
          source venvs/kayobe/bin/activate &&
          source src/kayobe-config/kayobe-env --environment ci-builder &&
          kayobe playbook run src/kayobe-config/etc/kayobe/ansible/growroot.yml
        env:
          KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

      - name: Configure the seed host (Builder VM)
        run: |
          source venvs/kayobe/bin/activate &&
          source src/kayobe-config/kayobe-env --environment ci-builder &&
          kayobe seed host configure --skip-tags network,docker
        env:
          KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

      - name: Run a distro-sync
        run: |
          source venvs/kayobe/bin/activate &&
          source src/kayobe-config/kayobe-env --environment ci-builder &&
          kayobe seed host command run --become --command "dnf distro-sync --refresh"
        env:
          KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

      - name: Reset BLS entries on the seed host
        run: |
          source venvs/kayobe/bin/activate &&
          source src/kayobe-config/kayobe-env --environment ci-builder &&
          kayobe playbook run src/kayobe-config/etc/kayobe/ansible/reset-bls-entries.yml \
            -e "reset_bls_host=ofed-builder"
        env:
          KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

      # Strips every "noexec," mount option from /etc/fstab.
      - name: Disable noexec in /var/tmp
        run: |
          source venvs/kayobe/bin/activate &&
          source src/kayobe-config/kayobe-env --environment ci-builder &&
          kayobe seed host command run --become --command "sed -i 's/noexec,//g' /etc/fstab"
        env:
          KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

      - name: Reboot to apply the kernel update
        run: |
          source venvs/kayobe/bin/activate &&
          source src/kayobe-config/kayobe-env --environment ci-builder &&
          kayobe playbook run src/kayobe-config/etc/kayobe/ansible/reboot.yml
        env:
          KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

      - name: Run OFED builder playbook
        run: |
          source venvs/kayobe/bin/activate &&
          source src/kayobe-config/kayobe-env --environment ci-builder &&
          kayobe playbook run src/kayobe-config/etc/kayobe/ansible/build-ofed-rocky.yml
        env:
          KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

      - name: Run OFED upload playbook
        run: |
          source venvs/kayobe/bin/activate &&
          source src/kayobe-config/kayobe-env --environment ci-builder &&
          kayobe playbook run src/kayobe-config/etc/kayobe/ansible/push-ofed.yml
        env:
          KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }}

      # Always runs so the builder VM is removed even when a build step fails.
      - name: Destroy
        run: terraform destroy -auto-approve
        working-directory: ${{ github.workspace }}/src/kayobe-config/terraform/aio
        env:
          OS_CLOUD: openstack
          OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
          OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
        if: always()

doc/source/contributor/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ This guide is for contributors of the StackHPC Kayobe configuration project.
1212
environments/index
1313
package-updates
1414
pre-commit
15+
ofed

doc/source/contributor/ofed.rst

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
====
2+
OFED
3+
====
4+
5+
Warning: Experimental workflow subject to change
6+
7+
This section documents the workflow for building OFED packages for Release train integration.
8+
9+
The workflow builds the OFED kernel modules against the latest available kernel in Release train
10+
(as configured in SKC) and compiles them into RPM packages to be uploaded to Ark. Additionally,
11+
this workflow downloads the userspace OFED packages from the Nvidia repository and uploads these
12+
to Ark.
13+
14+
Workflow
15+
========
16+
17+
The workflow uses workflow_dispatch to manually request an OFED build, which will deploy a builder
18+
VM, apply kayobe config to the builder, upgrade the kernel, reboot, then run two Ansible playbooks
19+
for building and uploading OFED to Ark.
20+
21+
Pre-requisites
22+
--------------
23+
24+
Before building OFED packages, the workflow will ensure that:
25+
26+
* A full distro-sync has taken place, ensuring the kernel is upgraded.
27+
28+
* The bootloader has been configured to use the latest kernel.
29+
30+
* noexec is disabled in the temporary logical volume.
31+
32+
build-ofed
33+
----------
34+
35+
Currently we only support building Rocky Linux 9 OFED packages.
36+
37+
In order to set up OFED, we're required to build kernel modules for the OFED drivers as
38+
the kernels we provide in release train are unsupported by OFED. To accomplish this we
39+
will need to use the doca-kernel-support from the doca-extra repository.
40+
41+
We will need to install dependencies in order to build the OFED kernel modules, and these
42+
are installed at the beginning of the build playbook. We also install base and appstream
43+
dependencies of userspace OFED packages here; this is intended to stop these dependencies
44+
being pulled in later when we download the OFED packages from the doca-host repository.
45+
46+
At the end of the playbook following the kernel module build, the OFED userspace packages
47+
are downloaded from the upstream repository in order to upload these to Ark.
48+
49+
push-ofed
50+
---------
51+
52+
As we're not syncing OFED from any upstream source, and are instead creating our own
53+
repository of custom packages, we will be required to set up the Pulp distribution/publication
54+
and upload the content directly to Ark. This playbook uses the Pulp CLI to upload the RPMs
55+
to Ark.
+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
---
# Builds the OFED kernel modules on the ofed-builder host with the
# doca-kernel-support tool, then downloads the OFED userspace packages into
# the same directory (/home/cloud-user/ofed).
- name: Build OFED packages
  become: true
  hosts: ofed-builder
  gather_facts: false
  tasks:
    # Runs lineinfile in check mode purely as a search: nothing is removed,
    # and the task fails if any line of /etc/fstab still matches "noexec".
    - name: Check whether noexec is enabled for /var/tmp
      ansible.builtin.lineinfile:
        path: "/etc/fstab"
        regexp: "noexec"
        state: absent
      changed_when: false
      check_mode: true
      register: result
      failed_when: result.found

    - name: Install package dependencies
      ansible.builtin.dnf:
        name:
          - kpartx
          - perl
          - rpm-build
          - automake
          - patch
          - kernel
          - kernel-devel
          - autoconf
          - pciutils
          - kernel-modules-extra
          - kernel-rpm-macros
          - lsof
          - libtool
          - tk
          - gcc-gfortran
          - tcl
          - createrepo
          - cmake-filesystem
          - libnl3-devel
          - python3-devel
        state: latest
        update_cache: true

    # Installed straight from the NVIDIA download URL, with GPG checking
    # disabled for this package.
    - name: Add DOCA host repository package
      ansible.builtin.dnf:
        name: https://developer.nvidia.com/downloads/networking/secure/doca-sdk/DOCA_2.8/doca-host-2.8.0-204000_{{ stackhpc_pulp_doca_ofed_version }}_rhel9{{ stackhpc_pulp_repo_rocky_9_minor_version }}.x86_64.rpm
        disable_gpg_check: true

    - name: Install DOCA extra packages
      ansible.builtin.dnf:
        name: doca-extra

    # The mode is quoted so it is always passed as the octal string "0777"
    # and never reinterpreted as a number by the YAML parser.
    - name: Create build directory
      ansible.builtin.file:
        path: /home/cloud-user/ofed
        state: directory
        mode: "0777"

    # Rewrites the script's TMP_DIR=$1 assignment so that it uses the
    # directory created above.
    - name: Set build directory
      ansible.builtin.replace:
        path: /opt/mellanox/doca/tools/doca-kernel-support
        regexp: 'TMP_DIR=\$1'
        replace: 'TMP_DIR=/home/cloud-user/ofed'

    - name: Build OFED kernel modules
      ansible.builtin.shell:
        cmd: |
          /opt/mellanox/doca/tools/doca-kernel-support

    # download_only fetches the packages into the build directory without
    # installing them.
    - name: Download OFED userspace packages
      ansible.builtin.dnf:
        name: doca-ofed-userspace
        download_only: true
        download_dir: /home/cloud-user/ofed

0 commit comments

Comments
 (0)