From d12470a761f69dc67ed9aaace9f7f95d870d09e1 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 29 Nov 2023 15:15:06 +0000 Subject: [PATCH 1/3] remove cve-2023-41914 role now openhpc 2.6.2 (and later) are released --- ansible/.gitignore | 4 +- ansible/roles/cve-2023-41914/README.md | 32 -------------- .../roles/cve-2023-41914/defaults/main.yml | 24 ----------- .../cve-2023-41914/tasks/install-rpms.yml | 42 ------------------- ansible/roles/cve-2023-41914/tasks/main.yml | 4 -- .../cve-2023-41914/tasks/post-upgrade.yml | 19 --------- .../cve-2023-41914/tasks/pre-upgrade.yml | 40 ------------------ .../roles/cve-2023-41914/tasks/validate.yml | 22 ---------- 8 files changed, 2 insertions(+), 185 deletions(-) delete mode 100644 ansible/roles/cve-2023-41914/README.md delete mode 100644 ansible/roles/cve-2023-41914/defaults/main.yml delete mode 100644 ansible/roles/cve-2023-41914/tasks/install-rpms.yml delete mode 100644 ansible/roles/cve-2023-41914/tasks/main.yml delete mode 100644 ansible/roles/cve-2023-41914/tasks/post-upgrade.yml delete mode 100644 ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml delete mode 100644 ansible/roles/cve-2023-41914/tasks/validate.yml diff --git a/ansible/.gitignore b/ansible/.gitignore index 47a79a28a..1d06436bb 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -40,8 +40,6 @@ roles/* !roles/proxy/** !roles/resolv_conf/ !roles/resolv_conf/** -!roles/cve-2023-41914 -!roles/cve-2023-41914/** !roles/cluster_infra/ !roles/cluster_infra/** !roles/image_build_infra/ @@ -55,3 +53,5 @@ roles/* !roles/persist_hostkeys/ !roles/persist_hostkeys/** !roles/requirements.yml +!roles/prometheus/ +!roles/prometheus/** diff --git a/ansible/roles/cve-2023-41914/README.md b/ansible/roles/cve-2023-41914/README.md deleted file mode 100644 index 02c650857..000000000 --- a/ansible/roles/cve-2023-41914/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# cve-2023-41914 - -This role fixes [Slurm CVE-2023-41914](https://lists.schedmd.com/pipermail/slurm-announce/2023/000100.html): - -> A number of race conditions have been identified within the slurmd/slurmstepd processes that can lead to the user taking ownership of an arbitrary file on the system. A related issue can lead to the user overwriting an arbitrary file on the compute node (although with data that is not directly under their control). A related issue can also lead to the user deleting all files and sub-directories of an arbitrary target directory on the compute node. - -**NB:** It is only suitable for use on systems installed from OpenHPC v2.6.1 (Slurm v22.05). - -At the time of writing, new OpenHPC packages have been built but are not available from the respositories (reference), hence `dnf update ...` is not available. - -This role can be run in two ways: - -1. To remediate an existing system, run `tasks/main.yml`, e.g. using the playbook `ansible/adhoc/cve-2023-41914.yml`. This will: -- Stop all Slurm services -- Backup the slurmdbd mysql database to the volume-backed directory `/var/lib/state/mysql-backups/` on the control node (by default). -- Uninstall the affected packages and install updated rpms from the OpenHPC build system. -- Restart Slurm services. - - **NB**: This playbook will ALWAYS stop and restart Slurm, even if no updates are actually required. - -2. To remediate images during build (i.e no Slurm services are running, no slurm database exists), run `tasks/install-rpms.yml`, e.g. using the following in an environment pre-hook: - -```yaml -- hosts: builder - gather_facts: no - become: yes - tasks: - - name: Apply fixes for cve-2023-41914 - import_role: - name: cve-2023-41914 - tasks_from: install-rpms.yml -``` diff --git a/ansible/roles/cve-2023-41914/defaults/main.yml b/ansible/roles/cve-2023-41914/defaults/main.yml deleted file mode 100644 index 685c6619c..000000000 --- a/ansible/roles/cve-2023-41914/defaults/main.yml +++ /dev/null @@ -1,24 +0,0 @@ - -# _cve_2023_41814_installed_slurm: [] -cve_2023_41914_mysql_backup_path: "{{ mysql_datadir }}-backups/{{ lookup('pipe', 'date --iso-8601=seconds') }}.sql" - -cve_2023_41914_rpm_url: http://obs.openhpc.community:82/OpenHPC:/2.6.2:/Factory/EL_8/x86_64 -cve_2023_41914_rpms: # see cve_2023_41914_rpm_url - - slurm-ohpc # has to be first as dependency - - slurm-contribs-ohpc - - slurm-devel-ohpc - - slurm-example-configs-ohpc - - slurm-libpmi-ohpc - - slurm-ohpc-slurmrestd - - slurm-openlava-ohpc - - slurm-pam_slurm-ohpc - - slurm-perlapi-ohpc - - slurm-slurmctld-ohpc - - slurm-slurmd-ohpc - - slurm-slurmdbd-ohpc - - slurm-sview-ohpc - - slurm-torque-ohpc -cve_2023_41914_rpm_fix_ver: '22.05.10' -cve_2023_41914_rpm_fix_release: '2.1.ohpc.2.6.2' -_cve_2023_41814_updates: [] -cve_2023_41914_pkglist_path: "{{ appliances_environment_root }}/{{ inventory_hostname }}-cve_2023_41814_updates" diff --git a/ansible/roles/cve-2023-41914/tasks/install-rpms.yml b/ansible/roles/cve-2023-41914/tasks/install-rpms.yml deleted file mode 100644 index 42168fd9b..000000000 --- a/ansible/roles/cve-2023-41914/tasks/install-rpms.yml +++ /dev/null @@ -1,42 +0,0 @@ -- name: Validate suitability - include_tasks: validate.yml - when: _cve_2023_41814_installed_pkgs is undefined - -- name: Identify packages to update - set_fact: - _cve_2023_41814_updates: "{{ _cve_2023_41814_updates + [item] }}" - loop: "{{ cve_2023_41914_rpms }}" - when: - - item in ansible_facts.packages - - cve_2023_41914_rpm_fix_ver is version(ansible_facts.packages[item][0].version, '>') - -- name: Write packages to be modified to a file - # allows recovery from failures in subsequent package deletion/rpm install - copy: - dest: "{{ cve_2023_41914_pkglist_path }}" - content: "{{ _cve_2023_41814_updates | to_nice_yaml }}" - when: _cve_2023_41814_updates | length > 0 - delegate_to: localhost - -- name: Read packages to modify - set_fact: - _cve_2023_41814_updates: "{{ lookup('file', cve_2023_41914_pkglist_path) | from_yaml }}" - -- name: Identify architecture - setup: - gather_subset: architecture - -- name: Remove installed packages - dnf: - name: "{{ _cve_2023_41814_updates }}" - state: absent - -- name: Install rpms - dnf: - name: "{{ cve_2023_41914_rpm_url }}/{{ item }}-{{ cve_2023_41914_rpm_fix_ver }}-{{ cve_2023_41914_rpm_fix_release }}.{{ ansible_architecture }}.rpm" - loop: "{{ _cve_2023_41814_updates }}" - register: _cve_2023_41814_rpm_installs - -- name: Reload systemd units - command: systemctl daemon-reload - when: _cve_2023_41814_rpm_installs.changed diff --git a/ansible/roles/cve-2023-41914/tasks/main.yml b/ansible/roles/cve-2023-41914/tasks/main.yml deleted file mode 100644 index 83053baab..000000000 --- a/ansible/roles/cve-2023-41914/tasks/main.yml +++ /dev/null @@ -1,4 +0,0 @@ -- include_tasks: validate.yml -- include_tasks: pre-upgrade.yml -- include_tasks: install-rpms.yml -- include_tasks: post-upgrade.yml diff --git a/ansible/roles/cve-2023-41914/tasks/post-upgrade.yml b/ansible/roles/cve-2023-41914/tasks/post-upgrade.yml deleted file mode 100644 index d9540faa0..000000000 --- a/ansible/roles/cve-2023-41914/tasks/post-upgrade.yml +++ /dev/null @@ -1,19 +0,0 @@ -- name: Start slurmdbd - systemd: - name: slurmdbd - state: started - # NB: this approach is only suitable for minor version upgrades - # major ones may timeout on service start due to db upgrades - when: openhpc_enable.database | default('false') | bool - -- name: Start slurmctld - systemd: - name: slurmctld - state: started - when: openhpc_enable.control | default('false') | bool - -- name: Start slurmd - systemd: - name: slurmd - state: started - when: openhpc_enable.batch | default('false') | bool or 'login' in group_names diff --git a/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml b/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml deleted file mode 100644 index 59629a482..000000000 --- a/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml +++ /dev/null @@ -1,40 +0,0 @@ -- name: Stop slurmd - systemd: - name: slurmd - state: stopped - when: openhpc_enable.batch | default('false') | bool or 'login' in group_names - -- name: Stop slurmctld - systemd: - name: slurmctld - state: stopped - when: openhpc_enable.control | default('false') | bool - -- name: Stop slurmdbd - systemd: - name: slurmdbd - state: stopped - when: openhpc_enable.database | default('false') | bool - -- name: Ensure backup directory exists - file: - path: "{{ cve_2023_41914_mysql_backup_path | dirname }}" - state: directory - owner: root - group: root - when: openhpc_enable.control | default(false) | bool - -- name: Ensure mysqldump tool installed - dnf: - name: mysql - when: openhpc_enable.control | default(false) | bool - -- name: Backup database - community.mysql.mysql_db: - name: slurm_acct_db - state: dump - target: "{{ cve_2023_41914_mysql_backup_path }}" - login_user: root - login_password: "{{ mysql_root_password }}" - login_host: "{{ mysql_host }}" - when: openhpc_enable.control | default(false) | bool diff --git a/ansible/roles/cve-2023-41914/tasks/validate.yml b/ansible/roles/cve-2023-41914/tasks/validate.yml deleted file mode 100644 index 5da1afdc2..000000000 --- a/ansible/roles/cve-2023-41914/tasks/validate.yml +++ /dev/null @@ -1,22 +0,0 @@ -- name: Get package facts - package_facts: - -- name: Set fact for installed Slurm packages - # this is a subset (same format) as ansible_facts.packages - set_fact: - _cve_2023_41814_installed_pkgs: "{{ ansible_facts.packages | dict2items | selectattr('key', 'match', 'slurm-') | items2dict }}" - -- name: Ensure only a single version of all slurm-* packages is installed - assert: - that: item.value | length == 1 - loop: "{{ _cve_2023_41814_installed_pkgs | dict2items }}" - -- name: Ensure major version of installed Slurm matches upgrade - assert: - that: _slurm_installed_major_ver == ['22', '05'] - fail_msg: "{{ item.key }} has major version {{ _slurm_installed_major_ver | join('.') }}, expecting 22.05" - loop: "{{ _cve_2023_41814_installed_pkgs | dict2items }}" - when: item.key.startswith('slurm') - vars: - _slurm_installed_major_ver: "{{ item.value[0].version.split('.')[0:2] }}" - From 90a7053985f55c510cef2267b5324e8b50b1edd2 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 29 Nov 2023 15:17:24 +0000 Subject: [PATCH 2/3] remove cve-2023-41914 adhoc --- ansible/adhoc/cve-2023-41914.yml | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 ansible/adhoc/cve-2023-41914.yml diff --git a/ansible/adhoc/cve-2023-41914.yml b/ansible/adhoc/cve-2023-41914.yml deleted file mode 100644 index e4b907d44..000000000 --- a/ansible/adhoc/cve-2023-41914.yml +++ /dev/null @@ -1,6 +0,0 @@ -- hosts: openhpc - gather_facts: no - become: yes - tasks: - - import_role: - name: cve-2023-41914 From e3a60742fa369c9dbe2e57d897384977a644310d Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Thu, 14 Dec 2023 14:08:58 +0000 Subject: [PATCH 3/3] Remove non-existent prometheus role from .gitignore --- ansible/.gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/ansible/.gitignore b/ansible/.gitignore index 1d06436bb..ff35312d3 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -53,5 +53,3 @@ roles/* !roles/persist_hostkeys/ !roles/persist_hostkeys/** !roles/requirements.yml -!roles/prometheus/ -!roles/prometheus/**