Skip to content

Commit a28eba7

Browse files
authored
Improve Slurm cgroup.conf support (#195)
* Support defining custom cgroup.conf options * Restart Slurm if cgroup.conf has changed
1 parent fb1e398 commit a28eba7

File tree

4 files changed

+24
-6
lines changed

4 files changed

+24
-6
lines changed

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,14 @@ partition configuration for each.
107107
[slurm.conf](https://slurm.schedmd.com/slurm.conf.html). Keys are slurm.conf
108108
parameter names and values are lists or strings as appropriate. This can be
109109
used to supplement or override the template defaults. Templated parameters can
110-
also be removed by setting the value to the literal string`'omit'` - note
110+
also be removed by setting the value to the literal string `'omit'` - note
111+
that this is *not the same* as the Ansible `omit` [special variable](https://docs.ansible.com/ansible/latest/reference_appendices/special_variables.html#term-omit).
112+
113+
`openhpc_cgroup_config`: Optional. Mapping of additional parameters and values for
114+
[cgroup.conf](https://slurm.schedmd.com/cgroup.conf.html). Keys are cgroup.conf
115+
parameter names and values are lists or strings as appropriate. This can be
116+
used to supplement or override the template defaults. Templated parameters can
117+
also be removed by setting the value to the literal string `'omit'` - note
111118
that this is *not the same* as the Ansible `omit` [special variable](https://docs.ansible.com/ansible/latest/reference_appendices/special_variables.html#term-omit).
112119

113120
`openhpc_ram_multiplier`: Optional, default `0.95`. Multiplier used in the calculation: `total_memory * openhpc_ram_multiplier` when setting `RealMemory` for the partition in slurm.conf. Can be overridden on a per-partition basis using `openhpc_slurm_partitions.ram_multiplier`. Has no effect if `openhpc_slurm_partitions.ram_mb` is set.

defaults/main.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,14 @@ openhpc_default_config:
4040
PropagateResourceLimitsExcept: MEMLOCK
4141
Epilog: /etc/slurm/slurm.epilog.clean
4242
ReturnToService: 2
43+
openhpc_cgroup_default_config:
44+
ConstrainCores: "yes"
45+
ConstrainDevices: "yes"
46+
ConstrainRAMSpace: "yes"
47+
ConstrainSwapSpace: "yes"
4348

4449
openhpc_config: {}
50+
openhpc_cgroup_config: {}
4551
openhpc_gres_template: gres.conf.j2
4652
openhpc_slurm_configless: "{{ 'enable_configless' in openhpc_config.get('SlurmctldParameters', []) }}"
4753

tasks/runtime.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@
105105
owner: root
106106
group: root
107107
when: openhpc_enable.control | default(false) or not openhpc_slurm_configless | bool
108+
notify:
109+
- Restart slurmctld service
110+
register: ohpc_cgroup_conf
111+
# NB uses restart rather than reload as this is needed in some cases
108112

109113
- name: Remove local tempfile for slurm.conf templating
110114
ansible.builtin.file:
@@ -139,7 +143,7 @@
139143
changed_when: true
140144
when:
141145
- openhpc_slurm_control_host in ansible_play_hosts
142-
- hostvars[openhpc_slurm_control_host].ohpc_slurm_conf.changed or hostvars[openhpc_slurm_control_host].ohpc_gres_conf.changed # noqa no-handler
146+
- hostvars[openhpc_slurm_control_host].ohpc_slurm_conf.changed or hostvars[openhpc_slurm_control_host].ohpc_cgroup_conf.changed or hostvars[openhpc_slurm_control_host].ohpc_gres_conf.changed # noqa no-handler
143147
notify:
144148
- Restart slurmd service
145149

templates/cgroup.conf.j2

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
# See man slurm.conf and man cgroup.conf for further
66
# information on cgroup configuration parameters
77
#--
8-
ConstrainCores=yes
9-
ConstrainDevices=yes
10-
ConstrainRAMSpace=yes
11-
ConstrainSwapSpace=yes
8+
{% for k, v in openhpc_cgroup_default_config | combine(openhpc_cgroup_config) | items %}
9+
{% if v != "omit" %}{# allow removing items by setting the value to the literal string 'omit' #}
10+
{{ k }}={{ v | join(',') if (v is sequence and v is not string) else v }}
11+
{% endif %}
12+
{% endfor %}

0 commit comments

Comments
 (0)