diff --git a/changelogs/changelog.yaml b/changelogs/changelog.yaml index e2bab5b..5b01cb4 100644 --- a/changelogs/changelog.yaml +++ b/changelogs/changelog.yaml @@ -1,3 +1,4 @@ +--- ancestor: null releases: 0.1.0: diff --git a/roles/ceems_exporter/defaults/main.yml b/roles/ceems_exporter/defaults/main.yml index 152d239..2c0498e 100644 --- a/roles/ceems_exporter/defaults/main.yml +++ b/roles/ceems_exporter/defaults/main.yml @@ -16,7 +16,7 @@ ceems_exporter_enabled_collectors: [] ceems_exporter_disabled_collectors: [] ceems_exporter_create_unique_jobids: false ceems_exporter_slurm_job_props_dir: "" -ceems_exporter_gpu_type: nvidia +ceems_exporter_gpu_type: "" ceems_exporter_gpu_job_map_dir: "" ceems_exporter_emissions_country_code: FR diff --git a/roles/ceems_exporter/meta/argument_specs.yml b/roles/ceems_exporter/meta/argument_specs.yml index c2615ee..1cc565e 100644 --- a/roles/ceems_exporter/meta/argument_specs.yml +++ b/roles/ceems_exporter/meta/argument_specs.yml @@ -57,15 +57,12 @@ argument_specs: ceems_exporter_gpu_type: description: - GPU type. - - Check the L(example scripts,https://github.com/mahendrapaipuri/ceems_monitoring/tree/main/etc/slurm/epilog.d). - default: nvidia - choices: - - nvidia - - amd + - Currently, nVIDIA and AMD GPUs are supported. + default: "" ceems_exporter_gpu_job_map_dir: description: - Directory where files containing mapping of SLURM job ID to GPU ordinals are created by Epilog scripts. - - Currently, nVIDIA and AMD GPUs are supported. + - Check the L(example scripts,https://github.com/mahendrapaipuri/ceems_monitoring/tree/main/etc/slurm/epilog.d). default: "" ceems_exporter_emissions_country_code: description: ISO 3166-1 alpha-3 Country code to export emission data. diff --git a/roles/ceems_exporter/molecule/alternative/molecule.yml b/roles/ceems_exporter/molecule/alternative/molecule.yml index 2e40dbf..bd4aa02 100644 --- a/roles/ceems_exporter/molecule/alternative/molecule.yml +++ b/roles/ceems_exporter/molecule/alternative/molecule.yml @@ -15,8 +15,10 @@ provisioner: ceems_exporter_basic_auth_users: randomuser: examplepassword go_arch: amd64 - ceems_exporter_slurm_job_props_dir: /run/slurmjobprops - ceems_exporter_gpu_job_map_dir: /run/gpujobmap - ceems_exporter_ipmi_dcmi_cmd: ipmi-dcmi + ceems_exporter_enabled_collectors: + - emissions + ceems_exporter_disabled_collectors: + - slurm + ceems_exporter_ipmi_dcmi_cmd: "sudo ipmi-dcmi" ceems_exporter_env_vars: EMAPS_API_TOKEN: foo diff --git a/roles/ceems_exporter/molecule/alternative/prepare.yml b/roles/ceems_exporter/molecule/alternative/prepare.yml index 8e2e9bd..5f76f91 100644 --- a/roles/ceems_exporter/molecule/alternative/prepare.yml +++ b/roles/ceems_exporter/molecule/alternative/prepare.yml @@ -45,12 +45,3 @@ dest: "{{ ceems_exporter_tls_server_config.cert_file }}" - src: /tmp/tls.key dest: "{{ ceems_exporter_tls_server_config.key_file }}" - - - name: Create slurm job props and GPU job map directories - ansible.builtin.file: - path: "{{ item }}" - state: directory - mode: "0755" - loop: - - "{{ ceems_exporter_slurm_job_props_dir }}" - - "{{ ceems_exporter_gpu_job_map_dir }}" diff --git a/roles/ceems_exporter/molecule/alternative/tests/test_alternative.py b/roles/ceems_exporter/molecule/alternative/tests/test_alternative.py index 53362de..983f1dd 100644 --- a/roles/ceems_exporter/molecule/alternative/tests/test_alternative.py +++ b/roles/ceems_exporter/molecule/alternative/tests/test_alternative.py @@ -17,6 +17,14 @@ def test_directories(host, dir): assert d.exists +@pytest.mark.parametrize("file", [ + "/etc/sudoers.d/allow-ipmi-dcmi", +]) +def test_files(host, file): + f = host.file(file) + assert f.exists + + def test_service(host): s = host.service("ceems_exporter") try: @@ -35,10 +43,7 @@ def test_systemd_properties(host): p = s.systemd_properties assert p.get("ProtectHome") == "yes" assert p.get("Environment") == "EMAPS_API_TOKEN=foo" - # Seems like this test will fail on centos 7 - if host.system_info.distribution != "centos" and host.system_info.release != "7": - assert p.get("AmbientCapabilities") == "cap_setgid cap_setuid" - assert p.get("CapabilityBoundingSet") == "cap_setgid cap_setuid" + assert p.get("AmbientCapabilities") in [None, "", "0", "False", False, "No", "no"] @pytest.mark.parametrize("socket", [ diff --git a/roles/ceems_exporter/tasks/configure.yml b/roles/ceems_exporter/tasks/configure.yml index 8618269..f8c4f7b 100644 --- a/roles/ceems_exporter/tasks/configure.yml +++ b/roles/ceems_exporter/tasks/configure.yml @@ -31,7 +31,8 @@ user: "{{ ceems_exporter_system_user }}" commands: /usr/sbin/ipmi-dcmi when: - - '"ipmi_dcmi" in ceems_exporter_disabled_collectors' + - 'not "ipmi_dcmi" in ceems_exporter_disabled_collectors' + - '"sudo" in ceems_exporter_ipmi_dcmi_cmd' - ceems_exporter_system_user != "root" - name: Allow ceems_exporter port in SELinux on RedHat OS family diff --git a/roles/ceems_exporter/templates/ceems_exporter.service.j2 b/roles/ceems_exporter/templates/ceems_exporter.service.j2 index 4e82947..cd262ab 100644 --- a/roles/ceems_exporter/templates/ceems_exporter.service.j2 +++ b/roles/ceems_exporter/templates/ceems_exporter.service.j2 @@ -61,17 +61,19 @@ StartLimitInterval=0 {% endfor %} ProtectHome={{ ns.protect_home }} -{% if "sudo" not in ceems_exporter_ipmi_dcmi_cmd and - "ipmi_dcmi" not in ceems_exporter_disabled_collectors and +{% if "ipmi_dcmi" not in ceems_exporter_disabled_collectors and + "sudo" not in ceems_exporter_ipmi_dcmi_cmd and ceems_exporter_system_user != "root" %} {% set _ = ns.caps.extend(['CAP_SETUID', 'CAP_SETGID']) %} {% endif %} -{% if ceems_exporter_create_unique_jobids and - "slurm" not in ceems_exporter_disabled_collectors and + +{% if "slurm" not in ceems_exporter_disabled_collectors and + ceems_exporter_create_unique_jobids and not ceems_exporter_slurm_job_props_dir and ceems_exporter_system_user != "root" %} {% set _ = ns.caps.extend(['CAP_SYS_PTRACE', 'CAP_DAC_READ_SEARCH']) %} {% endif %} + {% if ceems_exporter_gpu_type and not ceems_exporter_gpu_job_map_dir and ceems_exporter_system_user != "root" %} diff --git a/roles/nvidia_dcgm_exporter/defaults/main.yml b/roles/nvidia_dcgm_exporter/defaults/main.yml index c42339a..0fa9a41 100644 --- a/roles/nvidia_dcgm_exporter/defaults/main.yml +++ b/roles/nvidia_dcgm_exporter/defaults/main.yml @@ -1,6 +1,6 @@ --- dcgm_exporter_version: 3.3.3-3.3.1 -golang_version: 1.21.5 + dcgm_exporter_binary_local_dir: "" dcgm_exporter_git_url: https://github.com/NVIDIA/dcgm-exporter.git diff --git a/roles/nvidia_dcgm_exporter/meta/argument_specs.yml b/roles/nvidia_dcgm_exporter/meta/argument_specs.yml index 3d3700e..0869c69 100644 --- a/roles/nvidia_dcgm_exporter/meta/argument_specs.yml +++ b/roles/nvidia_dcgm_exporter/meta/argument_specs.yml @@ -11,9 +11,6 @@ argument_specs: dcgm_exporter_version: description: DCGM exporter package version. Also accepts latest as parameter. default: 3.3.0-3.2.0 - golang_version: - description: Go version used to compile dcgm_exporter. - default: 1.21.5 dcgm_exporter_skip_install: description: DCGM exporter installation tasks gets skipped when set to true. type: bool diff --git a/roles/nvidia_dcgm_exporter/tasks/install.yml b/roles/nvidia_dcgm_exporter/tasks/install.yml index 874e1b5..ae6d8df 100644 --- a/roles/nvidia_dcgm_exporter/tasks/install.yml +++ b/roles/nvidia_dcgm_exporter/tasks/install.yml @@ -29,7 +29,7 @@ run_once: true ansible.builtin.get_url: url: "{{ _golang_download_url }}" - dest: /tmp/golang-{{ golang_version }}.linux-{{ go_arch }}.tar.gz + dest: /tmp/golang-{{ _golang_version }}.linux-{{ go_arch }}.tar.gz mode: "0644" register: _download_golang until: _download_golang is succeeded @@ -42,7 +42,7 @@ become: false run_once: true ansible.builtin.unarchive: - src: /tmp/golang-{{ golang_version }}.linux-{{ go_arch }}.tar.gz + src: /tmp/golang-{{ _golang_version }}.linux-{{ go_arch }}.tar.gz dest: /tmp creates: /tmp/go delegate_to: localhost diff --git a/roles/nvidia_dcgm_exporter/vars/main.yml b/roles/nvidia_dcgm_exporter/vars/main.yml index 9b5ef03..072063d 100644 --- a/roles/nvidia_dcgm_exporter/vars/main.yml +++ b/roles/nvidia_dcgm_exporter/vars/main.yml @@ -7,6 +7,8 @@ go_arch_map: armv6l: armv6 go_arch: "{{ go_arch_map[ansible_architecture] | default(ansible_architecture) }}" + _nvidia_dcgm_exporter_repo: NVIDIA/dcgm-exporter _github_api_headers: "{{ {'GITHUB_TOKEN': lookup('ansible.builtin.env', 'GITHUB_TOKEN')} if (lookup('ansible.builtin.env', 'GITHUB_TOKEN')) else {} }}" -_golang_download_url: https://go.dev/dl/go{{ golang_version }}.linux-{{ go_arch }}.tar.gz +_golang_version: 1.21.5 +_golang_download_url: https://go.dev/dl/go{{ _golang_version }}.linux-{{ go_arch }}.tar.gz