diff --git a/deployments/systemd/nvidia-cdi-refresh.path b/deployments/systemd/nvidia-cdi-refresh.path
new file mode 100644
index 000000000..a1c1e2419
--- /dev/null
+++ b/deployments/systemd/nvidia-cdi-refresh.path
@@ -0,0 +1,25 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[Unit]
+Description=Trigger CDI refresh on NVIDIA driver install / uninstall events
+
+[Path]
+# %v expands to the running kernel release, so only the module dependency
+# files of the booted kernel are watched.
+PathChanged=/lib/modules/%v/modules.dep
+PathChanged=/lib/modules/%v/modules.dep.bin
+
+[Install]
+WantedBy=multi-user.target
diff --git a/deployments/systemd/nvidia-cdi-refresh.service b/deployments/systemd/nvidia-cdi-refresh.service
new file mode 100644
index 000000000..d07fd5956
--- /dev/null
+++ b/deployments/systemd/nvidia-cdi-refresh.service
@@ -0,0 +1,30 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[Unit]
+Description=Refresh NVIDIA CDI specification file
+ConditionPathExists=/usr/bin/nvidia-smi
+ConditionPathExists=/usr/bin/nvidia-ctk
+
+[Service]
+Type=oneshot
+# Skip the refresh unless modules.dep of the running kernel lists an NVIDIA
+# kernel module. -F matches '/nvidia.ko' as a literal string, so the '.' is
+# not treated as a regex wildcard.
+ExecCondition=/usr/bin/grep -qF '/nvidia.ko' /lib/modules/%v/modules.dep
+ExecStart=/usr/bin/nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml
+CapabilityBoundingSet=CAP_SYS_MODULE CAP_SYS_ADMIN CAP_MKNOD
+
+[Install]
+WantedBy=multi-user.target
diff --git a/docker/Dockerfile.debian b/docker/Dockerfile.debian
index 4b3c535f5..03f36f84e 100644
--- a/docker/Dockerfile.debian
+++ b/docker/Dockerfile.debian
@@ -55,6 +55,7 @@ RUN make PREFIX=${DIST_DIR} cmds
 
 WORKDIR $DIST_DIR
 COPY packaging/debian ./debian
+COPY deployments/systemd/ .
 
 ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
 ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
diff --git a/docker/Dockerfile.opensuse-leap b/docker/Dockerfile.opensuse-leap
index f1ce31ecc..354e94f2a 100644
--- a/docker/Dockerfile.opensuse-leap
+++ b/docker/Dockerfile.opensuse-leap
@@ -46,6 +46,9 @@ RUN make PREFIX=${DIST_DIR} cmds
 
 WORKDIR $DIST_DIR/..
 COPY packaging/rpm .
+# Copy the systemd units into ${DIST_DIR} (not the working directory, which is
+# its parent) so they sit next to the binaries built with PREFIX=${DIST_DIR}.
+COPY deployments/systemd/ ${DIST_DIR}/
 
 ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
 ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
diff --git a/docker/Dockerfile.rpm-yum b/docker/Dockerfile.rpm-yum
index 1a429f589..ee49b0748 100644
--- a/docker/Dockerfile.rpm-yum
+++ b/docker/Dockerfile.rpm-yum
@@ -71,6 +71,7 @@ RUN make PREFIX=${DIST_DIR} cmds
 
 WORKDIR $DIST_DIR/..
 COPY packaging/rpm .
+COPY deployments/systemd/ ${DIST_DIR}/
 
 ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
 ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
index cfa930bb6..af16823cd 100644
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -53,6 +53,7 @@ RUN make PREFIX=${DIST_DIR} cmds
 
 WORKDIR $DIST_DIR
 COPY packaging/debian ./debian
+COPY deployments/systemd/ .
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION} diff --git a/packaging/debian/nvidia-container-toolkit-base.install b/packaging/debian/nvidia-container-toolkit-base.install index 62c0d0956..53f214ca0 100644 --- a/packaging/debian/nvidia-container-toolkit-base.install +++ b/packaging/debian/nvidia-container-toolkit-base.install @@ -1,3 +1,5 @@ nvidia-container-runtime /usr/bin nvidia-ctk /usr/bin nvidia-cdi-hook /usr/bin +nvidia-cdi-refresh.service /etc/systemd/system/ +nvidia-cdi-refresh.path /etc/systemd/system/ diff --git a/packaging/debian/nvidia-container-toolkit-base.postinst b/packaging/debian/nvidia-container-toolkit-base.postinst index 7ee72e46f..578a27fb9 100644 --- a/packaging/debian/nvidia-container-toolkit-base.postinst +++ b/packaging/debian/nvidia-container-toolkit-base.postinst @@ -5,6 +5,16 @@ set -e case "$1" in configure) /usr/bin/nvidia-ctk --quiet config --config-file=/etc/nvidia-container-runtime/config.toml --in-place + + if command -v systemctl >/dev/null 2>&1 \ + && systemctl --quiet is-system-running 2>/dev/null; then + + systemctl daemon-reload || true + + if [ -z "$2" ]; then # $2 empty → first install + systemctl enable --now nvidia-cdi-refresh.path || true + fi + fi ;; abort-upgrade|abort-remove|abort-deconfigure) diff --git a/packaging/debian/rules b/packaging/debian/rules index f3908b290..d57bd1d43 100755 --- a/packaging/debian/rules +++ b/packaging/debian/rules @@ -5,3 +5,14 @@ %: dh $@ + +override_dh_fixperms: + dh_fixperms + chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime-hook || true + chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime || true + chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime.cdi || true + chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime.legacy || true + chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-ctk || true + chmod 755 debian/$(shell 
dh_listpackages)/usr/bin/nvidia-cdi-hook || true + chmod 644 debian/$(shell dh_listpackages)/etc/systemd/system/nvidia-cdi-refresh.service || true + chmod 644 debian/$(shell dh_listpackages)/etc/systemd/system/nvidia-cdi-refresh.path || true diff --git a/packaging/rpm/SPECS/nvidia-container-toolkit.spec b/packaging/rpm/SPECS/nvidia-container-toolkit.spec index 605528272..ea8791c95 100644 --- a/packaging/rpm/SPECS/nvidia-container-toolkit.spec +++ b/packaging/rpm/SPECS/nvidia-container-toolkit.spec @@ -17,6 +17,8 @@ Source3: nvidia-container-runtime Source4: nvidia-container-runtime.cdi Source5: nvidia-container-runtime.legacy Source6: nvidia-cdi-hook +Source7: nvidia-cdi-refresh.service +Source8: nvidia-cdi-refresh.path Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2 Provides: nvidia-container-runtime @@ -28,16 +30,20 @@ Requires: nvidia-container-toolkit-base == %{version}-%{release} Provides tools and utilities to enable GPU support in containers. %prep -cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} . +cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} %{SOURCE7} %{SOURCE8} . 
%install mkdir -p %{buildroot}%{_bindir} +mkdir -p %{buildroot}/etc/systemd/system/ + install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook +install -m 644 -t %{buildroot}/etc/systemd/system nvidia-cdi-refresh.service +install -m 644 -t %{buildroot}/etc/systemd/system nvidia-cdi-refresh.path %post if [ $1 -gt 1 ]; then # only on package upgrade @@ -45,6 +51,14 @@ if [ $1 -gt 1 ]; then # only on package upgrade cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit fi +# Reload systemd unit cache +/bin/systemctl daemon-reload || : + +# On fresh install ($1 == 1) enable the path unit so it starts at boot +if [ "$1" -eq 1 ]; then + /bin/systemctl enable --now nvidia-cdi-refresh.path || : +fi + %posttrans if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then # repairing lost file nvidia-container-runtime-hook @@ -89,6 +103,8 @@ Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit %{_bindir}/nvidia-container-runtime %{_bindir}/nvidia-ctk %{_bindir}/nvidia-cdi-hook +/etc/systemd/system/nvidia-cdi-refresh.service +/etc/systemd/system/nvidia-cdi-refresh.path # The OPERATOR EXTENSIONS package consists of components that are required to enable GPU support in Kubernetes. # This package is not distributed as part of the NVIDIA Container Toolkit RPMs.