Skip to content

Commit 7263d26

Browse files
committed
Add feature gate to require NVIDIA kernel modules
This change adds an opt-in feature to the NVIDIA Container Runtime that only uses the NVIDIA runtime if the NVIDIA kernel modules are loaded. Signed-off-by: Evan Lezar <[email protected]>
1 parent efb18a7 commit 7263d26

File tree

2 files changed

+22
-0
lines changed

2 files changed

+22
-0
lines changed

internal/config/features.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ type features struct {
2121
// DisableImexChannelCreation ensures that the implicit creation of
2222
// requested IMEX channels is skipped when invoking the nvidia-container-cli.
2323
DisableImexChannelCreation *feature `toml:"disable-imex-channel-creation,omitempty"`
24+
// RequireNvidiaKernelModules indicates that the NVIDIA kernel module must be
25+
// loaded for the NVIDIA Container Runtime to perform any OCI spec modifications.
26+
RequireNvidiaKernelModules *feature `toml:"require-nvidia-kernel-module,omitempty"`
2427
}
2528

2629
//nolint:unused

internal/runtime/runtime_factory.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package runtime
1818

1919
import (
2020
"fmt"
21+
"os"
2122

2223
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
2324
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
@@ -41,6 +42,11 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
4142
return lowLevelRuntime, nil
4243
}
4344

45+
if cfg.Features.RequireNvidiaKernelModules.IsEnabled() && !isNvidiaModuleLoaded() {
46+
logger.Tracef("NVIDIA driver modules are not yet loaded; skipping modifer")
47+
return lowLevelRuntime, nil
48+
}
49+
4450
ociSpec, err := oci.NewSpec(logger, argv)
4551
if err != nil {
4652
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
@@ -62,6 +68,19 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
6268
return r, nil
6369
}
6470

71+
// isNvidiaModuleLoaded checks whether the NVIDIA GPU driver is installed
72+
// and the kernel module is available.
73+
func isNvidiaModuleLoaded() bool {
74+
// TODO: This was implemented as:
75+
// cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1
76+
// if [ "${?}" != "0" ]; then
77+
// echo "nvidia driver modules are not yet loaded, invoking runc directly"
78+
// exec runc "$@"
79+
// fi
80+
_, err := os.Stat("/proc/driver/nvidia/version")
81+
return err == nil
82+
}
83+
6584
// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
6685
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
6786
rawSpec, err := ociSpec.Load()

0 commit comments

Comments
 (0)