From c7f19f18f5768161a0d3a39a983d0234c341a6eb Mon Sep 17 00:00:00 2001 From: Tariq Ibrahim Date: Tue, 1 Oct 2024 01:25:08 -0700 Subject: [PATCH] fetch current container runtime config through the command line Signed-off-by: Tariq Ibrahim --- cmd/nvidia-ctk/runtime/configure/configure.go | 42 ++++-- internal/config/toml.go | 15 ++ pkg/config/engine/api.go | 6 + pkg/config/engine/containerd/config_v1.go | 24 ++++ pkg/config/engine/containerd/config_v2.go | 11 ++ pkg/config/engine/containerd/containerd.go | 11 ++ pkg/config/engine/crio/crio.go | 27 +++- pkg/config/engine/crio/crio_test.go | 4 +- pkg/config/engine/docker/docker.go | 5 + pkg/config/toml/source-cli.go | 44 ++++++ pkg/config/toml/source.go | 13 ++ .../runtime/containerd/containerd.go | 15 ++ tools/container/runtime/crio/crio.go | 15 ++ tools/container/toolkit/toolkit.go | 132 ++++++++++++++++-- 14 files changed, 333 insertions(+), 31 deletions(-) create mode 100644 pkg/config/toml/source-cli.go diff --git a/cmd/nvidia-ctk/runtime/configure/configure.go b/cmd/nvidia-ctk/runtime/configure/configure.go index 0b321d606..a2729d746 100644 --- a/cmd/nvidia-ctk/runtime/configure/configure.go +++ b/cmd/nvidia-ctk/runtime/configure/configure.go @@ -44,6 +44,10 @@ const ( defaultContainerdConfigFilePath = "/etc/containerd/config.toml" defaultCrioConfigFilePath = "/etc/crio/crio.conf" defaultDockerConfigFilePath = "/etc/docker/daemon.json" + + runtimeContainerd = "containerd" + runtimeCrio = "crio" + runtimeDocker = "docker" ) type command struct { @@ -174,14 +178,14 @@ func (m command) validateFlags(c *cli.Context, config *config) error { config.mode = "config-file" switch config.runtime { - case "containerd", "crio", "docker": + case runtimeContainerd, runtimeCrio, runtimeDocker: break default: return fmt.Errorf("unrecognized runtime '%v'", config.runtime) } switch config.runtime { - case "containerd", "crio": + case runtimeContainerd, runtimeCrio: if config.nvidiaRuntime.path == defaultNVIDIARuntimeExecutable { 
config.nvidiaRuntime.path = defaultNVIDIARuntimeExpecutablePath } @@ -190,7 +194,7 @@ func (m command) validateFlags(c *cli.Context, config *config) error { } } - if config.runtime != "containerd" && config.runtime != "docker" { + if config.runtime != runtimeContainerd && config.runtime != runtimeDocker { if config.cdi.enabled { m.logger.Warningf("Ignoring cdi.enabled flag for %v", config.runtime) } @@ -219,23 +223,24 @@ func (m command) configureWrapper(c *cli.Context, config *config) error { // configureConfigFile updates the specified container engine config file to enable the NVIDIA runtime. func (m command) configureConfigFile(c *cli.Context, config *config) error { configFilePath := config.resolveConfigFilePath() + configCommand := config.resolveConfigCommand() var cfg engine.Interface var err error switch config.runtime { - case "containerd": + case runtimeContainerd: cfg, err = containerd.New( containerd.WithLogger(m.logger), containerd.WithPath(configFilePath), - containerd.WithConfigSource(toml.FromFile(configFilePath)), + containerd.WithConfigSource(toml.FromCommandLine(configCommand)), ) - case "crio": + case runtimeCrio: cfg, err = crio.New( crio.WithLogger(m.logger), crio.WithPath(configFilePath), - crio.WithConfigSource(toml.FromFile(configFilePath)), + crio.WithConfigSource(toml.FromCommandLine(configCommand)), ) - case "docker": + case runtimeDocker: cfg, err = docker.New( docker.WithLogger(m.logger), docker.WithPath(configFilePath), @@ -285,16 +290,27 @@ func (c *config) resolveConfigFilePath() string { return c.configFilePath } switch c.runtime { - case "containerd": + case runtimeContainerd: return defaultContainerdConfigFilePath - case "crio": + case runtimeCrio: return defaultCrioConfigFilePath - case "docker": + case runtimeDocker: return defaultDockerConfigFilePath } return "" } +// resolveConfigCommand returns the default cli command to fetch the current runtime config +func (c *config) resolveConfigCommand() []string { + switch c.runtime { 
+ case runtimeContainerd: + return []string{"containerd", "config", "dump"} + case runtimeCrio: + return []string{"crio", "status", "config"} + } + return []string{} +} + // getOuputConfigPath returns the configured config path or "" if dry-run is enabled func (c *config) getOuputConfigPath() string { if c.dryRun { @@ -318,9 +334,9 @@ func enableCDI(config *config, cfg engine.Interface) error { return nil } switch config.runtime { - case "containerd": + case runtimeContainerd: cfg.Set("enable_cdi", true) - case "docker": + case runtimeDocker: cfg.Set("features", map[string]bool{"cdi": true}) default: return fmt.Errorf("enabling CDI in %s is not supported", config.runtime) diff --git a/internal/config/toml.go b/internal/config/toml.go index a1d37428d..60a641266 100644 --- a/internal/config/toml.go +++ b/internal/config/toml.go @@ -170,11 +170,26 @@ func (t *Toml) Get(key string) interface{} { return (*toml.Tree)(t).Get(key) } +// GetDefault returns the value for the specified key and falls back to the default value if the Get call fails +func (t *Toml) GetDefault(key string, def interface{}) interface{} { + val := t.Get(key) + if val == nil { + return def + } + return val +} + // Set sets the specified key to the specified value in the TOML config. func (t *Toml) Set(key string, value interface{}) { (*toml.Tree)(t).Set(key, value) } +// WriteTo encode the Tree as Toml and writes it to the writer w. +// Returns the number of bytes written in case of success, or an error if anything happened. +func (t *Toml) WriteTo(w io.Writer) (int64, error) { + return (*toml.Tree)(t).WriteTo(w) +} + // commentDefaults applies the required comments for default values to the Toml. 
func (t *Toml) commentDefaults() *Toml { asToml := (*toml.Tree)(t) diff --git a/pkg/config/engine/api.go b/pkg/config/engine/api.go index b074dadfa..d9933daa0 100644 --- a/pkg/config/engine/api.go +++ b/pkg/config/engine/api.go @@ -23,4 +23,10 @@ type Interface interface { Set(string, interface{}) RemoveRuntime(string) error Save(string) (int64, error) + GetRuntimeConfig(string) (Runtime, error) +} + +// Runtime defines the interface to query container runtime handler configuration +type Runtime interface { + GetBinPath() string } diff --git a/pkg/config/engine/containerd/config_v1.go b/pkg/config/engine/containerd/config_v1.go index e94a22f5a..50b1fa74f 100644 --- a/pkg/config/engine/containerd/config_v1.go +++ b/pkg/config/engine/containerd/config_v1.go @@ -22,6 +22,7 @@ import ( "github.com/pelletier/go-toml" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" + cfgtoml "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" ) // ConfigV1 represents a version 1 containerd config @@ -29,6 +30,17 @@ type ConfigV1 Config var _ engine.Interface = (*ConfigV1)(nil) +type ctrdCfgV1Runtime struct { + tree *cfgtoml.Tree +} + +func (c *ctrdCfgV1Runtime) GetBinPath() string { + if binPath, ok := c.tree.GetPath([]string{"options", "BinaryName"}).(string); ok { + return binPath + } + return "" +} + // AddRuntime adds a runtime to the containerd config func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error { if c == nil || c.Tree == nil { @@ -157,3 +169,15 @@ func (c *ConfigV1) Set(key string, value interface{}) { func (c ConfigV1) Save(path string) (int64, error) { return (Config)(c).Save(path) } + +func (c *ConfigV1) GetRuntimeConfig(name string) (engine.Runtime, error) { + if c == nil || c.Tree == nil { + return nil, fmt.Errorf("config is nil") + } + config := *c.Tree + runtimeData := config.GetSubtreeByPath([]string{"plugins", "cri", "containerd", "runtimes", name}) + + return &ctrdCfgV1Runtime{ + tree: runtimeData, + }, nil +} 
diff --git a/pkg/config/engine/containerd/config_v2.go b/pkg/config/engine/containerd/config_v2.go index 8f3e601f4..306d2d19a 100644 --- a/pkg/config/engine/containerd/config_v2.go +++ b/pkg/config/engine/containerd/config_v2.go @@ -22,6 +22,17 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" ) +type ctrdCfgV2Runtime struct { + tree *toml.Tree +} + +func (c *ctrdCfgV2Runtime) GetBinPath() string { + if binPath, ok := c.tree.GetPath([]string{"options", "BinaryName"}).(string); ok { + return binPath + } + return "" +} + // AddRuntime adds a runtime to the containerd config func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error { if c == nil || c.Tree == nil { diff --git a/pkg/config/engine/containerd/containerd.go b/pkg/config/engine/containerd/containerd.go index 92bf9fff8..43f5dec3e 100644 --- a/pkg/config/engine/containerd/containerd.go +++ b/pkg/config/engine/containerd/containerd.go @@ -98,3 +98,14 @@ func (c *Config) parseVersion(useLegacyConfig bool) (int, error) { return -1, fmt.Errorf("unsupported type for version field: %v", v) } } + +func (c *Config) GetRuntimeConfig(name string) (engine.Runtime, error) { + if c == nil || c.Tree == nil { + return nil, fmt.Errorf("config is nil") + } + config := *c.Tree + runtimeData := config.GetSubtreeByPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}) + return &ctrdCfgV2Runtime{ + tree: runtimeData, + }, nil +} diff --git a/pkg/config/engine/crio/crio.go b/pkg/config/engine/crio/crio.go index d243372da..cd9ba6e5a 100644 --- a/pkg/config/engine/crio/crio.go +++ b/pkg/config/engine/crio/crio.go @@ -30,6 +30,17 @@ type Config struct { Logger logger.Interface } +type crioRuntime struct { + tree *toml.Tree +} + +func (c *crioRuntime) GetBinPath() string { + if binaryPath, ok := c.tree.GetPath([]string{"runtime_path"}).(string); ok && binaryPath != "" { + return binaryPath + } + return "" +} + var _ engine.Interface = (*Config)(nil) // New 
creates a cri-o config with the specified options @@ -65,11 +76,12 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error { config := *c.Tree - // By default we extract the runtime options from the runc settings; if this does not exist we get the options from the default runtime specified in the config. - runtimeNamesForConfig := []string{"runc"} + // By default, we extract the runtime options from the default runtime specified in the config; if this does not exist we get the options from the runc settings. + var runtimeNamesForConfig []string if name, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok && name != "" { runtimeNamesForConfig = append(runtimeNamesForConfig, name) } + runtimeNamesForConfig = append(runtimeNamesForConfig, "runc") for _, r := range runtimeNamesForConfig { if options, ok := config.GetPath([]string{"crio", "runtime", "runtimes", r}).(*toml.Tree); ok { c.Logger.Debugf("using options from runtime %v: %v", r, options.String()) @@ -129,3 +141,14 @@ func (c *Config) RemoveRuntime(name string) error { *c.Tree = config return nil } + +func (c *Config) GetRuntimeConfig(name string) (engine.Runtime, error) { + if c == nil || c.Tree == nil { + return nil, fmt.Errorf("config is nil") + } + config := *c.Tree + runtimeData := config.GetSubtreeByPath([]string{"crio", "runtime", "runtimes", name}) + return &crioRuntime{ + tree: runtimeData, + }, nil +} diff --git a/pkg/config/engine/crio/crio_test.go b/pkg/config/engine/crio/crio_test.go index d2b81b9e8..831868956 100644 --- a/pkg/config/engine/crio/crio_test.go +++ b/pkg/config/engine/crio/crio_test.go @@ -91,7 +91,7 @@ func TestAddRuntime(t *testing.T) { `, }, { - description: "options from runc take precedence over default runtime", + description: "options from runc do NOT take precedence over default runtime", config: ` [crio] [crio.runtime] @@ -120,7 +120,7 @@ func TestAddRuntime(t *testing.T) { [crio.runtime.runtimes.test] runtime_path = 
"/usr/bin/test" runtime_type = "oci" - runc_option = "option" + default_option = "option" `, }, } diff --git a/pkg/config/engine/docker/docker.go b/pkg/config/engine/docker/docker.go index 45a96255d..51e8ff889 100644 --- a/pkg/config/engine/docker/docker.go +++ b/pkg/config/engine/docker/docker.go @@ -18,6 +18,7 @@ package docker import ( "encoding/json" + "errors" "fmt" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" @@ -132,3 +133,7 @@ func (c Config) Save(path string) (int64, error) { n, err := config.Raw(path).Write(output) return int64(n), err } + +func (c *Config) GetRuntimeConfig(name string) (engine.Runtime, error) { + return nil, errors.New("Not Implemented") +} diff --git a/pkg/config/toml/source-cli.go b/pkg/config/toml/source-cli.go new file mode 100644 index 000000000..7a2fa0c32 --- /dev/null +++ b/pkg/config/toml/source-cli.go @@ -0,0 +1,44 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package toml + +import ( + "bytes" + "fmt" + "os/exec" +) + +type tomlCliSource struct { + command string + args []string +} + +func (c tomlCliSource) Load() (*Tree, error) { + //nolint:gosec // Subprocess launched with a potential tainted input or cmd arguments + cmd := exec.Command(c.command, c.args...) 
+ + var outb bytes.Buffer + var errb bytes.Buffer + + cmd.Stdout = &outb + cmd.Stderr = &errb + if err := cmd.Run(); err != nil { + return nil, fmt.Errorf("failed to run command %v %v: %w", c.command, c.args, err) + } + + return LoadBytes(outb.Bytes()) +} diff --git a/pkg/config/toml/source.go b/pkg/config/toml/source.go index 2bd9191d4..f835ca8da 100644 --- a/pkg/config/toml/source.go +++ b/pkg/config/toml/source.go @@ -33,3 +33,16 @@ func FromFile(path string) Loader { } return tomlFile(path) } + +// FromCommandLine creates a TOML source from the output +// of a shell command specified via string slice. +// If an empty slice is passed an empty toml config is used. +func FromCommandLine(params []string) Loader { + if len(params) == 0 { + return Empty + } + return &tomlCliSource{ + command: params[0], + args: params[1:], + } +} diff --git a/tools/container/runtime/containerd/containerd.go b/tools/container/runtime/containerd/containerd.go index df5db6d62..b44543e4d 100644 --- a/tools/container/runtime/containerd/containerd.go +++ b/tools/container/runtime/containerd/containerd.go @@ -24,6 +24,7 @@ import ( cli "github.com/urfave/cli/v2" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" "github.com/NVIDIA/nvidia-container-toolkit/tools/container" ) @@ -83,8 +84,10 @@ func Flags(opts *Options) []cli.Flag { func Setup(c *cli.Context, o *container.Options, co *Options) error { log.Infof("Starting 'setup' for %v", c.App.Name) + containerdCommand := getContainerdConfigCommand(o) cfg, err := containerd.New( containerd.WithPath(o.Config), + containerd.WithConfigSource(toml.FromCommandLine(containerdCommand)), containerd.WithRuntimeType(co.runtimeType), containerd.WithUseLegacyConfig(co.useLegacyConfig), containerd.WithContainerAnnotations(co.containerAnnotationsFromCDIPrefixes()...), @@ -112,8 +115,10 @@ func Setup(c *cli.Context, o *container.Options, co *Options) error { func Cleanup(c 
*cli.Context, o *container.Options, co *Options) error { log.Infof("Starting 'cleanup' for %v", c.App.Name) + containerdCommand := getContainerdConfigCommand(o) cfg, err := containerd.New( containerd.WithPath(o.Config), + containerd.WithConfigSource(toml.FromCommandLine(containerdCommand)), containerd.WithRuntimeType(co.runtimeType), containerd.WithUseLegacyConfig(co.useLegacyConfig), containerd.WithContainerAnnotations(co.containerAnnotationsFromCDIPrefixes()...), @@ -164,3 +169,13 @@ func (o *Options) runtimeConfigOverride() (map[string]interface{}, error) { return runtimeOptions, nil } + +// getContainerdConfigCommand returns a string slice which contains the CLI args to retrieve the current runtime configuration +func getContainerdConfigCommand(o *container.Options) []string { + var cliArgs []string + if o.HostRootMount != "" { + cliArgs = append(cliArgs, "chroot", o.HostRootMount) + } + cliArgs = append(cliArgs, "containerd", "config", "dump") + return cliArgs +} diff --git a/tools/container/runtime/crio/crio.go b/tools/container/runtime/crio/crio.go index 69482191f..143a7e1d4 100644 --- a/tools/container/runtime/crio/crio.go +++ b/tools/container/runtime/crio/crio.go @@ -27,6 +27,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" "github.com/NVIDIA/nvidia-container-toolkit/tools/container" ) @@ -115,8 +116,10 @@ func setupHook(o *container.Options, co *Options) error { func setupConfig(o *container.Options) error { log.Infof("Updating config file") + crioCommand := getCRIOConfigCommand(o) cfg, err := crio.New( crio.WithPath(o.Config), + crio.WithConfigSource(toml.FromCommandLine(crioCommand)), ) if err != nil { return fmt.Errorf("unable to load config: %v", err) @@ -166,8 +169,10 @@ func cleanupHook(co *Options) error { func cleanupConfig(o 
*container.Options) error { log.Infof("Reverting config file modifications") + crioCommand := getCRIOConfigCommand(o) cfg, err := crio.New( crio.WithPath(o.Config), + crio.WithConfigSource(toml.FromCommandLine(crioCommand)), ) if err != nil { return fmt.Errorf("unable to load config: %v", err) @@ -190,3 +195,13 @@ func cleanupConfig(o *container.Options) error { func RestartCrio(o *container.Options) error { return o.Restart("crio", func(string) error { return fmt.Errorf("supporting crio via signal is unsupported") }) } + +// getCRIOConfigCommand returns a string slice which contains the CLI args to retrieve the current runtime configuration +func getCRIOConfigCommand(o *container.Options) []string { + var cliArgs []string + if o.HostRootMount != "" { + cliArgs = append(cliArgs, "chroot", o.HostRootMount) + } + cliArgs = append(cliArgs, "crio", "status", "config") + return cliArgs +} diff --git a/tools/container/toolkit/toolkit.go b/tools/container/toolkit/toolkit.go index 484d7891b..6a2419dc0 100644 --- a/tools/container/toolkit/toolkit.go +++ b/tools/container/toolkit/toolkit.go @@ -24,7 +24,6 @@ import ( "path/filepath" "strings" - toml "github.com/pelletier/go-toml" log "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" "tags.cncf.io/container-device-interface/pkg/cdi" @@ -32,6 +31,10 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio" + cfgtoml "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root" ) @@ -40,6 +43,9 @@ const ( // DefaultNvidiaDriverRoot specifies the default NVIDIA driver run directory 
DefaultNvidiaDriverRoot = "/run/nvidia/driver" + // DefaultHostRootMount specifies the path to the host root to be used when executing shell commands + DefaultHostRootMount = "/host" + nvidiaContainerCliSource = "/usr/bin/nvidia-container-cli" nvidiaContainerRuntimeHookSource = "/usr/bin/nvidia-container-runtime-hook" @@ -54,6 +60,7 @@ type Options struct { DevRoot string DriverRootCtrPath string DevRootCtrPath string + HostRootMount string ContainerRuntimeMode string ContainerRuntimeDebug string @@ -79,6 +86,8 @@ type Options struct { acceptNVIDIAVisibleDevicesWhenUnprivileged bool acceptNVIDIAVisibleDevicesAsVolumeMounts bool + toolkitConfigSource string + ignoreErrors bool } @@ -109,6 +118,13 @@ func Flags(opts *Options) []cli.Flag { Destination: &opts.DevRootCtrPath, EnvVars: []string{"DEV_ROOT_CTR_PATH"}, }, + &cli.StringFlag{ + Name: "host-root", + Usage: "Specify the path to the host root to be used when executing shell commands.", + Value: DefaultHostRootMount, + Destination: &opts.HostRootMount, + EnvVars: []string{"HOST_ROOT_MOUNT"}, + }, &cli.StringFlag{ Name: "nvidia-container-runtime.debug", Aliases: []string{"nvidia-container-runtime-debug"}, @@ -190,6 +206,13 @@ func Flags(opts *Options) []cli.Flag { Destination: &opts.cdiKind, EnvVars: []string{"CDI_KIND"}, }, + &cli.StringFlag{ + Name: "toolkit-config-source", + Usage: "The file where the NVIDIA Container toolkit source configuration is specified", + Value: nvidiaContainerToolkitConfigSource, + Destination: &opts.toolkitConfigSource, + EnvVars: []string{"TOOLKIT_CONFIG_SOURCE"}, + }, &cli.BoolFlag{ Name: "ignore-errors", Usage: "ignore errors when installing the NVIDIA Container toolkit. 
This is used for testing purposes only.", @@ -342,7 +365,14 @@ func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err)) } - err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts) + var runtimeBinPaths []string + runtimeBinPaths, err = getRuntimeBinaryPaths(opts.HostRootMount) + if err != nil { + log.Warningf("Error retrieving runtime binary paths: %v", err) + } + + err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, + runtimeBinPaths, opts) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) } else if err != nil { @@ -416,10 +446,13 @@ func installLibrary(libName string, toolkitRoot string) error { // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // that the settings are updated to match the desired install and nvidia driver directories. 
-func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error { +func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, + nvidaContainerRuntimeHookPath string, runtimeBinaryPaths []string, opts *Options) error { log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) - cfg, err := loadConfig(nvidiaContainerToolkitConfigSource) + cfg, err := config.New( + config.WithConfigFile(opts.toolkitConfigSource), + ) if err != nil { return fmt.Errorf("could not open source config file: %v", err) } @@ -436,6 +469,18 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai // Use the driver run root as the root: driverLdconfigPath := config.NormalizeLDConfigPath("@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/"))) + var ctkRuntimes []string + defaultCfg, err := cfg.Config() + if err == nil { + defaultCfgRuntimes := defaultCfg.NVIDIAContainerRuntimeConfig.Runtimes + if len(runtimeBinaryPaths) > 0 { + ctkRuntimes = append(ctkRuntimes, runtimeBinaryPaths...) + } + ctkRuntimes = append(ctkRuntimes, defaultCfgRuntimes...) 
+ } else { + log.Warningf("could not get default toolkit config: %v", err) + } + configValues := map[string]interface{}{ // Set the options in the root toml table "accept-nvidia-visible-devices-envvar-when-unprivileged": opts.acceptNVIDIAVisibleDevicesWhenUnprivileged, @@ -450,6 +495,11 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai "nvidia-container-runtime-hook.path": nvidaContainerRuntimeHookPath, "nvidia-container-runtime-hook.skip-mode-detection": opts.ContainerRuntimeHookSkipModeDetection, } + + if len(ctkRuntimes) > 0 { + configValues["nvidia-container-runtime.runtimes"] = ctkRuntimes + } + for key, value := range configValues { cfg.Set(key, value) } @@ -503,16 +553,6 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai return nil } -func loadConfig(path string) (*toml.Tree, error) { - _, err := os.Stat(path) - if err == nil { - return toml.LoadFile(path) - } else if os.IsNotExist(err) { - return toml.TreeFromMap(nil) - } - return nil, err -} - // installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper. 
func installContainerToolkitCLI(toolkitDir string) (string, error) { e := executable{ @@ -793,3 +833,67 @@ func generateCDISpec(opts *Options, nvidiaCDIHookPath string) error { return nil } + +// getRuntimeBinaryPaths extracts the full paths of the low-level runtime binaries specified in the container runtime config +func getRuntimeBinaryPaths(hostRoot string) ([]string, error) { + var runtimeBinaryPaths []string + + if r, ok := os.LookupEnv("RUNTIME"); ok { + + runtimeConfigCommand := getRuntimeConfigCommand(hostRoot, r) + + var cfg engine.Interface + var err error + switch r { + case "containerd": + cfg, err = containerd.New( + containerd.WithConfigSource(cfgtoml.FromCommandLine(runtimeConfigCommand)), + ) + if err != nil { + return nil, fmt.Errorf("unable to load containerd config: %w", err) + } + + case "crio": + cfg, err = crio.New( + crio.WithConfigSource(cfgtoml.FromCommandLine(runtimeConfigCommand)), + ) + if err != nil { + return nil, fmt.Errorf("unable to load crio config: %w", err) + } + } + + if cfg == nil { + return nil, fmt.Errorf("unable to load runtime config for %s", r) + } + defaultRuntime := cfg.DefaultRuntime() + if defaultRuntime != "" { + runtimeCfg, err := cfg.GetRuntimeConfig(defaultRuntime) + if err == nil { + binPath := runtimeCfg.GetBinPath() + if binPath != "" { + runtimeBinaryPaths = append(runtimeBinaryPaths, binPath) + } + } else { + log.Warningf("Unable to determine runtime binary path: %v", err) + } + } + } + return runtimeBinaryPaths, nil +} + +// getRuntimeConfigCommand returns the default cli command to fetch the current runtime config +func getRuntimeConfigCommand(hostRoot, runtime string) []string { + var cliArgs []string + if hostRoot != "" { + cliArgs = append(cliArgs, "chroot", hostRoot) + } + switch runtime { + case "containerd": + cliArgs = append(cliArgs, "containerd", "config", "dump") + return cliArgs + case "crio": + cliArgs = append(cliArgs, "crio", "status", "config") + return cliArgs + } + return []string{} +}