diff --git a/cmd/gpu_fakedev/configs/2x4-PVC-xelink.json b/cmd/gpu_fakedev/configs/2x4-PVC-xelink.json index e92b6efe5..c658ce255 100644 --- a/cmd/gpu_fakedev/configs/2x4-PVC-xelink.json +++ b/cmd/gpu_fakedev/configs/2x4-PVC-xelink.json @@ -2,8 +2,10 @@ "Info": "2x 4 tile 4 GiB PVC [Ponte Vecchio] GPUs", "DevCount": 2, "TilesPerDev": 4, - "DevsPerNode": 1, + "DevsPerNumaNode": 1, "DevMemSize": 4294967296, + "Driver": "i915", + "Path": "/tmp", "Capabilities": { "platform": "fake_PVC", "connections": "0.1-0.0_0.2-0.0_0.3-0.0_1.0-0.0_1.1-0.0_1.2-0.0_1.3-0.0_0.2-0.1_0.3-0.1_1.0-0.1_1.1-0.1_1.2-0.1_1.3-0.1_0.3-0.2_1.0-0.2_1.1-0.2_1.2-0.2_1.3-0.2_1.0-0.3_1.1-0.3_1.2-0.3_1.3-0.3_1.1-1.0_1.2-1.0_1.3-1.0_1.2-1.1_1.3-1.1_1.3-1.2", diff --git a/cmd/gpu_fakedev/configs/8x-DG1.json b/cmd/gpu_fakedev/configs/8x-DG1.json index 2fcaa5b45..5b147eac9 100644 --- a/cmd/gpu_fakedev/configs/8x-DG1.json +++ b/cmd/gpu_fakedev/configs/8x-DG1.json @@ -2,6 +2,8 @@ "Info": "8x 4 GiB DG1 [Iris Xe MAX Graphics] GPUs", "DevCount": 8, "DevMemSize": 4294967296, + "Driver": "i915", + "Path": "/tmp", "Capabilities": { "platform": "fake_DG1" } diff --git a/cmd/gpu_fakedev/configs/8x2-PVC-xelink.json b/cmd/gpu_fakedev/configs/8x2-PVC-xelink.json index 55c5b7e63..0ed740828 100644 --- a/cmd/gpu_fakedev/configs/8x2-PVC-xelink.json +++ b/cmd/gpu_fakedev/configs/8x2-PVC-xelink.json @@ -1,12 +1,14 @@ { - "Info": "8x 4 GiB PVC [Ponte Vecchio] GPUs", - "DevCount": 8, + "Info": "8x 4 GiB PVC [Ponte Vecchio] GPUs", + "DevCount": 8, "TilesPerDev": 2, - "DevsPerNode": 2, - "DevMemSize": 4294967296, - "Capabilities": { - "platform": "fake_PVC", - "connections": "", - "connection-topology": "FULL" - } + "DevsPerNumaNode": 2, + "DevMemSize": 4294967296, + "Driver": "i915", + "Path": "/tmp", + "Capabilities": { + "platform": "fake_PVC", + "connections": "", + "connection-topology": "FULL" + } } diff --git a/cmd/gpu_fakedev/gpu_fakedev.go b/cmd/gpu_fakedev/gpu_fakedev.go index 239b652ed..39ac0e931 100644 --- 
a/cmd/gpu_fakedev/gpu_fakedev.go +++ b/cmd/gpu_fakedev/gpu_fakedev.go @@ -1,4 +1,4 @@ -// Copyright 2021-2023 Intel Corporation. All Rights Reserved. +// Copyright 2021-2024 Intel Corporation. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -35,399 +35,32 @@ package main import ( - "encoding/json" - "errors" "flag" - "fmt" - "io/fs" - "log" "os" - "path/filepath" - "strconv" - "strings" - "golang.org/x/sys/unix" -) + "github.com/intel/intel-device-plugins-for-kubernetes/pkg/fakedri" -const ( - dirMode = 0775 - fileMode = 0644 - cardBase = 0 - renderBase = 128 - maxDevs = 128 - sysfsPath = "sys" - devfsPath = "dev" - mib = 1024.0 * 1024.0 - // null device major, minor on linux. - devNullMajor = 1 - devNullMinor = 3 - devNullType = unix.S_IFCHR - // GPU connectivity. - maxK8sLabelSize = 63 - fullyConnected = "FULL" + "k8s.io/klog/v2" ) -var verbose bool - -type genOptions struct { - Capabilities map[string]string // device capabilities mapping for NFD hook - Info string // verbal config description - DevCount int // how many devices to fake - TilesPerDev int // per-device tile count - DevMemSize int // available per-device device-local memory, in bytes - DevsPerNode int // How many devices per Numa node - VfsPerPf int // How many SR-IOV VFs per PF - // fields for counting what was generated - files int - dirs int - devs int -} - -func addSysfsDriTree(root string, opts *genOptions, i int) error { - card := fmt.Sprintf("card%d", cardBase+i) - base := filepath.Join(root, "class", "drm", card) - - if err := os.MkdirAll(base, dirMode); err != nil { - return err - } - opts.dirs++ - - data := []byte(strconv.Itoa(opts.DevMemSize)) - file := filepath.Join(base, "lmem_total_bytes") - - if err := os.WriteFile(file, data, fileMode); err != nil { - return err - } - opts.files++ - - path := filepath.Join(base, "device", "drm", card) - if err := os.MkdirAll(path, 
dirMode); err != nil { - return err - } - opts.dirs++ - - path = filepath.Join(base, "device", "drm", fmt.Sprintf("renderD%d", renderBase+i)) - if err := os.Mkdir(path, dirMode); err != nil { - return err - } - opts.dirs++ - - data = []byte("0x8086") - file = filepath.Join(base, "device", "vendor") - - if err := os.WriteFile(file, data, fileMode); err != nil { - return err - } - opts.files++ - - node := 0 - if opts.DevsPerNode > 0 { - node = i / opts.DevsPerNode - } - - data = []byte(strconv.Itoa(node)) - file = filepath.Join(base, "device", "numa_node") - - if err := os.WriteFile(file, data, fileMode); err != nil { - return err - } - opts.files++ - - if opts.VfsPerPf > 0 && i%(opts.VfsPerPf+1) == 0 { - data = []byte(strconv.Itoa(opts.VfsPerPf)) - file = filepath.Join(base, "device", "sriov_numvfs") - - if err := os.WriteFile(file, data, fileMode); err != nil { - return err - } - opts.files++ - } - - for tile := 0; tile < opts.TilesPerDev; tile++ { - path := filepath.Join(base, "gt", fmt.Sprintf("gt%d", tile)) - if err := os.MkdirAll(path, dirMode); err != nil { - return err - } - opts.dirs++ - } - - return nil -} - -func addSysfsBusTree(root string, opts *genOptions, i int) error { - pciName := fmt.Sprintf("0000:00:0%d.0", i) - base := filepath.Join(root, "bus", "pci", "drivers", "i915", pciName) - - if err := os.MkdirAll(base, dirMode); err != nil { - return err - } - opts.dirs++ - - data := []byte("0x4905") - file := filepath.Join(base, "device") - - if err := os.WriteFile(file, data, fileMode); err != nil { - return err - } - opts.files++ - - drm := filepath.Join(base, "drm") - if err := os.MkdirAll(drm, dirMode); err != nil { - return err - } - opts.dirs++ - - return addDeviceNodes(drm, opts, i) -} - -func addDeviceNodes(base string, opts *genOptions, i int) error { - mode := uint32(fileMode | devNullType) - devid := int(unix.Mkdev(uint32(devNullMajor), uint32(devNullMinor))) - - file := filepath.Join(base, fmt.Sprintf("card%d", cardBase+i)) - if err := 
unix.Mknod(file, mode, devid); err != nil { - return fmt.Errorf("NULL device (%d:%d) node creation failed for '%s': %w", - devNullMajor, devNullMinor, file, err) - } - opts.devs++ - - file = filepath.Join(base, fmt.Sprintf("renderD%d", renderBase+i)) - if err := unix.Mknod(file, mode, devid); err != nil { - return fmt.Errorf("NULL device (%d:%d) node creation failed for '%s': %w", - devNullMajor, devNullMinor, file, err) - } - opts.devs++ - - return nil -} - -func addDevfsDriTree(root string, opts *genOptions, i int) error { - base := filepath.Join(root, "dri") - if err := os.MkdirAll(base, dirMode); err != nil { - return err - } - opts.dirs++ - - return addDeviceNodes(base, opts, i) -} - -func addDebugfsDriTree(root string, opts *genOptions, i int) error { - base := filepath.Join(root, "kernel", "debug", "dri", strconv.Itoa(i)) - if err := os.MkdirAll(base, dirMode); err != nil { - return err - } - opts.dirs++ - - path := filepath.Join(base, "i915_capabilities") - f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, fileMode) - - if err != nil { - return err - } - defer f.Close() - opts.files++ - - // keys are in random order which provides extra testing for NFD label parsing code - for key, value := range opts.Capabilities { - line := fmt.Sprintf("%s: %s\n", key, value) - if _, err = f.WriteString(line); err != nil { - return err - } - } - - return nil -} - -func removeExistingDir(path, name string) { - entries, err := os.ReadDir(path) - if err != nil && !errors.Is(err, fs.ErrNotExist) { - log.Fatalf("ERROR: ReadDir() failed on fake %s path '%s': %v", name, path, err) - } - - if len(entries) == 0 { - return - } - - if name == "sysfs" && len(entries) > 3 { - log.Fatalf("ERROR: >3 entries in '%s' - real sysfs?", path) - } - - if name == "devfs" && (entries[0].Name() != "dri" || len(entries) > 1) { - log.Fatalf("ERROR: >1 entries in '%s', or '%s' != 'dri' - real devfs?", path, entries[0].Name()) - } - - log.Printf("WARN: removing already existing fake %s 
path '%s'", name, path) - - if err = os.RemoveAll(path); err != nil { - log.Fatalf("ERROR: removing existing %s in '%s' failed: %v", name, path, err) - } -} - -// generateDriFiles generates the fake sysfs + debugfs + devfs dirs & files according to given options. -func generateDriFiles(opts genOptions) { - if opts.Info != "" { - log.Printf("Config: '%s'", opts.Info) - } - - removeExistingDir(devfsPath, "devfs") - removeExistingDir(sysfsPath, "sysfs") - log.Printf("Generating fake DRI device(s) sysfs, debugfs and devfs content under '%s' & '%s'", - sysfsPath, devfsPath) - - opts.dirs, opts.files = 0, 0 - for i := 0; i < opts.DevCount; i++ { - if err := addSysfsDriTree(sysfsPath, &opts, i); err != nil { - log.Fatalf("ERROR: dev-%d sysfs tree generation failed: %v", i, err) - } - - if err := addDebugfsDriTree(sysfsPath, &opts, i); err != nil { - log.Fatalf("ERROR: dev-%d debugfs tree generation failed: %v", i, err) - } - - if err := addDevfsDriTree(devfsPath, &opts, i); err != nil { - log.Fatalf("ERROR: dev-%d devfs tree generation failed: %v", i, err) - } - - if err := addSysfsBusTree(sysfsPath, &opts, i); err != nil { - log.Fatalf("ERROR: dev-%d sysfs bus tree generation failed: %v", i, err) - } - } - log.Printf("Done, created %d dirs, %d devices and %d files.", opts.dirs, opts.devs, opts.files) - - makeXelinkSideCar(opts) -} - -func makeXelinkSideCar(opts genOptions) { - topology := opts.Capabilities["connection-topology"] - gpus := opts.DevCount - tiles := opts.TilesPerDev - connections := opts.Capabilities["connections"] - - if topology != fullyConnected { - saveSideCarFile(connections) - } else { - saveSideCarFile(buildConnectionList(gpus, tiles)) - } - - log.Printf("XELINK: generated xelink sidecar label file, using (GPUs: %d, Tiles: %d, Topology: %s)", gpus, tiles, topology) -} - -func buildConnectionList(gpus, tiles int) string { - var nodes = make([]string, 0) - - for mm := 0; mm < gpus; mm++ { - for nn := 0; nn < tiles; nn++ { - nodes = append(nodes, 
fmt.Sprintf("%d.%d", mm, nn)) - } - } - - var links = make(map[string]bool, 0) - - var smap = make([]string, 0) - - for _, from := range nodes { - for _, to := range nodes { - // no self links, TODO ignore in-gpu xelinks - if to == from { - continue - } - - link := fmt.Sprintf("%s-%s", to, from) - - reverselink := fmt.Sprintf("%s-%s", from, to) - if _, exists := links[reverselink]; !exists { - links[link] = true - - smap = append(smap, link) - } - } - } - - return strings.Join(smap, "_") -} - -func saveSideCarFile(connections string) { - f, err := os.Create("xpum-sidecar-labels.txt") - if err != nil { - panic(err) - } - defer f.Close() - - // Write first line without Z prefix - line := fmt.Sprintf("xpumanager.intel.com/xe-links=%s", connections[:min(len(connections), maxK8sLabelSize)]) - fmt.Println(line) - - if _, err := f.WriteString(line + "\n"); err != nil { - panic(err) - } - - index := 2 +func main() { + name := flag.String("json", "", "JSON spec for fake device sysfs, debugfs and devfs content") - // Write next lines with Z prefix - for i := maxK8sLabelSize; i < len(connections); i += (maxK8sLabelSize - 1) { - line := fmt.Sprintf("xpumanager.intel.com/xe-links%d=Z%s", index, connections[i:min(len(connections), i+maxK8sLabelSize-1)]) - fmt.Println(line) + // Initialize klog flags for verbosity + klog.InitFlags(nil) - if _, err := f.WriteString(line + "\n"); err != nil { - panic(err) - } - index++ - } -} + flag.Parse() -// getOptions parses options from given JSON file, validates and returns them. 
-func getOptions(name string) genOptions { - if name == "" { - log.Fatal("ERROR: no fake device spec provided") + if *name == "" { + klog.Error("ERROR: no fake device spec provided") } - data, err := os.ReadFile(name) + data, err := os.ReadFile(*name) if err != nil { - log.Fatalf("ERROR: reading JSON spec file '%s' failed: %v", name, err) - } - - if verbose { - log.Printf("Using fake device spec: %v\n", string(data)) - } - - var opts genOptions - if err = json.Unmarshal(data, &opts); err != nil { - log.Fatalf("ERROR: Unmarshaling JSON spec file '%s' failed: %v", name, err) - } - - if opts.DevCount < 1 || opts.DevCount > maxDevs { - log.Fatalf("ERROR: invalid device count: 1 <= %d <= %d", opts.DevCount, maxDevs) - } - - if opts.VfsPerPf > 0 { - if opts.TilesPerDev > 0 || opts.DevsPerNode > 0 { - log.Fatalf("ERROR: SR-IOV VFs (%d) with device tiles (%d) or Numa nodes (%d) is unsupported for faking", - opts.VfsPerPf, opts.TilesPerDev, opts.DevsPerNode) - } - - if opts.DevCount%(opts.VfsPerPf+1) != 0 { - log.Fatalf("ERROR: %d devices cannot be evenly split to between set of 1 SR-IOV PF + %d VFs", - opts.DevCount, opts.VfsPerPf) - } - } - - if opts.DevsPerNode > opts.DevCount { - log.Fatalf("ERROR: DevsPerNode (%d) > DevCount (%d)", opts.DevsPerNode, opts.DevCount) + klog.Fatalf("Reading JSON spec file '%s' failed: %v", *name, err) } - if opts.DevMemSize%mib != 0 { - log.Fatalf("ERROR: Invalid memory size (%f MiB), not even MiB", float64(opts.DevMemSize)/mib) - } - - return opts -} - -func main() { - var name string - - flag.StringVar(&name, "json", "", "JSON spec for fake device sysfs, debugfs and devfs content") - flag.BoolVar(&verbose, "verbose", false, "More verbose output") - flag.Parse() - - generateDriFiles(getOptions(name)) + options := fakedri.GetOptionsByJSON(string(data)) + options.NfdFeatureDir = options.Path + fakedri.GenerateDriFiles(options) } diff --git a/cmd/gpu_plugin/gpu_plugin.go b/cmd/gpu_plugin/gpu_plugin.go index c3b6ed73b..71a4898ae 100644 --- 
a/cmd/gpu_plugin/gpu_plugin.go +++ b/cmd/gpu_plugin/gpu_plugin.go @@ -37,20 +37,19 @@ import ( "github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/labeler" gpulevelzero "github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/levelzero" dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin" + "github.com/intel/intel-device-plugins-for-kubernetes/pkg/fakedri" cdispec "tags.cncf.io/container-device-interface/specs-go" ) const ( - sysfsDrmDirectory = "/sys/class/drm" - devfsDriDirectory = "/dev/dri" - wslDxgPath = "/dev/dxg" - wslLibPath = "/usr/lib/wsl" - nfdFeatureDir = "/etc/kubernetes/node-feature-discovery/features.d" - resourceFilename = "intel-gpu-resources.txt" - gpuDeviceRE = `^card[0-9]+$` - controlDeviceRE = `^controlD[0-9]+$` - pciAddressRE = "^[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\\.[0-9a-f]{1}$" - vendorString = "0x8086" + wslDxgPath = "/dev/dxg" + wslLibPath = "/usr/lib/wsl" + nfdFeatureDir = "/etc/kubernetes/node-feature-discovery/features.d" + resourceFilename = "intel-gpu-resources.txt" + gpuDeviceRE = `^card[0-9]+$` + controlDeviceRE = `^controlD[0-9]+$` + pciAddressRE = "^[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\\.[0-9a-f]{1}$" + vendorString = "0x8086" // Device plugin settings. 
namespace = "gpu.intel.com" @@ -70,6 +69,12 @@ const ( labelerMaxInterval = 5 * 60 * time.Second ) +var ( + sysfsDrmDirectory = "/sys/class/drm" + devfsDriDirectory = "/dev/dri" + prefix = "" +) + type cliOptions struct { preferredAllocationPolicy string sharedDevNum int @@ -764,12 +769,10 @@ func (dp *devicePlugin) Allocate(request *pluginapi.AllocateRequest) (*pluginapi return nil, &dpapi.UseDefaultMethodError{} } -func main() { - var ( - prefix string - opts cliOptions - ) +func parseAndValidateOptions() cliOptions { + var opts cliOptions + // Flag and options parsing flag.StringVar(&prefix, "prefix", "", "Prefix for devfs & sysfs paths") flag.BoolVar(&opts.enableMonitoring, "enable-monitoring", false, "whether to enable '*_monitoring' (= all GPUs) resource") flag.BoolVar(&opts.resourceManagement, "resource-manager", false, "fractional GPU resource management") @@ -780,68 +783,97 @@ func main() { flag.StringVar(&opts.preferredAllocationPolicy, "allocation-policy", "none", "modes of allocating GPU devices: balanced, packed and none") flag.Parse() + // Handle fakedriSpec or environment variable + fakedriSpec := os.Getenv("FAKEDRI_SPEC") + + // If fakedriSpec is set, handle it + if fakedriSpec != "" { + options := fakedri.GetOptionsByYAML(fakedriSpec) + if options.Mode == "" || options.Mode == "yaml" { + options.NfdFeatureDir = nfdFeatureDir + fakedri.GenerateDriFiles(options) + } + + prefix = options.Path + } + + // Input validation if opts.sharedDevNum < 1 { - klog.Error("The number of containers sharing the same GPU must greater than zero") - os.Exit(1) + klog.Fatal("The number of containers sharing the same GPU must be greater than zero") } if opts.sharedDevNum == 1 && opts.resourceManagement { - klog.Error("Trying to use fractional resources with shared-dev-num 1 is pointless") - os.Exit(1) + klog.Fatal("Trying to use fractional resources with shared-dev-num 1 is pointless") } - var str = opts.preferredAllocationPolicy - if !(str == "balanced" || str == 
"packed" || str == "none") { - klog.Error("invalid value for preferredAllocationPolicy, the valid values: balanced, packed, none") - os.Exit(1) + // Validate preferred allocation policy + if !(opts.preferredAllocationPolicy == "balanced" || opts.preferredAllocationPolicy == "packed" || opts.preferredAllocationPolicy == "none") { + klog.Fatal("Invalid value for preferredAllocationPolicy. Valid values: balanced, packed, none") } klog.V(1).Infof("GPU device plugin started with %s preferred allocation policy", opts.preferredAllocationPolicy) - plugin := newDevicePlugin(prefix+sysfsDrmDirectory, prefix+devfsDriDirectory, opts) + return opts +} +func checkWSLMode(plugin *devicePlugin) { + // WSL mode validation and feature handling if plugin.options.wslScan { klog.Info("WSL mode requested") if plugin.options.resourceManagement { - klog.Error("Resource management is not supported within WSL. Please disable resource management.") - - os.Exit(1) + klog.Fatal("Resource management is not supported within WSL. Please disable resource management.") } if plugin.options.enableMonitoring { - klog.Error("Monitoring is not supported within WSL. Please disable monitoring.") - - os.Exit(1) + klog.Fatal("Monitoring is not supported within WSL. Please disable monitoring.") } if plugin.options.healthManagement { - klog.Error("Health management is not supported within WSL. Please disable health management.") - - os.Exit(1) + klog.Fatal("Health management is not supported within WSL. Please disable health management.") } } +} +func initializeServices(plugin *devicePlugin) { if plugin.options.healthManagement || plugin.options.wslScan { plugin.levelzeroService = levelzeroservice.NewLevelzero(gpulevelzero.DefaultUnixSocketPath) - go plugin.levelzeroService.Run(true) } +} +func handleResourceManagement(plugin *devicePlugin) { if plugin.options.resourceManagement { - // Start labeler to export labels file for NFD. 
+ // Start labeler to export labels file for NFD + nfdFeatureFile := path.Join(nfdFeatureDir, resourceFilename) - klog.V(2).Infof("NFD feature file location: %s", nfdFeatureFile) - // Labeler catches OS signals and calls os.Exit() after receiving any. + // Labeler catches OS signals and calls os.Exit() after receiving any go labeler.Run(prefix+sysfsDrmDirectory, nfdFeatureFile, labelerMaxInterval, plugin.scanResources, plugin.levelzeroService, func() { // Exit the whole app when labeler exits os.Exit(0) }) } +} + +func main() { + // Parse command-line options and validate them + opts := parseAndValidateOptions() + + // Create the device plugin + plugin := newDevicePlugin(prefix+sysfsDrmDirectory, prefix+devfsDriDirectory, opts) + + // Handle WSL mode and exit if not supported + checkWSLMode(plugin) + + // Initialize health management or WSL scan service if needed + initializeServices(plugin) + + // Handle resource management if enabled + handleResourceManagement(plugin) + // Start the manager to run the plugin manager := dpapi.NewManager(namespace, plugin) manager.Run() } diff --git a/deployments/gpu_plugin/base/fakedri-patch.yaml b/deployments/gpu_plugin/base/fakedri-patch.yaml new file mode 100644 index 000000000..e9e0f1953 --- /dev/null +++ b/deployments/gpu_plugin/base/fakedri-patch.yaml @@ -0,0 +1,39 @@ +# README: +# This patch allows testing of the intel-gpu-plugin with a simulated GPU in Kubernetes. +# The FAKEDRI_SPEC environment variable simulates an Intel GPU, enabling testing +# of GPU-based workload allocations without requiring real GPU hardware. +# It also mounts /tmp as a volume for temporary file storage. +# +# To apply this patch, run the following command: +# kubectl patch daemonset intel-gpu-plugin -n <namespace> --patch "$(cat fakedri-patch.yaml)" +# Replace <namespace> with the appropriate namespace (e.g., kube-system). 
+ +spec: + template: + spec: + containers: + - name: intel-gpu-plugin + env: + # FAKEDRI_SPEC simulates an Intel GPU with specific hardware specs for testing. + - name: FAKEDRI_SPEC + value: | + Path: "/tmp" # Same as mounts and volumes + Info: "8x 4 GiB DG1 [Iris Xe MAX Graphics] GPUs" # Describes the fake GPU model + DevCount: 8 # The number of fake GPUs being simulated (8 devices) + TilesPerDev: 1 # Number of tiles per fake GPU (1 tile per device) + DevsPerNumaNode: 1 # Number of GPUs per node (1 GPU per numa node) + DevMemSize: 4294967296 # Memory size per GPU in bytes (4 GiB = 4294967296 bytes) + Driver: "i915" # The GPU driver to use for the fake device (For iGPUs use i915 driver, xe is used in future GPUs starting from LunarLake) + Capabilities: # List of capabilities that the fake GPU supports + platform: "fake_ARC" # Fake platform name (simulates the ARC platform) + connections: "" # Placeholder for GPU connections (manual connections for xe-link) + connection-topology: "FULL" # Defines the connection xe-link topology of the GPU + # Mount the /tmp directory from the host to the container + volumeMounts: + - name: tmp + mountPath: /tmp # The container's /tmp directory is mapped to the host's /tmp directory + volumes: + - name: tmp + hostPath: + path: /tmp # Mount the host's /tmp directory inside the container + type: DirectoryOrCreate # Create the directory if it does not already exist diff --git a/deployments/gpu_plugin/overlays/fractional_resources_fakedri/add-fakedri-spec.yaml b/deployments/gpu_plugin/overlays/fractional_resources_fakedri/add-fakedri-spec.yaml new file mode 100644 index 000000000..4273e7f41 --- /dev/null +++ b/deployments/gpu_plugin/overlays/fractional_resources_fakedri/add-fakedri-spec.yaml @@ -0,0 +1,32 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: intel-gpu-plugin +spec: + template: + spec: + containers: + - name: intel-gpu-plugin + env: + # FAKEDRI_SPEC simulates an Intel GPU with specific hardware specs for testing. 
+ - name: FAKEDRI_SPEC + value: | + Path: "/tmp" # Same as mounts and volumes + DevCount: 8 + TilesPerDev: 1 + DevsPerNumaNode: 1 + DevMemSize: 4294967296 + Driver: "i915" + Capabilities: + platform: "fake_GPU" + connections: "" + connection-topology: "FULL" + volumeMounts: + - name: tmp + mountPath: /tmp # The container's /tmp directory is mapped to the host's /tmp directory + volumes: + - name: tmp + hostPath: + path: /tmp # Mount the host's /tmp directory inside the container + type: DirectoryOrCreate # Create the directory if it does not already exist + diff --git a/deployments/gpu_plugin/overlays/fractional_resources_fakedri/kustomization.yaml b/deployments/gpu_plugin/overlays/fractional_resources_fakedri/kustomization.yaml new file mode 100644 index 000000000..b83ad8084 --- /dev/null +++ b/deployments/gpu_plugin/overlays/fractional_resources_fakedri/kustomization.yaml @@ -0,0 +1,8 @@ +resources: + - ../fractional_resources + +patches: + - path: add-fakedri-spec.yaml # The patch file that adds the environment variables + target: + kind: DaemonSet + name: intel-gpu-plugin diff --git a/pkg/deviceplugin/api.go b/pkg/deviceplugin/api.go index 2997f93a8..7c385d21e 100644 --- a/pkg/deviceplugin/api.go +++ b/pkg/deviceplugin/api.go @@ -64,6 +64,7 @@ func NewDeviceInfo(state string, nodes []pluginapi.DeviceSpec, mounts []pluginap devPaths = append(devPaths, node.HostPath) } + // Get topology information based on devPaths topologyInfo, err := topology.GetTopologyInfo(devPaths) if err == nil { deviceInfo.topology = topologyInfo diff --git a/pkg/fakedri/fakedri.go b/pkg/fakedri/fakedri.go new file mode 100644 index 000000000..bbecd804e --- /dev/null +++ b/pkg/fakedri/fakedri.go @@ -0,0 +1,525 @@ +// Copyright 2021-2024 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//--------------------------------------------------------------- +// sysfs SPECIFICATION +// +// sys/class/drm/cardX/ +// sys/class/drm/cardX/lmem_total_bytes (gpu memory size, number) +// sys/class/drm/cardX/device/ +// sys/class/drm/cardX/device/vendor (0x8086) +// sys/class/drm/cardX/device/sriov_numvfs (PF only, number of VF GPUs, number) +// sys/class/drm/cardX/device/drm/ +// sys/class/drm/cardX/device/drm/cardX/ +// sys/class/drm/cardX/device/drm/renderD1XX/ +// sys/class/drm/cardX/device/numa_node (Numa node index[1], number) +// [1] indexing these: /sys/devices/system/node/nodeX/ +//--------------------------------------------------------------- +// devfs SPECIFICATION +// +// dev/dri/cardX +// dev/dri/renderD1XX +//--------------------------------------------------------------- + +package fakedri + +import ( + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "strconv" + "strings" + + "golang.org/x/sys/unix" + + "gopkg.in/yaml.v2" + + "k8s.io/klog/v2" +) + +const ( + dirMode = 0775 + fileMode = 0644 + cardBase = 0 + renderBase = 128 + maxDevs = 128 + sysfsPath = "/sys" + devfsPath = "/dev" + mib = 1024.0 * 1024.0 + devNullMajor = 1 + devNullMinor = 3 + devNullType = unix.S_IFCHR + maxK8sLabelSize = 63 + fullyConnected = "FULL" +) + +// GenOptions is the fake device spec decoded from YAML or JSON, plus counters for what was generated. 
+type GenOptions struct { + Capabilities map[string]string `yaml:"Capabilities"` // Device capabilities mapping for NFD hook + Info string `yaml:"Info"` // Verbal config description + Driver string `yaml:"Driver"` // Driver name (i915, xe) + Mode string `yaml:"Mode"` // Mode of operation (future use with different generation modes) + Path string `yaml:"Path"` // Path to fake device folder + NfdFeatureDir string `yaml:"NfdDir"` // NFD directory + DevCount int `yaml:"DevCount"` // How many devices to fake + TilesPerDev int `yaml:"TilesPerDev"` // Per-device tile count + DevMemSize int `yaml:"DevMemSize"` // Available per-device device-local memory, in bytes + DevsPerNumaNode int `yaml:"DevsPerNumaNode"` // How many devices per Numa node + VfsPerPf int `yaml:"VfsPerPf"` // How many SR-IOV VFs per PF + + // fields for counting what was generated + files int + dirs int + devs int + symls int +} + +func addSysfsDriTree(root string, opts *GenOptions, i int) error { + card := fmt.Sprintf("card%d", cardBase+i) + base := filepath.Join(root, "class", "drm", card) + + if err := os.MkdirAll(base, dirMode); err != nil { + return err + } + + opts.dirs++ + + data := []byte(strconv.Itoa(opts.DevMemSize)) + file := filepath.Join(base, "lmem_total_bytes") + + if err := os.WriteFile(file, data, fileMode); err != nil { + return err + } + + opts.files++ + + path := filepath.Join(base, "device", "drm", card) + if err := os.MkdirAll(path, dirMode); err != nil { + return err + } + + opts.dirs++ + + path = filepath.Join(base, "device", "drm", fmt.Sprintf("renderD%d", renderBase+i)) + if err := os.Mkdir(path, dirMode); err != nil { + return err + } + + opts.dirs++ + + file = filepath.Join(base, "device", "driver") + if err := os.Symlink(fmt.Sprintf("../../../../bus/pci/drivers/%s", opts.Driver), file); err != nil { + klog.Fatalf("symlink creation failed '%s': %v", + file, err) + } + + opts.symls++ + + data = []byte("0x8086") + file = filepath.Join(base, "device", "vendor") + + if err := 
os.WriteFile(file, data, fileMode); err != nil { + return err + } + + opts.files++ + + node := 0 + if opts.DevsPerNumaNode > 0 { + node = i / opts.DevsPerNumaNode + } + + data = []byte(strconv.Itoa(node)) + file = filepath.Join(base, "device", "numa_node") + + if err := os.WriteFile(file, data, fileMode); err != nil { + return err + } + + opts.files++ + + if opts.VfsPerPf > 0 && i%(opts.VfsPerPf+1) == 0 { + data = []byte(strconv.Itoa(opts.VfsPerPf)) + file = filepath.Join(base, "device", "sriov_numvfs") + + if err := os.WriteFile(file, data, fileMode); err != nil { + return err + } + + opts.files++ + } + + for tile := 0; tile < opts.TilesPerDev; tile++ { + path := filepath.Join(base, "gt", fmt.Sprintf("gt%d", tile)) + if err := os.MkdirAll(path, dirMode); err != nil { + return err + } + + opts.dirs++ + } + + return nil +} + +func addSysfsBusTree(root string, opts *GenOptions, i int) error { + pciName := fmt.Sprintf("0000:00:0%d.0", i) + base := filepath.Join(root, "bus", "pci", "drivers", opts.Driver, pciName) + + if err := os.MkdirAll(base, dirMode); err != nil { + return err + } + + opts.dirs++ + + data := []byte("0x4905") + file := filepath.Join(base, "device") + + if err := os.WriteFile(file, data, fileMode); err != nil { + return err + } + + opts.files++ + + drm := filepath.Join(base, "drm") + if err := os.MkdirAll(drm, dirMode); err != nil { + return err + } + + opts.dirs++ + + return addDeviceNodes(drm, opts, i) +} + +func addDeviceNodes(base string, opts *GenOptions, i int) error { + mode := uint32(fileMode | devNullType) + devid := int(unix.Mkdev(uint32(devNullMajor), uint32(devNullMinor))) + + file := filepath.Join(base, fmt.Sprintf("card%d", cardBase+i)) + if err := unix.Mknod(file, mode, devid); err != nil { + klog.Fatalf("NULL device (%d:%d) node creation failed for '%s': %v", + devNullMajor, devNullMinor, file, err) + } + + opts.devs++ + + file = filepath.Join(base, fmt.Sprintf("renderD%d", renderBase+i)) + if err := unix.Mknod(file, mode, devid); err 
!= nil { + klog.Fatalf("NULL device (%d:%d) node creation failed for '%s': %v", + devNullMajor, devNullMinor, file, err) + } + + opts.devs++ + + return nil +} + +func addDeviceSymlinks(base string, opts *GenOptions, i int) error { + target := filepath.Join(base, fmt.Sprintf("by-path/pci-0000:%02d:02.0-card", i)) + if err := os.Symlink(fmt.Sprintf("../card%d", cardBase+i), target); err != nil { + klog.Fatalf("symlink creation failed '%s': %v", + target, err) + } + + opts.symls++ + + target = filepath.Join(base, fmt.Sprintf("by-path/pci-0000:%02d:02.0-render", i)) + if err := os.Symlink(fmt.Sprintf("../renderD%d", renderBase+i), target); err != nil { + klog.Fatalf("symlink creation failed '%s': %v", + target, err) + } + + opts.symls++ + + return nil +} + +func addDevfsDriTree(root string, opts *GenOptions, i int) error { + base := filepath.Join(root, "dri") + if err := os.MkdirAll(base, dirMode); err != nil { + return err + } + + if err := os.MkdirAll(filepath.Join(root, "dri/by-path"), dirMode); err != nil { + return err + } + + opts.dirs++ + + if err := addDeviceNodes(base, opts, i); err != nil { + return err + } + + return addDeviceSymlinks(base, opts, i) +} + +func addDebugfsDriTree(root string, opts *GenOptions, i int) error { + base := filepath.Join(root, "kernel", "debug", "dri", strconv.Itoa(i)) + if err := os.MkdirAll(base, dirMode); err != nil { + return err + } + + opts.dirs++ + + path := filepath.Join(base, "i915_capabilities") + f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, fileMode) + + if err != nil { + return err + } + + defer f.Close() + + opts.files++ + + // keys are in random order which provides extra testing for NFD label parsing code + for key, value := range opts.Capabilities { + line := fmt.Sprintf("%s: %s\n", key, value) + if _, err = f.WriteString(line); err != nil { + return err + } + } + + return nil +} + +func removeExistingDir(path, name string) { + entries, err := os.ReadDir(path) + if err != nil && !errors.Is(err, 
fs.ErrNotExist) { + klog.Fatalf("ReadDir() failed on fake %s path '%s': %v", name, path, err) + } + + if len(entries) == 0 { + return + } + + // This should not be too tight as then node could got blocked just putting entries in to folder + if name == "sysfs" && len(entries) > 5 { + klog.Fatalf("too many entries in '%s' - real sysfs?", path) + } + + // This should not be too tight as then node could got blocked just putting entries in to folder + if name == "devfs" && (len(entries) > 5) { + klog.Fatalf("too many entries in '%s', or '%s' != 'dri' - real devfs?", path, entries[0].Name()) + } + + klog.V(1).Infof("Removing already existing fake %s path '%s'", name, path) + + if err = os.RemoveAll(path); err != nil { + klog.Fatalf("Removing existing %s in '%s' failed: %v", name, path, err) + } +} + +func GenerateDriFiles(opts GenOptions) { + if opts.Info != "" { + klog.V(1).Infof("Config: '%s'", opts.Info) + } + + sysfsPath := opts.Path + sysfsPath + devfsPath := opts.Path + devfsPath + + removeExistingDir(devfsPath, "devfs") + removeExistingDir(sysfsPath, "sysfs") + klog.Infof("Generating fake DRI device(s) sysfs, debugfs and devfs content under '%s' & '%s'", + sysfsPath, devfsPath) + + opts.dirs, opts.files, opts.devs, opts.symls = 0, 0, 0, 0 + for i := 0; i < opts.DevCount; i++ { + if err := addSysfsBusTree(sysfsPath, &opts, i); err != nil { + klog.Fatalf("Dev-%d sysfs bus tree generation failed: %v", i, err) + } + + if err := addSysfsDriTree(sysfsPath, &opts, i); err != nil { + klog.Fatalf("Dev-%d sysfs dri tree generation failed: %v", i, err) + } + + if err := addDevfsDriTree(devfsPath, &opts, i); err != nil { + klog.Fatalf("Dev-%d devfs tree generation failed: %v", i, err) + } + + if err := addDebugfsDriTree(sysfsPath, &opts, i); err != nil { + klog.Fatalf("Dev-%d debugfs tree generation failed: %v", i, err) + } + } + + klog.V(1).Infof("Done, created %d dirs, %d devices, %d files and %d symlinks.", opts.dirs, opts.devs, opts.files, opts.symls) + + 
makeXelinkSideCar(opts) +} + +func makeXelinkSideCar(opts GenOptions) { + topology := opts.Capabilities["connection-topology"] + gpus := opts.DevCount + tiles := opts.TilesPerDev + connections := opts.Capabilities["connections"] + + if topology == fullyConnected { + saveSideCarFile(opts, buildConnectionList(gpus, tiles)) + } else if connections != "" { + saveSideCarFile(opts, connections) + } else { + return + } + + klog.V(1).Infof("XELINK: generated xelink sidecar label file, using (GPUs: %d, Tiles: %d, Topology: %s)", gpus, tiles, topology) +} + +func buildConnectionList(gpus, tiles int) string { + var nodes = make([]string, 0) + + for mm := 0; mm < gpus; mm++ { + for nn := 0; nn < tiles; nn++ { + nodes = append(nodes, fmt.Sprintf("%d.%d", mm, nn)) + } + } + + var links = make(map[string]bool, 0) + + var smap = make([]string, 0) + + for _, from := range nodes { + for _, to := range nodes { + if to == from { + continue + } + + link := fmt.Sprintf("%s-%s", to, from) + + reverselink := fmt.Sprintf("%s-%s", from, to) + if _, exists := links[reverselink]; !exists { + links[link] = true + + smap = append(smap, link) + } + } + } + + return strings.Join(smap, "_") +} + +// saveSideCarFile saves the sidecar labels to a file. +func saveSideCarFile(opts GenOptions, connections string) { + // Check if the directory exists; if not, use the current directory. + if opts.NfdFeatureDir == "" || !isDirExists(opts.NfdFeatureDir) { + klog.Warningf("XELINK: Directory '%s' does not exist. Using current directory.", opts.NfdFeatureDir) + opts.NfdFeatureDir = "." 
// Use current directory + } + + // Kubernetes directory for storing the sidecar labels + xfile := filepath.Join(opts.NfdFeatureDir, "xpum-sidecar-labels.txt") + + // Create the file + f, err := os.Create(xfile) + if err != nil { + klog.Warningf("XELINK: creation of the xelink sidecar label file '%s' failed: %v", xfile, err) + return + } + defer f.Close() + + klog.V(1).Infof("XELINK: created the xelink sidecar label file '%s'", xfile) + + // Write the main part of the connections string to the file + line := fmt.Sprintf("xpumanager.intel.com/xe-links=%s", connections[:min(len(connections), maxK8sLabelSize)]) + klog.V(1).Info(line) + + if _, err := f.WriteString(line + "\n"); err != nil { + klog.Fatalf("XELINK: failed to write to the file '%s': %v", xfile, err) + } + + // Additional lines for strings longer than maxK8sLabelSize + index := 2 + + for i := maxK8sLabelSize; i < len(connections); i += (maxK8sLabelSize - 1) { + line := fmt.Sprintf("xpumanager.intel.com/xe-links%d=Z%s", index, connections[i:min(len(connections), i+maxK8sLabelSize-1)]) + klog.V(1).Info(line) + + if _, err := f.WriteString(line + "\n"); err != nil { + klog.Fatalf("XELINK: failed to write to the file '%s': %v", xfile, err) + } + + index++ + } + + klog.Infof("XELINK: successfully wrote all labels to '%s'", xfile) +} + +// isDirExists checks if a directory exists at the given path. 
+func isDirExists(path string) bool { + info, err := os.Stat(path) + if err != nil { + return false + } + + return info.IsDir() +} + +func verifyOptions(opts GenOptions) GenOptions { + if opts.DevCount < 1 || opts.DevCount > maxDevs { + klog.Fatalf("Invalid device count: 1 <= %d <= %d", opts.DevCount, maxDevs) + } + + if opts.VfsPerPf > 0 { + if opts.TilesPerDev > 0 || opts.DevsPerNumaNode > 0 { + klog.Fatalf("SR-IOV VFs (%d) with device tiles (%d) or Numa nodes (%d) is unsupported for faking", + opts.VfsPerPf, opts.TilesPerDev, opts.DevsPerNumaNode) + } + + if opts.DevCount%(opts.VfsPerPf+1) != 0 { + klog.Fatalf("%d devices cannot be evenly split to between set of 1 SR-IOV PF + %d VFs", + opts.DevCount, opts.VfsPerPf) + } + } + + if opts.DevsPerNumaNode > opts.DevCount { + klog.Fatalf("DevsPerNumaNode (%d) > DevCount (%d)", opts.DevsPerNumaNode, opts.DevCount) + } + + if opts.DevMemSize%mib != 0 { + klog.Fatalf("Invalid memory size (%f mib), not even mib", float64(opts.DevMemSize)/mib) + } + + return opts +} + +func GetOptionsByJSON(data string) GenOptions { + if data == "" { + klog.Fatalf("No fake device spec provided") + } + + klog.V(1).Infof("Using fake device JSON spec: %v\n", data) + + var opts GenOptions + if err := json.Unmarshal([]byte(data), &opts); err != nil { + klog.Fatalf("Unmarshaling JSON spec '%s' failed: %v", data, err) + } + + return verifyOptions(opts) +} + +func GetOptionsByYAML(data string) GenOptions { + if data == "" { + klog.Fatalf("No fake device spec provided") + } + + klog.V(1).Infof("Using fake device YAML spec: %v\n", data) + + var opts GenOptions + if err := yaml.Unmarshal([]byte(data), &opts); err != nil { + klog.Fatalf("Unmarshaling YAML spec '%s' failed: %v", data, err) + } + + return verifyOptions(opts) +}