diff --git a/deployments/gpu_plugin/overlays/fake_devices/add-nfd-feature-file.yaml b/deployments/gpu_plugin/overlays/fake_devices/add-nfd-feature-file.yaml
new file mode 100644
index 000000000..e55126102
--- /dev/null
+++ b/deployments/gpu_plugin/overlays/fake_devices/add-nfd-feature-file.yaml
@@ -0,0 +1,33 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: intel-gpu-plugin
+spec:
+  template:
+    spec:
+      containers:
+        - name: intel-gpu-nfd
+          # turns the generated sysfs content into an NFD feature labels file
+          image: intel/intel-gpu-initcontainer:devel
+          imagePullPolicy: IfNotPresent
+          securityContext:
+            readOnlyRootFilesystem: true
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop: ["ALL"]
+          volumeMounts:
+            - name: nfd-features
+              mountPath: /nfd
+              readOnly: false
+          workingDir: /usr/local/bin/gpu-sw
+          # only needed until the GPU plugin stops using NFD hooks, see:
+          # https://github.com/kubernetes-sigs/node-feature-discovery/issues/856
+          command:
+            - "sh"
+            - "-c"
+            - "while true; do ./intel-gpu-nfdhook | tee /nfd/fake-gpu; sleep 99999; done"
+      volumes:
+        - name: nfd-features
+          hostPath:
+            path: /etc/kubernetes/node-feature-discovery/features.d/
+            type: DirectoryOrCreate
diff --git a/deployments/gpu_plugin/overlays/fake_devices/del-intel-gpu-initcontainer.yaml b/deployments/gpu_plugin/overlays/fake_devices/del-intel-gpu-initcontainer.yaml
new file mode 100644
index 000000000..725978a49
--- /dev/null
+++ b/deployments/gpu_plugin/overlays/fake_devices/del-intel-gpu-initcontainer.yaml
@@ -0,0 +1,11 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: intel-gpu-plugin
+spec:
+  template:
+    spec:
+      initContainers:
+        - name: intel-gpu-initcontainer
+          # strategic merge patch directive: remove this list entry
+          $patch: delete
diff --git a/deployments/gpu_plugin/overlays/fake_devices/fake-device-volumes.yaml b/deployments/gpu_plugin/overlays/fake_devices/fake-device-volumes.yaml
new file mode 100644
index 000000000..950995ede
--- /dev/null
+++ b/deployments/gpu_plugin/overlays/fake_devices/fake-device-volumes.yaml
@@ -0,0 +1,49 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: intel-gpu-plugin
+spec:
+  template:
+    spec:
+      initContainers:
+        - name: fakedev-generator
+          # the container runtime blocks writes to /sys & /dev,
+          # so the volumes have to be mounted somewhere else
+          volumeMounts:
+            - name: devfs
+              mountPath: /tmp/fakedev/dev
+              readOnly: false
+            - name: sysfs
+              mountPath: /tmp/fakedev/sys
+              readOnly: false
+          # the generator writes its files under the CWD
+          workingDir: /tmp/fakedev
+      containers:
+        - name: intel-gpu-nfd
+          # this container looks for sysfs at this path
+          volumeMounts:
+            - name: sysfs
+              mountPath: /host-sys
+              readOnly: true
+        - name: intel-gpu-plugin
+          args: [
+            "-prefix=/tmp/fakedev",
+            "-shared-dev-num=2",
+            "-enable-monitoring",
+            "-resource-manager"
+          ]
+          # host and container devfs paths have to be identical for everything to work
+          volumeMounts:
+            - name: devfs
+              mountPath: /tmp/fakedev/dev
+              readOnly: true
+            - name: sysfs
+              mountPath: /tmp/fakedev/sys
+              readOnly: true
+      volumes:
+        - name: devfs
+          hostPath:
+            path: /tmp/fakedev/dev
+            type: DirectoryOrCreate
+        - name: sysfs
+          emptyDir: {}
diff --git a/deployments/gpu_plugin/overlays/fake_devices/fakedev-config.json b/deployments/gpu_plugin/overlays/fake_devices/fakedev-config.json
new file mode 100644
index 000000000..2fcaa5b45
--- /dev/null
+++ b/deployments/gpu_plugin/overlays/fake_devices/fakedev-config.json
@@ -0,0 +1,8 @@
+{
+  "Info": "8x 4 GiB DG1 [Iris Xe MAX Graphics] GPUs",
+  "DevCount": 8,
+  "DevMemSize": 4294967296,
+  "Capabilities": {
+    "platform": "fake_DG1"
+  }
+}
diff --git a/deployments/gpu_plugin/overlays/fake_devices/generate-fake-devices.yaml b/deployments/gpu_plugin/overlays/fake_devices/generate-fake-devices.yaml
new file mode 100644
index 000000000..4bf038aae
--- /dev/null
+++ b/deployments/gpu_plugin/overlays/fake_devices/generate-fake-devices.yaml
@@ -0,0 +1,24 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: intel-gpu-plugin
+spec:
+  template:
+    spec:
+      volumes:
+        - name: fake-conf
+          configMap:
+            name: fakedev-config
+      initContainers:
+        - name: fakedev-generator
+          image: intel/intel-gpu-fakedev:devel
+          securityContext:
+            runAsUser: 0
+            readOnlyRootFilesystem: false
+            allowPrivilegeEscalation: false
+          volumeMounts:
+            - name: fake-conf
+              mountPath: /config
+              readOnly: true
+          # generate fake sysfs / devfs files for GPU plugin; JSON name = "fakedev-config" ConfigMap key
+          command: ["/generator", "-json", "/config/fakedev-config.json", "-verbose"]
diff --git a/deployments/gpu_plugin/overlays/fake_devices/kustomization.yaml b/deployments/gpu_plugin/overlays/fake_devices/kustomization.yaml
new file mode 100644
index 000000000..2d4aeb242
--- /dev/null
+++ b/deployments/gpu_plugin/overlays/fake_devices/kustomization.yaml
@@ -0,0 +1,16 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+bases:
+  - ../fractional_resources
+configMapGenerator:
+  - name: fakedev-config
+    files:
+      # ConfigMap key is the file name; the generator reads /config/fakedev-config.json
+      - fakedev-config.json
+patches:
+  - fake-device-volumes.yaml
+  - generate-fake-devices.yaml
+  # NFD feature file changes become obsolete once the GPU plugin moves away from NFD hooks:
+  # https://github.com/kubernetes-sigs/node-feature-discovery/issues/856
+  - del-intel-gpu-initcontainer.yaml
+  - add-nfd-feature-file.yaml