From c3a01b91ff10e9a5f8b6b0546b0e40f085e8b439 Mon Sep 17 00:00:00 2001 From: Tuomas Katila Date: Mon, 19 Aug 2024 15:26:06 +0300 Subject: [PATCH] cdi: restructure cdi support for more generic use Pass the whole cdi.spec structure to DeviceInfo and use cdiCache for interacting with the CDI files on the host. Signed-off-by: Tuomas Katila --- .golangci.yml | 1 + cmd/fpga_plugin/dfl_test.go | 63 ++++++++-- cmd/fpga_plugin/fpga_plugin.go | 28 +++-- cmd/fpga_plugin/opae_test.go | 63 ++++++++-- cmd/sgx_plugin/sgx_plugin.go | 4 +- go.mod | 4 + go.sum | 30 +++++ pkg/deviceplugin/api.go | 27 +++-- pkg/deviceplugin/server.go | 93 +++++++++------ pkg/deviceplugin/server_test.go | 205 ++++++++++++++++++++++++++++++-- 10 files changed, 437 insertions(+), 81 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 896c2ea87..a0d05ef61 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -69,6 +69,7 @@ issues: linters: - gocognit - wsl + - gocyclo - path: test/e2e/ linters: - wsl diff --git a/cmd/fpga_plugin/dfl_test.go b/cmd/fpga_plugin/dfl_test.go index 7403c0413..d9a8cc5b9 100644 --- a/cmd/fpga_plugin/dfl_test.go +++ b/cmd/fpga_plugin/dfl_test.go @@ -188,12 +188,6 @@ func TestGetRegionDevelTreeDFL(t *testing.T) { func TestGetRegionTreeDFL(t *testing.T) { expected := dpapi.NewDeviceTree() - hooks := []*cdispec.Hook{ - { - HookName: HookName, - Path: HookPath, - }, - } nodes := []pluginapi.DeviceSpec{ { HostPath: "/dev/dfl-port.0", @@ -201,7 +195,24 @@ func TestGetRegionTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, hooks)) + cdiSpec := &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/fpga", + Devices: []cdispec.Device{ + { + Name: "region1", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + }, + }, + }, + }, + } + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, cdiSpec)) nodes = []pluginapi.DeviceSpec{ { @@ -215,7 +226,24 @@ func TestGetRegionTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, hooks)) + cdiSpec = &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/fpga", + Devices: []cdispec.Device{ + { + Name: "region2", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + }, + }, + }, + }, + } + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, cdiSpec)) nodes = []pluginapi.DeviceSpec{ { @@ -229,7 +257,24 @@ func TestGetRegionTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "region3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, hooks)) + cdiSpec = &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/fpga", + Devices: []cdispec.Device{ + { + Name: "region3", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + }, + }, + }, + }, + } + expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "region3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, cdiSpec)) result := getRegionTree(getDevicesDFL()) if !reflect.DeepEqual(result, expected) { diff --git a/cmd/fpga_plugin/fpga_plugin.go b/cmd/fpga_plugin/fpga_plugin.go index 67d3ca686..5b38be8b7 100644 --- a/cmd/fpga_plugin/fpga_plugin.go +++ b/cmd/fpga_plugin/fpga_plugin.go @@ -53,6 +53,7 @@ const ( scanPeriod = 5 * time.Second // CDI hook attributes. + CDIClass = "fpga" HookName = "createRuntime" HookPath = "/opt/intel/fpga-sw/intel-fpga-crihook" ) @@ -109,6 +110,24 @@ func getRegionTree(devices []device) dpapi.DeviceTree { devType := fmt.Sprintf("%s-%s", regionMode, region.interfaceID) devNodes := make([]pluginapi.DeviceSpec, len(region.afus)) + cdiSpec := &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/" + CDIClass, + Devices: []cdispec.Device{ + { + Name: region.id, + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + }, + }, + }, + }, + } + for num, afu := range region.afus { devNodes[num] = pluginapi.DeviceSpec{ HostPath: afu.devNode, @@ -117,14 +136,7 @@ func getRegionTree(devices []device) dpapi.DeviceTree { } } - hooks := []*cdispec.Hook{ - { - HookName: HookName, - Path: HookPath, - }, - } - - regionTree.AddDevice(devType, region.id, dpapi.NewDeviceInfo(health, devNodes, nil, nil, nil, hooks)) + regionTree.AddDevice(devType, region.id, dpapi.NewDeviceInfo(health, devNodes, nil, nil, nil, cdiSpec)) } } diff --git a/cmd/fpga_plugin/opae_test.go b/cmd/fpga_plugin/opae_test.go index 35a8dc86b..e2719fe7c 100644 --- a/cmd/fpga_plugin/opae_test.go +++ b/cmd/fpga_plugin/opae_test.go @@ -168,12 +168,6 @@ func TestGetRegionDevelTreeOPAE(t *testing.T) { func TestGetRegionTreeOPAE(t *testing.T) { expected := dpapi.NewDeviceTree() - hooks := []*cdispec.Hook{ - { - HookName: HookName, - Path: HookPath, - }, - } nodes := []pluginapi.DeviceSpec{ { HostPath: "/dev/intel-fpga-port.0", @@ -181,7 +175,24 @@ func TestGetRegionTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, hooks)) + cdiSpec := &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/fpga", + Devices: []cdispec.Device{ + { + Name: "intel-fpga-fme.0", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + }, + }, + }, + }, + } + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, cdiSpec)) nodes = []pluginapi.DeviceSpec{ { @@ -190,7 +201,24 @@ func TestGetRegionTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, hooks)) + cdiSpec = &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/fpga", + Devices: []cdispec.Device{ + { + Name: "intel-fpga-fme.1", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + }, + }, + }, + }, + } + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, cdiSpec)) nodes = []pluginapi.DeviceSpec{ { @@ -199,7 +227,24 @@ func TestGetRegionTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "intel-fpga-fme.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, hooks)) + cdiSpec = &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/fpga", + Devices: []cdispec.Device{ + { + Name: "intel-fpga-fme.2", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + }, + }, + }, + }, + } + expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "intel-fpga-fme.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, cdiSpec)) result := getRegionTree(getDevicesOPAE()) if !reflect.DeepEqual(result, expected) { diff --git a/cmd/sgx_plugin/sgx_plugin.go b/cmd/sgx_plugin/sgx_plugin.go index e5ae16bae..cc8cd289a 100644 --- a/cmd/sgx_plugin/sgx_plugin.go +++ b/cmd/sgx_plugin/sgx_plugin.go @@ -87,13 +87,13 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { for i := uint(0); i < dp.nEnclave; i++ { devID := fmt.Sprintf("%s-%d", "sgx-enclave", i) nodes := []pluginapi.DeviceSpec{{HostPath: sgxEnclavePath, ContainerPath: sgxEnclavePath, Permissions: "rw"}} - devTree.AddDevice(deviceTypeEnclave, devID, dpapi.NewDeviceInfoWithTopologyHints(pluginapi.Healthy, nodes, nil, nil, nil, nil)) + devTree.AddDevice(deviceTypeEnclave, devID, dpapi.NewDeviceInfoWithTopologyHints(pluginapi.Healthy, nodes, nil, nil, nil, nil, nil)) } for i := uint(0); i < dp.nProvision; i++ { devID := fmt.Sprintf("%s-%d", "sgx-provision", i) nodes := []pluginapi.DeviceSpec{{HostPath: sgxProvisionPath, ContainerPath: sgxProvisionPath, Permissions: "rw"}} - devTree.AddDevice(deviceTypeProvision, devID, dpapi.NewDeviceInfoWithTopologyHints(pluginapi.Healthy, nodes, nil, nil, nil, nil)) + devTree.AddDevice(deviceTypeProvision, devID, dpapi.NewDeviceInfoWithTopologyHints(pluginapi.Healthy, nodes, nil, nil, nil, nil, nil)) } return devTree, nil diff --git a/go.mod b/go.mod index 9f4ffba85..ff0ff91fc 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 sigs.k8s.io/controller-runtime v0.19.0 sigs.k8s.io/yaml v1.4.0 + tags.cncf.io/container-device-interface v0.8.0 tags.cncf.io/container-device-interface/specs-go v0.8.0 ) @@ -78,11 +79,14 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/runtime-spec v1.1.0 // indirect + github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect github.com/prometheus/client_golang v1.20.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/spf13/cobra v1.8.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stoewer/go-strcase v1.2.0 // indirect + github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect github.com/x448/float16 v0.8.4 // indirect go.etcd.io/etcd/api/v3 v3.5.14 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.14 // indirect diff --git a/go.sum b/go.sum index 239357fe8..4a76a04cc 100644 --- a/go.sum +++ b/go.sum @@ -83,6 +83,7 @@ github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 h1:5iH8iuqE5apketRbSFBy+X1V0o+l+8NF1avt4HWl7cA= github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= @@ -95,6 +96,10 @@ github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4 github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU= github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -122,6 +127,7 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs= github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8= github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -129,6 +135,7 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= @@ -139,6 +146,14 @@ github.com/onsi/gomega v1.34.2 h1:pNCwDkzrsv7MS9kpaQvVb1aVLahQXyJ/Tv5oAZMI3i8= github.com/onsi/gomega v1.34.2/go.mod h1:v1xfxRgk0KIsG+QOdm7p8UosrOzPYRo60fd3B/1Dukc= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= +github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0= +github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI= +github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= +github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -155,6 +170,7 @@ github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoG github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= @@ -170,6 +186,7 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -177,10 +194,19 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI= +github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= +github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= +github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -251,6 +277,8 @@ golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= @@ -350,5 +378,7 @@ sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+s sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +tags.cncf.io/container-device-interface v0.8.0 h1:8bCFo/g9WODjWx3m6EYl3GfUG31eKJbaggyBDxEldRc= +tags.cncf.io/container-device-interface v0.8.0/go.mod h1:Apb7N4VdILW0EVdEMRYXIDVRZfNJZ+kmEUss2kRRQ6Y= tags.cncf.io/container-device-interface/specs-go v0.8.0 h1:QYGFzGxvYK/ZLMrjhvY0RjpUavIn4KcmRmVP/JjdBTA= tags.cncf.io/container-device-interface/specs-go v0.8.0/go.mod h1:BhJIkjjPh4qpys+qm4DAYtUyryaTDg9zris+AczXyws= diff --git a/pkg/deviceplugin/api.go b/pkg/deviceplugin/api.go index 06d0e0557..2997f93a8 100644 --- a/pkg/deviceplugin/api.go +++ b/pkg/deviceplugin/api.go @@ -28,11 +28,10 @@ type DeviceInfo struct { envs map[string]string annotations map[string]string topology *pluginapi.TopologyInfo - state string - nodes []pluginapi.DeviceSpec - // Hooks can be passed only through CDI // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4009-add-cdi-devices-to-device-plugin-api - hooks []*cdispec.Hook + cdiSpec *cdispec.Spec + state string + nodes []pluginapi.DeviceSpec } // UseDefaultMethodError allows the plugin to request running the default @@ -49,14 +48,14 @@ func init() { } // NewDeviceInfo makes DeviceInfo struct and adds topology information to it. -func NewDeviceInfo(state string, nodes []pluginapi.DeviceSpec, mounts []pluginapi.Mount, envs, annotations map[string]string, hooks []*cdispec.Hook) DeviceInfo { +func NewDeviceInfo(state string, nodes []pluginapi.DeviceSpec, mounts []pluginapi.Mount, envs, annotations map[string]string, cdiSpec *cdispec.Spec) DeviceInfo { deviceInfo := DeviceInfo{ state: state, nodes: nodes, mounts: mounts, envs: envs, annotations: annotations, - hooks: hooks, + cdiSpec: cdiSpec, } devPaths := []string{} @@ -77,7 +76,7 @@ func NewDeviceInfo(state string, nodes []pluginapi.DeviceSpec, mounts []pluginap // NewDeviceInfoWithTopologyHints makes DeviceInfo struct with topology information provided to it. func NewDeviceInfoWithTopologyHints(state string, nodes []pluginapi.DeviceSpec, mounts []pluginapi.Mount, envs map[string]string, - annotations map[string]string, topology *pluginapi.TopologyInfo) DeviceInfo { + annotations map[string]string, topology *pluginapi.TopologyInfo, cdiSpec *cdispec.Spec) DeviceInfo { return DeviceInfo{ state: state, nodes: nodes, @@ -85,6 +84,7 @@ func NewDeviceInfoWithTopologyHints(state string, nodes []pluginapi.DeviceSpec, envs: envs, annotations: annotations, topology: topology, + cdiSpec: cdiSpec, } } @@ -102,6 +102,19 @@ func (tree DeviceTree) AddDevice(devType, id string, info DeviceInfo) { tree[devType] = make(map[string]DeviceInfo) } + if info.cdiSpec != nil { + devLength := len(info.cdiSpec.Devices) + if devLength == 0 { + klog.Warning("No CDI devices defined in spec, removing spec") + + info.cdiSpec = nil + } else if devLength > 1 { + klog.Warning("Including more than one CDI device per spec is not supported, using first") + + info.cdiSpec.Devices = info.cdiSpec.Devices[:1] + } + } + tree[devType][id] = info } diff --git a/pkg/deviceplugin/server.go b/pkg/deviceplugin/server.go index 19e0a4e25..9b15a4734 100644 --- a/pkg/deviceplugin/server.go +++ b/pkg/deviceplugin/server.go @@ -16,12 +16,12 @@ package deviceplugin import ( "context" - "encoding/json" "fmt" "net" "os" "path" "path/filepath" + "strings" "sync" "time" @@ -30,10 +30,11 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" + "tags.cncf.io/container-device-interface/pkg/cdi" + cdispec "tags.cncf.io/container-device-interface/specs-go" "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" - cdispec "tags.cncf.io/container-device-interface/specs-go" ) type serverState int @@ -43,9 +44,10 @@ const ( uninitialized serverState = iota serving terminating + CDIVersion = "0.5.0" // Kubernetes 1.27 / CRI-O 1.27 / Containerd 1.7 use this version. - CDIKind = "intel.cdi.k8s.io/device" CDIDir = "/var/run/cdi" + CDIVendor = "intel.cdi.k8s.io" ) // devicePluginServer maintains a gRPC server satisfying @@ -67,6 +69,7 @@ type server struct { preStartContainer preStartContainerFunc getPreferredAllocation getPreferredAllocationFunc devType string + cdiDir string state serverState stateMutex sync.Mutex } @@ -86,6 +89,7 @@ func newServer(devType string, preStartContainer: preStartContainer, getPreferredAllocation: getPreferredAllocation, state: uninitialized, + cdiDir: CDIDir, } } @@ -136,37 +140,6 @@ func (srv *server) ListAndWatch(empty *pluginapi.Empty, stream pluginapi.DeviceP return nil } -func generateCDIDevices(deviceID string, dev *DeviceInfo) ([]*pluginapi.CDIDevice, error) { - if len(dev.hooks) == 0 { - return nil, nil - } - - spec := cdispec.Spec{ - Version: CDIVersion, - Kind: CDIKind, - Devices: []cdispec.Device{ - { - Name: deviceID, - ContainerEdits: cdispec.ContainerEdits{ - Hooks: dev.hooks, - }, - }, - }, - } - - jsonSpec, err := json.Marshal(spec) - if err != nil { - return nil, err - } - - cdiFileName := path.Join(CDIDir, deviceID) + ".json" - if err = os.WriteFile(cdiFileName, jsonSpec, 0o600); err != nil { - return nil, err - } - - return []*pluginapi.CDIDevice{{Name: fmt.Sprintf("%s=%s", CDIKind, deviceID)}}, nil -} - func (srv *server) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { if srv.allocate != nil { response, err := srv.allocate(rqt) @@ -211,12 +184,9 @@ func (srv *server) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest) cresp.Annotations[key] = value } - CDIDevices, err := generateCDIDevices(id, &dev) - if err != nil { - return nil, fmt.Errorf("device %s: cannot generate CDI device: %w", id, err) + if names, err := writeCdiSpecToFilesystem(dev.cdiSpec, srv.cdiDir); err == nil { + cresp.CDIDevices = append(cresp.CDIDevices, names...) } - - cresp.CDIDevices = append(cresp.CDIDevices, CDIDevices...) } response.ContainerResponses = append(response.ContainerResponses, cresp) @@ -428,3 +398,48 @@ func waitForServer(socket string, timeout time.Duration) error { } } } + +// Writes CDI spec to filesystem if not found from the CDI cache. +// Returns a list of CDI device names. +func writeCdiSpecToFilesystem(spec *cdispec.Spec, cdiDir string) ([]*pluginapi.CDIDevice, error) { + names := []*pluginapi.CDIDevice{} + + if spec == nil { + return names, nil + } + + cache, err := cdi.NewCache(cdi.WithAutoRefresh(false), cdi.WithSpecDirs(cdiDir)) + if err != nil { + return nil, err + } + + // It's expected to have one device per spec + if len(spec.Devices) != 1 { + return nil, os.ErrNotExist + } + + deviceName := spec.Devices[0].Name + fqName := fmt.Sprintf("%s=%s", spec.Kind, deviceName) + + names = append(names, &pluginapi.CDIDevice{Name: fqName}) + + // The device is found in the cache. + if cache.GetDevice(fqName) != nil { + return names, nil + } + + // Generate filename with '/' and '=' replaced with '-'. + specFileName := fmt.Sprintf("%s-%s.yaml", strings.ReplaceAll(spec.Kind, "/", "-"), deviceName) + + // Write spec to filesystem. + if err := cache.WriteSpec(spec, specFileName); err != nil { + return nil, err + } + + // Fix access issues due to: https://github.com/cncf-tags/container-device-interface/issues/224 + if err := os.Chmod(filepath.Join(cdiDir, specFileName), 0o644); err != nil { + return nil, err + } + + return names, nil +} diff --git a/pkg/deviceplugin/server_test.go b/pkg/deviceplugin/server_test.go index c1849c1b3..d22a78695 100644 --- a/pkg/deviceplugin/server_test.go +++ b/pkg/deviceplugin/server_test.go @@ -30,6 +30,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/metadata" + cdispec "tags.cncf.io/container-device-interface/specs-go" "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" @@ -259,19 +260,22 @@ func TestStop(t *testing.T) { } func TestAllocate(t *testing.T) { - rqt := &pluginapi.AllocateRequest{ - ContainerRequests: []*pluginapi.ContainerAllocateRequest{ - { - DevicesIDs: []string{"dev1"}, - }, - }, - } srv := newTestServer() + tmpRoot, err := os.MkdirTemp("", "server-allocate") + if err != nil { + t.Fatal("couldn't create temporary dir", err) + } + + defer os.RemoveAll(tmpRoot) + + srv.cdiDir = tmpRoot + tcases := []struct { devices map[string]DeviceInfo postAllocate func(*pluginapi.AllocateResponse) error name string + checkCdiFiles []string expectedAllocated int expectedErr bool }{ @@ -348,6 +352,162 @@ func TestAllocate(t *testing.T) { }, expectedAllocated: 2, }, + { + name: "Allocate healthy device with CDI device", + devices: map[string]DeviceInfo{ + "dev1": { + state: pluginapi.Healthy, + nodes: []pluginapi.DeviceSpec{ + { + HostPath: "/dev/dev1", + ContainerPath: "/dev/dev1", + Permissions: "rw", + }, + }, + cdiSpec: &cdispec.Spec{ + Kind: "intel.com/foo", + Version: "0.5.0", + Devices: []cdispec.Device{ + { + Name: "dev1", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: "createRuntime", + Path: "/bin/my-hook", + }, + }, + }, + }, + }, + }, + }, + }, + checkCdiFiles: []string{filepath.Join(tmpRoot, "intel.com-foo-dev1.yaml")}, + expectedAllocated: 1, + }, + { + name: "Allocate healthy devices with CDI devices", + devices: map[string]DeviceInfo{ + "dev1": { + state: pluginapi.Healthy, + nodes: []pluginapi.DeviceSpec{ + { + HostPath: "/dev/dev1", + ContainerPath: "/dev/dev1", + Permissions: "rw", + }, + }, + cdiSpec: &cdispec.Spec{ + Kind: "intel.com/foo", + Version: "0.5.0", + Devices: []cdispec.Device{ + { + Name: "dev2", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: "createRuntime", + Path: "/bin/my-hook", + }, + }, + }, + }, + }, + }, + }, + "dev2": { + state: pluginapi.Healthy, + nodes: []pluginapi.DeviceSpec{ + { + HostPath: "/dev/dev2", + ContainerPath: "/dev/dev2", + Permissions: "rw", + }, + }, + cdiSpec: &cdispec.Spec{ + Kind: "intel.com/bar", + Version: "0.5.0", + Devices: []cdispec.Device{ + { + Name: "dev2", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: "createRuntime", + Path: "/bin/my-hook", + }, + }, + }, + }, + }, + }, + }, + }, + checkCdiFiles: []string{filepath.Join(tmpRoot, "intel.com-foo-dev1.yaml"), filepath.Join(tmpRoot, "intel.com-bar-dev2.yaml")}, + expectedAllocated: 2, + }, + { + name: "Allocate two healthy devices with one CDI device", + devices: map[string]DeviceInfo{ + "dev1": { + state: pluginapi.Healthy, + nodes: []pluginapi.DeviceSpec{ + { + HostPath: "/dev/dev1", + ContainerPath: "/dev/dev1", + Permissions: "rw", + }, + }, + cdiSpec: &cdispec.Spec{ + Kind: "intel.com/foo", + Version: "0.5.0", + Devices: []cdispec.Device{ + { + Name: "dev", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: "createRuntime", + Path: "/bin/my-hook", + }, + }, + }, + }, + }, + }, + }, + "dev2": { + state: pluginapi.Healthy, + nodes: []pluginapi.DeviceSpec{ + { + HostPath: "/dev/dev2", + ContainerPath: "/dev/dev2", + Permissions: "rw", + }, + }, + cdiSpec: &cdispec.Spec{ + Kind: "intel.com/foo", + Version: "0.5.0", + Devices: []cdispec.Device{ + { + Name: "dev", + ContainerEdits: cdispec.ContainerEdits{ + Hooks: []*cdispec.Hook{ + { + HookName: "createRuntime", + Path: "/bin/my-hook", + }, + }, + }, + }, + }, + }, + }, + }, + checkCdiFiles: []string{filepath.Join(tmpRoot, "intel.com-foo-dev.yaml")}, + expectedAllocated: 2, + }, { name: "Allocate healthy device with failing postAllocate hook", devices: map[string]DeviceInfo{ @@ -370,6 +530,21 @@ func TestAllocate(t *testing.T) { } for _, tt := range tcases { + rqt := &pluginapi.AllocateRequest{ + ContainerRequests: []*pluginapi.ContainerAllocateRequest{ + {DevicesIDs: []string{"dev-1"}}, + }, + } + + if len(tt.devices) > 0 { + devNames := []string{} + for devName := range tt.devices { + devNames = append(devNames, devName) + } + + rqt.ContainerRequests[0].DevicesIDs = devNames + } + srv.devices = tt.devices srv.postAllocate = tt.postAllocate resp, err := srv.Allocate(context.Background(), rqt) @@ -393,6 +568,22 @@ func TestAllocate(t *testing.T) { t.Errorf("Test case '%s': got equal dev nodes in the same response", tt.name) } } + if len(tt.checkCdiFiles) > 0 { + for _, cdiFile := range tt.checkCdiFiles { + stat, err := os.Stat(cdiFile) + if err != nil { + t.Errorf("Test case '%s': couldn't find cdi spec for device (%s)", tt.name, cdiFile) + } + + if stat.Mode() != 0o644 { + t.Errorf("Test case '%s': wrong mode set for the cdi spec file (%s)", tt.name, cdiFile) + } + } + + if len(resp.ContainerResponses[0].CDIDevices) == 0 { + t.Errorf("Test case '%s': no cdi devices in allocate response", tt.name) + } + } } }