Skip to content

Commit def7d09

Browse files
committed
Refactor how device identifiers are parsed before performing automatic CDI spec generation
Signed-off-by: Christopher Desiniotis <[email protected]>
1 parent b9ac54b commit def7d09

25 files changed

+1395
-11
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ require (
66
github.com/NVIDIA/go-nvlib v0.0.0-20231116150931-9fd385bace0d
77
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f
88
github.com/fsnotify/fsnotify v1.5.4
9+
github.com/google/uuid v1.4.0
910
github.com/opencontainers/runtime-spec v1.1.0
1011
github.com/pelletier/go-toml v1.9.4
1112
github.com/sirupsen/logrus v1.9.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
1515
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
1616
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
1717
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
18+
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
19+
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
1820
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
1921
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
2022
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=

pkg/nvcdi/identifier.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package nvcdi
2+
3+
import (
4+
"strconv"
5+
"strings"
6+
7+
"github.com/google/uuid"
8+
)
9+
10+
// identifier is a device identifier string. Its methods classify the string
// as a GPU index, a MIG index, a GPU UUID, or a MIG UUID.
type identifier string
11+
12+
// isGPUIndex checks if an identifier is a full GPU index
13+
func (i identifier) isGpuIndex() bool {
14+
if _, err := strconv.ParseUint(string(i), 10, 0); err != nil {
15+
return false
16+
}
17+
return true
18+
}
19+
20+
// isMigIndex checks if an identifier is a MIG index
21+
func (i identifier) isMigIndex() bool {
22+
split := strings.SplitN(string(i), ":", 2)
23+
if len(split) != 2 {
24+
return false
25+
}
26+
for _, s := range split {
27+
if _, err := strconv.ParseUint(s, 10, 0); err != nil {
28+
return false
29+
}
30+
}
31+
return true
32+
}
33+
34+
// isUUID checks if an identifier is a UUID
35+
func (i identifier) isUUID() bool {
36+
return i.isGpuUUID() || i.isMigUUID()
37+
}
38+
39+
// isGpuUUID checks if an identifier is a GPU UUID
40+
// A GPU UUID must be of the form GPU-b1028956-cfa2-0990-bf4a-5da9abb51763
41+
func (i identifier) isGpuUUID() bool {
42+
if !strings.HasPrefix(string(i), "GPU-") {
43+
return false
44+
}
45+
_, err := uuid.Parse(strings.TrimPrefix(string(i), "GPU-"))
46+
return err == nil
47+
}
48+
49+
// isMigUUID checks if an identifier is a MIG UUID
50+
// A MIG UUID can be of one of two forms:
51+
// - MIG-b1028956-cfa2-0990-bf4a-5da9abb51763
52+
// - MIG-GPU-b1028956-cfa2-0990-bf4a-5da9abb51763/3/0
53+
func (i identifier) isMigUUID() bool {
54+
if !strings.HasPrefix(string(i), "MIG-") {
55+
return false
56+
}
57+
suffix := strings.TrimPrefix(string(i), "MIG-")
58+
_, err := uuid.Parse(suffix)
59+
if err == nil {
60+
return true
61+
}
62+
split := strings.SplitN(suffix, "/", 3)
63+
if len(split) != 3 {
64+
return false
65+
}
66+
if !identifier(split[0]).isGpuUUID() {
67+
return false
68+
}
69+
for _, s := range split[1:] {
70+
_, err := strconv.ParseUint(s, 10, 0)
71+
if err != nil {
72+
return false
73+
}
74+
}
75+
return true
76+
}

pkg/nvcdi/identifier_test.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
package nvcdi
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
func TestIsGpuIndex(t *testing.T) {
11+
testCases := []struct {
12+
id string
13+
expected bool
14+
}{
15+
{"", false},
16+
{"0", true},
17+
{"1", true},
18+
{"not an integer", false},
19+
}
20+
for i, tc := range testCases {
21+
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
22+
actual := identifier(tc.id).isGpuIndex()
23+
require.Equal(t, tc.expected, actual)
24+
})
25+
}
26+
}
27+
28+
func TestIsMigIndex(t *testing.T) {
29+
testCases := []struct {
30+
id string
31+
expected bool
32+
}{
33+
{"", false},
34+
{"0", false},
35+
{"not an integer", false},
36+
{"0:0", true},
37+
{"0:0:0", false},
38+
{"0:0.0", false},
39+
{"0:foo", false},
40+
{"foo:0", false},
41+
}
42+
for i, tc := range testCases {
43+
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
44+
actual := identifier(tc.id).isMigIndex()
45+
require.Equal(t, tc.expected, actual)
46+
})
47+
}
48+
}
49+
50+
func TestIsGpuUUID(t *testing.T) {
51+
testCases := []struct {
52+
id string
53+
expected bool
54+
}{
55+
{"", false},
56+
{"0", false},
57+
{"not an integer", false},
58+
{"GPU-foo", false},
59+
{"GPU-ebd34bdf-1083-eaac-2aff-4b71a022f9bd", true},
60+
{"MIG-ebd34bdf-1083-eaac-2aff-4b71a022f9bd", false},
61+
{"ebd34bdf-1083-eaac-2aff-4b71a022f9bd", false},
62+
}
63+
for i, tc := range testCases {
64+
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
65+
actual := identifier(tc.id).isGpuUUID()
66+
require.Equal(t, tc.expected, actual)
67+
})
68+
}
69+
}
70+
71+
func TestIsMigUUID(t *testing.T) {
72+
testCases := []struct {
73+
id string
74+
expected bool
75+
}{
76+
{"", false},
77+
{"0", false},
78+
{"not an integer", false},
79+
{"MIG-foo", false},
80+
{"MIG-ebd34bdf-1083-eaac-2aff-4b71a022f9bd", true},
81+
{"GPU-ebd34bdf-1083-eaac-2aff-4b71a022f9bd", false},
82+
{"ebd34bdf-1083-eaac-2aff-4b71a022f9bd", false},
83+
}
84+
for i, tc := range testCases {
85+
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
86+
actual := identifier(tc.id).isMigUUID()
87+
require.Equal(t, tc.expected, actual)
88+
})
89+
}
90+
}

pkg/nvcdi/lib-nvml.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,22 +124,37 @@ func (l *nvmllib) GetDeviceSpecsByID(identifiers ...string) ([]specs.Device, err
124124

125125
// TODO: move this to go-nvlib?
126126
func (l *nvmllib) getNVMLDevicesByID(identifiers ...string) ([]nvml.Device, error) {
127-
devices := []nvml.Device{}
127+
var devices []nvml.Device
128128
for _, id := range identifiers {
129-
if dev, err := l.nvmllib.DeviceGetHandleByUUID(id); err == nvml.SUCCESS {
130-
devices = append(devices, dev)
131-
continue
129+
dev, err := l.getNVMLDeviceByID(id)
130+
if err != nvml.SUCCESS {
131+
return nil, fmt.Errorf("failed to get NVML device handle for identifier %q: %w", id, err)
132132
}
133-
// TODO: check for a MIG device index
133+
devices = append(devices, dev)
134+
}
135+
return devices, nil
136+
}
137+
138+
func (l *nvmllib) getNVMLDeviceByID(id string) (nvml.Device, error) {
139+
var err error
140+
devID := identifier(id)
141+
142+
if devID.isUUID() {
143+
return l.nvmllib.DeviceGetHandleByUUID(id)
144+
}
145+
146+
if devID.isGpuIndex() {
134147
if idx, err := strconv.Atoi(id); err == nil {
135-
if dev, err := l.nvmllib.DeviceGetHandleByIndex(idx); err == nvml.SUCCESS {
136-
devices = append(devices, dev)
137-
continue
138-
}
148+
return l.nvmllib.DeviceGetHandleByIndex(idx)
139149
}
140-
return nil, fmt.Errorf("failed to get NVML device handle for identifier %q", id)
150+
return nil, fmt.Errorf("failed to convert device index to an int: %w", err)
141151
}
142-
return devices, nil
152+
153+
if devID.isMigIndex() {
154+
return nil, fmt.Errorf("MIG index is not supported")
155+
}
156+
157+
return nil, fmt.Errorf("identifier is not a valid UUID or index: %q", id)
143158
}
144159

145160
func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {

vendor/github.com/google/uuid/CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/google/uuid/CONTRIBUTING.md

Lines changed: 26 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/google/uuid/CONTRIBUTORS

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/google/uuid/LICENSE

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/google/uuid/README.md

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)