Skip to content

Commit bcc737b

Browse files
committed
Adapt GPU label support to debugfs DRM entry changes
The GPU generation "gen" number is replaced in the capability files of the latest kernels with separate display, graphics, and media versions. For compatibility with newer kernels, provide "gen" based on the new labels (but without decimals), and for compatibility with older kernels, provide the new labels based on "gen". Because different kernels match different items from the action map, the whole capability file gets parsed. Capability file parsing is optimized by using a prefix check instead of scanf. The "platform_gen" label is deprecated and can be dropped whenever it becomes inconvenient (lint complains about line count, etc.).
1 parent 434b7d6 commit bcc737b

File tree

3 files changed

+119
-18
lines changed

3 files changed

+119
-18
lines changed

cmd/gpu_nfdhook/README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,11 @@ Depending on your kernel driver, running the NFD hook as root may introduce foll
4343

4444
name | type | description|
4545
-----|------|------|
46-
|`gpu.intel.com/platform_gen`| string | GPU platform generation name, typically a number.
46+
|`gpu.intel.com/platform_gen`| string | GPU platform generation name, typically an integer. Deprecated.
47+
|`gpu.intel.com/media_version`| string | GPU platform media pipeline generation name, typically a number.
48+
|`gpu.intel.com/graphics_version`| string | GPU platform graphics/compute pipeline generation name, typically a number.
4749
|`gpu.intel.com/platform_<PLATFORM_NAME>_.count`| number | GPU count for the named platform.
4850
|`gpu.intel.com/platform_<PLATFORM_NAME>_.tiles`| number | GPU tile count in the GPUs of the named platform.
4951
|`gpu.intel.com/platform_<PLATFORM_NAME>_.present`| string | "true" for indicating the presence of the GPU platform.
5052

51-
For the above to work as intended, installed GPUs must be identical in their capabilities.
53+
For the above to work as intended, GPUs on the same node must be identical in their capabilities.

cmd/gpu_nfdhook/labeler.go

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2020 Intel Corporation. All Rights Reserved.
1+
// Copyright 2020-2021 Intel Corporation. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -171,33 +171,70 @@ func (l *labeler) createCapabilityLabels(cardNum string, numTiles uint64) {
171171
}
172172
defer file.Close()
173173

174-
// define strings to search from the file, and the actions to take in order to create labels from those strings (as funcs)
174+
gen := ""
175+
media := ""
176+
graphics := ""
177+
// define string prefixes to search from the file, and the actions to take in order to create labels from those strings (as funcs)
175178
searchStringActionMap := map[string]func(string){
176-
"platform: ": func(platformName string) {
179+
"platform:": func(platformName string) {
177180
l.labels.addNumericLabel(labelNamespace+"platform_"+platformName+".count", 1)
178181
l.labels[labelNamespace+"platform_"+platformName+".tiles"] = strconv.FormatInt(int64(numTiles), 10)
179182
l.labels[labelNamespace+"platform_"+platformName+".present"] = "true"
180183
},
181-
"gen: ": func(genName string) {
182-
l.labels[labelNamespace+"platform_gen"] = genName
184+
// there's also display block version, but that's not relevant
185+
"media version:": func(version string) {
186+
l.labels[labelNamespace+"media_version"] = version
187+
media = version
188+
},
189+
"graphics version:": func(version string) {
190+
l.labels[labelNamespace+"graphics_version"] = version
191+
graphics = version
192+
},
193+
"gen:": func(version string) {
194+
l.labels[labelNamespace+"platform_gen"] = version
195+
gen = version
183196
},
184197
}
185198

186199
// Finally, read the file, and try to find the matches. Perform actions and reduce the search map size as we proceed. Stop scanning at 0 size.
187200
scanner := bufio.NewScanner(file)
201+
scanning:
188202
for scanner.Scan() {
189-
for searchString, action := range searchStringActionMap {
190-
var stringValue string
191-
n, _ := fmt.Sscanf(scanner.Text(), searchString+"%s", &stringValue)
192-
if n > 0 {
193-
action(stringValue)
194-
delete(searchStringActionMap, searchString)
195-
if len(searchStringActionMap) == 0 {
196-
return
197-
}
198-
break
203+
line := scanner.Text()
204+
for prefix, action := range searchStringActionMap {
205+
if !strings.HasPrefix(line, prefix) {
206+
continue
207+
}
208+
fields := strings.Split(line, ": ")
209+
if len(fields) == 2 {
210+
action(fields[1])
211+
} else {
212+
klog.Warningf("invalid '%s' line format: '%s'", file.Name(), line)
213+
}
214+
delete(searchStringActionMap, prefix)
215+
if len(searchStringActionMap) == 0 {
216+
break scanning
199217
}
218+
break
219+
}
220+
}
221+
if gen == "" {
222+
// TODO: drop gen label before engine types
223+
// start to have diverging major gen values
224+
if graphics != "" {
225+
gen = graphics
226+
} else if media != "" {
227+
gen = media
228+
}
229+
if gen != "" {
230+
// truncate to major value
231+
gen = strings.SplitN(gen, ".", 2)[0]
232+
l.labels[labelNamespace+"platform_gen"] = gen
200233
}
234+
} else if media == "" && graphics == "" {
235+
// 5.14 or older kernels need this
236+
l.labels[labelNamespace+"media_version"] = gen
237+
l.labels[labelNamespace+"graphics_version"] = gen
201238
}
202239
}
203240

cmd/gpu_nfdhook/labeler_test.go

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2020 Intel Corporation. All Rights Reserved.
1+
// Copyright 2020-2021 Intel Corporation. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -58,6 +58,8 @@ func getTestCases() []testcase {
5858
"gpu.intel.com/platform_new.count": "1",
5959
"gpu.intel.com/platform_new.present": "true",
6060
"gpu.intel.com/platform_new.tiles": "1",
61+
"gpu.intel.com/graphics_version": "9",
62+
"gpu.intel.com/media_version": "9",
6163
"gpu.intel.com/platform_gen": "9",
6264
"gpu.intel.com/cards": "card0",
6365
},
@@ -104,6 +106,8 @@ func getTestCases() []testcase {
104106
"gpu.intel.com/platform_new.count": "1",
105107
"gpu.intel.com/platform_new.present": "true",
106108
"gpu.intel.com/platform_new.tiles": "2",
109+
"gpu.intel.com/graphics_version": "9",
110+
"gpu.intel.com/media_version": "9",
107111
"gpu.intel.com/platform_gen": "9",
108112
"gpu.intel.com/cards": "card0",
109113
},
@@ -132,6 +136,8 @@ func getTestCases() []testcase {
132136
"gpu.intel.com/platform_new.count": "1",
133137
"gpu.intel.com/platform_new.present": "true",
134138
"gpu.intel.com/platform_new.tiles": "1",
139+
"gpu.intel.com/graphics_version": "9",
140+
"gpu.intel.com/media_version": "9",
135141
"gpu.intel.com/platform_gen": "9",
136142
"gpu.intel.com/cards": "card0",
137143
},
@@ -157,6 +163,8 @@ func getTestCases() []testcase {
157163
"gpu.intel.com/platform_new.count": "1",
158164
"gpu.intel.com/platform_new.present": "true",
159165
"gpu.intel.com/platform_new.tiles": "1",
166+
"gpu.intel.com/graphics_version": "9",
167+
"gpu.intel.com/media_version": "9",
160168
"gpu.intel.com/platform_gen": "9",
161169
"gpu.intel.com/cards": "card0",
162170
},
@@ -184,6 +192,60 @@ func getTestCases() []testcase {
184192
"gpu.intel.com/cards": "card0",
185193
},
186194
},
195+
{
196+
sysfsdirs: []string{
197+
"card0/device/drm/card0",
198+
},
199+
sysfsfiles: map[string][]byte{
200+
"card0/device/vendor": []byte("0x8086"),
201+
},
202+
name: "gen version missing, but media & graphics versions present",
203+
memoryOverride: 16000000000,
204+
capabilityFile: map[string][]byte{
205+
"0/i915_capabilities": []byte(
206+
"platform: new\n" +
207+
"media version: 12.5\n" +
208+
"graphics version: 12.2"),
209+
},
210+
expectedRetval: nil,
211+
expectedLabels: labelMap{
212+
"gpu.intel.com/millicores": "1000",
213+
"gpu.intel.com/memory.max": "16000000000",
214+
"gpu.intel.com/platform_new.count": "1",
215+
"gpu.intel.com/platform_new.present": "true",
216+
"gpu.intel.com/platform_new.tiles": "1",
217+
"gpu.intel.com/graphics_version": "12.2",
218+
"gpu.intel.com/media_version": "12.5",
219+
"gpu.intel.com/platform_gen": "12",
220+
"gpu.intel.com/cards": "card0",
221+
},
222+
},
223+
{
224+
sysfsdirs: []string{
225+
"card0/device/drm/card0",
226+
},
227+
sysfsfiles: map[string][]byte{
228+
"card0/device/vendor": []byte("0x8086"),
229+
},
230+
name: "only media version present",
231+
memoryOverride: 16000000000,
232+
capabilityFile: map[string][]byte{
233+
"0/i915_capabilities": []byte(
234+
"platform: new\n" +
235+
"media version: 12.5"),
236+
},
237+
expectedRetval: nil,
238+
expectedLabels: labelMap{
239+
"gpu.intel.com/millicores": "1000",
240+
"gpu.intel.com/memory.max": "16000000000",
241+
"gpu.intel.com/platform_new.count": "1",
242+
"gpu.intel.com/platform_new.present": "true",
243+
"gpu.intel.com/platform_new.tiles": "1",
244+
"gpu.intel.com/media_version": "12.5",
245+
"gpu.intel.com/platform_gen": "12",
246+
"gpu.intel.com/cards": "card0",
247+
},
248+
},
187249
{
188250
sysfsdirs: []string{
189251
"card0/device/drm/card0",

0 commit comments

Comments
 (0)