Skip to content

Commit c15feea

Browse files
committed
Add code for generating fake GPU sysfs + devfs files
To facilitate GPU plugin scalability testing on a real cluster. Pre-existing (fake) sysfs & devfs content needs to be removed first: * Fake devfs directory is mounted from host so OCI runtime can "mount" device files also to workloads requesting fake devices. This means that those files can persist over fake GPU plugin life-time, and earlier files need to be removed, as they may not match * DaemonSet restarts failing init containers, so errors about content created on previous generator run would prevent getting logs of the real error on first generator run * Before removal, check that removed directory content is as expected, to avoid accidentally removing host sysfs/devfs content (in case container was erroneously granted access to the real thing) Container runtime requires fake device files to be real devices: * Use NULL devices to represent fake GPU devices: https://www.kernel.org/doc/Documentation/admin-guide/devices.txt * Give more detailed logging for MkNod() failures as device node creation is most likely operation to fail when container does not have the necessary access rights Created content is based on JSON config file (instead of e.g. commandline options) so that (configMap providing) it can be updated independently of the pod where generator is run. Signed-off-by: Eero Tamminen <[email protected]>
1 parent 6347609 commit c15feea

File tree

1 file changed

+313
-0
lines changed

1 file changed

+313
-0
lines changed

cmd/gpu_fakedev/gpu_fakedev.go

+313
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
// Copyright 2021-2022 Intel Corporation. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
//---------------------------------------------------------------
16+
// sysfs SPECIFICATION
17+
//
18+
// sys/class/drm/cardX/
19+
// sys/class/drm/cardX/lmem_total_bytes (gpu memory size, number)
20+
// sys/class/drm/cardX/device/
21+
// sys/class/drm/cardX/device/vendor (0x8086)
22+
// sys/class/drm/cardX/device/sriov_numvfs (PF only, number of VF GPUs, number)
23+
// sys/class/drm/cardX/device/drm/
24+
// sys/class/drm/cardX/device/drm/cardX/
25+
// sys/class/drm/cardX/device/drm/renderD1XX/
26+
// sys/class/drm/cardX/device/numa_node (Numa node index[1], number)
27+
// [1] indexing these: /sys/devices/system/node/nodeX/
28+
//---------------------------------------------------------------
29+
// devfs SPECIFICATION
30+
//
31+
// dev/dri/cardX
32+
// dev/dri/renderD1XX
33+
//---------------------------------------------------------------
34+
35+
package main
36+
37+
import (
38+
"encoding/json"
39+
"errors"
40+
"flag"
41+
"fmt"
42+
"io/fs"
43+
"log"
44+
"os"
45+
46+
"golang.org/x/sys/unix"
47+
)
48+
49+
const (
50+
dirMode = 0775
51+
fileMode = 0644
52+
cardBase = 0
53+
renderBase = 128
54+
maxDevs = 128
55+
sysfsPath = "sys"
56+
devfsPath = "dev"
57+
mib = 1024.0 * 1024.0
58+
// null device major, minor on linux.
59+
devNullMajor = 1
60+
devNullMinor = 3
61+
devNullType = unix.S_IFCHR
62+
)
63+
64+
// verbose enables additional log output; it is set from the "verbose" command-line flag.
var verbose bool
65+
66+
// genOptions holds the fake device configuration decoded from the JSON spec
// file, together with counters for what the generator has created.
type genOptions struct {
	// Capabilities is the device capabilities mapping for the NFD hook.
	Capabilities map[string]string
	// Info is a verbal description of the configuration.
	Info string

	// DevCount is how many devices to fake.
	DevCount int
	// TilesPerDev is the per-device tile count.
	TilesPerDev int
	// DevMemSize is the available per-device device-local memory, in bytes.
	DevMemSize int
	// DevsPerNode is how many devices there are per Numa node.
	DevsPerNode int
	// VfsPerPf is how many SR-IOV VFs there are per PF.
	VfsPerPf int

	// Counters for what was generated; unexported, so never read from JSON.
	files, dirs, devs int
}
79+
80+
func addSysfsDriTree(root string, opts *genOptions, i int) error {
81+
card := cardBase + i
82+
base := fmt.Sprintf("%s/class/drm/card%d", root, card)
83+
84+
if err := os.MkdirAll(base, dirMode); err != nil {
85+
return err
86+
}
87+
opts.dirs++
88+
89+
data := []byte(fmt.Sprintf("%d", opts.DevMemSize))
90+
file := fmt.Sprintf("%s/lmem_total_bytes", base)
91+
92+
if err := os.WriteFile(file, data, fileMode); err != nil {
93+
return err
94+
}
95+
opts.files++
96+
97+
path := fmt.Sprintf("%s/device/drm/card%d", base, card)
98+
if err := os.MkdirAll(path, dirMode); err != nil {
99+
return err
100+
}
101+
opts.dirs++
102+
103+
path = fmt.Sprintf("%s/device/drm/renderD%d", base, renderBase+i)
104+
if err := os.Mkdir(path, dirMode); err != nil {
105+
return err
106+
}
107+
opts.dirs++
108+
109+
data = []byte("0x8086")
110+
file = fmt.Sprintf("%s/device/vendor", base)
111+
112+
if err := os.WriteFile(file, data, fileMode); err != nil {
113+
return err
114+
}
115+
opts.files++
116+
117+
node := 0
118+
if opts.DevsPerNode > 0 {
119+
node = i / opts.DevsPerNode
120+
}
121+
122+
data = []byte(fmt.Sprintf("%d", node))
123+
file = fmt.Sprintf("%s/device/numa_node", base)
124+
125+
if err := os.WriteFile(file, data, fileMode); err != nil {
126+
return err
127+
}
128+
opts.files++
129+
130+
if opts.VfsPerPf > 0 && i%(opts.VfsPerPf+1) == 0 {
131+
data = []byte(fmt.Sprintf("%d", opts.VfsPerPf))
132+
file = fmt.Sprintf("%s/device/sriov_numvfs", base)
133+
134+
if err := os.WriteFile(file, data, fileMode); err != nil {
135+
return err
136+
}
137+
opts.files++
138+
}
139+
140+
for tile := 0; tile < opts.TilesPerDev; tile++ {
141+
path := fmt.Sprintf("%s/gt/gt%d", base, tile)
142+
if err := os.MkdirAll(path, dirMode); err != nil {
143+
return err
144+
}
145+
opts.dirs++
146+
}
147+
148+
return nil
149+
}
150+
151+
func addDevfsDriTree(root string, opts *genOptions, i int) error {
152+
base := fmt.Sprintf("%s/dri", root)
153+
if err := os.MkdirAll(base, dirMode); err != nil {
154+
return err
155+
}
156+
opts.dirs++
157+
158+
mode := uint32(fileMode | devNullType)
159+
devid := int(unix.Mkdev(uint32(devNullMajor), uint32(devNullMinor)))
160+
161+
file := fmt.Sprintf("%s/card%d", base, cardBase+i)
162+
if err := unix.Mknod(file, mode, devid); err != nil {
163+
return fmt.Errorf("NULL device (%d:%d) node creation failed for '%s': %w",
164+
devNullMajor, devNullMinor, file, err)
165+
}
166+
opts.devs++
167+
168+
file = fmt.Sprintf("%s/renderD%d", base, renderBase+i)
169+
if err := unix.Mknod(file, mode, devid); err != nil {
170+
return fmt.Errorf("NULL device (%d:%d) node creation failed for '%s': %w",
171+
devNullMajor, devNullMinor, file, err)
172+
}
173+
opts.devs++
174+
175+
return nil
176+
}
177+
178+
func addDebugfsDriTree(root string, opts *genOptions, i int) error {
179+
base := fmt.Sprintf("%s/kernel/debug/dri/%d", root, i)
180+
if err := os.MkdirAll(base, dirMode); err != nil {
181+
return err
182+
}
183+
opts.dirs++
184+
185+
path := fmt.Sprintf("%s/i915_capabilities", base)
186+
f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, fileMode)
187+
188+
if err != nil {
189+
return err
190+
}
191+
defer f.Close()
192+
opts.files++
193+
194+
// keys are in random order which provides extra testing for NFD label parsing code
195+
for key, value := range opts.Capabilities {
196+
line := fmt.Sprintf("%s: %s\n", key, value)
197+
if _, err = f.WriteString(line); err != nil {
198+
return err
199+
}
200+
}
201+
202+
return nil
203+
}
204+
205+
func removeExistingDir(path, name string) {
206+
entries, err := os.ReadDir(path)
207+
if err != nil && !errors.Is(err, fs.ErrNotExist) {
208+
log.Fatalf("ERROR: ReadDir() failed on fake %s path '%s': %v", name, path, err)
209+
}
210+
211+
if len(entries) == 0 {
212+
return
213+
}
214+
215+
if name == "sysfs" && len(entries) > 2 {
216+
log.Fatalf("ERROR: >2 entries in '%s' - real sysfs?", path)
217+
}
218+
219+
if name == "devfs" && (entries[0].Name() != "dri" || len(entries) > 1) {
220+
log.Fatalf("ERROR: >1 entries in '%s', or '%s' != 'dri' - real devfs?", path, entries[0].Name())
221+
}
222+
223+
log.Printf("WARN: removing already existing fake %s path '%s'", name, path)
224+
225+
if err = os.RemoveAll(path); err != nil {
226+
log.Fatalf("ERROR: removing existing %s in '%s' failed: %v", name, path, err)
227+
}
228+
}
229+
230+
// generateDriFiles generates the fake sysfs + debugfs + devfs dirs & files according to given options.
231+
func generateDriFiles(opts genOptions) {
232+
if opts.Info != "" {
233+
log.Printf("Config: '%s'", opts.Info)
234+
}
235+
236+
removeExistingDir(devfsPath, "devfs")
237+
removeExistingDir(sysfsPath, "sysfs")
238+
log.Printf("Generating fake DRI device(s) sysfs, debugfs and devfs content under '%s' & '%s'",
239+
sysfsPath, devfsPath)
240+
241+
opts.dirs, opts.files = 0, 0
242+
for i := 0; i < opts.DevCount; i++ {
243+
if err := addSysfsDriTree(sysfsPath, &opts, i); err != nil {
244+
log.Fatalf("ERROR: dev-%d sysfs tree generation failed: %v", i, err)
245+
}
246+
247+
if err := addDebugfsDriTree(sysfsPath, &opts, i); err != nil {
248+
log.Fatalf("ERROR: dev-%d debugfs tree generation failed: %v", i, err)
249+
}
250+
251+
if err := addDevfsDriTree(devfsPath, &opts, i); err != nil {
252+
log.Fatalf("ERROR: dev-%d devfs tree generation failed: %v", i, err)
253+
}
254+
}
255+
log.Printf("Done, created %d dirs, %d devices and %d files.", opts.dirs, opts.devs, opts.files)
256+
}
257+
258+
// getOptions parses options from given JSON file, validates and returns them.
259+
func getOptions(name string) genOptions {
260+
if name == "" {
261+
log.Fatal("ERROR: no fake device spec provided")
262+
}
263+
264+
data, err := os.ReadFile(name)
265+
if err != nil {
266+
log.Fatalf("ERROR: reading JSON spec file '%s' failed: %v", name, err)
267+
}
268+
269+
if verbose {
270+
log.Printf("Using fake device spec: %v\n", string(data))
271+
}
272+
273+
var opts genOptions
274+
if err = json.Unmarshal(data, &opts); err != nil {
275+
log.Fatalf("ERROR: Unmarshaling JSON spec file '%s' failed: %v", name, err)
276+
}
277+
278+
if opts.DevCount < 1 || opts.DevCount > maxDevs {
279+
log.Fatalf("ERROR: invalid device count: 1 <= %d <= %d", opts.DevCount, maxDevs)
280+
}
281+
282+
if opts.VfsPerPf > 0 {
283+
if opts.TilesPerDev > 0 || opts.DevsPerNode > 0 {
284+
log.Fatalf("ERROR: SR-IOV VFs (%d) with device tiles (%d) or Numa nodes (%d) is unsupported for faking",
285+
opts.VfsPerPf, opts.TilesPerDev, opts.DevsPerNode)
286+
}
287+
288+
if opts.DevCount%(opts.VfsPerPf+1) != 0 {
289+
log.Fatalf("ERROR: %d devices cannot be evenly split to between set of 1 SR-IOV PF + %d VFs",
290+
opts.DevCount, opts.VfsPerPf)
291+
}
292+
}
293+
294+
if opts.DevsPerNode > opts.DevCount {
295+
log.Fatalf("ERROR: DevsPerNode (%d) > DevCount (%d)", opts.DevsPerNode, opts.DevCount)
296+
}
297+
298+
if opts.DevMemSize%mib != 0 {
299+
log.Fatalf("ERROR: Invalid memory size (%f MiB), not even MiB", float64(opts.DevMemSize)/mib)
300+
}
301+
302+
return opts
303+
}
304+
305+
func main() {
306+
var name string
307+
308+
flag.StringVar(&name, "json", "", "JSON spec for fake device sysfs, debugfs and devfs content")
309+
flag.BoolVar(&verbose, "verbose", false, "More verbose output")
310+
flag.Parse()
311+
312+
generateDriFiles(getOptions(name))
313+
}

0 commit comments

Comments
 (0)