Skip to content

Commit dd92a41

Browse files
committed
This will go on forever
Signed-off-by: ghokun <[email protected]>
1 parent 5298f7d commit dd92a41

File tree

2 files changed

+16
-15
lines changed

2 files changed

+16
-15
lines changed

manifests/device-plugin.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ spec:
45 45
capabilities:
46 46
add: ["SYS_ADMIN"]
47 47
containers:
48-
- image: ghcr.io/kuartis/kuartis-virtual-gpu-device-plugin:0.4.9
48+
- image: ghcr.io/kuartis/kuartis-virtual-gpu-device-plugin:0.4.10
49 49
name: kuartis-virtual-gpu-device-plugin-ctr
50 50
command:
51 51
- /usr/bin/virtual-gpu-device-plugin

pkg/gpu/nvidia/metrics.go

Lines changed: 15 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -96,20 +96,21 @@ func collectMetrics(w http.ResponseWriter, r *http.Request) {
96 96
log.Printf("Found %d processes on GPU %d", len(processes), i)
97 97
for _, process := range processes {
98 98
containerId := getContainerId(process.Pid)
99-
container := containerMap[containerId]
100-
log.Printf("Using %s Found container %+v for process: %d", containerId, container, process.Pid)
101-
collected = append(collected, metric{
102-
Pid: process.Pid,
103-
UsedGpuMemory: process.UsedGpuMemory,
104-
GpuIndex: i,
105-
GpuUUID: getDeviceUUID(d),
106-
Node: container.Node,
107-
Namespace: container.Namespace,
108-
Pod: container.Pod,
109-
PodUid: container.PodUid,
110-
Container: container.Container,
111-
ContainerId: container.ContainerId,
112-
})
99+
if container, ok := containerMap[strings.TrimSpace(containerId)]; ok {
100+
log.Printf("Using %s Found container %+v for process: %d", containerId, container, process.Pid)
101+
collected = append(collected, metric{
102+
Pid: process.Pid,
103+
UsedGpuMemory: process.UsedGpuMemory,
104+
GpuIndex: i,
105+
GpuUUID: getDeviceUUID(d),
106+
Node: container.Node,
107+
Namespace: container.Namespace,
108+
Pod: container.Pod,
109+
PodUid: container.PodUid,
110+
Container: container.Container,
111+
ContainerId: container.ContainerId,
112+
})
113+
}
113 114
}
114 115
}
115 116

0 commit comments

Comments
 (0)