@@ -26,8 +26,7 @@ const (
26
26
27
27
// node holds the value of the NODE_NAME environment variable, read once when
// the package is initialized. os.Getenv returns an empty string when the
// variable is unset, so node may be empty.
// NOTE(review): presumably this feeds the "node" label of the exported
// metrics — confirm against where the metric struct is populated.
var node = os .Getenv ("NODE_NAME" )
28
28
29
- var metricsFormat = `
30
- # HELP gpu_memory_usage_per_container Shows the GPU memory usage per container.
29
+ var metricsFormat = `# HELP gpu_memory_usage_per_container Shows the GPU memory usage per container.
31
30
# TYPE gpu_memory_usage_per_container gauge
32
31
{{- range $m := . }}
33
32
gpu_memory_usage_per_container{pid="{{ $m.Pid }}",gpuindex="{{ $m.GpuIndex }}",gpuuuid="{{ $m.GpuUUID }}",node="{{ $m.Node }}",namespace="{{ $m.Namespace }}",pod="{{ $m.Pod }}",poduid="{{ $m.PodUid }}",container="{{ $m.Container }}",containerid="{{ $m.ContainerId }}"} {{ $m.UsedGpuMemory }}
@@ -86,7 +85,6 @@ func collectMetrics(w http.ResponseWriter, r *http.Request) {
86
85
ContainerId : container .GetId (),
87
86
}
88
87
}
89
- log .Printf ("Current map %+v" , containerMap )
90
88
collected := []metric {}
91
89
for i := 0 ; i < getDeviceCount (); i ++ {
92
90
d , ret := nvml .DeviceGetHandleByIndex (i )
@@ -96,7 +94,7 @@ func collectMetrics(w http.ResponseWriter, r *http.Request) {
96
94
log .Printf ("Found %d processes on GPU %d" , len (processes ), i )
97
95
for _ , process := range processes {
98
96
containerId := getContainerId (process .Pid )
99
- if container , ok := containerMap [strings . TrimSpace ( containerId ) ]; ok {
97
+ if container , ok := containerMap [containerId ]; ok {
100
98
log .Printf ("Using %s Found container %+v for process: %d" , containerId , container , process .Pid )
101
99
collected = append (collected , metric {
102
100
Pid : process .Pid ,
@@ -145,6 +143,5 @@ func getContainerId(pid uint32) string {
145
143
}
146
144
proc := string (data )
147
145
containerId := proc [strings .LastIndex (proc , "/" )+ 1 :]
148
- log .Printf ("Found container id %s for process: %d" , containerId , pid )
149
- return containerId
146
+ return strings .TrimSpace (containerId )
150
147
}
0 commit comments