@@ -74,6 +74,7 @@ func collectMetrics(w http.ResponseWriter, r *http.Request) {
74
74
log .Println ("Error getting containers:" , err )
75
75
return
76
76
}
77
+ log .Printf ("Found %d containers" , len (containers .Containers ))
77
78
containerMap := make (map [string ]containerInfo )
78
79
for _ , container := range containers .GetContainers () {
79
80
containerMap [container .Id ] = containerInfo {
@@ -91,6 +92,7 @@ func collectMetrics(w http.ResponseWriter, r *http.Request) {
91
92
check (ret )
92
93
processes , ret := nvml .DeviceGetMPSComputeRunningProcesses (d )
93
94
check (ret )
95
+ log .Printf ("Found %d processes on GPU %d" , len (processes ), i )
94
96
for _ , process := range processes {
95
97
containerId := getContainerId (process .Pid )
96
98
container := containerMap [containerId ]
@@ -139,5 +141,7 @@ func getContainerId(pid uint32) string {
139
141
log .Printf ("Error reading proc file %s for process: %d, error: %s" , file , pid , err )
140
142
}
141
143
proc := string (data )
142
- return proc [strings .LastIndex (proc , "/" )+ 1 :]
144
+ containerId := proc [strings .LastIndex (proc , "/" )+ 1 :]
145
+ log .Printf ("Found container id %s for process: %d" , containerId , pid )
146
+ return containerId
143
147
}
0 commit comments