@@ -74,6 +74,7 @@ func collectMetrics(w http.ResponseWriter, r *http.Request) {
7474 log .Println ("Error getting containers:" , err )
7575 return
7676 }
77+ log .Printf ("Found %d containers" , len (containers .Containers ))
7778 containerMap := make (map [string ]containerInfo )
7879 for _ , container := range containers .GetContainers () {
7980 containerMap [container .Id ] = containerInfo {
@@ -91,6 +92,7 @@ func collectMetrics(w http.ResponseWriter, r *http.Request) {
9192 check (ret )
9293 processes , ret := nvml .DeviceGetMPSComputeRunningProcesses (d )
9394 check (ret )
95+ log .Printf ("Found %d processes on GPU %d" , len (processes ), i )
9496 for _ , process := range processes {
9597 containerId := getContainerId (process .Pid )
9698 container := containerMap [containerId ]
@@ -139,5 +141,7 @@ func getContainerId(pid uint32) string {
139141 log .Printf ("Error reading proc file %s for process: %d, error: %s" , file , pid , err )
140142 }
141143 proc := string (data )
142- return proc [strings .LastIndex (proc , "/" )+ 1 :]
144+ containerId := proc [strings .LastIndex (proc , "/" )+ 1 :]
145+ log .Printf ("Found container id %s for process: %d" , containerId , pid )
146+ return containerId
143147}
0 commit comments