
Commit 6ad5d91

Add GPU scheduler plugin
wgs.py: Weighted GPU scheduler. Pick the GPU_i that minimizes
    W_a * G_ui + W_b * G_mi
where
    W_a : weight for GPU utilization (default 0.5)
    W_b : weight for GPU memory usage (default 0.5)
    G_ui: utilization of GPU i
    G_mi: memory usage of GPU i
It may keep assigning work to the same GPU when every GPU stays busy.

gfs.py: Gfair scheduler.
    for i = 0 to GPU_num-1:
        if G[i].u < 1/W_a:
            G[i].u = 1/W_a
        G[i].cap = W_a*G[i].u + W_b*G[i].m
    v_gpu[] = load the GPU capability table
    for i = 0 to GPU_num-1:
        if v_gpu[i] == 0:
            v_gpu[i] = 1.0
        G[i].cap = G[i].cap * v_gpu[i]
    find the k that minimizes G[k].cap
    v_gpu[k] = G[k].cap
    atomically write v_gpu[] back to the GPU capability table
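For illustration, a minimal Python 3 sketch of the Gfair selection rule above. It is not the committed implementation: gfs.py below reads utilization and memory from nvidia-smi, persists the capability table in a .gpu_info file, and is written in Python 2; the function gfair_select and its arguments are hypothetical names used only for this sketch.

# Hypothetical sketch of the Gfair rule described in the commit message.
# util[i]: utilization of GPU i, mem[i]: memory-used fraction of GPU i,
# v_gpu:   the persisted capability table (0.0 means no entry yet).
def gfair_select(util, mem, v_gpu, wa=0.5, wb=0.5):
    caps = []
    for i in range(len(util)):
        u = max(util[i], 1.0 / wa)            # clamp low utilization to 1/W_a
        cap = wa * u + wb * mem[i]            # weighted score
        scale = v_gpu[i] if v_gpu[i] != 0 else 1.0
        caps.append(cap * scale)              # scale by the capability table
    k = min(range(len(caps)), key=caps.__getitem__)   # GPU with minimum cap
    v_gpu[k] = caps[k]                        # record the winner's capability
    return k, v_gpu                           # caller writes v_gpu back atomically

# Example: GPU 0 at 40% utilization / 30% memory, GPU 1 idle -> picks GPU 1.
# k, table = gfair_select([40.0, 0.0], [0.3, 0.1], [0.0, 0.0])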
1 parent 733fa01 commit 6ad5d91

File tree

3 files changed (+161, -2 lines)

gfs.py (+99)
@@ -0,0 +1,99 @@
from atomicwrites import atomic_write
from subprocess import check_output, CalledProcessError

class Utilization:
    def __init__(self, gpu, memory):
        self.gpu = float(gpu)
        self.memory = float(memory)
        # Gfair: clamp low utilization to 1/Wa (= 2 with Wa = 0.5)
        if self.gpu < 2:
            self.gpu = 2
    def set_gpu(self, gpu):
        self.gpu = gpu
    def set_mem(self, memory):
        self.memory = memory
    def set_id(self, id):
        self.id = id
    def set_cap(self, wa, wb):
        self.cap = wa*self.gpu + wb*self.memory

# Per-GPU utilization from nvidia-smi (first CSV line is the header)
output_gpu = check_output('nvidia-smi --query-gpu=utilization.gpu --format=csv', shell=True)
output_gpu_split = output_gpu.split('\n')
device_num = len(output_gpu_split) - 2

d_gpu = []
for i in range(device_num):
    d_gpu.append(filter(str.isdigit, output_gpu_split[i+1]))
    #print d_gpu[i]

# Per-GPU memory usage, normalized by total memory
output_memory = check_output('nvidia-smi --query-gpu=memory.used --format=csv', shell=True)
output_memory_split = output_memory.split('\n')

d_memory = []
for i in range(device_num):
    d_memory.append(filter(str.isdigit, output_memory_split[i+1]))
    #print d_memory[i]

output_memory = check_output('nvidia-smi --query-gpu=memory.total --format=csv', shell=True)
output_memory_split = output_memory.split('\n')

for i in range(device_num):
    d_memory[i] = float(d_memory[i]) / float(filter(str.isdigit, output_memory_split[i+1]))
    #print d_memory[i]

# Weighted capability: Wa * utilization + Wb * memory usage
Wa = 0.5
Wb = 0.5
device_obj = []
for i in range(device_num):
    device_obj.append(Utilization(d_gpu[i], d_memory[i]))
    device_obj[i].set_cap(Wa, Wb)
    device_obj[i].set_id(i)
    #print device_obj[i].gpu
    #print device_obj[i].memory
    #print device_obj[i].cap
    #print device_obj[i].id

# Load the GPU capability table
v_gpu = []
f_gpu = []
file = open('/home/coldfunction/qCUDA_0.1/qCUDA/.gpu_info', 'r')
for i in range(device_num):
    line = file.readline()
    num = float(line)
    v_gpu.append(num)
    f_gpu.append(num)
    # if num == 0:
    # print(num)
file.close()

# Scale each capability by its table entry (0 means no entry yet, treated as 1.0)
for i in range(device_num):
    if v_gpu[i] == 0:
        v_gpu[i] = 1.0
    device_obj[i].cap = (v_gpu[i] * device_obj[i].cap)
    #print (device_obj[i].cap)
    #print(v_gpu[i])

# Pick the GPU with the minimum capability and record it in the table
device_obj.sort(key=lambda i: i.cap)
id = device_obj[0].id
f_gpu[id] = device_obj[0].cap

# Atomically write the updated capability table back
with atomic_write('/home/coldfunction/qCUDA_0.1/qCUDA/.gpu_info', overwrite=True) as f:
    for i in range(device_num):
        f.write(str(f_gpu[i]))
        f.write('\n')

print id

#f = open(".select_g", 'w')
#s = str(device_obj[0].id)
#f.write(s)

qcu-device/hw/misc/virtio-qcuda.c (+1, -2)
@@ -5,7 +5,6 @@
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-qcuda.h"
 #include <sys/mman.h>
-//#include <stdio.h>
 
 #ifdef CONFIG_CUDA
 #include <cuda.h>
@@ -224,7 +223,7 @@ static void qcu_cudaRegisterFatBinary(VirtioQCArg *arg)
     FILE *fp;
     char buffer[20];
     int id = 0;
-    fp=popen("python /home/cocotion/qcuda/select_gpu.py", "r");
+    fp=popen("python /home/coldfunction/qCUDA_0.1/qCUDA/gfs.py", "r");
     id = (fgets(buffer, sizeof(buffer), fp) != NULL)?atoi(buffer):0;
 
     pclose(fp);

wgs.py (+61)
@@ -0,0 +1,61 @@
from subprocess import check_output, CalledProcessError

class Utilization:
    def __init__(self, gpu, memory):
        self.gpu = float(gpu)
        self.memory = float(memory)
    def set_gpu(self, gpu):
        self.gpu = gpu
    def set_mem(self, memory):
        self.memory = memory
    def set_id(self, id):
        self.id = id
    def set_cap(self, wa, wb):
        self.cap = wa*self.gpu + wb*self.memory

# Per-GPU utilization from nvidia-smi (first CSV line is the header)
output_gpu = check_output('nvidia-smi --query-gpu=utilization.gpu --format=csv', shell=True)
output_gpu_split = output_gpu.split('\n')
device_num = len(output_gpu_split) - 2

d_gpu = []
for i in range(device_num):
    d_gpu.append(filter(str.isdigit, output_gpu_split[i+1]))
    #print d_gpu[i]

# Per-GPU memory usage, normalized by total memory
output_memory = check_output('nvidia-smi --query-gpu=memory.used --format=csv', shell=True)
output_memory_split = output_memory.split('\n')

d_memory = []
for i in range(device_num):
    d_memory.append(filter(str.isdigit, output_memory_split[i+1]))
    #print d_memory[i]

output_memory = check_output('nvidia-smi --query-gpu=memory.total --format=csv', shell=True)
output_memory_split = output_memory.split('\n')

for i in range(device_num):
    d_memory[i] = float(d_memory[i]) / float(filter(str.isdigit, output_memory_split[i+1]))
    #print d_memory[i]

# Weighted score: Wa * utilization + Wb * memory usage
Wa = 0.5
Wb = 0.5
device_obj = []
for i in range(device_num):
    device_obj.append(Utilization(d_gpu[i], d_memory[i]))
    device_obj[i].set_cap(Wa, Wb)
    device_obj[i].set_id(i)
    #print device_obj[i].gpu
    #print device_obj[i].memory
    #print device_obj[i].cap
    #print device_obj[i].id

# Pick the GPU with the minimum weighted score and report its id
device_obj.sort(key=lambda i: i.cap)
#print device_obj[0].cap
print device_obj[0].id

#f = open(".select_g", 'w')
#s = str(device_obj[0].id)
#f.write(s)
