This repository was archived by the owner on Jan 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathbw-report.py
executable file
·414 lines (353 loc) · 14.4 KB
/
bw-report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
#!/usr/bin/env python2
# Copyright (C) 2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
import os
import sys
import subprocess
import argparse
import shutil
from signal import signal, SIGINT
# all time in seconds
# Fallback total measurement duration, used when --time is negative.
DEFAULT_MEASURE_TIME = 1000
# Fallback refresh interval, used when --interval is zero or out of range.
DEFAULT_INTERVAL = 5
# Sink handed to child processes as their stderr; it is never explicitly
# closed in this file.
FNULL = open(os.devnull, 'w')
# All log files are looked up under <cur_dir>/logs, i.e. relative to the
# working directory the script was started from.
cur_dir = os.getcwd()
def collect_system_bw(all_stores_dict, pid):
    """Parse system.log and return the elapsed time it reports.

    The log is read from <cur_dir>/logs/system.log when pid is -1 and from
    <cur_dir>/logs/<pid>/system.log otherwise.

    Args:
        all_stores_dict: dictionary updated in place; key 0 receives the
            MEM_INST_RETIRED.ALL_STORES count found in the log.
        pid: monitored PID, or -1 for all tasks.

    Returns:
        The value of the "<float> seconds ..." line as a float, or 0.0 when
        the log has no such line.

    Exits the program (sys.exit) when the log file does not exist.
    """
    if pid == -1:
        lp = os.path.join(cur_dir, "logs", "system.log")
    else:
        lp = os.path.join(cur_dir, "logs", str(pid), "system.log")
    if not os.path.exists(lp):
        sys.exit("No %s found, something wrong!\n" % lp)

    t = 0.0
    # The context manager closes the log even when a malformed value makes
    # int()/float() raise; the previous manual close() was skipped then.
    with open(lp) as fd:
        try:
            for line in fd:
                fields = line.split()
                count = len(fields)
                # Counter lines carry one extra trailing column when the
                # counter was multiplexed, so accept 3 fields as well as 2,
                # like the other log parsers in this file do.
                if count in (2, 3) and fields[1] == "MEM_INST_RETIRED.ALL_STORES":
                    all_stores_dict[0] = int(fields[0].replace(',', ''))
                elif count == 4 and fields[1] == "seconds":
                    t = float(fields[0])
        except IOError:
            # a read error ends parsing; values collected so far are kept
            pass
    return t
def collect_task_bw(dram_read_dict, pmem_read_dict, pmem_write_dict, all_stores_dict, pid):
    """Parse task.log and fill the per-task counter dictionaries.

    The log is read from <cur_dir>/logs/task.log when pid is -1 and from
    <cur_dir>/logs/<pid>/task.log otherwise.

    Args:
        dram_read_dict: receives OCR_READ_DRAM counts.
        pmem_read_dict: receives OCR_READ_PMEM counts.
        pmem_write_dict: receives OCR_WRITE_PMEM counts.
        all_stores_dict: receives MEM_INST_RETIRED.ALL_STORES counts.
        pid: monitored PID, or -1 for all tasks. With a real PID the
            dictionaries are keyed by str(pid); with -1 they are keyed by the
            first column of each counter line.

    Returns:
        (start_time, t): the 7th field of the "... started ..." line (empty
        string when absent) and the value of the "<float> seconds ..." line
        (0.0 when absent).

    Exits the program (sys.exit) when the log file does not exist.
    """
    if pid == -1:
        lp = os.path.join(cur_dir, "logs", "task.log")
    else:
        lp = os.path.join(cur_dir, "logs", str(pid), "task.log")
    if not os.path.exists(lp):
        sys.exit("No %s found, something wrong!\n" % lp)

    # event name -> dictionary that receives its counter value
    targets = {
        "OCR_READ_DRAM": dram_read_dict,
        "OCR_READ_PMEM": pmem_read_dict,
        "OCR_WRITE_PMEM": pmem_write_dict,
        "MEM_INST_RETIRED.ALL_STORES": all_stores_dict,
    }
    t = 0.0
    start_time = ""
    # The context manager closes the log even when a malformed value makes
    # int()/float() raise; the previous manual close() was skipped then.
    with open(lp) as fd:
        try:
            for line in fd:
                l = line.split()
                n = len(l)
                if pid != -1:
                    # "<count> <event>"; when there's multiplexing, length
                    # is 3, otherwise 2.
                    if n in (2, 3) and l[1] in targets:
                        targets[l[1]][str(pid)] = int(l[0].replace(',', ''))
                else:
                    # "<task> <count> <event>"; when there's multiplexing,
                    # length is 4, otherwise 3.
                    if n in (3, 4) and l[2] in targets:
                        targets[l[2]][l[0]] = int(l[1].replace(',', ''))
                if n == 4 and l[1] == "seconds":
                    t = float(l[0])
                if n == 8 and l[1] == "started":
                    start_time = str(l[6])
        except IOError:
            # a read error ends parsing; values collected so far are kept
            pass
    return start_time, t
def collect_multi_imc_bw(pid):
    """Parse unc.log and return total DRAM/PMEM traffic in bytes.

    The log is read from <cur_dir>/logs/unc.log when pid is -1 and from
    <cur_dir>/logs/<pid>/unc.log otherwise. Counter lines are classified by
    substring of their event name: "PMM_RPQ" / "PMM_WPQ" are summed as PMEM
    reads / writes, any other "RPQ" / "WPQ" as total reads / writes.

    Args:
        pid: monitored PID, or -1 for all tasks.

    Returns:
        (read_bytes, write_bytes, pmem_read_bytes, pmem_write_bytes, t):
        each counter sum multiplied by 64, as floats, plus the first field of
        the last 4-field line as a float (0.0 when no such line is reached).

    Exits the program (sys.exit) when the log file does not exist.
    """
    if pid == -1:
        lp = os.path.join(cur_dir, "logs", "unc.log")
    else:
        lp = os.path.join(cur_dir, "logs", str(pid), "unc.log")
    if not os.path.exists(lp):
        sys.exit("No %s found, something wrong!\n" % lp)

    read_total = 0.0
    write_total = 0.0
    pmem_read = 0.0
    pmem_write = 0.0
    t = 0.0
    # The context manager closes the log even when a malformed value makes
    # float() raise; the previous manual close() was skipped then.
    with open(lp) as fd:
        try:
            for line in fd:
                l = line.split()
                # when there's multiplexing, length is 3, otherwise 2.
                if len(l) in (2, 3):
                    event = l[1]
                    # the PMM_* checks must come first: "RPQ"/"WPQ" are
                    # substrings of "PMM_RPQ"/"PMM_WPQ"
                    if "PMM_RPQ" in event:
                        pmem_read += float(l[0].replace(',', ''))
                    elif "PMM_WPQ" in event:
                        pmem_write += float(l[0].replace(',', ''))
                    elif "RPQ" in event:
                        read_total += float(l[0].replace(',', ''))
                    elif "WPQ" in event:
                        write_total += float(l[0].replace(',', ''))
                    else:
                        # a counter-shaped line with an unknown event stops
                        # parsing; t then stays at its current value
                        break
                if len(l) == 4:
                    t = float(l[0])
        except IOError:
            # a read error ends parsing; values collected so far are kept
            pass
    return read_total * 64, write_total * 64, pmem_read * 64, pmem_write * 64, t
def get_pid_max(path='/proc/sys/kernel/pid_max'):
    """Return the integer stored in the kernel's pid_max file.

    The file is read directly instead of spawning a shell running ``cat``.

    Args:
        path: file to read; defaults to /proc/sys/kernel/pid_max.

    Raises:
        IOError/OSError: the file cannot be opened.
        ValueError: the file content is not an integer.
    """
    with open(path) as fd:
        return int(fd.read().strip())
def parse_args(cmd_d):
    """Parse the command line and build one collector command per target.

    Args:
        cmd_d: dictionary filled in place. Keys are PIDs as strings ("-1" in
            all-tasks mode); values are the ./bw-collect.py argument lists to
            run for that target.

    Returns:
        (pid, m_time, i, pmem, dram):
        pid is -1 in all-tasks mode, otherwise the last key of cmd_d (a
        string); m_time is the measure time in seconds (sys.maxsize for
        "infinite"); i is the refresh interval in seconds; pmem and dram are
        the monitoring flags.

    Exits the program (sys.exit) on an out-of-range PID.
    """
    ap = argparse.ArgumentParser(description='Report per-task memory read/write bandwidth.')
    ap.add_argument('-p', '--pid', type=int, nargs='*', default=-1,
        help='task PID to monitor, multi PIDs with space in between, default -1 for all tasks')
    ap.add_argument('-t', '--time', type=int, default=DEFAULT_MEASURE_TIME,
        help='measure time in seconds, 0 for infinite, default 1000s')
    ap.add_argument('-i', '--interval', type=int, default=DEFAULT_INTERVAL,
        help='refresh interval in seconds, default 5s')
    ap.add_argument('-pmem', '--pmem', action="store_true",
        help='monitor persistent memory bandwidth too, default not')
    # The two DRAM switches must be added to the group itself; adding them
    # to the parser (as before) left them non-exclusive.
    dram_group = ap.add_mutually_exclusive_group(required=False)
    dram_group.add_argument('-dram', '--dram', dest='dram', action="store_true",
        help='monitor DRAM related bandwidths, default TRUE')
    dram_group.add_argument('-no-dram', '--no-dram', dest='dram', action="store_false",
        help='do not monitor DRAM realted bandwidths')
    ap.set_defaults(dram=True)
    args = ap.parse_args()

    pids = args.pid
    num_tasks = 0
    # "-p" with no values yields an empty list, which previously left `pid`
    # unbound; treat it, like an explicit -1, as "monitor all tasks".
    if pids == -1 or not pids or -1 in pids:
        pid = -1
        cmd = ["./bw-collect.py", "--pid", str(pid)]
    else:
        pid_max = get_pid_max()  # read once instead of once per PID
        num_tasks = len(set(pids))
        for pid in pids:
            if pid > pid_max or pid < -1:
                sys.exit("Invalid PID: %d" % pid)
            cmd_d[str(pid)] = ["./bw-collect.py", "--pid", str(pid)]

    if args.time > 0:
        m_time = args.time
    elif args.time == 0:
        # sys.maxsize exists on Python 2.6+ and 3; the old
        # `sys.version == 2` test compared a string to an int and never hit.
        m_time = sys.maxsize
    else:
        print("Invalid measure time(%d), using default(1000s)." % args.time)
        m_time = DEFAULT_MEASURE_TIME

    if args.interval:
        if args.interval > 0 and args.interval < m_time:
            i = args.interval
        else:
            print("Invalid interval(%d), using default(5s)." % args.interval)
            i = DEFAULT_INTERVAL
    else:
        i = DEFAULT_INTERVAL if (m_time > DEFAULT_INTERVAL) else m_time

    if pid == -1:
        cmd.append("--time")
        cmd.append(str(i))
        cmd_d[str(pid)] = cmd
        if args.pmem:
            cmd.append("--pmem")
    else:
        # NOTE: `pid` is deliberately rebound to the string keys here so the
        # returned value stays what callers have always received.
        for pid in cmd_d:
            cmd = cmd_d[str(pid)]
            cmd.append("--time")
            cmd.append(str(i))
            if args.pmem:
                cmd.append("--pmem")

    print("")
    print("Monitoring %s for %d seconds, refreshing in every %d seconds."
        % ("all tasks" if (pid == -1) else "%d task(s)" % num_tasks, m_time, i))
    return pid, m_time, i, args.pmem, args.dram
def clean_logs(pid):
    """Delete the log files of one monitoring target.

    For pid -1 the three all-tasks logs (task.log, unc.log, system.log) in
    <cur_dir>/logs are removed; for any other pid the whole
    <cur_dir>/logs/<pid> directory is removed. The logs directory itself is
    removed afterwards when it is empty.

    Args:
        pid: target PID as an int or as its string form; -1 / "-1" selects
            the all-tasks logs. The string form is accepted because callers
            in this file pass dictionary keys.
    """
    logs_dir = os.path.join(cur_dir, "logs")
    if str(pid) == "-1":
        for name in ("task.log", "unc.log", "system.log"):
            log_path = os.path.join(logs_dir, name)
            if os.path.exists(log_path):
                os.remove(log_path)
    else:
        pid_dir = os.path.join(logs_dir, str(pid))
        if os.path.exists(pid_dir):
            shutil.rmtree(pid_dir)
    # remove logs folder if it's empty; it may not exist at all when nothing
    # was collected yet or a previous call already removed it
    if os.path.isdir(logs_dir) and not os.listdir(logs_dir):
        os.rmdir(logs_dir)
def calc_print_bw(pid):
    """Compute and print per-task bandwidth from the collected logs.

    Reads the task, system and uncore logs for `pid`, derives the overall
    DRAM/PMEM bandwidth in MiB/s and each task's share of it, prints one row
    per task whose share is not negligible, and finally deletes the logs.

    Args:
        pid: monitored PID, or -1 for all tasks.

    Returns:
        1 while the target is still running; 0 when any log reports a zero
        elapsed time or a task's /proc/<pid>/comm cannot be read.
    """
    mib = 1024 * 1024
    # a task is listed only when one of its shares exceeds this ratio
    min_ratio = 0.0005
    run = 1
    task_dram_read_dict = {}
    task_pmem_read_dict = {}
    task_pmem_write_dict = {}
    task_all_stores_dict = {}
    start_time, task_time = collect_task_bw(task_dram_read_dict,
        task_pmem_read_dict, task_pmem_write_dict, task_all_stores_dict, pid)
    system_all_stores_dict = {}
    system_time = collect_system_bw(system_all_stores_dict, pid)
    (dram_read_bytes, dram_write_bytes, pmem_read_bytes, pmem_write_bytes,
        imc_time) = collect_multi_imc_bw(pid)
    if task_time == 0.0 or imc_time == 0.0 or system_time == 0.0:
        # time is 0 means task ended; drop its logs like the normal path
        # does so they are not left behind, then return
        clean_logs(pid)
        return 0

    dram_read_bw = dram_read_bytes / mib / imc_time
    dram_write_bw = dram_write_bytes / mib / imc_time
    pmem_read_bw = pmem_read_bytes / mib / imc_time
    pmem_write_bw = pmem_write_bytes / mib / imc_time
    # the system log may lack the stores counter; treat that as zero stores
    system_stores = system_all_stores_dict.get(0, 0)

    for k in sorted(task_dram_read_dict, key=task_dram_read_dict.__getitem__, reverse=True):
        # per-task DRAM read bandwidth and its percentage of total DRAM BW
        task_dram_read_bw = float(task_dram_read_dict[k] * 64) / mib / task_time
        # every ratio is reset per task; previously r and f could be unbound
        # or carry over the previous task's value
        r = 0.0
        if dram_read_bw != 0:
            r = task_dram_read_bw / dram_read_bw

        # per-task PMEM read bandwidth and its percentage of total PMEM BW
        p = 0.0
        task_pmem_read_bw = 0.0
        if pmem_mon and (pmem_read_bw != 0) and (k in task_pmem_read_dict):
            task_pmem_read_bw = float(task_pmem_read_dict[k] * 64) / mib / task_time
            p = task_pmem_read_bw / pmem_read_bw

        # per-task PMEM write bandwidth and its percentage of total PMEM BW
        q = 0.0
        task_pmem_write_bw = 0.0
        if pmem_mon and (pmem_write_bw != 0) and (k in task_pmem_write_dict):
            task_pmem_write_bw = float(task_pmem_write_dict[k] * 64) / mib / task_time
            q = task_pmem_write_bw / pmem_write_bw

        # DRAM write bandwidth is apportioned by the task's share of stores
        f = 0.0
        task_write_bw = 0.0
        if k in task_all_stores_dict and system_stores > 0:
            f = float(task_all_stores_dict[k]) / float(system_stores)
            task_write_bw = f * dram_write_bw

        if pid == -1:
            # when "perf stat -a --per-thread..", k looks like
            # "python2-47361"; split on the LAST hyphen so task names that
            # themselves contain hyphens are kept whole
            name_part, sep, pid_part = k.rpartition('-')
            if sep:
                task_name, task_pid = name_part, pid_part
            else:
                task_name = task_pid = k
        else:
            task_pid = k
            # if failed to get the task_name, the task is gone: stop and
            # report it as no longer running
            try:
                with open('/proc/%s/comm' % k) as comm:
                    task_name = comm.read().strip()
            except (IOError, OSError):
                run = 0
                break

        # only print for tasks that read/write BW ratio is not 0.0
        if r > min_ratio or f > min_ratio or p > min_ratio or q > min_ratio:
            print_bw(start_time, dram_read_bw, dram_write_bw, pmem_read_bw,
                pmem_write_bw, task_pid, task_name,
                task_dram_read_bw, r * 100.0, task_write_bw, f * 100.0,
                task_pmem_read_bw, p * 100.0, task_pmem_write_bw, q * 100.0)

    clean_logs(pid)
    return run
def get_terminal_resolution():
    """Return (columns, rows) as the two strings printed by `stty size`."""
    stty_output = os.popen('stty size', 'r').read()
    # `stty size` prints "<rows> <columns>"; callers get them swapped
    rows, columns = stty_output.split()
    return columns, rows
def print_header():
    """Write the column header row to stdout.

    Which columns appear depends on the module-level dram_mon and pmem_mon
    flags. The output starts with an empty line and is flushed at the end.
    """
    # (width, title) pairs in output order
    columns = [(8, "Time")]
    if dram_mon:
        columns.append((16, "DramReadBW"))
        columns.append((16, "DramWriteBW"))
    if pmem_mon:
        columns.append((16, "PmemReadBW"))
        columns.append((16, "PmemWriteBW"))
    columns.append((8, "TaskPID"))
    columns.append((21, "TaskName"))
    if dram_mon:
        columns.append((15, "TaskDramReadBW"))
        columns.append((12, "DramReadBW%"))
        columns.append((17, "*TaskDramWriteBW"))
        columns.append((14, "*DramWriteBW%"))
    if pmem_mon:
        columns.append((15, "TaskPmemReadBW"))
        columns.append((12, "PmemReadBW%"))
        columns.append((17, "*TaskPmemWriteBW"))
        columns.append((14, "*PmemWriteBW%"))

    out = sys.stdout
    out.write("\n")
    for width, title in columns:
        # right-align each title in its column
        out.write("%*s" % (width, title))
    out.write("\n")
    out.flush()
def print_bw(time, dram_r, dram_w, pmem_r, pmem_w, t_pid, t_name, t_r, t_r_perc,
        t_w, t_w_perc, t_pmem_r_bw, t_pmem_r_bw_perc, t_pmem_w_bw, t_pmem_w_bw_perc):
    """Write one task row to stdout, column-aligned with print_header().

    Which columns appear depends on the module-level dram_mon and pmem_mon
    flags. Bandwidths are printed as MiB/s, shares as percentages.

    Args:
        time: timestamp text for the row.
        dram_r, dram_w: overall DRAM read / write bandwidth.
        pmem_r, pmem_w: overall PMEM read / write bandwidth.
        t_pid, t_name: task PID and task name.
        t_r, t_r_perc: task DRAM read bandwidth and its percentage.
        t_w, t_w_perc: task DRAM write bandwidth and its percentage.
        t_pmem_r_bw, t_pmem_r_bw_perc: task PMEM read bandwidth and percentage.
        t_pmem_w_bw, t_pmem_w_bw_perc: task PMEM write bandwidth and percentage.
    """
    out = sys.stdout
    out.write("%8s" % time)
    if dram_mon:
        out.write("%10.1f MiB/s" % dram_r)
        out.write("%10.1f MiB/s" % dram_w)
    if pmem_mon:
        out.write("%10.1f MiB/s" % pmem_r)
        out.write("%10.1f MiB/s" % pmem_w)
    out.write("%8s" % t_pid)
    out.write("%21s" % t_name)
    if dram_mon:
        out.write("%9.1f MiB/s" % t_r)
        out.write("%11.1f%%" % t_r_perc)
        out.write("%11.1f MiB/s" % t_w)
        out.write("%13.1f%%" % t_w_perc)
    if pmem_mon:
        out.write("%9.1f MiB/s" % t_pmem_r_bw)
        out.write("%11.1f%%" % t_pmem_r_bw_perc)
        # The PMEM write columns are 17 and 14 characters wide in the header
        # (same as the DRAM write columns); the narrower 15/12 formats used
        # before shifted these two values out of their columns.
        out.write("%11.1f MiB/s" % t_pmem_w_bw)
        out.write("%13.1f%%" % t_pmem_w_bw_perc)
    out.write("\n")
    out.flush()
# main() starts
time = 0
cmd_dict = {}
p_id, measure_time, interval, pmem_mon, dram_mon = parse_args(cmd_dict)


def _clean_logs_quietly(key):
    """Best-effort clean_logs() for one cmd_dict key; missing logs are fine."""
    try:
        # cmd_dict keys are strings; clean_logs compares against the int -1
        clean_logs(int(key))
    except OSError:
        # nothing was collected yet, or the logs are already gone
        pass


def sighandler(sig, frame):
    """SIGINT handler: remove the logs of every monitored target and exit."""
    for key in list(cmd_dict):
        _clean_logs_quietly(key)
    print("")
    # sys.exit instead of the interactive-only `exit` helper from `site`
    sys.exit("Monitoring interrupted by SIGINT or user CTRL-C. Logs cleared.")


signal(SIGINT, sighandler)
if pmem_mon:
    print("\"pmem\" specified, persistent memory related bandwidth monitoring added.")
if not dram_mon:
    print("\"no-dram\" specified, DRAM related bandwidth will not be printed.")
if p_id == -1:
    print("")
    print("!!! NOTE: Tasks with all 0.0% read/write BW consumptions are not listed.")
print_header()

while time < measure_time:
    if not cmd_dict:
        # every monitored target has ended; without this check the loop
        # would keep spinning with nothing to run until the time is used up
        break
    # start all collectors first so they sample the same interval ...
    procs = []
    for p in sorted(cmd_dict, key=cmd_dict.__getitem__, reverse=True):
        procs.append((p, subprocess.Popen(cmd_dict[p], stderr=FNULL)))
    # ... then wait for each of them and report
    for p, proc in procs:
        proc.communicate()
        # stderr goes to FNULL, not a pipe, so communicate() never returns
        # error text; the exit status is the only failure signal available
        if proc.returncode != 0:
            # collector failed means process stopped, remove it from
            # cmd_dict[p, cmd]
            del cmd_dict[p]
            _clean_logs_quietly(p)
            continue
        running = calc_print_bw(int(p))
        if running == 0:
            # task stopped, remove it from cmd_dict[p, cmd]
            del cmd_dict[p]
    time = time + interval
print("Done!")