-
Notifications
You must be signed in to change notification settings - Fork 453
/
Copy pathpy-analysis-benchmark.py
125 lines (96 loc) · 4.38 KB
/
py-analysis-benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""
Script for the local benchmarking of the o2 analysis tasks,
running them with multiple processing jobs (NCORES)
and multiple readers (NREADERS) over input files (INPUT_FILE).
Tasks to be benchmarked are in the BENCHMARK_TASKS dict.
Usage: python3 py-analysis-benchmark.py
Output: CSV file (OUTPUT_CSV) with benchmarking results:
'tname', 'ncores', 'nreaders', 'time_mean' (s), 'time_std' (s),
'input_size' (MB), 'input_length', 'timestamp', 'cpu_load', 'ncpu', 'machine'
"""
import csv
from datetime import datetime
import itertools
import os
import statistics as stat
from string import Template
import subprocess
import timeit
def get_cl_output(cmd) -> str:
    """Run *cmd* through the shell and return its standard output as text.

    The captured bytes are decoded as UTF-8 and newline characters are
    trimmed from both ends of the result. When the command exits with a
    non-zero status an empty string is returned instead of raising.
    """
    try:
        raw_bytes = subprocess.check_output(cmd, shell=True)
    except subprocess.CalledProcessError:
        # Failed command: report "no output" rather than propagating the error.
        return ''
    return raw_bytes.decode('utf-8').strip('\n')
def get_cpu_load():
    """Return the last space-separated field printed by ``uptime``.

    The original code names this value the load over the last 15 minutes.
    If ``uptime`` produces no output, an empty string is returned.
    """
    uptime_text = get_cl_output('uptime')
    # rpartition yields (head, sep, tail); the tail is whatever follows the
    # final space, or the whole string when there is no space at all.
    return uptime_text.rpartition(' ')[2]
def get_timestamp():
    """Return the current local time formatted as ``MM/DD/YYYY HH:MM``."""
    stamp_format = "%m/%d/%Y %H:%M"
    current_time = datetime.now()
    return current_time.strftime(stamp_format)
def get_time_std(t_res):
    """Return the sample standard deviation of the timings in *t_res*.

    When the statistics module cannot compute it (it raises
    ``StatisticsError``, e.g. for fewer than two values), the sentinel
    ``-1`` is returned instead.
    """
    try:
        return stat.stdev(t_res)
    except stat.StatisticsError:
        # Sentinel written to the CSV when no spread can be computed.
        return -1
# ---------------------------------------------------------------------------
# Benchmarking setup
# ---------------------------------------------------------------------------

# List of input files. The value is passed as-is to '--aod-file'; the part
# after the leading '@' is the path of the text file that is opened to read
# the individual input file names.
INPUT_FILE = '@filelist.txt'

# Destination of the benchmark results.
OUTPUT_CSV = 'benchmark_data.csv'

# Values scanned for the number of processing jobs and of readers.
NCORES = [1, 2, 4]
NREADERS = [1, 2, 4]

# Number of timed repetitions of every command.
NTRIALS = 2

# When True, '--shm-segment-size' is added to every task command line.
LARGE_SHM_SEGMENT_SIZE = False

# When True, tasks run with 2 or 4 cores are pinned to fixed CPUs via taskset.
CPU_SELECTION = False

# ---------------------------------------------------------------------------
# Tasks to be benchmarked: executable name -> argument string, where ${n}
# is later substituted with the number of cores.
# ---------------------------------------------------------------------------
BENCHMARK_TASKS = {
    'o2-analysistutorial-void':
        '-b --pipeline void:${n}',
    'o2-analysistutorial-histograms':
        '-b --pipeline '
        'eta-and-phi-histograms:${n},'
        'pt-histogram:${n},'
        'etaphi-histogram:${n}',
    # NOTE(review): 'track_extension' uses an underscore while every other
    # pipeline name here is hyphenated - confirm this is intended.
    'o2-analysis-trackselection':
        '-b --pipeline '
        'track-selection:${n},'
        'track_extension:${n}',
    'o2-analysis-correlations':
        '-b --pipeline correlation-task:${n}',
    #'o2-analysis-vertexing-hf': '-b --pipeline vertexerhf-candidatebuildingDzero:${n},vertexerhf-decayvertexbuilder2prong:${n}'
}
# O2_ROOT must be set (and non-empty) in the environment; abort otherwise.
O2_ROOT = os.environ.get('O2_ROOT')
if not O2_ROOT:
    print('O2_ROOT not found')
    # Carry the reason in the exception as well, so it is still visible when
    # only the traceback (stderr) is captured.
    raise ValueError('O2_ROOT not found')

# Host description recorded with every result row.
MACHINE = get_cl_output('hostname')
NCPU = get_cl_output('grep processor /proc/cpuinfo | wc -l')

# INPUT_FILE starts with '@'; the path of the list file is what follows it.
with open(INPUT_FILE[1:], 'r') as f:
    # Skip blank lines: otherwise a stray empty line would be counted as an
    # input file and os.stat('') would fail.
    fnames = [line.strip('\n') for line in f if line.strip()]

# Total size of the listed inputs in MB (bytes / 1024 / 1024), rounded.
INPUT_SIZE = round(sum(os.stat(fname).st_size for fname in fnames) / 1024 / 1024)
# Number of listed input files.
INPUT_LENGTH = len(fnames)

# Command hashing every listed file, with ${n} sha256sum processes in parallel.
SHA256SUM_TASK = Template('cat ${file_list} | xargs -P ${n} -n1 -I{} sha256sum {}')
#COMPOSITE_TASK = Template('o2-analysis-trackselection -b --pipeline track-selection:${n},track-extension:${n} --aod-file ${file_list} --readers ${n} | o2-analysistutorial-histogram-track-selection -b --pipeline histogram-track-selection:${n} --select=0')

# Turn every task's argument string into a Template so that ${n} can be
# substituted per benchmark configuration.
BENCHMARK_TASKS = {
    task_name: Template(task_args)
    for task_name, task_args in BENCHMARK_TASKS.items()
}
# Run all benchmarks and write one CSV row per measured configuration.
# newline='' is what the csv module requires for files handed to csv.writer,
# so that the writer alone controls the line endings.
with open(OUTPUT_CSV, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(('tname', 'ncores', 'nreaders', 'time_mean', 'time_std',
                     'input_size', 'input_length', 'timestamp', 'cpu_load', 'ncpu', 'machine'))

    # Reference measurement: sha256sum over all input files, once per core
    # count. There is no reader setting here, hence nreaders is written as -1.
    for ncores in NCORES:
        cmd_sha256sum = SHA256SUM_TASK.substitute(file_list=INPUT_FILE[1:], n=str(ncores))
        # A callable is timed instead of a source string, so the measurement
        # does not depend on the command being reachable through globals().
        t = timeit.Timer(lambda: os.system(cmd_sha256sum))
        # NTRIALS independent timings, each of a single execution.
        t_res = t.repeat(NTRIALS, 1)
        writer.writerow(('sha256sum', ncores, -1, stat.mean(t_res), get_time_std(t_res),
                         INPUT_SIZE, INPUT_LENGTH, get_timestamp(), get_cpu_load(), NCPU, MACHINE))

    # Every task is measured for every (ncores, nreaders) combination.
    for ncores, nreaders in itertools.product(NCORES, NREADERS):
        for tname, targ in BENCHMARK_TASKS.items():
            targ = targ.substitute(n=str(ncores))
            cmd_list = [tname] + targ.split(' ')

            if CPU_SELECTION:
                # Pin the task to fixed CPU ids through taskset; the ids are
                # hard-coded, and no pinning is applied for other core counts.
                if ncores == 2:
                    cmd_list = ['taskset', '-c', '5,15'] + cmd_list
                elif ncores == 4:
                    cmd_list = ['taskset', '-c', '1,3,11,13'] + cmd_list

            if LARGE_SHM_SEGMENT_SIZE:
                cmd_list += ['--shm-segment-size', str(16000000000)]

            cmd_list += ['--aod-file', INPUT_FILE]
            cmd_list += ['--readers', str(nreaders)]

            # NOTE(review): the exit status of the task is not checked, so a
            # failing run is still timed and recorded - confirm this is intended.
            t = timeit.Timer(lambda: subprocess.run(cmd_list))
            t_res = t.repeat(NTRIALS, 1)
            # tname[3:] drops the first three characters of the executable
            # name (the 'o2-' prefix of the configured tasks).
            writer.writerow((tname[3:], ncores, nreaders, stat.mean(t_res), get_time_std(t_res),
                             INPUT_SIZE, INPUT_LENGTH, get_timestamp(), get_cpu_load(), NCPU, MACHINE))
#alinsure
#numa0 0-11,24-35
#numa1 12-23,36-47