|
| 1 | +""" |
| 2 | +Script for the local benchmarking of the o2 analysis tasks, |
| 3 | +running them with multiple processing jobs (NCORES) |
| 4 | +and multiple readers (NREADERS) over input files (INPUT_FILE). |
| 5 | +Tasks to be benchmarked are in the BENCHMARK_TASKS dict. |
| 6 | +
|
| 7 | +Usage: python3 py-analysis-benchmark.py |
| 8 | +
|
| 9 | +Output: CSV file (OUTPUT_CSV) with benchmarking results: |
| 10 | +'tname', 'ncores', 'nreaders', 'time_mean' (s), 'time_std' (s), |
| 11 | +'input_size' (MB), 'input_length', 'timestamp', 'cpu_load', 'ncpu', 'machine' |
| 12 | +""" |
| 13 | + |
1 | 14 | import csv
|
2 | 15 | from datetime import datetime
|
3 | 16 | import itertools
|
|
7 | 20 | import subprocess
|
8 | 21 | import timeit
|
9 | 22 |
|
10 |
| -O2_ROOT = os.environ.get('O2_ROOT') |
11 |
| -if not O2_ROOT: |
12 |
| - print('O2_ROOT not found') |
13 |
| - raise ValueError |
14 |
| - |
15 |
| -INPUT_FILE = '@filelist.txt' |
16 |
| -OUTPUT_CSV = 'benchmark_data.csv' |
17 | 23 |
|
18 |
| -with open(INPUT_FILE[1:],'r') as f: |
19 |
| - fnames = f.readlines() |
20 |
| - input_size = round(sum([os.stat(l.strip('\n')).st_size for l in fnames])/1024/1024) |
21 |
| - input_length = len(fnames) |
def get_cl_output(cmd) -> str:
    """Run *cmd* through the shell and return its stdout, trailing/leading newlines stripped.

    A command that exits non-zero yields an empty string instead of raising
    (subprocess.CalledProcessError is swallowed on purpose: the callers treat
    the result as best-effort metadata for the CSV).
    """
    try:
        raw = subprocess.check_output(cmd, shell=True)
    except subprocess.CalledProcessError:
        return ''
    return raw.decode('utf-8').strip('\n')
22 | 30 |
|
23 |
| -NCORES = [1, 2, 4] |
24 |
| -NREADERS = [1, 2, 4] |
25 |
| -NTRIALS = 3 |
26 | 31 |
|
27 |
| -CPU_SELECTION = False |
def get_cpu_load():
    """Return the 15-minute load average: the last whitespace-separated field of `uptime`."""
    return get_cl_output('uptime').split(' ')[-1]
28 | 36 |
|
29 |
| -SHA256SUM_TASK = Template('cat ${file_list} | xargs -P ${n} -n1 -I{} sha256sum {}') |
30 | 37 |
|
31 |
| -#COMPOSITE_TASK = Template('o2-analysis-trackselection -b --pipeline track-selection:${n},track-extension:${n} --aod-file ${file_list} --readers ${n} | o2-analysistutorial-histogram-track-selection -b --pipeline histogram-track-selection:${n} --select=0') |
def get_timestamp():
    """Return the current local time formatted as 'MM/DD/YYYY HH:MM'."""
    return f"{datetime.now():%m/%d/%Y %H:%M}"
| 40 | + |
| 41 | + |
def get_time_std(t_res):
    """Sample standard deviation of the timing results *t_res*.

    Returns -1 when the deviation is undefined (fewer than two samples),
    so the CSV column always holds a numeric value.
    """
    try:
        return stat.stdev(t_res)
    except stat.StatisticsError:
        return -1
| 48 | + |
32 | 49 |
|
# Benchmarking setup
INPUT_FILE = '@filelist.txt'       # '@' prefix is passed verbatim to --aod-file; stripped when read locally
OUTPUT_CSV = 'benchmark_data.csv'  # destination of the results table
NCORES = [1, 2, 4]                 # pipeline-job counts to sweep
NREADERS = [1, 2, 4]               # reader counts to sweep
NTRIALS = 2                        # timing repetitions per configuration
LARGE_SHM_SEGMENT_SIZE = False     # when True, adds --shm-segment-size 16000000000 (16 GB) to each task
CPU_SELECTION = False              # presumably gates the taskset core pinning below — confirm (guard line not visible here)
| 58 | + |
# Tasks to be benchmarked: executable name -> CLI option string.
# '${n}' is later substituted (via string.Template) with the number of
# parallel jobs for each pipeline device. Insertion order is preserved and
# determines the row order in the output CSV.
BENCHMARK_TASKS = {
    'o2-analysistutorial-void': '-b --pipeline void:${n}',
    'o2-analysistutorial-histograms': '-b --pipeline eta-and-phi-histograms:${n},pt-histogram:${n},etaphi-histogram:${n}',
    # NOTE(review): 'track_extension' (underscore) — the commented composite
    # task elsewhere spells it 'track-extension'; confirm the DPL device name.
    'o2-analysis-trackselection': '-b --pipeline track-selection:${n},track_extension:${n}',
    'o2-analysis-correlations': '-b --pipeline correlation-task:${n}',
    #'o2-analysis-vertexing-hf': '-b --pipeline vertexerhf-candidatebuildingDzero:${n},vertexerhf-decayvertexbuilder2prong:${n}'
}
|
38 |
| - |
| 67 | + |
| 68 | + |
# Abort early unless the O2 environment is loaded (it exports O2_ROOT).
O2_ROOT = os.environ.get('O2_ROOT')
if not O2_ROOT:
    print('O2_ROOT not found')
    raise ValueError

# Static host information recorded with every CSV row.
MACHINE = get_cl_output('hostname')
NCPU = get_cl_output('grep processor /proc/cpuinfo | wc -l')

# INPUT_FILE starts with '@' (file-list convention); strip it to read the list itself.
with open(INPUT_FILE[1:], 'r') as f:
    fnames = f.readlines()
    # Total payload size in MB and number of input files, for the CSV.
    INPUT_SIZE = round(sum(os.stat(name.strip('\n')).st_size for name in fnames) / 1024 / 1024)
    INPUT_LENGTH = len(fnames)

# Reference workload: hash all inputs with ${n} parallel sha256sum processes.
SHA256SUM_TASK = Template('cat ${file_list} | xargs -P ${n} -n1 -I{} sha256sum {}')
#COMPOSITE_TASK = Template('o2-analysis-trackselection -b --pipeline track-selection:${n},track-extension:${n} --aod-file ${file_list} --readers ${n} | o2-analysistutorial-histogram-track-selection -b --pipeline histogram-track-selection:${n} --select=0')
| 84 | + |
# Pre-compile each task's option string into a string.Template (${n} placeholder).
39 | 85 | for k in BENCHMARK_TASKS:
|
40 | 86 | BENCHMARK_TASKS[k] = Template(BENCHMARK_TASKS[k])
|
41 | 87 |
|
42 | 88 | with open(OUTPUT_CSV, 'w') as f:
|
43 | 89 | writer = csv.writer(f)
|
44 |
| - writer.writerow(['tname', 'ncores', 'nreaders', 'time_mean', 'time_std', 'input_size', 'input_length']) |
| 90 | + writer.writerow(('tname', 'ncores', 'nreaders', 'time_mean', 'time_std', |
| 91 | + 'input_size', 'input_length', 'timestamp', 'cpu_load', 'ncpu', 'machine')) |
45 | 92 |
|
# Baseline: parallel sha256sum over the same inputs (I/O + hashing reference timing).
46 | 93 | for ncores in NCORES:
|
47 | 94 | cmd_sha256sum = SHA256SUM_TASK.substitute(file_list=INPUT_FILE[1:], n=str(ncores))
|
48 | 95 | t = timeit.Timer('os.system(cmd_sha256sum)', globals=globals())
|
49 | 96 | t_res = t.repeat(NTRIALS, 1)
|
50 |
| - writer.writerow( ('sha256sum', ncores, -1, stat.mean(t_res), stat.stdev(t_res), input_size, input_length) ) |
| 97 | + writer.writerow( ('sha256sum', ncores, -1, stat.mean(t_res), get_time_std(t_res), |
| 98 | + INPUT_SIZE, INPUT_LENGTH, get_timestamp(), get_cpu_load(), NCPU, MACHINE) ) |
51 | 99 |
|
# Main sweep: every (ncores, nreaders) combination for each benchmark task.
52 | 100 | for ncores, nreaders in itertools.product(NCORES, NREADERS):
|
53 |
| - |
54 |
| - #cmd_composite = COMPOSITE_TASK.substitute(file_list=INPUT_FILE,n=str(ncores)) |
55 |
| - #t = timeit.Timer('os.system(cmd_composite)', globals=globals()) |
56 |
| - #t_res = t.repeat(NTRIALS, 1) |
57 |
| - #writer.writerow( ('analysistutorial-histogram-track-selection', ncores, nreaders, stat.mean(t_res), stat.stdev(t_res), input_size, input_length) ) |
58 |
| - |
| 101 | + |
59 | 102 | for tname, targ in BENCHMARK_TASKS.items():
|
60 | 103 | targ = targ.substitute(n=str(ncores))
|
61 | 104 | cmd_list = [tname] + targ.split(' ')
|
|
# NOTE(review): lines 105-107 (the `if CPU_SELECTION:` / `if ncores == 2:` guards)
# are not shown in this diff view. Pinning below uses machine-specific core IDs
# (see the NUMA notes at the end of the file).
65 | 108 | cmd_list = ['taskset','-c','5,15'] + cmd_list
|
66 | 109 | elif ncores == 4:
|
67 | 110 | cmd_list = ['taskset','-c','1,3,11,13'] + cmd_list
|
| 111 | + |
| 112 | + if LARGE_SHM_SEGMENT_SIZE: |
| 113 | + cmd_list += ['--shm-segment-size', str(16000000000)] |
68 | 114 |
|
69 | 115 | cmd_list += ['--aod-file', INPUT_FILE]
|
70 | 116 | cmd_list += ['--readers', str(nreaders)]
|
71 | 117 |
|
# Time the full task execution: NTRIALS repetitions of one run each.
72 | 118 | t = timeit.Timer('subprocess.run(cmd_list)', globals=globals())
|
73 | 119 | t_res = t.repeat(NTRIALS, 1)
|
74 |
| - writer.writerow( (tname[3:], ncores, nreaders, stat.mean(t_res), stat.stdev(t_res), input_size, input_length) ) |
| 120 | + writer.writerow( (tname[3:], ncores, nreaders, stat.mean(t_res), get_time_std(t_res), |
| 121 | + INPUT_SIZE, INPUT_LENGTH, get_timestamp(), get_cpu_load(), NCPU, MACHINE) ) |
75 | 122 |
|
76 | 123 | #alinsure
|
77 | 124 | #numa0 0-11,24-35
|
|
0 commit comments