|
| 1 | +from eventio import EventIOFile |
| 2 | +from eventio.iact import RunHeader, EventHeader, RunEnd |
| 3 | +from fact.io import to_h5py |
| 4 | +from multiprocessing import cpu_count |
| 5 | +from concurrent.futures import ProcessPoolExecutor, as_completed |
| 6 | +from tqdm import tqdm |
| 7 | +import os |
| 8 | +import click |
| 9 | +import pandas as pd |
| 10 | +import numpy as np |
| 11 | +import re |
| 12 | + |
| 13 | + |
| 14 | +def get_headers(f): |
| 15 | + run_header = None |
| 16 | + run_end = None |
| 17 | + event_headers = [] |
| 18 | + |
| 19 | + cf = EventIOFile(f) |
| 20 | + |
| 21 | + for o in cf: |
| 22 | + if isinstance(o, EventHeader): |
| 23 | + event_headers.append(o.parse()) |
| 24 | + elif isinstance(o, RunHeader): |
| 25 | + run_header = o.parse() |
| 26 | + elif isinstance(o, RunEnd): |
| 27 | + run_end = o.parse() |
| 28 | + |
| 29 | + return run_header, np.array(event_headers), run_end |
| 30 | + |
| 31 | + |
| 32 | +event_columns = [ |
| 33 | + 'run_number', |
| 34 | + 'event_number', |
| 35 | + 'particle_id', |
| 36 | + 'total_energy', |
| 37 | + 'starting_altitude', |
| 38 | + 'first_target_id', |
| 39 | + 'first_interaction_height', |
| 40 | + 'momentum_x', |
| 41 | + 'momentum_y', |
| 42 | + 'momentum_minus_z', |
| 43 | + 'zenith', |
| 44 | + 'azimuth', |
| 45 | + 'n_reuse', |
| 46 | + 'viewcone_inner_angle', |
| 47 | + 'viewcone_outer_angle', |
| 48 | +] |
| 49 | + |
| 50 | +run_header_columns = [ |
| 51 | + 'run_number', |
| 52 | + 'date', |
| 53 | + 'energy_spectrum_slope', |
| 54 | + 'energy_min', |
| 55 | + 'energy_max', |
| 56 | + 'n_showers', |
| 57 | + 'x_scatter', |
| 58 | + 'y_scatter', |
| 59 | +] |
| 60 | + |
| 61 | + |
| 62 | +@click.command() |
| 63 | +@click.argument('outputfile') |
| 64 | +@click.argument( |
| 65 | + 'inputdir', |
| 66 | + nargs=-1, |
| 67 | + type=click.Path(exists=True, file_okay=False, dir_okay=True), |
| 68 | +) |
| 69 | +@click.option('--infile-re', default=r'.*.eventio(\.gz|\.zst)?') |
| 70 | +@click.option('--n-jobs', default=cpu_count(), type=int) |
| 71 | +def main(outputfile, inputdir, infile_re, n_jobs): |
| 72 | + inputfiles = [] |
| 73 | + file_re = re.compile(infile_re) |
| 74 | + |
| 75 | + for d in tqdm(inputdir): |
| 76 | + for root, dirs, files in os.walk(os.path.abspath(d)): |
| 77 | + for f in files: |
| 78 | + if file_re.match(f): |
| 79 | + inputfiles.append(os.path.join(root, f)) |
| 80 | + |
| 81 | + print('Processing', len(inputfiles), 'files') |
| 82 | + |
| 83 | + with ProcessPoolExecutor(n_jobs) as pool: |
| 84 | + futures = [pool.submit(get_headers, f) for f in inputfiles] |
| 85 | + |
| 86 | + run_headers = [] |
| 87 | + run_ends = [] |
| 88 | + |
| 89 | + reuses = [] |
| 90 | + for future in tqdm(as_completed(futures), total=len(inputfiles)): |
| 91 | + run_header, event_headers, run_end = future.result() |
| 92 | + |
| 93 | + run_headers.append(run_header) |
| 94 | + run_ends.append(run_end) |
| 95 | + |
| 96 | + df = pd.DataFrame(event_headers[event_columns]) |
| 97 | + to_h5py(df, outputfile, key='corsika_events', mode='a') |
| 98 | + reuses.append(df['n_reuse'].iloc[0]) |
| 99 | + |
| 100 | + print('saving runwise information') |
| 101 | + runs = pd.DataFrame(np.array(run_headers)[run_header_columns]) |
| 102 | + runs['n_events'] = np.array(run_ends)['n_events'] |
| 103 | + runs['n_reuse'] = reuses |
| 104 | + |
| 105 | + to_h5py(runs, outputfile, key='corsika_runs', mode='a') |
| 106 | + print('done') |
| 107 | + |
| 108 | + |
| 109 | +if __name__ == '__main__': |
| 110 | + main() |
0 commit comments