|
| 1 | +from corsikaio import CorsikaFile |
| 2 | +from fact.io import to_h5py |
| 3 | +from multiprocessing import Pool, cpu_count |
| 4 | +from tqdm import tqdm |
| 5 | +import os |
| 6 | +import click |
| 7 | +import pandas as pd |
| 8 | +import numpy as np |
| 9 | +from glob import glob |
| 10 | + |
| 11 | + |
| 12 | +def get_headers(f): |
| 13 | + with CorsikaFile(f) as cf: |
| 14 | + run_header, event_headers, run_end = cf.read_headers() |
| 15 | + return run_header, event_headers, run_end |
| 16 | + |
| 17 | + |
| 18 | +event_columns = [ |
| 19 | + 'run_number', |
| 20 | + 'event_number', |
| 21 | + 'particle_id', |
| 22 | + 'total_energy', |
| 23 | + 'starting_altitude', |
| 24 | + 'first_target_id', |
| 25 | + 'first_interaction_height', |
| 26 | + 'momentum_x', |
| 27 | + 'momentum_y', |
| 28 | + 'momentum_minus_z', |
| 29 | + 'zenith', |
| 30 | + 'azimuth', |
| 31 | +] |
| 32 | + |
| 33 | +run_header_columns = [ |
| 34 | + 'run_number', |
| 35 | + 'date', |
| 36 | + 'energy_spectrum_slope', |
| 37 | + 'energy_min', |
| 38 | + 'energy_max', |
| 39 | +] |
| 40 | + |
| 41 | + |
| 42 | +@click.command() |
| 43 | +@click.argument('outputfile') |
| 44 | +@click.argument( |
| 45 | + 'inputdir', |
| 46 | + nargs=-1, |
| 47 | + type=click.Path(exists=True, file_okay=False, dir_okay=True), |
| 48 | +) |
| 49 | +def main(outputfile, inputdir): |
| 50 | + inputfiles = [] |
| 51 | + for d in inputdir: |
| 52 | + inputfiles.extend(glob(os.path.join(d, 'cer*'))) |
| 53 | + |
| 54 | + for f in inputfiles[:]: |
| 55 | + if f + '.gz' in inputfiles: |
| 56 | + inputfiles.remove(f + '.gz') |
| 57 | + |
| 58 | + print('Processing', len(inputfiles), 'files') |
| 59 | + |
| 60 | + with Pool(cpu_count()) as pool: |
| 61 | + results = pool.imap_unordered(get_headers, inputfiles) |
| 62 | + |
| 63 | + run_headers = [] |
| 64 | + run_ends = [] |
| 65 | + |
| 66 | + for run_header, event_headers, run_end in tqdm(results, total=len(inputfiles)): |
| 67 | + |
| 68 | + run_headers.append(run_header) |
| 69 | + run_ends.append(run_end) |
| 70 | + |
| 71 | + df = pd.DataFrame(event_headers[event_columns]) |
| 72 | + to_h5py(df, outputfile, key='corsika_events', mode='a') |
| 73 | + |
| 74 | + print('saving runwise information') |
| 75 | + runs = pd.DataFrame(np.array(run_headers)[run_header_columns]) |
| 76 | + runs['n_events'] = np.array(run_ends)['n_events'] |
| 77 | + |
| 78 | + to_h5py(runs, outputfile, key='corsika_runs', mode='a') |
| 79 | + print('done') |
| 80 | + |
| 81 | + |
| 82 | +if __name__ == '__main__': |
| 83 | + main() |
0 commit comments