Skip to content

Commit 9fcbea2

Browse files
authored
Merge pull request #32 from fact-project/datacheck_file
Datacheck file
2 parents 0fcf917 + b3e28d5 commit 9fcbea2

File tree

1 file changed

+67
-20
lines changed

1 file changed

+67
-20
lines changed

erna/scripts/gather_fits.py

Lines changed: 67 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,9 @@
2828
@click.option('--end', '-e', help='Last night to get data from')
2929
@click.option('--source', default='Crab')
3030
@click.option('--datacheck', help='The name of a condition set for the datacheck')
31+
@click.option('--runlist', help='A csv file with columns night, run_id, the runs to get')
3132
@click.option('-r', '--run-type', default='data', help='The runtype to consider')
32-
def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck, run_type):
33+
def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck, runlist, run_type):
3334
'''
3435
Gather the fits outputfiles of the erna automatic processing into a hdf5 file.
3536
The hdf5 file is written using h5py and contains the level 2 features in the
@@ -46,23 +47,40 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck
4647
database.init(**config['processing_database'])
4748
database.connect()
4849

49-
if datacheck is not None and datacheck not in datacheck_conditions:
50-
print('Conditions must be any of: ')
51-
for key in datacheck_conditions:
52-
print(key)
50+
if datacheck and runlist:
51+
print('Only one of datacheck or runlist allowed')
5352
sys.exit(1)
5453

54+
if datacheck is not None:
55+
if not (datacheck in datacheck_conditions or os.path.isfile(datacheck)):
56+
print('Conditions must be a file or any of: ')
57+
for key in datacheck_conditions:
58+
print(key)
59+
sys.exit(1)
60+
5561
processing_db = create_mysql_engine(**config['processing_database'])
5662
fact_db = create_mysql_engine(**config['fact_database'])
5763

58-
jar = (
59-
Jar
60-
.select(Jar.id, Jar.version)
61-
.where(Jar.version == ft_version)
62-
.get()
63-
)
64+
try:
65+
jar = (
66+
Jar
67+
.select(Jar.id, Jar.version)
68+
.where(Jar.version == ft_version)
69+
.get()
70+
)
71+
except Jar.DoesNotExist:
72+
print('FACT-Tools version not found, avaliable jars are')
73+
for jar in Jar.select(Jar.version):
74+
print(jar.version)
75+
sys.exit(1)
6476

65-
xml = XML.get(jar=jar, name=xml_name)
77+
try:
78+
xml = XML.get(jar=jar, name=xml_name)
79+
except XML.DoesNotExist:
80+
print('XML not found, avaliable xmls are:')
81+
for xml in XML.select(XML.name).join(Jar).where(Jar.version == ft_version):
82+
print(xml.name)
83+
sys.exit(1)
6684

6785
job_query = (
6886
Job
@@ -89,23 +107,52 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck
89107
sql, params = job_query.sql()
90108

91109
jobs = pd.read_sql_query(sql, processing_db, params=params)
92-
conditions = [
93-
'fNight <= {}'.format(jobs.night.max()),
94-
'fNight >= {}'.format(jobs.night.min()),
95-
'fSourceName = "{}"'.format(source),
96-
]
110+
if runlist is None:
111+
conditions = [
112+
'fNight <= {}'.format(jobs.night.max()),
113+
'fNight >= {}'.format(jobs.night.min()),
114+
'fSourceName = "{}"'.format(source),
115+
]
116+
else:
117+
wanted_runs = pd.read_csv(runlist)
118+
conditions = [
119+
'fNight <= {}'.format(wanted_runs.night.max()),
120+
'fNight >= {}'.format(wanted_runs.night.min()),
121+
]
122+
97123
if datacheck is not None:
98-
conditions.extend(datacheck_conditions[datacheck])
124+
if os.path.isfile(datacheck):
125+
with open(datacheck, 'r') as f:
126+
conditions.extend(f.read().splitlines())
127+
else:
128+
conditions.extend(datacheck_conditions[datacheck])
99129

100130
runs = get_runs(fact_db, conditions=conditions).set_index(['night', 'run_id'])
101131
jobs = jobs.join(runs, on=['night', 'run_id'], how='inner')
102-
successful_jobs = jobs.query('status == "success"')
103132

133+
if runlist is not None:
134+
jobs = wanted_runs.join(
135+
jobs.set_index(['night', 'run_id']),
136+
on=['night', 'run_id'],
137+
how='inner',
138+
)
139+
140+
successful_jobs = jobs.query('status == "success"')
104141
total = len(jobs)
105142
successful = len(successful_jobs)
143+
144+
if runlist is not None:
145+
if len(wanted_runs) != successful:
146+
click.confirm(
147+
'Only {} of {} runs available, continue?:'.format(
148+
total, len(wanted_runs)
149+
),
150+
abort=True,
151+
)
152+
106153
if total != successful:
107154
click.confirm(
108-
'Only {} of {} jobs finished, continue? [y, N] :'.format(successful, total),
155+
'Only {} of {} jobs finished, continue?'.format(successful, total),
109156
abort=True,
110157
)
111158

0 commit comments

Comments
 (0)