Commit c159f2c

Merge pull request #68 from fact-project/new_conditions
New conditions
2 parents f5cee3d + 95cc00d commit c159f2c

6 files changed (+26, -17 lines)

erna/scripts/fetch_fact_runs.py (+5, -2)

@@ -10,6 +10,7 @@
 
 logger = logging.getLogger(__name__)
 
+from fact_conditions import create_condition_set
 
 @click.command()
 @click.argument('earliest_night')
@@ -18,7 +19,7 @@
 @click.option('--source', help='Name of the source to analyze. e.g Crab', default='Crab')
 @click.option('--max_delta_t', default=30, help='Maximum time difference (minutes) allowed between drs and data files.', type=click.INT)
 @click.option('--parts', default=1, help='Number of parts to split the .json file into. This is useful for submitting this to a cluster later on', type=click.INT)
-@click.option('--conditions', help='Name of the data conditions as given in datacheck_conditions.py e.g std', default='std')
+@click.option('--conditions', '-c', help='Name of the data conditions as given in datacheck_conditions.py e.g @standard or "fParameter < 42 "', default=['@standard'], multiple=True)
 @click.password_option(help='password to read from the always awesome RunDB')
 def main(earliest_night, latest_night, data_dir, source, max_delta_t, parts, password, conditions):
     ''' This script connects to the rundb and fetches all runs belonging to the specified source.
@@ -32,7 +33,9 @@ def main(earliest_night, latest_night, data_dir, source, max_delta_t, parts, pa
 
     factdb = create_engine("mysql+pymysql://factread:{}@129.194.168.95/factdata".format(password))
 
-    data_conditions=dcc.conditions[conditions]
+    # create the set of conditions we want to use
+    data_conditions = create_condition_set(conditions)
+
     mapping = erna.load(earliest_night, latest_night, data_dir, source_name=source, timedelta_in_minutes=max_delta_t, factdb=factdb, data_conditions=data_conditions)
     if mapping.empty:
         logger.error('No entries matching the conditions could be found in the RunDB')
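
The switch to multiple=True is the behavioural core of this change: Click now gathers every repeated -c/--conditions flag into a tuple instead of a single string, so a named set such as @standard can be combined with free-form cuts. A minimal standalone sketch of that Click behaviour (the option definition mirrors the diff; the command name and echo body are purely illustrative):

    import click

    @click.command()
    @click.option('--conditions', '-c', default=['@standard'], multiple=True,
                  help='Named condition set (e.g. @standard) or a raw cut such as "fParameter < 42"')
    def show_conditions(conditions):
        # with multiple=True the option arrives as a tuple, e.g. ('@standard', 'fZenithDistance < 30')
        click.echo('got {} condition entries: {}'.format(len(conditions), conditions))

    if __name__ == '__main__':
        show_conditions()

Called with -c @standard -c "fZenithDistance < 30" the tuple keeps both entries; with no -c at all it falls back to ('@standard',).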

erna/scripts/process_fact_data.py (+6, -2)

@@ -58,6 +58,7 @@ def make_jobs(jar, xml, aux_source_path, output_directory, df_mapping, engine,
 
     return jobs
 
+from fact_conditions import create_condition_set
 
 @click.command()
 @click.argument('earliest_night')
@@ -75,7 +76,7 @@ def make_jobs(jar, xml, aux_source_path, output_directory, df_mapping, engine,
 @click.option('--log_level', type=click.Choice(['INFO', 'DEBUG', 'WARN']), help='increase output verbosity', default='INFO')
 @click.option('--port', help='The port through which to communicate with the JobMonitor', default=12856, type=int)
 @click.option('--source', help='Name of the source to analyze. e.g Crab', default='Crab')
-@click.option('--conditions', help='Name of the data conditions as given in datacheck_conditions.py e.g standard', default='standard')
+@click.option('--conditions', '-c', help='Name of the data conditions as given in datacheck_conditions.py e.g @standard or "fParameter < 42 "', default=['@standard'], multiple=True)
 @click.option('--max_delta_t', default=30, help='Maximum time difference (minutes) allowed between drs and data files.', type=click.INT)
 @click.option('--local', default=False,is_flag=True, help='Flag indicating whether jobs should be executed localy .')
 @click.option('--local_output', default=False, is_flag=True,
@@ -114,7 +115,10 @@ def main(earliest_night, latest_night, data_dir, jar, xml, aux_source, out, queu
     os.makedirs(output_directory, exist_ok=True)
     logger.info("Writing output data to {}".format(out))
     factdb = sqlalchemy.create_engine("mysql+pymysql://factread:{}@129.194.168.95/factdata".format(password))
-    data_conditions=dcc.conditions[conditions]
+
+    # create the set of conditions we want to use
+    data_conditions = create_condition_set(conditions)
+
     df_runs = erna.load(earliest_night, latest_night, data_dir, source_name=source, timedelta_in_minutes=max_delta_t, factdb=factdb, data_conditions=data_conditions)
 
     # check for missing data and fix possible wrong file extension (.fz->.gz)

erna/scripts/process_fact_data_qsub.py (+6, -3)

@@ -46,7 +46,7 @@ def read_outputs_to_list(job_output_paths):
 
 
 
-
+from fact_conditions import create_condition_set
 
 @click.command()
 @click.argument('earliest_night')
@@ -66,7 +66,7 @@ def read_outputs_to_list(job_output_paths):
 @click.option('--log_level', type=click.Choice(['INFO', 'DEBUG', 'WARN']), help='increase output verbosity', default='INFO')
 @click.option('--port', help='The port through which to communicate with the JobMonitor', default=12856, type=int)
 @click.option('--source', help='Name of the source to analyze. e.g Crab', default='Crab')
-@click.option('--conditions', help='Name of the data conditions as given in datacheck_conditions.py e.g std', default='data')
+@click.option('--conditions', '-c', help='Name of the data conditions as given in datacheck_conditions.py e.g @standard or "fParameter < 42 "', default=['@standard'], multiple=True)
 @click.option('--max_delta_t', default=30, help='Maximum time difference (minutes) allowed between drs and data files.', type=click.INT)
 @click.option('--local', default=False,is_flag=True, help='Flag indicating whether jobs should be executed localy .')
 @click.option('--yes', help="Assume 'yes'if your asked to continue processing and start jobs", default=False, is_flag=True)
@@ -98,7 +98,10 @@ def main(earliest_night, latest_night, data_dir, jar, xml, aux_source, out, queu
     os.makedirs(output_directory, exist_ok=True)
 
     factdb = sqlalchemy.create_engine("mysql+pymysql://factread:{}@129.194.168.95/factdata".format(password))
-    data_conditions=dcc.conditions[conditions]
+
+    # create the set of conditions we want to use
+    data_conditions = create_condition_set(conditions)
+
     df_loaded = erna.load(earliest_night, latest_night, data_dir, source_name=source, timedelta_in_minutes=max_delta_t, factdb=factdb, data_conditions=data_conditions)
     df_loaded.to_hdf(out+".tmp", "loaded", mode="a")
 
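
All three scripts now hand the collected --conditions entries to create_condition_set from the external fact_conditions package (added to requirements.txt further down) instead of indexing the local datacheck_conditions module (dcc). The function body is not part of this diff; as a rough, purely illustrative sketch of the behaviour the new help text suggests (@-prefixed entries name a predefined condition set, anything else is passed through as a literal cut), it could look roughly like the following. The PREDEFINED mapping and its cuts are made up here, not the real fact_conditions code:

    # Illustrative sketch only; the real implementation lives in the fact_conditions package.
    PREDEFINED = {
        'standard': ['fRunTypeKey == 1', 'fZenithDistance < 30'],  # example cuts, not the actual set
    }

    def create_condition_set(conditions):
        '''Expand @-prefixed names into their predefined cuts, pass raw cuts through.'''
        condition_set = []
        for entry in conditions:
            if entry.startswith('@'):
                condition_set.extend(PREDEFINED[entry.lstrip('@')])
            else:
                condition_set.append(entry.strip())
        return condition_set

Under that assumption, create_condition_set(('@standard', 'fParameter < 42')) would yield the predefined cuts plus the extra literal condition.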

erna/scripts/process_fact_mc.py (+5, -9)

@@ -17,7 +17,7 @@
 logger = logging.getLogger(__name__)
 
 def make_jobs(jar, xml, data_paths, drs_paths,
-              engine, queue, vmem, num_jobs, walltime, output_path=None, filename_format="{basename}_{num}.json"):
+              engine, queue, vmem, num_jobs, walltime, output_path=None, local_output_extension="json"):
     jobs = []
 
     data_partitions = np.array_split(data_paths, num_jobs)
@@ -35,7 +35,7 @@ def make_jobs(jar, xml, data_paths, drs_paths,
         if output_path:
             # create the filenames for each single local run
             file_name, _ = path.splitext(path.basename(output_path))
-            file_name = create_filename_from_format(filename_format, file_name, num)
+            file_name = "{}_{}.{}".format(file_name, num, local_output_extension)
             out_path = path.dirname(output_path)
             run = [jar, xml, df, path.join(out_path, file_name)]
             stream_runner = stream_runner_local
@@ -81,12 +81,8 @@ def make_jobs(jar, xml, data_paths, drs_paths,
               show_default=True)
 @click.option('--mcdrs', type=click.Path(exists=True, dir_okay=False, file_okay=True, readable=True))
 @click.option('--mcwildcard', help="Gives the wildcard for searching the folder for files.", type=click.STRING, default='**/*_Events.fit*')
-@click.option('--local_output_format', default="{basename}_{num}.json", help="Give the file format for the local output funktionality."
-              + "%b will replace the out filename and %[1-9]n the given local number."
-              + "Default is: '{basename}_{num}.json'.Only works with option --local_output. ")
-@click.option('--yes', help="Assume 'yes'if your asked to continue processing and start jobs", default=False, is_flag=True)
-def main( jar, xml, out, mc_path, queue, walltime, engine, num_jobs, vmem, log_level, port, local, local_output, mcdrs, mcwildcard, local_output_format, yes):
-
+@click.option('--local_output_extension', default="json", help="Give the file format for the local output funktionality.")
+def main( jar, xml, out, mc_path, queue, walltime, engine, num_jobs, vmem, log_level, port, local, local_output, mcdrs, mcwildcard, local_output_extension):
     '''
     Script to execute fact-tools on MonteCarlo files. Use the MC_PATH argument to specifiy the folders containing the MC
     '''
@@ -142,7 +138,7 @@ def main( jar, xml, out, mc_path, queue, walltime, engine, num_jobs, vmem, log_l
         job_list = make_jobs(
             jarpath, xmlpath, mc_paths_array,
             drs_paths_array, engine, queue,
-            vmem, num_jobs, walltime, output_path=local_output_dir, filename_format=local_output_format
+            vmem, num_jobs, walltime, output_path=local_output_dir, local_output_extension=local_output_extension
         )
     else:
         job_list = make_jobs(
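
On the MC side the templated --local_output_format option (with its %b / %[1-9]n placeholders) is dropped in favour of a plain --local_output_extension: local output names are now just basename, part number and extension. A small standalone check of what the new format call in make_jobs produces (the example path below is illustrative, not taken from the diff):

    from os import path

    output_path = '/some/output/gamma_mc.json'   # example local output path
    local_output_extension = 'json'              # the new option's default

    file_name, _ = path.splitext(path.basename(output_path))
    for num in range(3):
        # same expression as the new line in make_jobs
        print("{}_{}.{}".format(file_name, num, local_output_extension))
    # prints gamma_mc_0.json, gamma_mc_1.json, gamma_mc_2.json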

requirements.txt (+2)

@@ -0,0 +1,2 @@
+https://github.com/fact-project/fact_conditions/archive/v0.1.tar.gz
+https://github.com/mackaiver/gridmap/archive/a38271b35a96adbda0c0f3e62f3751b3c8c3982a.tar.gz

setup.py (+2, -1)

@@ -2,7 +2,7 @@
 
 setup(
     name='erna',
-    version='0.8.2',
+    version='0.8.3',
     description='Easy RuN Access. Tools that help to do batch processing of FACT data',
     url='https://github.com/fact-project/erna',
     author='Kai Brügge, Jens Buss, Maximilian Nöthe',
@@ -41,6 +41,7 @@
         'xmltodict',
         'wrapt',
         # 'gridmap>=0.13.1', install from https://github.com/mackaiver/gridmap'
+        # 'fact_condition', install from https://github.com/fact-project/fact_conditions
     ],
     zip_safe=False,
     entry_points={
