From ceb979287adcfd4ce05653280a9f6d348391765e Mon Sep 17 00:00:00 2001 From: jarema Date: Tue, 17 Jul 2018 23:59:08 +0200 Subject: [PATCH 01/12] add time logs for slurn --- mcpartools/generator.py | 8 ++ mcpartools/mcengine/data/collect.sh | 25 +++- mcpartools/mcengine/data/run_fluka.sh | 28 ++++- mcpartools/mcengine/data/run_shieldhit.sh | 25 ++++ mcpartools/scheduler/base.py | 16 +++ mcpartools/scheduler/common.py | 1 + mcpartools/scheduler/data/merge_logs.sh | 147 ++++++++++++++++++++++ mcpartools/scheduler/data/submit_slurm.sh | 27 +++- mcpartools/scheduler/slurm.py | 2 + mcpartools/scheduler/torque.py | 2 + 10 files changed, 277 insertions(+), 4 deletions(-) create mode 100755 mcpartools/scheduler/data/merge_logs.sh diff --git a/mcpartools/generator.py b/mcpartools/generator.py index a0446cc..60a4ef6 100644 --- a/mcpartools/generator.py +++ b/mcpartools/generator.py @@ -146,6 +146,9 @@ def run(self): # make symlinks to external files found self.symlink_external_files() + # generate script merging info logs + self.generate_merge_logs_script() + # store information about command line arguments, date, time, user and hostname into generatemc.log self.save_logs() @@ -242,3 +245,8 @@ def save_logs(self): file_logger.info('Date and time: ' + time.strftime("%Y-%m-%d %H:%M:%S")) file_logger.info('username@hostname: ' + getpass.getuser() + '@' + socket.gethostname()) file_logger.info('Current working directory: ' + os.getcwd()) + + def generate_merge_logs_script(self): + wspdir_name = 'workspace' + wspdir_path = os.path.join(self.main_dir, wspdir_name) + self.scheduler.write_merge_logs_script(wspdir_path) diff --git a/mcpartools/mcengine/data/collect.sh b/mcpartools/mcengine/data/collect.sh index fb5f701..aad0db8 100755 --- a/mcpartools/mcengine/data/collect.sh +++ b/mcpartools/mcengine/data/collect.sh @@ -3,10 +3,33 @@ # Exit immediately if a simple command exits with a non-zero status. set -e +START=$(date +%s) + INPUT_WILDCARD={output_dir:s}/workspace/job_*/{wildcard:s} OUTPUT_DIRECTORY={output_dir:s}/output +LOG_FILE=$OUTPUT_DIRECTORY/info.log # make output folder mkdir -p $OUTPUT_DIRECTORY -{collect_action:s} \ No newline at end of file +echo "###########################################################" > $LOG_FILE +echo "################### COLLECT INFORMATION ###################" >> $LOG_FILE +echo "###########################################################" >> $LOG_FILE +echo "#" >> $LOG_FILE +echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE +echo "# END = -" >> $LOG_FILE +echo "# TIME IN SECONDS = -" >> $LOG_FILE +echo "# STATUS = 1" >> $LOG_FILE +echo "#" >> $LOG_FILE + +{collect_action:s} +COLLECT_STATUS=$? + +let "EXECUTION_TIME = $(date +%s) - $START" + +# end time is in line number 6 +sed -i "6s/.*/# END = `date +"%Y-%m-%d %H:%M:%S"`/" $LOG_FILE +# collapsed time is in line number 7 +sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE +# status is in line number 8 +sed -i "8s/.*/# STATUS = 0/" $LOG_FILE diff --git a/mcpartools/mcengine/data/run_fluka.sh b/mcpartools/mcengine/data/run_fluka.sh index 3595927..081b0ad 100755 --- a/mcpartools/mcengine/data/run_fluka.sh +++ b/mcpartools/mcengine/data/run_fluka.sh @@ -3,14 +3,40 @@ # Exit immediately if a simple command exits with a non-zero status. set -e +START=$(date +%s) + # location of FLUKA binary file FLUKA_BIN={fluka_bin:s} +WORK_DIR={working_directory:s} # go to working directory -cd {working_directory:s} +cd $WORK_DIR + +LOG_FILE=$WORK_DIR"/info.log" + +echo "###########################################################" > $LOG_FILE +echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE +echo "###########################################################" >> $LOG_FILE +echo "#" >> $LOG_FILE +echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE +echo "# END = -" >> $LOG_FILE +echo "# TIME IN SECONDS = -" >> $LOG_FILE +echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE +echo "# STATUS = 1" >> $LOG_FILE +echo "#" >> $LOG_FILE # run rfluka $FLUKA_BIN -N0 -M1 {engine_options:s} {input_basename:s} +SIMULATION_STATUS=$? + +let "EXECUTION_TIME = $(date +%s) - $START" + +# end time is in line number 6 +sed -i "6s/.*/# END = `date +"%Y-%m-%d %H:%M:%S"`/" $LOG_FILE +# collapsed time is in line number 7 +sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE +# status is in line number 9 +sed -i "9s/.*/# STATUS =`printf "%20d" $SIMULATION_STATUS`/" $LOG_FILE # each fluka run will save files with same name, in order to distinguish output from multiple runs # we rename output files, appending suffix with jobid to each of them diff --git a/mcpartools/mcengine/data/run_shieldhit.sh b/mcpartools/mcengine/data/run_shieldhit.sh index 33b2293..6b46762 100755 --- a/mcpartools/mcengine/data/run_shieldhit.sh +++ b/mcpartools/mcengine/data/run_shieldhit.sh @@ -3,6 +3,9 @@ # Exit immediately if a simple command exits with a non-zero status. set -e + +START=$(date +%s) + # location of SHIELDHIT binary file SHIELDHIT_BIN={shieldhit_bin:s} @@ -21,6 +24,28 @@ GEO_FILE={geo_file:s} MAT_FILE={mat_file:s} DETECT_FILE={detect_file:s} +LOG_FILE=$WORK_DIR"/info.log" + +echo "###########################################################" > $LOG_FILE +echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE +echo "###########################################################" >> $LOG_FILE +echo "#" >> $LOG_FILE +echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE +echo "# END = -" >> $LOG_FILE +echo "# TIME IN SECONDS = -" >> $LOG_FILE +echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE +echo "# STATUS = 1" >> $LOG_FILE +echo "#" >> $LOG_FILE + # execute simulation $SHIELDHIT_BIN --beamfile=$BEAM_FILE --geofile=$GEO_FILE --matfile=$MAT_FILE --detectfile=$DETECT_FILE -n $PARTICLE_NO -N $RNG_SEED {engine_options:s} $WORK_DIR +SIMULATION_STATUS=$? + +let "EXECUTION_TIME = $(date +%s) - $START" +# end time is in line number 6 +sed -i "6s/.*/# END = `date +"%Y-%m-%d %H:%M:%S"`/" $LOG_FILE +# collapsed time is in line number 7 +sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE +# status is in line number 9 +sed -i "9s/.*/# STATUS =`printf "%20d" $SIMULATION_STATUS`/" $LOG_FILE \ No newline at end of file diff --git a/mcpartools/scheduler/base.py b/mcpartools/scheduler/base.py index 3f58efa..077d6a9 100644 --- a/mcpartools/scheduler/base.py +++ b/mcpartools/scheduler/base.py @@ -25,6 +25,7 @@ def __init__(self, scheduler_options): submit_script = 'submit.sh' main_run_script = 'main_run.sh' + merge_logs_script = 'merge_logs.sh' def submit_script_body(self, jobs_no, main_dir, workspace_dir): from pkg_resources import resource_string @@ -51,6 +52,11 @@ def main_run_script_body(self, jobs_no, workspace_dir): jobs_no=jobs_no) return self.main_run_script + def merge_logs_body(self, workspace_dir, output_dir): + from pkg_resources import resource_string + tpl = resource_string(__name__, self.merge_logs_script_template) + return tpl.decode("ascii").format(workspace_dir=workspace_dir, collect_dir=output_dir) + def write_submit_script(self, main_dir, script_basename, jobs_no, workspace_dir): script_path = os.path.join(main_dir, script_basename) fd = open(script_path, 'w') @@ -72,3 +78,13 @@ def write_main_run_script(self, jobs_no, output_dir): os.chmod(out_file_path, 0o750) logger.debug("Saved main run script: " + out_file_path) logger.debug("Output dir " + output_dir) + + def write_merge_logs_script(self, workspace_dir, output_dir): + workspace_dir_abspath = os.path.abspath(workspace_dir) + output_dir_abspath = os.path.abspath(output_dir) + out_file_path = os.path.join(workspace_dir_abspath, self.merge_logs_script) + fd = open(out_file_path, 'w') + fd.write(self.merge_logs_body(workspace_dir_abspath, output_dir_abspath)) + fd.close() + os.chmod(out_file_path, 0o750) + logger.debug("Saved merge logs script: " + out_file_path) diff --git a/mcpartools/scheduler/common.py b/mcpartools/scheduler/common.py index a7152b3..3835d88 100644 --- a/mcpartools/scheduler/common.py +++ b/mcpartools/scheduler/common.py @@ -16,6 +16,7 @@ def __init__(self): @classmethod def get_scheduler(cls, scheduler_options, log_location): file_logger = logging.getLogger('file_logger') + try: srun_output = check_output(['srun --version'], shell=True) file_logger.info("srun version: {}".format(srun_output[:-1])) diff --git a/mcpartools/scheduler/data/merge_logs.sh b/mcpartools/scheduler/data/merge_logs.sh new file mode 100755 index 0000000..cb420f6 --- /dev/null +++ b/mcpartools/scheduler/data/merge_logs.sh @@ -0,0 +1,147 @@ +#!/usr/bin/env bash + +# Exit immediately if a simple command exits with a non-zero status. +set -e + +function writeLogHeader(){{ + echo "###########################################################" > ${{LOG_FILE}} + echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# ID START END" >> ${{LOG_FILE}} + + for i in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; + do + if [[ $(cat $i) =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + else + echo "Cannot get job ID from $i file" + continue + fi + + if [[ $(cat $i) =~ $START_REGEX ]]; + then + START_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get start time from $i file" + continue + fi + + if [[ $(cat $i) =~ $END_REGEX ]]; + then + END_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get end time from $i file" + continue + fi + + if [[ $(cat $i) =~ $STATUS_REGEX ]]; + then + STATUS=${{BASH_REMATCH[1]}}; + else + echo "Cannot get status from $i file" + continue + fi + + echo "# `printf "%5d" $JOB_ID` $START_TIME $END_TIME" >> ${{LOG_FILE}} + done + echo "#" >> ${{LOG_FILE}} +}} + +function writeTimeInSeconds(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "############### EXECUTION TIME IN SECONDS #################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo " ID TIME STATUS " >> ${{LOG_FILE}} + + for i in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; + do + if [[ $(cat $i) =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + else + echo "Cannot get job ID from $i file" + continue + fi + + if [[ $(cat $i) =~ $COLLAPSED_TIME_REGEX ]]; + then + COLLAPSED_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get collapsed time from $i file" + continue + fi + + if [[ $(cat $i) =~ $STATUS_REGEX ]]; + then + STATUS=${{BASH_REMATCH[1]}}; + else + echo "Cannot get status from $i file" + continue + fi + + TASK_NUMBER=$((TASK_NUMBER + 1)) + + if [[ ${{STATUS}} -ne 0 ]] + then + FAILED=$((FAILED + 1)) + else + SUCCESSES=$((SUCCESSES + 1)) + TOTAL_TIME=$((TOTAL_TIME + $COLLAPSED_TIME)) + fi + + echo " `printf "%5d" $JOB_ID` `printf "%20d" $COLLAPSED_TIME` `printf "%10d" $STATUS`" >> ${{LOG_FILE}} + done + echo "#" >> ${{LOG_FILE}} +}} + +function writeJobsDetailInformation(){{ + cat ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}} >> ${{LOG_FILE}} +}} + +function writeSummary(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "######################## SUMMARY ##########################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# NUMBER OF TASKS = `printf "%20d" $TASK_NUMBER`" >> ${{LOG_FILE}} + echo "# SUCCESS = `printf "%20d" $SUCCESSES`" >> ${{LOG_FILE}} + echo "# FAILED = `printf "%20d" $FAILED`" >> ${{LOG_FILE}} + + if [[ ${{SUCCESSES}} -ne 0 ]] + then + echo "# AVERAGE TIME [s]= `printf "%20d" $(($TOTAL_TIME / $SUCCESSES))`" >> ${{LOG_FILE}} + else + echo "# AVERAGE TIME [s]= `printf "%20s" -`" >> ${{LOG_FILE}} + fi + + echo "#" >> ${{LOG_FILE}} +}} + +function appendCollectInfo() {{ + if [ -f $COLLECT_LOG ]; then + cat $COLLECT_LOG >> ${{LOG_FILE}} + fi +}} + +JOBS_LOG_FILE="info.log" +WORKSPACE={workspace_dir:s} +LOG_FILE=${{WORKSPACE}}/${{JOBS_LOG_FILE}} +JOB_ID_REGEX="#+ DETAILED INFORMATION ABOUT JOB\s+([0-9]*)" +START_REGEX="# START\s+=\s(.{{19}})" +END_REGEX="# END\s+=\s(.{{19}})" +STATUS_REGEX="# STATUS\s+=\s+([0-9]*)" +COLLAPSED_TIME_REGEX="# TIME IN SECONDS\s+=\s+([0-9]*)" +COLLECT_LOG={collect_dir:s}/info.log +TASK_NUMBER=0 +SUCCESSES=0 +FAILED=0 +TOTAL_TIME=0 + +writeLogHeader +writeTimeInSeconds +writeJobsDetailInformation +appendCollectInfo +writeSummary diff --git a/mcpartools/scheduler/data/submit_slurm.sh b/mcpartools/scheduler/data/submit_slurm.sh index b060d3d..564c130 100755 --- a/mcpartools/scheduler/data/submit_slurm.sh +++ b/mcpartools/scheduler/data/submit_slurm.sh @@ -2,7 +2,8 @@ # Log file submit.log will be created in the same directory submit.sh is located # submit.log is for storing stdout and stderr of sbatch command, for log info from individual jobs see {log_dir:s} directory -LOGFILE="$(cd $(dirname $0) && pwd)/submit.log" +WORK_DIR=$(cd $(dirname $0) && pwd) +LOGFILE=$WORK_DIR/submit.log echo -n "" > "$LOGFILE" # Create temporary files for parsing stdout and stderr output from sbatch command before storing them in submit.log @@ -35,7 +36,7 @@ if [ "`cat $ERR`" != "" ] ; then cat $ERR >> "$LOGFILE" fi -# If parallel calculation submission was successful, we proceed to submit collect script +# If parallel calculation submission was successful, we proceed to submit collect script and the create a log file if [ -n "$CALC_JOBID" ] ; then COLLECT_CMD="sbatch {options_args:s} --dependency=afterany:$CALC_JOBID --output='{log_dir:s}/output_%j_collect.log' --error='{log_dir:s}/error_%j_collect.log' --parsable {main_dir:s}/{collect_script_name:s} > $OUT 2> $ERR" eval $COLLECT_CMD @@ -58,4 +59,26 @@ if [ -n "$CALC_JOBID" ] ; then echo "---------------------" >> "$LOGFILE" cat $ERR >> "$LOGFILE" fi + + MERGE_LOGS_CMD="sbatch --dependency=afterany:$COLLECT_JOBID {main_dir:s}/workspace/merge_logs.sh > $OUT 2> $ERR" + eval $MERGE_LOGS_CMD + + echo "" >> "$LOGFILE" + echo "Merge logs" >> "$LOGFILE" + echo "Merge command: $MERGE_LOGS_CMD" >> "$LOGFILE" + + # If sbatch command ended with a success log following info + if [ $? -eq 0 ] ; then + MERGE__JOBID=`cat $OUT | cut -d ";" -f 1` + echo "Job ID: $MERGE__JOBID" >> "$LOGFILE" + echo "Submission time: `date +"%Y-%m-%d %H:%M:%S"`" >> "$LOGFILE" + fi + + # If output from stderr isn't an empty string then log it as well to submit.log + if [ "`cat $ERR`" != "" ] ; then + echo "---------------------" >> "$LOGFILE" + echo "ERROR MESSAGE" >>"$LOGFILE" + echo "---------------------" >> "$LOGFILE" + cat $ERR >> "$LOGFILE" + fi fi diff --git a/mcpartools/scheduler/slurm.py b/mcpartools/scheduler/slurm.py index b809ac2..46e499d 100644 --- a/mcpartools/scheduler/slurm.py +++ b/mcpartools/scheduler/slurm.py @@ -13,3 +13,5 @@ def __init__(self, options_content): submit_script_template = os.path.join('data', 'submit_slurm.sh') main_run_script_template = os.path.join('data', 'run_slurm.sh') + + merge_logs_script_template = os.path.join('data', 'merge_logs.sh') diff --git a/mcpartools/scheduler/torque.py b/mcpartools/scheduler/torque.py index 7fcf067..c40e596 100644 --- a/mcpartools/scheduler/torque.py +++ b/mcpartools/scheduler/torque.py @@ -13,3 +13,5 @@ def __init__(self, options_content): submit_script_template = os.path.join('data', 'submit_torque.sh') main_run_script_template = os.path.join('data', 'run_torque.sh') + + merge_logs_script_template = os.path.join('data', 'merge_logs.sh') From 783228a81e5d96f768f33d7641a907da0dd9e8e3 Mon Sep 17 00:00:00 2001 From: jarema Date: Tue, 14 Aug 2018 22:14:42 +0200 Subject: [PATCH 02/12] implement status script, improve time logs collecting --- mcpartools/generator.py | 12 +++++++- mcpartools/mcengine/data/run_fluka.sh | 2 +- mcpartools/mcengine/data/run_shieldhit.sh | 2 +- mcpartools/scheduler/base.py | 34 +++++++++++++++++++---- mcpartools/scheduler/data/merge_logs.sh | 22 +++++++++++++-- mcpartools/scheduler/data/run_torque.sh | 4 ++- mcpartools/scheduler/data/status.sh | 15 ++++++++++ mcpartools/scheduler/data/submit_slurm.sh | 8 ++++-- mcpartools/scheduler/slurm.py | 2 ++ mcpartools/scheduler/torque.py | 2 ++ 10 files changed, 89 insertions(+), 14 deletions(-) create mode 100755 mcpartools/scheduler/data/status.sh diff --git a/mcpartools/generator.py b/mcpartools/generator.py index 60a4ef6..50f46d2 100644 --- a/mcpartools/generator.py +++ b/mcpartools/generator.py @@ -149,6 +149,9 @@ def run(self): # generate script merging info logs self.generate_merge_logs_script() + # generate status script + self.generate_status_script() + # store information about command line arguments, date, time, user and hostname into generatemc.log self.save_logs() @@ -247,6 +250,13 @@ def save_logs(self): file_logger.info('Current working directory: ' + os.getcwd()) def generate_merge_logs_script(self): + wspdir_name = 'workspace' + output_name = 'output' + wspdir_path = os.path.join(self.main_dir, wspdir_name) + collect_path = os.path.join(self.main_dir, output_name) + self.scheduler.write_merge_logs_script(wspdir_path, collect_path, self.main_dir) + + def generate_status_script(self): wspdir_name = 'workspace' wspdir_path = os.path.join(self.main_dir, wspdir_name) - self.scheduler.write_merge_logs_script(wspdir_path) + self.scheduler.write_status_script(self.main_dir, wspdir_path) diff --git a/mcpartools/mcengine/data/run_fluka.sh b/mcpartools/mcengine/data/run_fluka.sh index 081b0ad..58c69b3 100755 --- a/mcpartools/mcengine/data/run_fluka.sh +++ b/mcpartools/mcengine/data/run_fluka.sh @@ -22,7 +22,7 @@ echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE echo "# END = -" >> $LOG_FILE echo "# TIME IN SECONDS = -" >> $LOG_FILE echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE -echo "# STATUS = 1" >> $LOG_FILE +echo "# STATUS = -" >> $LOG_FILE echo "#" >> $LOG_FILE # run rfluka diff --git a/mcpartools/mcengine/data/run_shieldhit.sh b/mcpartools/mcengine/data/run_shieldhit.sh index 6b46762..0ccb615 100755 --- a/mcpartools/mcengine/data/run_shieldhit.sh +++ b/mcpartools/mcengine/data/run_shieldhit.sh @@ -34,7 +34,7 @@ echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE echo "# END = -" >> $LOG_FILE echo "# TIME IN SECONDS = -" >> $LOG_FILE echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE -echo "# STATUS = 1" >> $LOG_FILE +echo "# STATUS = -" >> $LOG_FILE echo "#" >> $LOG_FILE # execute simulation diff --git a/mcpartools/scheduler/base.py b/mcpartools/scheduler/base.py index 077d6a9..b884213 100644 --- a/mcpartools/scheduler/base.py +++ b/mcpartools/scheduler/base.py @@ -26,6 +26,7 @@ def __init__(self, scheduler_options): submit_script = 'submit.sh' main_run_script = 'main_run.sh' merge_logs_script = 'merge_logs.sh' + status_script = 'status.sh' def submit_script_body(self, jobs_no, main_dir, workspace_dir): from pkg_resources import resource_string @@ -52,10 +53,17 @@ def main_run_script_body(self, jobs_no, workspace_dir): jobs_no=jobs_no) return self.main_run_script - def merge_logs_body(self, workspace_dir, output_dir): + def merge_logs_body(self, workspace_dir, collect_dir, main_dir): from pkg_resources import resource_string tpl = resource_string(__name__, self.merge_logs_script_template) - return tpl.decode("ascii").format(workspace_dir=workspace_dir, collect_dir=output_dir) + return tpl.decode("ascii").format(workspace_dir=workspace_dir, + collect_dir=collect_dir, + main_dir=main_dir) + + def status_body(self, merge_script_path): + from pkg_resources import resource_string + tpl = resource_string(__name__, self.status_script_template) + return tpl.decode("ascii").format(merge_script_path=merge_script_path) def write_submit_script(self, main_dir, script_basename, jobs_no, workspace_dir): script_path = os.path.join(main_dir, script_basename) @@ -79,12 +87,28 @@ def write_main_run_script(self, jobs_no, output_dir): logger.debug("Saved main run script: " + out_file_path) logger.debug("Output dir " + output_dir) - def write_merge_logs_script(self, workspace_dir, output_dir): + def write_merge_logs_script(self, workspace_dir, collect_dir, main_dir): workspace_dir_abspath = os.path.abspath(workspace_dir) - output_dir_abspath = os.path.abspath(output_dir) + collect_dir_abspath = os.path.abspath(collect_dir) + main_dir_abspath = os.path.abspath(main_dir) + out_file_path = os.path.join(workspace_dir_abspath, self.merge_logs_script) + fd = open(out_file_path, 'w') - fd.write(self.merge_logs_body(workspace_dir_abspath, output_dir_abspath)) + fd.write(self.merge_logs_body(workspace_dir_abspath, collect_dir_abspath, main_dir_abspath)) fd.close() os.chmod(out_file_path, 0o750) logger.debug("Saved merge logs script: " + out_file_path) + + def write_status_script(self, main_dir, workspace_dir): + main_dir_abspath = os.path.abspath(main_dir) + out_file_path = os.path.join(main_dir_abspath, self.status_script) + + workspace_dir_abspath = os.path.abspath(workspace_dir) + merge_log_script = os.path.join(workspace_dir_abspath, self.merge_logs_script) + + fd = open(out_file_path, 'w') + fd.write(self.status_body(merge_log_script)) + fd.close() + os.chmod(out_file_path, 0o750) + logger.debug("Saved status script: " + out_file_path) \ No newline at end of file diff --git a/mcpartools/scheduler/data/merge_logs.sh b/mcpartools/scheduler/data/merge_logs.sh index cb420f6..1275d25 100755 --- a/mcpartools/scheduler/data/merge_logs.sh +++ b/mcpartools/scheduler/data/merge_logs.sh @@ -84,6 +84,11 @@ function writeTimeInSeconds(){{ TASK_NUMBER=$((TASK_NUMBER + 1)) +# check if status is a number + if ! [[ ${{STATUS}} =~ ^[0-9]+$ ]] ; then + continue + fi + if [[ ${{STATUS}} -ne 0 ]] then FAILED=$((FAILED + 1)) @@ -126,9 +131,22 @@ function appendCollectInfo() {{ fi }} -JOBS_LOG_FILE="info.log" WORKSPACE={workspace_dir:s} -LOG_FILE=${{WORKSPACE}}/${{JOBS_LOG_FILE}} +MAIN_DIR={main_dir:s} + +if [ $# -eq 0 ] + then + FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log + LOG_FILE=${{MAIN_DIR}}/${{FILE_NAME}} + else + LOG_FILE=$1 +fi + +if [ ! -f ${{MAIN_DIR}}/submit.log ]; then + exit 1 +fi + +JOBS_LOG_FILE="info.log" JOB_ID_REGEX="#+ DETAILED INFORMATION ABOUT JOB\s+([0-9]*)" START_REGEX="# START\s+=\s(.{{19}})" END_REGEX="# END\s+=\s(.{{19}})" diff --git a/mcpartools/scheduler/data/run_torque.sh b/mcpartools/scheduler/data/run_torque.sh index a8734d5..a1552c4 100644 --- a/mcpartools/scheduler/data/run_torque.sh +++ b/mcpartools/scheduler/data/run_torque.sh @@ -1,8 +1,10 @@ #!/usr/bin/env bash +#PBS -l select=1:ncpus=3:mem=1gb +#PBS -A ccbmc7 # Exit immediately if a simple command exits with a non-zero status. set -e {options_header:s} # Run individual jobs -{workspace_dir:s}/job_`printf %04d $PBS_ARRAYID`/run.sh +{workspace_dir:s}/job_`printf %04d $PBS_ARRAY_INDEX`/run.sh diff --git a/mcpartools/scheduler/data/status.sh b/mcpartools/scheduler/data/status.sh new file mode 100755 index 0000000..636a5b2 --- /dev/null +++ b/mcpartools/scheduler/data/status.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log + +STATUS_CMD="{merge_script_path:s} $FILE_NAME" + +eval $STATUS_CMD +CMD_STATUS=$? + +if [[ $CMD_STATUS -eq 0 ]] +then + echo "Status successfully saved to file: $FILE_NAME" +else + echo "Unable to create status file" +fi \ No newline at end of file diff --git a/mcpartools/scheduler/data/submit_slurm.sh b/mcpartools/scheduler/data/submit_slurm.sh index 564c130..69f32bc 100755 --- a/mcpartools/scheduler/data/submit_slurm.sh +++ b/mcpartools/scheduler/data/submit_slurm.sh @@ -45,10 +45,12 @@ if [ -n "$CALC_JOBID" ] ; then echo "Result collection" >> "$LOGFILE" echo "Submission command: $COLLECT_CMD" >> "$LOGFILE" + LAST_JOB_ID=$CALC_JOBID + # If sbatch command ended with a success log following info if [ $? -eq 0 ] ; then - COLLECT_JOBID=`cat $OUT | cut -d ";" -f 1` - echo "Job ID: $COLLECT_JOBID" >> "$LOGFILE" + LAST_JOB_ID=`cat $OUT | cut -d ";" -f 1` + echo "Job ID: $LAST_JOB_ID" >> "$LOGFILE" echo "Submission time: `date +"%Y-%m-%d %H:%M:%S"`" >> "$LOGFILE" fi @@ -60,7 +62,7 @@ if [ -n "$CALC_JOBID" ] ; then cat $ERR >> "$LOGFILE" fi - MERGE_LOGS_CMD="sbatch --dependency=afterany:$COLLECT_JOBID {main_dir:s}/workspace/merge_logs.sh > $OUT 2> $ERR" + MERGE_LOGS_CMD="sbatch --dependency=afterany:$LAST_JOB_ID --output='{log_dir:s}/output_%j_merge_logs.log' --error='{log_dir:s}/error_%j_merge_logs.log' --parsable {main_dir:s}/workspace/merge_logs.sh > $OUT 2> $ERR" eval $MERGE_LOGS_CMD echo "" >> "$LOGFILE" diff --git a/mcpartools/scheduler/slurm.py b/mcpartools/scheduler/slurm.py index 46e499d..7b8cd26 100644 --- a/mcpartools/scheduler/slurm.py +++ b/mcpartools/scheduler/slurm.py @@ -15,3 +15,5 @@ def __init__(self, options_content): main_run_script_template = os.path.join('data', 'run_slurm.sh') merge_logs_script_template = os.path.join('data', 'merge_logs.sh') + + status_script_template = os.path.join('data', 'status.sh') diff --git a/mcpartools/scheduler/torque.py b/mcpartools/scheduler/torque.py index c40e596..0cb55c4 100644 --- a/mcpartools/scheduler/torque.py +++ b/mcpartools/scheduler/torque.py @@ -15,3 +15,5 @@ def __init__(self, options_content): main_run_script_template = os.path.join('data', 'run_torque.sh') merge_logs_script_template = os.path.join('data', 'merge_logs.sh') + + status_script_template = os.path.join('data', 'status.sh') From c7562e6a2c45013d37db4d8fa8e77c9db4dbb5b3 Mon Sep 17 00:00:00 2001 From: jarema Date: Tue, 28 Aug 2018 21:26:35 +0200 Subject: [PATCH 03/12] add torque implementation --- mcpartools/scheduler/data/submit_torque.sh | 31 ++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/mcpartools/scheduler/data/submit_torque.sh b/mcpartools/scheduler/data/submit_torque.sh index b66d72f..735a8ba 100644 --- a/mcpartools/scheduler/data/submit_torque.sh +++ b/mcpartools/scheduler/data/submit_torque.sh @@ -11,13 +11,14 @@ ERR=`mktemp` # On exit or if the script is interrupted (i.e. by receiving SIGINT signal) delete temporary files trap "rm -f $OUT $ERR" EXIT -qsub {options_args:s} -t 1-{jobs_no:d} -o {log_dir:s} -e {log_dir:s} -terse {script_dir:s}/{calculate_script_name:s} > $OUT 2> $ERR +qsub {options_args:s} -J 1-{jobs_no:d} -o {log_dir:s} -e {log_dir:s} {script_dir:s}/{calculate_script_name:s} > $OUT 2> $ERR echo "Saving logs to $LOGFILE" # If qsub command ended with a success log following info if [ $? -eq 0 ] ; then - echo "Job ID: `cat $OUT | cut -d ";" -f 1`" > "$LOGFILE" + CALC_JOBID=`cat $OUT | cut -d ";" -f 1` + echo "Job ID: $CALC_JOBID" > "$LOGFILE" echo "Submission time: `date +"%Y-%m-%d %H:%M:%S"`" >> "$LOGFILE" fi @@ -28,3 +29,29 @@ if [ "`cat $ERR`" != "" ] ; then echo "---------------------" >> "$LOGFILE" cat $ERR >> "$LOGFILE" fi + +# If parallel calculation submission was successful, we proceed to submit collect script and the create a log file +if [ -n "$CALC_JOBID" ] ; then + + MERGE_LOGS_CMD="qsub -W depend=afterany:$CALC_JOBID -o {log_dir:s} -e {log_dir:s} {script_dir:s}/merge_logs.sh > $OUT 2> $ERR" + eval $MERGE_LOGS_CMD + + echo "" >> "$LOGFILE" + echo "Merge logs" >> "$LOGFILE" + echo "Merge command: $MERGE_LOGS_CMD" >> "$LOGFILE" + + # If sbatch command ended with a success log following info + if [ $? -eq 0 ] ; then + MERGE__JOBID=`cat $OUT | cut -d ";" -f 1` + echo "Job ID: $MERGE__JOBID" >> "$LOGFILE" + echo "Submission time: `date +"%Y-%m-%d %H:%M:%S"`" >> "$LOGFILE" + fi + + # If output from stderr isn't an empty string then log it as well to submit.log + if [ "`cat $ERR`" != "" ] ; then + echo "---------------------" >> "$LOGFILE" + echo "ERROR MESSAGE" >>"$LOGFILE" + echo "---------------------" >> "$LOGFILE" + cat $ERR >> "$LOGFILE" + fi +fi \ No newline at end of file From 91bf50e1b5c708445921fdd3f24afcd71f3d8792 Mon Sep 17 00:00:00 2001 From: jarema Date: Tue, 28 Aug 2018 21:37:52 +0200 Subject: [PATCH 04/12] refactor --- mcpartools/scheduler/data/run_torque.sh | 4 +--- mcpartools/scheduler/data/submit_torque.sh | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/mcpartools/scheduler/data/run_torque.sh b/mcpartools/scheduler/data/run_torque.sh index a1552c4..a8734d5 100644 --- a/mcpartools/scheduler/data/run_torque.sh +++ b/mcpartools/scheduler/data/run_torque.sh @@ -1,10 +1,8 @@ #!/usr/bin/env bash -#PBS -l select=1:ncpus=3:mem=1gb -#PBS -A ccbmc7 # Exit immediately if a simple command exits with a non-zero status. set -e {options_header:s} # Run individual jobs -{workspace_dir:s}/job_`printf %04d $PBS_ARRAY_INDEX`/run.sh +{workspace_dir:s}/job_`printf %04d $PBS_ARRAYID`/run.sh diff --git a/mcpartools/scheduler/data/submit_torque.sh b/mcpartools/scheduler/data/submit_torque.sh index 735a8ba..ddba9a2 100644 --- a/mcpartools/scheduler/data/submit_torque.sh +++ b/mcpartools/scheduler/data/submit_torque.sh @@ -11,7 +11,7 @@ ERR=`mktemp` # On exit or if the script is interrupted (i.e. by receiving SIGINT signal) delete temporary files trap "rm -f $OUT $ERR" EXIT -qsub {options_args:s} -J 1-{jobs_no:d} -o {log_dir:s} -e {log_dir:s} {script_dir:s}/{calculate_script_name:s} > $OUT 2> $ERR +qsub {options_args:s} -t 1-{jobs_no:d} -o {log_dir:s} -e {log_dir:s} {script_dir:s}/{calculate_script_name:s} > $OUT 2> $ERR echo "Saving logs to $LOGFILE" From bb4f10d7771dbd49d6720c3fc78aea388b8b56e3 Mon Sep 17 00:00:00 2001 From: jarema Date: Mon, 3 Sep 2018 23:26:36 +0200 Subject: [PATCH 05/12] pep8 refactor --- mcpartools/scheduler/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mcpartools/scheduler/base.py b/mcpartools/scheduler/base.py index b884213..6ae3a01 100644 --- a/mcpartools/scheduler/base.py +++ b/mcpartools/scheduler/base.py @@ -111,4 +111,4 @@ def write_status_script(self, main_dir, workspace_dir): fd.write(self.status_body(merge_log_script)) fd.close() os.chmod(out_file_path, 0o750) - logger.debug("Saved status script: " + out_file_path) \ No newline at end of file + logger.debug("Saved status script: " + out_file_path) From c48bdd2e435194fcc75dbd4b5d88ee4aba67b747 Mon Sep 17 00:00:00 2001 From: jarema Date: Sun, 4 Nov 2018 18:49:39 +0100 Subject: [PATCH 06/12] fix fluka bug with 0 job id --- mcpartools/mcengine/data/run_fluka.sh | 6 ++++++ mcpartools/mcengine/fluka.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/mcpartools/mcengine/data/run_fluka.sh b/mcpartools/mcengine/data/run_fluka.sh index 58c69b3..11a1903 100755 --- a/mcpartools/mcengine/data/run_fluka.sh +++ b/mcpartools/mcengine/data/run_fluka.sh @@ -8,6 +8,12 @@ START=$(date +%s) # location of FLUKA binary file FLUKA_BIN={fluka_bin:s} +# number of particles per job +PARTICLE_NO={particle_no:d} + +# seed of RNG +RNG_SEED={job_id:d} + WORK_DIR={working_directory:s} # go to working directory cd $WORK_DIR diff --git a/mcpartools/mcengine/fluka.py b/mcpartools/mcengine/fluka.py index d9f31d6..308e28e 100644 --- a/mcpartools/mcengine/fluka.py +++ b/mcpartools/mcengine/fluka.py @@ -32,6 +32,8 @@ def __init__(self, input_path, mc_run_script, collect_method, mc_engine_options) self.collect_script_content = resource_string(__name__, self.collect_script).decode('ascii') + self.particle_no = 1 + @property def input_files(self): # TODO check if additional files are needed @@ -54,6 +56,7 @@ def randomize(self, new_seed): self.input_lines = result def set_particle_no(self, particle_no): + self.particle_no = particle_no result = [] for l in self.input_lines: # TODO better discovery needed @@ -82,6 +85,7 @@ def save_run_script(self, output_dir, jobid): engine_options=self.engine_options, working_directory=output_dir_abs_path, input_basename=input_base_name, + particle_no=self.particle_no, job_id=jobid) out_file_name = 'run.sh' out_file_path = os.path.join(output_dir, out_file_name) From 7d5305497fe67fd9a5a4ecf2c92b24ba4b3791ec Mon Sep 17 00:00:00 2001 From: jarema Date: Sun, 11 Nov 2018 13:19:05 +0100 Subject: [PATCH 07/12] improve status file --- mcpartools/mcengine/data/collect.sh | 4 +- mcpartools/mcengine/data/run_fluka.sh | 13 +- mcpartools/mcengine/data/run_shieldhit.sh | 15 +- mcpartools/scheduler/data/merge_logs.sh | 165 ------------- mcpartools/scheduler/data/merge_logs_slurm.sh | 227 ++++++++++++++++++ .../scheduler/data/merge_logs_torque.sh | 227 ++++++++++++++++++ mcpartools/scheduler/data/status.sh | 4 +- mcpartools/scheduler/slurm.py | 2 +- mcpartools/scheduler/torque.py | 2 +- 9 files changed, 480 insertions(+), 179 deletions(-) delete mode 100755 mcpartools/scheduler/data/merge_logs.sh create mode 100755 mcpartools/scheduler/data/merge_logs_slurm.sh create mode 100755 mcpartools/scheduler/data/merge_logs_torque.sh diff --git a/mcpartools/mcengine/data/collect.sh b/mcpartools/mcengine/data/collect.sh index fb3f50b..7f5dddf 100755 --- a/mcpartools/mcengine/data/collect.sh +++ b/mcpartools/mcengine/data/collect.sh @@ -19,7 +19,7 @@ echo "###########################################################" > $LOG_FILE echo "################### COLLECT INFORMATION ###################" >> $LOG_FILE echo "###########################################################" >> $LOG_FILE echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE +echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE echo "# END = -" >> $LOG_FILE echo "# TIME IN SECONDS = -" >> $LOG_FILE echo "# STATUS = 1" >> $LOG_FILE @@ -31,7 +31,7 @@ COLLECT_STATUS=$? let "EXECUTION_TIME = $(date +%s) - $START" # end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%d %H:%M:%S"`/" $LOG_FILE +sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE # collapsed time is in line number 7 sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE # status is in line number 8 diff --git a/mcpartools/mcengine/data/run_fluka.sh b/mcpartools/mcengine/data/run_fluka.sh index 11a1903..fd1bf52 100755 --- a/mcpartools/mcengine/data/run_fluka.sh +++ b/mcpartools/mcengine/data/run_fluka.sh @@ -24,7 +24,7 @@ echo "###########################################################" > $LOG_FILE echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE echo "###########################################################" >> $LOG_FILE echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE +echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE echo "# END = -" >> $LOG_FILE echo "# TIME IN SECONDS = -" >> $LOG_FILE echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE @@ -33,16 +33,21 @@ echo "#" >> $LOG_FILE # run rfluka $FLUKA_BIN -N0 -M1 {engine_options:s} {input_basename:s} -SIMULATION_STATUS=$? +if [[ $? -ne 0 ]] +then + SIMULATION_STATUS="ST" +else + SIMULATION_STATUS="CD" +fi let "EXECUTION_TIME = $(date +%s) - $START" # end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%d %H:%M:%S"`/" $LOG_FILE +sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE # collapsed time is in line number 7 sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE # status is in line number 9 -sed -i "9s/.*/# STATUS =`printf "%20d" $SIMULATION_STATUS`/" $LOG_FILE +sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE # each fluka run will save files with same name, in order to distinguish output from multiple runs # we rename output files, appending suffix with jobid to each of them diff --git a/mcpartools/mcengine/data/run_shieldhit.sh b/mcpartools/mcengine/data/run_shieldhit.sh index 5cc2371..9cbe2d2 100755 --- a/mcpartools/mcengine/data/run_shieldhit.sh +++ b/mcpartools/mcengine/data/run_shieldhit.sh @@ -30,7 +30,7 @@ echo "###########################################################" > $LOG_FILE echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE echo "###########################################################" >> $LOG_FILE echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE +echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE echo "# END = -" >> $LOG_FILE echo "# TIME IN SECONDS = -" >> $LOG_FILE echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE @@ -42,13 +42,20 @@ cd {working_directory:s} # execute simulation $SHIELDHIT_BIN --beamfile=$BEAM_FILE --geofile=$GEO_FILE --matfile=$MAT_FILE --detectfile=$DETECT_FILE -n $PARTICLE_NO -N $RNG_SEED {engine_options:s} $WORK_DIR -SIMULATION_STATUS=$? + +if [[ $? -ne 0 ]] +then + SIMULATION_STATUS="ST" +else + SIMULATION_STATUS="CD" +fi + let "EXECUTION_TIME = $(date +%s) - $START" # end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%d %H:%M:%S"`/" $LOG_FILE +sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE # collapsed time is in line number 7 sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE # status is in line number 9 -sed -i "9s/.*/# STATUS =`printf "%20d" $SIMULATION_STATUS`/" $LOG_FILE \ No newline at end of file +sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE \ No newline at end of file diff --git a/mcpartools/scheduler/data/merge_logs.sh b/mcpartools/scheduler/data/merge_logs.sh deleted file mode 100755 index 1275d25..0000000 --- a/mcpartools/scheduler/data/merge_logs.sh +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env bash - -# Exit immediately if a simple command exits with a non-zero status. -set -e - -function writeLogHeader(){{ - echo "###########################################################" > ${{LOG_FILE}} - echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo "# ID START END" >> ${{LOG_FILE}} - - for i in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; - do - if [[ $(cat $i) =~ $JOB_ID_REGEX ]]; - then - JOB_ID=${{BASH_REMATCH[1]}}; - else - echo "Cannot get job ID from $i file" - continue - fi - - if [[ $(cat $i) =~ $START_REGEX ]]; - then - START_TIME=${{BASH_REMATCH[1]}}; - else - echo "Cannot get start time from $i file" - continue - fi - - if [[ $(cat $i) =~ $END_REGEX ]]; - then - END_TIME=${{BASH_REMATCH[1]}}; - else - echo "Cannot get end time from $i file" - continue - fi - - if [[ $(cat $i) =~ $STATUS_REGEX ]]; - then - STATUS=${{BASH_REMATCH[1]}}; - else - echo "Cannot get status from $i file" - continue - fi - - echo "# `printf "%5d" $JOB_ID` $START_TIME $END_TIME" >> ${{LOG_FILE}} - done - echo "#" >> ${{LOG_FILE}} -}} - -function writeTimeInSeconds(){{ - echo "###########################################################" >> ${{LOG_FILE}} - echo "############### EXECUTION TIME IN SECONDS #################" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo " ID TIME STATUS " >> ${{LOG_FILE}} - - for i in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; - do - if [[ $(cat $i) =~ $JOB_ID_REGEX ]]; - then - JOB_ID=${{BASH_REMATCH[1]}}; - else - echo "Cannot get job ID from $i file" - continue - fi - - if [[ $(cat $i) =~ $COLLAPSED_TIME_REGEX ]]; - then - COLLAPSED_TIME=${{BASH_REMATCH[1]}}; - else - echo "Cannot get collapsed time from $i file" - continue - fi - - if [[ $(cat $i) =~ $STATUS_REGEX ]]; - then - STATUS=${{BASH_REMATCH[1]}}; - else - echo "Cannot get status from $i file" - continue - fi - - TASK_NUMBER=$((TASK_NUMBER + 1)) - -# check if status is a number - if ! [[ ${{STATUS}} =~ ^[0-9]+$ ]] ; then - continue - fi - - if [[ ${{STATUS}} -ne 0 ]] - then - FAILED=$((FAILED + 1)) - else - SUCCESSES=$((SUCCESSES + 1)) - TOTAL_TIME=$((TOTAL_TIME + $COLLAPSED_TIME)) - fi - - echo " `printf "%5d" $JOB_ID` `printf "%20d" $COLLAPSED_TIME` `printf "%10d" $STATUS`" >> ${{LOG_FILE}} - done - echo "#" >> ${{LOG_FILE}} -}} - -function writeJobsDetailInformation(){{ - cat ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}} >> ${{LOG_FILE}} -}} - -function writeSummary(){{ - echo "###########################################################" >> ${{LOG_FILE}} - echo "######################## SUMMARY ##########################" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo "# NUMBER OF TASKS = `printf "%20d" $TASK_NUMBER`" >> ${{LOG_FILE}} - echo "# SUCCESS = `printf "%20d" $SUCCESSES`" >> ${{LOG_FILE}} - echo "# FAILED = `printf "%20d" $FAILED`" >> ${{LOG_FILE}} - - if [[ ${{SUCCESSES}} -ne 0 ]] - then - echo "# AVERAGE TIME [s]= `printf "%20d" $(($TOTAL_TIME / $SUCCESSES))`" >> ${{LOG_FILE}} - else - echo "# AVERAGE TIME [s]= `printf "%20s" -`" >> ${{LOG_FILE}} - fi - - echo "#" >> ${{LOG_FILE}} -}} - -function appendCollectInfo() {{ - if [ -f $COLLECT_LOG ]; then - cat $COLLECT_LOG >> ${{LOG_FILE}} - fi -}} - -WORKSPACE={workspace_dir:s} -MAIN_DIR={main_dir:s} - -if [ $# -eq 0 ] - then - FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log - LOG_FILE=${{MAIN_DIR}}/${{FILE_NAME}} - else - LOG_FILE=$1 -fi - -if [ ! -f ${{MAIN_DIR}}/submit.log ]; then - exit 1 -fi - -JOBS_LOG_FILE="info.log" -JOB_ID_REGEX="#+ DETAILED INFORMATION ABOUT JOB\s+([0-9]*)" -START_REGEX="# START\s+=\s(.{{19}})" -END_REGEX="# END\s+=\s(.{{19}})" -STATUS_REGEX="# STATUS\s+=\s+([0-9]*)" -COLLAPSED_TIME_REGEX="# TIME IN SECONDS\s+=\s+([0-9]*)" -COLLECT_LOG={collect_dir:s}/info.log -TASK_NUMBER=0 -SUCCESSES=0 -FAILED=0 -TOTAL_TIME=0 - -writeLogHeader -writeTimeInSeconds -writeJobsDetailInformation -appendCollectInfo -writeSummary diff --git a/mcpartools/scheduler/data/merge_logs_slurm.sh b/mcpartools/scheduler/data/merge_logs_slurm.sh new file mode 100755 index 0000000..602a7fd --- /dev/null +++ b/mcpartools/scheduler/data/merge_logs_slurm.sh @@ -0,0 +1,227 @@ +#!/usr/bin/env bash + +function writeLogHeader(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + echo " ID START END STATUS TIME" + + for i in ${{WORKSPACE}}/job_*; + do + JOB_DIR_NAME=$(basename $i) + if [[ $JOB_DIR_NAME =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + + # remove 0 at the beginning of the number + JOB_ID=$(expr $JOB_ID + 0) + else + echo "Cannot get job ID from $i file" + continue + fi + + JOB_STAT=`squeue -j${{ARRAY_JOB_ID}}_${{JOB_ID}} -o '%V %t %M' -h 2> /dev/null` + SQUEUE_STATUS=$? + + if [[ $SQUEUE_STATUS -ne 0 || -z $JOB_STAT ]] + then + INFO_FILE=$i/$JOBS_LOG_FILE + + if [[ $(cat $INFO_FILE) =~ $START_REGEX ]]; + then + START_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get start time from $INFO_FILE file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $END_REGEX ]]; + then + END_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get end time from $INFO_FILE file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $STATUS_REGEX ]]; + then + STATUS=${{BASH_REMATCH[1]}}; + else + echo "Cannot get status from $i file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $COLLAPSED_TIME_REGEX ]]; + then + COLLAPSED_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get collapsed time from $i file" + continue + fi + + else + START_TIME=`echo ${{JOB_STAT}} | cut -d ' ' -f 1` + STATUS=`echo ${{JOB_STAT}} | cut -d ' ' -f 2` + COLLAPSED_TIME=`echo ${{JOB_STAT}} | cut -d ' ' -f 3` + if [[ -z $COLLAPSED_TIME ]] + then + COLLAPSED_TIME='0' + else + COLLAPSED_TIME=`echo $COLLAPSED_TIME | awk -F ":" '{{ print $1 * 60 + $2 }}'` + fi + + END_TIME="-" + fi + + TASK_NUMBER=$((TASK_NUMBER + 1)) + + + if [[ ${{STATUS}} == "CD" ]] + then + SUCCESSES=$((SUCCESSES + 1)) + TOTAL_TIME=$((TOTAL_TIME + $COLLAPSED_TIME)) + elif [[ ${{STATUS}} != "R" && ${{STATUS}} != "PD" && ${{STATUS}} != "S" ]] + then + FAILED=$((FAILED + 1)) + fi + + if [[ $COLLAPSED_TIME -gt $MAX_TIME ]] + then + MAX_TIME=$COLLAPSED_TIME + fi + + JOB_STATUSES+=(${{STATUS}}) + JOB_EXECUTION_TIME+=(${{COLLAPSED_TIME}}) + echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + echo " `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" + done + echo "#" >> ${{LOG_FILE}} +}} + + +function writeTimeInSeconds(){{ + + echo "###########################################################" >> ${{LOG_FILE}} + echo "############### EXECUTION TIME IN SECONDS #################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo " ID TIME STATUS " >> ${{LOG_FILE}} + + for i in `seq 1 $TASK_NUMBER`; + do + + echo " `printf "%5d" $i` `printf "%20d" ${{JOB_EXECUTION_TIME[(($i - 1))]}}` `printf "%10s" ${{JOB_STATUSES[(($i - 1))]}}`" >> ${{LOG_FILE}} + done + echo "#" >> ${{LOG_FILE}} + +}} + +function writeJobsDetailInformation(){{ + for i in ${{WORKSPACE}}/job_*; + do + JOB_DIR_NAME=$(basename $i) + if [[ $JOB_DIR_NAME =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + + # remove 0 at the beginning of the number + JOB_ID=$(expr $JOB_ID + 0) + else + echo "Cannot get job ID from $i file" + continue + fi + + INFO_FILE=$i/$JOBS_LOG_FILE + + if [ ! -f $INFO_FILE ]; then + continue + fi + + EXECUTION_TIME=${{JOB_EXECUTION_TIME[(($JOB_ID - 1))]}} + SIMULATION_STATUS=${{JOB_STATUSES[(($JOB_ID - 1))]}} + + # collapsed time is in line number 7 + sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $INFO_FILE + # status is in line number 9 + sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $INFO_FILE + + cat $INFO_FILE >> ${{LOG_FILE}} + done +}} + +function writeSummary(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "######################## SUMMARY ##########################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# NUMBER OF TASKS = `printf "%20d" $TASK_NUMBER`" >> ${{LOG_FILE}} + echo "# SUCCESS = `printf "%20d" $SUCCESSES`" >> ${{LOG_FILE}} + echo "# FAILED = `printf "%20d" $FAILED`" >> ${{LOG_FILE}} + + if [[ ${{SUCCESSES}} -ne 0 ]] + then + echo "# AVERAGE TIME [s]= `printf "%20d" $(($TOTAL_TIME / $SUCCESSES))`" >> ${{LOG_FILE}} + echo "# MAX TIME [s] = `printf "%20s" $MAX_TIME`" >> ${{LOG_FILE}} + else + echo "# AVERAGE TIME [s]= `printf "%20s" -`" >> ${{LOG_FILE}} + echo "# MAX TIME [s] = `printf "%20s" -`" >> ${{LOG_FILE}} + fi + + echo "#" >> ${{LOG_FILE}} +}} + +function appendCollectInfo() {{ + if [ -f $COLLECT_LOG ]; then + cat $COLLECT_LOG >> ${{LOG_FILE}} + fi +}} + +WORKSPACE={workspace_dir:s} +MAIN_DIR={main_dir:s} +COLLECT_LOG={collect_dir:s}/info.log + +if [ $# -eq 0 ] + then + FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log + LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} + else + LOG_FILE=$1 +fi + +JOBS_LOG_FILE="info.log" +JOB_ID_REGEX="job_([0-9]*)" +START_REGEX="# START\s+=\s(.{{19}})" +END_REGEX="# END\s+=\s(.{{19}})" +STATUS_REGEX="# STATUS\s+=\s+([A-Z]*)" +COLLAPSED_TIME_REGEX="# TIME IN SECONDS\s+=\s+([0-9]*)" +TASK_NUMBER=0 +SUCCESSES=0 +FAILED=0 +MAX_TIME=0 +TOTAL_TIME=0 +JOB_STATUSES=() +JOB_EXECUTION_TIME=() + +LOGFILE="${{MAIN_DIR}}/submit.log" + +RE="Job ID: ([0-9]*)" + +# no log file. Probably submit.sh not run +if [ ! -f $LOGFILE ]; then + echo "File not found: $LOGFILE" + echo "Make sure you run submit script" + exit 1 +fi + +if [[ $(cat $LOGFILE) =~ $RE ]]; +then + ARRAY_JOB_ID=${{BASH_REMATCH[1]}}; +fi + +writeLogHeader +writeTimeInSeconds +writeJobsDetailInformation +appendCollectInfo +writeSummary diff --git a/mcpartools/scheduler/data/merge_logs_torque.sh b/mcpartools/scheduler/data/merge_logs_torque.sh new file mode 100755 index 0000000..d14fe32 --- /dev/null +++ b/mcpartools/scheduler/data/merge_logs_torque.sh @@ -0,0 +1,227 @@ +#!/usr/bin/env bash + +function writeLogHeader(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + echo " ID START END STATUS TIME" + + for i in ${{WORKSPACE}}/job_*; + do + JOB_DIR_NAME=$(basename $i) + if [[ $JOB_DIR_NAME =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + + # remove 0 at the beginning of the number + JOB_ID=$(expr $JOB_ID + 0) + else + echo "Cannot get job ID from $i file" + continue + fi + + JOB_STAT=`qstat ${{ARRAY_JOB_ID}}'['${{JOB_ID}}']' 2> /dev/null | tail -1` + QSTAT_STATUS=$? + INFO_FILE=$i/$JOBS_LOG_FILE + + if [[ QSTAT_STATUS -ne 0 || -z $JOB_STAT ]] + then + if [[ $(cat $INFO_FILE) =~ $END_REGEX ]]; + then + END_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get end time from $INFO_FILE file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $STATUS_REGEX ]]; + then + STATUS=${{BASH_REMATCH[1]}}; + else + echo "Cannot get status from $i file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $COLLAPSED_TIME_REGEX ]]; + then + COLLAPSED_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get collapsed time from $i file" + continue + fi + + else + + STATUS=`echo ${{JOB_STAT}} | awk '{{print $5}}'` + + COLLAPSED_TIME=`echo ${{JOB_STAT}} | awk '{{print $4}}'` + + if [[ -z $COLLAPSED_TIME ]] + then + COLLAPSED_TIME='0' + else + COLLAPSED_TIME=`echo $COLLAPSED_TIME | awk -F ":" '{{ print $1 * 3600 + $2 * 60 + $3 }}'` + fi + + END_TIME="-" + fi + + if [[ -f $INFO_FILE && $(cat $INFO_FILE) =~ $START_REGEX ]]; + then + START_TIME=${{BASH_REMATCH[1]}}; + else + START_TIME="-" + fi + + TASK_NUMBER=$((TASK_NUMBER + 1)) + + + if [[ ${{STATUS}} == "CD" || ${{STATUS}} == "C" ]] + then + SUCCESSES=$((SUCCESSES + 1)) + TOTAL_TIME=$((TOTAL_TIME + $COLLAPSED_TIME)) + elif [[ ${{STATUS}} != "R" && ${{STATUS}} != "W" && ${{STATUS}} != "S" && ${{STATUS}} != "Q" ]] + then + FAILED=$((FAILED + 1)) + fi + + if [[ $COLLAPSED_TIME -gt $MAX_TIME ]] + then + MAX_TIME=$COLLAPSED_TIME + fi + + JOB_STATUSES+=(${{STATUS}}) + JOB_EXECUTION_TIME+=(${{COLLAPSED_TIME}}) + echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + echo " `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" + done + echo "#" >> ${{LOG_FILE}} +}} + + +function writeTimeInSeconds(){{ + + echo "###########################################################" >> ${{LOG_FILE}} + echo "############### EXECUTION TIME IN SECONDS #################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo " ID TIME STATUS " >> ${{LOG_FILE}} + + for i in `seq 1 $TASK_NUMBER`; + do + + echo " `printf "%5d" $i` `printf "%20d" ${{JOB_EXECUTION_TIME[(($i - 1))]}}` `printf "%10s" ${{JOB_STATUSES[(($i - 1))]}}`" >> ${{LOG_FILE}} + done + echo "#" >> ${{LOG_FILE}} + +}} + +function writeJobsDetailInformation(){{ + for i in ${{WORKSPACE}}/job_*; + do + JOB_DIR_NAME=$(basename $i) + if [[ $JOB_DIR_NAME =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + + # remove 0 at the beginning of the number + JOB_ID=$(expr $JOB_ID + 0) + else + echo "Cannot get job ID from $i file" + continue + fi + + INFO_FILE=$i/$JOBS_LOG_FILE + + if [ ! -f $INFO_FILE ]; then + continue + fi + + EXECUTION_TIME=${{JOB_EXECUTION_TIME[(($JOB_ID - 1))]}} + SIMULATION_STATUS=${{JOB_STATUSES[(($JOB_ID - 1))]}} + + # collapsed time is in line number 7 + sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $INFO_FILE + # status is in line number 9 + sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $INFO_FILE + + cat $INFO_FILE >> ${{LOG_FILE}} + done +}} + +function writeSummary(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "######################## SUMMARY ##########################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# NUMBER OF TASKS = `printf "%20d" $TASK_NUMBER`" >> ${{LOG_FILE}} + echo "# SUCCESS = `printf "%20d" $SUCCESSES`" >> ${{LOG_FILE}} + echo "# FAILED = `printf "%20d" $FAILED`" >> ${{LOG_FILE}} + + if [[ ${{SUCCESSES}} -ne 0 ]] + then + echo "# AVERAGE TIME [s]= `printf "%20d" $(($TOTAL_TIME / $SUCCESSES))`" >> ${{LOG_FILE}} + echo "# MAX TIME [s] = `printf "%20s" $MAX_TIME`" >> ${{LOG_FILE}} + else + echo "# AVERAGE TIME [s]= `printf "%20s" -`" >> ${{LOG_FILE}} + echo "# MAX TIME [s] = `printf "%20s" -`" >> ${{LOG_FILE}} + fi + + echo "#" >> ${{LOG_FILE}} +}} + +function appendCollectInfo() {{ + if [ -f $COLLECT_LOG ]; then + cat $COLLECT_LOG >> ${{LOG_FILE}} + fi +}} + +WORKSPACE={workspace_dir:s} +MAIN_DIR={main_dir:s} +COLLECT_LOG={collect_dir:s}/info.log + +if [ $# -eq 0 ] + then + FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log + LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} + else + LOG_FILE=$1 +fi + +JOBS_LOG_FILE="info.log" +JOB_ID_REGEX="job_([0-9]*)" +START_REGEX="# START\s+=\s(.{{19}})" +END_REGEX="# END\s+=\s(.{{19}})" +STATUS_REGEX="# STATUS\s+=\s+([A-Z]*)" +COLLAPSED_TIME_REGEX="# TIME IN SECONDS\s+=\s+([0-9]*)" +TASK_NUMBER=0 +SUCCESSES=0 +FAILED=0 +MAX_TIME=0 +TOTAL_TIME=0 +JOB_STATUSES=() +JOB_EXECUTION_TIME=() + +LOGFILE="${{MAIN_DIR}}/submit.log" + +RE="Job ID: ([0-9]*)" + +# no log file. Probably submit.sh not run +if [ ! -f $LOGFILE ]; then + echo "File not found: $LOGFILE" + echo "Make sure you run submit script" + exit 1 +fi + +if [[ $(cat $LOGFILE) =~ $RE ]]; +then + ARRAY_JOB_ID=${{BASH_REMATCH[1]}}; +fi + +writeLogHeader +writeTimeInSeconds +writeJobsDetailInformation +appendCollectInfo +writeSummary diff --git a/mcpartools/scheduler/data/status.sh b/mcpartools/scheduler/data/status.sh index 636a5b2..95aa399 100755 --- a/mcpartools/scheduler/data/status.sh +++ b/mcpartools/scheduler/data/status.sh @@ -2,14 +2,14 @@ FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log -STATUS_CMD="{merge_script_path:s} $FILE_NAME" +STATUS_CMD="{merge_script_path:s} workspace/$FILE_NAME" eval $STATUS_CMD CMD_STATUS=$? if [[ $CMD_STATUS -eq 0 ]] then - echo "Status successfully saved to file: $FILE_NAME" + echo "Status successfully saved to file: workspace/$FILE_NAME" else echo "Unable to create status file" fi \ No newline at end of file diff --git a/mcpartools/scheduler/slurm.py b/mcpartools/scheduler/slurm.py index 7b8cd26..03e488d 100644 --- a/mcpartools/scheduler/slurm.py +++ b/mcpartools/scheduler/slurm.py @@ -14,6 +14,6 @@ def __init__(self, options_content): main_run_script_template = os.path.join('data', 'run_slurm.sh') - merge_logs_script_template = os.path.join('data', 'merge_logs.sh') + merge_logs_script_template = os.path.join('data', 'merge_logs_slurm.sh') status_script_template = os.path.join('data', 'status.sh') diff --git a/mcpartools/scheduler/torque.py b/mcpartools/scheduler/torque.py index 0cb55c4..0f184d1 100644 --- a/mcpartools/scheduler/torque.py +++ b/mcpartools/scheduler/torque.py @@ -14,6 +14,6 @@ def __init__(self, options_content): main_run_script_template = os.path.join('data', 'run_torque.sh') - merge_logs_script_template = os.path.join('data', 'merge_logs.sh') + merge_logs_script_template = os.path.join('data', 'merge_logs_torque.sh') status_script_template = os.path.join('data', 'status.sh') From 14e6b0fb592cbc412c23a2fdf1919d14326cf13f Mon Sep 17 00:00:00 2001 From: jarema Date: Sun, 11 Nov 2018 13:19:05 +0100 Subject: [PATCH 08/12] improve status file --- mcpartools/generator.py | 11 +- mcpartools/mcengine/data/collect.sh | 4 +- mcpartools/mcengine/data/run_fluka.sh | 13 +- mcpartools/mcengine/data/run_shieldhit.sh | 15 +- mcpartools/scheduler/common.py | 1 - mcpartools/scheduler/data/merge_logs.sh | 165 ------------- mcpartools/scheduler/data/merge_logs_slurm.sh | 227 ++++++++++++++++++ .../scheduler/data/merge_logs_torque.sh | 227 ++++++++++++++++++ mcpartools/scheduler/data/status.sh | 4 +- mcpartools/scheduler/slurm.py | 2 +- mcpartools/scheduler/torque.py | 2 +- 11 files changed, 485 insertions(+), 186 deletions(-) delete mode 100755 mcpartools/scheduler/data/merge_logs.sh create mode 100755 mcpartools/scheduler/data/merge_logs_slurm.sh create mode 100755 mcpartools/scheduler/data/merge_logs_torque.sh diff --git a/mcpartools/generator.py b/mcpartools/generator.py index 80a87cd..66c3c23 100644 --- a/mcpartools/generator.py +++ b/mcpartools/generator.py @@ -98,6 +98,8 @@ def valid(self): class Generator: + wspdir_name = 'workspace' + def __init__(self, options): self.options = options self.mc_engine = EngineDiscover.get_mcengine(input_path=self.options.input_path, @@ -175,8 +177,7 @@ def generate_main_dir(self): file_logger.addHandler(logging.FileHandler(os.path.join(dir_path, "generatemc.log"), mode='w+')) def generate_workspace(self): - wspdir_name = 'workspace' - wspdir_path = os.path.join(self.main_dir, wspdir_name) + wspdir_path = os.path.join(self.main_dir, self.wspdir_name) logger.debug("Generated workspace directory path: " + wspdir_path) os.mkdir(wspdir_path) self.workspace_dir = wspdir_path @@ -250,13 +251,11 @@ def save_logs(self): file_logger.info('Current working directory: ' + os.getcwd()) def generate_merge_logs_script(self): - wspdir_name = 'workspace' output_name = 'output' - wspdir_path = os.path.join(self.main_dir, wspdir_name) + wspdir_path = os.path.join(self.main_dir, self.wspdir_name) collect_path = os.path.join(self.main_dir, output_name) self.scheduler.write_merge_logs_script(wspdir_path, collect_path, self.main_dir) def generate_status_script(self): - wspdir_name = 'workspace' - wspdir_path = os.path.join(self.main_dir, wspdir_name) + wspdir_path = os.path.join(self.main_dir, self.wspdir_name) self.scheduler.write_status_script(self.main_dir, wspdir_path) diff --git a/mcpartools/mcengine/data/collect.sh b/mcpartools/mcengine/data/collect.sh index fb3f50b..7f5dddf 100755 --- a/mcpartools/mcengine/data/collect.sh +++ b/mcpartools/mcengine/data/collect.sh @@ -19,7 +19,7 @@ echo "###########################################################" > $LOG_FILE echo "################### COLLECT INFORMATION ###################" >> $LOG_FILE echo "###########################################################" >> $LOG_FILE echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE +echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE echo "# END = -" >> $LOG_FILE echo "# TIME IN SECONDS = -" >> $LOG_FILE echo "# STATUS = 1" >> $LOG_FILE @@ -31,7 +31,7 @@ COLLECT_STATUS=$? let "EXECUTION_TIME = $(date +%s) - $START" # end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%d %H:%M:%S"`/" $LOG_FILE +sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE # collapsed time is in line number 7 sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE # status is in line number 8 diff --git a/mcpartools/mcengine/data/run_fluka.sh b/mcpartools/mcengine/data/run_fluka.sh index 11a1903..fd1bf52 100755 --- a/mcpartools/mcengine/data/run_fluka.sh +++ b/mcpartools/mcengine/data/run_fluka.sh @@ -24,7 +24,7 @@ echo "###########################################################" > $LOG_FILE echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE echo "###########################################################" >> $LOG_FILE echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE +echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE echo "# END = -" >> $LOG_FILE echo "# TIME IN SECONDS = -" >> $LOG_FILE echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE @@ -33,16 +33,21 @@ echo "#" >> $LOG_FILE # run rfluka $FLUKA_BIN -N0 -M1 {engine_options:s} {input_basename:s} -SIMULATION_STATUS=$? +if [[ $? -ne 0 ]] +then + SIMULATION_STATUS="ST" +else + SIMULATION_STATUS="CD" +fi let "EXECUTION_TIME = $(date +%s) - $START" # end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%d %H:%M:%S"`/" $LOG_FILE +sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE # collapsed time is in line number 7 sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE # status is in line number 9 -sed -i "9s/.*/# STATUS =`printf "%20d" $SIMULATION_STATUS`/" $LOG_FILE +sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE # each fluka run will save files with same name, in order to distinguish output from multiple runs # we rename output files, appending suffix with jobid to each of them diff --git a/mcpartools/mcengine/data/run_shieldhit.sh b/mcpartools/mcengine/data/run_shieldhit.sh index 5cc2371..9cbe2d2 100755 --- a/mcpartools/mcengine/data/run_shieldhit.sh +++ b/mcpartools/mcengine/data/run_shieldhit.sh @@ -30,7 +30,7 @@ echo "###########################################################" > $LOG_FILE echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE echo "###########################################################" >> $LOG_FILE echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%d %H:%M:%S"`" >> $LOG_FILE +echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE echo "# END = -" >> $LOG_FILE echo "# TIME IN SECONDS = -" >> $LOG_FILE echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE @@ -42,13 +42,20 @@ cd {working_directory:s} # execute simulation $SHIELDHIT_BIN --beamfile=$BEAM_FILE --geofile=$GEO_FILE --matfile=$MAT_FILE --detectfile=$DETECT_FILE -n $PARTICLE_NO -N $RNG_SEED {engine_options:s} $WORK_DIR -SIMULATION_STATUS=$? + +if [[ $? -ne 0 ]] +then + SIMULATION_STATUS="ST" +else + SIMULATION_STATUS="CD" +fi + let "EXECUTION_TIME = $(date +%s) - $START" # end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%d %H:%M:%S"`/" $LOG_FILE +sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE # collapsed time is in line number 7 sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE # status is in line number 9 -sed -i "9s/.*/# STATUS =`printf "%20d" $SIMULATION_STATUS`/" $LOG_FILE \ No newline at end of file +sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE \ No newline at end of file diff --git a/mcpartools/scheduler/common.py b/mcpartools/scheduler/common.py index 3835d88..a7152b3 100644 --- a/mcpartools/scheduler/common.py +++ b/mcpartools/scheduler/common.py @@ -16,7 +16,6 @@ def __init__(self): @classmethod def get_scheduler(cls, scheduler_options, log_location): file_logger = logging.getLogger('file_logger') - try: srun_output = check_output(['srun --version'], shell=True) file_logger.info("srun version: {}".format(srun_output[:-1])) diff --git a/mcpartools/scheduler/data/merge_logs.sh b/mcpartools/scheduler/data/merge_logs.sh deleted file mode 100755 index 1275d25..0000000 --- a/mcpartools/scheduler/data/merge_logs.sh +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env bash - -# Exit immediately if a simple command exits with a non-zero status. -set -e - -function writeLogHeader(){{ - echo "###########################################################" > ${{LOG_FILE}} - echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo "# ID START END" >> ${{LOG_FILE}} - - for i in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; - do - if [[ $(cat $i) =~ $JOB_ID_REGEX ]]; - then - JOB_ID=${{BASH_REMATCH[1]}}; - else - echo "Cannot get job ID from $i file" - continue - fi - - if [[ $(cat $i) =~ $START_REGEX ]]; - then - START_TIME=${{BASH_REMATCH[1]}}; - else - echo "Cannot get start time from $i file" - continue - fi - - if [[ $(cat $i) =~ $END_REGEX ]]; - then - END_TIME=${{BASH_REMATCH[1]}}; - else - echo "Cannot get end time from $i file" - continue - fi - - if [[ $(cat $i) =~ $STATUS_REGEX ]]; - then - STATUS=${{BASH_REMATCH[1]}}; - else - echo "Cannot get status from $i file" - continue - fi - - echo "# `printf "%5d" $JOB_ID` $START_TIME $END_TIME" >> ${{LOG_FILE}} - done - echo "#" >> ${{LOG_FILE}} -}} - -function writeTimeInSeconds(){{ - echo "###########################################################" >> ${{LOG_FILE}} - echo "############### EXECUTION TIME IN SECONDS #################" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo " ID TIME STATUS " >> ${{LOG_FILE}} - - for i in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; - do - if [[ $(cat $i) =~ $JOB_ID_REGEX ]]; - then - JOB_ID=${{BASH_REMATCH[1]}}; - else - echo "Cannot get job ID from $i file" - continue - fi - - if [[ $(cat $i) =~ $COLLAPSED_TIME_REGEX ]]; - then - COLLAPSED_TIME=${{BASH_REMATCH[1]}}; - else - echo "Cannot get collapsed time from $i file" - continue - fi - - if [[ $(cat $i) =~ $STATUS_REGEX ]]; - then - STATUS=${{BASH_REMATCH[1]}}; - else - echo "Cannot get status from $i file" - continue - fi - - TASK_NUMBER=$((TASK_NUMBER + 1)) - -# check if status is a number - if ! [[ ${{STATUS}} =~ ^[0-9]+$ ]] ; then - continue - fi - - if [[ ${{STATUS}} -ne 0 ]] - then - FAILED=$((FAILED + 1)) - else - SUCCESSES=$((SUCCESSES + 1)) - TOTAL_TIME=$((TOTAL_TIME + $COLLAPSED_TIME)) - fi - - echo " `printf "%5d" $JOB_ID` `printf "%20d" $COLLAPSED_TIME` `printf "%10d" $STATUS`" >> ${{LOG_FILE}} - done - echo "#" >> ${{LOG_FILE}} -}} - -function writeJobsDetailInformation(){{ - cat ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}} >> ${{LOG_FILE}} -}} - -function writeSummary(){{ - echo "###########################################################" >> ${{LOG_FILE}} - echo "######################## SUMMARY ##########################" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo "# NUMBER OF TASKS = `printf "%20d" $TASK_NUMBER`" >> ${{LOG_FILE}} - echo "# SUCCESS = `printf "%20d" $SUCCESSES`" >> ${{LOG_FILE}} - echo "# FAILED = `printf "%20d" $FAILED`" >> ${{LOG_FILE}} - - if [[ ${{SUCCESSES}} -ne 0 ]] - then - echo "# AVERAGE TIME [s]= `printf "%20d" $(($TOTAL_TIME / $SUCCESSES))`" >> ${{LOG_FILE}} - else - echo "# AVERAGE TIME [s]= `printf "%20s" -`" >> ${{LOG_FILE}} - fi - - echo "#" >> ${{LOG_FILE}} -}} - -function appendCollectInfo() {{ - if [ -f $COLLECT_LOG ]; then - cat $COLLECT_LOG >> ${{LOG_FILE}} - fi -}} - -WORKSPACE={workspace_dir:s} -MAIN_DIR={main_dir:s} - -if [ $# -eq 0 ] - then - FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log - LOG_FILE=${{MAIN_DIR}}/${{FILE_NAME}} - else - LOG_FILE=$1 -fi - -if [ ! -f ${{MAIN_DIR}}/submit.log ]; then - exit 1 -fi - -JOBS_LOG_FILE="info.log" -JOB_ID_REGEX="#+ DETAILED INFORMATION ABOUT JOB\s+([0-9]*)" -START_REGEX="# START\s+=\s(.{{19}})" -END_REGEX="# END\s+=\s(.{{19}})" -STATUS_REGEX="# STATUS\s+=\s+([0-9]*)" -COLLAPSED_TIME_REGEX="# TIME IN SECONDS\s+=\s+([0-9]*)" -COLLECT_LOG={collect_dir:s}/info.log -TASK_NUMBER=0 -SUCCESSES=0 -FAILED=0 -TOTAL_TIME=0 - -writeLogHeader -writeTimeInSeconds -writeJobsDetailInformation -appendCollectInfo -writeSummary diff --git a/mcpartools/scheduler/data/merge_logs_slurm.sh b/mcpartools/scheduler/data/merge_logs_slurm.sh new file mode 100755 index 0000000..602a7fd --- /dev/null +++ b/mcpartools/scheduler/data/merge_logs_slurm.sh @@ -0,0 +1,227 @@ +#!/usr/bin/env bash + +function writeLogHeader(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + echo " ID START END STATUS TIME" + + for i in ${{WORKSPACE}}/job_*; + do + JOB_DIR_NAME=$(basename $i) + if [[ $JOB_DIR_NAME =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + + # remove 0 at the beginning of the number + JOB_ID=$(expr $JOB_ID + 0) + else + echo "Cannot get job ID from $i file" + continue + fi + + JOB_STAT=`squeue -j${{ARRAY_JOB_ID}}_${{JOB_ID}} -o '%V %t %M' -h 2> /dev/null` + SQUEUE_STATUS=$? + + if [[ $SQUEUE_STATUS -ne 0 || -z $JOB_STAT ]] + then + INFO_FILE=$i/$JOBS_LOG_FILE + + if [[ $(cat $INFO_FILE) =~ $START_REGEX ]]; + then + START_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get start time from $INFO_FILE file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $END_REGEX ]]; + then + END_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get end time from $INFO_FILE file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $STATUS_REGEX ]]; + then + STATUS=${{BASH_REMATCH[1]}}; + else + echo "Cannot get status from $i file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $COLLAPSED_TIME_REGEX ]]; + then + COLLAPSED_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get collapsed time from $i file" + continue + fi + + else + START_TIME=`echo ${{JOB_STAT}} | cut -d ' ' -f 1` + STATUS=`echo ${{JOB_STAT}} | cut -d ' ' -f 2` + COLLAPSED_TIME=`echo ${{JOB_STAT}} | cut -d ' ' -f 3` + if [[ -z $COLLAPSED_TIME ]] + then + COLLAPSED_TIME='0' + else + COLLAPSED_TIME=`echo $COLLAPSED_TIME | awk -F ":" '{{ print $1 * 60 + $2 }}'` + fi + + END_TIME="-" + fi + + TASK_NUMBER=$((TASK_NUMBER + 1)) + + + if [[ ${{STATUS}} == "CD" ]] + then + SUCCESSES=$((SUCCESSES + 1)) + TOTAL_TIME=$((TOTAL_TIME + $COLLAPSED_TIME)) + elif [[ ${{STATUS}} != "R" && ${{STATUS}} != "PD" && ${{STATUS}} != "S" ]] + then + FAILED=$((FAILED + 1)) + fi + + if [[ $COLLAPSED_TIME -gt $MAX_TIME ]] + then + MAX_TIME=$COLLAPSED_TIME + fi + + JOB_STATUSES+=(${{STATUS}}) + JOB_EXECUTION_TIME+=(${{COLLAPSED_TIME}}) + echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + echo " `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" + done + echo "#" >> ${{LOG_FILE}} +}} + + +function writeTimeInSeconds(){{ + + echo "###########################################################" >> ${{LOG_FILE}} + echo "############### EXECUTION TIME IN SECONDS #################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo " ID TIME STATUS " >> ${{LOG_FILE}} + + for i in `seq 1 $TASK_NUMBER`; + do + + echo " `printf "%5d" $i` `printf "%20d" ${{JOB_EXECUTION_TIME[(($i - 1))]}}` `printf "%10s" ${{JOB_STATUSES[(($i - 1))]}}`" >> ${{LOG_FILE}} + done + echo "#" >> ${{LOG_FILE}} + +}} + +function writeJobsDetailInformation(){{ + for i in ${{WORKSPACE}}/job_*; + do + JOB_DIR_NAME=$(basename $i) + if [[ $JOB_DIR_NAME =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + + # remove 0 at the beginning of the number + JOB_ID=$(expr $JOB_ID + 0) + else + echo "Cannot get job ID from $i file" + continue + fi + + INFO_FILE=$i/$JOBS_LOG_FILE + + if [ ! -f $INFO_FILE ]; then + continue + fi + + EXECUTION_TIME=${{JOB_EXECUTION_TIME[(($JOB_ID - 1))]}} + SIMULATION_STATUS=${{JOB_STATUSES[(($JOB_ID - 1))]}} + + # collapsed time is in line number 7 + sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $INFO_FILE + # status is in line number 9 + sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $INFO_FILE + + cat $INFO_FILE >> ${{LOG_FILE}} + done +}} + +function writeSummary(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "######################## SUMMARY ##########################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# NUMBER OF TASKS = `printf "%20d" $TASK_NUMBER`" >> ${{LOG_FILE}} + echo "# SUCCESS = `printf "%20d" $SUCCESSES`" >> ${{LOG_FILE}} + echo "# FAILED = `printf "%20d" $FAILED`" >> ${{LOG_FILE}} + + if [[ ${{SUCCESSES}} -ne 0 ]] + then + echo "# AVERAGE TIME [s]= `printf "%20d" $(($TOTAL_TIME / $SUCCESSES))`" >> ${{LOG_FILE}} + echo "# MAX TIME [s] = `printf "%20s" $MAX_TIME`" >> ${{LOG_FILE}} + else + echo "# AVERAGE TIME [s]= `printf "%20s" -`" >> ${{LOG_FILE}} + echo "# MAX TIME [s] = `printf "%20s" -`" >> ${{LOG_FILE}} + fi + + echo "#" >> ${{LOG_FILE}} +}} + +function appendCollectInfo() {{ + if [ -f $COLLECT_LOG ]; then + cat $COLLECT_LOG >> ${{LOG_FILE}} + fi +}} + +WORKSPACE={workspace_dir:s} +MAIN_DIR={main_dir:s} +COLLECT_LOG={collect_dir:s}/info.log + +if [ $# -eq 0 ] + then + FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log + LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} + else + LOG_FILE=$1 +fi + +JOBS_LOG_FILE="info.log" +JOB_ID_REGEX="job_([0-9]*)" +START_REGEX="# START\s+=\s(.{{19}})" +END_REGEX="# END\s+=\s(.{{19}})" +STATUS_REGEX="# STATUS\s+=\s+([A-Z]*)" +COLLAPSED_TIME_REGEX="# TIME IN SECONDS\s+=\s+([0-9]*)" +TASK_NUMBER=0 +SUCCESSES=0 +FAILED=0 +MAX_TIME=0 +TOTAL_TIME=0 +JOB_STATUSES=() +JOB_EXECUTION_TIME=() + +LOGFILE="${{MAIN_DIR}}/submit.log" + +RE="Job ID: ([0-9]*)" + +# no log file. Probably submit.sh not run +if [ ! -f $LOGFILE ]; then + echo "File not found: $LOGFILE" + echo "Make sure you run submit script" + exit 1 +fi + +if [[ $(cat $LOGFILE) =~ $RE ]]; +then + ARRAY_JOB_ID=${{BASH_REMATCH[1]}}; +fi + +writeLogHeader +writeTimeInSeconds +writeJobsDetailInformation +appendCollectInfo +writeSummary diff --git a/mcpartools/scheduler/data/merge_logs_torque.sh b/mcpartools/scheduler/data/merge_logs_torque.sh new file mode 100755 index 0000000..d14fe32 --- /dev/null +++ b/mcpartools/scheduler/data/merge_logs_torque.sh @@ -0,0 +1,227 @@ +#!/usr/bin/env bash + +function writeLogHeader(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + echo " ID START END STATUS TIME" + + for i in ${{WORKSPACE}}/job_*; + do + JOB_DIR_NAME=$(basename $i) + if [[ $JOB_DIR_NAME =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + + # remove 0 at the beginning of the number + JOB_ID=$(expr $JOB_ID + 0) + else + echo "Cannot get job ID from $i file" + continue + fi + + JOB_STAT=`qstat ${{ARRAY_JOB_ID}}'['${{JOB_ID}}']' 2> /dev/null | tail -1` + QSTAT_STATUS=$? + INFO_FILE=$i/$JOBS_LOG_FILE + + if [[ QSTAT_STATUS -ne 0 || -z $JOB_STAT ]] + then + if [[ $(cat $INFO_FILE) =~ $END_REGEX ]]; + then + END_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get end time from $INFO_FILE file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $STATUS_REGEX ]]; + then + STATUS=${{BASH_REMATCH[1]}}; + else + echo "Cannot get status from $i file" + continue + fi + + if [[ $(cat $INFO_FILE) =~ $COLLAPSED_TIME_REGEX ]]; + then + COLLAPSED_TIME=${{BASH_REMATCH[1]}}; + else + echo "Cannot get collapsed time from $i file" + continue + fi + + else + + STATUS=`echo ${{JOB_STAT}} | awk '{{print $5}}'` + + COLLAPSED_TIME=`echo ${{JOB_STAT}} | awk '{{print $4}}'` + + if [[ -z $COLLAPSED_TIME ]] + then + COLLAPSED_TIME='0' + else + COLLAPSED_TIME=`echo $COLLAPSED_TIME | awk -F ":" '{{ print $1 * 3600 + $2 * 60 + $3 }}'` + fi + + END_TIME="-" + fi + + if [[ -f $INFO_FILE && $(cat $INFO_FILE) =~ $START_REGEX ]]; + then + START_TIME=${{BASH_REMATCH[1]}}; + else + START_TIME="-" + fi + + TASK_NUMBER=$((TASK_NUMBER + 1)) + + + if [[ ${{STATUS}} == "CD" || ${{STATUS}} == "C" ]] + then + SUCCESSES=$((SUCCESSES + 1)) + TOTAL_TIME=$((TOTAL_TIME + $COLLAPSED_TIME)) + elif [[ ${{STATUS}} != "R" && ${{STATUS}} != "W" && ${{STATUS}} != "S" && ${{STATUS}} != "Q" ]] + then + FAILED=$((FAILED + 1)) + fi + + if [[ $COLLAPSED_TIME -gt $MAX_TIME ]] + then + MAX_TIME=$COLLAPSED_TIME + fi + + JOB_STATUSES+=(${{STATUS}}) + JOB_EXECUTION_TIME+=(${{COLLAPSED_TIME}}) + echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + echo " `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" + done + echo "#" >> ${{LOG_FILE}} +}} + + +function writeTimeInSeconds(){{ + + echo "###########################################################" >> ${{LOG_FILE}} + echo "############### EXECUTION TIME IN SECONDS #################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo " ID TIME STATUS " >> ${{LOG_FILE}} + + for i in `seq 1 $TASK_NUMBER`; + do + + echo " `printf "%5d" $i` `printf "%20d" ${{JOB_EXECUTION_TIME[(($i - 1))]}}` `printf "%10s" ${{JOB_STATUSES[(($i - 1))]}}`" >> ${{LOG_FILE}} + done + echo "#" >> ${{LOG_FILE}} + +}} + +function writeJobsDetailInformation(){{ + for i in ${{WORKSPACE}}/job_*; + do + JOB_DIR_NAME=$(basename $i) + if [[ $JOB_DIR_NAME =~ $JOB_ID_REGEX ]]; + then + JOB_ID=${{BASH_REMATCH[1]}}; + + # remove 0 at the beginning of the number + JOB_ID=$(expr $JOB_ID + 0) + else + echo "Cannot get job ID from $i file" + continue + fi + + INFO_FILE=$i/$JOBS_LOG_FILE + + if [ ! -f $INFO_FILE ]; then + continue + fi + + EXECUTION_TIME=${{JOB_EXECUTION_TIME[(($JOB_ID - 1))]}} + SIMULATION_STATUS=${{JOB_STATUSES[(($JOB_ID - 1))]}} + + # collapsed time is in line number 7 + sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $INFO_FILE + # status is in line number 9 + sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $INFO_FILE + + cat $INFO_FILE >> ${{LOG_FILE}} + done +}} + +function writeSummary(){{ + echo "###########################################################" >> ${{LOG_FILE}} + echo "######################## SUMMARY ##########################" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# NUMBER OF TASKS = `printf "%20d" $TASK_NUMBER`" >> ${{LOG_FILE}} + echo "# SUCCESS = `printf "%20d" $SUCCESSES`" >> ${{LOG_FILE}} + echo "# FAILED = `printf "%20d" $FAILED`" >> ${{LOG_FILE}} + + if [[ ${{SUCCESSES}} -ne 0 ]] + then + echo "# AVERAGE TIME [s]= `printf "%20d" $(($TOTAL_TIME / $SUCCESSES))`" >> ${{LOG_FILE}} + echo "# MAX TIME [s] = `printf "%20s" $MAX_TIME`" >> ${{LOG_FILE}} + else + echo "# AVERAGE TIME [s]= `printf "%20s" -`" >> ${{LOG_FILE}} + echo "# MAX TIME [s] = `printf "%20s" -`" >> ${{LOG_FILE}} + fi + + echo "#" >> ${{LOG_FILE}} +}} + +function appendCollectInfo() {{ + if [ -f $COLLECT_LOG ]; then + cat $COLLECT_LOG >> ${{LOG_FILE}} + fi +}} + +WORKSPACE={workspace_dir:s} +MAIN_DIR={main_dir:s} +COLLECT_LOG={collect_dir:s}/info.log + +if [ $# -eq 0 ] + then + FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log + LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} + else + LOG_FILE=$1 +fi + +JOBS_LOG_FILE="info.log" +JOB_ID_REGEX="job_([0-9]*)" +START_REGEX="# START\s+=\s(.{{19}})" +END_REGEX="# END\s+=\s(.{{19}})" +STATUS_REGEX="# STATUS\s+=\s+([A-Z]*)" +COLLAPSED_TIME_REGEX="# TIME IN SECONDS\s+=\s+([0-9]*)" +TASK_NUMBER=0 +SUCCESSES=0 +FAILED=0 +MAX_TIME=0 +TOTAL_TIME=0 +JOB_STATUSES=() +JOB_EXECUTION_TIME=() + +LOGFILE="${{MAIN_DIR}}/submit.log" + +RE="Job ID: ([0-9]*)" + +# no log file. Probably submit.sh not run +if [ ! -f $LOGFILE ]; then + echo "File not found: $LOGFILE" + echo "Make sure you run submit script" + exit 1 +fi + +if [[ $(cat $LOGFILE) =~ $RE ]]; +then + ARRAY_JOB_ID=${{BASH_REMATCH[1]}}; +fi + +writeLogHeader +writeTimeInSeconds +writeJobsDetailInformation +appendCollectInfo +writeSummary diff --git a/mcpartools/scheduler/data/status.sh b/mcpartools/scheduler/data/status.sh index 636a5b2..95aa399 100755 --- a/mcpartools/scheduler/data/status.sh +++ b/mcpartools/scheduler/data/status.sh @@ -2,14 +2,14 @@ FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log -STATUS_CMD="{merge_script_path:s} $FILE_NAME" +STATUS_CMD="{merge_script_path:s} workspace/$FILE_NAME" eval $STATUS_CMD CMD_STATUS=$? if [[ $CMD_STATUS -eq 0 ]] then - echo "Status successfully saved to file: $FILE_NAME" + echo "Status successfully saved to file: workspace/$FILE_NAME" else echo "Unable to create status file" fi \ No newline at end of file diff --git a/mcpartools/scheduler/slurm.py b/mcpartools/scheduler/slurm.py index 7b8cd26..03e488d 100644 --- a/mcpartools/scheduler/slurm.py +++ b/mcpartools/scheduler/slurm.py @@ -14,6 +14,6 @@ def __init__(self, options_content): main_run_script_template = os.path.join('data', 'run_slurm.sh') - merge_logs_script_template = os.path.join('data', 'merge_logs.sh') + merge_logs_script_template = os.path.join('data', 'merge_logs_slurm.sh') status_script_template = os.path.join('data', 'status.sh') diff --git a/mcpartools/scheduler/torque.py b/mcpartools/scheduler/torque.py index 0cb55c4..0f184d1 100644 --- a/mcpartools/scheduler/torque.py +++ b/mcpartools/scheduler/torque.py @@ -14,6 +14,6 @@ def __init__(self, options_content): main_run_script_template = os.path.join('data', 'run_torque.sh') - merge_logs_script_template = os.path.join('data', 'merge_logs.sh') + merge_logs_script_template = os.path.join('data', 'merge_logs_torque.sh') status_script_template = os.path.join('data', 'status.sh') From 170c4ffebb685ff41f7312fa6c03783b098a4a47 Mon Sep 17 00:00:00 2001 From: jarema Date: Mon, 19 Nov 2018 11:54:08 +0100 Subject: [PATCH 09/12] add link to job log file in log directory --- mcpartools/scheduler/data/merge_logs_slurm.sh | 23 ++++++++++++++----- .../scheduler/data/merge_logs_torque.sh | 21 +++++++++++++---- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/mcpartools/scheduler/data/merge_logs_slurm.sh b/mcpartools/scheduler/data/merge_logs_slurm.sh index 1a624ad..bcdebb8 100755 --- a/mcpartools/scheduler/data/merge_logs_slurm.sh +++ b/mcpartools/scheduler/data/merge_logs_slurm.sh @@ -146,6 +146,16 @@ function writeJobsDetailInformation(){{ done }} +function createLinkToJobLog(){{ + I=1 + mkdir -p $JOB_LOG_LINK_DIR + for FILE in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; + do + ln -f $FILE $JOB_LOG_LINK_DIR/`printf "job_%04d.log" $I` + I=$((I + 1)) + done +}} + function writeSummary(){{ echo "###########################################################" >> ${{LOG_FILE}} echo "######################## SUMMARY ##########################" >> ${{LOG_FILE}} @@ -198,25 +208,26 @@ MAX_TIME=0 TOTAL_TIME=0 JOB_STATUSES=() JOB_EXECUTION_TIME=() - -LOGFILE="${{MAIN_DIR}}/submit.log" +JOB_LOG_LINK_DIR=${{MAIN_DIR}}/log/jobs_log +SUBMIT_LOG_FILE="${{MAIN_DIR}}/submit.log" RE="Job ID: ([0-9]*)" # no log file. Probably submit.sh not run -if [ ! -f $LOGFILE ]; then - echo "File not found: $LOGFILE" +if [ ! -f $SUBMIT_LOG_FILE ]; then + echo "File not found: $SUBMIT_LOG_FILE" echo "Make sure you run submit script" exit 1 fi -if [[ $(cat $LOGFILE) =~ $RE ]]; +if [[ $(cat $SUBMIT_LOG_FILE) =~ $RE ]]; then ARRAY_JOB_ID=${{BASH_REMATCH[1]}}; fi writeLogHeader writeTimeInSeconds -#writeJobsDetailInformation +writeJobsDetailInformation +createLinkToJobLog appendCollectInfo writeSummary diff --git a/mcpartools/scheduler/data/merge_logs_torque.sh b/mcpartools/scheduler/data/merge_logs_torque.sh index a03ab1c..67372ed 100755 --- a/mcpartools/scheduler/data/merge_logs_torque.sh +++ b/mcpartools/scheduler/data/merge_logs_torque.sh @@ -146,6 +146,16 @@ function writeJobsDetailInformation(){{ done }} +function createLinkToJobLog(){{ + I=1 + mkdir -p $JOB_LOG_LINK_DIR + for FILE in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; + do + ln -f $FILE $JOB_LOG_LINK_DIR/`printf "job_%04d.log" $I` + I=$((I + 1)) + done +}} + function writeSummary(){{ echo "###########################################################" >> ${{LOG_FILE}} echo "######################## SUMMARY ##########################" >> ${{LOG_FILE}} @@ -198,19 +208,19 @@ MAX_TIME=0 TOTAL_TIME=0 JOB_STATUSES=() JOB_EXECUTION_TIME=() - -LOGFILE="${{MAIN_DIR}}/submit.log" +JOB_LOG_LINK_DIR=${{MAIN_DIR}}/log/jobs_log +SUBMIT_LOG_FILE="${{MAIN_DIR}}/submit.log" RE="Job ID: ([0-9]*)" # no log file. Probably submit.sh not run -if [ ! -f $LOGFILE ]; then - echo "File not found: $LOGFILE" +if [ ! -f $SUBMIT_LOG_FILE ]; then + echo "File not found: $SUBMIT_LOG_FILE" echo "Make sure you run submit script" exit 1 fi -if [[ $(cat $LOGFILE) =~ $RE ]]; +if [[ $(cat $SUBMIT_LOG_FILE) =~ $RE ]]; then ARRAY_JOB_ID=${{BASH_REMATCH[1]}}; fi @@ -218,5 +228,6 @@ fi writeLogHeader writeTimeInSeconds writeJobsDetailInformation +createLinkToJobLog appendCollectInfo writeSummary From dab17f59b3611344e15caf6de5947d5913f7fbe7 Mon Sep 17 00:00:00 2001 From: jarema Date: Wed, 21 Nov 2018 21:03:37 +0100 Subject: [PATCH 10/12] make logging scripts more reliable --- mcpartools/scheduler/data/merge_logs_slurm.sh | 22 ++++++++++++++++--- .../scheduler/data/merge_logs_torque.sh | 22 ++++++++++++++++--- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/mcpartools/scheduler/data/merge_logs_slurm.sh b/mcpartools/scheduler/data/merge_logs_slurm.sh index bcdebb8..8d6f49c 100755 --- a/mcpartools/scheduler/data/merge_logs_slurm.sh +++ b/mcpartools/scheduler/data/merge_logs_slurm.sh @@ -48,16 +48,24 @@ function writeLogHeader(){{ if [[ $(cat $INFO_FILE) =~ $STATUS_REGEX ]]; then STATUS=${{BASH_REMATCH[1]}}; + if [ -z "$STATUS" ]; + then + STATUS="-" + fi else - echo "Cannot get status from $i file" + echo "Cannot get status from $INFO_FILE file" continue fi if [[ $(cat $INFO_FILE) =~ $COLLAPSED_TIME_REGEX ]]; then COLLAPSED_TIME=${{BASH_REMATCH[1]}}; + if [ -z "$COLLAPSED_TIME" ]; + then + COLLAPSED_TIME=0 + fi else - echo "Cannot get collapsed time from $i file" + echo "Cannot get collapsed time from $INFO_FILE file" continue fi @@ -151,7 +159,10 @@ function createLinkToJobLog(){{ mkdir -p $JOB_LOG_LINK_DIR for FILE in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; do - ln -f $FILE $JOB_LOG_LINK_DIR/`printf "job_%04d.log" $I` + if [ -f $FILE ] + then + ln -f $FILE $JOB_LOG_LINK_DIR/`printf "job_%04d.log" $I` + fi I=$((I + 1)) done }} @@ -218,6 +229,11 @@ if [ ! -f $SUBMIT_LOG_FILE ]; then echo "File not found: $SUBMIT_LOG_FILE" echo "Make sure you run submit script" exit 1 +else + touch $LOG_FILE + if [ ! -f $LOG_FILE ]; then + exit 2 + fi fi if [[ $(cat $SUBMIT_LOG_FILE) =~ $RE ]]; diff --git a/mcpartools/scheduler/data/merge_logs_torque.sh b/mcpartools/scheduler/data/merge_logs_torque.sh index 67372ed..def88d2 100755 --- a/mcpartools/scheduler/data/merge_logs_torque.sh +++ b/mcpartools/scheduler/data/merge_logs_torque.sh @@ -39,16 +39,24 @@ function writeLogHeader(){{ if [[ $(cat $INFO_FILE) =~ $STATUS_REGEX ]]; then STATUS=${{BASH_REMATCH[1]}}; + if [ -z "$STATUS" ]; + then + STATUS="-" + fi else - echo "Cannot get status from $i file" + echo "Cannot get status from $INFO_FILE file" continue fi if [[ $(cat $INFO_FILE) =~ $COLLAPSED_TIME_REGEX ]]; then COLLAPSED_TIME=${{BASH_REMATCH[1]}}; + if [ -z "$COLLAPSED_TIME" ]; + then + COLLAPSED_TIME=0 + fi else - echo "Cannot get collapsed time from $i file" + echo "Cannot get collapsed time from $INFO_FILE file" continue fi @@ -151,7 +159,10 @@ function createLinkToJobLog(){{ mkdir -p $JOB_LOG_LINK_DIR for FILE in ${{WORKSPACE}}/job_*/${{JOBS_LOG_FILE}}; do - ln -f $FILE $JOB_LOG_LINK_DIR/`printf "job_%04d.log" $I` + if [ -f $FILE ] + then + ln -f $FILE $JOB_LOG_LINK_DIR/`printf "job_%04d.log" $I` + fi I=$((I + 1)) done }} @@ -218,6 +229,11 @@ if [ ! -f $SUBMIT_LOG_FILE ]; then echo "File not found: $SUBMIT_LOG_FILE" echo "Make sure you run submit script" exit 1 +else + touch $LOG_FILE + if [ ! -f $LOG_FILE ]; then + exit 2 + fi fi if [[ $(cat $SUBMIT_LOG_FILE) =~ $RE ]]; From 5bcc3983c30559828902c1dd5e607a7661e74599 Mon Sep 17 00:00:00 2001 From: jarema Date: Fri, 23 Nov 2018 18:44:38 +0100 Subject: [PATCH 11/12] move logic to from run to main run --- mcpartools/mcengine/data/run_fluka.sh | 39 +---------- mcpartools/mcengine/data/run_shieldhit.sh | 32 --------- mcpartools/mcengine/fluka.py | 4 -- mcpartools/scheduler/base.py | 8 +-- mcpartools/scheduler/data/merge_logs_slurm.sh | 65 +++++++++++++------ .../scheduler/data/merge_logs_torque.sh | 58 +++++++++++++---- mcpartools/scheduler/data/run_slurm.sh | 48 +++++++++++++- mcpartools/scheduler/data/run_torque.sh | 46 ++++++++++++- mcpartools/scheduler/data/status.sh | 32 +++++++-- mcpartools/scheduler/data/submit_slurm.sh | 2 +- 10 files changed, 208 insertions(+), 126 deletions(-) diff --git a/mcpartools/mcengine/data/run_fluka.sh b/mcpartools/mcengine/data/run_fluka.sh index fd1bf52..3595927 100755 --- a/mcpartools/mcengine/data/run_fluka.sh +++ b/mcpartools/mcengine/data/run_fluka.sh @@ -3,51 +3,14 @@ # Exit immediately if a simple command exits with a non-zero status. set -e -START=$(date +%s) - # location of FLUKA binary file FLUKA_BIN={fluka_bin:s} -# number of particles per job -PARTICLE_NO={particle_no:d} - -# seed of RNG -RNG_SEED={job_id:d} - -WORK_DIR={working_directory:s} # go to working directory -cd $WORK_DIR - -LOG_FILE=$WORK_DIR"/info.log" - -echo "###########################################################" > $LOG_FILE -echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE -echo "###########################################################" >> $LOG_FILE -echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE -echo "# END = -" >> $LOG_FILE -echo "# TIME IN SECONDS = -" >> $LOG_FILE -echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE -echo "# STATUS = -" >> $LOG_FILE -echo "#" >> $LOG_FILE +cd {working_directory:s} # run rfluka $FLUKA_BIN -N0 -M1 {engine_options:s} {input_basename:s} -if [[ $? -ne 0 ]] -then - SIMULATION_STATUS="ST" -else - SIMULATION_STATUS="CD" -fi - -let "EXECUTION_TIME = $(date +%s) - $START" - -# end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE -# collapsed time is in line number 7 -sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE -# status is in line number 9 -sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE # each fluka run will save files with same name, in order to distinguish output from multiple runs # we rename output files, appending suffix with jobid to each of them diff --git a/mcpartools/mcengine/data/run_shieldhit.sh b/mcpartools/mcengine/data/run_shieldhit.sh index 9cbe2d2..a8f395c 100755 --- a/mcpartools/mcengine/data/run_shieldhit.sh +++ b/mcpartools/mcengine/data/run_shieldhit.sh @@ -3,9 +3,6 @@ # Exit immediately if a simple command exits with a non-zero status. set -e - -START=$(date +%s) - # location of SHIELD-HIT12A binary file SHIELDHIT_BIN={shieldhit_bin:s} @@ -24,38 +21,9 @@ GEO_FILE={geo_file:s} MAT_FILE={mat_file:s} DETECT_FILE={detect_file:s} -LOG_FILE=$WORK_DIR"/info.log" - -echo "###########################################################" > $LOG_FILE -echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE -echo "###########################################################" >> $LOG_FILE -echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE -echo "# END = -" >> $LOG_FILE -echo "# TIME IN SECONDS = -" >> $LOG_FILE -echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE -echo "# STATUS = -" >> $LOG_FILE -echo "#" >> $LOG_FILE - # go to working directory cd {working_directory:s} # execute simulation $SHIELDHIT_BIN --beamfile=$BEAM_FILE --geofile=$GEO_FILE --matfile=$MAT_FILE --detectfile=$DETECT_FILE -n $PARTICLE_NO -N $RNG_SEED {engine_options:s} $WORK_DIR -if [[ $? -ne 0 ]] -then - SIMULATION_STATUS="ST" -else - SIMULATION_STATUS="CD" -fi - - -let "EXECUTION_TIME = $(date +%s) - $START" - -# end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE -# collapsed time is in line number 7 -sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE -# status is in line number 9 -sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE \ No newline at end of file diff --git a/mcpartools/mcengine/fluka.py b/mcpartools/mcengine/fluka.py index 308e28e..d9f31d6 100644 --- a/mcpartools/mcengine/fluka.py +++ b/mcpartools/mcengine/fluka.py @@ -32,8 +32,6 @@ def __init__(self, input_path, mc_run_script, collect_method, mc_engine_options) self.collect_script_content = resource_string(__name__, self.collect_script).decode('ascii') - self.particle_no = 1 - @property def input_files(self): # TODO check if additional files are needed @@ -56,7 +54,6 @@ def randomize(self, new_seed): self.input_lines = result def set_particle_no(self, particle_no): - self.particle_no = particle_no result = [] for l in self.input_lines: # TODO better discovery needed @@ -85,7 +82,6 @@ def save_run_script(self, output_dir, jobid): engine_options=self.engine_options, working_directory=output_dir_abs_path, input_basename=input_base_name, - particle_no=self.particle_no, job_id=jobid) out_file_name = 'run.sh' out_file_path = os.path.join(output_dir, out_file_name) diff --git a/mcpartools/scheduler/base.py b/mcpartools/scheduler/base.py index 6ae3a01..acf8b5d 100644 --- a/mcpartools/scheduler/base.py +++ b/mcpartools/scheduler/base.py @@ -45,12 +45,12 @@ def submit_script_body(self, jobs_no, main_dir, workspace_dir): main_dir=main_dir, collect_script_name='collect.sh') - def main_run_script_body(self, jobs_no, workspace_dir): + def main_run_script_body(self, particle_no, workspace_dir): from pkg_resources import resource_string tpl = resource_string(__name__, self.main_run_script_template) self.main_run_script = tpl.decode('ascii').format(options_header=self.options_header, workspace_dir=workspace_dir, - jobs_no=jobs_no) + particle_no=particle_no) return self.main_run_script def merge_logs_body(self, workspace_dir, collect_dir, main_dir): @@ -77,11 +77,11 @@ def write_submit_script(self, main_dir, script_basename, jobs_no, workspace_dir) logger.debug("Jobs no " + str(jobs_no)) logger.debug("Workspace " + abs_path_workspace) - def write_main_run_script(self, jobs_no, output_dir): + def write_main_run_script(self, particle_no, output_dir): output_dir_abspath = os.path.abspath(output_dir) out_file_path = os.path.join(output_dir_abspath, self.main_run_script) fd = open(out_file_path, 'w') - fd.write(self.main_run_script_body(jobs_no=jobs_no, workspace_dir=output_dir_abspath)) + fd.write(self.main_run_script_body(particle_no=particle_no, workspace_dir=output_dir_abspath)) fd.close() os.chmod(out_file_path, 0o750) logger.debug("Saved main run script: " + out_file_path) diff --git a/mcpartools/scheduler/data/merge_logs_slurm.sh b/mcpartools/scheduler/data/merge_logs_slurm.sh index 8d6f49c..517dc54 100755 --- a/mcpartools/scheduler/data/merge_logs_slurm.sh +++ b/mcpartools/scheduler/data/merge_logs_slurm.sh @@ -1,11 +1,14 @@ #!/usr/bin/env bash function writeLogHeader(){{ - echo "###########################################################" >> ${{LOG_FILE}} - echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "###########################################################" >> ${{LOG_FILE}} + echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + fi echo " ID START END STATUS TIME" for i in ${{WORKSPACE}}/job_*; @@ -102,10 +105,18 @@ function writeLogHeader(){{ JOB_STATUSES+=(${{STATUS}}) JOB_EXECUTION_TIME+=(${{COLLAPSED_TIME}}) - echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + fi echo " `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" done - echo "#" >> ${{LOG_FILE}} + + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "#" >> ${{LOG_FILE}} + fi }} @@ -198,13 +209,18 @@ WORKSPACE={workspace_dir:s} MAIN_DIR={main_dir:s} COLLECT_LOG={collect_dir:s}/info.log -if [ $# -eq 0 ] - then - FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log - LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} - else - LOG_FILE=$1 -fi +SAVE_TO_FILE_FLAG=false +while getopts ":s" opt; do + case $opt in + s) + SAVE_TO_FILE_FLAG=true + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done JOBS_LOG_FILE="info.log" JOB_ID_REGEX="job_([0-9]*)" @@ -221,7 +237,8 @@ JOB_STATUSES=() JOB_EXECUTION_TIME=() JOB_LOG_LINK_DIR=${{MAIN_DIR}}/log/jobs_log SUBMIT_LOG_FILE="${{MAIN_DIR}}/submit.log" - +FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log +LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} RE="Job ID: ([0-9]*)" # no log file. Probably submit.sh not run @@ -229,7 +246,10 @@ if [ ! -f $SUBMIT_LOG_FILE ]; then echo "File not found: $SUBMIT_LOG_FILE" echo "Make sure you run submit script" exit 1 -else +fi + +if [[ $SAVE_TO_FILE_FLAG = true ]] +then touch $LOG_FILE if [ ! -f $LOG_FILE ]; then exit 2 @@ -242,8 +262,11 @@ then fi writeLogHeader -writeTimeInSeconds -writeJobsDetailInformation -createLinkToJobLog -appendCollectInfo -writeSummary +if [[ $SAVE_TO_FILE_FLAG = true ]] +then + writeTimeInSeconds + writeJobsDetailInformation + createLinkToJobLog + appendCollectInfo + writeSummary +fi \ No newline at end of file diff --git a/mcpartools/scheduler/data/merge_logs_torque.sh b/mcpartools/scheduler/data/merge_logs_torque.sh index def88d2..859f6cc 100755 --- a/mcpartools/scheduler/data/merge_logs_torque.sh +++ b/mcpartools/scheduler/data/merge_logs_torque.sh @@ -1,11 +1,14 @@ #!/usr/bin/env bash function writeLogHeader(){{ - echo "###########################################################" >> ${{LOG_FILE}} - echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "###########################################################" >> ${{LOG_FILE}} + echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + fi echo " ID START END STATUS TIME" for i in ${{WORKSPACE}}/job_*; @@ -102,10 +105,17 @@ function writeLogHeader(){{ JOB_STATUSES+=(${{STATUS}}) JOB_EXECUTION_TIME+=(${{COLLAPSED_TIME}}) - echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + fi echo " `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" done - echo "#" >> ${{LOG_FILE}} + + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "#" >> ${{LOG_FILE}} + fi }} @@ -206,6 +216,19 @@ if [ $# -eq 0 ] LOG_FILE=$1 fi +SAVE_TO_FILE_FLAG=false +while getopts ":s" opt; do + case $opt in + s) + SAVE_TO_FILE_FLAG=true + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + JOBS_LOG_FILE="info.log" JOB_ID_REGEX="job_([0-9]*)" START_REGEX="# START\s+=\s(.{{19}})" @@ -221,7 +244,8 @@ JOB_STATUSES=() JOB_EXECUTION_TIME=() JOB_LOG_LINK_DIR=${{MAIN_DIR}}/log/jobs_log SUBMIT_LOG_FILE="${{MAIN_DIR}}/submit.log" - +FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log +LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} RE="Job ID: ([0-9]*)" # no log file. Probably submit.sh not run @@ -229,7 +253,10 @@ if [ ! -f $SUBMIT_LOG_FILE ]; then echo "File not found: $SUBMIT_LOG_FILE" echo "Make sure you run submit script" exit 1 -else +fi + +if [[ $SAVE_TO_FILE_FLAG = true ]] +then touch $LOG_FILE if [ ! -f $LOG_FILE ]; then exit 2 @@ -242,8 +269,11 @@ then fi writeLogHeader -writeTimeInSeconds -writeJobsDetailInformation -createLinkToJobLog -appendCollectInfo -writeSummary +if [[ $SAVE_TO_FILE_FLAG = true ]] +then + writeTimeInSeconds + writeJobsDetailInformation + createLinkToJobLog + appendCollectInfo + writeSummary +fi diff --git a/mcpartools/scheduler/data/run_slurm.sh b/mcpartools/scheduler/data/run_slurm.sh index 141dda2..e104bf5 100755 --- a/mcpartools/scheduler/data/run_slurm.sh +++ b/mcpartools/scheduler/data/run_slurm.sh @@ -1,8 +1,50 @@ #!/usr/bin/env bash -# Exit immediately if a simple command exits with a non-zero status. -set -e + +function initLogFile(){{ + START=$(date +%s) + echo "###########################################################" > $LOG_FILE + echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $SLURM_ARRAY_TASK_ID` ############" >> $LOG_FILE + echo "###########################################################" >> $LOG_FILE + echo "#" >> $LOG_FILE + echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE + echo "# END = -" >> $LOG_FILE + echo "# TIME IN SECONDS = -" >> $LOG_FILE + echo "# NO OF PARTICLES =`printf "%20d" $PARTICLES_NO`" >> $LOG_FILE + echo "# STATUS = -" >> $LOG_FILE + echo "#" >> $LOG_FILE +}} + +function completeLogFile(){{ + let "EXECUTION_TIME = $(date +%s) - $START" + + # end time is in line number 6 + sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE + # collapsed time is in line number 7 + sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE + # status is in line number 9 + sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE +}} + {options_header:s} +WORKSPACE_PATH={workspace_dir:s} +JOB_DIR_PATH=$WORKSPACE_PATH/job_`printf %04d $SLURM_ARRAY_TASK_ID` + +LOG_FILE=$JOB_DIR_PATH"/info.log" + +PARTICLES_NO={particle_no:d} + +initLogFile + # Run individual jobs -{workspace_dir:s}/job_`printf %04d $SLURM_ARRAY_TASK_ID`/run.sh +$JOB_DIR_PATH/run.sh + +if [[ $? -ne 0 ]] +then + SIMULATION_STATUS="ST" +else + SIMULATION_STATUS="CD" +fi + +completeLogFile \ No newline at end of file diff --git a/mcpartools/scheduler/data/run_torque.sh b/mcpartools/scheduler/data/run_torque.sh index a8734d5..623ba3a 100644 --- a/mcpartools/scheduler/data/run_torque.sh +++ b/mcpartools/scheduler/data/run_torque.sh @@ -1,8 +1,50 @@ #!/usr/bin/env bash -# Exit immediately if a simple command exits with a non-zero status. -set -e + +function initLogFile(){{ + START=$(date +%s) + echo "###########################################################" > $LOG_FILE + echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $SLURM_ARRAY_TASK_ID` ############" >> $LOG_FILE + echo "###########################################################" >> $LOG_FILE + echo "#" >> $LOG_FILE + echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE + echo "# END = -" >> $LOG_FILE + echo "# TIME IN SECONDS = -" >> $LOG_FILE + echo "# NO OF PARTICLES =`printf "%20d" $PARTICLES_NO`" >> $LOG_FILE + echo "# STATUS = -" >> $LOG_FILE + echo "#" >> $LOG_FILE +}} + +function completeLogFile(){{ + let "EXECUTION_TIME = $(date +%s) - $START" + + # end time is in line number 6 + sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE + # collapsed time is in line number 7 + sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE + # status is in line number 9 + sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE +}} + {options_header:s} +WORKSPACE_PATH={workspace_dir:s} +JOB_DIR_PATH=$WORKSPACE_PATH/job_`printf %04d $SLURM_ARRAY_TASK_ID` + +LOG_FILE=$JOB_DIR_PATH"/info.log" + +PARTICLES_NO={particle_no:d} + +initLogFile + # Run individual jobs {workspace_dir:s}/job_`printf %04d $PBS_ARRAYID`/run.sh + +if [[ $? -ne 0 ]] +then + SIMULATION_STATUS="ST" +else + SIMULATION_STATUS="CD" +fi + +completeLogFile \ No newline at end of file diff --git a/mcpartools/scheduler/data/status.sh b/mcpartools/scheduler/data/status.sh index 95aa399..da37060 100755 --- a/mcpartools/scheduler/data/status.sh +++ b/mcpartools/scheduler/data/status.sh @@ -1,15 +1,33 @@ #!/usr/bin/env bash -FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log - -STATUS_CMD="{merge_script_path:s} workspace/$FILE_NAME" +STATUS_CMD="{merge_script_path:s}" +SAVE_TO_FILE_FLAG=false +while getopts ":s" opt; do + case $opt in + s) + SAVE_TO_FILE_FLAG=true + STATUS_CMD="$STATUS_CMD -s" + ;; + h) + cat <&2 + exit 1 + ;; + esac +done eval $STATUS_CMD CMD_STATUS=$? -if [[ $CMD_STATUS -eq 0 ]] +if [[ $CMD_STATUS -eq 0 && $SAVE_TO_FILE_FLAG = true ]] then - echo "Status successfully saved to file: workspace/$FILE_NAME" -else - echo "Unable to create status file" + echo "Status successfully saved to file." fi \ No newline at end of file diff --git a/mcpartools/scheduler/data/submit_slurm.sh b/mcpartools/scheduler/data/submit_slurm.sh index 69f32bc..70ebfdd 100755 --- a/mcpartools/scheduler/data/submit_slurm.sh +++ b/mcpartools/scheduler/data/submit_slurm.sh @@ -62,7 +62,7 @@ if [ -n "$CALC_JOBID" ] ; then cat $ERR >> "$LOGFILE" fi - MERGE_LOGS_CMD="sbatch --dependency=afterany:$LAST_JOB_ID --output='{log_dir:s}/output_%j_merge_logs.log' --error='{log_dir:s}/error_%j_merge_logs.log' --parsable {main_dir:s}/workspace/merge_logs.sh > $OUT 2> $ERR" + MERGE_LOGS_CMD="sbatch --dependency=afterany:$LAST_JOB_ID --output='{log_dir:s}/output_%j_merge_logs.log' --error='{log_dir:s}/error_%j_merge_logs.log' --parsable {main_dir:s}/workspace/merge_logs.sh -s> $OUT 2> $ERR" eval $MERGE_LOGS_CMD echo "" >> "$LOGFILE" From ec6f13417a18ce9d7821c9dfa66f01b8d97196a1 Mon Sep 17 00:00:00 2001 From: jarema Date: Fri, 23 Nov 2018 18:44:38 +0100 Subject: [PATCH 12/12] move logic to from run to main run --- mcpartools/generator.py | 2 +- mcpartools/mcengine/data/run_fluka.sh | 39 +---------- mcpartools/mcengine/data/run_shieldhit.sh | 32 --------- mcpartools/mcengine/fluka.py | 4 -- mcpartools/scheduler/base.py | 8 +-- mcpartools/scheduler/data/merge_logs_slurm.sh | 65 +++++++++++++------ .../scheduler/data/merge_logs_torque.sh | 58 +++++++++++++---- mcpartools/scheduler/data/run_slurm.sh | 48 +++++++++++++- mcpartools/scheduler/data/run_torque.sh | 46 ++++++++++++- mcpartools/scheduler/data/status.sh | 32 +++++++-- mcpartools/scheduler/data/submit_slurm.sh | 2 +- 11 files changed, 209 insertions(+), 127 deletions(-) diff --git a/mcpartools/generator.py b/mcpartools/generator.py index 66c3c23..fbed51e 100644 --- a/mcpartools/generator.py +++ b/mcpartools/generator.py @@ -195,7 +195,7 @@ def generate_workspace(self): self.mc_engine.save_run_script(jobdir_path, jobid + 1) - self.scheduler.write_main_run_script(jobs_no=self.options.jobs_no, output_dir=self.workspace_dir) + self.scheduler.write_main_run_script(particle_no=self.options.particle_no, output_dir=self.workspace_dir) self.mc_engine.write_collect_script(self.main_dir) def generate_submit_script(self): diff --git a/mcpartools/mcengine/data/run_fluka.sh b/mcpartools/mcengine/data/run_fluka.sh index fd1bf52..3595927 100755 --- a/mcpartools/mcengine/data/run_fluka.sh +++ b/mcpartools/mcengine/data/run_fluka.sh @@ -3,51 +3,14 @@ # Exit immediately if a simple command exits with a non-zero status. set -e -START=$(date +%s) - # location of FLUKA binary file FLUKA_BIN={fluka_bin:s} -# number of particles per job -PARTICLE_NO={particle_no:d} - -# seed of RNG -RNG_SEED={job_id:d} - -WORK_DIR={working_directory:s} # go to working directory -cd $WORK_DIR - -LOG_FILE=$WORK_DIR"/info.log" - -echo "###########################################################" > $LOG_FILE -echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE -echo "###########################################################" >> $LOG_FILE -echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE -echo "# END = -" >> $LOG_FILE -echo "# TIME IN SECONDS = -" >> $LOG_FILE -echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE -echo "# STATUS = -" >> $LOG_FILE -echo "#" >> $LOG_FILE +cd {working_directory:s} # run rfluka $FLUKA_BIN -N0 -M1 {engine_options:s} {input_basename:s} -if [[ $? -ne 0 ]] -then - SIMULATION_STATUS="ST" -else - SIMULATION_STATUS="CD" -fi - -let "EXECUTION_TIME = $(date +%s) - $START" - -# end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE -# collapsed time is in line number 7 -sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE -# status is in line number 9 -sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE # each fluka run will save files with same name, in order to distinguish output from multiple runs # we rename output files, appending suffix with jobid to each of them diff --git a/mcpartools/mcengine/data/run_shieldhit.sh b/mcpartools/mcengine/data/run_shieldhit.sh index 9cbe2d2..a8f395c 100755 --- a/mcpartools/mcengine/data/run_shieldhit.sh +++ b/mcpartools/mcengine/data/run_shieldhit.sh @@ -3,9 +3,6 @@ # Exit immediately if a simple command exits with a non-zero status. set -e - -START=$(date +%s) - # location of SHIELD-HIT12A binary file SHIELDHIT_BIN={shieldhit_bin:s} @@ -24,38 +21,9 @@ GEO_FILE={geo_file:s} MAT_FILE={mat_file:s} DETECT_FILE={detect_file:s} -LOG_FILE=$WORK_DIR"/info.log" - -echo "###########################################################" > $LOG_FILE -echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $RNG_SEED` ############" >> $LOG_FILE -echo "###########################################################" >> $LOG_FILE -echo "#" >> $LOG_FILE -echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE -echo "# END = -" >> $LOG_FILE -echo "# TIME IN SECONDS = -" >> $LOG_FILE -echo "# NO OF PARTICLES =`printf "%20d" $PARTICLE_NO`" >> $LOG_FILE -echo "# STATUS = -" >> $LOG_FILE -echo "#" >> $LOG_FILE - # go to working directory cd {working_directory:s} # execute simulation $SHIELDHIT_BIN --beamfile=$BEAM_FILE --geofile=$GEO_FILE --matfile=$MAT_FILE --detectfile=$DETECT_FILE -n $PARTICLE_NO -N $RNG_SEED {engine_options:s} $WORK_DIR -if [[ $? -ne 0 ]] -then - SIMULATION_STATUS="ST" -else - SIMULATION_STATUS="CD" -fi - - -let "EXECUTION_TIME = $(date +%s) - $START" - -# end time is in line number 6 -sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE -# collapsed time is in line number 7 -sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE -# status is in line number 9 -sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE \ No newline at end of file diff --git a/mcpartools/mcengine/fluka.py b/mcpartools/mcengine/fluka.py index 308e28e..d9f31d6 100644 --- a/mcpartools/mcengine/fluka.py +++ b/mcpartools/mcengine/fluka.py @@ -32,8 +32,6 @@ def __init__(self, input_path, mc_run_script, collect_method, mc_engine_options) self.collect_script_content = resource_string(__name__, self.collect_script).decode('ascii') - self.particle_no = 1 - @property def input_files(self): # TODO check if additional files are needed @@ -56,7 +54,6 @@ def randomize(self, new_seed): self.input_lines = result def set_particle_no(self, particle_no): - self.particle_no = particle_no result = [] for l in self.input_lines: # TODO better discovery needed @@ -85,7 +82,6 @@ def save_run_script(self, output_dir, jobid): engine_options=self.engine_options, working_directory=output_dir_abs_path, input_basename=input_base_name, - particle_no=self.particle_no, job_id=jobid) out_file_name = 'run.sh' out_file_path = os.path.join(output_dir, out_file_name) diff --git a/mcpartools/scheduler/base.py b/mcpartools/scheduler/base.py index 6ae3a01..acf8b5d 100644 --- a/mcpartools/scheduler/base.py +++ b/mcpartools/scheduler/base.py @@ -45,12 +45,12 @@ def submit_script_body(self, jobs_no, main_dir, workspace_dir): main_dir=main_dir, collect_script_name='collect.sh') - def main_run_script_body(self, jobs_no, workspace_dir): + def main_run_script_body(self, particle_no, workspace_dir): from pkg_resources import resource_string tpl = resource_string(__name__, self.main_run_script_template) self.main_run_script = tpl.decode('ascii').format(options_header=self.options_header, workspace_dir=workspace_dir, - jobs_no=jobs_no) + particle_no=particle_no) return self.main_run_script def merge_logs_body(self, workspace_dir, collect_dir, main_dir): @@ -77,11 +77,11 @@ def write_submit_script(self, main_dir, script_basename, jobs_no, workspace_dir) logger.debug("Jobs no " + str(jobs_no)) logger.debug("Workspace " + abs_path_workspace) - def write_main_run_script(self, jobs_no, output_dir): + def write_main_run_script(self, particle_no, output_dir): output_dir_abspath = os.path.abspath(output_dir) out_file_path = os.path.join(output_dir_abspath, self.main_run_script) fd = open(out_file_path, 'w') - fd.write(self.main_run_script_body(jobs_no=jobs_no, workspace_dir=output_dir_abspath)) + fd.write(self.main_run_script_body(particle_no=particle_no, workspace_dir=output_dir_abspath)) fd.close() os.chmod(out_file_path, 0o750) logger.debug("Saved main run script: " + out_file_path) diff --git a/mcpartools/scheduler/data/merge_logs_slurm.sh b/mcpartools/scheduler/data/merge_logs_slurm.sh index 8d6f49c..517dc54 100755 --- a/mcpartools/scheduler/data/merge_logs_slurm.sh +++ b/mcpartools/scheduler/data/merge_logs_slurm.sh @@ -1,11 +1,14 @@ #!/usr/bin/env bash function writeLogHeader(){{ - echo "###########################################################" >> ${{LOG_FILE}} - echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "###########################################################" >> ${{LOG_FILE}} + echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + fi echo " ID START END STATUS TIME" for i in ${{WORKSPACE}}/job_*; @@ -102,10 +105,18 @@ function writeLogHeader(){{ JOB_STATUSES+=(${{STATUS}}) JOB_EXECUTION_TIME+=(${{COLLAPSED_TIME}}) - echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + fi echo " `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" done - echo "#" >> ${{LOG_FILE}} + + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "#" >> ${{LOG_FILE}} + fi }} @@ -198,13 +209,18 @@ WORKSPACE={workspace_dir:s} MAIN_DIR={main_dir:s} COLLECT_LOG={collect_dir:s}/info.log -if [ $# -eq 0 ] - then - FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log - LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} - else - LOG_FILE=$1 -fi +SAVE_TO_FILE_FLAG=false +while getopts ":s" opt; do + case $opt in + s) + SAVE_TO_FILE_FLAG=true + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done JOBS_LOG_FILE="info.log" JOB_ID_REGEX="job_([0-9]*)" @@ -221,7 +237,8 @@ JOB_STATUSES=() JOB_EXECUTION_TIME=() JOB_LOG_LINK_DIR=${{MAIN_DIR}}/log/jobs_log SUBMIT_LOG_FILE="${{MAIN_DIR}}/submit.log" - +FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log +LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} RE="Job ID: ([0-9]*)" # no log file. Probably submit.sh not run @@ -229,7 +246,10 @@ if [ ! -f $SUBMIT_LOG_FILE ]; then echo "File not found: $SUBMIT_LOG_FILE" echo "Make sure you run submit script" exit 1 -else +fi + +if [[ $SAVE_TO_FILE_FLAG = true ]] +then touch $LOG_FILE if [ ! -f $LOG_FILE ]; then exit 2 @@ -242,8 +262,11 @@ then fi writeLogHeader -writeTimeInSeconds -writeJobsDetailInformation -createLinkToJobLog -appendCollectInfo -writeSummary +if [[ $SAVE_TO_FILE_FLAG = true ]] +then + writeTimeInSeconds + writeJobsDetailInformation + createLinkToJobLog + appendCollectInfo + writeSummary +fi \ No newline at end of file diff --git a/mcpartools/scheduler/data/merge_logs_torque.sh b/mcpartools/scheduler/data/merge_logs_torque.sh index def88d2..859f6cc 100755 --- a/mcpartools/scheduler/data/merge_logs_torque.sh +++ b/mcpartools/scheduler/data/merge_logs_torque.sh @@ -1,11 +1,14 @@ #!/usr/bin/env bash function writeLogHeader(){{ - echo "###########################################################" >> ${{LOG_FILE}} - echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} - echo "###########################################################" >> ${{LOG_FILE}} - echo "#" >> ${{LOG_FILE}} - echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "###########################################################" >> ${{LOG_FILE}} + echo "############ START AND END OF JOBS EXECUTION ##############" >> ${{LOG_FILE}} + echo "###########################################################" >> ${{LOG_FILE}} + echo "#" >> ${{LOG_FILE}} + echo "# ID START END STATUS TIME" >> ${{LOG_FILE}} + fi echo " ID START END STATUS TIME" for i in ${{WORKSPACE}}/job_*; @@ -102,10 +105,17 @@ function writeLogHeader(){{ JOB_STATUSES+=(${{STATUS}}) JOB_EXECUTION_TIME+=(${{COLLAPSED_TIME}}) - echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "# `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" >> ${{LOG_FILE}} + fi echo " `printf "%5d" $JOB_ID` `printf "%20s" $START_TIME` `printf "%20s" $END_TIME` `printf "%10s" $STATUS` `printf "%6s" $COLLAPSED_TIME`" done - echo "#" >> ${{LOG_FILE}} + + if [[ $SAVE_TO_FILE_FLAG = true ]] + then + echo "#" >> ${{LOG_FILE}} + fi }} @@ -206,6 +216,19 @@ if [ $# -eq 0 ] LOG_FILE=$1 fi +SAVE_TO_FILE_FLAG=false +while getopts ":s" opt; do + case $opt in + s) + SAVE_TO_FILE_FLAG=true + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + JOBS_LOG_FILE="info.log" JOB_ID_REGEX="job_([0-9]*)" START_REGEX="# START\s+=\s(.{{19}})" @@ -221,7 +244,8 @@ JOB_STATUSES=() JOB_EXECUTION_TIME=() JOB_LOG_LINK_DIR=${{MAIN_DIR}}/log/jobs_log SUBMIT_LOG_FILE="${{MAIN_DIR}}/submit.log" - +FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log +LOG_FILE=${{MAIN_DIR}}/workspace/${{FILE_NAME}} RE="Job ID: ([0-9]*)" # no log file. Probably submit.sh not run @@ -229,7 +253,10 @@ if [ ! -f $SUBMIT_LOG_FILE ]; then echo "File not found: $SUBMIT_LOG_FILE" echo "Make sure you run submit script" exit 1 -else +fi + +if [[ $SAVE_TO_FILE_FLAG = true ]] +then touch $LOG_FILE if [ ! -f $LOG_FILE ]; then exit 2 @@ -242,8 +269,11 @@ then fi writeLogHeader -writeTimeInSeconds -writeJobsDetailInformation -createLinkToJobLog -appendCollectInfo -writeSummary +if [[ $SAVE_TO_FILE_FLAG = true ]] +then + writeTimeInSeconds + writeJobsDetailInformation + createLinkToJobLog + appendCollectInfo + writeSummary +fi diff --git a/mcpartools/scheduler/data/run_slurm.sh b/mcpartools/scheduler/data/run_slurm.sh index 141dda2..e104bf5 100755 --- a/mcpartools/scheduler/data/run_slurm.sh +++ b/mcpartools/scheduler/data/run_slurm.sh @@ -1,8 +1,50 @@ #!/usr/bin/env bash -# Exit immediately if a simple command exits with a non-zero status. -set -e + +function initLogFile(){{ + START=$(date +%s) + echo "###########################################################" > $LOG_FILE + echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $SLURM_ARRAY_TASK_ID` ############" >> $LOG_FILE + echo "###########################################################" >> $LOG_FILE + echo "#" >> $LOG_FILE + echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE + echo "# END = -" >> $LOG_FILE + echo "# TIME IN SECONDS = -" >> $LOG_FILE + echo "# NO OF PARTICLES =`printf "%20d" $PARTICLES_NO`" >> $LOG_FILE + echo "# STATUS = -" >> $LOG_FILE + echo "#" >> $LOG_FILE +}} + +function completeLogFile(){{ + let "EXECUTION_TIME = $(date +%s) - $START" + + # end time is in line number 6 + sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE + # collapsed time is in line number 7 + sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE + # status is in line number 9 + sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE +}} + {options_header:s} +WORKSPACE_PATH={workspace_dir:s} +JOB_DIR_PATH=$WORKSPACE_PATH/job_`printf %04d $SLURM_ARRAY_TASK_ID` + +LOG_FILE=$JOB_DIR_PATH"/info.log" + +PARTICLES_NO={particle_no:d} + +initLogFile + # Run individual jobs -{workspace_dir:s}/job_`printf %04d $SLURM_ARRAY_TASK_ID`/run.sh +$JOB_DIR_PATH/run.sh + +if [[ $? -ne 0 ]] +then + SIMULATION_STATUS="ST" +else + SIMULATION_STATUS="CD" +fi + +completeLogFile \ No newline at end of file diff --git a/mcpartools/scheduler/data/run_torque.sh b/mcpartools/scheduler/data/run_torque.sh index a8734d5..623ba3a 100644 --- a/mcpartools/scheduler/data/run_torque.sh +++ b/mcpartools/scheduler/data/run_torque.sh @@ -1,8 +1,50 @@ #!/usr/bin/env bash -# Exit immediately if a simple command exits with a non-zero status. -set -e + +function initLogFile(){{ + START=$(date +%s) + echo "###########################################################" > $LOG_FILE + echo "######### DETAILED INFORMATION ABOUT JOB `printf "%5d" $SLURM_ARRAY_TASK_ID` ############" >> $LOG_FILE + echo "###########################################################" >> $LOG_FILE + echo "#" >> $LOG_FILE + echo "# START = `date +"%Y-%m-%dT%H:%M:%S"`" >> $LOG_FILE + echo "# END = -" >> $LOG_FILE + echo "# TIME IN SECONDS = -" >> $LOG_FILE + echo "# NO OF PARTICLES =`printf "%20d" $PARTICLES_NO`" >> $LOG_FILE + echo "# STATUS = -" >> $LOG_FILE + echo "#" >> $LOG_FILE +}} + +function completeLogFile(){{ + let "EXECUTION_TIME = $(date +%s) - $START" + + # end time is in line number 6 + sed -i "6s/.*/# END = `date +"%Y-%m-%dT%H:%M:%S"`/" $LOG_FILE + # collapsed time is in line number 7 + sed -i "7s/.*/# TIME IN SECONDS =`printf "%20d" $EXECUTION_TIME`/" $LOG_FILE + # status is in line number 9 + sed -i "9s/.*/# STATUS =`printf "%20s" $SIMULATION_STATUS`/" $LOG_FILE +}} + {options_header:s} +WORKSPACE_PATH={workspace_dir:s} +JOB_DIR_PATH=$WORKSPACE_PATH/job_`printf %04d $SLURM_ARRAY_TASK_ID` + +LOG_FILE=$JOB_DIR_PATH"/info.log" + +PARTICLES_NO={particle_no:d} + +initLogFile + # Run individual jobs {workspace_dir:s}/job_`printf %04d $PBS_ARRAYID`/run.sh + +if [[ $? -ne 0 ]] +then + SIMULATION_STATUS="ST" +else + SIMULATION_STATUS="CD" +fi + +completeLogFile \ No newline at end of file diff --git a/mcpartools/scheduler/data/status.sh b/mcpartools/scheduler/data/status.sh index 95aa399..da37060 100755 --- a/mcpartools/scheduler/data/status.sh +++ b/mcpartools/scheduler/data/status.sh @@ -1,15 +1,33 @@ #!/usr/bin/env bash -FILE_NAME=status_`date +%Y%m%d_%H%M%S`.log - -STATUS_CMD="{merge_script_path:s} workspace/$FILE_NAME" +STATUS_CMD="{merge_script_path:s}" +SAVE_TO_FILE_FLAG=false +while getopts ":s" opt; do + case $opt in + s) + SAVE_TO_FILE_FLAG=true + STATUS_CMD="$STATUS_CMD -s" + ;; + h) + cat <&2 + exit 1 + ;; + esac +done eval $STATUS_CMD CMD_STATUS=$? -if [[ $CMD_STATUS -eq 0 ]] +if [[ $CMD_STATUS -eq 0 && $SAVE_TO_FILE_FLAG = true ]] then - echo "Status successfully saved to file: workspace/$FILE_NAME" -else - echo "Unable to create status file" + echo "Status successfully saved to file." fi \ No newline at end of file diff --git a/mcpartools/scheduler/data/submit_slurm.sh b/mcpartools/scheduler/data/submit_slurm.sh index 69f32bc..70ebfdd 100755 --- a/mcpartools/scheduler/data/submit_slurm.sh +++ b/mcpartools/scheduler/data/submit_slurm.sh @@ -62,7 +62,7 @@ if [ -n "$CALC_JOBID" ] ; then cat $ERR >> "$LOGFILE" fi - MERGE_LOGS_CMD="sbatch --dependency=afterany:$LAST_JOB_ID --output='{log_dir:s}/output_%j_merge_logs.log' --error='{log_dir:s}/error_%j_merge_logs.log' --parsable {main_dir:s}/workspace/merge_logs.sh > $OUT 2> $ERR" + MERGE_LOGS_CMD="sbatch --dependency=afterany:$LAST_JOB_ID --output='{log_dir:s}/output_%j_merge_logs.log' --error='{log_dir:s}/error_%j_merge_logs.log' --parsable {main_dir:s}/workspace/merge_logs.sh -s> $OUT 2> $ERR" eval $MERGE_LOGS_CMD echo "" >> "$LOGFILE"