forked from draskot/Vini
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_number_cores_per_node
executable file
·51 lines (42 loc) · 2.27 KB
/
get_number_cores_per_node
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/bin/bash
#
# get_number_cores_per_node
#
# Submits a short Slurm batch job that runs lscpu on a node of the requested
# partition, derives the number of CPU cores per node from its output
# (CPU(s) divided by Thread(s) per core) and stores the result in
# $WORKDIR/<partition>_cores. When no usable lscpu output is obtained within
# the walltime, the probe job is cancelled and the user is asked to type the
# number instead.
#
# Usage: get_number_cores_per_node <partition_key> <walltime_seconds>
#   partition_key     selects the file $WORKDIR/<partition_key>_partition,
#                     whose content is the partition name passed to Slurm
#   walltime_seconds  whole number of seconds to wait for the probe job
#
# Environment:
#   WORKDIR   directory holding the configuration files read here
#             (job_submit, job_cancel, <key>_partition,
#             excluded_<partition>_nodes) and receiving all output files
#   vini_dir  directory containing the wait_until_jobs_finish helper
#   USER      owner of the probe job, used when cancelling it

#######################################
# Print the number of cores described by an lscpu output file.
# Arguments: $1 - path to a file holding lscpu output
# Outputs:   CPU(s) / Thread(s) per core on stdout
# Returns:   non-zero (and prints nothing) when either value is missing,
#            not a whole number, or the thread count is zero
#######################################
cores_from_lscpu() {
  # Only the first "CPU(s):" and "Thread(s) per core:" labels are used, so
  # lines such as "On-line CPU(s) list:" or "NUMA node0 CPU(s):" are ignored.
  # Leading blanks are allowed because some lscpu versions indent sub-entries.
  awk -F: '
    cpus == "" && /^[ \t]*CPU\(s\):/ {
      v = $2; gsub(/[ \t]/, "", v); cpus = v
    }
    threads == "" && /^[ \t]*Thread\(s\) per core:/ {
      v = $2; gsub(/[ \t]/, "", v); threads = v
    }
    END {
      if (cpus !~ /^[0-9]+$/ || threads !~ /^[0-9]+$/ || threads + 0 == 0) {
        exit 1
      }
      print cpus / threads
    }
  ' "$1"
}

#######################################
# Format a number of seconds as HH:MM:SS for "#SBATCH --time".
# Arguments: $1 - non-negative whole number of seconds
# Outputs:   the formatted time on stdout
#######################################
seconds_to_hms() {
  # 10# forces base 10 so that values like "08" are not read as octal.
  local secs=$((10#$1))
  printf '%02d:%02d:%02d\n' \
    $((secs / 3600)) $((secs % 3600 / 60)) $((secs % 60))
}

#######################################
# Entry point; see the file header for arguments and environment.
# Returns: 2 on bad arguments, 1 on missing configuration, 0 otherwise.
#######################################
main() {
  if (($# < 2)); then
    echo "Usage: get_number_cores_per_node <partition> <walltime_seconds>" >&2
    return 2
  fi
  local partition_key=$1
  local walltime=$2
  if [[ ! $walltime =~ ^[0-9]+$ ]]; then
    echo "get_number_cores_per_node: walltime must be a whole number of seconds, got '$walltime'" >&2
    return 2
  fi
  if [[ -z ${WORKDIR:-} || -z ${vini_dir:-} ]]; then
    echo "get_number_cores_per_node: WORKDIR and vini_dir must be set" >&2
    return 1
  fi

  local job_submit job_cancel partition
  job_submit=$(cat "$WORKDIR/job_submit")
  job_cancel=$(cat "$WORKDIR/job_cancel")
  partition=$(cat "$WORKDIR/${partition_key}_partition")
  if [[ -z $partition ]]; then
    echo "get_number_cores_per_node: no partition name in $WORKDIR/${partition_key}_partition" >&2
    return 1
  fi

  # The exclusion list is keyed on the resolved partition name, not on $1.
  local excluded_nodes=""
  local excluded_file=$WORKDIR/excluded_${partition}_nodes
  if [[ -r $excluded_file ]]; then
    excluded_nodes=$(cat "$excluded_file")
  fi

  # Generate the probe job.
  local job_script=$WORKDIR/get_number_of_cores
  {
    echo "#! /bin/bash"
    echo "#SBATCH --job-name=cores"
    echo "#SBATCH --output=$WORKDIR/cores.out"
    echo "#SBATCH --error=$WORKDIR/cores.err"
    echo "#SBATCH --time=$(seconds_to_hms "$walltime")"
    # An empty --exclude= carries no information, so omit it entirely.
    if [[ -n $excluded_nodes ]]; then
      echo "#SBATCH --exclude=$excluded_nodes"
    fi
    if [[ $partition == gpu && -e /etc/slurm/gres.conf ]]; then
      echo "#SBATCH --gres=gpu:0"
    fi
    echo "#SBATCH --cpus-per-task=1"
    echo "#SBATCH --mem=2gb"
    echo "#SBATCH --partition=$partition"
    echo "WORKDIR=$WORKDIR"
    # LC_ALL=C keeps the lscpu labels parsed by cores_from_lscpu in English.
    echo "LC_ALL=C lscpu"
  } > "$job_script"
  chmod u+x "$job_script"

  echo "Trying to determine the number of CPU cores per ${partition} node. This may last up to $walltime seconds, please do not interrupt."

  # Remove results of earlier runs so a stale cores.out is never parsed.
  rm -f -- "$WORKDIR"/cores.*

  # job_submit / job_cancel are left unquoted on purpose: the files may hold
  # a command together with extra arguments.
  # shellcheck disable=SC2086
  $job_submit -Q "$job_script"
  # wait_until_jobs_finish is defined outside this file; presumably it blocks
  # until the submitted job has left the queue. timeout bounds the wait.
  timeout "${walltime}s" "$vini_dir/wait_until_jobs_finish"

  local cores
  if [[ -e $WORKDIR/cores.out ]] \
    && cores=$(cores_from_lscpu "$WORKDIR/cores.out"); then
    echo "Found $cores CPU cores per ${partition} node."
  else
    # Cancel only the probe job (named "cores" above) rather than every job
    # owned by the user. Assumes job_cancel is Slurm's scancel, in line with
    # the #SBATCH directives written above.
    # shellcheck disable=SC2086
    $job_cancel -u "$USER" --name=cores
    echo
    echo "Failed to access any ${partition} node. A probable reason is that all nodes are either busy or down."
    read -r -p "Write the number of CPU cores per ${partition} node here:" cores
  fi
  echo "$cores" > "$WORKDIR/${partition}_cores"
}

main "$@"