Skip to content

Commit cd548c3

Browse files
authored
Merge pull request #36 from adamlin120/patch-1
2 parents 6b97d4b + d4fdb31 commit cd548c3

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

orchestration/slurm/launchers/accelerate-launcher.slurm

+3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ ACCELERATE_CONFIG_FILE=accelerate.yaml
3333
# EDIT if the nodes do not have 8 GPUs each
3434
GPUS_PER_NODE=8
3535
NNODES=$SLURM_NNODES
36+
# Total number of processes across the whole job (nodes x GPUs per node).
# Abort with a readable message when NNODES is empty, instead of letting the
# arithmetic expansion fail with a cryptic "operand expected" syntax error.
: "${NNODES:?NNODES is empty; run this launcher inside a Slurm allocation}"
NUM_PROCESSES=$((NNODES * GPUS_PER_NODE))
3637

3738
# define the node 0 hostname:port
3839
# First hostname of the allocation acts as the rendezvous host.
# The nodelist is quoted because Slurm's compressed form (e.g. node[01-04])
# contains glob bracket characters that the shell would otherwise try to
# pathname-expand or word-split before scontrol receives it.
MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
@@ -45,6 +46,8 @@ MASTER_PORT=6000
4546
LAUNCHER="python -u -m accelerate.commands.launch \
4647
--rdzv_conf "rdzv_backend=c10d,rdzv_endpoint=$MASTER_ADDR:$MASTER_PORT" \
4748
--config_file $ACCELERATE_CONFIG_FILE \
49+
--num_processes $NUM_PROCESSES \
50+
--num_machines $NNODES \
4851
--main_process_ip $MASTER_ADDR \
4952
--main_process_port $MASTER_PORT \
5053
--machine_rank \$SLURM_PROCID \

0 commit comments

Comments
 (0)