[float8] Support passing extra args to benchmarking script (#1961)

danielvegamyhre · web-flow · commit 7bb7f2387328 · 2025-03-27T07:43:05.000-07:00
diff --git a/benchmarks/float8/training/README.md b/benchmarks/float8/training/README.md
@@ -14,5 +14,6 @@ Training parameters can be configured via environment variables.
     - `FLOAT8_RECIPE_WITH_BEST_SETTINGS`: "rowwise" or "tensorwise". Applies float8 training with the specified scaling recipe, as well as additional training configs which are optimal for that scaling recipe. See `float8_training_benchmark.sh` for more details.
     - `BATCH_SIZE`: Defaults to 1.
     - `STEPS`: Defaults to 100.
+    - `EXTRA_ARGS`: Extra arguments to pass to the torchtitan training script. See the [torchtitan](https://github.com/pytorch/torchtitan) docs for the full list of options.
 
 **NOTE**: `torch.compile` and FSDP2 are always used. Other forms of parallelism supported in torchtitan are not yet supported in this script.
diff --git a/benchmarks/float8/training/float8_training_benchmark.sh b/benchmarks/float8/training/float8_training_benchmark.sh
@@ -22,6 +22,7 @@ if [ -z "${TORCHTITAN_ROOT}" ]; then
   echo " * FLOAT8_RECIPE_WITH_BEST_SETTINGS: "rowwise" or "tensorwise". if set, use float8 training in torchtitan with the specified recipe, including the additional settings which are optimal for that recipe. otherwise, use bf16 mixed precision training."
   echo " * BATCH_SIZE: defaults to 1."
   echo " * STEPS: defaults to 100."
+  echo " * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
   exit 1
 fi
 
@@ -44,7 +45,7 @@ cd ${TORCHTITAN_ROOT}
 echo "float8 args: ${FLOAT8_ARGS}"
 
 # run the command with the specified arguments
-CONFIG_FILE="./torchtitan/models/llama/train_configs/llama3_8b.toml" ${TORCHTITAN_ROOT}/run_train.sh --training.steps=${STEPS} --training.batch_size=${BATCH_SIZE} --training.compile ${FLOAT8_ARGS} 2>&1 | tee ${LOG_FILE}
+CONFIG_FILE="./torchtitan/models/llama/train_configs/llama3_8b.toml" ${TORCHTITAN_ROOT}/run_train.sh --training.steps=${STEPS} --training.batch_size=${BATCH_SIZE} --training.compile ${FLOAT8_ARGS} ${EXTRA_ARGS} 2>&1 | tee ${LOG_FILE}
 
 # return to original working directory
 cd $original_dir