diff --git a/leaderboard/README.md b/leaderboard/README.md
index 660c5e7be..8993f7778 100644
--- a/leaderboard/README.md
+++ b/leaderboard/README.md
@@ -21,6 +21,7 @@ Follow the setup instructions in the evaluation harness [README](https://github.
 Create two folders `generations_$model` and `metrics_$model` where you will save the generated code and the metrics respectively for your model `$model`.
 
 ```bash
+model=YOUR_MODEL
 cd bigcode-evaluation-harness
 mkdir generations_$model
 mkdir metrics_$model
@@ -29,7 +30,8 @@ mkdir metrics_$model
 To run the evaluation, we first generate the code solutions for the target tasks on GPUs, then execute the code on a docker container (only cpus are needed).
 
 ### 2- Generation
-Below are the instruction for generating the code solutions sequentially or in parallel with slurm. You might need to reduce the batch size for some models or change the precision based on your device.
+Below are the instructions for generating the code solutions sequentially or in parallel with Slurm.
+You might need to reduce the batch size for some models, change the precision based on your device, or set max_length to 1024 for some tasks, depending on your tokenizer.
 ```bash
 # after activating env and setting up accelerate...
 langs=(py js java cpp swift php d jl lua r rkt rs)
@@ -58,6 +60,7 @@ for lang in "${langs[@]}"; do
         --trust_remote_code \
         --use_auth_token \
         --generation_only \
+        --save_generations \
         --save_generations_path $generations_path
     echo "Task $task done"
 done
@@ -111,7 +114,7 @@ for lang in "${langs[@]}"; do
         task=multiple-$lang
     fi
 
-    gen_suffix=generations_$task\_$model.json
+    gen_suffix=generations_$task\_$model\_$task.json
     metric_suffix=metrics_$task\_$model.json
     echo "Evaluation of $model on $task benchmark, data in $generations_path/$gen_suffix"
 
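
Note on the last hunk: when `--save_generations` is passed, the harness appends the task name to the file given in `--save_generations_path` before writing, so a path requested as `generations_$task_$model.json` lands on disk as `generations_$task_$model_$task.json`. The doubled `_$task` in the updated `gen_suffix` appears to reflect exactly this, pointing the evaluation step at the on-disk name.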
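
The new README text names three knobs to tune before a large run: batch size, precision, and max_length. As a minimal sketch of a single-task generation run with those knobs set explicitly (the model name, task, and values below are illustrative placeholders, not leaderboard settings; `--batch_size`, `--precision`, and `--max_length_generation` are the harness flags these knobs map to):

```bash
# Hypothetical single-task run; assumes the env and accelerate are configured
# as in the README above. Tune for your hardware:
#   --batch_size             reduce if you hit out-of-memory errors
#   --precision              bf16/fp16/fp32 depending on your device
#   --max_length_generation  the max_length knob; 1024 for tokenizers that
#                            produce long prompts on some tasks
model=YOUR_MODEL

accelerate launch main.py \
    --model $model \
    --tasks multiple-lua \
    --max_length_generation 1024 \
    --precision bf16 \
    --batch_size 8 \
    --trust_remote_code \
    --use_auth_token \
    --generation_only \
    --save_generations \
    --save_generations_path generations_$model/generations_multiple-lua_$model.json
```

Given the save-path behavior described above, this run would actually write `generations_multiple-lua_$model_multiple-lua.json` inside `generations_$model/`, which is the name the patched evaluation loop looks for.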