Fix Qwen config (#2377)

acisseJZhong · jessicazhongeee · web-flow · commit a0f74c323354 · 2025-02-10T17:10:37.000-08:00
Co-authored-by: JessicaZhong <zhengjesszhong@gmail.com>
diff --git a/recipes/configs/qwen2_5/14B_lora_single_device.yaml b/recipes/configs/qwen2_5/14B_lora_single_device.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-14B-Instruct --output-dir /tmp/Qwen2_5-14B-Instruct
+#   tune download Qwen/Qwen2.5-14B-Instruct --output-dir /tmp/Qwen2.5-14B-Instruct
 #
 # To launch on a single device, run the following command from root:
 #   tune run lora_finetune_single_device --config qwen2_5/14B_lora_single_device
@@ -30,13 +30,13 @@ model:
 
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-14B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-14B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-14B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-14B-Instruct/merges.txt
   max_seq_len: null
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-14B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-14B-Instruct
   checkpoint_files:
     filename_format: model-{}-of-{}.safetensors
     max_filename: "00008"
diff --git a/recipes/configs/qwen2_5/32B_lora.yaml b/recipes/configs/qwen2_5/32B_lora.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-32B-Instruct --output-dir /tmp/Qwen2_5-32B-Instruct
+#   tune download Qwen/Qwen2.5-32B-Instruct --output-dir /tmp/Qwen2.5-32B-Instruct
 #
 # To launch on 8 devices, run the following command from root:
 #   tune run --nnodes 1 --nproc_per_node 8 lora_finetune_distributed --config qwen2_5/32B_lora
@@ -28,13 +28,13 @@ model:
 
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-32B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-32B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-32B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-32B-Instruct/merges.txt
   max_seq_len: null
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-32B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-32B-Instruct
   checkpoint_files:
     filename_format: model-{}-of-{}.safetensors
     max_filename: "00017"
diff --git a/recipes/configs/qwen2_5/3B_full.yaml b/recipes/configs/qwen2_5/3B_full.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct
+#   tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2.5-3B-Instruct
 #
 # To launch on 2 devices, run the following command from root:
 #   tune run --nnodes 1 --nproc_per_node 2 full_finetune_distributed --config qwen2_5/3B_full
@@ -22,8 +22,8 @@ output_dir: /tmp/torchtune/qwen2_5_3B/full # /tmp may be deleted by your system.
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-3B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-3B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-3B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-3B-Instruct/merges.txt
   max_seq_len: null
 
 # Dataset
@@ -39,7 +39,7 @@ model:
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-3B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-3B-Instruct
   checkpoint_files: [
     model-00001-of-00002.safetensors,
     model-00002-of-00002.safetensors,
diff --git a/recipes/configs/qwen2_5/3B_full_single_device.yaml b/recipes/configs/qwen2_5/3B_full_single_device.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct
+#   tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2.5-3B-Instruct
 #
 # The default config uses an optimizer from bitsandbytes. If you do not have it installed,
 # you can install it with
@@ -24,8 +24,8 @@ output_dir: /tmp/torchtune/qwen2_5_3B/full_single_device # /tmp may be deleted b
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-3B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-3B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-3B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-3B-Instruct/merges.txt
   max_seq_len: null
 
 # Dataset
@@ -41,7 +41,7 @@ model:
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-3B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-3B-Instruct
   checkpoint_files: [
     model-00001-of-00002.safetensors,
     model-00002-of-00002.safetensors,
diff --git a/recipes/configs/qwen2_5/3B_lora.yaml b/recipes/configs/qwen2_5/3B_lora.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct
+#   tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2.5-3B-Instruct
 #
 # To launch on 2 devices, run the following command from root:
 #   tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2_5/3B_lora
@@ -30,13 +30,13 @@ model:
 
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-3B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-3B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-3B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-3B-Instruct/merges.txt
   max_seq_len: null
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-3B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-3B-Instruct
   checkpoint_files: [
     model-00001-of-00002.safetensors,
     model-00002-of-00002.safetensors,
diff --git a/recipes/configs/qwen2_5/3B_lora_single_device.yaml b/recipes/configs/qwen2_5/3B_lora_single_device.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct
+#   tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2.5-3B-Instruct
 #
 # To launch on a single device, run the following command from root:
 #   tune run lora_finetune_single_device --config qwen2_5/3B_lora_single_device
@@ -29,13 +29,13 @@ model:
 
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-3B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-3B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-3B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-3B-Instruct/merges.txt
   max_seq_len: null
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-3B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-3B-Instruct
   checkpoint_files: [
     model-00001-of-00002.safetensors,
     model-00002-of-00002.safetensors,
diff --git a/recipes/configs/qwen2_5/72B_lora.yaml b/recipes/configs/qwen2_5/72B_lora.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-72B-Instruct --output-dir /tmp/Qwen2_5-72B-Instruct
+#   tune download Qwen/Qwen2.5-72B-Instruct --output-dir /tmp/Qwen2.5-72B-Instruct
 #
 # To launch on 8 devices, run the following command from root:
 #   tune run --nnodes 1 --nproc_per_node 8 lora_finetune_distributed --config qwen2_5/72B_lora
@@ -28,13 +28,13 @@ model:
 
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-72B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-72B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-72B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-72B-Instruct/merges.txt
   max_seq_len: null
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-72B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-72B-Instruct
   checkpoint_files:
     filename_format: model-{}-of-{}.safetensors
     max_filename: "00037"
diff --git a/recipes/configs/qwen2_5/7B_full.yaml b/recipes/configs/qwen2_5/7B_full.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct
+#   tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2.5-7B-Instruct
 #
 # To launch on 2 devices, run the following command from root:
 #   tune run --nnodes 1 --nproc_per_node 2 full_finetune_distributed --config qwen2_5/7B_full
@@ -22,8 +22,8 @@ output_dir: /tmp/torchtune/qwen2_5_7B/full # /tmp may be deleted by your system.
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-7B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-7B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-7B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-7B-Instruct/merges.txt
   max_seq_len: null
 
 # Dataset
@@ -39,7 +39,7 @@ model:
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-7B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-7B-Instruct
   checkpoint_files: [
     model-00001-of-00004.safetensors,
     model-00002-of-00004.safetensors,
diff --git a/recipes/configs/qwen2_5/7B_full_single_device.yaml b/recipes/configs/qwen2_5/7B_full_single_device.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct
+#   tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2.5-7B-Instruct
 #
 # The default config uses an optimizer from bitsandbytes. If you do not have it installed,
 # you can install it with
@@ -24,8 +24,8 @@ output_dir: /tmp/torchtune/qwen2_5_7B/full_single_device # /tmp may be deleted b
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-7B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-7B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-7B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-7B-Instruct/merges.txt
   max_seq_len: null
 
 # Dataset
@@ -41,7 +41,7 @@ model:
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-7B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-7B-Instruct
   checkpoint_files: [
     model-00001-of-00004.safetensors,
     model-00002-of-00004.safetensors,
diff --git a/recipes/configs/qwen2_5/7B_lora.yaml b/recipes/configs/qwen2_5/7B_lora.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct
+#   tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2.5-7B-Instruct
 #
 # To launch on 2 devices, run the following command from root:
 #   tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2_5/7B_lora
@@ -31,13 +31,13 @@ model:
 
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-7B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-7B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-7B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-7B-Instruct/merges.txt
   max_seq_len: null
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-7B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-7B-Instruct
   checkpoint_files: [
     model-00001-of-00004.safetensors,
     model-00002-of-00004.safetensors,
diff --git a/recipes/configs/qwen2_5/7B_lora_single_device.yaml b/recipes/configs/qwen2_5/7B_lora_single_device.yaml
@@ -3,7 +3,7 @@
 #
 # This config assumes that you've run the following command before launching
 # this run:
-#   tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct
+#   tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2.5-7B-Instruct
 #
 # To launch on a single device, run the following command from root:
 #   tune run lora_finetune_single_device --config qwen2_5/7B_lora_single_device
@@ -30,13 +30,13 @@ model:
 
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-7B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-7B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-7B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-7B-Instruct/merges.txt
   max_seq_len: null
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-7B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-7B-Instruct
   checkpoint_files: [
     model-00001-of-00004.safetensors,
     model-00002-of-00004.safetensors,
diff --git a/recipes/configs/qwen2_5/evaluation.yaml b/recipes/configs/qwen2_5/evaluation.yaml
@@ -11,7 +11,7 @@ model:
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Qwen2_5-0_5B-Instruct
+  checkpoint_dir: /tmp/Qwen2.5-0.5B-Instruct
   checkpoint_files: [
     model.safetensors,
   ]
@@ -21,8 +21,8 @@ checkpointer:
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
-  path: /tmp/Qwen2_5-0_5B-Instruct/vocab.json
-  merges_file: /tmp/Qwen2_5-0_5B-Instruct/merges.txt
+  path: /tmp/Qwen2.5-0.5B-Instruct/vocab.json
+  merges_file: /tmp/Qwen2.5-0.5B-Instruct/merges.txt
   max_seq_len: null
 
 # Environment