Skip to content

Commit 3630908

Browse files
committed
Add Phi4
1 parent 1a43259 commit 3630908

7 files changed

+66
-52
lines changed

recipes/configs/phi3/evaluation.yaml

+5-5
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,25 @@ output_dir: ./ # Not needed
77

88
# Model Arguments
99
model:
10-
_component_: torchtune.models.phi3.phi3_mini
10+
_component_: torchtune.models.phi4.phi4_mini
1111

1212
# Checkpointer
1313
checkpointer:
1414
_component_: torchtune.training.FullModelHFCheckpointer
15-
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
15+
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
1616
checkpoint_files: [
1717
model-00001-of-00002.safetensors,
1818
model-00002-of-00002.safetensors
1919
]
2020
recipe_checkpoint: null
2121
output_dir: ${output_dir}
22-
model_type: PHI3_MINI
22+
model_type: PHI4_MINI
2323
resume_from_checkpoint: False
2424

2525
# Tokenizer
2626
tokenizer:
27-
_component_: torchtune.models.phi3.phi3_mini_tokenizer
28-
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
27+
_component_: torchtune.models.phi4.phi4_mini_tokenizer
28+
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
2929
max_seq_len: null
3030

3131
# Environment

recipes/configs/phi4/mini_full.yaml

+9-9
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,43 @@
33
#
44
# This config assumes that you've run the following command before launching
55
# this run:
6-
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
6+
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
77
#
88
# Run this config on 4 GPUs using the following:
9-
# tune run --nproc_per_node 4 full_finetune_distributed --config phi3/mini_full
9+
# tune run --nproc_per_node 4 full_finetune_distributed --config phi4/mini_full
1010
#
1111
# You can add specific overrides through the command line. For example
1212
# to override the checkpointer directory while launching training
1313
# you can run:
14-
# tune run --nproc_per_node 4 full_finetune_distributed --config phi3/mini_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
14+
# tune run --nproc_per_node 4 full_finetune_distributed --config phi4/mini_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
1515
#
1616
# This config works best when the model is being fine-tuned on 2+ GPUs.
1717
# Single device full finetuning requires more memory optimizations. It's
1818
# best to use mini_low_memory.yaml for those cases
1919

20-
output_dir: /tmp/torchtune/phi3_mini/full # /tmp may be deleted by your system. Change it to your preference.
20+
output_dir: /tmp/torchtune/phi4_mini/full # /tmp may be deleted by your system. Change it to your preference.
2121

2222
# Model arguments
2323
model:
24-
_component_: torchtune.models.phi3.phi3_mini
24+
_component_: torchtune.models.phi4.phi4_mini
2525

2626
# Tokenizer
2727
tokenizer:
28-
_component_: torchtune.models.phi3.phi3_mini_tokenizer
29-
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
28+
_component_: torchtune.models.phi4.phi4_mini_tokenizer
29+
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
3030
max_seq_len: null
3131

3232
# Checkpointer
3333
checkpointer:
3434
_component_: torchtune.training.FullModelHFCheckpointer
35-
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
35+
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
3636
checkpoint_files: [
3737
model-00001-of-00002.safetensors,
3838
model-00002-of-00002.safetensors
3939
]
4040
recipe_checkpoint: null
4141
output_dir: ${output_dir}
42-
model_type: PHI3_MINI
42+
model_type: PHI4_MINI
4343
resume_from_checkpoint: False
4444

4545
# Dataset

recipes/configs/phi4/mini_full_low_memory.yaml

+9-9
Original file line numberDiff line numberDiff line change
@@ -3,45 +3,45 @@
33
#
44
# This config assumes that you've run the following command before launching
55
# this run:
6-
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
6+
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
77
#
88
# The default config uses an optimizer from bitsandbytes. If you do not have it installed,
99
# you can install it with
1010
# pip install bitsandbytes
1111
#
1212
# To launch on a single device, run the following command from root:
13-
# tune run full_finetune_single_device --config phi3/mini_full_low_memory
13+
# tune run full_finetune_single_device --config phi4/mini_full_low_memory
1414
#
1515
# You can add specific overrides through the command line. For example
1616
# to override the checkpointer directory while launching training
1717
# you can run:
18-
# tune run full_finetune_single_device --config phi3/mini_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
18+
# tune run full_finetune_single_device --config phi4/mini_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
1919
#
2020
# This config works only for training on single device.
2121

22-
output_dir: /tmp/torchtune/phi3_mini/full_low_memory # /tmp may be deleted by your system. Change it to your preference.
22+
output_dir: /tmp/torchtune/phi4_mini/full_low_memory # /tmp may be deleted by your system. Change it to your preference.
2323

2424
# Model arguments
2525
model:
26-
_component_: torchtune.models.phi3.phi3_mini
26+
_component_: torchtune.models.phi4.phi4_mini
2727

2828
# Tokenizer
2929
tokenizer:
30-
_component_: torchtune.models.phi3.phi3_mini_tokenizer
31-
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
30+
_component_: torchtune.models.phi4.phi4_mini_tokenizer
31+
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
3232
max_seq_len: null
3333

3434
# Checkpointer
3535
checkpointer:
3636
_component_: torchtune.training.FullModelHFCheckpointer
37-
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
37+
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
3838
checkpoint_files: [
3939
model-00001-of-00002.safetensors,
4040
model-00002-of-00002.safetensors
4141
]
4242
recipe_checkpoint: null
4343
output_dir: ${output_dir}
44-
model_type: PHI3_MINI
44+
model_type: PHI4_MINI
4545
resume_from_checkpoint: False
4646

4747
# Dataset

recipes/configs/phi4/mini_lora.yaml

+9-9
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,25 @@
33
#
44
# This config assumes that you've run the following command before launching
55
# this run:
6-
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
6+
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
77
#
88
# To launch on 2 devices, run the following command from root:
9-
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi3/mini_lora
9+
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi4/mini_lora
1010
#
1111
# You can add specific overrides through the command line. For example
1212
# to override the checkpointer directory while launching training
1313
# you can run:
14-
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi3/mini_lora checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
14+
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi4/mini_lora checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
1515
#
1616
# This config works best when the model is being fine-tuned on 2+ GPUs.
1717
# For single device LoRA finetuning please use mini_lora_single_device.yaml
1818
# or mini_qlora_single_device.yaml
1919

20-
output_dir: /tmp/torchtune/phi3_mini/lora # /tmp may be deleted by your system. Change it to your preference.
20+
output_dir: /tmp/torchtune/phi4_mini/lora # /tmp may be deleted by your system. Change it to your preference.
2121

2222
# Model arguments
2323
model:
24-
_component_: torchtune.models.phi3.lora_phi3_mini
24+
_component_: torchtune.models.phi4.lora_phi4_mini
2525
lora_attn_modules: ['q_proj', 'v_proj', 'output_proj']
2626
apply_lora_to_mlp: True
2727
apply_lora_to_output: False
@@ -31,21 +31,21 @@ model:
3131

3232
# Tokenizer
3333
tokenizer:
34-
_component_: torchtune.models.phi3.phi3_mini_tokenizer
35-
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
34+
_component_: torchtune.models.phi4.phi4_mini_tokenizer
35+
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
3636
max_seq_len: null
3737

3838
# Checkpointer
3939
checkpointer:
4040
_component_: torchtune.training.FullModelHFCheckpointer
41-
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
41+
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
4242
checkpoint_files: [
4343
model-00001-of-00002.safetensors,
4444
model-00002-of-00002.safetensors
4545
]
4646
recipe_checkpoint: null
4747
output_dir: ${output_dir}
48-
model_type: PHI3_MINI
48+
model_type: PHI4_MINI
4949
resume_from_checkpoint: False
5050
save_adapter_weights_only: False
5151

recipes/configs/phi4/mini_lora_single_device.yaml

+10-10
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,23 @@
33
#
44
# This config assumes that you've run the following command before launching
55
# this run:
6-
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
6+
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
77
#
88
# To launch on a single device, run the following command from root:
9-
# tune run lora_finetune_single_device --config phi3/mini_lora_single_device
9+
# tune run lora_finetune_single_device --config phi4/mini_lora_single_device
1010
#
1111
# You can add specific overrides through the command line. For example
1212
# to override the checkpointer directory while launching training
1313
# you can run:
14-
# tune run lora_finetune_single_device --config phi3/mini_lora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
14+
# tune run lora_finetune_single_device --config phi4/mini_lora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
1515
#
1616
# This config works only for training on single device.
1717

18-
output_dir: /tmp/torchtune/phi3_mini/lora_single_device # /tmp may be deleted by your system. Change it to your preference.
18+
output_dir: /tmp/torchtune/phi4_mini/lora_single_device # /tmp may be deleted by your system. Change it to your preference.
1919

2020
# Model arguments
2121
model:
22-
_component_: torchtune.models.phi3.lora_phi3_mini
22+
_component_: torchtune.models.phi4.lora_phi4_mini
2323
lora_attn_modules: ['q_proj', 'v_proj', 'output_proj']
2424
apply_lora_to_mlp: True
2525
apply_lora_to_output: False
@@ -29,21 +29,21 @@ model:
2929

3030
# Tokenizer
3131
tokenizer:
32-
_component_: torchtune.models.phi3.phi3_mini_tokenizer
33-
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
32+
_component_: torchtune.models.phi4.phi4_mini_tokenizer
33+
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
3434
max_seq_len: null
3535

3636
# Checkpointer
3737
checkpointer:
3838
_component_: torchtune.training.FullModelHFCheckpointer
39-
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
39+
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
4040
checkpoint_files: [
4141
model-00001-of-00002.safetensors,
4242
model-00002-of-00002.safetensors
4343
]
4444
recipe_checkpoint: null
4545
output_dir: ${output_dir}
46-
model_type: PHI3_MINI
46+
model_type: PHI4_MINI
4747
resume_from_checkpoint: False
4848
save_adapter_weights_only: False
4949

@@ -95,7 +95,7 @@ profiler:
9595
enabled: False
9696

9797
#Output directory of trace artifacts
98-
output_dir: /tmp/Phi-3-mini-4k-instruct/profiling_outputs
98+
output_dir: /tmp/Phi-4-mini-16k-instruct/profiling_outputs
9999

100100
#`torch.profiler.ProfilerActivity` types to trace
101101
cpu: True

recipes/configs/phi4/mini_qlora_single_device.yaml

+10-10
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,23 @@
33
#
44
# This config assumes that you've run the following command before launching
55
# this run:
6-
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
6+
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
77
#
88
# To launch on a single device, run the following command from root:
9-
# tune run lora_finetune_single_device --config phi3/mini_qlora_single_device
9+
# tune run lora_finetune_single_device --config phi4/mini_qlora_single_device
1010
#
1111
# You can add specific overrides through the command line. For example
1212
# to override the checkpointer directory while launching training
1313
# you can run:
14-
# tune run lora_finetune_single_device --config phi3/mini_qlora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
14+
# tune run lora_finetune_single_device --config phi4/mini_qlora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
1515
#
1616
# This config works only for training on single device.
1717

18-
output_dir: /tmp/torchtune/phi3_mini/qlora_single_device # /tmp may be deleted by your system. Change it to your preference.
18+
output_dir: /tmp/torchtune/phi4_mini/qlora_single_device # /tmp may be deleted by your system. Change it to your preference.
1919

2020
# Model arguments
2121
model:
22-
_component_: torchtune.models.phi3.qlora_phi3_mini
22+
_component_: torchtune.models.phi4.qlora_phi4_mini
2323
lora_attn_modules: ['q_proj', 'v_proj', 'output_proj']
2424
apply_lora_to_mlp: True
2525
apply_lora_to_output: False
@@ -29,21 +29,21 @@ model:
2929

3030
# Tokenizer
3131
tokenizer:
32-
_component_: torchtune.models.phi3.phi3_mini_tokenizer
33-
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
32+
_component_: torchtune.models.phi4.phi4_mini_tokenizer
33+
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
3434
max_seq_len: null
3535

3636
# Checkpointer
3737
checkpointer:
3838
_component_: torchtune.training.FullModelHFCheckpointer
39-
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
39+
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
4040
checkpoint_files: [
4141
model-00001-of-00002.safetensors,
4242
model-00002-of-00002.safetensors
4343
]
4444
recipe_checkpoint: null
4545
output_dir: ${output_dir}
46-
model_type: PHI3_MINI
46+
model_type: PHI4_MINI
4747
resume_from_checkpoint: False
4848
save_adapter_weights_only: False
4949

@@ -95,7 +95,7 @@ profiler:
9595
enabled: False
9696

9797
# Output directory of trace artifacts
98-
output_dir: /tmp/Phi-3-mini-4k-instruct/profiling_outputs
98+
output_dir: /tmp/Phi-4-mini-16k-instruct/profiling_outputs
9999

100100
#`torch.profiler.ProfilerActivity` types to trace
101101
cpu: True

torchtune/_recipe_registry.py

+14
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ class Recipe:
5959
name="phi3/mini_full_low_memory",
6060
file_path="phi3/mini_full_low_memory.yaml",
6161
),
62+
Config(
63+
name="phi4/mini_full_low_memory",
64+
file_path="phi4/mini_full_low_memory.yaml",
65+
),
6266
Config(
6367
name="qwen2/7B_full_single_device",
6468
file_path="qwen2/7B_full_single_device.yaml",
@@ -114,6 +118,7 @@ class Recipe:
114118
Config(name="gemma2/9B_full", file_path="gemma2/9B_full.yaml"),
115119
Config(name="gemma2/27B_full", file_path="gemma2/27B_full.yaml"),
116120
Config(name="phi3/mini_full", file_path="phi3/mini_full.yaml"),
121+
Config(name="phi4/mini_full", file_path="phi4/mini_full.yaml"),
117122
Config(name="qwen2/7B_full", file_path="qwen2/7B_full.yaml"),
118123
Config(name="qwen2/0.5B_full", file_path="qwen2/0.5B_full.yaml"),
119124
Config(name="qwen2/1.5B_full", file_path="qwen2/1.5B_full.yaml"),
@@ -252,6 +257,14 @@ class Recipe:
252257
name="phi3/mini_qlora_single_device",
253258
file_path="phi3/mini_qlora_single_device.yaml",
254259
),
260+
Config(
261+
name="phi4/mini_lora_single_device",
262+
file_path="phi4/mini_lora_single_device.yaml",
263+
),
264+
Config(
265+
name="phi4/mini_qlora_single_device",
266+
file_path="phi4/mini_qlora_single_device.yaml",
267+
),
255268
Config(
256269
name="qwen2/7B_lora_single_device",
257270
file_path="qwen2/7B_lora_single_device.yaml",
@@ -371,6 +384,7 @@ class Recipe:
371384
Config(name="gemma2/9B_lora", file_path="gemma2/9B_lora.yaml"),
372385
Config(name="gemma2/27B_lora", file_path="gemma2/27B_lora.yaml"),
373386
Config(name="phi3/mini_lora", file_path="phi3/mini_lora.yaml"),
387
Config(name="phi4/mini_lora", file_path="phi4/mini_lora.yaml"),
374388
Config(name="qwen2/7B_lora", file_path="qwen2/7B_lora.yaml"),
375389
Config(name="qwen2/0.5B_lora", file_path="qwen2/0.5B_lora.yaml"),
376390
Config(name="qwen2/1.5B_lora", file_path="qwen2/1.5B_lora.yaml"),

0 commit comments

Comments
 (0)