14 files changed: +69 -49 lines changed

@@ -72,7 +72,8 @@ torchtune provides the following finetuning recipes for training on one or more
| DoRA/QDoRA Finetuning | ✅ | ✅ | ❌ | [lora_finetune_single_device](recipes/lora_finetune_single_device.py) <br> [lora_finetune_distributed](recipes/lora_finetune_distributed.py) | [Llama3 8B QDoRA single-device](recipes/configs/llama3/8B_qdora_single_device.yaml) <br> [Llama3 8B DoRA distributed](recipes/configs/llama3/8B_dora.yaml)
| Quantization-Aware Training | ❌ | ✅ | ❌ | [qat_distributed](recipes/qat_distributed.py) | [Llama3 8B QAT](recipes/configs/llama3/8B_qat_full.yaml)
| Quantization-Aware Training and LoRA Finetuning | ❌ | ✅ | ❌ | [qat_lora_finetune_distributed](recipes/qat_lora_finetune_distributed.py) | [Llama3 8B QAT](recipes/configs/llama3/8B_qat_lora.yaml)
- | Direct Preference Optimization | ✅ | ✅ | ❌ | [lora_dpo_single_device](recipes/lora_dpo_single_device.py) <br> [lora_dpo_distributed](recipes/lora_dpo_distributed.py) | [Llama2 7B single-device](recipes/configs/llama2/7B_lora_dpo_single_device.yaml) <br> [Llama2 7B distributed](recipes/configs/llama2/7B_lora_dpo.yaml)
+ | Direct Preference Optimization: Full Finetuning | ❌ | ✅ | ❌ | [full_dpo_distributed](recipes/full_dpo_distributed.py) | [Llama3.1 8B DPO](recipes/configs/llama3_1/8B_full_dpo.yaml)
+ | LoRA Direct Preference Optimization | ✅ | ✅ | ❌ | [lora_dpo_single_device](recipes/lora_dpo_single_device.py) <br> [lora_dpo_distributed](recipes/lora_dpo_distributed.py) | [Llama3.1 8B single-device](recipes/configs/llama3_1/8B_lora_dpo_single_device.yaml) <br> [Llama3.1 8B distributed](recipes/configs/llama3_1/8B_lora_dpo.yaml)
| Proximal Policy Optimization | ✅ | ❌ | ❌ | [ppo_full_finetune_single_device](recipes/ppo_full_finetune_single_device.py) | [Mistral 7B](recipes/configs/mistral/7B_full_ppo_low_memory.yaml)
| LoRA Knowledge Distillation | ✅ | ✅ | ❌ | [knowledge_distillation_single_device](recipes/knowledge_distillation_single_device.py) <br> [knowledge_distillation_distributed](recipes/knowledge_distillation_distributed.py) | [Qwen2 1.5B -> 0.5B single-device](recipes/configs/qwen2/1.5B_to_0.5B_KD_lora_single_device.yaml) <br> [Qwen2 1.5B -> 0.5B distributed](recipes/configs/qwen2/1.5B_to_0.5B_KD_lora_distributed.yaml)
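
The two new DPO rows follow the same `tune run` launch pattern used in the config headers later in this diff. A minimal usage sketch (the process count and the single-device variant chosen here are illustrative, not part of this change):

```bash
# Full-finetune DPO, distributed recipe from the new table row
tune run --nnodes 1 --nproc_per_node 8 full_dpo_distributed --config llama3_1/8B_full_dpo

# LoRA DPO on a single device, using the Llama3.1 config linked in the table
tune run lora_dpo_single_device --config llama3_1/8B_lora_dpo_single_device
```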

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-14B-Instruct --output-dir /tmp/Qwen2_5-14B-Instruct
+ # tune download Qwen/Qwen2.5-14B-Instruct --output-dir /tmp/Qwen2.5-14B-Instruct
#
# To launch on a single device, run the following command from root:
# tune run lora_finetune_single_device --config qwen2_5/14B_lora_single_device

@@ -30,13 +30,13 @@ model:

tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-14B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-14B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-14B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-14B-Instruct/merges.txt
max_seq_len: null

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-14B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-14B-Instruct
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00008"
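
The two comment lines above give the corrected end-to-end flow for this config; a minimal sketch, assuming the download directory now matches what the tokenizer and checkpointer paths expect:

```bash
# Download to the directory the updated config points at
tune download Qwen/Qwen2.5-14B-Instruct --output-dir /tmp/Qwen2.5-14B-Instruct

# Launch with the config named in the header comment
tune run lora_finetune_single_device --config qwen2_5/14B_lora_single_device
```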

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-32B-Instruct --output-dir /tmp/Qwen2_5-32B-Instruct
+ # tune download Qwen/Qwen2.5-32B-Instruct --output-dir /tmp/Qwen2.5-32B-Instruct
#
# To launch on 8 devices, run the following command from root:
# tune run --nnodes 1 --nproc_per_node 8 lora_finetune_distributed --config qwen2_5/32B_lora

@@ -28,13 +28,13 @@ model:

tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-32B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-32B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-32B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-32B-Instruct/merges.txt
max_seq_len: null

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-32B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-32B-Instruct
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00017"

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct
+ # tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2.5-3B-Instruct
#
# To launch on 2 devices, run the following command from root:
# tune run --nnodes 1 --nproc_per_node 2 full_finetune_distributed --config qwen2_5/3B_full

@@ -22,8 +22,8 @@ output_dir: /tmp/torchtune/qwen2_5_3B/full # /tmp may be deleted by your system.
# Tokenizer
tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-3B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-3B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-3B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-3B-Instruct/merges.txt
max_seq_len: null

# Dataset
...
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-3B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-3B-Instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors,

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct
+ # tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2.5-3B-Instruct
#
# The default config uses an optimizer from bitsandbytes. If you do not have it installed,
# you can install it with

@@ -24,8 +24,8 @@ output_dir: /tmp/torchtune/qwen2_5_3B/full_single_device # /tmp may be deleted b
# Tokenizer
tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-3B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-3B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-3B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-3B-Instruct/merges.txt
max_seq_len: null

# Dataset
...
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-3B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-3B-Instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors,

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct
+ # tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2.5-3B-Instruct
#
# To launch on 2 devices, run the following command from root:
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2_5/3B_lora

@@ -30,13 +30,13 @@ model:

tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-3B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-3B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-3B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-3B-Instruct/merges.txt
max_seq_len: null

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-3B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-3B-Instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors,

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct
+ # tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2.5-3B-Instruct
#
# To launch on a single device, run the following command from root:
# tune run lora_finetune_single_device --config qwen2_5/3B_lora_single_device

@@ -29,13 +29,13 @@ model:

tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-3B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-3B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-3B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-3B-Instruct/merges.txt
max_seq_len: null

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-3B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-3B-Instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors,

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-72B-Instruct --output-dir /tmp/Qwen2_5-72B-Instruct
+ # tune download Qwen/Qwen2.5-72B-Instruct --output-dir /tmp/Qwen2.5-72B-Instruct
#
# To launch on 8 devices, run the following command from root:
# tune run --nnodes 1 --nproc_per_node 8 lora_finetune_distributed --config qwen2_5/72B_lora

@@ -28,13 +28,13 @@ model:

tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-72B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-72B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-72B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-72B-Instruct/merges.txt
max_seq_len: null

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-72B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-72B-Instruct
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00037"

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct
+ # tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2.5-7B-Instruct
#
# To launch on 2 devices, run the following command from root:
# tune run --nnodes 1 --nproc_per_node 2 full_finetune_distributed --config qwen2_5/7B_full

@@ -22,8 +22,8 @@ output_dir: /tmp/torchtune/qwen2_5_7B/full # /tmp may be deleted by your system.
# Tokenizer
tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-7B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-7B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-7B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-7B-Instruct/merges.txt
max_seq_len: null

# Dataset
...
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-7B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-7B-Instruct
checkpoint_files: [
model-00001-of-00004.safetensors,
model-00002-of-00004.safetensors,

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct
+ # tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2.5-7B-Instruct
#
# The default config uses an optimizer from bitsandbytes. If you do not have it installed,
# you can install it with

@@ -24,8 +24,8 @@ output_dir: /tmp/torchtune/qwen2_5_7B/full_single_device # /tmp may be deleted b
# Tokenizer
tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-7B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-7B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-7B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-7B-Instruct/merges.txt
max_seq_len: null

# Dataset
...
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-7B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-7B-Instruct
checkpoint_files: [
model-00001-of-00004.safetensors,
model-00002-of-00004.safetensors,

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct
+ # tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2.5-7B-Instruct
#
# To launch on 2 devices, run the following command from root:
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2_5/7B_lora

@@ -31,13 +31,13 @@ model:

tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-7B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-7B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-7B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-7B-Instruct/merges.txt
max_seq_len: null

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-7B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-7B-Instruct
checkpoint_files: [
model-00001-of-00004.safetensors,
model-00002-of-00004.safetensors,

#
# This config assumes that you've run the following command before launching
# this run:
- # tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct
+ # tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2.5-7B-Instruct
#
# To launch on a single device, run the following command from root:
# tune run lora_finetune_single_device --config qwen2_5/7B_lora_single_device

@@ -30,13 +30,13 @@ model:

tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-7B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-7B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-7B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-7B-Instruct/merges.txt
max_seq_len: null

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-7B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-7B-Instruct
checkpoint_files: [
model-00001-of-00004.safetensors,
model-00002-of-00004.safetensors,

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
- checkpoint_dir: /tmp/Qwen2_5-0_5B-Instruct
+ checkpoint_dir: /tmp/Qwen2.5-0_5B-Instruct
checkpoint_files: [
model.safetensors,
]

@@ -21,8 +21,8 @@ checkpointer:
# Tokenizer
tokenizer:
_component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
- path: /tmp/Qwen2_5-0_5B-Instruct/vocab.json
- merges_file: /tmp/Qwen2_5-0_5B-Instruct/merges.txt
+ path: /tmp/Qwen2.5-0_5B-Instruct/vocab.json
+ merges_file: /tmp/Qwen2.5-0_5B-Instruct/merges.txt
max_seq_len: null

# Environment

flex_attention,
)

- flex_attention_compiled = torch.compile(flex_attention, dynamic=False)
+ def compile_flex_attention():
+     try:
+         return torch.compile(flex_attention, dynamic=False)
+     except Exception as e:
+         # It may fail on some combinations of hardware/versions. Using max-autotune fixes this issue.
+         # Context: https://github.com/pytorch/torchtune/issues/2113
+         _log.info(
+             f"Compiling flex_attention failed with error '{e}'. Retrying with mode='max-autotune'."
+         )
+         try:
+             return torch.compile(flex_attention, dynamic=False, mode="max-autotune")
+         except Exception as e:
+             _log.info(
+                 f"Compiling flex_attention failed with error '{e}'. "
+                 "Updating your pytorch version to nightlies may solve it, or you can set "
+                 "dataset.packed=False in your config to avoid using flex attention."
+             )
+             raise
+
+ flex_attention_compiled = compile_flex_attention()

# We cannot do nested compile, but flex attention only has perf benefits
# when compiled. To insulate it from the compiler, we wrap it with
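
The new fallback logs a message pointing users at `dataset.packed=False` when flex attention cannot be compiled. Assuming the standard `tune run` key=value override mechanism, that setting could also be supplied at launch rather than by editing the YAML; a sketch using one of the configs from this diff (the config choice here is illustrative):

```bash
# Disable packed datasets (and therefore the flex attention path) via a CLI override
tune run lora_finetune_single_device --config qwen2_5/7B_lora_single_device dataset.packed=False
```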