@@ -37,10 +37,10 @@
 from ipex_llm.transformers.qlora import get_peft_model, prepare_model_for_kbit_training
 from ipex_llm.transformers import AutoModelForCausalLM
 import transformers
-from transformers import AutoTokenizer, TrainingArguments, BitsAndBytesConfig
+from transformers import AutoTokenizer, BitsAndBytesConfig
 from datasets import load_dataset
 from peft import LoraConfig
-from trl import DPOTrainer
+from trl import DPOConfig, DPOTrainer
 import argparse


@@ -83,7 +83,7 @@ def chatml_format(example):
     dataset_path = args.dataset
     output_path = args.output_path
     gradient_checkpointing = args.gradient_checkpointing
-
+
     # Load dataset
     dataset = load_dataset(dataset_path)['train']

@@ -143,12 +143,15 @@ def chatml_format(example):
     ref_model = ref_model.to('xpu')

     # Training arguments
-    training_args = TrainingArguments(
+    training_args = DPOConfig(
         per_device_train_batch_size=4,
         gradient_accumulation_steps=4,
         gradient_checkpointing=gradient_checkpointing,
         learning_rate=5e-5,
         lr_scheduler_type="cosine",
+        beta=0.1,
+        max_prompt_length=1024,
+        max_length=1536,
         max_steps=200,
         save_strategy="no",
         logging_steps=1,
@@ -166,9 +169,6 @@ def chatml_format(example):
         args=training_args,
         train_dataset=dataset,
         tokenizer=tokenizer,
-        beta=0.1,
-        max_prompt_length=1024,
-        max_length=1536,
     )

     # Fine-tune model with DPO
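For reference, below is a minimal end-to-end sketch of how the pieces fit together after this change, with the DPO-specific hyperparameters (beta, max_prompt_length, max_length) carried by DPOConfig instead of being passed to DPOTrainer. It assumes trl >= 0.9.0, where DPOConfig was introduced; the model name, the tiny in-memory dataset, and output_dir are illustrative stand-ins rather than the values used by the example script, and the ipex-llm low-bit loading and XPU placement from the example are omitted.

# Sketch only: plain-transformers loading; the real example uses
# ipex_llm.transformers.AutoModelForCausalLM and moves the models to 'xpu'.
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model_name = "Qwen/Qwen2-0.5B-Instruct"                       # illustrative small model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)      # policy model
ref_model = AutoModelForCausalLM.from_pretrained(model_name)  # frozen reference model

# Tiny stand-in preference dataset, already in the prompt/chosen/rejected text
# format that the example produces with its chatml_format() mapping.
dataset = Dataset.from_dict({
    "prompt":   ["<|im_start|>user\nWhat is 2 + 2?<|im_end|>\n<|im_start|>assistant\n"],
    "chosen":   ["4<|im_end|>\n"],
    "rejected": ["5<|im_end|>\n"],
})

training_args = DPOConfig(
    output_dir="dpo-output",            # stand-in for the script's output_path
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    beta=0.1,                 # moved onto the config by this change
    max_prompt_length=1024,   # moved onto the config by this change
    max_length=1536,          # moved onto the config by this change
    max_steps=200,
    save_strategy="no",
    logging_steps=1,
)

dpo_trainer = DPOTrainer(
    model,
    ref_model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,      # newer trl releases rename this to processing_class
)
dpo_trainer.train()

The training loop itself is unchanged; only where the DPO hyperparameters are declared has moved.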