Commit 2e5f2e5

fix dpo finetune (#12774)
1 parent 9697197 commit 2e5f2e5

File tree

2 files changed: +9 lines, -11 lines

python/llm/example/GPU/LLM-Finetuning/DPO/README.md

Lines changed: 2 additions & 4 deletions
@@ -17,11 +17,9 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install datasets
+pip install transformers==4.45.0 "trl<0.12.0" datasets
 pip install peft==0.10.0
-pip install 'trl<0.9'
-# Note, if you don't want to reinstall BNBs dependencies, append the `--no-deps` flag!
-pip install --no-deps --force-reinstall 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
+pip install bitsandbytes==0.45.1
 ```
 
 ### 2. Configures OneAPI environment variables
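
Not part of the commit, but a quick way to confirm the pinned stack before running the example — a hypothetical sanity check, assuming all four packages installed into the active `llm` environment:

```python
# Hypothetical version check (not in the commit): print installed versions to
# compare against the README pins (transformers==4.45.0, trl<0.12.0,
# peft==0.10.0, bitsandbytes==0.45.1).
import importlib.metadata as md

for pkg in ("transformers", "trl", "peft", "bitsandbytes"):
    print(f"{pkg}: {md.version(pkg)}")
```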

python/llm/example/GPU/LLM-Finetuning/DPO/dpo_finetuning.py

Lines changed: 7 additions & 7 deletions
@@ -37,10 +37,10 @@
 from ipex_llm.transformers.qlora import get_peft_model, prepare_model_for_kbit_training
 from ipex_llm.transformers import AutoModelForCausalLM
 import transformers
-from transformers import AutoTokenizer, TrainingArguments, BitsAndBytesConfig
+from transformers import AutoTokenizer, BitsAndBytesConfig
 from datasets import load_dataset
 from peft import LoraConfig
-from trl import DPOTrainer
+from trl import DPOConfig, DPOTrainer
 import argparse
 
 
@@ -83,7 +83,7 @@ def chatml_format(example):
 dataset_path = args.dataset
 output_path = args.output_path
 gradient_checkpointing = args.gradient_checkpointing
-
+
 # Load dataset
 dataset = load_dataset(dataset_path)['train']
 
@@ -143,12 +143,15 @@ def chatml_format(example):
 ref_model = ref_model.to('xpu')
 
 # Training arguments
-training_args = TrainingArguments(
+training_args = DPOConfig(
     per_device_train_batch_size=4,
     gradient_accumulation_steps=4,
     gradient_checkpointing=gradient_checkpointing,
     learning_rate=5e-5,
     lr_scheduler_type="cosine",
+    beta=0.1,
+    max_prompt_length=1024,
+    max_length=1536,
     max_steps=200,
     save_strategy="no",
     logging_steps=1,
@@ -166,9 +169,6 @@ def chatml_format(example):
     args=training_args,
     train_dataset=dataset,
     tokenizer=tokenizer,
-    beta=0.1,
-    max_prompt_length=1024,
-    max_length=1536,
 )
 
 # Fine-tune model with DPO
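
Net effect of the hunks above: the DPO-specific hyperparameters (`beta`, `max_prompt_length`, `max_length`) move out of the `DPOTrainer(...)` call and into `DPOConfig`, which is where newer trl releases expect them. A minimal sketch of the resulting wiring, with assumptions noted in the comments:

```python
# A minimal sketch of the post-fix wiring, assuming `model`, `ref_model`,
# `tokenizer`, and `dataset` are prepared as in the unchanged parts of
# dpo_finetuning.py; `output_dir` is an illustrative value, not from the diff.
from trl import DPOConfig, DPOTrainer

training_args = DPOConfig(
    output_dir="./dpo-output",       # illustrative; not shown in the hunks above
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    gradient_checkpointing=False,    # args.gradient_checkpointing in the full script
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    beta=0.1,                        # moved here from the DPOTrainer(...) call
    max_prompt_length=1024,          # moved here from the DPOTrainer(...) call
    max_length=1536,                 # moved here from the DPOTrainer(...) call
    max_steps=200,
    save_strategy="no",
    logging_steps=1,
)

dpo_trainer = DPOTrainer(
    model,                           # QLoRA-prepared policy model
    ref_model,                       # frozen reference model
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,             # accepted in trl < 0.12
)
dpo_trainer.train()
```

The README's `trl<0.12.0` pin appears to matter here: trl 0.12 removes the deprecated per-trainer hyperparameter kwargs and renames `tokenizer` to `processing_class`, so the sketch above is only expected to work on the pre-0.12 series.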
