
Commit b4c9e23

fix galore and peft finetune example (#12776)
1 parent c0d6b28 commit b4c9e23

File tree

3 files changed: +10 -13 lines changed


python/llm/example/GPU/LLM-Finetuning/GaLore/README.md

+2 -5

@@ -13,11 +13,8 @@ conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 pip install galore-torch
-pip install accelerate==0.28.0
-pip install bitsandbytes==0.43.0
-pip install datasets==2.18.0
-pip install transformers==4.39.1
-pip install trl==0.8.1
+pip install transformers==4.45.0 "trl<0.12.0" datasets
+pip install bitsandbytes==0.45.1
 ```

 ### 2. GaLore Finetune
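For orientation only (not part of this commit): with transformers 4.45 and galore-torch installed, GaLore is typically enabled through the Trainer's optimizer settings. The sketch below is a minimal, assumed illustration of that API rather than the repository's example script; the model id, dataset, and hyperparameters are placeholders.

```python
# Minimal sketch (assumption, not the repository's GaLore example script):
# with transformers>=4.39 and galore-torch installed, GaLore is selected via
# TrainingArguments.optim. Model id, dataset and hyperparameters are placeholders.
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

base_model = "meta-llama/Llama-2-7b-hf"  # placeholder model id
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.bfloat16)

# Placeholder dataset, tokenized to input_ids/attention_mask.
data = load_dataset("Abirate/english_quotes", split="train")
data = data.map(lambda x: tokenizer(x["quote"], truncation=True, max_length=256))

args = TrainingArguments(
    output_dir="galore-out",
    per_device_train_batch_size=1,
    max_steps=100,
    optim="galore_adamw",                  # GaLore optimizer (requires galore-torch)
    optim_target_modules=["attn", "mlp"],  # modules whose gradients GaLore projects
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=data,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
```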

python/llm/example/GPU/LLM-Finetuning/HF-PEFT/README.md

+2 -2

@@ -14,10 +14,10 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install transformers==4.36.0 datasets
+pip install transformers==4.45.0 "trl<0.12.0" datasets
+pip install bitsandbytes==0.45.1 scipy
 pip install fire peft==0.10.0
 pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ # necessary to run distributed finetuning
-pip install bitsandbytes scipy
 ```

 ### 2. Configures OneAPI environment variables
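As a side note on the oneccl_bind_pt pin above: that wheel provides the CCL bindings torch.distributed needs for the distributed finetuning run. A rough, assumed sketch of that initialization (not code from this repository) is:

```python
# Assumed sketch of distributed initialization with the oneCCL bindings
# installed by oneccl_bind_pt; not taken from the example scripts.
import os

import torch.distributed as dist
import oneccl_bindings_for_pytorch  # noqa: F401  registers the "ccl" backend

# RANK / WORLD_SIZE / MASTER_ADDR / MASTER_PORT normally come from the launcher.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")

dist.init_process_group(
    backend="ccl",
    rank=int(os.environ.get("RANK", "0")),
    world_size=int(os.environ.get("WORLD_SIZE", "1")),
)
print(f"rank {dist.get_rank()} / world size {dist.get_world_size()} ready")
dist.destroy_process_group()
```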

python/llm/example/GPU/LLM-Finetuning/HF-PEFT/alpaca-lora/finetune.py

+6 -6

@@ -53,10 +53,10 @@
     LoraConfig,
     get_peft_model,
     get_peft_model_state_dict,
-    prepare_model_for_int8_training,
+    prepare_model_for_kbit_training,
     set_peft_model_state_dict,
 )
-from transformers import LlamaForCausalLM, LlamaTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer

 from utils.prompter import Prompter

@@ -145,14 +145,14 @@ def train(
     if len(wandb_log_model) > 0:
         os.environ["WANDB_LOG_MODEL"] = wandb_log_model

-    model = LlamaForCausalLM.from_pretrained(
+    model = AutoModelForCausalLM.from_pretrained(
         base_model,
-        load_in_8bit=True,
+        load_in_4bit=True,
         torch_dtype=torch.float16,
         device_map=device_map,
     )

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model)

     tokenizer.pad_token_id = (
         0  # unk. we want this to be different from the eos token

@@ -207,7 +207,7 @@ def generate_and_tokenize_prompt(data_point):
            ] # could be sped up, probably
        return tokenized_full_prompt

-    model = prepare_model_for_int8_training(model)
+    model = prepare_model_for_kbit_training(model)

     config = LoraConfig(
         r=lora_r,
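Taken together, the finetune.py changes move the example to the Auto classes, 4-bit loading, and peft's k-bit preparation helper (prepare_model_for_int8_training is deprecated in newer peft releases). Below is a condensed, hypothetical sketch of that pattern; the LoRA hyperparameters, target modules, and model id are illustrative placeholders, not the script's values.

```python
# Condensed sketch of the updated pattern in finetune.py. LoRA settings and the
# model id are placeholders; the real script takes them as CLI arguments.
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model = "meta-llama/Llama-2-7b-hf"  # placeholder

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_4bit=True,           # replaces the removed load_in_8bit=True
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token_id = 0  # keep pad distinct from eos, as in the script

# prepare_model_for_kbit_training supersedes prepare_model_for_int8_training.
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
model.print_trainable_parameters()
```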
