
Commit c0d6b28

fix lisa finetune example (#12775)
1 parent 2e5f2e5 commit c0d6b28

File tree: 2 files changed (+14, -15 lines)

python/llm/example/GPU/LLM-Finetuning/LISA/README.md

Lines changed: 11 additions & 13 deletions
````diff
@@ -13,10 +13,8 @@ conda create -n llm python=3.11
 conda activate llm
 # below command will install intel_extension_for_pytorch==2.1.10+xpu as default
 pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-pip install bitsandbytes==0.43.0
-pip install datasets==2.18.0
-pip install --upgrade transformers==4.36.0
-pip install scipy fire
+pip install transformers==4.45.0 "trl<0.12.0" datasets
+pip install bitsandbytes==0.45.1 scipy fire
 ```
 
 ### 2. LISA Finetune
````
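
Since the commit changes the pinned dependency set, a quick environment check can catch a stale install before a run. This snippet is hypothetical and not part of the example; it only restates the pins from the diff above.

```python
# Hypothetical sanity check for the updated pins (not part of the example).
import bitsandbytes
import transformers
import trl

assert transformers.__version__ == "4.45.0", transformers.__version__
assert bitsandbytes.__version__ == "0.45.1", bitsandbytes.__version__
# trl is only bounded above ("trl<0.12.0"), so check the bound, not an exact pin.
major, minor = (int(v) for v in trl.__version__.split(".")[:2])
assert (major, minor) < (0, 12), trl.__version__
print("Environment matches the README pins.")
```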
````diff
@@ -51,23 +49,23 @@ Optional parameters for `lisa_finetuning.py`:
 
 ```log
 ......
-{'loss': 1.8391, 'learning_rate': 1.9967238104745695e-05, 'epoch': 0.03}
-{'loss': 1.8242, 'learning_rate': 1.9869167087338908e-05, 'epoch': 0.05}
+{'loss': 1.8391, 'learning_rate': 1.9967238104745695e-05, 'epoch': 0.03}
+{'loss': 1.8242, 'learning_rate': 1.9869167087338908e-05, 'epoch': 0.05}
  5%|██████▉   | 20/388 [xx:xx<x:xx:xx, x.xxs/it]
 Activating layers at indices: [10] for the next steps.
-{'loss': 1.8128, 'learning_rate': 1.9706429546259592e-05, 'epoch': 0.08}
-{'loss': 1.775, 'learning_rate': 1.9480091799562706e-05, 'epoch': 0.1}
+{'loss': 1.8128, 'learning_rate': 1.9706429546259592e-05, 'epoch': 0.08}
+{'loss': 1.775, 'learning_rate': 1.9480091799562706e-05, 'epoch': 0.1}
  10%|██████████████ | 40/388 [xx:xx<xx:xx, x.xxs/it]
 Activating layers at indices: [30] for the next steps.
-{'loss': 1.7669, 'learning_rate': 1.9191636897958123e-05, 'epoch': 0.13}
-{'loss': 1.7749, 'learning_rate': 1.8842954907300236e-05, 'epoch': 0.15}
+{'loss': 1.7669, 'learning_rate': 1.9191636897958123e-05, 'epoch': 0.13}
+{'loss': 1.7749, 'learning_rate': 1.8842954907300236e-05, 'epoch': 0.15}
  15%|█████████████████████ | 60/388 [xx:xx<xx:xx, x.xxs/it]
 Activating layers at indices: [26] for the next steps.
-{'loss': 1.7735, 'learning_rate': 1.8436330524160048e-05, 'epoch': 0.18}
-{'loss': 1.7199, 'learning_rate': 1.797442810562721e-05, 'epoch': 0.21}
+{'loss': 1.7735, 'learning_rate': 1.8436330524160048e-05, 'epoch': 0.18}
+{'loss': 1.7199, 'learning_rate': 1.797442810562721e-05, 'epoch': 0.21}
  21%|████████████████████████████ | 80/388 [xx:xx<xx:xx, x.xxs/it]
 Activating layers at indices: [17] for the next steps.
-{'loss': 1.7328, 'learning_rate': 1.7460274211432463e-05, 'epoch': 0.23}
+{'loss': 1.7328, 'learning_rate': 1.7460274211432463e-05, 'epoch': 0.23}
 25%|█████████████████████████████████▋ | 96/388 [xx:xx<xx:xx, x.xxs/it]
 ......
 
````
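For context on the `Activating layers at indices: [...]` lines in the log above: LISA periodically freezes the whole decoder stack and unfreezes a small, randomly sampled subset of layers for the next block of steps. The sketch below illustrates that idea with a plain `transformers` `TrainerCallback`; the class name and its parameters are hypothetical, and this is not the implementation the example uses.

```python
import random

from transformers import TrainerCallback


class LisaLayerSwitchSketch(TrainerCallback):
    """Hypothetical sketch: re-sample which decoder layers are trainable every N steps."""

    def __init__(self, layers, n_active=1, interval=20):
        self.layers = layers        # e.g. model.model.layers for a Llama-style model
        self.n_active = n_active    # how many layers stay unfrozen at a time
        self.interval = interval    # steps between re-sampling

    def on_step_begin(self, args, state, control, **kwargs):
        if state.global_step % self.interval != 0:
            return
        # Freeze every layer, then unfreeze a random subset for the next interval.
        for layer in self.layers:
            for p in layer.parameters():
                p.requires_grad = False
        active = random.sample(range(len(self.layers)), self.n_active)
        print(f"Activating layers at indices: {active} for the next steps.")
        for i in active:
            for p in self.layers[i].parameters():
                p.requires_grad = True
```

With `interval=20` and `n_active=1`, this reproduces the cadence of the log above: one layer index is re-drawn every 20 steps.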
python/llm/example/GPU/LLM-Finetuning/LISA/lisa_finetuning.py

Lines changed: 3 additions & 2 deletions
```diff
@@ -90,9 +90,10 @@ def train(
     model = AutoModelForCausalLM.from_pretrained(
         base_model,
         load_in_low_bit="bf16",
-        optimize_model=True,
+        optimize_model=False,
         torch_dtype=torch.bfloat16,
-        trust_remote_code=True
+        trust_remote_code=True,
+        modules_to_not_convert=["lm_head"],  # avoid optimizing lm_head
     )
 
     model = model.to("xpu")
```
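
Put in context, the updated loading code reads roughly as below. The import path and the `base_model` value are assumptions for illustration; the keyword arguments come straight from the diff. `modules_to_not_convert=["lm_head"]` keeps the LM head out of the low-bit conversion, which is the stated intent of the fix.

```python
import torch

from ipex_llm.transformers import AutoModelForCausalLM  # assumed import path

base_model = "meta-llama/Llama-2-7b-hf"  # hypothetical; the script takes this as a parameter

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_low_bit="bf16",
    optimize_model=False,                # ipex-llm model optimizations disabled by this commit
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    modules_to_not_convert=["lm_head"],  # keep lm_head in its original precision
)
model = model.to("xpu")                  # move the model to the Intel GPU
```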
