Commit d0d9c9d

remove load_in_8bit usage as it has not been supported for a long time (#12779)
1 parent 9e9b6c9 commit d0d9c9d

File tree

7 files changed: +9 −15 lines


python/llm/dev/benchmark/harness/README.md

+6 −6

@@ -1,5 +1,5 @@
 # Harness Evaluation
-[Harness evaluation](https://github.com/EleutherAI/lm-evaluation-harness) allows users to eaisly get accuracy on various datasets. Here we have enabled harness evaluation with IPEX-LLM under
+[Harness evaluation](https://github.com/EleutherAI/lm-evaluation-harness) allows users to eaisly get accuracy on various datasets. Here we have enabled harness evaluation with IPEX-LLM under
 [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) settings.
 Before running, make sure to have [ipex-llm](../../../README.md) installed.

@@ -53,21 +53,21 @@ AutoModelForCausalLM.from_pretrained = partial(AutoModelForCausalLM.from_pretrai
 ```
 to the following codes to load the low bit models.
 ```python
-class ModifiedAutoModelForCausalLM(AutoModelForCausalLM):
+class ModifiedAutoModelForCausalLM(AutoModelForCausalLM):
     @classmethod
     def load_low_bit(cls,*args,**kwargs):
-        for k in ['load_in_low_bit', 'device_map', 'max_memory', 'load_in_8bit','load_in_4bit']:
+        for k in ['load_in_low_bit', 'device_map', 'max_memory','load_in_4bit']:
             kwargs.pop(k)
         return super().load_low_bit(*args, **kwargs)

 AutoModelForCausalLM.from_pretrained=partial(ModifiedAutoModelForCausalLM.load_low_bit, *self.bigdl_llm_kwargs)
 ```
 ### 2.Please pass the argument `trust_remote_code=True` to allow custom code to be run.
-`lm-evaluation-harness` doesn't pass `trust_remote_code=true` argument to datasets. This may cause errors similar to the following one:
+`lm-evaluation-harness` doesn't pass `trust_remote_code=true` argument to datasets. This may cause errors similar to the following one:
 ```
-RuntimeError: Job config of task=winogrande, precision=sym_int4 failed.
+RuntimeError: Job config of task=winogrande, precision=sym_int4 failed.
 Error Message: The repository for winogrande contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/winogrande.
-please pass the argument trust_remote_code=True to allow custom code to be run.
+please pass the argument trust_remote_code=True to allow custom code to be run.
 ```
 Please refer to these:
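A note on the snippet retained above: `kwargs.pop(k)` raises `KeyError` whenever one of the listed keys is absent. Below is a self-contained variant that pops defensively; it is a minimal sketch assuming ipex-llm's `AutoModelForCausalLM` and its `load_low_bit` classmethod, not a change made by this commit.

```python
from functools import partial

from ipex_llm.transformers import AutoModelForCausalLM


class ModifiedAutoModelForCausalLM(AutoModelForCausalLM):
    @classmethod
    def load_low_bit(cls, *args, **kwargs):
        # Strip kwargs that load_low_bit does not accept; popping with a
        # default avoids KeyError when a key was never passed.
        for k in ['load_in_low_bit', 'device_map', 'max_memory', 'load_in_4bit']:
            kwargs.pop(k, None)
        return super().load_low_bit(*args, **kwargs)


# Redirect from_pretrained to the low-bit loader, as the README does.
AutoModelForCausalLM.from_pretrained = partial(ModifiedAutoModelForCausalLM.load_low_bit)
```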

python/llm/example/GPU/LLM-Finetuning/axolotl/llama3-qlora.yml

−1

@@ -3,7 +3,6 @@ base_model: meta-llama/Meta-Llama-3-8B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer

-load_in_8bit: false
 load_in_4bit: true
 strict: false

python/llm/example/GPU/LLM-Finetuning/axolotl/lora.yml

−1

@@ -3,7 +3,6 @@ base_model: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer

-load_in_8bit: false
 load_in_4bit: true
 strict: false

python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml

−1

@@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true

-load_in_8bit: false
 load_in_4bit: true
 strict: false
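All three axolotl configs drop only the `load_in_8bit: false` line. For local configs that still carry the key, a hypothetical cleanup helper (assuming PyYAML is available; `sanitize_config` is illustrative and not part of this commit) could strip it before training:

```python
import yaml  # PyYAML, assumed available


def sanitize_config(path: str) -> dict:
    """Load an axolotl-style YAML config and drop the no-longer-supported
    load_in_8bit key (illustrative helper, not part of this commit)."""
    with open(path) as f:
        cfg = yaml.safe_load(f)
    cfg.pop("load_in_8bit", None)  # ipex-llm does not support 8-bit loading
    return cfg


cfg = sanitize_config("qlora.yml")  # e.g. any of the three configs above
```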

python/llm/example/GPU/Speculative-Decoding/EAGLE/evaluation/gen_ea_answer_llama2chat_e2_ipex_optimize.py

+3 −4

@@ -312,7 +312,6 @@ def get_model_answers(
         torch_dtype=torch.float16,
         # torch_dtype=torch.float32,
         low_cpu_mem_usage=True,
-        # load_in_8bit=True,
         total_token=args.total_token,
         depth=args.depth,
         top_k=args.top_k,

@@ -384,7 +383,7 @@ def get_model_answers(
         ]
         if len(stop_token_ids_index) > 0:
             output_ids = output_ids[: stop_token_ids_index[0]]
-
+
         output = tokenizer.decode(
             output_ids,
             spaces_between_special_tokens=False,

@@ -572,8 +571,8 @@ def reorg_answer_file(answer_file):
     )

     parser.add_argument(
-        "--enable-ipex-llm",
-        action='store_true',
+        "--enable-ipex-llm",
+        action='store_true',
         help="Enable ipex-llm optimization"
     )
     args = parser.parse_args()
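The last hunk only re-indents the `--enable-ipex-llm` flag; its behavior is unchanged. In isolation the pattern is plain argparse, shown here as a minimal sketch rather than the full script:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--enable-ipex-llm",
    action='store_true',  # False unless the flag is given
    help="Enable ipex-llm optimization"
)

args = parser.parse_args(["--enable-ipex-llm"])
print(args.enable_ipex_llm)  # True; argparse maps dashes to underscores
```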

python/llm/src/ipex_llm/transformers/model.py

−1

@@ -233,7 +233,6 @@ def from_pretrained(cls,
             optimize_model = False
             kwargs["modules_to_not_convert"] = ["lm_head"]

-        load_in_8bit = kwargs.pop("load_in_8bit", False)
         from ipex_llm.llm_patching import bigdl_patched
         if bigdl_patched == 'Train':
             global patched_training_mode
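With the `load_in_8bit` pop gone from `from_pretrained`, callers request low-bit weights through the options ipex-llm does document, such as `load_in_4bit` or `load_in_low_bit`. A usage sketch (the model id is illustrative):

```python
from ipex_llm.transformers import AutoModelForCausalLM

# load_in_8bit is no longer accepted; ask for 4-bit weights instead,
# or pass load_in_low_bit="sym_int4" (or another supported precision).
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # illustrative model id
    load_in_4bit=True,
    trust_remote_code=True,
)
```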

python/llm/src/ipex_llm/transformers/npu_model.py

−1

@@ -117,7 +117,6 @@ def from_pretrained(cls, *args, **kwargs):
         # ignore following arguments
         ignore_argument(kwargs, "model_hub")
         ignore_argument(kwargs, "load_in_4bit")
-        ignore_argument(kwargs, "load_in_8bit")
         ignore_argument(kwargs, "imatrix")
         ignore_argument(kwargs, "cpu_embedding")
         ignore_argument(kwargs, "embedding_qtype")
