# autogptq.yaml
# PEFT-related acceleration
peft:

  # quantization-related acceleration
  # e.g., kernels for quantized base weights
  quantization:

    # AutoGPTQ quantized base weights.
    auto_gptq:

      # Kernel to be used for the GPTQ linear layer
      # NOTE: Not all kernels are suitable for PEFT training; use kernels
      # that support autograd forward / backward. The best recommendation
      # at the moment is "triton_v2".
      kernel: triton_v2
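      # For illustration: AutoGPTQ's exllama-style kernels are
      # inference-oriented and, to our knowledge, do not implement a
      # backward pass, so they are unsuitable for PEFT training here.
      # Kernel availability may vary across AutoGPTQ versions.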

      # If true, an already-quantized checkpoint is expected to be
      # passed via TrainingArguments.model_name_or_path
      from_quantized: True
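      # e.g., model_name_or_path could point to a public GPTQ checkpoint
      # such as "TheBloke/Llama-2-7B-GPTQ" (an illustrative example, not
      # a requirement; any compatible GPTQ-quantized model works).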

      # If false, GPTQ-LoRA is created using the local autogptq package.
      # If true, the legacy implementation of GPTQ-LoRA is created using
      # the external `auto_gptq` package. Refer to the README for
      # installation instructions.
      use_external_lib: False
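
# Example variant (commented out, for illustration only): the same plugin
# configured to use the legacy external `auto_gptq` implementation. This is
# a sketch assuming `auto_gptq` is installed per the README; it is not part
# of the active configuration above.
#
# peft:
#   quantization:
#     auto_gptq:
#       kernel: triton_v2
#       from_quantized: True
#       use_external_lib: True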