-
Notifications
You must be signed in to change notification settings - Fork 50
/
Copy pathawq_w_only_vlm.yml
61 lines (61 loc) · 1.61 KB
/
awq_w_only_vlm.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
base:
    # Anchored (&seed) so the same value can be reused below via *seed.
    seed: &seed 42
model:
    # Placeholder: replace with the model type identifier expected by the tool.
    type: model_type
    # Placeholder: filesystem path to the model weights.
    path: model path
    tokenizer_mode: slow
    torch_dtype: auto
calib:
    name: custom_mm
    download: false
    # Placeholder: replace with the local path to the calibration data.
    path: calib data path
    apply_chat_template: true
    # Default is false. If set to true, the calibration data will include answers.
    add_answer: true
    n_samples: 8
    bs: -1
    seq_len: 512
    padding: true
    # Reuses the seed anchored in the base section.
    seed: *seed
eval:
    # Evaluation points: names suggest the original (pretrain) model and the
    # fake-quantized model are both evaluated — confirm against the consumer.
    eval_pos: [pretrain, fake_quant]
    type: vqa
    name: mme
    download: false
    # Placeholder: replace with the local path to the MME dataset.
    path: MME dataset path
    bs: 1
    inference_per_block: false
quant:
    # Separate quantization settings for the vision tower and the language model.
    vision:
        method: Awq
        weight:
            bit: 4
            symmetric: false
            granularity: per_group
            group_size: 128
        special:
            trans: true
            # The options for "trans_version" include "v1" and "v2".
            # But their results don't differ significantly.
            trans_version: v2
            weight_clip: true
            # For 2-bit quantization, setting "clip_sym: false" will yield better results.
            clip_sym: true
    language:
        method: Awq
        weight:
            bit: 4
            symmetric: false
            granularity: per_group
            group_size: 128
        special:
            trans: true
            # The options for "trans_version" include "v1" and "v2".
            # But their results don't differ significantly.
            trans_version: v2
            weight_clip: true
            # For 2-bit quantization, setting "clip_sym: false" will yield better results.
            clip_sym: true
save:
    save_trans: false
    save_fake: false
    # Placeholder: replace with the output directory.
    save_path: /path/to/save/