
Commit 04ac060

crasm and cebtenzzre authored
python : add check-requirements.sh and GitHub workflow (#4585)
* python: add check-requirements.sh and GitHub workflow

  This script and workflow force package versions to remain compatible across all
  convert*.py scripts, while allowing secondary convert scripts to import dependencies
  not wanted in convert.py.

* Move requirements into ./requirements
* Fail on "==" being used for package requirements (but can be suppressed)
* Enforce "compatible release" syntax instead of ==
* Update workflow
* Add upper version bound for transformers and protobuf
* improve check-requirements.sh
* small syntax change
* don't remove venvs if nocleanup is passed
* See if this fixes docker workflow
* Move check-requirements.sh into ./scripts/

---------

Co-authored-by: Jared Van Bortel <[email protected]>
1 parent 68eccbd commit 04ac060

16 files changed, +378 −148 lines
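The commit message above says the new check rejects exact "==" pins in the requirements files and expects pip's "compatible release" operator (~=) instead. The enforcement itself lives in the bash script scripts/check-requirements.sh added by this commit; the snippet below is only a rough Python sketch of that single rule, written for illustration (the helper name find_exact_pins and the file-scanning details are assumptions, not code from the commit; the paths come from the workflow triggers further down).

# Rough illustration only -- the real check is the bash script
# scripts/check-requirements.sh introduced by this commit.
import sys
from pathlib import Path


def find_exact_pins(path: Path) -> list[str]:
    """Return requirement lines that use an exact '==' pin instead of '~='."""
    offending = []
    for raw in path.read_text().splitlines():
        line = raw.split('#', 1)[0].strip()    # drop comments and whitespace
        if not line or line.startswith('-r'):  # skip blanks and file includes
            continue
        if '==' in line:                       # "pkg==1.2.3" is an exact pin
            offending.append(f"{path}: exact pin not allowed: {line}")
    return offending


if __name__ == '__main__':
    problems: list[str] = []
    for req in [Path('requirements.txt'), *Path('requirements').glob('*.txt')]:
        if req.exists():
            problems.extend(find_exact_pins(req))
    sys.exit('\n'.join(problems) if problems else 0)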

.devops/full-cuda.Dockerfile

+2 −1

@@ -14,7 +14,8 @@ ARG CUDA_DOCKER_ARCH=all
 RUN apt-get update && \
     apt-get install -y build-essential python3 python3-pip git
 
-COPY requirements.txt requirements.txt
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
 
 RUN pip install --upgrade pip setuptools wheel \
     && pip install -r requirements.txt

.devops/full-rocm.Dockerfile

+2 −1

@@ -23,7 +23,8 @@ ARG ROCM_DOCKER_ARCH=\
     gfx1101 \
     gfx1102
 
-COPY requirements.txt requirements.txt
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
 
 RUN pip install --upgrade pip setuptools wheel \
     && pip install -r requirements.txt

.devops/full.Dockerfile

+2 −1

@@ -5,7 +5,8 @@ FROM ubuntu:$UBUNTU_VERSION as build
 RUN apt-get update && \
     apt-get install -y build-essential python3 python3-pip git
 
-COPY requirements.txt requirements.txt
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
 
 RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

.devops/main-rocm.Dockerfile

+2 −1

@@ -23,7 +23,8 @@ ARG ROCM_DOCKER_ARCH=\
     gfx1101 \
     gfx1102
 
-COPY requirements.txt requirements.txt
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
 
 RUN pip install --upgrade pip setuptools wheel \
     && pip install -r requirements.txt
.github/workflows/python-check-requirements.yml

+29

@@ -0,0 +1,29 @@
+name: Python check requirements.txt
+
+on:
+  push:
+    paths:
+      - 'scripts/check-requirements.sh'
+      - 'convert*.py'
+      - 'requirements.txt'
+      - 'requirements/*.txt'
+  pull_request:
+    paths:
+      - 'scripts/check-requirements.sh'
+      - 'convert*.py'
+      - 'requirements.txt'
+      - 'requirements/*.txt'
+
+jobs:
+  python-check-requirements:
+    runs-on: ubuntu-latest
+    name: check-requirements
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v3
+      - name: Set up Python environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+      - name: Run check-requirements.sh script
+        run: bash scripts/check-requirements.sh nocleanup

convert-hf-to-gguf.py

+50 −45

@@ -242,7 +242,7 @@ def _set_vocab_gpt2(self):
         tokens: list[bytearray] = []
         toktypes: list[int] = []
 
-        from transformers import AutoTokenizer  # type: ignore[attr-defined]
+        from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(dir_model)
         vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
         assert max(tokenizer.vocab.values()) < vocab_size
@@ -856,7 +856,7 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -902,7 +902,7 @@ def set_vocab(self):
         tokens: list[bytearray] = []
         toktypes: list[int] = []
 
-        from transformers import AutoTokenizer  # type: ignore[attr-defined]
+        from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
         vocab_size = hparams["vocab_size"]
         assert max(tokenizer.get_vocab().values()) < vocab_size
@@ -1185,57 +1185,62 @@ def parse_args() -> argparse.Namespace:
     return parser.parse_args()
 
 
-args = parse_args()
+def main() -> None:
+    args = parse_args()
 
-dir_model = args.model
+    dir_model = args.model
 
-if args.awq_path:
-    sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
-    from awq.apply_awq import add_scale_weights
-    tmp_model_path = args.model / "weighted_model"
-    dir_model = tmp_model_path
-    if tmp_model_path.is_dir():
-        print(f"{tmp_model_path} exists as a weighted model.")
+    if args.awq_path:
+        sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
+        from awq.apply_awq import add_scale_weights
+        tmp_model_path = args.model / "weighted_model"
+        dir_model = tmp_model_path
+        if tmp_model_path.is_dir():
+            print(f"{tmp_model_path} exists as a weighted model.")
+        else:
+            tmp_model_path.mkdir(parents=True, exist_ok=True)
+            print("Saving new weighted model ...")
+            add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
+            print(f"Saved weighted model at {tmp_model_path}.")
+
+    if not dir_model.is_dir():
+        print(f'Error: {args.model} is not a directory', file=sys.stderr)
+        sys.exit(1)
+
+    ftype_map = {
+        "f32": gguf.GGMLQuantizationType.F32,
+        "f16": gguf.GGMLQuantizationType.F16,
+    }
+
+    if args.outfile is not None:
+        fname_out = args.outfile
     else:
-        tmp_model_path.mkdir(parents=True, exist_ok=True)
-        print("Saving new weighted model ...")
-        add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
-        print(f"Saved weighted model at {tmp_model_path}.")
-
-if not dir_model.is_dir():
-    print(f'Error: {args.model} is not a directory', file=sys.stderr)
-    sys.exit(1)
+        # output in the same directory as the model by default
+        fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'
 
-ftype_map = {
-    "f32": gguf.GGMLQuantizationType.F32,
-    "f16": gguf.GGMLQuantizationType.F16,
-}
+    print(f"Loading model: {dir_model.name}")
 
-if args.outfile is not None:
-    fname_out = args.outfile
-else:
-    # output in the same directory as the model by default
-    fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'
+    hparams = Model.load_hparams(dir_model)
 
-print(f"Loading model: {dir_model.name}")
+    with torch.inference_mode():
+        model_class = Model.from_model_architecture(hparams["architectures"][0])
+        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
 
-hparams = Model.load_hparams(dir_model)
+        print("Set model parameters")
+        model_instance.set_gguf_parameters()
 
-with torch.inference_mode():
-    model_class = Model.from_model_architecture(hparams["architectures"][0])
-    model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
+        print("Set model tokenizer")
+        model_instance.set_vocab()
 
-    print("Set model parameters")
-    model_instance.set_gguf_parameters()
+        if args.vocab_only:
+            print(f"Exporting model vocab to '{fname_out}'")
+            model_instance.write_vocab()
+        else:
+            print(f"Exporting model to '{fname_out}'")
+            model_instance.write()
 
-    print("Set model tokenizer")
-    model_instance.set_vocab()
+        print(f"Model successfully exported to '{fname_out}'")
 
-    if args.vocab_only:
-        print(f"Exporting model vocab to '{fname_out}'")
-        model_instance.write_vocab()
-    else:
-        print(f"Exporting model to '{fname_out}'")
-        model_instance.write()
 
-    print(f"Model successfully exported to '{fname_out}'")
+if __name__ == '__main__':
+    main()
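The largest change above folds convert-hf-to-gguf.py's module-level code into main() behind an if __name__ == '__main__' guard, so importing the file no longer kicks off a conversion. A plausible reason (inferred, not stated in the commit) is that requirements checking wants to load each convert script without running it. A minimal sketch of loading the hyphen-named script by path, under that assumption:

# Sketch only: load convert-hf-to-gguf.py as a module without calling main().
# The hyphens in the filename rule out a plain "import", so importlib loads it
# by path; top-level imports still run, so its dependencies must be installed.
import importlib.util

spec = importlib.util.spec_from_file_location("convert_hf_to_gguf", "convert-hf-to-gguf.py")
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)  # executes module-level code; the __main__ guard keeps main() from running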

convert-lora-to-ggml.py

+92 −91

@@ -47,95 +47,96 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
     fout.seek((fout.tell() + 31) & -32)
 
 
-if len(sys.argv) < 2:
-    print(f"Usage: python {sys.argv[0]} <path> [arch]")
-    print(
-        "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
-    )
-    print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
-    sys.exit(1)
-
-input_json = os.path.join(sys.argv[1], "adapter_config.json")
-input_model = os.path.join(sys.argv[1], "adapter_model.bin")
-output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
-
-model = torch.load(input_model, map_location="cpu")
-arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
-
-if arch_name not in gguf.MODEL_ARCH_NAMES.values():
-    print(f"Error: unsupported architecture {arch_name}")
-    sys.exit(1)
-
-arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
-name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
-
-with open(input_json, "r") as f:
-    params = json.load(f)
-
-if params["peft_type"] != "LORA":
-    print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
-    sys.exit(1)
-
-if params["fan_in_fan_out"] is True:
-    print("Error: param fan_in_fan_out is not supported")
-    sys.exit(1)
-
-if params["bias"] is not None and params["bias"] != "none":
-    print("Error: param bias is not supported")
-    sys.exit(1)
-
-# TODO: these seem to be layers that have been trained but without lora.
-# doesn't seem widely used but eventually should be supported
-if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
-    print("Error: param modules_to_save is not supported")
-    sys.exit(1)
-
-with open(output_path, "wb") as fout:
-    fout.truncate()
-
-    write_file_header(fout, params)
-    for k, v in model.items():
-        orig_k = k
-        if k.endswith(".default.weight"):
-            k = k.replace(".default.weight", ".weight")
-        if k in ["llama_proj.weight", "llama_proj.bias"]:
-            continue
-        if k.endswith("lora_A.weight"):
-            if v.dtype != torch.float16 and v.dtype != torch.float32:
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print(f"Usage: python {sys.argv[0]} <path> [arch]")
+        print(
+            "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
+        )
+        print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
+        sys.exit(1)
+
+    input_json = os.path.join(sys.argv[1], "adapter_config.json")
+    input_model = os.path.join(sys.argv[1], "adapter_model.bin")
+    output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
+
+    model = torch.load(input_model, map_location="cpu")
+    arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
+
+    if arch_name not in gguf.MODEL_ARCH_NAMES.values():
+        print(f"Error: unsupported architecture {arch_name}")
+        sys.exit(1)
+
+    arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
+    name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
+
+    with open(input_json, "r") as f:
+        params = json.load(f)
+
+    if params["peft_type"] != "LORA":
+        print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
+        sys.exit(1)
+
+    if params["fan_in_fan_out"] is True:
+        print("Error: param fan_in_fan_out is not supported")
+        sys.exit(1)
+
+    if params["bias"] is not None and params["bias"] != "none":
+        print("Error: param bias is not supported")
+        sys.exit(1)
+
+    # TODO: these seem to be layers that have been trained but without lora.
+    # doesn't seem widely used but eventually should be supported
+    if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
+        print("Error: param modules_to_save is not supported")
+        sys.exit(1)
+
+    with open(output_path, "wb") as fout:
+        fout.truncate()
+
+        write_file_header(fout, params)
+        for k, v in model.items():
+            orig_k = k
+            if k.endswith(".default.weight"):
+                k = k.replace(".default.weight", ".weight")
+            if k in ["llama_proj.weight", "llama_proj.bias"]:
+                continue
+            if k.endswith("lora_A.weight"):
+                if v.dtype != torch.float16 and v.dtype != torch.float32:
+                    v = v.float()
+                v = v.T
+            else:
                 v = v.float()
-            v = v.T
-        else:
-            v = v.float()
-
-        t = v.detach().numpy()
-
-        prefix = "base_model.model."
-        if k.startswith(prefix):
-            k = k[len(prefix) :]
-
-        lora_suffixes = (".lora_A.weight", ".lora_B.weight")
-        if k.endswith(lora_suffixes):
-            suffix = k[-len(lora_suffixes[0]):]
-            k = k[: -len(lora_suffixes[0])]
-        else:
-            print(f"Error: unrecognized tensor name {orig_k}")
-            sys.exit(1)
-
-        tname = name_map.get_name(k)
-        if tname is None:
-            print(f"Error: could not map tensor name {orig_k}")
-            print(" Note: the arch parameter must be specified if the model is not llama")
-            sys.exit(1)
-
-        if suffix == ".lora_A.weight":
-            tname += ".weight.loraA"
-        elif suffix == ".lora_B.weight":
-            tname += ".weight.loraB"
-        else:
-            assert False
-
-        print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
-        write_tensor_header(fout, tname, t.shape, t.dtype)
-        t.tofile(fout)
-
-print(f"Converted {input_json} and {input_model} to {output_path}")
+
+            t = v.detach().numpy()
+
+            prefix = "base_model.model."
+            if k.startswith(prefix):
+                k = k[len(prefix) :]
+
+            lora_suffixes = (".lora_A.weight", ".lora_B.weight")
+            if k.endswith(lora_suffixes):
+                suffix = k[-len(lora_suffixes[0]):]
+                k = k[: -len(lora_suffixes[0])]
+            else:
+                print(f"Error: unrecognized tensor name {orig_k}")
+                sys.exit(1)
+
+            tname = name_map.get_name(k)
+            if tname is None:
+                print(f"Error: could not map tensor name {orig_k}")
+                print(" Note: the arch parameter must be specified if the model is not llama")
+                sys.exit(1)
+
+            if suffix == ".lora_A.weight":
+                tname += ".weight.loraA"
+            elif suffix == ".lora_B.weight":
+                tname += ".weight.loraB"
+            else:
+                assert False
+
+            print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
+            write_tensor_header(fout, tname, t.shape, t.dtype)
+            t.tofile(fout)
+
+    print(f"Converted {input_json} and {input_model} to {output_path}")
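convert-lora-to-ggml.py gets the same treatment: the whole PEFT-to-GGML conversion now runs only under if __name__ == '__main__'. The tensor-name handling wrapped by that change is easy to lose in the diff, so the helper below restates just the prefix/suffix logic from the lines above in isolation (an illustration, not part of the commit; the gguf.TensorNameMap lookup that follows it is omitted).

# Restates the name handling from the diff above: strip the PEFT prefix
# "base_model.model." and split off the ".lora_A.weight"/".lora_B.weight"
# suffix; the remaining base name is what gguf.TensorNameMap would then map.
def split_lora_tensor_name(k: str) -> tuple[str, str]:
    prefix = "base_model.model."
    if k.startswith(prefix):
        k = k[len(prefix):]
    lora_suffixes = (".lora_A.weight", ".lora_B.weight")
    if not k.endswith(lora_suffixes):
        raise ValueError(f"unrecognized tensor name {k}")
    # both suffixes have the same length, so slicing by the first one is safe
    return k[: -len(lora_suffixes[0])], k[-len(lora_suffixes[0]):]


# split_lora_tensor_name("base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight")
# -> ("model.layers.0.self_attn.q_proj", ".lora_A.weight")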

convert-persimmon-to-gguf.py

old mode 100644
new mode 100755

+1

@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 import torch
 import os
 from pprint import pprint

requirements-hf-to-gguf.txt

−3
This file was deleted.
