Skip to content

Commit 34b0a08

Browse files
gguf-py: Refactor and allow reading/modifying existing GGUF files (#3981)
* gguf-py: Refactor and add file reading support * Replay changes from #3871. Credit to @cebtenzzre for that pull * Various type annotation fixes. * sort imports with isort (again) * Fix missing return statement in add_tensor * style cleanup with flake8 * fix NamedTuple and Enum usage * Fix an issue with state init in GGUFReader. Move examples to an examples/ directory. Clean up examples. Add an example of modifying keys in a GGUF file. Update documentation with info on examples. Try to support people importing gguf/gguf.py directly * Damagage is not a word. * Clean up gguf-py/examples/modify_gguf.py whitespace Co-authored-by: Jared Van Bortel <[email protected]> * Update gguf-py/examples/modify_gguf.py formatting Co-authored-by: Jared Van Bortel <[email protected]> * Update gguf-py/gguf/gguf_reader.py type hint Co-authored-by: Jared Van Bortel <[email protected]> * Make examples executable, formatting changes * Add more information to GGUFReader and examples comments * Include a gguf Python package version bump * Add convert-gguf-endian.py script * cleanup * gguf-py : bump minor version * Reorganize scripts * Make GGUFReader endian detection less arbitrary * Add JSON dumping support to gguf-dump.py. Which I kind of regret now * A few cleanups for gguf-dump.py * Murder accidental tuple in gguf-py/scripts/gguf-dump.py Co-authored-by: Jared Van Bortel <[email protected]> * cleanup * constants : remove unneeded type annotations * fix python 3.8 compat * Set up gguf- scripts in pyproject.toml * And include scripts/__init__.py, derp * convert.py: We can't currently support Q8_0 on big endian.
* gguf-py: SpecialVocab: Always try available sources for special token ids. gguf-py: SpecialVocab: Try to load merges from merges.txt if not in tokenizer.json. gguf-py: SpecialVocab: Add 'add_bos_token' type bools to GGUF metadata * cleanup * Promote add_X_token to GGUF metadata for BOS and EOS --------- Co-authored-by: Jared Van Bortel <[email protected]> Co-authored-by: Jared Van Bortel <[email protected]>
1 parent 4a4fd3e commit 34b0a08

20 files changed

+1982
-1176
lines changed

convert-baichuan-hf-to-gguf.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
from sentencepiece import SentencePieceProcessor # type: ignore[import]
1717

1818
if 'NO_LOCAL_GGUF' not in os.environ:
19-
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
19+
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
2020
import gguf
2121

2222

convert-llama-ggml-to-gguf.py

+2-22
Original file line number | Diff line number | Diff line change
@@ -12,29 +12,9 @@
1212

1313
import os
1414
if 'NO_LOCAL_GGUF' not in os.environ:
15-
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
15+
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
1616
import gguf
1717

18-
# Note: Does not support GGML_QKK_64
19-
QK_K = 256
20-
# Items here are (block size, type size)
21-
GGML_QUANT_SIZES = {
22-
gguf.GGMLQuantizationType.F32 : (1, 4),
23-
gguf.GGMLQuantizationType.F16 : (1, 2),
24-
gguf.GGMLQuantizationType.Q4_0 : (32, 2 + 16),
25-
gguf.GGMLQuantizationType.Q4_1 : (32, 2 + 2 + 16),
26-
gguf.GGMLQuantizationType.Q5_0 : (32, 2 + 4 + 16),
27-
gguf.GGMLQuantizationType.Q5_1 : (32, 2 + 2 + 4 + 16),
28-
gguf.GGMLQuantizationType.Q8_0 : (32, 2 + 32),
29-
gguf.GGMLQuantizationType.Q8_1 : (32, 4 + 4 + 32),
30-
gguf.GGMLQuantizationType.Q2_K : (256, 2 + 2 + QK_K // 16 + QK_K // 4),
31-
gguf.GGMLQuantizationType.Q3_K : (256, 2 + QK_K // 4 + QK_K // 8 + 12),
32-
gguf.GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
33-
gguf.GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
34-
gguf.GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
35-
gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
36-
}
37-
3818
class GGMLFormat(IntEnum):
3919
GGML = 0
4020
GGMF = 1
@@ -125,7 +105,7 @@ def load(self, data, offset):
125105
(n_dims, name_len, dtype) = struct.unpack('<3I', data[offset:offset + 12])
126106
assert n_dims >= 0 and n_dims <= 4, f'Invalid tensor dimensions {n_dims}'
127107
assert name_len < 4096, 'Absurd tensor name length'
128-
quant = GGML_QUANT_SIZES.get(dtype)
108+
quant = gguf.GGML_QUANT_SIZES.get(dtype)
129109
assert quant is not None, 'Unknown tensor type'
130110
(blksize, tysize) = quant
131111
offset += 12

convert-persimmon-to-gguf.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from pathlib import Path
77
from sentencepiece import SentencePieceProcessor
88
if 'NO_LOCAL_GGUF' not in os.environ:
9-
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
9+
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
1010
import gguf
1111

1212
def _flatten_dict(dct, tensors, prefix=None):

convert.py

+9-7
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,9 @@
33

44
import argparse
55
import concurrent.futures
6-
import copy
76
import enum
87
import faulthandler
98
import functools
10-
import io
119
import itertools
1210
import json
1311
import math
@@ -23,14 +21,14 @@
2321
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
2422
from dataclasses import dataclass
2523
from pathlib import Path
26-
from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar
24+
from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar
2725

2826
import numpy as np
2927
from sentencepiece import SentencePieceProcessor
3028

3129
import os
3230
if 'NO_LOCAL_GGUF' not in os.environ:
33-
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
31+
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
3432
import gguf
3533

3634
if TYPE_CHECKING:
@@ -851,7 +849,7 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
851849
elif isinstance(vocab, BpeVocab):
852850
self.gguf.add_tokenizer_model("gpt2")
853851
else:
854-
raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
852+
raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab')
855853
self.gguf.add_token_list(tokens)
856854
self.gguf.add_token_scores(scores)
857855
self.gguf.add_token_types(toktypes)
@@ -905,7 +903,7 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
905903
return dt.quantize(arr)
906904

907905
@staticmethod
908-
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
906+
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
909907
check_vocab_size(params, vocab)
910908

911909
of = OutputFile(fname_out, endianess=endianess)
@@ -1114,11 +1112,15 @@ def do_dump_model(model_plus: ModelPlus) -> None:
11141112

11151113

11161114
def main(args_in: list[str] | None = None) -> None:
1115+
output_choices = ["f32", "f16"]
1116+
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
1117+
# We currently only support Q8_0 output on little endian systems.
1118+
output_choices.append("q8_0")
11171119
parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
11181120
parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
11191121
parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
11201122
parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
1121-
parser.add_argument("--outtype", choices=["f32", "f16", "q8_0"], help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
1123+
parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
11221124
parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
11231125
parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
11241126
parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")

examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from pathlib import Path
1010

1111
if 'NO_LOCAL_GGUF' not in os.environ:
12-
sys.path.insert(1, str(Path(__file__).parent / '..' / '..' / 'gguf-py' / 'gguf'))
12+
sys.path.insert(1, str(Path(__file__).parent / '..' / '..' / 'gguf-py'))
1313
import gguf
1414

1515
# gguf constants

gguf-py/README.md

+10
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,16 @@ as an example for its usage.
1111
pip install gguf
1212
```
1313

14+
## API Examples/Simple Tools
15+
16+
[examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model.
17+
18+
[scripts/gguf-dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf-dump.py) — Dumps a GGUF file's metadata to the console.
19+
20+
[scripts/gguf-set-metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf-set-metadata.py) — Allows changing simple metadata values in a GGUF file by key.
21+
22+
[scripts/gguf-convert-endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf-convert-endian.py) — Allows converting the endianness of GGUF files.
23+
1424
## Development
1525
Maintainers who participate in development of this package are advised to install it in editable mode:
1626

gguf-py/examples/writer.py

+40
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
from pathlib import Path
4+
5+
import numpy as np
6+
7+
# Necessary to load the local gguf package
8+
sys.path.insert(0, str(Path(__file__).parent.parent))
9+
10+
from gguf import GGUFWriter # noqa: E402
11+
12+
13+
# Example usage:
14+
def writer_example() -> None:
15+
# Example usage with a file
16+
gguf_writer = GGUFWriter("example.gguf", "llama")
17+
18+
gguf_writer.add_architecture()
19+
gguf_writer.add_block_count(12)
20+
gguf_writer.add_uint32("answer", 42) # Write a 32-bit integer
21+
gguf_writer.add_float32("answer_in_float", 42.0) # Write a 32-bit float
22+
gguf_writer.add_custom_alignment(64)
23+
24+
tensor1 = np.ones((32,), dtype=np.float32) * 100.0
25+
tensor2 = np.ones((64,), dtype=np.float32) * 101.0
26+
tensor3 = np.ones((96,), dtype=np.float32) * 102.0
27+
28+
gguf_writer.add_tensor("tensor1", tensor1)
29+
gguf_writer.add_tensor("tensor2", tensor2)
30+
gguf_writer.add_tensor("tensor3", tensor3)
31+
32+
gguf_writer.write_header_to_file()
33+
gguf_writer.write_kv_data_to_file()
34+
gguf_writer.write_tensors_to_file()
35+
36+
gguf_writer.close()
37+
38+
39+
if __name__ == '__main__':
40+
writer_example()

gguf-py/gguf/__init__.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -1 +1,5 @@
1-
from .gguf import *
1+
from .constants import *
2+
from .gguf_reader import *
3+
from .gguf_writer import *
4+
from .tensor_mapping import *
5+
from .vocab import *

0 commit comments

Comments
 (0)