Skip to content

Commit ce893e1

Browse files
Rework byteswapping code in gguf-py
Move the per-format block details out of the tensor byte-swapping loop into dedicated helper functions
1 parent 1c63021 commit ce893e1

File tree

1 file changed

+64
-106
lines changed

1 file changed

+64
-106
lines changed

Diff for: gguf-py/gguf/scripts/gguf_convert_endian.py

+64-106
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,61 @@
1919
logger = logging.getLogger("gguf-convert-endian")
2020

2121

22+
def byteswap_q4_0(tensor, block_offs):
    """Byte-swap the multi-byte fields of one block_q4_0 in place.

    A block_q4_0 is an f16 delta (scaling factor, 2 bytes) followed by
    16 bytes of quantized values; only the leading f16 needs swapping.
    """
    tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16).byteswap(inplace=True)
28+
29+
30+
def byteswap_q8_0(tensor, block_offs):
    """Byte-swap the multi-byte fields of one block_q8_0 in place.

    A block_q8_0 is an f16 delta (scaling factor, 2 bytes) followed by
    32 int8 quantizations; only the leading f16 needs swapping.
    """
    tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16).byteswap(inplace=True)
36+
37+
38+
def byteswap_q4_k(tensor, block_offs):
    """Byte-swap the multi-byte fields of one block_q4_k in place.

    A block_q4_k starts with two consecutive f16 values (4 bytes total)
    followed by 140 int8 values. Viewing the leading 4 bytes as two
    uint16s swaps both f16 fields in a single pass.
    """
    tensor.data[block_offs:block_offs + 4].view(dtype=np.uint16).byteswap(inplace=True)
47+
48+
49+
def byteswap_q6_k(tensor, block_offs):
    """Byte-swap the multi-byte fields of one block_q6_k in place.

    In block_q6_k the single f16 value sits at the *end* of the block,
    after 208 bytes of int8 data; only those trailing 2 bytes need
    swapping.
    """
    tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16).byteswap(inplace=True)
55+
56+
57+
# Registry of quantization formats that require per-block byte-swapping.
# Each entry maps a quantization type to the raw size of one block in
# bytes and the function that swaps the multi-byte (f16) fields inside
# a single block. F32/F16 tensors are handled separately and are not
# listed here.
byteswap_tensors = {
    gguf.GGMLQuantizationType.Q4_0: {
        "block_size": 18,  # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant>
        "byteswap_func": byteswap_q4_0,
    },
    gguf.GGMLQuantizationType.Q8_0: {
        "block_size": 34,  # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
        "byteswap_func": byteswap_q8_0,
    },
    gguf.GGMLQuantizationType.Q4_K: {
        "block_size": 144,  # 144 bytes = 2 * <f16 delta scaling factor> + 140 * <int8 quant>
        "byteswap_func": byteswap_q4_k,
    },
    gguf.GGMLQuantizationType.Q6_K: {
        "block_size": 210,  # 210 bytes = <f16 delta scaling factor> + 208 * <int8 quant>
        "byteswap_func": byteswap_q6_k,
    },
}
75+
76+
2277
def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
2378
file_endian = reader.endianess.name
2479
if reader.byte_order == 'S':
@@ -32,14 +87,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
3287
sys.exit(0)
3388
logger.info("* Checking tensors for conversion compatibility")
3489
for tensor in reader.tensors:
35-
if tensor.tensor_type not in (
36-
gguf.GGMLQuantizationType.F32,
37-
gguf.GGMLQuantizationType.F16,
38-
gguf.GGMLQuantizationType.Q4_0,
39-
gguf.GGMLQuantizationType.Q8_0,
40-
gguf.GGMLQuantizationType.Q4_K,
41-
gguf.GGMLQuantizationType.Q6_K,
42-
):
90+
if tensor.tensor_type not in byteswap_tensors and \
91+
tensor.tensor_type not in (
92+
gguf.GGMLQuantizationType.F32,
93+
gguf.GGMLQuantizationType.F16,
94+
):
4395
raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
4496
logger.info(f"* Preparing to convert from {file_endian} to {order}")
4597
if args.dry_run:
@@ -73,40 +125,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
73125
part.byteswap(inplace=True)
74126

75127
# Byte-swap tensor data if necessary
76-
if tensor.tensor_type == gguf.GGMLQuantizationType.Q4_0:
77-
# Handle Q4_0 tensor blocks (block_q4_0)
78-
# Specific handling of block_q4_0 is required.
79-
# Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 int8 quantizations.
80-
81-
# first flatten structure
82-
oldshape = tensor.data.shape
83-
newshape = 1
84-
for i in tensor.data.shape:
85-
newshape *= i
86-
87-
tensor.data.resize(newshape)
88-
89-
block_size = 18 # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant>
90-
91-
n_blocks = len(tensor.data) // block_size
92-
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
93-
block_offs = block_num * block_size
94-
95-
# Byte-Swap f16 sized delta field
96-
delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
97-
delta.byteswap(inplace=True)
98-
99-
# Byte-Swap Q8 weights
100-
if block_num % 100000 == 0:
101-
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
102-
103-
# restore old shape in case it's ever used
104-
tensor.data.resize(oldshape)
105-
elif tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
106-
# Handle Q8_0 tensor blocks (block_q8_0)
107-
# Specific handling of block_q8_0 is required.
108-
# Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
109-
128+
if tensor.tensor_type in byteswap_tensors:
110129
# first flatten structure
111130
oldshape = tensor.data.shape
112131
newshape = 1
@@ -115,76 +134,15 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
115134

116135
tensor.data.resize(newshape)
117136

118-
block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
119-
120-
n_blocks = len(tensor.data) // block_size
121-
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
122-
block_offs = block_num * block_size
123-
124-
# Byte-Swap f16 sized delta field
125-
delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
126-
delta.byteswap(inplace=True)
127-
128-
# Byte-Swap Q8 weights
129-
if block_num % 100000 == 0:
130-
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
131-
132-
# restore old shape in case it's ever used
133-
tensor.data.resize(oldshape)
134-
elif tensor.tensor_type == gguf.GGMLQuantizationType.Q4_K:
135-
# Handle Q4_K tensor blocks (block_q4_k)
136-
# Specific handling of block_q4_k is required.
137-
# Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
138-
139-
# first flatten structure
140-
oldshape = tensor.data.shape
141-
newshape = 1
142-
for i in tensor.data.shape:
143-
newshape *= i
144-
145-
tensor.data.resize(newshape)
146-
147-
block_size = 144
148-
n_blocks = len(tensor.data) // block_size
149-
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
150-
block_offs = block_num * block_size
151-
152-
# Byte-Swap f16 sized fields
153-
delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
154-
delta.byteswap(inplace=True)
155-
156-
delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
157-
delta.byteswap(inplace=True)
158-
159-
# Byte-Swap
160-
if block_num % 100000 == 0:
161-
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
162-
163-
# restore old shape in case it's ever used
164-
tensor.data.resize(oldshape)
165-
elif tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K:
166-
# Handle Q6_K tensor blocks (block_q6_k)
167-
# Specific handling of block_q6_k is required.
168-
# Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
169-
170-
# first flatten structure
171-
oldshape = tensor.data.shape
172-
newshape = 1
173-
for i in tensor.data.shape:
174-
newshape *= i
175-
176-
tensor.data.resize(newshape)
137+
block_size = byteswap_tensors[tensor.tensor_type]["block_size"]
138+
byteswap_func = byteswap_tensors[tensor.tensor_type]["byteswap_func"]
177139

178-
block_size = 210
179140
n_blocks = len(tensor.data) // block_size
180141
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
181142
block_offs = block_num * block_size
182143

183-
# Byte-Swap f16 sized field
184-
delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
185-
delta.byteswap(inplace=True)
144+
byteswap_func(tensor, block_offs)
186145

187-
# Byte-Swap
188146
if block_num % 100000 == 0:
189147
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
190148

0 commit comments

Comments
 (0)