Skip to content

Commit 4ae4108

Browse files
Rework byteswapping code in gguf-py
Move out details from byteswapping tensor blocks code
1 parent 1c63021 commit 4ae4108

File tree

1 file changed

+57
-105
lines changed

1 file changed

+57
-105
lines changed

gguf-py/gguf/scripts/gguf_convert_endian.py

+57-105
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,55 @@
1818

1919
logger = logging.getLogger("gguf-convert-endian")
2020

21+
def byteswap_q4_0(tensor, block_offs):
    """Byte-swap one Q4_0 block of *tensor* in place.

    A block_q4_0 is an f16 delta (scaling factor) followed by 16 bytes of
    quantized values; only the 2-byte delta field needs swapping.
    """
    scale = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
    scale.byteswap(inplace=True)
27+
28+
def byteswap_q8_0(tensor, block_offs):
    """Byte-swap one Q8_0 block of *tensor* in place.

    A block_q8_0 is an f16 delta (scaling factor) followed by 32 int8
    quantized values; only the 2-byte delta field needs swapping.
    """
    scale = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
    scale.byteswap(inplace=True)
34+
35+
def byteswap_q4_k(tensor, block_offs):
    """Byte-swap one Q4_K block of *tensor* in place.

    A block_q4_k starts with two consecutive f16 values (at byte offsets 0
    and 2 within the block) followed by 140 int8 values; each f16 field is
    swapped independently.
    """
    for field_offs in (block_offs, block_offs + 2):
        field = tensor.data[field_offs:field_offs + 2].view(dtype=np.uint16)
        field.byteswap(inplace=True)
44+
45+
def byteswap_q6_k(tensor, block_offs):
    """Byte-swap one Q6_K block of *tensor* in place.

    A block_q6_k is 208 int8 values followed by a single trailing f16
    (at byte offset 208 within the block); only that field is swapped.
    """
    tail_offs = block_offs + 208
    tail = tensor.data[tail_offs:tail_offs + 2].view(dtype=np.uint16)
    tail.byteswap(inplace=True)
51+
52+
# Registry of quantization types this script can byte-swap: for each type,
# the fixed byte size of one block and the routine that swaps the
# multi-byte fields inside a single block.
byteswap_tensors = {
    gguf.GGMLQuantizationType.Q4_0: {
        "block_size": 18,  # f16 delta + 16 * int8 quant = 18 bytes
        "byteswap_func": byteswap_q4_0,
    },
    gguf.GGMLQuantizationType.Q8_0: {
        "block_size": 34,  # f16 delta + 32 * int8 quant = 34 bytes
        "byteswap_func": byteswap_q8_0,
    },
    gguf.GGMLQuantizationType.Q4_K: {
        "block_size": 144,  # 2 * f16 delta + 140 * int8 quant = 144 bytes
        "byteswap_func": byteswap_q4_k,
    },
    gguf.GGMLQuantizationType.Q6_K: {
        "block_size": 210,  # 208 * int8 quant + f16 delta = 210 bytes
        "byteswap_func": byteswap_q6_k,
    },
}
2170

2271
def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
2372
file_endian = reader.endianess.name
@@ -32,13 +81,10 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
3281
sys.exit(0)
3382
logger.info("* Checking tensors for conversion compatibility")
3483
for tensor in reader.tensors:
35-
if tensor.tensor_type not in (
36-
gguf.GGMLQuantizationType.F32,
37-
gguf.GGMLQuantizationType.F16,
38-
gguf.GGMLQuantizationType.Q4_0,
39-
gguf.GGMLQuantizationType.Q8_0,
40-
gguf.GGMLQuantizationType.Q4_K,
41-
gguf.GGMLQuantizationType.Q6_K,
84+
if tensor.tensor_type not in byteswap_tensors and \
85+
tensor.tensor_type not in (
86+
gguf.GGMLQuantizationType.F32,
87+
gguf.GGMLQuantizationType.F16,
4288
):
4389
raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
4490
logger.info(f"* Preparing to convert from {file_endian} to {order}")
@@ -73,69 +119,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
73119
part.byteswap(inplace=True)
74120

75121
# Byte-swap tensor data if necessary
76-
if tensor.tensor_type == gguf.GGMLQuantizationType.Q4_0:
77-
# Handle Q4_0 tensor blocks (block_q4_0)
78-
# Specific handling of block_q4_0 is required.
79-
# Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 int8 quantizations.
80-
81-
# first flatten structure
82-
oldshape = tensor.data.shape
83-
newshape = 1
84-
for i in tensor.data.shape:
85-
newshape *= i
86-
87-
tensor.data.resize(newshape)
88-
89-
block_size = 18 # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant>
90-
91-
n_blocks = len(tensor.data) // block_size
92-
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
93-
block_offs = block_num * block_size
94-
95-
# Byte-Swap f16 sized delta field
96-
delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
97-
delta.byteswap(inplace=True)
98-
99-
# Byte-Swap Q8 weights
100-
if block_num % 100000 == 0:
101-
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
102-
103-
# restore old shape in case it's ever used
104-
tensor.data.resize(oldshape)
105-
elif tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
106-
# Handle Q8_0 tensor blocks (block_q8_0)
107-
# Specific handling of block_q8_0 is required.
108-
# Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
109-
110-
# first flatten structure
111-
oldshape = tensor.data.shape
112-
newshape = 1
113-
for i in tensor.data.shape:
114-
newshape *= i
115-
116-
tensor.data.resize(newshape)
117-
118-
block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
119-
120-
n_blocks = len(tensor.data) // block_size
121-
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
122-
block_offs = block_num * block_size
123-
124-
# Byte-Swap f16 sized delta field
125-
delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
126-
delta.byteswap(inplace=True)
127-
128-
# Byte-Swap Q8 weights
129-
if block_num % 100000 == 0:
130-
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
131-
132-
# restore old shape in case it's ever used
133-
tensor.data.resize(oldshape)
134-
elif tensor.tensor_type == gguf.GGMLQuantizationType.Q4_K:
135-
# Handle Q4_K tensor blocks (block_q4_k)
136-
# Specific handling of block_q4_k is required.
137-
# Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
138-
122+
if tensor.tensor_type in byteswap_tensors:
139123
# first flatten structure
140124
oldshape = tensor.data.shape
141125
newshape = 1
@@ -144,47 +128,15 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
144128

145129
tensor.data.resize(newshape)
146130

147-
block_size = 144
148-
n_blocks = len(tensor.data) // block_size
149-
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
150-
block_offs = block_num * block_size
151-
152-
# Byte-Swap f16 sized fields
153-
delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
154-
delta.byteswap(inplace=True)
155-
156-
delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
157-
delta.byteswap(inplace=True)
158-
159-
# Byte-Swap
160-
if block_num % 100000 == 0:
161-
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
162-
163-
# restore old shape in case it's ever used
164-
tensor.data.resize(oldshape)
165-
elif tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K:
166-
# Handle Q6_K tensor blocks (block_q6_k)
167-
# Specific handling of block_q6_k is required.
168-
# Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
169-
170-
# first flatten structure
171-
oldshape = tensor.data.shape
172-
newshape = 1
173-
for i in tensor.data.shape:
174-
newshape *= i
175-
176-
tensor.data.resize(newshape)
131+
block_size = byteswap_tensors[tensor.tensor_type]["block_size"]
132+
byteswap_func = byteswap_tensors[tensor.tensor_type]["byteswap_func"]
177133

178-
block_size = 210
179134
n_blocks = len(tensor.data) // block_size
180135
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
181136
block_offs = block_num * block_size
182137

183-
# Byte-Swap f16 sized field
184-
delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
185-
delta.byteswap(inplace=True)
138+
byteswap_func(tensor, block_offs)
186139

187-
# Byte-Swap
188140
if block_num % 100000 == 0:
189141
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
190142

0 commit comments

Comments
 (0)