18
18
19
19
logger = logging .getLogger ("gguf-convert-endian" )
20
20
21
def byteswap_q4_0(tensor, block_offs):
    """Byte-swap, in place, the f16 delta that opens one block_q4_0.

    A block_q4_0 is an f16 delta (scaling factor) followed by 16 int8
    quantizations. Only the two delta bytes are swapped; the quantization
    bytes are left untouched.

    Args:
        tensor: object whose ``data`` attribute holds the tensor bytes. It is
            sliced by byte offset, so it is presumably a flat uint8 array.
        block_offs: offset of the block's first byte within ``tensor.data``.
    """
    # Slicing yields a view, so reinterpreting the two bytes as one 16-bit
    # word and swapping it in place rewrites the tensor's own buffer.
    f16_bytes = tensor.data[block_offs:block_offs + 2]
    f16_bytes.view(np.uint16).byteswap(inplace=True)
27
+
28
def byteswap_q8_0(tensor, block_offs):
    """Byte-swap, in place, the f16 delta that opens one block_q8_0.

    A block_q8_0 is an f16 delta (scaling factor) followed by 32 int8
    quantizations. Only the two delta bytes are swapped; the quantization
    bytes are left untouched.

    Args:
        tensor: object whose ``data`` attribute holds the tensor bytes. It is
            sliced by byte offset, so it is presumably a flat uint8 array.
        block_offs: offset of the block's first byte within ``tensor.data``.
    """
    # The slice is a view into tensor.data, so the in-place swap of the
    # 16-bit reinterpretation modifies the tensor itself.
    delta_end = block_offs + 2
    tensor.data[block_offs:delta_end].view(np.uint16).byteswap(inplace=True)
34
+
35
def byteswap_q4_k(tensor, block_offs):
    """Byte-swap, in place, the two leading f16 fields of one block_q4_k.

    A block_q4_k is 2 f16 values followed by 140 int8 values. Each f16 field
    is swapped on its own; the remaining bytes are left untouched.

    Args:
        tensor: object whose ``data`` attribute holds the tensor bytes. It is
            sliced by byte offset, so it is presumably a flat uint8 array.
        block_offs: offset of the block's first byte within ``tensor.data``.
    """
    # The two f16 fields sit back to back at the start of the block; handle
    # them one after the other, first field first.
    for field_offs in (block_offs, block_offs + 2):
        f16_bytes = tensor.data[field_offs:field_offs + 2]
        f16_bytes.view(np.uint16).byteswap(inplace=True)
44
+
45
def byteswap_q6_k(tensor, block_offs):
    """Byte-swap, in place, the trailing f16 field of one block_q6_k.

    A block_q6_k is 208 int8 values followed by 1 f16 value. Only that final
    two-byte field is swapped; the 208 leading bytes are left untouched.

    Args:
        tensor: object whose ``data`` attribute holds the tensor bytes. It is
            sliced by byte offset, so it is presumably a flat uint8 array.
        block_offs: offset of the block's first byte within ``tensor.data``.
    """
    # The f16 field starts right after the 208 int8 values.
    f16_start = block_offs + 208
    f16_bytes = tensor.data[f16_start:f16_start + 2]
    f16_bytes.view(np.uint16).byteswap(inplace=True)
51
+
52
# Dispatch table for block-quantized tensor types that need per-block
# byte-swapping. Each entry maps a quantization type to:
#   "block_size"    - size of one block in bytes
#   "byteswap_func" - callable(tensor, block_offs) that swaps the f16
#                     field(s) of the block starting at block_offs
byteswap_tensors = {
    quant_type: {"block_size": n_bytes, "byteswap_func": swap_block}
    for quant_type, n_bytes, swap_block in (
        # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant>
        (gguf.GGMLQuantizationType.Q4_0, 18, byteswap_q4_0),
        # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
        (gguf.GGMLQuantizationType.Q8_0, 34, byteswap_q8_0),
        # 144 bytes = 2 * <f16 value> + 140 * <int8 value>
        (gguf.GGMLQuantizationType.Q4_K, 144, byteswap_q4_k),
        # 210 bytes = 208 * <int8 value> + <f16 value>
        (gguf.GGMLQuantizationType.Q6_K, 210, byteswap_q6_k),
    )
}
21
70
22
71
def convert_byteorder (reader : gguf .GGUFReader , args : argparse .Namespace ) -> None :
23
72
file_endian = reader .endianess .name
@@ -32,13 +81,10 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
32
81
sys .exit (0 )
33
82
logger .info ("* Checking tensors for conversion compatibility" )
34
83
for tensor in reader .tensors :
35
- if tensor .tensor_type not in (
36
- gguf .GGMLQuantizationType .F32 ,
37
- gguf .GGMLQuantizationType .F16 ,
38
- gguf .GGMLQuantizationType .Q4_0 ,
39
- gguf .GGMLQuantizationType .Q8_0 ,
40
- gguf .GGMLQuantizationType .Q4_K ,
41
- gguf .GGMLQuantizationType .Q6_K ,
84
+ if tensor .tensor_type not in byteswap_tensors and \
85
+ tensor .tensor_type not in (
86
+ gguf .GGMLQuantizationType .F32 ,
87
+ gguf .GGMLQuantizationType .F16 ,
42
88
):
43
89
raise ValueError (f"Cannot handle type { tensor .tensor_type .name } for tensor { repr (tensor .name )} " )
44
90
logger .info (f"* Preparing to convert from { file_endian } to { order } " )
@@ -73,69 +119,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
73
119
part .byteswap (inplace = True )
74
120
75
121
# Byte-swap tensor data if necessary
76
- if tensor .tensor_type == gguf .GGMLQuantizationType .Q4_0 :
77
- # Handle Q4_0 tensor blocks (block_q4_0)
78
- # Specific handling of block_q4_0 is required.
79
- # Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 int8 quantizations.
80
-
81
- # first flatten structure
82
- oldshape = tensor .data .shape
83
- newshape = 1
84
- for i in tensor .data .shape :
85
- newshape *= i
86
-
87
- tensor .data .resize (newshape )
88
-
89
- block_size = 18 # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant>
90
-
91
- n_blocks = len (tensor .data ) // block_size
92
- for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
93
- block_offs = block_num * block_size
94
-
95
- # Byte-Swap f16 sized delta field
96
- delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
97
- delta .byteswap (inplace = True )
98
-
99
- # Byte-Swap Q8 weights
100
- if block_num % 100000 == 0 :
101
- inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
102
-
103
- # restore old shape in case it's ever used
104
- tensor .data .resize (oldshape )
105
- elif tensor .tensor_type == gguf .GGMLQuantizationType .Q8_0 :
106
- # Handle Q8_0 tensor blocks (block_q8_0)
107
- # Specific handling of block_q8_0 is required.
108
- # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
109
-
110
- # first flatten structure
111
- oldshape = tensor .data .shape
112
- newshape = 1
113
- for i in tensor .data .shape :
114
- newshape *= i
115
-
116
- tensor .data .resize (newshape )
117
-
118
- block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
119
-
120
- n_blocks = len (tensor .data ) // block_size
121
- for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
122
- block_offs = block_num * block_size
123
-
124
- # Byte-Swap f16 sized delta field
125
- delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
126
- delta .byteswap (inplace = True )
127
-
128
- # Byte-Swap Q8 weights
129
- if block_num % 100000 == 0 :
130
- inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
131
-
132
- # restore old shape in case it's ever used
133
- tensor .data .resize (oldshape )
134
- elif tensor .tensor_type == gguf .GGMLQuantizationType .Q4_K :
135
- # Handle Q4_K tensor blocks (block_q4_k)
136
- # Specific handling of block_q4_k is required.
137
- # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
138
-
122
+ if tensor .tensor_type in byteswap_tensors :
139
123
# first flatten structure
140
124
oldshape = tensor .data .shape
141
125
newshape = 1
@@ -144,47 +128,15 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
144
128
145
129
tensor .data .resize (newshape )
146
130
147
- block_size = 144
148
- n_blocks = len (tensor .data ) // block_size
149
- for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
150
- block_offs = block_num * block_size
151
-
152
- # Byte-Swap f16 sized fields
153
- delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
154
- delta .byteswap (inplace = True )
155
-
156
- delta = tensor .data [block_offs + 2 :block_offs + 4 ].view (dtype = np .uint16 )
157
- delta .byteswap (inplace = True )
158
-
159
- # Byte-Swap
160
- if block_num % 100000 == 0 :
161
- inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
162
-
163
- # restore old shape in case it's ever used
164
- tensor .data .resize (oldshape )
165
- elif tensor .tensor_type == gguf .GGMLQuantizationType .Q6_K :
166
- # Handle Q6_K tensor blocks (block_q6_k)
167
- # Specific handling of block_q6_k is required.
168
- # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
169
-
170
- # first flatten structure
171
- oldshape = tensor .data .shape
172
- newshape = 1
173
- for i in tensor .data .shape :
174
- newshape *= i
175
-
176
- tensor .data .resize (newshape )
131
+ block_size = byteswap_tensors [tensor .tensor_type ]["block_size" ]
132
+ byteswap_func = byteswap_tensors [tensor .tensor_type ]["byteswap_func" ]
177
133
178
- block_size = 210
179
134
n_blocks = len (tensor .data ) // block_size
180
135
for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
181
136
block_offs = block_num * block_size
182
137
183
- # Byte-Swap f16 sized field
184
- delta = tensor .data [block_offs + 208 :block_offs + 210 ].view (dtype = np .uint16 )
185
- delta .byteswap (inplace = True )
138
+ byteswap_func (tensor , block_offs )
186
139
187
- # Byte-Swap
188
140
if block_num % 100000 == 0 :
189
141
inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
190
142
0 commit comments