@@ -35,6 +35,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
         if tensor.tensor_type not in (
             gguf.GGMLQuantizationType.F32,
             gguf.GGMLQuantizationType.F16,
+            gguf.GGMLQuantizationType.Q4_0,
             gguf.GGMLQuantizationType.Q8_0,
             gguf.GGMLQuantizationType.Q4_K,
             gguf.GGMLQuantizationType.Q6_K,
@@ -72,11 +73,48 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
             part.byteswap(inplace=True)
 
         # Byte-swap tensor data if necessary
-        if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
+        if tensor.tensor_type == gguf.GGMLQuantizationType.Q4_0:
+            # Handle Q4_0 tensor blocks (block_q4_0)
+            # Specific handling of block_q4_0 is required.
+            # Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 bytes holding 32 packed 4-bit quantizations.
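+            # Only the 2-byte delta is endian-sensitive; the packed 4-bit quants occupy single bytes
+            # and therefore need no byte-swapping.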
+
+            # first flatten structure
+            oldshape = tensor.data.shape
+            newshape = 1
+            for i in tensor.data.shape:
+                newshape *= i
+
+            tensor.data.resize(newshape)
+
+            block_size = 18  # 18 bytes = <f16 delta scaling factor> + 16 bytes of packed 4-bit quants
+
+            n_blocks = len(tensor.data) // block_size
+            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
+                block_offs = block_num * block_size
+
+                # Byte-Swap f16 sized delta field
+                delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
+                delta.byteswap(inplace=True)
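+                # (the uint16 view shares memory with tensor.data, so the in-place byteswap
+                # rewrites the delta bytes of the tensor directly)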
+
+                # Update progress bar description periodically
+                if block_num % 100000 == 0:
+                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
+
+            # restore old shape in case it's ever used
+            tensor.data.resize(oldshape)
+        elif tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
             # Handle Q8_0 tensor blocks (block_q8_0)
             # Specific handling of block_q8_0 is required.
             # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
 
+            # first flatten structure
+            oldshape = tensor.data.shape
+            newshape = 1
+            for i in tensor.data.shape:
+                newshape *= i
+
+            tensor.data.resize(newshape)
+
             block_size = 34  # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
 
             n_blocks = len(tensor.data) // block_size
@@ -91,12 +129,15 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
                 if block_num % 100000 == 0:
                     inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
 
+            # restore old shape in case it's ever used
+            tensor.data.resize(oldshape)
         elif tensor.tensor_type == gguf.GGMLQuantizationType.Q4_K:
             # Handle Q4_K tensor blocks (block_q4_k)
             # Specific handling of block_q4_k is required.
             # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
 
             # first flatten structure
+            oldshape = tensor.data.shape
             newshape = 1
             for i in tensor.data.shape:
                 newshape *= i
@@ -119,12 +160,15 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
                 if block_num % 100000 == 0:
                     inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
 
+            # restore old shape in case it's ever used
+            tensor.data.resize(oldshape)
         elif tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K:
             # Handle Q6_K tensor blocks (block_q6_k)
             # Specific handling of block_q6_k is required.
             # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
 
             # first flatten structure
+            oldshape = tensor.data.shape
             newshape = 1
             for i in tensor.data.shape:
                 newshape *= i
@@ -144,6 +188,8 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
                 if block_num % 100000 == 0:
                     inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
 
+            # restore old shape in case it's ever used
+            tensor.data.resize(oldshape)
         else:
             # Handle other tensor types
             tensor.data.byteswap(inplace=True)
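
For reference, the per-block swap that the new Q4_0 branch performs can be exercised on its own. The sketch below is illustrative only and is not part of the patch: it assumes the same layout the diff documents (18-byte blocks made of a 2-byte f16 delta followed by 16 bytes of packed 4-bit quants, in a flat uint8 buffer), and the helper name byteswap_q4_0 and the demo block are made up for this example.

import numpy as np

BLOCK_SIZE_Q4_0 = 18  # 2-byte f16 delta + 16 bytes holding 32 packed 4-bit quants


def byteswap_q4_0(data: np.ndarray) -> None:
    """Byte-swap every f16 delta in a flat uint8 buffer of Q4_0-style blocks, in place."""
    assert data.dtype == np.uint8 and data.ndim == 1
    n_blocks = len(data) // BLOCK_SIZE_Q4_0
    for block_num in range(n_blocks):
        offs = block_num * BLOCK_SIZE_Q4_0
        # Reinterpret the two delta bytes as one uint16 and swap them in place;
        # the 16 quant bytes that follow have no byte order and are left untouched.
        data[offs:offs + 2].view(np.uint16).byteswap(inplace=True)


# Tiny demo: one fake block whose delta is f16 1.0 and whose quants are all zero.
block = np.zeros(BLOCK_SIZE_Q4_0, dtype=np.uint8)
block[:2] = np.frombuffer(np.float16(1.0).tobytes(), dtype=np.uint8)
before = bytes(block[:2])
byteswap_q4_0(block)
print(before, "->", bytes(block[:2]))  # the two delta bytes swap places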