@@ -88,6 +88,83 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
88
88
}
89
89
#endif
90
90
91
+ #if defined(DATA_A_IQ1_S)
92
// IQ1_S: decode two consecutive grid values of block `ib`, starting at element `iqs`.
//   ib       - block index, relative to a_offset
//   iqs      - element index inside the block
//   a_offset - base index into data_a
// Returns dl * (grid value + delta) for the two elements; the block's `d` field is
// not read here.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk    = a_offset + ib;
    const uint group8 = iqs >> 3;        // one qs entry per 8 elements
    const int  lane   = int(iqs & 7u);   // position inside the 8-element group

    // One qh word per 32 elements.
    const uint hi = data_a[blk].qh[iqs >> 5];
    const uint lo = data_a[blk].qs[group8];

    // Bits 12..14 of qh: 3-bit scale, mapped to the odd values 1, 3, ..., 15.
    const float scale = float(2u * bitfieldExtract(hi, 12, 3) + 1u);
    // Bit 15 of qh selects the sign of the delta offset.
    const float shift = ((hi & 0x8000u) == 0u) ? IQ1S_DELTA : -IQ1S_DELTA;

    // The grid index is the qs entry plus a 3-bit field of qh placed at bits 8..10;
    // the 3-bit field is chosen by the group's position within its 32 elements.
    const uint top3 = bitfieldExtract(hi, int(group8 & 3u) * 3, 3);
    const int16_t packed = int16_t(iq1s_grid[lo | (top3 << 8)]);

    // Each element is a 2-bit field of the grid entry; extracting from a signed
    // type sign-extends the field.
    const int bitpos = 2 * lane;
    const ivec2 q = ivec2(
        bitfieldExtract(packed, bitpos, 2),
        bitfieldExtract(packed, bitpos + 2, 2)
    );

    const vec2 shifted = vec2(q) + shift;
    return scale * shifted;
}
109
// IQ1_S: four-wide variant of dequantize(). Decodes four consecutive grid values of
// block `ib`, starting at element `iqs`, and returns dl * (grid value + delta).
//   ib       - block index, relative to a_offset
//   iqs      - element index inside the block
//   a_offset - base index into data_a
vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
    const uint blk    = a_offset + ib;
    const uint group8 = iqs >> 3;        // one qs entry per 8 elements
    const int  lane   = int(iqs & 7u);   // position inside the 8-element group

    // One qh word per 32 elements.
    const uint hi = data_a[blk].qh[iqs >> 5];
    const uint lo = data_a[blk].qs[group8];

    // Bits 12..14 of qh: 3-bit scale, mapped to the odd values 1, 3, ..., 15.
    const float scale = float(2u * bitfieldExtract(hi, 12, 3) + 1u);
    // Bit 15 of qh selects the sign of the delta offset.
    const float shift = ((hi & 0x8000u) == 0u) ? IQ1S_DELTA : -IQ1S_DELTA;

    // The grid index is the qs entry plus a 3-bit field of qh placed at bits 8..10.
    const uint top3 = bitfieldExtract(hi, int(group8 & 3u) * 3, 3);
    const int16_t packed = int16_t(iq1s_grid[lo | (top3 << 8)]);

    // Each element is a 2-bit field of the grid entry; extracting from a signed
    // type sign-extends the field.
    const int bitpos = 2 * lane;
    const ivec4 q = ivec4(
        bitfieldExtract(packed, bitpos, 2),
        bitfieldExtract(packed, bitpos + 2, 2),
        bitfieldExtract(packed, bitpos + 4, 2),
        bitfieldExtract(packed, bitpos + 6, 2)
    );

    const vec4 shifted = vec4(q) + shift;
    return scale * shifted;
}
127
+ #endif
128
+
129
+ #if defined(DATA_A_IQ1_M)
130
// IQ1_M: decode two consecutive grid values of block `ib`, starting at element `iqs`.
//   ib       - block index, relative to a_offset
//   iqs      - element index inside the block
//   a_offset - base index into data_a
// Returns dl * (grid value + delta) for the two elements.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk     = a_offset + ib;
    const uint group8  = iqs >> 3;       // one qs entry per 8 elements
    const uint group16 = iqs >> 4;       // one qh entry per 16 elements
    const int  lane    = int(iqs & 7u);  // position inside the 8-element group

    // One scales word per 64 elements.
    const uint scale_word = data_a[blk].scales[iqs >> 6];
    const uint lo = data_a[blk].qs[group8];
    // Each qh entry carries two nibbles; odd 8-element groups use the upper one.
    const uint nib = data_a[blk].qh[group16] >> (4u * (group8 & 1u));

    // 3-bit scale selected by the 16-element group, mapped to the odd values 1..15.
    const float scale = float(2u * bitfieldExtract(scale_word, int(group16 & 3u) * 3, 3) + 1u);
    // Bit 3 of the nibble selects the sign of the delta offset.
    const float shift = ((nib & 8u) == 0u) ? IQ1M_DELTA : -IQ1M_DELTA;

    // The grid index is the qs entry plus the low 3 nibble bits placed at bits 8..10.
    const int16_t packed = int16_t(iq1s_grid[lo | ((nib & 7u) << 8)]);

    // Each element is a 2-bit field of the grid entry; extracting from a signed
    // type sign-extends the field.
    const int bitpos = 2 * lane;
    const ivec2 q = ivec2(
        bitfieldExtract(packed, bitpos, 2),
        bitfieldExtract(packed, bitpos + 2, 2)
    );

    const vec2 shifted = vec2(q) + shift;
    return scale * shifted;
}
147
// IQ1_M: four-wide variant of dequantize(). Decodes four consecutive grid values of
// block `ib`, starting at element `iqs`, and returns dl * (grid value + delta).
//   ib       - block index, relative to a_offset
//   iqs      - element index inside the block
//   a_offset - base index into data_a
vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
    const uint blk     = a_offset + ib;
    const uint group8  = iqs >> 3;       // one qs entry per 8 elements
    const uint group16 = iqs >> 4;       // one qh entry per 16 elements
    const int  lane    = int(iqs & 7u);  // position inside the 8-element group

    // One scales word per 64 elements.
    const uint scale_word = data_a[blk].scales[iqs >> 6];
    const uint lo = data_a[blk].qs[group8];
    // Each qh entry carries two nibbles; odd 8-element groups use the upper one.
    const uint nib = data_a[blk].qh[group16] >> (4u * (group8 & 1u));

    // 3-bit scale selected by the 16-element group, mapped to the odd values 1..15.
    const float scale = float(2u * bitfieldExtract(scale_word, int(group16 & 3u) * 3, 3) + 1u);
    // Bit 3 of the nibble selects the sign of the delta offset.
    const float shift = ((nib & 8u) == 0u) ? IQ1M_DELTA : -IQ1M_DELTA;

    // The grid index is the qs entry plus the low 3 nibble bits placed at bits 8..10.
    const int16_t packed = int16_t(iq1s_grid[lo | ((nib & 7u) << 8)]);

    // Each element is a 2-bit field of the grid entry; extracting from a signed
    // type sign-extends the field.
    const int bitpos = 2 * lane;
    const ivec4 q = ivec4(
        bitfieldExtract(packed, bitpos, 2),
        bitfieldExtract(packed, bitpos + 2, 2),
        bitfieldExtract(packed, bitpos + 4, 2),
        bitfieldExtract(packed, bitpos + 6, 2)
    );

    const vec4 shifted = vec4(q) + shift;
    return scale * shifted;
}
166
+ #endif
167
+
91
168
#if defined(DATA_A_IQ2_XXS)
92
169
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
93
170
const uint ib32 = iqs / 32;
@@ -357,7 +434,16 @@ vec2 get_dm(uint ib, uint a_offset) {
357
434
}
358
435
#endif
359
436
360
- #if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
437
+ #if defined(DATA_A_IQ1_M)
438
// IQ1_M: rebuild the block's half-precision scale from the four `scales` words.
// The top 4 bits of each 16-bit word contribute one nibble (word 0 = lowest nibble);
// the assembled 16-bit pattern is decoded as a half float.
//   ib       - block index, relative to a_offset
//   a_offset - base index into data_a
// Returns vec2(d, 0).
vec2 get_dm(uint ib, uint a_offset) {
    const uint blk = a_offset + ib;

    // Keep only the top nibble of every scale word.
    const u16vec4 nibbles = u16vec4(
        data_a[blk].scales[0],
        data_a[blk].scales[1],
        data_a[blk].scales[2],
        data_a[blk].scales[3]
    ) >> 12;

    // Reassemble the 16-bit half-float bit pattern, lowest nibble first.
    const uint half_bits = nibbles.x | (nibbles.y << 4) | (nibbles.z << 8) | (nibbles.w << 12);

    // unpackHalf2x16 decodes the low 16 bits into .x.
    const float block_scale = float(unpackHalf2x16(half_bits).x);
    return vec2(block_scale, 0);
}
444
+ #endif
445
+
446
+ #if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ1_S) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
361
447
// Returns the `d` field of block `a_offset + ib`, converted to float, in .x;
// .y is always 0 for the formats selected by the enclosing #if.
vec2 get_dm(uint ib, uint a_offset) {
    const float block_scale = float(data_a[a_offset + ib].d);
    return vec2(block_scale, 0);
}
0 commit comments