Skip to content

Commit c396183

Browse files
jeffbolznv authored and arthw committed
vulkan: copy iq4_nl LUT into shared memory (ggml-org#10409)
1 parent a77f705 commit c396183

File tree

6 files changed

+29
-4
lines changed

6 files changed

+29
-4
lines changed

ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
1010
void main() {
1111
const uint i = gl_WorkGroupID.x * 4 + gl_LocalInvocationID.x / 64;
1212

13+
init_iq4nl_shmem();
14+
1315
const uint tid = gl_LocalInvocationID.x % 64;
1416
const uint il = tid/32;
1517
const uint ir = tid%32;

ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp

+4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ void main() {
1212
const uint i11 = (gl_GlobalInvocationID.z)/p.ne12;
1313
const uint i12 = (gl_GlobalInvocationID.z)%p.ne12;
1414

15+
#if defined(DATA_A_IQ4_NL)
16+
init_iq4nl_shmem();
17+
#endif
18+
1519
if (i00 >= p.ne00) {
1620
return;
1721
}

ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp

+4
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,10 @@ void compute_outputs(const uint32_t first_row, const uint32_t num_rows) {
161161
void main() {
162162
const uint first_row = NUM_ROWS * (gl_WorkGroupID.x + gl_NumWorkGroups.x * gl_WorkGroupID.z);
163163

164+
#if defined(DATA_A_IQ4_NL)
165+
init_iq4nl_shmem();
166+
#endif
167+
164168
// do NUM_ROWS at a time, unless there aren't enough remaining rows
165169
if (first_row + NUM_ROWS <= p.stride_d) {
166170
compute_outputs(first_row, NUM_ROWS);

ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp

+4
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ shared u16vec2 row_ids[3072];
7575
#endif
7676

7777
void main() {
78+
#if defined(DATA_A_IQ4_NL)
79+
init_iq4nl_shmem();
80+
#endif
81+
7882
#ifdef MUL_MAT_ID
7983
const uint expert_idx = gl_GlobalInvocationID.z;
8084
#else

ggml/src/ggml-vulkan/vulkan-shaders/types.comp

+12-1
Original file line numberDiff line numberDiff line change
@@ -298,10 +298,21 @@ struct block_iq4_nl_packed16
298298
#define A_TYPE block_iq4_nl
299299
#define A_TYPE_PACKED16 block_iq4_nl_packed16
300300

301-
const int8_t kvalues_iq4nl[16] = {
301+
// Constant 16-entry int8_t lookup table for the iq4_nl type.
// init_iq4nl_shmem() reads this table, converts each entry to FLOAT_TYPE and
// stores it in the shared array kvalues_iq4nl.
const int8_t kvalues_iq4nl_const[16] = {
302302
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
303303
int8_t(1), int8_t(13), int8_t(25), int8_t(38), int8_t(53), int8_t(69), int8_t(89), int8_t(113)
304304
};
305+
306+
// Shared-memory copy of kvalues_iq4nl_const, stored as FLOAT_TYPE.
// It is filled by init_iq4nl_shmem() below.
shared FLOAT_TYPE kvalues_iq4nl[16];
307+
308+
// Fills the shared kvalues_iq4nl table from kvalues_iq4nl_const and then
// executes barrier().
//
// NOTE(review): only invocations with gl_LocalInvocationIndex < 16 write an
// entry, one entry each, so this appears to assume the workgroup has at least
// 16 invocations -- confirm for every shader that includes this.
// NOTE(review): this function contains a barrier(), so callers presumably need
// to invoke it before any early return in main() -- verify at each call site.
void init_iq4nl_shmem()
309+
{
310+
// Each of the first 16 invocations converts one const entry to FLOAT_TYPE
// and writes it to the same index of the shared table.
311+
if (gl_LocalInvocationIndex.x < 16) {
312+
kvalues_iq4nl[gl_LocalInvocationIndex.x] = FLOAT_TYPE(kvalues_iq4nl_const[gl_LocalInvocationIndex.x]);
313+
}
// Synchronize after the copy, before the function returns to its caller.
314+
barrier();
315+
}
305316
#endif
306317

307318
#endif // !defined(GGML_TYPES_COMP)

ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -331,11 +331,11 @@ void process_shaders() {
331331
shader = (tname == "f32" || tname == "f16") ? "get_rows.comp" : "get_rows_quant.comp";
332332

333333
if (tname == "f16") {
334-
string_to_spv("get_rows_" + tname, shader, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
334+
string_to_spv("get_rows_" + tname, shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}}));
335335
} else {
336-
string_to_spv("get_rows_" + tname, shader, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}});
336+
string_to_spv("get_rows_" + tname, shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}}));
337337
}
338-
string_to_spv("get_rows_" + tname + "_f32", shader, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float"}});
338+
string_to_spv("get_rows_" + tname + "_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float"}}));
339339
}
340340
}
341341

0 commit comments

Comments
 (0)