Skip to content

Commit 4996101

Browse files
authored
Fix gcc warning (#1527)
1 parent eb49333 commit 4996101

File tree

1 file changed

+8
-5
lines changed

1 file changed

+8
-5
lines changed

torchao/csrc/cuda/sparse_marlin/marlin_kernel_nm.cu

+8-5
Original file line numberDiff line numberDiff line change
@@ -401,10 +401,13 @@ __global__ void Marlin_24(
401401
meta_ptr[i] += m_gl_rd_delta_o;
402402
}
403403
// Only fetch scales if this tile starts a new group
404-
if (group_blocks != -1 && pipe % (group_blocks / thread_k_blocks) == 0) {
405-
int4* sh_s_stage = sh_s + s_sh_stage * pipe;
406-
if (s_sh_wr_pred) cp_async4(&sh_s_stage[s_sh_wr], &s[s_gl_rd]);
407-
s_gl_rd += s_gl_rd_delta;
404+
if constexpr (group_blocks != -1) {
405+
if (pipe % (group_blocks / thread_k_blocks) == 0) {
406+
int4 *sh_s_stage = sh_s + s_sh_stage * pipe;
407+
if (s_sh_wr_pred)
408+
cp_async4(&sh_s_stage[s_sh_wr], &s[s_gl_rd]);
409+
s_gl_rd += s_gl_rd_delta;
410+
}
408411
}
409412
}
410413
// Insert a fence even when we are winding down the pipeline to ensure that
@@ -429,7 +432,7 @@ __global__ void Marlin_24(
429432
// however, this does not seem to be a significant bottleneck, while some
430433
// theoretically better attempts have led to bad instruction ordering by
431434
// the compiler and correspondingly a noticeable drop in performance.
432-
if (group_blocks != -1) {
435+
if constexpr (group_blocks != -1) {
433436
int4* sh_s_stage =
434437
sh_s + s_sh_stage * ((group_blocks / thread_k_blocks) *
435438
(pipe / (group_blocks / thread_k_blocks)));

0 commit comments

Comments
 (0)