We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 227a5ed + 89a66af; commit 24078c2 (Copy full SHA for 24078c2)
source/adapters/cuda/enqueue.cpp
@@ -245,13 +245,14 @@ setKernelParams(const ur_context_handle_t Context,
245
return UR_RESULT_SUCCESS;
246
};
247
248
- size_t KernelLocalWorkGroupSize = 0;
+ size_t KernelLocalWorkGroupSize = 1;
249
for (size_t Dim = 0; Dim < WorkDim; Dim++) {
250
auto Err = IsValid(Dim);
251
if (Err != UR_RESULT_SUCCESS)
252
return Err;
253
- // If no error then sum the total local work size per dim.
254
- KernelLocalWorkGroupSize += LocalWorkSize[Dim];
+ // If no error then compute the total local work size as a product of
+ // all dims.
255
+ KernelLocalWorkGroupSize *= LocalWorkSize[Dim];
256
}
257
258
if (hasExceededMaxRegistersPerBlock(Device, Kernel,
0 commit comments