Skip to content

Commit d0624d6

Browse files
committed
chore(gpu): fix multi-gpu div performance
1 parent 00fc281 commit d0624d6

File tree

1 file changed

+1 -0 lines changed

1 file changed

+1 -0 lines changed

backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_multibit.cu

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -443,6 +443,7 @@ uint32_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
443 | 443 |   |
444 | 444 |   |   int max_blocks_per_sm;
445 | 445 |   |   int max_shared_memory = cuda_get_max_shared_memory(0);
    | 446 | + |   cudaSetDevice(gpu_index);
446 | 447 |   |   if (max_shared_memory < full_sm_keybundle)
447 | 448 |   |     cudaOccupancyMaxActiveBlocksPerMultiprocessor(
448 | 449 |   |         &max_blocks_per_sm,

0 commit comments

Comments (0)