Skip to content

Commit

Permalink
chore(gpu): add some checks on radix sizes for carry propagation
Browse files (browse the repository at this point in the history)
  • Loading branch information
agnesLeroy committed Feb 12, 2025
1 parent 16d8af1 commit 987d5bf
Show file tree
Hide file tree
Showing 5 changed files with 374 additions and 154 deletions.
25 changes: 11 additions & 14 deletions backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ template <typename Torus> struct int_radix_lut {
// lwe_trivial_indexes is the intermediary index we need in case
// lwe_indexes_in != lwe_indexes_out
Torus *lwe_trivial_indexes;
Torus *tmp_lwe_before_ks;
CudaRadixCiphertextFFI *tmp_lwe_before_ks;

/// For multi GPU execution we create vectors of pointers for inputs and
/// outputs
Expand Down Expand Up @@ -270,12 +270,10 @@ template <typename Torus> struct int_radix_lut {
num_radix_blocks);

// Keyswitch
Torus big_size =
(params.big_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
Torus small_size =
(params.small_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
tmp_lwe_before_ks =
(Torus *)cuda_malloc_async(big_size, streams[0], gpu_indexes[0]);
tmp_lwe_before_ks = new CudaRadixCiphertextFFI;
create_zero_radix_ciphertext_async<Torus>(
streams[0], gpu_indexes[0], tmp_lwe_before_ks, num_radix_blocks,
params.big_lwe_dimension);
}
degrees = (uint64_t *)malloc(num_luts * sizeof(uint64_t));
max_degrees = (uint64_t *)malloc(num_luts * sizeof(uint64_t));
Expand Down Expand Up @@ -465,12 +463,10 @@ template <typename Torus> struct int_radix_lut {
num_radix_blocks);

// Keyswitch
Torus big_size =
(params.big_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
Torus small_size =
(params.small_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
tmp_lwe_before_ks =
(Torus *)cuda_malloc_async(big_size, streams[0], gpu_indexes[0]);
tmp_lwe_before_ks = new CudaRadixCiphertextFFI;
create_zero_radix_ciphertext_async<Torus>(
streams[0], gpu_indexes[0], tmp_lwe_before_ks, num_radix_blocks,
params.big_lwe_dimension);
}
degrees = (uint64_t *)malloc(num_many_lut * num_luts * sizeof(uint64_t));
max_degrees = (uint64_t *)malloc(num_luts * sizeof(uint64_t));
Expand Down Expand Up @@ -555,7 +551,8 @@ template <typename Torus> struct int_radix_lut {
free(h_lwe_indexes_out);

if (!mem_reuse) {
cuda_drop_async(tmp_lwe_before_ks, streams[0], gpu_indexes[0]);
release_radix_ciphertext(streams[0], gpu_indexes[0], tmp_lwe_before_ks);
delete tmp_lwe_before_ks;
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
for (int i = 0; i < buffer.size(); i++) {
switch (params.pbs_type) {
Expand Down
2 changes: 1 addition & 1 deletion backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ __host__ void zero_out_if(cudaStream_t const *streams,
// We can't use integer_radix_apply_bivariate_lookup_table_kb since the
// second operand is not an array
auto tmp_lwe_array_input = mem_ptr->tmp;
pack_bivariate_blocks_with_single_block<Torus>(
host_pack_bivariate_blocks_with_single_block<Torus>(
streams, gpu_indexes, gpu_count, tmp_lwe_array_input,
predicate->lwe_indexes_in, lwe_array_input, lwe_condition,
predicate->lwe_indexes_in, params.big_lwe_dimension,
Expand Down
Loading

0 comments on commit 987d5bf

Please sign in to comment.