diff --git a/benchmarks/hash_table/static_map_bench.cu b/benchmarks/hash_table/static_map_bench.cu index 59c325d5d..1e8f7789b 100644 --- a/benchmarks/hash_table/static_map_bench.cu +++ b/benchmarks/hash_table/static_map_bench.cu @@ -87,16 +87,24 @@ static void BM_static_map_insert(::benchmark::State& state) } thrust::device_vector> d_pairs(h_pairs); + thrust::device_vector d_keys(h_keys); for (auto _ : state) { - state.ResumeTiming(); - state.PauseTiming(); - map_type map{size, -1, -1}; - state.ResumeTiming(); + map_type map{size, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + + cudaEventRecord(start); map.insert(d_pairs.begin(), d_pairs.end()); + cudaEventRecord(stop); + cudaEventSynchronize(stop); - state.PauseTiming(); + float ms; + cudaEventElapsedTime(&ms, start, stop); + + state.SetIterationTime(ms / 1000); } state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) * @@ -112,7 +120,7 @@ static void BM_static_map_search_all(::benchmark::State& state) float occupancy = state.range(1) / float{100}; std::size_t size = num_keys / occupancy; - map_type map{size, -1, -1}; + map_type map{size, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; auto view = map.get_device_mutable_view(); std::vector h_keys(num_keys); @@ -143,50 +151,62 @@ static void BM_static_map_search_all(::benchmark::State& state) int64_t(state.range(0))); } -BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); +template +static void BM_static_map_erase_all(::benchmark::State& state) +{ + using map_type = cuco::static_map; -BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIQUE) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + std::size_t num_keys = state.range(0); + float occupancy = state.range(1) / float{100}; + std::size_t size = num_keys / occupancy; -BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIFORM) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + // static map with erase support + map_type map{size, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}, + cuco::sentinel::erased_key{-2}}; + auto view = map.get_device_mutable_view(); -BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::UNIFORM) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + std::vector h_keys(num_keys); + std::vector h_values(num_keys); + std::vector> h_pairs(num_keys); + std::vector h_results(num_keys); -BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::GAUSSIAN) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + generate_keys(h_keys.begin(), h_keys.end()); -BENCHMARK_TEMPLATE(BM_static_map_search_all, int32_t, int32_t, dist_type::GAUSSIAN) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + for (auto i = 0; i < num_keys; ++i) { + Key key = h_keys[i]; + Value val = h_keys[i]; + h_pairs[i].first = key; + h_pairs[i].second = val; + } -BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIQUE) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + thrust::device_vector d_keys(h_keys); + thrust::device_vector d_results(num_keys); + thrust::device_vector> d_pairs(h_pairs); -BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIQUE) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + for (auto _ : state) { + state.PauseTiming(); + map.insert(d_pairs.begin(), d_pairs.end()); + state.ResumeTiming(); -BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::UNIFORM) - ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + map.erase(d_keys.begin(), d_keys.end()); + } -BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::UNIFORM) + state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) * + int64_t(state.range(0))); +} + +BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); + ->Apply(generate_size_and_occupancy) + ->UseManualTime(); -BENCHMARK_TEMPLATE(BM_static_map_insert, int64_t, int64_t, dist_type::GAUSSIAN) +BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) ->Apply(generate_size_and_occupancy); -BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::GAUSSIAN) +BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE) ->Unit(benchmark::kMillisecond) - ->Apply(generate_size_and_occupancy); \ No newline at end of file + ->Apply(generate_size_and_occupancy) + ->UseManualTime(); \ No newline at end of file diff --git a/examples/static_map/custom_type_example.cu b/examples/static_map/custom_type_example.cu index 443323e36..e9044c0cf 100644 --- a/examples/static_map/custom_type_example.cu +++ b/examples/static_map/custom_type_example.cu @@ -91,7 +91,9 @@ int main(void) // Construct a map with 100,000 slots using the given empty key/value sentinels. Note the // capacity is chosen knowing we will insert 80,000 keys, for an load factor of 80%. cuco::static_map map{ - 100'000, empty_key_sentinel, empty_value_sentinel}; + 100'000, + cuco::sentinel::empty_key{empty_key_sentinel}, + cuco::sentinel::empty_value{empty_value_sentinel}}; // Inserts 80,000 pairs into the map by using the custom hasher and custom equality callable map.insert(pairs_begin, pairs_begin + num_pairs, custom_hash{}, custom_key_equals{}); diff --git a/examples/static_map/static_map_example.cu b/examples/static_map/static_map_example.cu index 12d12578d..018c1278e 100644 --- a/examples/static_map/static_map_example.cu +++ b/examples/static_map/static_map_example.cu @@ -32,8 +32,11 @@ int main(void) // for an load factor of 50%. cudaStream_t str; cudaStreamCreate(&str); - cuco::static_map map{ - 100'000, empty_key_sentinel, empty_value_sentinel, cuco::cuda_allocator{}, str}; + cuco::static_map map{100'000, + cuco::sentinel::empty_key{empty_key_sentinel}, + cuco::sentinel::empty_value{empty_value_sentinel}, + cuco::cuda_allocator{}, + str}; thrust::device_vector> pairs(50'000); diff --git a/include/cuco/detail/dynamic_map.inl b/include/cuco/detail/dynamic_map.inl index 57950ea45..80020232d 100644 --- a/include/cuco/detail/dynamic_map.inl +++ b/include/cuco/detail/dynamic_map.inl @@ -30,7 +30,10 @@ dynamic_map::dynamic_map(std::size_t initial_capac alloc_{alloc} { submaps_.push_back(std::make_unique>( - initial_capacity, empty_key_sentinel, empty_value_sentinel, alloc)); + initial_capacity, + sentinel::empty_key{empty_key_sentinel}, + sentinel::empty_value{empty_value_sentinel}, + alloc)); submap_views_.push_back(submaps_[0]->get_device_view()); submap_mutable_views_.push_back(submaps_[0]->get_device_mutable_view()); @@ -59,7 +62,10 @@ void dynamic_map::reserve(std::size_t n) else { submap_capacity = capacity_; submaps_.push_back(std::make_unique>( - submap_capacity, empty_key_sentinel_, empty_value_sentinel_, alloc_)); + submap_capacity, + sentinel::empty_key{empty_key_sentinel_}, + sentinel::empty_value{empty_value_sentinel_}, + alloc_)); submap_views_.push_back(submaps_[submap_idx]->get_device_view()); submap_mutable_views_.push_back(submaps_[submap_idx]->get_device_mutable_view()); diff --git a/include/cuco/detail/error.hpp b/include/cuco/detail/error.hpp index f5f331222..bb5f67e6a 100644 --- a/include/cuco/detail/error.hpp +++ b/include/cuco/detail/error.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,3 +80,23 @@ struct cuda_error : public std::runtime_error { cudaError_t const status = (expr); \ assert(cudaSuccess == status); \ } while (0) + +/** + * @brief Macro for checking runtime conditions that throws an exception when + * a condition is violated. + * + * Example usage: + * + * @code + * CUCO_RUNTIME_EXPECTS(key == value, "Key value mismatch"); + * @endcode + * + * @param[in] cond Expression that evaluates to true or false + * @param[in] reason String literal description of the reason that cond is + * expected to be true + * @throw std::runtime_error if the condition evaluates to false. + */ +#define CUCO_RUNTIME_EXPECTS(cond, reason) \ + (!!(cond)) ? static_cast(0) \ + : throw std::runtime_error("cuco failure at: " __FILE__ \ + ":" CUCO_STRINGIFY(__LINE__) ": " reason) diff --git a/include/cuco/detail/static_map.inl b/include/cuco/detail/static_map.inl index b75ac59fc..59bbf31b5 100644 --- a/include/cuco/detail/static_map.inl +++ b/include/cuco/detail/static_map.inl @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -24,14 +25,16 @@ namespace cuco { template -static_map::static_map(std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel, - Allocator const& alloc, - cudaStream_t stream) +static_map::static_map( + std::size_t capacity, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + Allocator const& alloc, + cudaStream_t stream) : capacity_{std::max(capacity, std::size_t{1})}, // to avoid dereferencing a nullptr (Issue #72) - empty_key_sentinel_{empty_key_sentinel}, - empty_value_sentinel_{empty_value_sentinel}, + empty_key_sentinel_{empty_key_sentinel.value}, + empty_value_sentinel_{empty_value_sentinel.value}, + erased_key_sentinel_{empty_key_sentinel.value}, slot_allocator_{alloc}, counter_allocator_{alloc} { @@ -43,7 +46,36 @@ static_map::static_map(std::size_t capacity, auto const grid_size = (capacity_ + stride * block_size - 1) / (stride * block_size); detail::initialize <<>>( - slots_, empty_key_sentinel, empty_value_sentinel, capacity_); + slots_, empty_key_sentinel_, empty_value_sentinel_, capacity_); +} + +template +static_map::static_map( + std::size_t capacity, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel, + Allocator const& alloc, + cudaStream_t stream) + : capacity_{std::max(capacity, std::size_t{1})}, // to avoid dereferencing a nullptr (Issue #72) + empty_key_sentinel_{empty_key_sentinel.value}, + empty_value_sentinel_{empty_value_sentinel.value}, + erased_key_sentinel_{erased_key_sentinel.value}, + slot_allocator_{alloc}, + counter_allocator_{alloc} +{ + CUCO_RUNTIME_EXPECTS(empty_key_sentinel_ != erased_key_sentinel_, + "The empty key sentinel and erased key sentinel cannot be the same value."); + + slots_ = std::allocator_traits::allocate(slot_allocator_, capacity_); + num_successes_ = std::allocator_traits::allocate(counter_allocator_, 1); + + auto constexpr block_size = 256; + auto constexpr stride = 4; + auto const grid_size = (capacity_ + stride * block_size - 1) / (stride * block_size); + detail::initialize + <<>>( + slots_, empty_key_sentinel_, empty_value_sentinel_, capacity_); } template @@ -119,6 +151,38 @@ void static_map::insert_if(InputIt first, size_ += h_num_successes; } +template +template +void static_map::erase( + InputIt first, InputIt last, Hash hash, KeyEqual key_equal, cudaStream_t stream) +{ + CUCO_RUNTIME_EXPECTS(get_empty_key_sentinel() != get_erased_key_sentinel(), + "You must provide a unique erased key sentinel value at map construction."); + + auto num_keys = std::distance(first, last); + if (num_keys == 0) { return; } + + auto constexpr block_size = 128; + auto constexpr stride = 1; + auto constexpr tile_size = 4; + auto const grid_size = (tile_size * num_keys + stride * block_size - 1) / (stride * block_size); + auto view = get_device_mutable_view(); + + // TODO: memset an atomic variable is unsafe + static_assert(sizeof(std::size_t) == sizeof(atomic_ctr_type)); + CUCO_CUDA_TRY(cudaMemsetAsync(num_successes_, 0, sizeof(atomic_ctr_type), stream)); + std::size_t h_num_successes; + + detail::erase<<>>( + first, first + num_keys, num_successes_, view, hash, key_equal); + CUCO_CUDA_TRY(cudaMemcpyAsync( + &h_num_successes, num_successes_, sizeof(atomic_ctr_type), cudaMemcpyDeviceToHost, stream)); + + CUCO_CUDA_TRY(cudaStreamSynchronize(stream)); // stream sync to ensure h_num_successes is updated + + size_ -= h_num_successes; +} + template template void static_map::find(InputIt first, @@ -186,9 +250,11 @@ template __device__ static_map::device_mutable_view::insert_result static_map::device_mutable_view::packed_cas( - iterator current_slot, value_type const& insert_pair, KeyEqual key_equal) noexcept + iterator current_slot, + value_type const& insert_pair, + KeyEqual key_equal, + Key expected_key) noexcept { - auto expected_key = this->get_empty_key_sentinel(); auto expected_value = this->get_empty_value_sentinel(); cuco::detail::pair_converter expected_pair{ @@ -216,11 +282,13 @@ template __device__ static_map::device_mutable_view::insert_result static_map::device_mutable_view::back_to_back_cas( - iterator current_slot, value_type const& insert_pair, KeyEqual key_equal) noexcept + iterator current_slot, + value_type const& insert_pair, + KeyEqual key_equal, + Key expected_key) noexcept { using cuda::std::memory_order_relaxed; - auto expected_key = this->get_empty_key_sentinel(); auto expected_value = this->get_empty_value_sentinel(); // Back-to-back CAS for 8B/8B key/value pairs @@ -254,10 +322,12 @@ template __device__ static_map::device_mutable_view::insert_result static_map::device_mutable_view::cas_dependent_write( - iterator current_slot, value_type const& insert_pair, KeyEqual key_equal) noexcept + iterator current_slot, + value_type const& insert_pair, + KeyEqual key_equal, + Key expected_key) noexcept { using cuda::std::memory_order_relaxed; - auto expected_key = this->get_empty_key_sentinel(); auto& slot_key = current_slot->first; @@ -287,24 +357,25 @@ __device__ bool static_map::device_mutable_view::i key_type const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed); // The user provide `key_equal` can never be used to compare against `empty_key_sentinel` as the // sentinel is not a valid key value. Therefore, first check for the sentinel - auto const slot_is_empty = - detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); + auto const slot_is_available = + detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()) or + detail::bitwise_compare(existing_key, this->get_erased_key_sentinel()); // the key we are trying to insert is already in the map, so we return with failure to insert - if (not slot_is_empty and key_equal(existing_key, insert_pair.first)) { return false; } + if (not slot_is_available and key_equal(existing_key, insert_pair.first)) { return false; } - if (slot_is_empty) { + if (slot_is_available) { auto const status = [&]() { // One single CAS operation if `value_type` is packable if constexpr (cuco::detail::is_packable()) { - return packed_cas(current_slot, insert_pair, key_equal); + return packed_cas(current_slot, insert_pair, key_equal, existing_key); } if constexpr (not cuco::detail::is_packable()) { #if __CUDA_ARCH__ < 700 - return cas_dependent_write(current_slot, insert_pair, key_equal); + return cas_dependent_write(current_slot, insert_pair, key_equal, existing_key); #else - return back_to_back_cas(current_slot, insert_pair, key_equal); + return back_to_back_cas(current_slot, insert_pair, key_equal, existing_key); #endif } }(); @@ -333,32 +404,35 @@ __device__ bool static_map::device_mutable_view::i // The user provide `key_equal` can never be used to compare against `empty_key_sentinel` as the // sentinel is not a valid key value. Therefore, first check for the sentinel - auto const slot_is_empty = - detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); + auto const slot_is_available = + detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()) or + detail::bitwise_compare(existing_key, this->get_erased_key_sentinel()); // the key we are trying to insert is already in the map, so we return with failure to insert - if (g.any(not slot_is_empty and key_equal(existing_key, insert_pair.first))) { return false; } + if (g.any(not slot_is_available and key_equal(existing_key, insert_pair.first))) { + return false; + } - auto const window_contains_empty = g.ballot(slot_is_empty); + auto const window_contains_available = g.ballot(slot_is_available); // we found an empty slot, but not the key we are inserting, so this must // be an empty slot into which we can insert the key - if (window_contains_empty) { + if (window_contains_available) { // the first lane in the group with an empty slot will attempt the insert insert_result status{insert_result::CONTINUE}; - uint32_t src_lane = __ffs(window_contains_empty) - 1; + uint32_t src_lane = __ffs(window_contains_available) - 1; if (g.thread_rank() == src_lane) { // One single CAS operation if `value_type` is packable if constexpr (cuco::detail::is_packable()) { - status = packed_cas(current_slot, insert_pair, key_equal); + status = packed_cas(current_slot, insert_pair, key_equal, existing_key); } // Otherwise, two back-to-back CAS operations else { #if __CUDA_ARCH__ < 700 - status = cas_dependent_write(current_slot, insert_pair, key_equal); + status = cas_dependent_write(current_slot, insert_pair, key_equal, existing_key); #else - status = back_to_back_cas(current_slot, insert_pair, key_equal); + status = back_to_back_cas(current_slot, insert_pair, key_equal, existing_key); #endif } } @@ -382,6 +456,110 @@ __device__ bool static_map::device_mutable_view::i } } +template +template +__device__ bool static_map::device_mutable_view::erase( + key_type const& k, Hash hash, KeyEqual key_equal) noexcept +{ + auto current_slot{initial_slot(k, hash)}; + + value_type const insert_pair = + make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); + + while (true) { + static_assert(sizeof(Key) == sizeof(atomic_key_type)); + static_assert(sizeof(Value) == sizeof(atomic_mapped_type)); + // TODO: Replace reinterpret_cast with atomic ref when available. + value_type slot_contents = *reinterpret_cast(current_slot); + auto existing_key = slot_contents.first; + auto existing_value = slot_contents.second; + + // Key doesn't exist, return false + if (detail::bitwise_compare(existing_key, this->get_empty_key_sentinel())) { return false; } + + // Key exists, return true if successfully deleted + if (key_equal(existing_key, k)) { + if constexpr (cuco::detail::is_packable()) { + auto slot = reinterpret_cast< + cuda::atomic::packed_type>*>( + current_slot); + cuco::detail::pair_converter expected_pair{ + cuco::make_pair(existing_key, existing_value)}; + cuco::detail::pair_converter new_pair{insert_pair}; + + return slot->compare_exchange_strong( + expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); + } + if constexpr (not cuco::detail::is_packable()) { + current_slot->second.compare_exchange_strong( + existing_value, insert_pair.second, cuda::std::memory_order_relaxed); + return current_slot->first.compare_exchange_strong( + existing_key, insert_pair.first, cuda::std::memory_order_relaxed); + } + } + + current_slot = next_slot(current_slot); + } +} + +template +template +__device__ bool static_map::device_mutable_view::erase( + CG const& g, key_type const& k, Hash hash, KeyEqual key_equal) noexcept +{ + auto current_slot = initial_slot(g, k, hash); + value_type const insert_pair = + make_pair(this->get_erased_key_sentinel(), this->get_empty_value_sentinel()); + + while (true) { + static_assert(sizeof(Key) == sizeof(atomic_key_type)); + static_assert(sizeof(Value) == sizeof(atomic_mapped_type)); + // TODO: Replace reinterpret_cast with atomic ref when available. + value_type slot_contents = *reinterpret_cast(current_slot); + auto existing_key = slot_contents.first; + auto existing_value = slot_contents.second; + + auto const slot_is_empty = + detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); + + auto const exists = g.ballot(not slot_is_empty and key_equal(existing_key, k)); + + // Key exists, return true if successfully deleted + if (exists) { + uint32_t src_lane = __ffs(exists) - 1; + + bool status; + if (g.thread_rank() == src_lane) { + if constexpr (cuco::detail::is_packable()) { + auto slot = reinterpret_cast< + cuda::atomic::packed_type>*>( + current_slot); + cuco::detail::pair_converter expected_pair{ + cuco::make_pair(existing_key, existing_value)}; + cuco::detail::pair_converter new_pair{insert_pair}; + + status = slot->compare_exchange_strong( + expected_pair.packed, new_pair.packed, cuda::std::memory_order_relaxed); + } + if constexpr (not cuco::detail::is_packable()) { + current_slot->second.compare_exchange_strong( + existing_value, insert_pair.second, cuda::std::memory_order_relaxed); + status = current_slot->first.compare_exchange_strong( + existing_key, insert_pair.first, cuda::std::memory_order_relaxed); + } + } + + uint32_t res_status = g.shfl(static_cast(status), src_lane); + return static_cast(res_status); + } + + // empty slot found, but key not found, must not be in the map + if (g.ballot(slot_is_empty)) { return false; } + + current_slot = next_slot(g, current_slot); + } +} + template template __device__ typename static_map::device_view::iterator diff --git a/include/cuco/detail/static_map_kernels.cuh b/include/cuco/detail/static_map_kernels.cuh index 642373135..d1c2ac5c1 100644 --- a/include/cuco/detail/static_map_kernels.cuh +++ b/include/cuco/detail/static_map_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -156,6 +156,68 @@ __global__ void insert( if (threadIdx.x == 0) { *num_successes += block_num_successes; } } +template +__global__ void erase( + InputIt first, InputIt last, atomicT* num_successes, viewT view, Hash hash, KeyEqual key_equal) +{ + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + std::size_t thread_num_successes = 0; + + auto tid = block_size * blockIdx.x + threadIdx.x; + auto it = first + tid; + + while (it < last) { + if (view.erase(*it, hash, key_equal)) { thread_num_successes++; } + it += gridDim.x * block_size; + } + + // compute number of successfully inserted elements for each block + // and atomically add to the grand total + std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); + if (threadIdx.x == 0) { + num_successes->fetch_add(block_num_successes, cuda::std::memory_order_relaxed); + } +} + +template +__global__ void erase( + InputIt first, InputIt last, atomicT* num_successes, viewT view, Hash hash, KeyEqual key_equal) +{ + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + std::size_t thread_num_successes = 0; + + auto tile = cg::tiled_partition(cg::this_thread_block()); + auto tid = block_size * blockIdx.x + threadIdx.x; + auto it = first + tid / tile_size; + + while (it < last) { + if (view.erase(tile, *it, hash, key_equal) and tile.thread_rank() == 0) { + thread_num_successes++; + } + it += (gridDim.x * block_size) / tile_size; + } + + // compute number of successfully inserted elements for each block + // and atomically add to the grand total + std::size_t block_num_successes = BlockReduce(temp_storage).Sum(thread_num_successes); + if (threadIdx.x == 0) { + num_successes->fetch_add(block_num_successes, cuda::std::memory_order_relaxed); + } +} + /** * @brief Inserts key/value pairs in the range `[first, first + n)` if `pred` of the * corresponding stencil returns true. diff --git a/include/cuco/sentinel.hpp b/include/cuco/sentinel.hpp new file mode 100644 index 000000000..53814c438 --- /dev/null +++ b/include/cuco/sentinel.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace cuco { +namespace sentinel { + +template +struct empty_key { + __host__ __device__ empty_key(T v) : value{v} {} + T value; +}; + +template +struct empty_value { + __host__ __device__ empty_value(T v) : value{v} {} + T value; +}; + +template +struct erased_key { + __host__ __device__ erased_key(T v) : value{v} {} + T value; +}; + +} // namespace sentinel +} // namespace cuco \ No newline at end of file diff --git a/include/cuco/static_map.cuh b/include/cuco/static_map.cuh index e8141e606..b1c072da2 100644 --- a/include/cuco/static_map.cuh +++ b/include/cuco/static_map.cuh @@ -39,6 +39,7 @@ #include #include #include +#include namespace cuco { @@ -66,29 +67,38 @@ class dynamic_map; * - Host-side "bulk" operations * - Device-side "singular" operations * - * The host-side bulk operations include `insert`, `find`, and `contains`. These - * APIs should be used when there are a large number of keys to insert or lookup + * The host-side bulk operations include `insert`, `erase`, `find`, and `contains`. These + * APIs should be used when there are a large number of keys to insert, erase or lookup * in the map. For example, given a range of keys specified by device-accessible - * iterators, the bulk `insert` function will insert all keys into the map. + * iterators, the bulk `insert` function will insert all keys into the map. Note that in order + * for a `static_map` instance to support `erase`, the user must provide an `erased_key_sentinel` + * which is distinct from the `empty_key_sentinel` at construction. If `erase` is called on a + * `static_map` which was not constructed in this way, a runtime error will be generated. * * The singular device-side operations allow individual threads to perform * independent insert or find/contains operations from device code. These * operations are accessed through non-owning, trivially copyable "view" types: * `device_view` and `mutable_device_view`. The `device_view` class is an * immutable view that allows only non-modifying operations such as `find` or - * `contains`. The `mutable_device_view` class only allows `insert` operations. - * The two types are separate to prevent erroneous concurrent insert/find - * operations. + * `contains`. The `mutable_device_view` class only allows `insert` and `erase` operations. + * The two types are separate to prevent erroneous concurrent insert/erase/find + * operations. Note that the device-side `erase` may only be called if the corresponding + * `mutable_device_view` was constructed with a user-provided `erased_key_sentinel`. It is + * up to the user to ensure this condition is met. * * Example: * \code{.cpp} * int empty_key_sentinel = -1; - * int empty_value_sentine = -1; + * int empty_value_sentinel = -1; + * int erased_key_sentinel = -2; * * // Constructs a map with 100,000 slots using -1 and -1 as the empty key/value - * // sentinels. Note the capacity is chosen knowing we will insert 50,000 keys, + * // sentinels. The supplied erased key sentinel of -2 must be a different value from the empty + * // key sentinel. If erase functionality is not needed, you may elect to not supply an erased + * // key sentinel to the constructor. Note the capacity is chosen knowing we will insert 50,000 + * keys, * // for an load factor of 50%. - * static_map m{100'000, empty_key_sentinel, empty_value_sentinel}; + * static_map m{100'000, empty_key_sentinel, empty_value_sentinel, erased_value_sentinel}; * * // Create a sequence of pairs {{0,0}, {1,1}, ... {i,i}} * thrust::device_vector> pairs(50,000); @@ -154,6 +164,13 @@ class static_map { static_map(static_map const&) = delete; static_map(static_map&&) = delete; + + template + static_map(std::size_t, T1, T2, Allocator const& = Allocator{}, cudaStream_t = 0) = delete; + + template + static_map(std::size_t, T1, T2, T3, Allocator const& = Allocator{}, cudaStream_t = 0) = delete; + static_map& operator=(static_map const&) = delete; static_map& operator=(static_map&&) = delete; @@ -192,8 +209,22 @@ class static_map { * @param stream Stream used for executing the kernels */ static_map(std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + Allocator const& alloc = Allocator{}, + cudaStream_t stream = 0); + + /** + * @brief Constructs a fixed-size map with erase capability. + * empty_key_sentinel and erased_key_sentinel must be different values. + * + * @throw std::runtime error if the empty key sentinel and erased key sentinel + * are the same value + */ + static_map(std::size_t capacity, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel, Allocator const& alloc = Allocator{}, cudaStream_t stream = 0); @@ -266,6 +297,42 @@ class static_map { KeyEqual key_equal = KeyEqual{}, cudaStream_t stream = 0); + /** + * @brief Erases keys in the range `[first, last)`. + * + * For each key `k` in `[first, last)`, if `contains(k) == true), removes `k` and it's + * associated value from the map. Else, no effect. + * + * Side-effects: + * - `contains(k) == false` + * - `find(k) == end()` + * - `insert({k,v}) == true` + * - `get_size()` is reduced by the total number of erased keys + * + * This function synchronizes `stream`. + * + * @tparam InputIt Device accessible input iterator whose `value_type` is + * convertible to the map's `value_type` + * @tparam Hash Unary callable type + * @tparam KeyEqual Binary callable type + * @param first Beginning of the sequence of keys + * @param last End of the sequence of keys + * @param hash The unary function to apply to hash each key + * @param key_equal The binary function to compare two keys for equality + * @param stream Stream used for executing the kernels + * + * @throw std::runtime_error if a unique erased key sentinel value was not + * provided at construction + */ + template , + typename KeyEqual = thrust::equal_to> + void erase(InputIt first, + InputIt last, + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}, + cudaStream_t stream = 0); + /** * @brief Finds the values corresponding to all keys in the range `[first, last)`. * @@ -361,6 +428,7 @@ class static_map { using slot_type = slot_type; Key empty_key_sentinel_{}; ///< Key value that represents an empty slot + Key erased_key_sentinel_{}; ///< Key value that represents an erased slot Value empty_value_sentinel_{}; ///< Initial Value of empty slot pair_atomic_type* slots_{}; ///< Pointer to flat slots storage std::size_t capacity_{}; ///< Total number of slots @@ -368,10 +436,12 @@ class static_map { __host__ __device__ device_view_base(pair_atomic_type* slots, std::size_t capacity, Key empty_key_sentinel, - Value empty_value_sentinel) noexcept + Value empty_value_sentinel, + Key erased_key_sentinel) noexcept : slots_{slots}, capacity_{capacity}, empty_key_sentinel_{empty_key_sentinel}, + erased_key_sentinel_{erased_key_sentinel}, empty_value_sentinel_{empty_value_sentinel} { } @@ -565,6 +635,11 @@ class static_map { return empty_value_sentinel_; } + __host__ __device__ Key get_erased_key_sentinel() const noexcept + { + return erased_key_sentinel_; + } + /** * @brief Returns iterator to the first slot. * @@ -673,9 +748,14 @@ class static_map { */ __host__ __device__ device_mutable_view(pair_atomic_type* slots, std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel) noexcept - : device_view_base{slots, capacity, empty_key_sentinel, empty_value_sentinel} + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel) noexcept + : device_view_base{slots, + capacity, + empty_key_sentinel.value, + empty_value_sentinel.value, + erased_key_sentinel.value} { } @@ -697,12 +777,14 @@ class static_map { * @param insert_pair The pair to insert * @param key_equal The binary callable used to compare two keys for * equality + * @param expected_key The expected value of the key in the target slot * @return An insert result from the `insert_resullt` enumeration. */ template __device__ insert_result packed_cas(iterator current_slot, value_type const& insert_pair, - KeyEqual key_equal) noexcept; + KeyEqual key_equal, + Key expected_key) noexcept; /** * @brief Inserts the specified key/value pair with two back-to-back CAS operations. @@ -712,12 +794,14 @@ class static_map { * @param insert_pair The pair to insert * @param key_equal The binary callable used to compare two keys for * equality + * @param expected_key The expected value of the key in the target slot * @return An insert result from the `insert_resullt` enumeration. */ template __device__ insert_result back_to_back_cas(iterator current_slot, value_type const& insert_pair, - KeyEqual key_equal) noexcept; + KeyEqual key_equal, + Key expected_key) noexcept; /** * @brief Inserts the specified key/value pair with a CAS of the key and a dependent write of @@ -728,12 +812,14 @@ class static_map { * @param insert_pair The pair to insert * @param key_equal The binary callable used to compare two keys for * equality + * @param expected_key The expected value of the key in the target slot * @return An insert result from the `insert_resullt` enumeration. */ template __device__ insert_result cas_dependent_write(iterator current_slot, value_type const& insert_pair, - KeyEqual key_equal) noexcept; + KeyEqual key_equal, + Key expected_key) noexcept; public: template @@ -741,12 +827,32 @@ class static_map { CG g, pair_atomic_type* slots, std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel) noexcept + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel) noexcept + { + device_view_base::initialize_slots( + g, slots, capacity, empty_key_sentinel.value, empty_value_sentinel.value); + return device_mutable_view{slots, + capacity, + empty_key_sentinel, + empty_value_sentinel, + sentinel::erased_key{empty_key_sentinel.value}}; + } + + /* Features erase support */ + template + __device__ static device_mutable_view make_from_uninitialized_slots( + CG g, + pair_atomic_type* slots, + std::size_t capacity, + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel) noexcept { device_view_base::initialize_slots( g, slots, capacity, empty_key_sentinel, empty_value_sentinel); - return device_mutable_view{slots, capacity, empty_key_sentinel, empty_value_sentinel}; + return device_mutable_view{ + slots, capacity, empty_key_sentinel, empty_value_sentinel, erased_key_sentinel}; } /** @@ -798,6 +904,21 @@ class static_map { value_type const& insert_pair, Hash hash = Hash{}, KeyEqual key_equal = KeyEqual{}) noexcept; + + template , + typename KeyEqual = thrust::equal_to> + __device__ bool erase(key_type const& k, + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}) noexcept; + + template , + typename KeyEqual = thrust::equal_to> + __device__ bool erase(CG const& g, + key_type const& k, + Hash hash = Hash{}, + KeyEqual key_equal = KeyEqual{}) noexcept; + }; // class device mutable view /** @@ -830,9 +951,14 @@ class static_map { */ __host__ __device__ device_view(pair_atomic_type* slots, std::size_t capacity, - Key empty_key_sentinel, - Value empty_value_sentinel) noexcept - : device_view_base{slots, capacity, empty_key_sentinel, empty_value_sentinel} + sentinel::empty_key empty_key_sentinel, + sentinel::empty_value empty_value_sentinel, + sentinel::erased_key erased_key_sentinel) noexcept + : device_view_base{slots, + capacity, + empty_key_sentinel.value, + empty_value_sentinel.value, + erased_key_sentinel.value} { } @@ -845,7 +971,8 @@ class static_map { : device_view_base{mutable_map.get_slots(), mutable_map.get_capacity(), mutable_map.get_empty_key_sentinel(), - mutable_map.get_empty_value_sentinel()} + mutable_map.get_empty_value_sentinel(), + mutable_map.get_erased_key_sentinel()} { } @@ -914,10 +1041,12 @@ class static_map { g.sync(); #endif - return device_view(memory_to_use, - source_device_view.get_capacity(), - source_device_view.get_empty_key_sentinel(), - source_device_view.get_empty_value_sentinel()); + return device_view( + memory_to_use, + source_device_view.get_capacity(), + sentinel::empty_key{source_device_view.get_empty_key_sentinel()}, + sentinel::empty_value{source_device_view.get_empty_value_sentinel()}, + sentinel::erased_key{source_device_view.get_erased_key_sentinel()}); } /** @@ -1098,6 +1227,13 @@ class static_map { */ Value get_empty_value_sentinel() const noexcept { return empty_value_sentinel_; } + /** + * @brief Gets the sentinel value used to represent an erased value slot. + * + * @return The sentinel value used to represent an erased value slot + */ + Key get_erased_key_sentinel() const noexcept { return erased_key_sentinel_; } + /** * @brief Constructs a device_view object based on the members of the `static_map` object. * @@ -1105,7 +1241,11 @@ class static_map { */ device_view get_device_view() const noexcept { - return device_view(slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_); + return device_view(slots_, + capacity_, + sentinel::empty_key{empty_key_sentinel_}, + sentinel::empty_value{empty_value_sentinel_}, + sentinel::erased_key{erased_key_sentinel_}); } /** @@ -1115,7 +1255,11 @@ class static_map { */ device_mutable_view get_device_mutable_view() const noexcept { - return device_mutable_view(slots_, capacity_, empty_key_sentinel_, empty_value_sentinel_); + return device_mutable_view(slots_, + capacity_, + sentinel::empty_key{empty_key_sentinel_}, + sentinel::empty_value{empty_value_sentinel_}, + sentinel::erased_key{erased_key_sentinel_}); } private: @@ -1124,6 +1268,7 @@ class static_map { std::size_t size_{}; ///< Number of keys in map Key empty_key_sentinel_{}; ///< Key value that represents an empty slot Value empty_value_sentinel_{}; ///< Initial value of empty slot + Key erased_key_sentinel_{}; ///< Key value that represents an erased slot atomic_ctr_type* num_successes_{}; ///< Number of successfully inserted keys on insert slot_allocator_type slot_allocator_{}; ///< Allocator used to allocate slots counter_allocator_type counter_allocator_{}; ///< Allocator used to allocate `num_successes_` diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 322fb26f0..082b17d66 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -54,6 +54,7 @@ endfunction(ConfigureTest) ################################################################################################### # - static_map tests ------------------------------------------------------------------------------ ConfigureTest(STATIC_MAP_TEST + static_map/erase_test.cu static_map/custom_type_test.cu static_map/duplicate_keys_test.cu static_map/key_sentinel_test.cu diff --git a/tests/static_map/custom_type_test.cu b/tests/static_map/custom_type_test.cu index 1e75bf826..2537c818d 100644 --- a/tests/static_map/custom_type_test.cu +++ b/tests/static_map/custom_type_test.cu @@ -104,7 +104,9 @@ TEMPLATE_TEST_CASE_SIG("User defined key and value type", constexpr std::size_t num = 100; constexpr std::size_t capacity = num * 2; - cuco::static_map map{capacity, sentinel_key, sentinel_value}; + cuco::static_map map{capacity, + cuco::sentinel::empty_key{sentinel_key}, + cuco::sentinel::empty_value{sentinel_value}}; thrust::device_vector insert_keys(num); thrust::device_vector insert_values(num); diff --git a/tests/static_map/duplicate_keys_test.cu b/tests/static_map/duplicate_keys_test.cu index 5cdfbfe7f..edfc9eca8 100644 --- a/tests/static_map/duplicate_keys_test.cu +++ b/tests/static_map/duplicate_keys_test.cu @@ -34,7 +34,8 @@ TEMPLATE_TEST_CASE_SIG("Duplicate keys", (int64_t, int64_t)) { constexpr std::size_t num_keys{500'000}; - cuco::static_map map{num_keys * 2, -1, -1}; + cuco::static_map map{ + num_keys * 2, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); diff --git a/tests/static_map/erase_test.cu b/tests/static_map/erase_test.cu new file mode 100644 index 000000000..fa91adda2 --- /dev/null +++ b/tests/static_map/erase_test.cu @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include + +TEMPLATE_TEST_CASE_SIG("erase key", "", ((typename T), T), (int32_t), (int64_t)) +{ + using Key = T; + using Value = T; + + constexpr std::size_t num_keys = 1'000'000; + constexpr std::size_t capacity = 1'100'000; + + cuco::static_map map{capacity, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}, + cuco::sentinel::erased_key{-2}}; + + auto m_view = map.get_device_mutable_view(); + auto view = map.get_device_view(); + + thrust::device_vector d_keys(num_keys); + thrust::device_vector d_values(num_keys); + thrust::device_vector d_keys_exist(num_keys); + + thrust::sequence(thrust::device, d_keys.begin(), d_keys.end(), 1); + thrust::sequence(thrust::device, d_values.begin(), d_values.end(), 1); + + auto pairs_begin = + thrust::make_zip_iterator(thrust::make_tuple(d_keys.begin(), d_values.begin())); + + SECTION("Check basic insert/erase") + { + map.insert(pairs_begin, pairs_begin + num_keys); + + REQUIRE(map.get_size() == num_keys); + + map.erase(d_keys.begin(), d_keys.end()); + + REQUIRE(map.get_size() == 0); + + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::none_of(d_keys_exist.begin(), + d_keys_exist.end(), + [] __device__(const bool key_found) { return key_found; })); + + map.insert(pairs_begin, pairs_begin + num_keys); + + REQUIRE(map.get_size() == num_keys); + + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::all_of(d_keys_exist.begin(), + d_keys_exist.end(), + [] __device__(const bool key_found) { return key_found; })); + + map.erase(d_keys.begin(), d_keys.begin() + num_keys / 2); + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::none_of(d_keys_exist.begin(), + d_keys_exist.begin() + num_keys / 2, + [] __device__(const bool key_found) { return key_found; })); + + REQUIRE(cuco::test::all_of(d_keys_exist.begin() + num_keys / 2, + d_keys_exist.end(), + [] __device__(const bool key_found) { return key_found; })); + + map.erase(d_keys.begin() + num_keys / 2, d_keys.end()); + REQUIRE(map.get_size() == 0); + } +} \ No newline at end of file diff --git a/tests/static_map/key_sentinel_test.cu b/tests/static_map/key_sentinel_test.cu index a96a60af4..65ae7624f 100644 --- a/tests/static_map/key_sentinel_test.cu +++ b/tests/static_map/key_sentinel_test.cu @@ -36,7 +36,8 @@ TEMPLATE_TEST_CASE_SIG( using Value = T; constexpr std::size_t num_keys{SIZE}; - cuco::static_map map{SIZE * 2, -1, -1}; + cuco::static_map map{ + SIZE * 2, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); diff --git a/tests/static_map/shared_memory_test.cu b/tests/static_map/shared_memory_test.cu index 00bbbcd08..9f4b1fc08 100644 --- a/tests/static_map/shared_memory_test.cu +++ b/tests/static_map/shared_memory_test.cu @@ -88,7 +88,8 @@ TEMPLATE_TEST_CASE_SIG("Shared memory static map", // operator yet std::vector> maps; for (std::size_t map_id = 0; map_id < number_of_maps; ++map_id) { - maps.push_back(std::make_unique(map_capacity, -1, -1)); + maps.push_back(std::make_unique( + map_capacity, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1})); } thrust::device_vector d_keys_exist(number_of_maps * elements_in_map); @@ -154,7 +155,11 @@ __global__ void shared_memory_hash_table_kernel(bool* key_found) using map_type = typename cuco::static_map::device_mutable_view; using find_map_type = typename cuco::static_map::device_view; __shared__ typename map_type::slot_type slots[N]; - auto map = map_type::make_from_uninitialized_slots(cg::this_thread_block(), &slots[0], N, -1, -1); + auto map = map_type::make_from_uninitialized_slots(cg::this_thread_block(), + &slots[0], + N, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}); auto g = cg::this_thread_block(); std::size_t index = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/tests/static_map/stream_test.cu b/tests/static_map/stream_test.cu index aadee823a..cab215948 100644 --- a/tests/static_map/stream_test.cu +++ b/tests/static_map/stream_test.cu @@ -33,7 +33,11 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys on given stream", cudaStreamCreate(&stream); constexpr std::size_t num_keys{500'000}; - cuco::static_map map{1'000'000, -1, -1, cuco::cuda_allocator{}, stream}; + cuco::static_map map{1'000'000, + cuco::sentinel::empty_key{-1}, + cuco::sentinel::empty_value{-1}, + cuco::cuda_allocator{}, + stream}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view(); diff --git a/tests/static_map/unique_sequence_test.cu b/tests/static_map/unique_sequence_test.cu index da4b51a3f..7feeb8da9 100644 --- a/tests/static_map/unique_sequence_test.cu +++ b/tests/static_map/unique_sequence_test.cu @@ -30,7 +30,8 @@ TEMPLATE_TEST_CASE_SIG("Unique sequence of keys", (int64_t, int64_t)) { constexpr std::size_t num_keys{500'000}; - cuco::static_map map{1'000'000, -1, -1}; + cuco::static_map map{ + 1'000'000, cuco::sentinel::empty_key{-1}, cuco::sentinel::empty_value{-1}}; auto m_view = map.get_device_mutable_view(); auto view = map.get_device_view();