Skip to content

Commit b633956

Browse files
authored
Refactor seeds buffer with runtime shape (#311)
* Define `SeedArray` container * Refactor use of seed array
1 parent 639c73c commit b633956

File tree

5 files changed

+169
-34
lines changed

5 files changed

+169
-34
lines changed

include/CLUEstering/core/Clusterer.hpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
#include "CLUEstering/data_structures/AssociationMap.hpp"
1414
#include "CLUEstering/data_structures/PointsHost.hpp"
1515
#include "CLUEstering/data_structures/PointsDevice.hpp"
16-
#include "CLUEstering/data_structures/internal/Tiles.hpp"
1716
#include "CLUEstering/data_structures/internal/Followers.hpp"
17+
#include "CLUEstering/data_structures/internal/SeedArray.hpp"
18+
#include "CLUEstering/data_structures/internal/Tiles.hpp"
1819

1920
#include <array>
2021
#include <concepts>
@@ -48,18 +49,13 @@ namespace clue {
4849
std::array<uint8_t, Ndim> m_wrappedCoordinates;
4950

5051
std::optional<TilesDevice> m_tiles;
51-
std::optional<clue::device_buffer<Device, VecArray<int32_t, reserve>>> m_seeds;
52+
std::optional<clue::internal::SeedArray<>> m_seeds;
5253
std::optional<FollowersDevice> m_followers;
5354

5455
void setup(Queue& queue, const PointsHost& h_points, PointsDevice& dev_points) {
5556
detail::setup_tiles(queue, m_tiles, h_points, m_pointsPerTile, m_wrappedCoordinates);
5657
detail::setup_followers(queue, m_followers, h_points.size());
5758
clue::copyToDevice(queue, dev_points, h_points);
58-
59-
if (!m_seeds.has_value()) {
60-
m_seeds = clue::make_device_buffer<VecArray<int32_t, reserve>>(queue);
61-
}
62-
alpaka::memset(queue, *m_seeds, 0x00);
6359
}
6460

6561
template <concepts::convolutional_kernel Kernel>

include/CLUEstering/core/detail/Clusterer.hpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,14 @@
77
#include "CLUEstering/core/detail/ClusteringKernels.hpp"
88
#include "CLUEstering/core/detail/ComputeTiles.hpp"
99
#include "CLUEstering/core/detail/defines.hpp"
10+
#include "CLUEstering/core/detail/SetupFollowers.hpp"
11+
#include "CLUEstering/core/detail/SetupSeeds.hpp"
12+
#include "CLUEstering/core/detail/SetupTiles.hpp"
1013
#include "CLUEstering/data_structures/PointsHost.hpp"
1114
#include "CLUEstering/data_structures/PointsDevice.hpp"
12-
#include "CLUEstering/data_structures/internal/Tiles.hpp"
1315
#include "CLUEstering/data_structures/internal/Followers.hpp"
16+
#include "CLUEstering/data_structures/internal/SeedArray.hpp"
17+
#include "CLUEstering/data_structures/internal/Tiles.hpp"
1418
#include "CLUEstering/utils/get_clusters.hpp"
1519

1620
#include <alpaka/mem/view/Traits.hpp>
@@ -132,10 +136,6 @@ namespace clue {
132136
std::size_t block_size) {
133137
detail::setup_tiles(queue, m_tiles, dev_points, m_pointsPerTile, m_wrappedCoordinates);
134138
detail::setup_followers(queue, m_followers, dev_points.size());
135-
if (!m_seeds.has_value()) {
136-
m_seeds = clue::make_device_buffer<VecArray<int32_t, reserve>>(queue);
137-
}
138-
alpaka::memset(queue, *m_seeds, 0x00);
139139
make_clusters_impl(dev_points, kernel, queue, block_size);
140140
alpaka::wait(queue);
141141
}
@@ -178,11 +178,13 @@ namespace clue {
178178

179179
detail::computeLocalDensity<Acc>(
180180
queue, work_division, m_tiles->view(), dev_points.view(), kernel, m_dc, n_points);
181+
auto seed_candidates = 0ul;
181182
detail::computeNearestHighers<Acc>(
182-
queue, work_division, m_tiles->view(), dev_points.view(), m_dm, n_points);
183+
queue, work_division, m_tiles->view(), dev_points.view(), m_dm, seed_candidates, n_points);
184+
detail::setup_seeds(queue, m_seeds, seed_candidates);
183185
detail::findClusterSeeds<Acc>(queue,
184186
work_division,
185-
m_seeds->data(),
187+
m_seeds.value(),
186188
m_tiles->view(),
187189
dev_points.view(),
188190
m_seed_dc,
@@ -192,7 +194,7 @@ namespace clue {
192194
m_followers->template fill<Acc>(queue, dev_points);
193195

194196
detail::assignPointsToClusters<Acc>(
195-
queue, block_size, m_seeds->data(), m_followers->view(), dev_points.view());
197+
queue, block_size, m_seeds.value(), m_followers->view(), dev_points.view());
196198

197199
clue::copyToHost(queue, h_points, dev_points);
198200
h_points.mark_clustered();
@@ -213,11 +215,13 @@ namespace clue {
213215

214216
detail::computeLocalDensity<Acc>(
215217
queue, work_division, m_tiles->view(), dev_points.view(), kernel, m_dc, n_points);
218+
auto seed_candidates = 0ul;
216219
detail::computeNearestHighers<Acc>(
217-
queue, work_division, m_tiles->view(), dev_points.view(), m_dm, n_points);
220+
queue, work_division, m_tiles->view(), dev_points.view(), m_dm, seed_candidates, n_points);
221+
detail::setup_seeds(queue, m_seeds, seed_candidates);
218222
detail::findClusterSeeds<Acc>(queue,
219223
work_division,
220-
m_seeds->data(),
224+
m_seeds.value(),
221225
m_tiles->view(),
222226
dev_points.view(),
223227
m_seed_dc,
@@ -227,7 +231,7 @@ namespace clue {
227231
m_followers->template fill<Acc>(queue, dev_points);
228232

229233
detail::assignPointsToClusters<Acc>(
230-
queue, block_size, m_seeds->data(), m_followers->view(), dev_points.view());
234+
queue, block_size, m_seeds.value(), m_followers->view(), dev_points.view());
231235

232236
alpaka::wait(queue);
233237
dev_points.mark_clustered();

include/CLUEstering/core/detail/ClusteringKernels.hpp

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
#include "CLUEstering/core/ConvolutionalKernel.hpp"
55
#include "CLUEstering/core/DistanceParameter.hpp"
66
#include "CLUEstering/data_structures/PointsDevice.hpp"
7+
#include "CLUEstering/data_structures/internal/Followers.hpp"
8+
#include "CLUEstering/data_structures/internal/SearchBox.hpp"
9+
#include "CLUEstering/data_structures/internal/SeedArray.hpp"
710
#include "CLUEstering/data_structures/internal/TilesView.hpp"
811
#include "CLUEstering/data_structures/internal/VecArray.hpp"
9-
#include "CLUEstering/data_structures/internal/SearchBox.hpp"
10-
#include "CLUEstering/data_structures/internal/Followers.hpp"
1112
#include "CLUEstering/detail/make_array.hpp"
1213
#include "CLUEstering/internal/alpaka/work_division.hpp"
1314
#include "CLUEstering/internal/math/math.hpp"
@@ -154,6 +155,7 @@ namespace clue::detail {
154155
internal::TilesView<Ndim> dev_tiles,
155156
PointsView<Ndim> dev_points,
156157
DistanceParameter<Ndim> dm,
158+
std::size_t* seed_candidates,
157159
int32_t n_points) const {
158160
for (auto i : alpaka::uniformElements(acc, n_points)) {
159161
float delta_i = std::numeric_limits<float>::max();
@@ -184,14 +186,17 @@ namespace clue::detail {
184186
i);
185187

186188
dev_points.nearest_higher[i] = nh_i;
189+
if (nh_i == -1) {
190+
alpaka::atomicAdd(acc, seed_candidates, 1ul);
191+
}
187192
}
188193
}
189194
};
190195

191196
struct KernelFindClusters {
192197
template <typename TAcc, std::size_t Ndim>
193198
ALPAKA_FN_ACC void operator()(const TAcc& acc,
194-
VecArray<int32_t, reserve>* seeds,
199+
clue::internal::SeedArrayView seeds,
195200
internal::TilesView<Ndim> tiles,
196201
PointsView<Ndim> dev_points,
197202
DistanceParameter<Ndim> seed_dc,
@@ -211,7 +216,7 @@ namespace clue::detail {
211216
if (is_seed) {
212217
dev_points.is_seed[i] = 1;
213218
dev_points.nearest_higher[i] = -1;
214-
seeds->push_back(acc, i);
219+
seeds.push_back(acc, i);
215220
} else {
216221
dev_points.is_seed[i] = 0;
217222
}
@@ -222,16 +227,15 @@ namespace clue::detail {
222227
struct KernelAssignClusters {
223228
template <typename TAcc, std::size_t Ndim>
224229
ALPAKA_FN_ACC void operator()(const TAcc& acc,
225-
VecArray<int32_t, reserve>* seeds,
230+
clue::internal::SeedArrayView seeds,
226231
clue::FollowersView followers,
227232
PointsView<Ndim> dev_points) const {
228-
const auto& seeds_0 = *seeds;
229-
const auto n_seeds = seeds_0.size();
233+
const auto n_seeds = seeds.size();
230234
for (auto idx_cls : alpaka::uniformElements(acc, n_seeds)) {
231235
int local_stack[256] = {-1};
232236
int local_stack_size = 0;
233237

234-
int idx_this_seed = seeds_0[idx_cls];
238+
int idx_this_seed = seeds[idx_cls];
235239
dev_points.cluster_index[idx_this_seed] = idx_cls;
236240
local_stack[local_stack_size] = idx_this_seed;
237241
++local_stack_size;
@@ -279,33 +283,52 @@ namespace clue::detail {
279283
internal::TilesView<Ndim>& tiles,
280284
PointsView<Ndim>& dev_points,
281285
const DistanceParameter<Ndim>& dm,
286+
std::size_t& seed_candidates,
282287
int32_t size) {
283-
alpaka::exec<TAcc>(
284-
queue, work_division, KernelCalculateNearestHigher{}, tiles, dev_points, dm, size);
288+
auto d_seed_candidates = clue::make_device_buffer<std::size_t>(queue);
289+
alpaka::memset(queue, d_seed_candidates, 0u);
290+
alpaka::exec<TAcc>(queue,
291+
work_division,
292+
KernelCalculateNearestHigher{},
293+
tiles,
294+
dev_points,
295+
dm,
296+
d_seed_candidates.data(),
297+
size);
298+
alpaka::memcpy(queue, clue::make_host_view(seed_candidates), d_seed_candidates);
299+
alpaka::wait(queue);
285300
}
286301

287302
template <concepts::accelerator TAcc, concepts::queue TQueue, std::size_t Ndim>
288303
inline void findClusterSeeds(TQueue& queue,
289304
const WorkDiv& work_division,
290-
VecArray<int32_t, reserve>* seeds,
305+
clue::internal::SeedArray<>& seeds,
291306
internal::TilesView<Ndim>& tiles,
292307
PointsView<Ndim>& dev_points,
293308
const DistanceParameter<Ndim>& seed_dc,
294309
float rhoc,
295310
int32_t size) {
296-
alpaka::exec<TAcc>(
297-
queue, work_division, KernelFindClusters{}, seeds, tiles, dev_points, seed_dc, rhoc, size);
311+
alpaka::exec<TAcc>(queue,
312+
work_division,
313+
KernelFindClusters{},
314+
seeds.view(),
315+
tiles,
316+
dev_points,
317+
seed_dc,
318+
rhoc,
319+
size);
298320
}
299321

300322
template <concepts::accelerator TAcc, concepts::queue TQueue, std::size_t Ndim>
301323
inline void assignPointsToClusters(TQueue& queue,
302324
std::size_t block_size,
303-
VecArray<int32_t, reserve>* seeds,
325+
clue::internal::SeedArray<>& seeds,
304326
clue::FollowersView followers,
305327
PointsView<Ndim> dev_points) {
306-
const Idx grid_size = clue::divide_up_by(reserve, block_size);
328+
const Idx grid_size = clue::divide_up_by(seeds.size(queue), block_size);
307329
const auto work_division = clue::make_workdiv<TAcc>(grid_size, block_size);
308-
alpaka::exec<TAcc>(queue, work_division, KernelAssignClusters{}, seeds, followers, dev_points);
330+
alpaka::exec<TAcc>(
331+
queue, work_division, KernelAssignClusters{}, seeds.view(), followers, dev_points);
309332
}
310333

311334
} // namespace clue::detail
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
2+
#pragma once
3+
4+
#include "CLUEstering/detail/concepts.hpp"
5+
#include "CLUEstering/data_structures/internal/SeedArray.hpp"
6+
#include <cstddef>
7+
#include <optional>
8+
9+
namespace clue::detail {
10+
11+
template <concepts::queue TQueue,
12+
concepts::device TDev = decltype(alpaka::getDev(std::declval<TQueue>()))>
13+
inline void setup_seeds(TQueue& queue,
14+
std::optional<clue::internal::SeedArray<TDev>>& seeds,
15+
std::size_t seed_candidates) {
16+
if (!seeds.has_value()) {
17+
seeds = clue::internal::SeedArray<TDev>(queue, seed_candidates);
18+
}
19+
if (seeds->capacity() < seed_candidates) {
20+
seeds = clue::internal::SeedArray<TDev>(queue, seed_candidates);
21+
} else {
22+
seeds->reset(queue);
23+
}
24+
alpaka::wait(queue);
25+
}
26+
27+
} // namespace clue::detail
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
2+
#pragma once
3+
4+
#include "CLUEstering/detail/concepts.hpp"
5+
#include "CLUEstering/internal/alpaka/memory.hpp"
6+
#include <alpaka/alpaka.hpp>
7+
#include <cstddef>
8+
#include <cstdint>
9+
10+
namespace clue::internal {
11+
12+
class SeedArrayView {
13+
int32_t* m_data;
14+
std::size_t* m_size;
15+
std::size_t m_capacity;
16+
17+
public:
18+
ALPAKA_FN_HOST_ACC constexpr SeedArrayView(int32_t* data,
19+
std::size_t* size,
20+
std::size_t capacity)
21+
: m_data{data}, m_size{size}, m_capacity{capacity} {}
22+
23+
ALPAKA_FN_ACC constexpr auto& operator[](std::size_t index) { return m_data[index]; }
24+
ALPAKA_FN_ACC constexpr const auto& operator[](std::size_t index) const {
25+
return m_data[index];
26+
}
27+
28+
ALPAKA_FN_ACC constexpr auto size() const {
29+
// NOTE: not thread-safe
30+
// Could restrict this to KernelAssignClusters, but maybe that's an overkill
31+
return *m_size;
32+
}
33+
34+
template <clue::concepts::accelerator TAcc>
35+
ALPAKA_FN_ACC constexpr void push_back(const TAcc& acc, int32_t value) {
36+
auto prev = alpaka::atomicAdd(acc, m_size, 1ul);
37+
if (prev < m_capacity) {
38+
m_data[prev] = value;
39+
} else {
40+
alpaka::atomicSub(acc, m_size, 1ul);
41+
}
42+
}
43+
};
44+
45+
template <clue::concepts::device TDev = clue::Device>
46+
class SeedArray {
47+
private:
48+
clue::device_buffer<TDev, int32_t[]> m_buffer;
49+
clue::device_buffer<TDev, std::size_t> m_dsize;
50+
std::optional<std::size_t> m_size;
51+
std::size_t m_capacity;
52+
SeedArrayView m_view;
53+
54+
public:
55+
template <clue::concepts::queue TQueue>
56+
SeedArray(TQueue& queue, std::size_t size)
57+
: m_buffer{clue::make_device_buffer<int32_t[]>(queue, size)},
58+
m_dsize{clue::make_device_buffer<std::size_t>(queue)},
59+
m_size{std::nullopt},
60+
m_capacity{size},
61+
m_view{m_buffer.data(), m_dsize.data(), m_capacity} {}
62+
63+
ALPAKA_FN_HOST constexpr auto capacity() const { return m_capacity; }
64+
65+
template <clue::concepts::queue TQueue>
66+
ALPAKA_FN_HOST auto size(TQueue& queue) {
67+
if (!m_size.has_value()) {
68+
m_size = std::make_optional<std::size_t>();
69+
alpaka::memcpy(queue, clue::make_host_view(*m_size), m_dsize);
70+
alpaka::wait(queue);
71+
}
72+
return *m_size;
73+
}
74+
75+
template <clue::concepts::queue TQueue>
76+
ALPAKA_FN_HOST auto reset(TQueue& queue) {
77+
m_size = std::nullopt;
78+
alpaka::memset(queue, m_dsize, 0u);
79+
}
80+
81+
ALPAKA_FN_HOST const auto& view() const { return m_view; }
82+
ALPAKA_FN_HOST auto& view() { return m_view; }
83+
};
84+
85+
} // namespace clue::internal

0 commit comments

Comments
 (0)