Skip to content

Commit 81b07d0

Browse files
authored
Separate calls to algorithms internal kernels (#176)
1 parent 4022404 commit 81b07d0

File tree

2 files changed

+86
-107
lines changed

2 files changed

+86
-107
lines changed

include/CLUEstering/core/detail/CLUEAlpakaKernels.hpp

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -47,22 +47,17 @@ namespace clue {
4747
int32_t point_id) {
4848
if constexpr (N_ == 0) {
4949
auto binId = tiles.getGlobalBinByBin(base_vec);
50-
// get the size of this bin
5150
auto binSize = tiles[binId].size();
5251

53-
// iterate inside this bin
5452
for (int binIter{}; binIter < binSize; ++binIter) {
5553
int32_t j{tiles[binId][binIter]};
56-
// query N_{dc_}(i)
5754

5855
auto coords_j = getCoords<Ndim>(dev_points, j);
59-
6056
float dist_ij_sq = tiles.distance(coords_i, coords_j);
6157

6258
auto k = kernel(acc, clue::internal::math::sqrt(dist_ij_sq), point_id, j);
6359
*rho_i += (int)(dist_ij_sq <= dc * dc) * k * dev_points.weight[j];
64-
65-
} // end of interate inside this bin
60+
}
6661
return;
6762
} else {
6863
for (auto i = search_box[search_box.size() - N_][0];
@@ -87,14 +82,12 @@ namespace clue {
8782
float rho_i{0.f};
8883
auto coords_i = getCoords<Ndim>(dev_points, i);
8984

90-
// Get the extremes of the search box
9185
clue::SearchBoxExtremes<Ndim> searchbox_extremes;
9286
for (int dim{}; dim != Ndim; ++dim) {
9387
searchbox_extremes[dim] =
9488
clue::nostd::make_array(coords_i[dim] - dc, coords_i[dim] + dc);
9589
}
9690

97-
// Calculate the search box
9891
clue::SearchBoxBins<Ndim> searchbox_bins;
9992
dev_tiles.searchBox(searchbox_extremes, searchbox_bins);
10093

@@ -121,32 +114,25 @@ namespace clue {
121114
int32_t point_id) {
122115
if constexpr (N_ == 0) {
123116
int binId{tiles.getGlobalBinByBin(base_vec)};
124-
// get the size of this bin
125117
int binSize{tiles[binId].size()};
126118

127-
// iterate inside this bin
128119
for (int binIter{}; binIter < binSize; ++binIter) {
129120
const auto j{tiles[binId][binIter]};
130-
// query N'_{dm}(i)
131121
float rho_j{dev_points.rho[j]};
132122
bool found_higher{(rho_j > rho_i)};
133-
// in the rare case where rho is the same, use detid
134123
found_higher = found_higher || ((rho_j == rho_i) && (rho_j > 0.f) && (j > point_id));
135124

136-
// Calculate the distance between the two points
137125
auto coords_j = getCoords<Ndim>(dev_points, j);
138126

139127
float dist_ij_sq = tiles.distance(coords_i, coords_j);
140128

141129
if (found_higher && dist_ij_sq <= dm_sq) {
142-
// find the nearest point within N'_{dm}(i)
143130
if (dist_ij_sq < *delta_i) {
144-
// update delta_i and nearestHigher_i
145131
*delta_i = dist_ij_sq;
146132
*nh_i = j;
147133
}
148134
}
149-
} // end of interate inside this bin
135+
}
150136

151137
return;
152138
} else {
@@ -183,14 +169,12 @@ namespace clue {
183169
auto coords_i = getCoords<Ndim>(dev_points, i);
184170
float rho_i{dev_points.rho[i]};
185171

186-
// Get the extremes of the search box
187172
clue::SearchBoxExtremes<Ndim> searchbox_extremes;
188173
for (int dim{}; dim != Ndim; ++dim) {
189174
searchbox_extremes[dim] =
190175
clue::nostd::make_array(coords_i[dim] - dm, coords_i[dim] + dm);
191176
}
192177

193-
// Calculate the search box
194178
clue::SearchBoxBins<Ndim> searchbox_bins;
195179
dev_tiles.searchBox(searchbox_extremes, searchbox_bins);
196180

@@ -222,13 +206,11 @@ namespace clue {
222206
float rhoc,
223207
int32_t n_points) const {
224208
for (auto i : alpaka::uniformElements(acc, n_points)) {
225-
// initialize cluster_index
226209
dev_points.cluster_index[i] = -1;
227210

228211
float delta_i = dev_points.delta[i];
229212
float rho_i = dev_points.rho[i];
230213

231-
// Determine whether the point is a seed or an outlier
232214
bool is_seed = (delta_i > seed_dc) && (rho_i >= rhoc);
233215

234216
if (is_seed) {
@@ -255,25 +237,18 @@ namespace clue {
255237

256238
int idx_this_seed = seeds_0[idx_cls];
257239
dev_points.cluster_index[idx_this_seed] = idx_cls;
258-
// push_back idThisSeed to localStack
259240
local_stack[local_stack_size] = idx_this_seed;
260241
++local_stack_size;
261-
// process all elements in localStack
262242
while (local_stack_size > 0) {
263-
// get last element of localStack
264243
int idx_end_of_local_stack{local_stack[local_stack_size - 1]};
265244
int temp_cluster_index = dev_points.cluster_index[idx_end_of_local_stack];
266-
// pop_back last element of localStack
267245
local_stack[local_stack_size - 1] = -1;
268246
--local_stack_size;
269247
const auto& followers_ies = followers[idx_end_of_local_stack];
270248
const auto followers_size = followers_ies.size();
271-
// loop over followers of last element of localStack
272249
for (int j{}; j != followers_size; ++j) {
273-
// pass id to follower
274250
int follower = followers_ies[j];
275251
dev_points.cluster_index[follower] = temp_cluster_index;
276-
// push_back follower to localStack
277252
local_stack[local_stack_size] = follower;
278253
++local_stack_size;
279254
}
@@ -282,5 +257,60 @@ namespace clue {
282257
}
283258
};
284259

260+
using WorkDiv = clue::WorkDiv<clue::Dim1D>;
261+
262+
template <concepts::accelerator TAcc, concepts::queue TQueue, uint8_t Ndim, typename KernelType>
263+
inline void computeLocalDensity(TQueue& queue,
264+
const WorkDiv& work_division,
265+
TilesAlpakaView<Ndim>& tiles,
266+
PointsView& dev_points,
267+
KernelType&& kernel,
268+
float dc,
269+
int32_t size) {
270+
alpaka::exec<TAcc>(queue,
271+
work_division,
272+
KernelCalculateLocalDensity{},
273+
tiles,
274+
dev_points,
275+
std::forward<KernelType>(kernel),
276+
dc,
277+
size);
278+
}
279+
280+
template <concepts::accelerator TAcc, concepts::queue TQueue, uint8_t Ndim>
281+
inline void computeNearestHighers(TQueue& queue,
282+
const WorkDiv& work_division,
283+
TilesAlpakaView<Ndim>& tiles,
284+
PointsView& dev_points,
285+
float dm,
286+
int32_t size) {
287+
alpaka::exec<TAcc>(
288+
queue, work_division, KernelCalculateNearestHigher{}, tiles, dev_points, dm, size);
289+
}
290+
291+
template <concepts::accelerator TAcc, concepts::queue TQueue>
292+
inline void findClusterSeeds(TQueue& queue,
293+
const WorkDiv& work_division,
294+
VecArray<int32_t, reserve>* seeds,
295+
PointsView& dev_points,
296+
float seed_dc,
297+
float rhoc,
298+
int32_t size) {
299+
alpaka::exec<TAcc>(
300+
queue, work_division, KernelFindClusters{}, seeds, dev_points, seed_dc, rhoc, size);
301+
}
302+
303+
template <concepts::accelerator TAcc, concepts::queue TQueue>
304+
inline void assignPointsToClusters(TQueue& queue,
305+
std::size_t block_size,
306+
VecArray<int32_t, reserve>* seeds,
307+
clue::FollowersView followers,
308+
PointsView dev_points) {
309+
const Idx grid_size = clue::divide_up_by(reserve, block_size);
310+
const auto work_division = clue::make_workdiv<TAcc>(grid_size, block_size);
311+
alpaka::exec<TAcc>(
312+
queue, work_division, KernelAssignClusters{}, seeds, followers, dev_points);
313+
}
314+
285315
} // namespace detail
286316
} // namespace clue

include/CLUEstering/core/detail/Clusterer.hpp

Lines changed: 29 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -315,51 +315,25 @@ namespace clue {
315315
const KernelType& kernel,
316316
Queue& queue,
317317
std::size_t block_size) {
318-
const auto nPoints = h_points.size();
319-
// fill the tiles
320-
m_tiles->template fill<Acc>(queue, dev_points, nPoints);
321-
322-
const Idx grid_size = clue::divide_up_by(nPoints, block_size);
323-
auto working_div = clue::make_workdiv<Acc>(grid_size, block_size);
324-
alpaka::exec<Acc>(queue,
325-
working_div,
326-
detail::KernelCalculateLocalDensity{},
327-
m_tiles->view(),
328-
dev_points.view(),
329-
kernel,
330-
m_dc,
331-
nPoints);
332-
alpaka::exec<Acc>(queue,
333-
working_div,
334-
detail::KernelCalculateNearestHigher{},
335-
m_tiles->view(),
336-
dev_points.view(),
337-
m_dm,
338-
nPoints);
318+
const auto n_points = h_points.size();
319+
m_tiles->template fill<Acc>(queue, dev_points, n_points);
320+
321+
const Idx grid_size = clue::divide_up_by(n_points, block_size);
322+
auto work_division = clue::make_workdiv<Acc>(grid_size, block_size);
323+
324+
detail::computeLocalDensity<Acc>(
325+
queue, work_division, m_tiles->view(), dev_points.view(), kernel, m_dc, n_points);
326+
detail::computeNearestHighers<Acc>(
327+
queue, work_division, m_tiles->view(), dev_points.view(), m_dm, n_points);
339328

340329
m_followers->template fill<Acc>(queue, dev_points);
341330

342-
alpaka::exec<Acc>(queue,
343-
working_div,
344-
detail::KernelFindClusters{},
345-
m_seeds->data(),
346-
dev_points.view(),
347-
m_seed_dc,
348-
m_rhoc,
349-
nPoints);
350-
351-
// We change the working division when assigning the clusters
352-
const Idx grid_size_seeds = clue::divide_up_by(reserve, block_size);
353-
auto working_div_seeds = clue::make_workdiv<Acc>(grid_size_seeds, block_size);
354-
355-
alpaka::exec<Acc>(queue,
356-
working_div_seeds,
357-
detail::KernelAssignClusters{},
358-
m_seeds->data(),
359-
m_followers->view(),
360-
dev_points.view());
361-
alpaka::wait(queue);
331+
detail::findClusterSeeds<Acc>(
332+
queue, work_division, m_seeds->data(), dev_points.view(), m_seed_dc, m_rhoc, n_points);
333+
detail::assignPointsToClusters<Acc>(
334+
queue, block_size, m_seeds->data(), m_followers->view(), dev_points.view());
362335

336+
alpaka::wait(queue);
363337
clue::copyToHost(queue, h_points, dev_points);
364338
}
365339

@@ -369,48 +343,23 @@ namespace clue {
369343
const KernelType& kernel,
370344
Queue& queue,
371345
std::size_t block_size) {
372-
const auto nPoints = dev_points.size();
373-
m_tiles->template fill<Acc>(queue, dev_points, nPoints);
374-
375-
const Idx grid_size = clue::divide_up_by(nPoints, block_size);
376-
auto working_div = clue::make_workdiv<Acc>(grid_size, block_size);
377-
alpaka::exec<Acc>(queue,
378-
working_div,
379-
detail::KernelCalculateLocalDensity{},
380-
m_tiles->view(),
381-
dev_points.view(),
382-
kernel,
383-
m_dc,
384-
nPoints);
385-
alpaka::exec<Acc>(queue,
386-
working_div,
387-
detail::KernelCalculateNearestHigher{},
388-
m_tiles->view(),
389-
dev_points.view(),
390-
m_dm,
391-
nPoints);
346+
const auto n_points = dev_points.size();
347+
m_tiles->template fill<Acc>(queue, dev_points, n_points);
348+
349+
const Idx grid_size = clue::divide_up_by(n_points, block_size);
350+
auto work_division = clue::make_workdiv<Acc>(grid_size, block_size);
351+
352+
detail::computeLocalDensity<Acc>(
353+
queue, work_division, m_tiles->view(), dev_points.view(), kernel, m_dc, n_points);
354+
detail::computeNearestHighers<Acc>(
355+
queue, work_division, m_tiles->view(), dev_points.view(), m_dm, n_points);
392356

393357
m_followers->template fill<Acc>(queue, dev_points);
394358

395-
alpaka::exec<Acc>(queue,
396-
working_div,
397-
detail::KernelFindClusters{},
398-
m_seeds->data(),
399-
dev_points.view(),
400-
m_seed_dc,
401-
m_rhoc,
402-
nPoints);
403-
404-
// We change the working division when assigning the clusters
405-
const Idx grid_size_seeds = clue::divide_up_by(reserve, block_size);
406-
auto working_div_seeds = clue::make_workdiv<Acc>(grid_size_seeds, block_size);
407-
408-
alpaka::exec<Acc>(queue,
409-
working_div_seeds,
410-
detail::KernelAssignClusters{},
411-
m_seeds->data(),
412-
m_followers->view(),
413-
dev_points.view());
359+
detail::findClusterSeeds<Acc>(
360+
queue, work_division, m_seeds->data(), dev_points.view(), m_seed_dc, m_rhoc, n_points);
361+
detail::assignPointsToClusters<Acc>(
362+
queue, block_size, m_seeds->data(), m_followers->view(), dev_points.view());
414363
}
415364

416365
} // namespace clue

0 commit comments

Comments
 (0)