Commit 5ccd590

Merge branch 'main' into rework_tiles_onetomanyassoc

sbaldu committed Oct 29, 2024
2 parents: d01ddff + d3d93a1
Showing 44 changed files with 2,196,910 additions and 10,231 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/bench_profile_serial.yml
@@ -0,0 +1,53 @@
name: Run benchmark and profiling

# The workflow gets triggered by pushes and pull requests
on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      fail-fast: false

    steps:
      # checks out the code in the repository
      - uses: actions/checkout@v3
        with:
          submodules: true

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install matplotlib
      - name: Compile and run benchmark
        working-directory: ${{ github.workspace }}/benchmark/dataset_size
        run: |
          cmake -B build -DCMAKE_BUILD_TYPE=Release
          cmake --build build -- -j 2
          ./build/serial.out 10 18
      # TODO: this works on local but not on github actions
      # - name: Compile and run profiling
      #   working-directory: ${{ github.workspace }}/benchmark/profiling
      #   run: |
      #     cmake -B build/Debug -DCMAKE_BUILD_TYPE=Debug
      #     cmake --build build/Debug -- -j 2
      #     cmake -B build/Release -DCMAKE_BUILD_TYPE=Release
      #     cmake --build build/Release -- -j 2
      #     ./build/Debug/serial.out ../../data/data_32768.csv
      #     gprof ./build/Debug/serial.out ../../data/data_32768.csv
      #     ./build/Release/serial.out ../../data/data_32768.csv
      #     gprof ./build/Release/serial.out ../../data/data_32768.csv

      # - name: Check cache misses with perf
      #   working-directory: ${{ github.workspace }}/benchmark/profiling
      #   run: |
      #     perf stat -B -e cache-misses,cycles,instructions,branches ./build/Debug/serial.out ../../data/data_32768.csv
      #     perf stat -B -e cache-misses,cycles,instructions,branches ./build/Release/serial.out ../../data/data_32768.csv

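Editor's note: the benchmark step builds benchmark/dataset_size in Release mode and runs ./build/serial.out 10 18. That driver is not part of this diff, so the following is only a hedged sketch of a benchmark of that shape. It assumes the two CLI arguments are the log2 bounds of the dataset sizes to time, and run_clustering is a hypothetical placeholder for the real CLUE invocation; neither assumption comes from the source shown here.

// Hypothetical sketch of a dataset-size benchmark driver like serial.out.
// Assumes argv[1] and argv[2] are the log2 bounds of the dataset sizes;
// run_clustering stands in for the actual clustering call, which this
// diff does not show.
#include <chrono>
#include <cstdlib>
#include <iostream>

void run_clustering(int n_points) {
  // Placeholder: generate n_points points and run the clusterer here.
  (void)n_points;
}

int main(int argc, char* argv[]) {
  if (argc != 3) {
    std::cerr << "usage: " << argv[0] << " <min_log2> <max_log2>\n";
    return EXIT_FAILURE;
  }
  const int min_exp = std::atoi(argv[1]);
  const int max_exp = std::atoi(argv[2]);
  for (int exp = min_exp; exp <= max_exp; ++exp) {
    const int n_points = 1 << exp;  // dataset sizes grow as powers of two
    const auto start = std::chrono::steady_clock::now();
    run_clustering(n_points);
    const auto stop = std::chrono::steady_clock::now();
    const auto ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
    std::cout << n_points << " points: " << ms.count() << " ms\n";
  }
}

The matplotlib package installed in the previous step suggests such timings are plotted somewhere downstream, though no plotting step appears in this workflow.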
44 changes: 0 additions & 44 deletions .github/workflows/profile_serial.yml

This file was deleted.

46 changes: 0 additions & 46 deletions .github/workflows/upload_profile_log.yml

This file was deleted.

6 changes: 3 additions & 3 deletions CLUEstering/CLUEstering.py
@@ -510,18 +510,18 @@ def choose_kernel(self,
             if len(parameters) != 1:
                 raise ValueError("Wrong number of parameters. The flat kernel"
                                  + " requires 1 parameter.")
-            self.kernel = CLUE_Convolutional_Kernels.FlatKernel(parameters[0])
+            self.kernel = clue_kernels.FlatKernel(parameters[0])
         elif choice == "exp":
             if len(parameters) != 2:
                 raise ValueError("Wrong number of parameters. The exponential"
                                  + " kernel requires 2 parameters.")
-            self.kernel = CLUE_Convolutional_Kernels.ExponentialKernel(parameters[0],
+            self.kernel = clue_kernels.ExponentialKernel(parameters[0],
                                                          parameters[1])
         elif choice == "gaus":
             if len(parameters) != 3:
                 raise ValueError("Wrong number of parameters. The gaussian" +
                                  " kernel requires 3 parameters.")
-            self.kernel = CLUE_Convolutional_Kernels.GaussinKernel(parameters[0],
+            self.kernel = clue_kernels.GaussianKernel(parameters[0],
                                                       parameters[1],
                                                       parameters[2])
         elif choice == "custom":
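Editor's note: besides renaming the module alias to clue_kernels, this hunk fixes the GaussinKernel/GaussianKernel typo. The kernel classes themselves live on the C++ side (ConvolutionalKernel.h, not shown in this diff); only the parameter counts (1, 2, 3) are confirmed by the validation above. As a hedged sketch of the gaussian case, assuming the three parameters are an amplitude, a mean, and a standard deviation (both the names and the formula are assumptions, not taken from the source):

// Hedged sketch of a 3-parameter gaussian convolutional kernel. The
// authoritative definition is in ConvolutionalKernel.h, which this diff
// does not show; parameter names and the formula below are assumptions.
#include <cmath>

class GaussianKernelSketch {
public:
  GaussianKernelSketch(float amplitude, float mean, float sigma)
      : m_amplitude{amplitude}, m_mean{mean}, m_sigma{sigma} {}

  // Weight given to a neighbour at distance dist when convolving densities.
  float operator()(float dist) const {
    const float delta = dist - m_mean;
    return m_amplitude * std::exp(-delta * delta / (2.f * m_sigma * m_sigma));
  }

private:
  float m_amplitude;
  float m_mean;
  float m_sigma;
};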
29 changes: 17 additions & 12 deletions CLUEstering/alpaka/CLUE/CLUEAlgoAlpaka.h
@@ -17,6 +17,7 @@
 #include "../DataFormats/Points.h"
 #include "../DataFormats/alpaka/PointsAlpaka.h"
 #include "../DataFormats/alpaka/TilesAlpaka.h"
+#include "../DataFormats/alpaka/Vector.h"
 #include "CLUEAlpakaKernels.h"
 #include "ConvolutionalKernel.h"
 
@@ -34,7 +35,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     }
 
     TilesAlpaka<Ndim>* m_tiles;
-    VecArray<int32_t, max_seeds>* m_seeds;
+    clue::Vector<int32_t>* m_seeds;
     VecArray<int32_t, max_followers>* m_followers;
 
     template <typename KernelType>
@@ -55,10 +56,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
     // Buffers
     std::optional<cms::alpakatools::device_buffer<Device, TilesAlpaka<Ndim>>> d_tiles;
-    std::optional<
-        cms::alpakatools::device_buffer<Device,
-                                        cms::alpakatools::VecArray<int32_t, max_seeds>>>
-        d_seeds;
+    std::optional<cms::alpakatools::device_buffer<Device, int32_t[]>> d_seeds;
+    std::optional<cms::alpakatools::device_buffer<Device, clue::Vector<int32_t>>> seeds;
     std::optional<cms::alpakatools::device_buffer<
         Device,
         cms::alpakatools::VecArray<int32_t, max_followers>[]>>
@@ -109,14 +108,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
   template <typename TAcc, uint8_t Ndim>
   void CLUEAlgoAlpaka<TAcc, Ndim>::init_device(Queue queue_) {
     d_tiles = cms::alpakatools::make_device_buffer<TilesAlpaka<Ndim>>(queue_);
-    d_seeds = cms::alpakatools::make_device_buffer<
-        cms::alpakatools::VecArray<int32_t, max_seeds>>(queue_);
+    d_seeds = cms::alpakatools::make_device_buffer<int32_t[]>(queue_, reserve);
     d_followers = cms::alpakatools::make_device_buffer<
         cms::alpakatools::VecArray<int32_t, max_followers>[]>(queue_, reserve);
 
+    seeds = cms::alpakatools::make_device_buffer<clue::Vector<int32_t>>(queue_);
+    // resize the seeds vector
+    (*seeds)->resize((*d_seeds).data(), reserve);
+
     // Copy to the public pointers
     m_tiles = (*d_tiles).data();
-    m_seeds = (*d_seeds).data();
+    m_seeds = (*seeds).data();
     m_followers = (*d_followers).data();
   }
 
@@ -151,7 +153,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     alpaka::enqueue(queue_,
                     alpaka::createTaskKernel<Acc1D>(
                         tiles_working_div, KernelResetTiles{}, m_tiles, nTiles, nPerDim));
-
     alpaka::memcpy(
         queue_,
         d_points.coords,
@@ -160,6 +161,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
         queue_,
         d_points.weight,
         cms::alpakatools::make_host_view(h_points.m_weight.data(), h_points.n));
+    alpaka::memset(queue_, *d_seeds, 0x00);
 
     auto tileIds = cms::alpakatools::make_device_buffer<uint32_t[]>(queue_, h_points.n);
     // now we scan the dataset and calculate the tile of each point
@@ -232,7 +234,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                         /* m_domains.data(), */
                                         dc_,
                                         h_points.n));
-
     alpaka::enqueue(queue_,
                     alpaka::createTaskKernel<Acc1D>(working_div,
                                                     KernelCalculateNearestHigher{},
@@ -242,7 +243,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                     dm_,
                                                     dc_,
                                                     h_points.n));
-
     alpaka::enqueue(queue_,
                     alpaka::createTaskKernel<Acc1D>(working_div,
                                                     KernelFindClusters<Ndim>{},
@@ -255,9 +255,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                                                     h_points.n));
 
     // We change the working division when assigning the clusters
-    const Idx grid_size_seeds = cms::alpakatools::divide_up_by(max_seeds, block_size);
+    const Idx grid_size_seeds = cms::alpakatools::divide_up_by(reserve, block_size);
     auto working_div_seeds =
         cms::alpakatools::make_workdiv<Acc1D>(grid_size_seeds, block_size);
+
     alpaka::enqueue(queue_,
                     alpaka::createTaskKernel<Acc1D>(working_div_seeds,
                                                     KernelAssignClusters<Ndim>{},
@@ -268,6 +269,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     // Wait for all the operations in the queue to finish
     alpaka::wait(queue_);
 
+#ifdef DEBUG
    alpaka::memcpy(queue_,
                    cms::alpakatools::make_host_view(h_points.m_rho.data(), h_points.n),
                    d_points.rho,
@@ -281,6 +283,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                    cms::alpakatools::make_host_view(h_points.m_nearestHigher.data(), h_points.n),
                    d_points.nearest_higher,
                    static_cast<uint32_t>(h_points.n));
+#endif
+
     alpaka::memcpy(
         queue_,
         cms::alpakatools::make_host_view(h_points.m_clusterIndex.data(), h_points.n),
@@ -297,4 +301,5 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     return {h_points.m_clusterIndex, h_points.m_isSeed};
   }
 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
+
 #endif
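Editor's note: the core of this rework replaces the fixed-capacity VecArray<int32_t, max_seeds> with a clue::Vector<int32_t> that does not own its storage. A raw int32_t[] device buffer of reserve elements is allocated separately (d_seeds), the vector object (seeds) is bound to it with resize(...), and the backing buffer is cleared with alpaka::memset before each run. What follows is a minimal sketch of that external-storage pattern, assuming an interface modeled on cms::alpakatools::VecArray; the real clue::Vector lives in DataFormats/alpaka/Vector.h, which this diff does not show, so all member and method names below are illustrative.

// Hedged sketch of an external-storage vector with device-side atomic append.
#include <alpaka/alpaka.hpp>

namespace clue_sketch {

  template <typename T>
  class Vector {
  public:
    // Bind the vector to externally allocated storage, e.g. the raw
    // int32_t[] device buffer allocated with make_device_buffer above.
    constexpr void resize(T* data, int capacity) {
      m_data = data;
      m_capacity = capacity;
      m_size = 0;
    }

    // Device-side append: atomically claim a slot, then write into it.
    template <typename TAcc>
    ALPAKA_FN_ACC int push_back(const TAcc& acc, const T& element) {
      const int slot =
          alpaka::atomicAdd(acc, &m_size, 1, alpaka::hierarchy::Blocks{});
      if (slot < m_capacity) {
        m_data[slot] = element;
        return slot;
      }
      // Capacity exhausted: undo the reservation and signal failure.
      alpaka::atomicSub(acc, &m_size, 1, alpaka::hierarchy::Blocks{});
      return -1;
    }

    ALPAKA_FN_HOST_ACC int size() const { return m_size; }
    ALPAKA_FN_HOST_ACC const T& operator[](int i) const { return m_data[i]; }

  private:
    T* m_data = nullptr;  // not owned: lifetime managed by the device buffer
    int m_capacity = 0;   // reserve elements, fixed at init_device time
    int m_size = 0;       // grows via atomic push_back in the kernels
  };

}  // namespace clue_sketch

Splitting the raw storage (d_seeds) from the bookkeeping object (seeds) lets the backing memory be cleared with a single alpaka::memset between runs, and replaces the old hard max_seeds{100} cap with a much larger pre-allocated capacity.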
10 changes: 5 additions & 5 deletions CLUEstering/alpaka/CLUE/CLUEAlpakaKernels.h
@@ -9,14 +9,14 @@
 #include "../DataFormats/alpaka/PointsAlpaka.h"
 #include "../DataFormats/alpaka/TilesAlpaka.h"
 #include "../DataFormats/alpaka/AlpakaVecArray.h"
+#include "../DataFormats/alpaka/Vector.h"
 #include "ConvolutionalKernel.h"
 
 using cms::alpakatools::VecArray;
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
   constexpr int32_t max_followers{100};
-  constexpr int32_t max_seeds{100};
+  constexpr int32_t reserve{1000000};
 
   template <uint8_t Ndim>
@@ -344,7 +344,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
   struct KernelFindClusters {
     template <typename TAcc>
     ALPAKA_FN_ACC void operator()(const TAcc& acc,
-                                  VecArray<int32_t, max_seeds>* seeds,
+                                  clue::Vector<int32_t>* seeds,
                                   VecArray<int32_t, max_followers>* followers,
                                   PointsView<Ndim>* dev_points,
                                   float dm,
@@ -364,7 +364,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
       if (is_seed) {
         dev_points->is_seed[i] = 1;
-        seeds[0].push_back(acc, i);
+        seeds->push_back(acc, i);
       } else {
         if (!is_outlier) {
           followers[dev_points->nearest_higher[i]].push_back(acc, i);
@@ -379,10 +379,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
   struct KernelAssignClusters {
     template <typename TAcc>
     ALPAKA_FN_ACC void operator()(const TAcc& acc,
-                                  VecArray<int, max_seeds>* seeds,
+                                  clue::Vector<int32_t>* seeds,
                                   VecArray<int, max_followers>* followers,
                                   PointsView<Ndim>* dev_points) const {
-      const auto& seeds_0{seeds[0]};
+      const auto& seeds_0{*seeds};
       const auto n_seeds{seeds_0.size()};
       cms::alpakatools::for_each_element_in_grid(acc, n_seeds, [&](uint32_t idx_cls) {
         int local_stack[256] = {-1};
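Editor's note: KernelAssignClusters is collapsed past the `int local_stack[256] = {-1};` line in this view. The following is a hedged, host-side toy reconstruction of the standard CLUE assignment step, not the verbatim device code: each seed starts a cluster, and its followers tree is walked with an explicit fixed-size stack instead of recursion, propagating the cluster index to every descendant. The cluster_index field name is assumed from the m_clusterIndex host copy in CLUEAlgoAlpaka.h above.

// Host-side toy reconstruction of the stack-based cluster assignment.
#include <cstdio>
#include <vector>

void assign_clusters(const std::vector<int>& seeds,
                     const std::vector<std::vector<int>>& followers,
                     std::vector<int>& cluster_index) {
  for (int idx_cls = 0; idx_cls < static_cast<int>(seeds.size()); ++idx_cls) {
    // Fixed-size stack, mirroring the kernel's local_stack[256].
    int local_stack[256] = {-1};
    int local_stack_size = 0;
    local_stack[local_stack_size++] = seeds[idx_cls];  // start from the seed
    while (local_stack_size > 0) {
      const int point_id = local_stack[--local_stack_size];  // pop
      cluster_index[point_id] = idx_cls;
      for (int follower : followers[point_id]) {  // push all followers
        local_stack[local_stack_size++] = follower;
      }
    }
  }
}

int main() {
  // Toy input: point 0 is a seed followed by 1 and 2; 2 is followed by 3.
  const std::vector<int> seeds{0};
  const std::vector<std::vector<int>> followers{{1, 2}, {}, {3}, {}};
  std::vector<int> cluster_index(4, -1);
  assign_clusters(seeds, followers, cluster_index);
  for (int id : cluster_index) std::printf("%d ", id);  // prints: 0 0 0 0
  std::printf("\n");
}

In the actual kernel the walk runs once per seed via cms::alpakatools::for_each_element_in_grid, so seeds are expanded in parallel; the toy version simply loops over them on the host.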