Skip to content

Commit dad233e

Browse files
committed
Add benchmark of batched data
1 parent 2cc2ede commit dad233e

File tree

5 files changed

+208
-0
lines changed

5 files changed

+208
-0
lines changed

benchmark/batched/CMakeLists.txt

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
cmake_minimum_required(VERSION 3.16.0)

# A top-level CMakeLists.txt must contain a direct project() call; without it
# CMake emits a warning and silently falls back to an implicit project.
# C and CXX are listed because those are the languages the implicit project
# enabled, so compiler detection behaves the same as before.
project(CLUEsteringBatchedBenchmark LANGUAGES C CXX)

# Write compile_commands.json into the build tree.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Build everything as strict ISO C++20 (no compiler extensions).
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Place every benchmark executable directly in the top of the build tree.
set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR})

# Default to an optimized build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# Debug builds: extra diagnostics, libstdc++ assertions, profiling (-pg) and
# AddressSanitizer are appended to the default debug flags.
string(
  APPEND
  CMAKE_CXX_FLAGS_DEBUG
  " -DCLUE_DEBUG -D_GLIBCXX_ASSERTIONS -O0 -Wall -Wextra -Wpedantic -Wshadow -Wimplicit-fallthrough -Wextra-semi -Wold-style-cast -g -pg -fsanitize=address"
)

# Release builds: this replaces (does not extend) the default release flags.
# Note that -march=native ties the binaries to the machine they are built on.
set(CMAKE_CXX_FLAGS_RELEASE
    " -O2 -funroll-loops -funsafe-math-optimizations -ftree-vectorize -march=native"
)
22+
23+
# Mandatory third-party dependencies: configuration stops if either is missing.
find_package(Boost 1.75.0 REQUIRED)
find_package(benchmark REQUIRED)

# alpaka is optional on the system: use an installed package when one is
# found, otherwise download the 1.2.0 release archive and build it in-tree.
find_package(alpaka)
if(NOT alpaka_FOUND)
  include(FetchContent)
  FetchContent_Declare(
    alpaka
    URL https://github.com/alpaka-group/alpaka/archive/refs/tags/1.2.0.tar.gz
  )
  FetchContent_MakeAvailable(alpaka)
endif()
35+
36+
# The CPU benchmarks are always built.
add_subdirectory(cpu)

# GPU benchmarks are opt-in: a backend directory is added only when
# check_language() finds a working compiler for that language. The directory
# name is the lower-cased language name (CUDA -> cuda, HIP -> hip).
include(CheckLanguage)
foreach(_backend_lang IN ITEMS CUDA HIP)
  check_language(${_backend_lang})
  if(CMAKE_${_backend_lang}_COMPILER)
    string(TOLOWER "${_backend_lang}" _backend_dir)
    add_subdirectory(${_backend_dir})
  endif()
endforeach()
unset(_backend_dir)
48+
49+
# Locate a SYCL toolchain. SYCL_ROOT_DIR (if the user set it) is searched
# first, followed by a few conventional install prefixes.
set(_sycl_search_dirs ${SYCL_ROOT_DIR} /usr/lib /usr/local/lib
                      /opt/intel/oneapi/compiler/latest/linux)

# The icpx compiler driver.
find_program(
  SYCL_COMPILER
  NAMES icpx
  HINTS ${_sycl_search_dirs}
  PATH_SUFFIXES bin)

# Directory containing the SYCL headers.
find_path(
  SYCL_INCLUDE_DIR
  NAMES sycl/sycl.hpp
  HINTS ${_sycl_search_dirs}
  PATH_SUFFIXES include)

# Directory containing the SYCL runtime library.
find_path(
  SYCL_LIB_DIR
  NAMES libsycl.so
  HINTS ${_sycl_search_dirs}
  PATH_SUFFIXES lib)

find_package(oneDPL)

# Build the SYCL benchmark only when every piece of the toolchain was found.
if(oneDPL_FOUND
   AND SYCL_COMPILER
   AND SYCL_INCLUDE_DIR
   AND SYCL_LIB_DIR)
  # add_subdirectory() is a hard error when the directory does not exist, which
  # would break configuration on every machine with oneAPI installed. Only
  # descend when a sycl/ benchmark is actually present next to this file.
  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sycl/CMakeLists.txt")
    add_subdirectory(sycl)
  else()
    message(
      STATUS
        "SYCL toolchain found, but ${CMAKE_CURRENT_SOURCE_DIR}/sycl has no CMakeLists.txt; skipping the SYCL benchmark"
    )
  endif()
endif()
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Defines one CPU benchmark executable built from the shared main.cpp.
#
#   clue_add_cpu_benchmark(<target> <accelerator-define> [<extra-lib>...])
#
# <target>             name of the executable to create
# <accelerator-define> ALPAKA_ACC_* macro that selects the alpaka CPU backend
# <extra-lib>...       backend-specific libraries; they are linked before
#                      benchmark::benchmark
function(clue_add_cpu_benchmark target accelerator_define)
  add_executable(${target} ${CMAKE_SOURCE_DIR}/main.cpp)
  target_include_directories(
    ${target} PRIVATE ${CMAKE_SOURCE_DIR}/../../include
                      ${CMAKE_SOURCE_DIR}/../../benchmark)
  target_link_libraries(${target} PRIVATE alpaka::alpaka Boost::boost ${ARGN}
                                          benchmark::benchmark)
  target_compile_definitions(
    ${target} PRIVATE ALPAKA_HOST_ONLY ${accelerator_define}
                      CLUE_ENABLE_CACHING_ALLOCATOR)
endfunction()

# Backends with no additional dependencies are always built.
clue_add_cpu_benchmark(serial.out ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED)
clue_add_cpu_benchmark(threads.out ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED)

# TBB backend: built only when TBB is available.
find_package(TBB)
if(TBB_FOUND)
  clue_add_cpu_benchmark(tbb.out ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED TBB::tbb)
endif()

# OpenMP backend: built only when a C++ OpenMP implementation is available.
find_package(OpenMP)
if(OpenMP_CXX_FOUND)
  clue_add_cpu_benchmark(openmp.out ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED
                         OpenMP::OpenMP_CXX)
  set_target_properties(openmp.out PROPERTIES RUNTIME_OUTPUT_DIRECTORY
                                              ${CMAKE_BINARY_DIR})
endif()
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
enable_language(CUDA)

# CMAKE_CUDA_HOST_COMPILER is deliberately not set here. It previously pointed
# at the CUDA compiler itself (which is not a host C++ compiler), and it was
# assigned after enable_language(CUDA), at which point CMake treats the
# variable as read-only. To choose a host compiler, pass
# -DCMAKE_CUDA_HOST_COMPILER=<path> or set the CUDAHOSTCXX environment
# variable when configuring.

# Default to CUDA C++20 unless the user requested a different standard.
if(NOT DEFINED CMAKE_CUDA_STANDARD)
  set(CMAKE_CUDA_STANDARD 20)
  set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

# The shared benchmark source has a .cpp extension; compile it as CUDA for the
# target defined in this directory.
set_source_files_properties(${CMAKE_SOURCE_DIR}/main.cpp PROPERTIES LANGUAGE
                                                                    CUDA)
add_executable(cuda.out ${CMAKE_SOURCE_DIR}/main.cpp)
target_include_directories(cuda.out PRIVATE ${CMAKE_SOURCE_DIR}/../../include
                                            ${CMAKE_SOURCE_DIR}/../../benchmark)
target_link_libraries(cuda.out PRIVATE alpaka::alpaka Boost::boost
                                       benchmark::benchmark)
target_compile_definitions(cuda.out PRIVATE ALPAKA_ACC_GPU_CUDA_ENABLED
                                            CLUE_ENABLE_CACHING_ALLOCATOR)
target_compile_options(cuda.out PRIVATE --expt-relaxed-constexpr)
# Relocatable device code, compiled for compute capabilities 7.5, 8.0 and 9.0.
set_target_properties(cuda.out PROPERTIES CUDA_SEPARABLE_COMPILATION ON
                                          CUDA_ARCHITECTURES "75;80;90")
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# HIP benchmark: the shared main.cpp compiled with the HIP toolchain.
enable_language(HIP)
find_package(HIP)

# main.cpp has a .cpp extension, so it must be marked as HIP explicitly for
# the target defined in this directory.
set_source_files_properties(
  ${CMAKE_SOURCE_DIR}/main.cpp
  PROPERTIES LANGUAGE HIP
)

add_executable(hip.out ${CMAKE_SOURCE_DIR}/main.cpp)

target_include_directories(
  hip.out
  PRIVATE ${CMAKE_SOURCE_DIR}/../../include
          ${CMAKE_SOURCE_DIR}/../../benchmark
)

target_link_libraries(
  hip.out
  PRIVATE alpaka::alpaka
          Boost::boost
          benchmark::benchmark
)

# Select the alpaka HIP accelerator and enable the caching allocator.
target_compile_definitions(
  hip.out
  PRIVATE ALPAKA_ACC_GPU_HIP_ENABLED
          CLUE_ENABLE_CACHING_ALLOCATOR
)

benchmark/batched/main.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
2+
#include "CLUEstering/CLUEstering.hpp"
3+
#include "utils/generation.hpp"
4+
#include <benchmark/benchmark.h>
5+
6+
#include <algorithm>
7+
#include <cstddef>
8+
#include <iterator>
9+
#include <ranges>
10+
#include <vector>
11+
12+
/// Benchmarks clustering 1000 events one at a time.
///
/// Each iteration reads `../../data/small_event_<i>.csv` for i in [0, 1000)
/// (paths are relative to the working directory of the process) and then, for
/// every event, constructs a `clue::Clusterer<2>` and calls `make_clusters`.
/// Only the clusterer construction and the `make_clusters` call are timed;
/// queue creation, CSV loading and device-buffer allocation are excluded.
///
/// @param state Google Benchmark state that drives the iterations and timer.
static void BM_SingleEvents(benchmark::State& state) {
  for (auto _ : state) {
    // Setup: not part of the measurement.
    state.PauseTiming();
    auto queue = clue::get_queue(0u);

    std::vector<clue::PointsHost<2>> host_points;
    std::ranges::transform(std::views::iota(0u) | std::views::take(1000),
                           std::back_inserter(host_points),
                           [&](const auto i) {
                             return clue::read_csv<2>(
                                 queue, "../../data/small_event_" + std::to_string(i) + ".csv");
                           });
    const auto dc = 1.5f, rhoc = 10.f, outlier = 1.5f;

    for (auto& h_points : host_points) {
      clue::PointsDevice<2> d_points(queue, h_points.size());

      // Every ResumeTiming() needs a matching PauseTiming(): resuming a timer
      // that is already running restarts the measured interval, which would
      // throw away the time spent on all but the last event.
      state.ResumeTiming();
      clue::Clusterer<2> algo(queue, dc, rhoc, outlier);
      algo.make_clusters(queue, h_points, d_points);
      state.PauseTiming();
    }

    // The framework expects the timer to be running when the iteration ends.
    state.ResumeTiming();
  }
}
35+
36+
/// Benchmarks clustering all events in a single batched call.
///
/// Each iteration reads `../../data/small_events_batch.csv` (relative to the
/// working directory of the process), describes it as 1000 events of
/// `size / 1000` points each, and times the construction of a
/// `clue::Clusterer<2>` plus one batched `make_clusters` call. Queue
/// creation, CSV loading and device-buffer allocation are not timed.
///
/// NOTE(review): the equal split assumes the point count is a multiple of
/// 1000; any remainder is not covered by the batch sizes - confirm against
/// the data file.
///
/// @param state Google Benchmark state that drives the iterations and timer.
static void BM_Batched(benchmark::State& state) {
  for (auto _ : state) {
    // ---- untimed setup ----
    state.PauseTiming();
    auto queue = clue::get_queue(0u);

    clue::PointsHost<2> batch_host =
        clue::read_csv<2>(queue, "../../data/small_events_batch.csv");
    const std::size_t total_points = batch_host.size();
    clue::PointsDevice<2> batch_device(queue, total_points);

    const float dc = 1.5f;
    const float rhoc = 10.f;
    const float outlier = 1.5f;

    // One entry per event, every event assumed to have the same size.
    std::vector<std::size_t> event_sizes(1000, total_points / 1000);
    state.ResumeTiming();

    // ---- timed region ----
    clue::Clusterer<2> clusterer(queue, dc, rhoc, outlier);
    clusterer.make_clusters(queue, batch_host, batch_device, event_sizes);
  }
}
52+
53+
// Register both benchmarks with Google Benchmark. Iterations(1) pins each one
// to a single iteration of its `for (auto _ : state)` loop, so the input files
// are loaded once per benchmark run.
BENCHMARK(BM_SingleEvents)->Iterations(1);
54+
BENCHMARK(BM_Batched)->Iterations(1);
55+
// Provides the program entry point that runs the registered benchmarks.
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)