Skip to content

Commit bbf3ca9

Browse files
committed
tests: rewrite fullsort tests, again
- reduce code-bloat in tests - and chance of manual typing errors - compute slack from the type-system (e.g. up to one vector's worth of slack) - introduce specific translation units for the i/u/f compilation+testing speed hack while keeping all of the logic in a templated header - still only uses one pattern (unique values) for now
1 parent 4ed505d commit bbf3ca9

22 files changed

+405
-356
lines changed

bench/fullsort/BM_fullsort.vxsort.h

+3-11
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@
1010
#include "../bench_isa.h"
1111
#include "../util.h"
1212

13-
#ifndef VXSORT_COMPILER_MSVC
14-
#include <cxxabi.h>
15-
#endif
16-
1713
#include <vxsort.h>
1814

1915
#include "fullsort_params.h"
@@ -187,13 +183,9 @@ void register_type(i64 s, SortPattern p) {
187183
if constexpr (U >= 2) {
188184
register_type<M, U / 2, T>(s, p);
189185
}
190-
#ifdef VXSORT_COMPILER_MSVC
191-
auto realname = typeid(T).name();
192-
#else
193-
auto realname = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr);
194-
#endif
195-
auto bench_name = fmt::format("BM_vxsort_pattern<{}, {}, {}>/{}/{}", realname, U, s,
196-
magic_enum::enum_name(M), magic_enum::enum_name(p));
186+
auto *bench_type = get_canonical_typename<T>();
187+
auto bench_name = fmt::format("BM_vxsort_pattern<{}, {}, {}>/{}/{}", bench_type, U, s,
188+
magic_enum::enum_name(M), magic_enum::enum_name(p));
197189
::benchmark::RegisterBenchmark(bench_name.c_str(), BM_vxsort_pattern<T, M, U>, s, p)
198190
->Unit(kMillisecond)
199191
->ThreadRange(1, processor_count);

bench/util.cpp

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
#include <random>
2+
#include <unordered_map>
3+
#include <cstring>
4+
25
#include "util.h"
36

47
#include <benchmark/benchmark.h>
58
#include <fmt/format.h>
6-
#include <defs.h>
7-
89

910
#include <picosha2.h>
1011

11-
#include <random>
12-
#include <unordered_map>
1312

1413
namespace vxsort_bench {
1514
using namespace vxsort::types;
@@ -200,5 +199,4 @@ void process_perf_counters(UserCounters &counters, i64 num_elements) {
200199
counters.erase(k);
201200
}
202201
}
203-
204202
}

bench/util.h

+41-1
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,17 @@
33

44
#include <benchmark/benchmark.h>
55

6+
#include <defs.h>
7+
68
#include <vector>
79
#include <algorithm>
810
#include <numeric>
911
#include <random>
12+
#include <cstring>
#include <cstdlib>
#include <typeinfo>
13+
#ifndef VXSORT_COMPILER_MSVC
14+
#include <cxxabi.h>
15+
#endif
1016

11-
#include <defs.h>
1217

1318
#include "stolen-cycleclock.h"
1419

@@ -130,6 +135,41 @@ std::vector<T> push_middle(usize size, T start, T stride) {
130135
return v;
131136
}
132137

138+
/// Returns a short display name for the type T.
///
/// Well-known arithmetic type names are mapped to fixed-width aliases
/// ("i32", "u64", "f32", ...). Any other type yields the compiler-provided
/// name: the demangled name on non-MSVC compilers, typeid(T).name() on MSVC.
/// If demangling fails, "unknown" is returned.
///
/// The name is resolved once per T and cached, so repeated calls return the
/// same pointer. The returned string must not be freed or modified by the
/// caller.
template<typename T>
const char *get_canonical_typename() {
    static const char *const cached_name = []() -> const char * {
        struct alias_entry {
            const char *compiler_name;
            const char *canonical_name;
        };
        static const alias_entry aliases[] = {
            {"long", "i64"},
            {"unsigned long", "u64"},
            // 64-bit types are spelled "long long" on some platforms.
            {"long long", "i64"},
            {"unsigned long long", "u64"},
            {"int", "i32"},
            {"unsigned int", "u32"},
            {"short", "i16"},
            {"unsigned short", "u16"},
            {"char", "i8"},
            // "signed char" is a distinct type from plain "char".
            {"signed char", "i8"},
            {"unsigned char", "u8"},
            {"float", "f32"},
            {"double", "f64"},
        };

#ifdef VXSORT_COMPILER_MSVC
        const char *realname = typeid(T).name();
#else
        // __cxa_demangle returns a malloc()-allocated buffer that we own.
        char *realname = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr);
#endif

        if (realname == nullptr)
            return "unknown";

        for (const auto &entry : aliases) {
            if (std::strcmp(realname, entry.compiler_name) == 0) {
#ifndef VXSORT_COMPILER_MSVC
                // The alias literal is returned instead, so release the buffer.
                std::free(realname);
#endif
                return entry.canonical_name;
            }
        }

        // No alias: hand out the compiler-provided name. On non-MSVC builds
        // this buffer is intentionally kept alive for the program lifetime,
        // because the cached pointer refers to it.
        return realname;
    }();

    return cached_name;
}
171+
172+
133173
}
134174

135175
#endif //VXSORT_BENCH_UTIL_H

tests/CMakeLists.txt

+18-10
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ set(test_HEADERS
1010
mini_tests/masked_load_store_test.h
1111
test_isa.h)
1212

13+
list(APPEND sort_types
14+
i
15+
u
16+
f
17+
)
18+
1319
list(APPEND i_sort_types
1420
i16
1521
i32
@@ -27,12 +33,6 @@ list(APPEND f_sort_types
2733
f64
2834
)
2935

30-
list(APPEND sort_types
31-
i
32-
u
33-
f
34-
)
35-
3636
list(APPEND x86_isas
3737
avx2
3838
avx512
@@ -47,7 +47,7 @@ if (${PROCESSOR_IS_X86})
4747
set(test_avx2_SOURCES ${test_SOURCES})
4848
list(APPEND test_avx2_SOURCES
4949
smallsort/smallsort.avx2.cpp
50-
fullsort/fullsort.avx2.cpp
50+
fullsort/fullsort.avx2.i.cpp
5151
mini_tests/masked_load_store.avx2.cpp
5252
mini_tests/partition_machine.avx2.cpp
5353
mini_tests/pack_machine.avx2.cpp
@@ -56,7 +56,7 @@ if (${PROCESSOR_IS_X86})
5656
set(test_avx512_SOURCES ${test_SOURCES})
5757
list(APPEND test_avx512_SOURCES
5858
smallsort/smallsort.avx512.cpp
59-
fullsort/fullsort.avx512.cpp
59+
fullsort/fullsort.avx512.i.cpp
6060
mini_tests/masked_load_store.avx512.cpp
6161
mini_tests/partition_machine.avx512.cpp
6262
mini_tests/pack_machine.avx512.cpp
@@ -67,15 +67,23 @@ if (${PROCESSOR_IS_X86})
6767
foreach(v ${x86_isas})
6868
foreach(tf ${sort_types})
6969
string(TOUPPER ${v} vu)
70-
add_executable(${TARGET_NAME}_${v}_${tf} ${test_${v}_SOURCES} ${test_HEADERS})
70+
71+
add_executable(${TARGET_NAME}_${v}_${tf} ${test_SOURCES} ${test_HEADERS}
72+
smallsort/smallsort.${v}.cpp
73+
fullsort/fullsort.${v}.${tf}.cpp
74+
mini_tests/masked_load_store.${v}.cpp
75+
mini_tests/partition_machine.${v}.cpp
76+
mini_tests/pack_machine.${v}.cpp)
7177

7278
foreach(t ${${tf}_sort_types})
79+
string(TOUPPER ${tf} tfu)
7380
string(TOUPPER ${t} tu)
74-
target_compile_definitions(${TARGET_NAME}_${v}_${tf} PRIVATE VXSORT_TEST_${vu}_${tu})
81+
target_compile_definitions(${TARGET_NAME}_${v}_${tf} PRIVATE VXSORT_TEST_${vu}_${tu} VXSORT_TEST_${vu}_${tfu})
7582
endforeach ()
7683

7784
target_link_libraries(${TARGET_NAME}_${v}_${tf}
7885
${CMAKE_PROJECT_NAME}_lib
86+
magic_enum::magic_enum
7987
Backward::Backward
8088
GTest::gtest
8189
)

tests/fullsort/fullsort.avx2.cpp

-134
This file was deleted.

tests/fullsort/fullsort.avx2.f.cpp

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#include "vxsort_targets_enable_avx2.h"
2+
3+
#include "gtest/gtest.h"
4+
5+
#include <vxsort.avx2.h>
6+
#include "fullsort_test.h"
7+
8+
namespace vxsort_tests {
9+
using namespace vxsort::types;
10+
using testing::Types;
11+
12+
using VM = vxsort::vector_machine;
13+
using namespace vxsort;
14+
15+
void register_fullsort_avx2_f_tests() {
16+
register_fullsort_benchmarks<VM::AVX2, 8, f32>(10, 1000000, 10, 1234.5, 0.1);
17+
register_fullsort_benchmarks<VM::AVX2, 8, f32>(10, 1000000, 10, 1234.5, 0.1);
18+
}
19+
20+
}
21+
22+
23+
#include "vxsort_targets_disable.h"

tests/fullsort/fullsort.avx2.i.cpp

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#include "vxsort_targets_enable_avx2.h"
2+
3+
#include "gtest/gtest.h"
4+
5+
#include <vxsort.avx2.h>
6+
#include "fullsort_test.h"
7+
8+
namespace vxsort_tests {
9+
using namespace vxsort::types;
10+
using testing::Types;
11+
12+
using VM = vxsort::vector_machine;
13+
using namespace vxsort;
14+
15+
// Registers the AVX2 full-sort test cases for the signed integer element
// types: one register_fullsort_benchmarks call each for i16, i32 and i64.
// The i16 call passes 10000 as its second argument where i32/i64 pass 1000000.
// NOTE(review): the five arguments look like (min size, max size, size step,
// first value, value stride) - confirm against fullsort_test.h.
void register_fullsort_avx2_i_tests() {
16+
register_fullsort_benchmarks<VM::AVX2, 8, i16>(10, 10000, 10, 0x1000, 0x1);
17+
register_fullsort_benchmarks<VM::AVX2, 8, i32>(10, 1000000, 10, 0x1000, 0x1);
18+
register_fullsort_benchmarks<VM::AVX2, 8, i64>(10, 1000000, 10, 0x1000, 0x1);
19+
}
20+
21+
}
22+
23+
24+
#include "vxsort_targets_disable.h"

tests/fullsort/fullsort.avx2.u.cpp

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#include "vxsort_targets_enable_avx2.h"
2+
3+
#include "gtest/gtest.h"
4+
5+
#include <vxsort.avx2.h>
6+
#include "fullsort_test.h"
7+
8+
namespace vxsort_tests {
9+
using namespace vxsort::types;
10+
using testing::Types;
11+
12+
using VM = vxsort::vector_machine;
13+
using namespace vxsort;
14+
15+
// Registers the AVX2 full-sort test cases for the unsigned integer element
// types: one register_fullsort_benchmarks call each for u16, u32 and u64.
// The u16 call passes 10000 as its second argument where u32/u64 pass 1000000.
// NOTE(review): the five arguments look like (min size, max size, size step,
// first value, value stride) - confirm against fullsort_test.h.
void register_fullsort_avx2_u_tests() {
16+
register_fullsort_benchmarks<VM::AVX2, 8, u16>(10, 10000, 10, 0x1000, 0x1);
17+
register_fullsort_benchmarks<VM::AVX2, 8, u32>(10, 1000000, 10, 0x1000, 0x1);
18+
register_fullsort_benchmarks<VM::AVX2, 8, u64>(10, 1000000, 10, 0x1000, 0x1);
19+
}
20+
21+
}
22+
23+
24+
#include "vxsort_targets_disable.h"

0 commit comments

Comments
 (0)