Skip to content

Commit 7cf35e8

Browse files
authored
Merge pull request #9 from stevenewald/simd
Add AVX512 equations
2 parents b527e68 + 765cd9d commit 7cf35e8

File tree

12 files changed

+198
-49
lines changed

12 files changed

+198
-49
lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ include(cmake/project-is-top-level.cmake)
1414
include(cmake/variables.cmake)
1515

1616
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
17+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=x86-64 -mavx512f -mavx512dq -mavx512vl -mavx512bf16")
18+
add_compile_options(-fno-inline -fno-omit-frame-pointer)
19+
1720

1821
# ---- Declare library ----
1922

@@ -24,6 +27,8 @@ add_library(
2427
source/mandelbrot/mandelbrot_window.cpp
2528
source/graphics/color_conversions/color_conversions.cpp
2629
source/graphics/aspect_ratio/aspect_ratio.cpp
30+
source/mandelbrot/equations_simd.cpp
31+
source/mandelbrot/equations.cpp
2732
)
2833

2934
target_include_directories(

source/config.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
namespace fractal {
99

10-
constexpr std::size_t WINDOW_WIDTH = 800UZ*2;
11-
constexpr std::size_t WINDOW_HEIGHT = 600UZ*2;
10+
constexpr std::size_t WINDOW_WIDTH = 5120;
11+
constexpr std::size_t WINDOW_HEIGHT = 1440;
1212
constexpr std::size_t FRAME_RATE = 60UZ;
1313

1414
constexpr display_domain DISPLAY_DOMAIN{
@@ -17,8 +17,8 @@ constexpr display_domain DISPLAY_DOMAIN{
1717
};
1818

1919
constexpr complex_domain START_COMPLEX_DOMAIN{
20-
{complex_underlying{-2}, complex_underlying{-1.5}},
21-
{complex_underlying{1}, complex_underlying{1.5} }
20+
{complex_underlying{-1.402}, complex_underlying{-.001}},
21+
{complex_underlying{-1.400}, complex_underlying{.001} }
2222
};
2323

2424
const complex_underlying MANDELBROT_DIVERGENCE_NORM = 4;

source/coordinates.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@ struct display_domain {
5858
return *this;
5959
}
6060

61+
DisplayCoordinateIterator& operator+=(difference_type other)
62+
{
63+
current_coordinate_ += other;
64+
return *this;
65+
}
66+
6167
DisplayCoordinateIterator operator-(difference_type other)
6268
{
6369
DisplayCoordinateIterator tmp = *this;
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#pragma once
22

3+
#include "config.hpp"
34
#include "coordinates.hpp"
45

56
namespace fractal {
67
display_coordinate calculate_rectangle_end_point(
78
display_coordinate start, display_coordinate current,
8-
float target_aspect_ratio = 800.0f / 600.0f
9+
float target_aspect_ratio = static_cast<float>(WINDOW_WIDTH) / WINDOW_HEIGHT
910
);
1011
} // namespace fractal

source/graphics/color_conversions/color_conversions.cpp

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,34 +17,28 @@ color hsv_to_rgb(float hue, float saturation, float value)
1717
float green_temp = 0.0f;
1818
float blue_temp = 0.0f;
1919

20-
if (0 <= hue_prime && hue_prime < 1) {
20+
if (hue_prime < 1) {
2121
red_temp = chroma;
2222
green_temp = uint16_termediate;
23-
blue_temp = 0;
2423
}
25-
else if (1 <= hue_prime && hue_prime < 2) {
24+
else if (hue_prime < 2) {
2625
red_temp = uint16_termediate;
2726
green_temp = chroma;
28-
blue_temp = 0;
2927
}
30-
else if (2 <= hue_prime && hue_prime < 3) {
31-
red_temp = 0;
28+
else if (hue_prime < 3) {
3229
green_temp = chroma;
3330
blue_temp = uint16_termediate;
3431
}
35-
else if (3 <= hue_prime && hue_prime < 4) {
36-
red_temp = 0;
32+
else if (hue_prime < 4) {
3733
green_temp = uint16_termediate;
3834
blue_temp = chroma;
3935
}
40-
else if (4 <= hue_prime && hue_prime < 5) {
36+
else if (hue_prime < 5) {
4137
red_temp = uint16_termediate;
42-
green_temp = 0;
4338
blue_temp = chroma;
4439
}
45-
else if (5 <= hue_prime && hue_prime < 6) {
40+
else if (hue_prime < 6) {
4641
red_temp = chroma;
47-
green_temp = 0;
4842
blue_temp = uint16_termediate;
4943
}
5044

source/mandelbrot/equations.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#include "equations.hpp"
2+
3+
#include "config.hpp"
4+
5+
namespace fractal {
6+
// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition
7+
std::complex<complex_underlying>
8+
step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant)
9+
{
10+
return z_n * z_n + constant;
11+
}
12+
13+
iteration_count compute_iterations(
14+
std::complex<complex_underlying> z_0, std::complex<complex_underlying> constant,
15+
iteration_count max_iters
16+
)
17+
{
18+
iteration_count iterations = 0;
19+
std::complex<complex_underlying> z_n = z_0;
20+
21+
while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) {
22+
z_n = step(z_n, constant);
23+
++iterations;
24+
}
25+
26+
return iterations;
27+
}
28+
} // namespace fractal

source/mandelbrot/equations.hpp

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,14 @@
11
#pragma once
22

3-
#include "config.hpp"
43
#include "units.hpp"
54

65
namespace fractal {
76
// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition
8-
inline std::complex<complex_underlying>
9-
step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant)
10-
{
11-
return z_n * z_n + constant;
12-
}
7+
std::complex<complex_underlying>
8+
step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant);
139

14-
inline iteration_count compute_iterations(
10+
iteration_count compute_iterations(
1511
std::complex<complex_underlying> z_0, std::complex<complex_underlying> constant,
1612
iteration_count max_iters
17-
)
18-
{
19-
iteration_count iterations = 0;
20-
std::complex<complex_underlying> z_n = z_0;
21-
22-
while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) {
23-
z_n = step(z_n, constant);
24-
++iterations;
25-
}
26-
27-
return iterations;
28-
}
13+
);
2914
} // namespace fractal

source/mandelbrot/equations_simd.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#include "config.hpp"
2+
#include "equations.hpp"
3+
#include "units.hpp"
4+
5+
#include <immintrin.h>
6+
7+
namespace fractal {
8+
std::array<iteration_count, 8> compute_iterations(
9+
const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters
10+
)
11+
{
12+
static const auto SQUARED_DIVERGENCE =
13+
MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM;
14+
15+
alignas(64) std::array<double, 8> reals = z_0.real;
16+
alignas(64) std::array<double, 8> imags = z_0.imaginary;
17+
alignas(64) std::array<double, 8> const_reals = constant.real;
18+
alignas(64) std::array<double, 8> const_imags = constant.imaginary;
19+
20+
std::array<iteration_count, 8> solved_its = {0};
21+
22+
__m512d input_vec_real = _mm512_load_pd(reals.data());
23+
__m512d input_vec_imag = _mm512_load_pd(imags.data());
24+
__m512d input_vec_constant_imags = _mm512_load_pd(const_imags.data());
25+
__m512d input_vec_constant_reals = _mm512_load_pd(const_reals.data());
26+
__m512i solved_its_vec = _mm512_loadu_epi16(solved_its.data());
27+
28+
for (iteration_count iterations = 0; iterations < max_iters; iterations++) {
29+
// Square real
30+
__m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real);
31+
32+
// Square imag
33+
__m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag);
34+
35+
// Create imags
36+
__m512d real_x2 = _mm512_mul_pd(input_vec_real, _mm512_set1_pd(2));
37+
input_vec_imag =
38+
_mm512_fmadd_pd(real_x2, input_vec_imag, input_vec_constant_imags);
39+
40+
// Create reals
41+
__m512d subtracted_squared = _mm512_sub_pd(squared_vec_real, squared_vec_imag);
42+
input_vec_real = _mm512_add_pd(subtracted_squared, input_vec_constant_reals);
43+
44+
// Create squared norms
45+
__m512d squared_norms_vec = _mm512_add_pd(squared_vec_real, squared_vec_imag);
46+
__mmask8 solved_mask = _mm512_cmp_pd_mask(
47+
squared_norms_vec, _mm512_set1_pd(SQUARED_DIVERGENCE), _CMP_GT_OS
48+
);
49+
50+
uint32_t solved = _cvtmask8_u32(solved_mask);
51+
solved_its_vec = _mm512_mask_blend_epi16(
52+
solved_mask, solved_its_vec,
53+
_mm512_set1_epi16(static_cast<int16_t>(iterations))
54+
);
55+
if (solved == 0xFF) [[unlikely]]
56+
break;
57+
}
58+
59+
__mmask32 mask = _mm512_cmpeq_epi16_mask(solved_its_vec, _mm512_set1_epi16(0));
60+
solved_its_vec = _mm512_mask_mov_epi16(
61+
solved_its_vec, mask, _mm512_set1_epi16(static_cast<int16_t>(max_iters))
62+
);
63+
_mm512_storeu_epi16(solved_its.data(), solved_its_vec);
64+
65+
return solved_its;
66+
}
67+
} // namespace fractal

source/mandelbrot/equations_simd.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#pragma once
2+
3+
#include "config.hpp"
4+
#include "units.hpp"
5+
6+
namespace fractal {
7+
8+
std::array<iteration_count, 8> compute_iterations(
9+
const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters
10+
);
11+
} // namespace fractal

source/mandelbrot/mandelbrot_window.cpp

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,54 @@
22

33
#include "config.hpp"
44
#include "coordinates.hpp"
5-
#include "equations.hpp"
5+
#include "equations_simd.hpp"
66
#include "graphics/aspect_ratio/aspect_ratio.hpp"
77
#include "graphics/color_conversions/color_conversions.hpp"
88
#include "graphics/display_to_complex.hpp"
9+
#include "units.hpp"
910

11+
#include <fmt/base.h>
12+
#include <fmt/format.h>
13+
#include <immintrin.h>
1014
#include <SFML/Graphics/Drawable.hpp>
1115
#include <SFML/Graphics/Image.hpp>
1216
#include <SFML/Graphics/Sprite.hpp>
1317
#include <SFML/Graphics/Texture.hpp>
1418

19+
#include <chrono>
20+
1521
#include <future>
1622
#include <memory>
1723
#include <optional>
1824

1925
namespace fractal {
2026
void MandelbrotWindow::draw_coordinate_(
21-
const display_coordinate& display_coord, const complex_coordinate& complex_coord
27+
display_coordinate display_coord, const avx512_complex& complex_coords
2228
)
2329
{
24-
iteration_count iterations =
25-
compute_iterations({0, 0}, complex_coord, MANDELBROT_MAX_ITERATIONS);
30+
static constexpr avx512_complex START{};
31+
alignas(64) std::array<iteration_count, 8> iterations =
32+
compute_iterations(START, complex_coords, MANDELBROT_MAX_ITERATIONS);
33+
alignas(64) std::array<float, 8> ratios{};
34+
35+
__m128i iterations_vec = _mm_loadu_epi16(iterations.data());
36+
37+
__m128i input_low = _mm_unpacklo_epi16(iterations_vec, _mm_setzero_si128());
38+
__m128i input_high = _mm_unpackhi_epi16(iterations_vec, _mm_setzero_si128());
39+
40+
__m128 floats_low = _mm_cvtepi32_ps(input_low);
41+
__m128 floats_high = _mm_cvtepi32_ps(input_high);
2642

27-
float iteration_ratio = static_cast<float>(iterations) / MANDELBROT_MAX_ITERATIONS;
28-
set_pixel_color(display_coord, iteration_ratio);
43+
floats_low = _mm_div_ps(floats_low, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS));
44+
floats_high = _mm_div_ps(floats_high, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS));
45+
46+
_mm_storeu_ps(ratios.begin(), floats_low);
47+
_mm_storeu_ps(ratios.begin() + 4, floats_high);
48+
49+
for (size_t i = 0; i < 8; i++) {
50+
set_pixel_color(display_coord, ratios[i]);
51+
display_coord.first++;
52+
}
2953
}
3054

3155
void MandelbrotWindow::on_resize_(display_domain new_domain_selection)
@@ -38,22 +62,36 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection)
3862
domain_ = {new_top, new_bottom};
3963
to_complex = {DISPLAY_DOMAIN.end_coordinate, domain_};
4064

41-
auto process_coordinate = [&](const display_coordinate& display_coord) {
42-
auto complex_coord = to_complex.to_complex_projection(display_coord);
43-
draw_coordinate_(display_coord, complex_coord);
65+
auto process_coordinates = [&](display_coordinate start_display_coord) {
66+
std::array<std::complex<complex_underlying>, 8> coords{};
67+
auto t = start_display_coord;
68+
for (size_t i = 0; i < 8; i++) {
69+
coords[i] = to_complex.to_complex_projection(start_display_coord);
70+
start_display_coord.first++;
71+
}
72+
avx512_complex coords2{};
73+
for (size_t i = 0; i < 8; i++) {
74+
coords2.real[i] = coords[i].real();
75+
coords2.imaginary[i] = coords[i].imag();
76+
}
77+
draw_coordinate_(t, coords2);
4478
};
4579

4680
auto process_chunk = [&](display_domain::DisplayCoordinateIterator start,
4781
display_domain::DisplayCoordinateIterator end) {
48-
std::for_each(start, end, process_coordinate);
82+
for (auto it = start; it != end; it += 8) {
83+
process_coordinates(*it);
84+
}
4985
};
5086

5187
uint32_t total = WINDOW_WIDTH * WINDOW_HEIGHT;
52-
uint32_t chunks = 32;
88+
uint32_t chunks = 128;
5389
uint32_t step = total / chunks;
5490

5591
std::vector<std::future<void>> futures;
5692

93+
auto start = std::chrono::high_resolution_clock::now();
94+
5795
for (uint32_t chunk = 0; chunk < chunks; chunk++) {
5896
display_domain::DisplayCoordinateIterator start =
5997
DISPLAY_DOMAIN.begin() + chunk * step;
@@ -66,6 +104,9 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection)
66104
for (const auto& future : futures) {
67105
future.wait();
68106
}
107+
auto end = std::chrono::high_resolution_clock::now();
108+
auto time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
109+
std::cout << fmt::format("Time elapsed: {}", time.count()) << "\n";
69110
}
70111

71112
MandelbrotWindow::MandelbrotWindow()

0 commit comments

Comments
 (0)