From 8b1e419195a6663dfa59dcc7a1b139001f5a4c30 Mon Sep 17 00:00:00 2001 From: Steven Ewald Date: Sun, 24 Nov 2024 14:50:44 -0600 Subject: [PATCH 1/6] Time --- source/mandelbrot/equations_simd.hpp | 29 +++++++++++++++++++++++++ source/mandelbrot/mandelbrot_window.cpp | 10 +++++++++ 2 files changed, 39 insertions(+) create mode 100644 source/mandelbrot/equations_simd.hpp diff --git a/source/mandelbrot/equations_simd.hpp b/source/mandelbrot/equations_simd.hpp new file mode 100644 index 0000000..26103b5 --- /dev/null +++ b/source/mandelbrot/equations_simd.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include "config.hpp" +#include "units.hpp" + +namespace fractal { +// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition +inline std::complex +step(std::complex z_n, std::complex constant) +{ + return z_n * z_n + constant; +} + +inline iteration_count compute_iterations( + std::complex z_0, std::complex constant, + iteration_count max_iters +) +{ + iteration_count iterations = 0; + std::complex z_n = z_0; + + while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { + z_n = step(z_n, constant); + ++iterations; + } + + return iterations; +} +} // namespace fractal diff --git a/source/mandelbrot/mandelbrot_window.cpp b/source/mandelbrot/mandelbrot_window.cpp index ad88788..4178ec5 100644 --- a/source/mandelbrot/mandelbrot_window.cpp +++ b/source/mandelbrot/mandelbrot_window.cpp @@ -7,12 +7,17 @@ #include "graphics/color_conversions/color_conversions.hpp" #include "graphics/display_to_complex.hpp" +#include +#include #include #include #include #include +#include + #include +#include #include #include @@ -54,6 +59,8 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection) std::vector> futures; + auto start = std::chrono::high_resolution_clock::now(); + for (uint32_t chunk = 0; chunk < chunks; chunk++) { display_domain::DisplayCoordinateIterator start = DISPLAY_DOMAIN.begin() + chunk * step; @@ -66,6 +73,9 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection) for (const auto& future : futures) { future.wait(); } + auto end = std::chrono::high_resolution_clock::now(); + auto time = std::chrono::duration_cast(end - start); + std::cout << fmt::format("Time elapsed: {}", time.count()) << "\n"; } MandelbrotWindow::MandelbrotWindow() From d057a3242f131795a209f20f68b07a9b7f09e787 Mon Sep 17 00:00:00 2001 From: Steven Ewald Date: Sun, 24 Nov 2024 16:04:14 -0600 Subject: [PATCH 2/6] checkpoint --- source/config.hpp | 4 +- source/coordinates.hpp | 6 ++ source/mandelbrot/equations.hpp | 62 ++++++++++++++++--- .../{equations_simd.hpp => equations_nsd.hpp} | 0 source/mandelbrot/mandelbrot_window.cpp | 36 ++++++++--- source/mandelbrot/mandelbrot_window.hpp | 3 +- source/units.hpp | 2 +- 7 files changed, 91 insertions(+), 22 deletions(-) rename source/mandelbrot/{equations_simd.hpp => equations_nsd.hpp} (100%) diff --git a/source/config.hpp b/source/config.hpp index 4fbbe9b..a8f4d27 100644 --- a/source/config.hpp +++ b/source/config.hpp @@ -17,8 +17,8 @@ constexpr display_domain DISPLAY_DOMAIN{ }; constexpr complex_domain START_COMPLEX_DOMAIN{ - {complex_underlying{-2}, complex_underlying{-1.5}}, - {complex_underlying{1}, complex_underlying{1.5} } + {complex_underlying{-1.402}, complex_underlying{-.001}}, + {complex_underlying{-1.400}, complex_underlying{.001} } }; const complex_underlying MANDELBROT_DIVERGENCE_NORM = 4; diff --git a/source/coordinates.hpp b/source/coordinates.hpp index 21ff1e4..9c820fc 100644 --- a/source/coordinates.hpp +++ b/source/coordinates.hpp @@ -58,6 +58,12 @@ struct display_domain { return *this; } + DisplayCoordinateIterator& operator+=(difference_type other) + { + current_coordinate_ += other; + return *this; + } + DisplayCoordinateIterator operator-(difference_type other) { DisplayCoordinateIterator tmp = *this; diff --git a/source/mandelbrot/equations.hpp b/source/mandelbrot/equations.hpp index 26103b5..c81af62 100644 --- a/source/mandelbrot/equations.hpp +++ b/source/mandelbrot/equations.hpp @@ -3,27 +3,73 @@ #include "config.hpp" #include "units.hpp" +#include + namespace fractal { // https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition + inline std::complex step(std::complex z_n, std::complex constant) { return z_n * z_n + constant; } -inline iteration_count compute_iterations( - std::complex z_0, std::complex constant, - iteration_count max_iters +inline std::array compute_iterations( + std::array, 8> z_0, + std::array, 8> constant, iteration_count max_iters ) { - iteration_count iterations = 0; - std::complex z_n = z_0; + std::array res{}; + for (size_t i = 0; i < 8; i++) { + while (res[i] < max_iters && std::norm(z_0[i]) < MANDELBROT_DIVERGENCE_NORM) { + z_0[i] = step(z_0[i], constant[i]); + ++res[i]; + } + } + return res; + + std::array reals = {z_0[0].real(), z_0[1].real(), z_0[2].real(), + z_0[3].real(), z_0[4].real(), z_0[5].real(), + z_0[6].real(), z_0[7].real()}; + std::array imags = {z_0[0].imag(), z_0[1].imag(), z_0[2].imag(), + z_0[3].imag(), z_0[4].imag(), z_0[5].imag(), + z_0[6].imag(), z_0[7].imag()}; + + std::array squared_reals{}; + std::array squared_imags{}; + std::array solved_its{0}; + bool some_unsolved = true; + iteration_count iterations = 0; + + while (some_unsolved && iterations < max_iters) { + __m512d input_vec_real = _mm512_load_pd(reals.data()); + __m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real); + _mm512_store_pd(squared_reals.data(), squared_vec_real); + __m512d input_vec_imag = _mm512_load_pd(imags.data()); + __m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag); + _mm512_store_pd(squared_imags.data(), squared_vec_imag); + + for (int i = 0; i < 8; i++) { + if (solved_its[i] == 0) { + imags[i] = (2 * reals[i] * imags[i]) + constant[i].imag(); + reals[i] = (squared_reals[i] - squared_imags[i]) + constant[i].real(); + } + } + + some_unsolved = false; + for (std::size_t i = 0; i < 8; i++) { + if (solved_its[i] == 0) + continue; + double norm = std::sqrt(reals[i] * reals[i] + imags[i] * imags[i]); + if (norm < MANDELBROT_DIVERGENCE_NORM) + solved_its[i] = iterations; + else + some_unsolved = true; + } - while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { - z_n = step(z_n, constant); ++iterations; } - return iterations; + return solved_its; } } // namespace fractal diff --git a/source/mandelbrot/equations_simd.hpp b/source/mandelbrot/equations_nsd.hpp similarity index 100% rename from source/mandelbrot/equations_simd.hpp rename to source/mandelbrot/equations_nsd.hpp diff --git a/source/mandelbrot/mandelbrot_window.cpp b/source/mandelbrot/mandelbrot_window.cpp index 4178ec5..4dbf1bf 100644 --- a/source/mandelbrot/mandelbrot_window.cpp +++ b/source/mandelbrot/mandelbrot_window.cpp @@ -6,6 +6,7 @@ #include "graphics/aspect_ratio/aspect_ratio.hpp" #include "graphics/color_conversions/color_conversions.hpp" #include "graphics/display_to_complex.hpp" +#include "units.hpp" #include #include @@ -23,14 +24,22 @@ namespace fractal { void MandelbrotWindow::draw_coordinate_( - const display_coordinate& display_coord, const complex_coordinate& complex_coord + display_coordinate display_coord, + const std::array, 8>& complex_coords ) { - iteration_count iterations = - compute_iterations({0, 0}, complex_coord, MANDELBROT_MAX_ITERATIONS); - - float iteration_ratio = static_cast(iterations) / MANDELBROT_MAX_ITERATIONS; - set_pixel_color(display_coord, iteration_ratio); + std::array, 8> starts = { + std::complex{0, 0} + }; + auto iterations = + compute_iterations(starts, complex_coords, MANDELBROT_MAX_ITERATIONS); + + for (size_t i = 0; i < 8; i++) { + float iteration_ratio = + static_cast(iterations[i]) / MANDELBROT_MAX_ITERATIONS; + set_pixel_color(display_coord, iteration_ratio); + display_coord.first++; + } } void MandelbrotWindow::on_resize_(display_domain new_domain_selection) @@ -43,14 +52,21 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection) domain_ = {new_top, new_bottom}; to_complex = {DISPLAY_DOMAIN.end_coordinate, domain_}; - auto process_coordinate = [&](const display_coordinate& display_coord) { - auto complex_coord = to_complex.to_complex_projection(display_coord); - draw_coordinate_(display_coord, complex_coord); + auto process_coordinates = [&](display_coordinate start_display_coord) { + std::array, 8> coords{}; + auto t = start_display_coord; + for (size_t i = 0; i < 8; i++) { + coords[i] = to_complex.to_complex_projection(start_display_coord); + start_display_coord.first++; + } + draw_coordinate_(t, coords); }; auto process_chunk = [&](display_domain::DisplayCoordinateIterator start, display_domain::DisplayCoordinateIterator end) { - std::for_each(start, end, process_coordinate); + for (auto it = start; it != end; it+=8) { + process_coordinates(*it); + } }; uint32_t total = WINDOW_WIDTH * WINDOW_HEIGHT; diff --git a/source/mandelbrot/mandelbrot_window.hpp b/source/mandelbrot/mandelbrot_window.hpp index e5797be..d7c7e07 100644 --- a/source/mandelbrot/mandelbrot_window.hpp +++ b/source/mandelbrot/mandelbrot_window.hpp @@ -22,7 +22,8 @@ class MandelbrotWindow : public DisplayEventObserver { void on_resize_(display_domain new_domain_selection); void draw_coordinate_( - const display_coordinate& display_coord, const complex_coordinate& complex_coord + display_coordinate display_coord, + const std::array, 8>& complex_coords ); public: diff --git a/source/units.hpp b/source/units.hpp index f41eaa5..e72cc3a 100644 --- a/source/units.hpp +++ b/source/units.hpp @@ -13,7 +13,7 @@ using iteration_count = std::uint16_t; using complex_underlying = boost::multiprecision::number; using complex = boost::multiprecision::complex_adaptor;*/ -using complex_underlying = __float128; +using complex_underlying = double; struct color { uint8_t red; From e9287b1f1e47b92db18c81f5ce82308e68e9b551 Mon Sep 17 00:00:00 2001 From: Steven Ewald Date: Sun, 24 Nov 2024 16:27:21 -0600 Subject: [PATCH 3/6] checkpoint2 --- CMakeLists.txt | 1 + source/config.hpp | 6 +- source/mandelbrot/equations.hpp | 103 ++++++++++++++---------- source/mandelbrot/mandelbrot_window.cpp | 2 +- 4 files changed, 65 insertions(+), 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7bc639d..7abe293 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ include(cmake/project-is-top-level.cmake) include(cmake/variables.cmake) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=x86-64 -mavx512f -mavx512dq -mavx512vl -mavx512bf16") # ---- Declare library ---- diff --git a/source/config.hpp b/source/config.hpp index a8f4d27..f460612 100644 --- a/source/config.hpp +++ b/source/config.hpp @@ -7,8 +7,8 @@ namespace fractal { -constexpr std::size_t WINDOW_WIDTH = 800UZ*2; -constexpr std::size_t WINDOW_HEIGHT = 600UZ*2; +constexpr std::size_t WINDOW_WIDTH = 800UZ * 2; +constexpr std::size_t WINDOW_HEIGHT = 600UZ * 2; constexpr std::size_t FRAME_RATE = 60UZ; constexpr display_domain DISPLAY_DOMAIN{ @@ -18,7 +18,7 @@ constexpr display_domain DISPLAY_DOMAIN{ constexpr complex_domain START_COMPLEX_DOMAIN{ {complex_underlying{-1.402}, complex_underlying{-.001}}, - {complex_underlying{-1.400}, complex_underlying{.001} } + {complex_underlying{-1.400}, complex_underlying{.001} } }; const complex_underlying MANDELBROT_DIVERGENCE_NORM = 4; diff --git a/source/mandelbrot/equations.hpp b/source/mandelbrot/equations.hpp index c81af62..5052c87 100644 --- a/source/mandelbrot/equations.hpp +++ b/source/mandelbrot/equations.hpp @@ -19,57 +19,74 @@ inline std::array compute_iterations( std::array, 8> constant, iteration_count max_iters ) { - std::array res{}; - for (size_t i = 0; i < 8; i++) { - while (res[i] < max_iters && std::norm(z_0[i]) < MANDELBROT_DIVERGENCE_NORM) { - z_0[i] = step(z_0[i], constant[i]); - ++res[i]; - } - } - return res; - - std::array reals = {z_0[0].real(), z_0[1].real(), z_0[2].real(), - z_0[3].real(), z_0[4].real(), z_0[5].real(), - z_0[6].real(), z_0[7].real()}; - std::array imags = {z_0[0].imag(), z_0[1].imag(), z_0[2].imag(), - z_0[3].imag(), z_0[4].imag(), z_0[5].imag(), - z_0[6].imag(), z_0[7].imag()}; - - std::array squared_reals{}; - std::array squared_imags{}; - std::array solved_its{0}; - bool some_unsolved = true; - iteration_count iterations = 0; - - while (some_unsolved && iterations < max_iters) { - __m512d input_vec_real = _mm512_load_pd(reals.data()); + static const auto SQUARED_DIVERGENCE = + MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM; + + alignas(64) std::array reals = {z_0[0].real(), z_0[1].real(), + z_0[2].real(), z_0[3].real(), + z_0[4].real(), z_0[5].real(), + z_0[6].real(), z_0[7].real()}; + alignas(64) std::array imags = {z_0[0].imag(), z_0[1].imag(), + z_0[2].imag(), z_0[3].imag(), + z_0[4].imag(), z_0[5].imag(), + z_0[6].imag(), z_0[7].imag()}; + alignas(64) std::array const_reals = { + constant[0].real(), constant[1].real(), constant[2].real(), constant[3].real(), + constant[4].real(), constant[5].real(), constant[6].real(), constant[7].real() + }; + alignas(64) std::array const_imags = { + constant[0].imag(), constant[1].imag(), constant[2].imag(), constant[3].imag(), + constant[4].imag(), constant[5].imag(), constant[6].imag(), constant[7].imag() + }; + + std::array solved_its = {0}; + iteration_count iterations = 0; + + __m512d input_vec_real = _mm512_load_pd(reals.data()); + __m512d input_vec_imag = _mm512_load_pd(imags.data()); + __m512d input_vec_constant_imags = _mm512_load_pd(const_imags.data()); + __m512d input_vec_constant_reals = _mm512_load_pd(const_reals.data()); + __m512i solved_its_vec = _mm512_loadu_epi16(solved_its.data()); + + while (iterations < max_iters) { + // Square real __m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real); - _mm512_store_pd(squared_reals.data(), squared_vec_real); - __m512d input_vec_imag = _mm512_load_pd(imags.data()); + + // Square imag __m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag); - _mm512_store_pd(squared_imags.data(), squared_vec_imag); - for (int i = 0; i < 8; i++) { - if (solved_its[i] == 0) { - imags[i] = (2 * reals[i] * imags[i]) + constant[i].imag(); - reals[i] = (squared_reals[i] - squared_imags[i]) + constant[i].real(); - } - } + // Create imags + __m512d real_x2 = _mm512_mul_pd(input_vec_real, _mm512_set1_pd(2)); + input_vec_imag = + _mm512_fmadd_pd(real_x2, input_vec_imag, input_vec_constant_imags); - some_unsolved = false; - for (std::size_t i = 0; i < 8; i++) { - if (solved_its[i] == 0) - continue; - double norm = std::sqrt(reals[i] * reals[i] + imags[i] * imags[i]); - if (norm < MANDELBROT_DIVERGENCE_NORM) - solved_its[i] = iterations; - else - some_unsolved = true; - } + // Create reals + __m512d subtracted_squared = _mm512_sub_pd(squared_vec_real, squared_vec_imag); + input_vec_real = _mm512_add_pd(subtracted_squared, input_vec_constant_reals); + + // Create squared norms + __m512d squared_norms_vec = _mm512_add_pd(squared_vec_real, squared_vec_imag); + __mmask8 solved_mask = _mm512_cmp_pd_mask( + squared_norms_vec, _mm512_set1_pd(SQUARED_DIVERGENCE), _CMP_GT_OS + ); + + uint32_t solved = _cvtmask8_u32(solved_mask); + solved_its_vec = _mm512_mask_blend_epi16( + solved_mask, solved_its_vec, _mm512_set1_epi16(iterations) + ); + if (solved == 0xFF) [[unlikely]] + break; ++iterations; } + _mm512_storeu_epi16(solved_its.data(), solved_its_vec); + for (int i = 0; i < 8; i++) { + if (solved_its[i] == 0) { + solved_its[i] = max_iters; + } + } + return solved_its; } } // namespace fractal diff --git a/source/mandelbrot/mandelbrot_window.cpp b/source/mandelbrot/mandelbrot_window.cpp index 4dbf1bf..5e9c1b4 100644 --- a/source/mandelbrot/mandelbrot_window.cpp +++ b/source/mandelbrot/mandelbrot_window.cpp @@ -64,7 +64,7 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection) auto process_chunk = [&](display_domain::DisplayCoordinateIterator start, display_domain::DisplayCoordinateIterator end) { - for (auto it = start; it != end; it+=8) { + for (auto it = start; it != end; it += 8) { process_coordinates(*it); } }; From 5b293ecc656a485b6f88797276f21099ad1270cc Mon Sep 17 00:00:00 2001 From: Steven Ewald Date: Sun, 24 Nov 2024 17:57:51 -0600 Subject: [PATCH 4/6] Separate simd equation impl and headers --- CMakeLists.txt | 1 + source/mandelbrot/equations.hpp | 77 +++---------------------- source/mandelbrot/equations_nsd.hpp | 29 ---------- source/mandelbrot/equations_simd.cpp | 77 +++++++++++++++++++++++++ source/mandelbrot/equations_simd.hpp | 19 ++++++ source/mandelbrot/mandelbrot_window.cpp | 4 +- 6 files changed, 106 insertions(+), 101 deletions(-) delete mode 100644 source/mandelbrot/equations_nsd.hpp create mode 100644 source/mandelbrot/equations_simd.cpp create mode 100644 source/mandelbrot/equations_simd.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 7abe293..8e39438 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,7 @@ add_library( source/mandelbrot/mandelbrot_window.cpp source/graphics/color_conversions/color_conversions.cpp source/graphics/aspect_ratio/aspect_ratio.cpp + source/mandelbrot/equations_simd.cpp ) target_include_directories( diff --git a/source/mandelbrot/equations.hpp b/source/mandelbrot/equations.hpp index 5052c87..26103b5 100644 --- a/source/mandelbrot/equations.hpp +++ b/source/mandelbrot/equations.hpp @@ -3,90 +3,27 @@ #include "config.hpp" #include "units.hpp" -#include - namespace fractal { // https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition - inline std::complex step(std::complex z_n, std::complex constant) { return z_n * z_n + constant; } -inline std::array compute_iterations( - std::array, 8> z_0, - std::array, 8> constant, iteration_count max_iters +inline iteration_count compute_iterations( + std::complex z_0, std::complex constant, + iteration_count max_iters ) { - static const auto SQUARED_DIVERGENCE = - MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM; - - alignas(64) std::array reals = {z_0[0].real(), z_0[1].real(), - z_0[2].real(), z_0[3].real(), - z_0[4].real(), z_0[5].real(), - z_0[6].real(), z_0[7].real()}; - alignas(64) std::array imags = {z_0[0].imag(), z_0[1].imag(), - z_0[2].imag(), z_0[3].imag(), - z_0[4].imag(), z_0[5].imag(), - z_0[6].imag(), z_0[7].imag()}; - alignas(64) std::array const_reals = { - constant[0].real(), constant[1].real(), constant[2].real(), constant[3].real(), - constant[4].real(), constant[5].real(), constant[6].real(), constant[7].real() - }; - alignas(64) std::array const_imags = { - constant[0].imag(), constant[1].imag(), constant[2].imag(), constant[3].imag(), - constant[4].imag(), constant[5].imag(), constant[6].imag(), constant[7].imag() - }; - - std::array solved_its = {0}; iteration_count iterations = 0; + std::complex z_n = z_0; - __m512d input_vec_real = _mm512_load_pd(reals.data()); - __m512d input_vec_imag = _mm512_load_pd(imags.data()); - __m512d input_vec_constant_imags = _mm512_load_pd(const_imags.data()); - __m512d input_vec_constant_reals = _mm512_load_pd(const_reals.data()); - __m512i solved_its_vec = _mm512_loadu_epi16(solved_its.data()); - - while (iterations < max_iters) { - // Square real - __m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real); - - // Square imag - __m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag); - - // Create imags - __m512d real_x2 = _mm512_mul_pd(input_vec_real, _mm512_set1_pd(2)); - input_vec_imag = - _mm512_fmadd_pd(real_x2, input_vec_imag, input_vec_constant_imags); - - // Create reals - __m512d subtracted_squared = _mm512_sub_pd(squared_vec_real, squared_vec_imag); - input_vec_real = _mm512_add_pd(subtracted_squared, input_vec_constant_reals); - - // Create squared norms - __m512d squared_norms_vec = _mm512_add_pd(squared_vec_real, squared_vec_imag); - __mmask8 solved_mask = _mm512_cmp_pd_mask( - squared_norms_vec, _mm512_set1_pd(SQUARED_DIVERGENCE), _CMP_GT_OS - ); - - uint32_t solved = _cvtmask8_u32(solved_mask); - solved_its_vec = _mm512_mask_blend_epi16( - solved_mask, solved_its_vec, _mm512_set1_epi16(iterations) - ); - if (solved == 0xFF) [[unlikely]] - break; - + while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { + z_n = step(z_n, constant); ++iterations; } - _mm512_storeu_epi16(solved_its.data(), solved_its_vec); - for (int i = 0; i < 8; i++) { - if (solved_its[i] == 0) { - solved_its[i] = max_iters; - } - } - - return solved_its; + return iterations; } } // namespace fractal diff --git a/source/mandelbrot/equations_nsd.hpp b/source/mandelbrot/equations_nsd.hpp deleted file mode 100644 index 26103b5..0000000 --- a/source/mandelbrot/equations_nsd.hpp +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include "config.hpp" -#include "units.hpp" - -namespace fractal { -// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition -inline std::complex -step(std::complex z_n, std::complex constant) -{ - return z_n * z_n + constant; -} - -inline iteration_count compute_iterations( - std::complex z_0, std::complex constant, - iteration_count max_iters -) -{ - iteration_count iterations = 0; - std::complex z_n = z_0; - - while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { - z_n = step(z_n, constant); - ++iterations; - } - - return iterations; -} -} // namespace fractal diff --git a/source/mandelbrot/equations_simd.cpp b/source/mandelbrot/equations_simd.cpp new file mode 100644 index 0000000..0ce0cfd --- /dev/null +++ b/source/mandelbrot/equations_simd.cpp @@ -0,0 +1,77 @@ +#include "equations.hpp" + +namespace fractal { +std::array compute_iterations( + const std::array, 8>& z_0, + const std::array, 8>& constant, + iteration_count max_iters +) +{ + static const auto SQUARED_DIVERGENCE = + MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM; + + alignas(64) std::array reals = {z_0[0].real(), z_0[1].real(), + z_0[2].real(), z_0[3].real(), + z_0[4].real(), z_0[5].real(), + z_0[6].real(), z_0[7].real()}; + alignas(64) std::array imags = {z_0[0].imag(), z_0[1].imag(), + z_0[2].imag(), z_0[3].imag(), + z_0[4].imag(), z_0[5].imag(), + z_0[6].imag(), z_0[7].imag()}; + alignas(64) std::array const_reals = { + constant[0].real(), constant[1].real(), constant[2].real(), constant[3].real(), + constant[4].real(), constant[5].real(), constant[6].real(), constant[7].real() + }; + alignas(64) std::array const_imags = { + constant[0].imag(), constant[1].imag(), constant[2].imag(), constant[3].imag(), + constant[4].imag(), constant[5].imag(), constant[6].imag(), constant[7].imag() + }; + + std::array solved_its = {0}; + + __m512d input_vec_real = _mm512_load_pd(reals.data()); + __m512d input_vec_imag = _mm512_load_pd(imags.data()); + __m512d input_vec_constant_imags = _mm512_load_pd(const_imags.data()); + __m512d input_vec_constant_reals = _mm512_load_pd(const_reals.data()); + __m512i solved_its_vec = _mm512_loadu_epi16(solved_its.data()); + + for (iteration_count iterations = 0; iterations < max_iters; iterations++) { + // Square real + __m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real); + + // Square imag + __m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag); + + // Create imags + __m512d real_x2 = _mm512_mul_pd(input_vec_real, _mm512_set1_pd(2)); + input_vec_imag = + _mm512_fmadd_pd(real_x2, input_vec_imag, input_vec_constant_imags); + + // Create reals + __m512d subtracted_squared = _mm512_sub_pd(squared_vec_real, squared_vec_imag); + input_vec_real = _mm512_add_pd(subtracted_squared, input_vec_constant_reals); + + // Create squared norms + __m512d squared_norms_vec = _mm512_add_pd(squared_vec_real, squared_vec_imag); + __mmask8 solved_mask = _mm512_cmp_pd_mask( + squared_norms_vec, _mm512_set1_pd(SQUARED_DIVERGENCE), _CMP_GT_OS + ); + + uint32_t solved = _cvtmask8_u32(solved_mask); + solved_its_vec = _mm512_mask_blend_epi16( + solved_mask, solved_its_vec, + _mm512_set1_epi16(static_cast(iterations)) + ); + if (solved == 0xFF) [[unlikely]] + break; + } + + __mmask32 mask = _mm512_cmpeq_epi16_mask(solved_its_vec, _mm512_set1_epi16(0)); + solved_its_vec = _mm512_mask_mov_epi16( + solved_its_vec, mask, _mm512_set1_epi16(static_cast(max_iters)) + ); + _mm512_storeu_epi16(solved_its.data(), solved_its_vec); + + return solved_its; +} +} // namespace fractal diff --git a/source/mandelbrot/equations_simd.hpp b/source/mandelbrot/equations_simd.hpp new file mode 100644 index 0000000..860a7ae --- /dev/null +++ b/source/mandelbrot/equations_simd.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "config.hpp" +#include "units.hpp" + +#include + +namespace fractal { +// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition + +std::complex +step(std::complex z_n, std::complex constant); + +std::array compute_iterations( + const std::array, 8>& z_0, + const std::array, 8>& constant, + iteration_count max_iters +); +} // namespace fractal diff --git a/source/mandelbrot/mandelbrot_window.cpp b/source/mandelbrot/mandelbrot_window.cpp index 5e9c1b4..c261fd2 100644 --- a/source/mandelbrot/mandelbrot_window.cpp +++ b/source/mandelbrot/mandelbrot_window.cpp @@ -2,7 +2,7 @@ #include "config.hpp" #include "coordinates.hpp" -#include "equations.hpp" +#include "equations_simd.hpp" #include "graphics/aspect_ratio/aspect_ratio.hpp" #include "graphics/color_conversions/color_conversions.hpp" #include "graphics/display_to_complex.hpp" @@ -28,7 +28,7 @@ void MandelbrotWindow::draw_coordinate_( const std::array, 8>& complex_coords ) { - std::array, 8> starts = { + static constexpr std::array, 8> starts = { std::complex{0, 0} }; auto iterations = From ce44af054a70beef545adc7e591a332e7542106f Mon Sep 17 00:00:00 2001 From: Steven Ewald Date: Sun, 24 Nov 2024 22:50:31 -0600 Subject: [PATCH 5/6] Added avx512 struct for future portability --- CMakeLists.txt | 1 + source/mandelbrot/equations.cpp | 28 +++++++++++++++++++++++++ source/mandelbrot/equations.hpp | 23 ++++---------------- source/mandelbrot/equations_simd.cpp | 26 +++++++---------------- source/mandelbrot/equations_simd.hpp | 8 +------ source/mandelbrot/mandelbrot_window.cpp | 16 +++++++------- source/mandelbrot/mandelbrot_window.hpp | 4 ++-- source/units.hpp | 10 +++++++++ 8 files changed, 62 insertions(+), 54 deletions(-) create mode 100644 source/mandelbrot/equations.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e39438..574f19b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ add_library( source/graphics/color_conversions/color_conversions.cpp source/graphics/aspect_ratio/aspect_ratio.cpp source/mandelbrot/equations_simd.cpp + source/mandelbrot/equations.cpp ) target_include_directories( diff --git a/source/mandelbrot/equations.cpp b/source/mandelbrot/equations.cpp new file mode 100644 index 0000000..8078382 --- /dev/null +++ b/source/mandelbrot/equations.cpp @@ -0,0 +1,28 @@ +#include "equations.hpp" + +#include "config.hpp" + +namespace fractal { +// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition +std::complex +step(std::complex z_n, std::complex constant) +{ + return z_n * z_n + constant; +} + +iteration_count compute_iterations( + std::complex z_0, std::complex constant, + iteration_count max_iters +) +{ + iteration_count iterations = 0; + std::complex z_n = z_0; + + while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { + z_n = step(z_n, constant); + ++iterations; + } + + return iterations; +} +} // namespace fractal diff --git a/source/mandelbrot/equations.hpp b/source/mandelbrot/equations.hpp index 26103b5..7ae8bdf 100644 --- a/source/mandelbrot/equations.hpp +++ b/source/mandelbrot/equations.hpp @@ -1,29 +1,14 @@ #pragma once -#include "config.hpp" #include "units.hpp" namespace fractal { // https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition -inline std::complex -step(std::complex z_n, std::complex constant) -{ - return z_n * z_n + constant; -} +std::complex +step(std::complex z_n, std::complex constant); -inline iteration_count compute_iterations( +iteration_count compute_iterations( std::complex z_0, std::complex constant, iteration_count max_iters -) -{ - iteration_count iterations = 0; - std::complex z_n = z_0; - - while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { - z_n = step(z_n, constant); - ++iterations; - } - - return iterations; -} +); } // namespace fractal diff --git a/source/mandelbrot/equations_simd.cpp b/source/mandelbrot/equations_simd.cpp index 0ce0cfd..d614296 100644 --- a/source/mandelbrot/equations_simd.cpp +++ b/source/mandelbrot/equations_simd.cpp @@ -1,31 +1,19 @@ +#include "config.hpp" #include "equations.hpp" +#include "units.hpp" namespace fractal { std::array compute_iterations( - const std::array, 8>& z_0, - const std::array, 8>& constant, - iteration_count max_iters + const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters ) { static const auto SQUARED_DIVERGENCE = MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM; - alignas(64) std::array reals = {z_0[0].real(), z_0[1].real(), - z_0[2].real(), z_0[3].real(), - z_0[4].real(), z_0[5].real(), - z_0[6].real(), z_0[7].real()}; - alignas(64) std::array imags = {z_0[0].imag(), z_0[1].imag(), - z_0[2].imag(), z_0[3].imag(), - z_0[4].imag(), z_0[5].imag(), - z_0[6].imag(), z_0[7].imag()}; - alignas(64) std::array const_reals = { - constant[0].real(), constant[1].real(), constant[2].real(), constant[3].real(), - constant[4].real(), constant[5].real(), constant[6].real(), constant[7].real() - }; - alignas(64) std::array const_imags = { - constant[0].imag(), constant[1].imag(), constant[2].imag(), constant[3].imag(), - constant[4].imag(), constant[5].imag(), constant[6].imag(), constant[7].imag() - }; + alignas(64) std::array reals = z_0.real; + alignas(64) std::array imags = z_0.imaginary; + alignas(64) std::array const_reals = constant.real; + alignas(64) std::array const_imags = constant.imaginary; std::array solved_its = {0}; diff --git a/source/mandelbrot/equations_simd.hpp b/source/mandelbrot/equations_simd.hpp index 860a7ae..174a259 100644 --- a/source/mandelbrot/equations_simd.hpp +++ b/source/mandelbrot/equations_simd.hpp @@ -6,14 +6,8 @@ #include namespace fractal { -// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition - -std::complex -step(std::complex z_n, std::complex constant); std::array compute_iterations( - const std::array, 8>& z_0, - const std::array, 8>& constant, - iteration_count max_iters + const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters ); } // namespace fractal diff --git a/source/mandelbrot/mandelbrot_window.cpp b/source/mandelbrot/mandelbrot_window.cpp index c261fd2..73f2390 100644 --- a/source/mandelbrot/mandelbrot_window.cpp +++ b/source/mandelbrot/mandelbrot_window.cpp @@ -24,15 +24,12 @@ namespace fractal { void MandelbrotWindow::draw_coordinate_( - display_coordinate display_coord, - const std::array, 8>& complex_coords + display_coordinate display_coord, const avx512_complex& complex_coords ) { - static constexpr std::array, 8> starts = { - std::complex{0, 0} - }; + static constexpr avx512_complex START{}; auto iterations = - compute_iterations(starts, complex_coords, MANDELBROT_MAX_ITERATIONS); + compute_iterations(START, complex_coords, MANDELBROT_MAX_ITERATIONS); for (size_t i = 0; i < 8; i++) { float iteration_ratio = @@ -59,7 +56,12 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection) coords[i] = to_complex.to_complex_projection(start_display_coord); start_display_coord.first++; } - draw_coordinate_(t, coords); + avx512_complex coords2{}; + for (size_t i = 0; i < 8; i++) { + coords2.real[i] = coords[i].real(); + coords2.imaginary[i] = coords[i].imag(); + } + draw_coordinate_(t, coords2); }; auto process_chunk = [&](display_domain::DisplayCoordinateIterator start, diff --git a/source/mandelbrot/mandelbrot_window.hpp b/source/mandelbrot/mandelbrot_window.hpp index d7c7e07..5e9acd6 100644 --- a/source/mandelbrot/mandelbrot_window.hpp +++ b/source/mandelbrot/mandelbrot_window.hpp @@ -3,6 +3,7 @@ #include "config.hpp" #include "coordinates.hpp" #include "graphics/display_event_observer.hpp" +#include "units.hpp" #include #include @@ -22,8 +23,7 @@ class MandelbrotWindow : public DisplayEventObserver { void on_resize_(display_domain new_domain_selection); void draw_coordinate_( - display_coordinate display_coord, - const std::array, 8>& complex_coords + display_coordinate display_coord, const avx512_complex& complex_coords ); public: diff --git a/source/units.hpp b/source/units.hpp index e72cc3a..9c15dd7 100644 --- a/source/units.hpp +++ b/source/units.hpp @@ -15,6 +15,16 @@ using complex = boost::multiprecision::complex_adaptor;*/ using complex_underlying = double; +struct avx512_complex { + std::array real; + std::array imaginary; + + std::complex get_complex(uint8_t index) + { + return {real[index], imaginary[index]}; + } +}; + struct color { uint8_t red; uint8_t green; From 765cd9dee4d50b5f301788456684ae545325f82f Mon Sep 17 00:00:00 2001 From: Steven Ewald Date: Mon, 25 Nov 2024 11:11:26 -0600 Subject: [PATCH 6/6] Add more simd --- CMakeLists.txt | 2 ++ source/config.hpp | 4 +-- source/graphics/aspect_ratio/aspect_ratio.hpp | 3 ++- .../color_conversions/color_conversions.cpp | 18 +++++-------- source/mandelbrot/equations_simd.cpp | 2 ++ source/mandelbrot/equations_simd.hpp | 2 -- source/mandelbrot/mandelbrot_window.cpp | 25 ++++++++++++++----- 7 files changed, 33 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 574f19b..ad791b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,8 @@ include(cmake/variables.cmake) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=x86-64 -mavx512f -mavx512dq -mavx512vl -mavx512bf16") +add_compile_options(-fno-inline -fno-omit-frame-pointer) + # ---- Declare library ---- diff --git a/source/config.hpp b/source/config.hpp index f460612..4b94c34 100644 --- a/source/config.hpp +++ b/source/config.hpp @@ -7,8 +7,8 @@ namespace fractal { -constexpr std::size_t WINDOW_WIDTH = 800UZ * 2; -constexpr std::size_t WINDOW_HEIGHT = 600UZ * 2; +constexpr std::size_t WINDOW_WIDTH = 5120; +constexpr std::size_t WINDOW_HEIGHT = 1440; constexpr std::size_t FRAME_RATE = 60UZ; constexpr display_domain DISPLAY_DOMAIN{ diff --git a/source/graphics/aspect_ratio/aspect_ratio.hpp b/source/graphics/aspect_ratio/aspect_ratio.hpp index fc87ba0..7b9e80e 100644 --- a/source/graphics/aspect_ratio/aspect_ratio.hpp +++ b/source/graphics/aspect_ratio/aspect_ratio.hpp @@ -1,10 +1,11 @@ #pragma once +#include "config.hpp" #include "coordinates.hpp" namespace fractal { display_coordinate calculate_rectangle_end_point( display_coordinate start, display_coordinate current, - float target_aspect_ratio = 800.0f / 600.0f + float target_aspect_ratio = static_cast(WINDOW_WIDTH) / WINDOW_HEIGHT ); } // namespace fractal diff --git a/source/graphics/color_conversions/color_conversions.cpp b/source/graphics/color_conversions/color_conversions.cpp index edf1ccc..c87fa49 100644 --- a/source/graphics/color_conversions/color_conversions.cpp +++ b/source/graphics/color_conversions/color_conversions.cpp @@ -17,34 +17,28 @@ color hsv_to_rgb(float hue, float saturation, float value) float green_temp = 0.0f; float blue_temp = 0.0f; - if (0 <= hue_prime && hue_prime < 1) { + if (hue_prime < 1) { red_temp = chroma; green_temp = uint16_termediate; - blue_temp = 0; } - else if (1 <= hue_prime && hue_prime < 2) { + else if (hue_prime < 2) { red_temp = uint16_termediate; green_temp = chroma; - blue_temp = 0; } - else if (2 <= hue_prime && hue_prime < 3) { - red_temp = 0; + else if (hue_prime < 3) { green_temp = chroma; blue_temp = uint16_termediate; } - else if (3 <= hue_prime && hue_prime < 4) { - red_temp = 0; + else if (hue_prime < 4) { green_temp = uint16_termediate; blue_temp = chroma; } - else if (4 <= hue_prime && hue_prime < 5) { + else if (hue_prime < 5) { red_temp = uint16_termediate; - green_temp = 0; blue_temp = chroma; } - else if (5 <= hue_prime && hue_prime < 6) { + else if (hue_prime < 6) { red_temp = chroma; - green_temp = 0; blue_temp = uint16_termediate; } diff --git a/source/mandelbrot/equations_simd.cpp b/source/mandelbrot/equations_simd.cpp index d614296..e3bb639 100644 --- a/source/mandelbrot/equations_simd.cpp +++ b/source/mandelbrot/equations_simd.cpp @@ -2,6 +2,8 @@ #include "equations.hpp" #include "units.hpp" +#include + namespace fractal { std::array compute_iterations( const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters diff --git a/source/mandelbrot/equations_simd.hpp b/source/mandelbrot/equations_simd.hpp index 174a259..93cd723 100644 --- a/source/mandelbrot/equations_simd.hpp +++ b/source/mandelbrot/equations_simd.hpp @@ -3,8 +3,6 @@ #include "config.hpp" #include "units.hpp" -#include - namespace fractal { std::array compute_iterations( diff --git a/source/mandelbrot/mandelbrot_window.cpp b/source/mandelbrot/mandelbrot_window.cpp index 73f2390..a0e001f 100644 --- a/source/mandelbrot/mandelbrot_window.cpp +++ b/source/mandelbrot/mandelbrot_window.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include -#include #include #include @@ -28,13 +28,26 @@ void MandelbrotWindow::draw_coordinate_( ) { static constexpr avx512_complex START{}; - auto iterations = + alignas(64) std::array iterations = compute_iterations(START, complex_coords, MANDELBROT_MAX_ITERATIONS); + alignas(64) std::array ratios{}; + + __m128i iterations_vec = _mm_loadu_epi16(iterations.data()); + + __m128i input_low = _mm_unpacklo_epi16(iterations_vec, _mm_setzero_si128()); + __m128i input_high = _mm_unpackhi_epi16(iterations_vec, _mm_setzero_si128()); + + __m128 floats_low = _mm_cvtepi32_ps(input_low); + __m128 floats_high = _mm_cvtepi32_ps(input_high); + + floats_low = _mm_div_ps(floats_low, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS)); + floats_high = _mm_div_ps(floats_high, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS)); + + _mm_storeu_ps(ratios.begin(), floats_low); + _mm_storeu_ps(ratios.begin() + 4, floats_high); for (size_t i = 0; i < 8; i++) { - float iteration_ratio = - static_cast(iterations[i]) / MANDELBROT_MAX_ITERATIONS; - set_pixel_color(display_coord, iteration_ratio); + set_pixel_color(display_coord, ratios[i]); display_coord.first++; } } @@ -72,7 +85,7 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection) }; uint32_t total = WINDOW_WIDTH * WINDOW_HEIGHT; - uint32_t chunks = 32; + uint32_t chunks = 128; uint32_t step = total / chunks; std::vector> futures;