diff --git a/CMakeLists.txt b/CMakeLists.txt index 7bc639d..ad791b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,9 @@ include(cmake/project-is-top-level.cmake) include(cmake/variables.cmake) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=x86-64 -mavx512f -mavx512dq -mavx512vl -mavx512bf16") +add_compile_options(-fno-inline -fno-omit-frame-pointer) + # ---- Declare library ---- @@ -24,6 +27,8 @@ add_library( source/mandelbrot/mandelbrot_window.cpp source/graphics/color_conversions/color_conversions.cpp source/graphics/aspect_ratio/aspect_ratio.cpp + source/mandelbrot/equations_simd.cpp + source/mandelbrot/equations.cpp ) target_include_directories( diff --git a/source/config.hpp b/source/config.hpp index 4fbbe9b..4b94c34 100644 --- a/source/config.hpp +++ b/source/config.hpp @@ -7,8 +7,8 @@ namespace fractal { -constexpr std::size_t WINDOW_WIDTH = 800UZ*2; -constexpr std::size_t WINDOW_HEIGHT = 600UZ*2; +constexpr std::size_t WINDOW_WIDTH = 5120; +constexpr std::size_t WINDOW_HEIGHT = 1440; constexpr std::size_t FRAME_RATE = 60UZ; constexpr display_domain DISPLAY_DOMAIN{ @@ -17,8 +17,8 @@ constexpr display_domain DISPLAY_DOMAIN{ }; constexpr complex_domain START_COMPLEX_DOMAIN{ - {complex_underlying{-2}, complex_underlying{-1.5}}, - {complex_underlying{1}, complex_underlying{1.5} } + {complex_underlying{-1.402}, complex_underlying{-.001}}, + {complex_underlying{-1.400}, complex_underlying{.001} } }; const complex_underlying MANDELBROT_DIVERGENCE_NORM = 4; diff --git a/source/coordinates.hpp b/source/coordinates.hpp index 21ff1e4..9c820fc 100644 --- a/source/coordinates.hpp +++ b/source/coordinates.hpp @@ -58,6 +58,12 @@ struct display_domain { return *this; } + DisplayCoordinateIterator& operator+=(difference_type other) + { + current_coordinate_ += other; + return *this; + } + DisplayCoordinateIterator operator-(difference_type other) { DisplayCoordinateIterator tmp = *this; diff --git a/source/graphics/aspect_ratio/aspect_ratio.hpp b/source/graphics/aspect_ratio/aspect_ratio.hpp index fc87ba0..7b9e80e 100644 --- a/source/graphics/aspect_ratio/aspect_ratio.hpp +++ b/source/graphics/aspect_ratio/aspect_ratio.hpp @@ -1,10 +1,11 @@ #pragma once +#include "config.hpp" #include "coordinates.hpp" namespace fractal { display_coordinate calculate_rectangle_end_point( display_coordinate start, display_coordinate current, - float target_aspect_ratio = 800.0f / 600.0f + float target_aspect_ratio = static_cast(WINDOW_WIDTH) / WINDOW_HEIGHT ); } // namespace fractal diff --git a/source/graphics/color_conversions/color_conversions.cpp b/source/graphics/color_conversions/color_conversions.cpp index edf1ccc..c87fa49 100644 --- a/source/graphics/color_conversions/color_conversions.cpp +++ b/source/graphics/color_conversions/color_conversions.cpp @@ -17,34 +17,28 @@ color hsv_to_rgb(float hue, float saturation, float value) float green_temp = 0.0f; float blue_temp = 0.0f; - if (0 <= hue_prime && hue_prime < 1) { + if (hue_prime < 1) { red_temp = chroma; green_temp = uint16_termediate; - blue_temp = 0; } - else if (1 <= hue_prime && hue_prime < 2) { + else if (hue_prime < 2) { red_temp = uint16_termediate; green_temp = chroma; - blue_temp = 0; } - else if (2 <= hue_prime && hue_prime < 3) { - red_temp = 0; + else if (hue_prime < 3) { green_temp = chroma; blue_temp = uint16_termediate; } - else if (3 <= hue_prime && hue_prime < 4) { - red_temp = 0; + else if (hue_prime < 4) { green_temp = uint16_termediate; blue_temp = chroma; } - else if (4 <= hue_prime && hue_prime < 5) { + else if (hue_prime < 5) { red_temp = uint16_termediate; - green_temp = 0; blue_temp = chroma; } - else if (5 <= hue_prime && hue_prime < 6) { + else if (hue_prime < 6) { red_temp = chroma; - green_temp = 0; blue_temp = uint16_termediate; } diff --git a/source/mandelbrot/equations.cpp b/source/mandelbrot/equations.cpp new file mode 100644 index 0000000..8078382 --- /dev/null +++ b/source/mandelbrot/equations.cpp @@ -0,0 +1,28 @@ +#include "equations.hpp" + +#include "config.hpp" + +namespace fractal { +// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition +std::complex +step(std::complex z_n, std::complex constant) +{ + return z_n * z_n + constant; +} + +iteration_count compute_iterations( + std::complex z_0, std::complex constant, + iteration_count max_iters +) +{ + iteration_count iterations = 0; + std::complex z_n = z_0; + + while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { + z_n = step(z_n, constant); + ++iterations; + } + + return iterations; +} +} // namespace fractal diff --git a/source/mandelbrot/equations.hpp b/source/mandelbrot/equations.hpp index 26103b5..7ae8bdf 100644 --- a/source/mandelbrot/equations.hpp +++ b/source/mandelbrot/equations.hpp @@ -1,29 +1,14 @@ #pragma once -#include "config.hpp" #include "units.hpp" namespace fractal { // https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition -inline std::complex -step(std::complex z_n, std::complex constant) -{ - return z_n * z_n + constant; -} +std::complex +step(std::complex z_n, std::complex constant); -inline iteration_count compute_iterations( +iteration_count compute_iterations( std::complex z_0, std::complex constant, iteration_count max_iters -) -{ - iteration_count iterations = 0; - std::complex z_n = z_0; - - while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { - z_n = step(z_n, constant); - ++iterations; - } - - return iterations; -} +); } // namespace fractal diff --git a/source/mandelbrot/equations_simd.cpp b/source/mandelbrot/equations_simd.cpp new file mode 100644 index 0000000..e3bb639 --- /dev/null +++ b/source/mandelbrot/equations_simd.cpp @@ -0,0 +1,67 @@ +#include "config.hpp" +#include "equations.hpp" +#include "units.hpp" + +#include + +namespace fractal { +std::array compute_iterations( + const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters +) +{ + static const auto SQUARED_DIVERGENCE = + MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM; + + alignas(64) std::array reals = z_0.real; + alignas(64) std::array imags = z_0.imaginary; + alignas(64) std::array const_reals = constant.real; + alignas(64) std::array const_imags = constant.imaginary; + + std::array solved_its = {0}; + + __m512d input_vec_real = _mm512_load_pd(reals.data()); + __m512d input_vec_imag = _mm512_load_pd(imags.data()); + __m512d input_vec_constant_imags = _mm512_load_pd(const_imags.data()); + __m512d input_vec_constant_reals = _mm512_load_pd(const_reals.data()); + __m512i solved_its_vec = _mm512_loadu_epi16(solved_its.data()); + + for (iteration_count iterations = 0; iterations < max_iters; iterations++) { + // Square real + __m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real); + + // Square imag + __m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag); + + // Create imags + __m512d real_x2 = _mm512_mul_pd(input_vec_real, _mm512_set1_pd(2)); + input_vec_imag = + _mm512_fmadd_pd(real_x2, input_vec_imag, input_vec_constant_imags); + + // Create reals + __m512d subtracted_squared = _mm512_sub_pd(squared_vec_real, squared_vec_imag); + input_vec_real = _mm512_add_pd(subtracted_squared, input_vec_constant_reals); + + // Create squared norms + __m512d squared_norms_vec = _mm512_add_pd(squared_vec_real, squared_vec_imag); + __mmask8 solved_mask = _mm512_cmp_pd_mask( + squared_norms_vec, _mm512_set1_pd(SQUARED_DIVERGENCE), _CMP_GT_OS + ); + + uint32_t solved = _cvtmask8_u32(solved_mask); + solved_its_vec = _mm512_mask_blend_epi16( + solved_mask, solved_its_vec, + _mm512_set1_epi16(static_cast(iterations)) + ); + if (solved == 0xFF) [[unlikely]] + break; + } + + __mmask32 mask = _mm512_cmpeq_epi16_mask(solved_its_vec, _mm512_set1_epi16(0)); + solved_its_vec = _mm512_mask_mov_epi16( + solved_its_vec, mask, _mm512_set1_epi16(static_cast(max_iters)) + ); + _mm512_storeu_epi16(solved_its.data(), solved_its_vec); + + return solved_its; +} +} // namespace fractal diff --git a/source/mandelbrot/equations_simd.hpp b/source/mandelbrot/equations_simd.hpp new file mode 100644 index 0000000..93cd723 --- /dev/null +++ b/source/mandelbrot/equations_simd.hpp @@ -0,0 +1,11 @@ +#pragma once + +#include "config.hpp" +#include "units.hpp" + +namespace fractal { + +std::array compute_iterations( + const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters +); +} // namespace fractal diff --git a/source/mandelbrot/mandelbrot_window.cpp b/source/mandelbrot/mandelbrot_window.cpp index ad88788..a0e001f 100644 --- a/source/mandelbrot/mandelbrot_window.cpp +++ b/source/mandelbrot/mandelbrot_window.cpp @@ -2,30 +2,54 @@ #include "config.hpp" #include "coordinates.hpp" -#include "equations.hpp" +#include "equations_simd.hpp" #include "graphics/aspect_ratio/aspect_ratio.hpp" #include "graphics/color_conversions/color_conversions.hpp" #include "graphics/display_to_complex.hpp" +#include "units.hpp" +#include +#include +#include #include #include #include #include +#include + #include #include #include namespace fractal { void MandelbrotWindow::draw_coordinate_( - const display_coordinate& display_coord, const complex_coordinate& complex_coord + display_coordinate display_coord, const avx512_complex& complex_coords ) { - iteration_count iterations = - compute_iterations({0, 0}, complex_coord, MANDELBROT_MAX_ITERATIONS); + static constexpr avx512_complex START{}; + alignas(64) std::array iterations = + compute_iterations(START, complex_coords, MANDELBROT_MAX_ITERATIONS); + alignas(64) std::array ratios{}; + + __m128i iterations_vec = _mm_loadu_epi16(iterations.data()); + + __m128i input_low = _mm_unpacklo_epi16(iterations_vec, _mm_setzero_si128()); + __m128i input_high = _mm_unpackhi_epi16(iterations_vec, _mm_setzero_si128()); + + __m128 floats_low = _mm_cvtepi32_ps(input_low); + __m128 floats_high = _mm_cvtepi32_ps(input_high); - float iteration_ratio = static_cast(iterations) / MANDELBROT_MAX_ITERATIONS; - set_pixel_color(display_coord, iteration_ratio); + floats_low = _mm_div_ps(floats_low, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS)); + floats_high = _mm_div_ps(floats_high, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS)); + + _mm_storeu_ps(ratios.begin(), floats_low); + _mm_storeu_ps(ratios.begin() + 4, floats_high); + + for (size_t i = 0; i < 8; i++) { + set_pixel_color(display_coord, ratios[i]); + display_coord.first++; + } } void MandelbrotWindow::on_resize_(display_domain new_domain_selection) @@ -38,22 +62,36 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection) domain_ = {new_top, new_bottom}; to_complex = {DISPLAY_DOMAIN.end_coordinate, domain_}; - auto process_coordinate = [&](const display_coordinate& display_coord) { - auto complex_coord = to_complex.to_complex_projection(display_coord); - draw_coordinate_(display_coord, complex_coord); + auto process_coordinates = [&](display_coordinate start_display_coord) { + std::array, 8> coords{}; + auto t = start_display_coord; + for (size_t i = 0; i < 8; i++) { + coords[i] = to_complex.to_complex_projection(start_display_coord); + start_display_coord.first++; + } + avx512_complex coords2{}; + for (size_t i = 0; i < 8; i++) { + coords2.real[i] = coords[i].real(); + coords2.imaginary[i] = coords[i].imag(); + } + draw_coordinate_(t, coords2); }; auto process_chunk = [&](display_domain::DisplayCoordinateIterator start, display_domain::DisplayCoordinateIterator end) { - std::for_each(start, end, process_coordinate); + for (auto it = start; it != end; it += 8) { + process_coordinates(*it); + } }; uint32_t total = WINDOW_WIDTH * WINDOW_HEIGHT; - uint32_t chunks = 32; + uint32_t chunks = 128; uint32_t step = total / chunks; std::vector> futures; + auto start = std::chrono::high_resolution_clock::now(); + for (uint32_t chunk = 0; chunk < chunks; chunk++) { display_domain::DisplayCoordinateIterator start = DISPLAY_DOMAIN.begin() + chunk * step; @@ -66,6 +104,9 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection) for (const auto& future : futures) { future.wait(); } + auto end = std::chrono::high_resolution_clock::now(); + auto time = std::chrono::duration_cast(end - start); + std::cout << fmt::format("Time elapsed: {}", time.count()) << "\n"; } MandelbrotWindow::MandelbrotWindow() diff --git a/source/mandelbrot/mandelbrot_window.hpp b/source/mandelbrot/mandelbrot_window.hpp index e5797be..5e9acd6 100644 --- a/source/mandelbrot/mandelbrot_window.hpp +++ b/source/mandelbrot/mandelbrot_window.hpp @@ -3,6 +3,7 @@ #include "config.hpp" #include "coordinates.hpp" #include "graphics/display_event_observer.hpp" +#include "units.hpp" #include #include @@ -22,7 +23,7 @@ class MandelbrotWindow : public DisplayEventObserver { void on_resize_(display_domain new_domain_selection); void draw_coordinate_( - const display_coordinate& display_coord, const complex_coordinate& complex_coord + display_coordinate display_coord, const avx512_complex& complex_coords ); public: diff --git a/source/units.hpp b/source/units.hpp index f41eaa5..9c15dd7 100644 --- a/source/units.hpp +++ b/source/units.hpp @@ -13,7 +13,17 @@ using iteration_count = std::uint16_t; using complex_underlying = boost::multiprecision::number; using complex = boost::multiprecision::complex_adaptor;*/ -using complex_underlying = __float128; +using complex_underlying = double; + +struct avx512_complex { + std::array real; + std::array imaginary; + + std::complex get_complex(uint8_t index) + { + return {real[index], imaginary[index]}; + } +}; struct color { uint8_t red;