Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AVX512 equations #9

Merged
merged 6 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ include(cmake/project-is-top-level.cmake)
include(cmake/variables.cmake)

set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=x86-64 -mavx512f -mavx512dq -mavx512vl -mavx512bf16")
add_compile_options(-fno-inline -fno-omit-frame-pointer)


# ---- Declare library ----

Expand All @@ -24,6 +27,8 @@ add_library(
source/mandelbrot/mandelbrot_window.cpp
source/graphics/color_conversions/color_conversions.cpp
source/graphics/aspect_ratio/aspect_ratio.cpp
source/mandelbrot/equations_simd.cpp
source/mandelbrot/equations.cpp
)

target_include_directories(
Expand Down
8 changes: 4 additions & 4 deletions source/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

namespace fractal {

constexpr std::size_t WINDOW_WIDTH = 800UZ*2;
constexpr std::size_t WINDOW_HEIGHT = 600UZ*2;
constexpr std::size_t WINDOW_WIDTH = 5120;
constexpr std::size_t WINDOW_HEIGHT = 1440;
constexpr std::size_t FRAME_RATE = 60UZ;

constexpr display_domain DISPLAY_DOMAIN{
Expand All @@ -17,8 +17,8 @@ constexpr display_domain DISPLAY_DOMAIN{
};

constexpr complex_domain START_COMPLEX_DOMAIN{
{complex_underlying{-2}, complex_underlying{-1.5}},
{complex_underlying{1}, complex_underlying{1.5} }
{complex_underlying{-1.402}, complex_underlying{-.001}},
{complex_underlying{-1.400}, complex_underlying{.001} }
};

const complex_underlying MANDELBROT_DIVERGENCE_NORM = 4;
Expand Down
6 changes: 6 additions & 0 deletions source/coordinates.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ struct display_domain {
return *this;
}

DisplayCoordinateIterator& operator+=(difference_type other)
{
current_coordinate_ += other;
return *this;
}

DisplayCoordinateIterator operator-(difference_type other)
{
DisplayCoordinateIterator tmp = *this;
Expand Down
3 changes: 2 additions & 1 deletion source/graphics/aspect_ratio/aspect_ratio.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#pragma once

#include "config.hpp"
#include "coordinates.hpp"

namespace fractal {
display_coordinate calculate_rectangle_end_point(
display_coordinate start, display_coordinate current,
float target_aspect_ratio = 800.0f / 600.0f
float target_aspect_ratio = static_cast<float>(WINDOW_WIDTH) / WINDOW_HEIGHT
);
} // namespace fractal
18 changes: 6 additions & 12 deletions source/graphics/color_conversions/color_conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,34 +17,28 @@ color hsv_to_rgb(float hue, float saturation, float value)
float green_temp = 0.0f;
float blue_temp = 0.0f;

if (0 <= hue_prime && hue_prime < 1) {
if (hue_prime < 1) {
red_temp = chroma;
green_temp = uint16_termediate;
blue_temp = 0;
}
else if (1 <= hue_prime && hue_prime < 2) {
else if (hue_prime < 2) {
red_temp = uint16_termediate;
green_temp = chroma;
blue_temp = 0;
}
else if (2 <= hue_prime && hue_prime < 3) {
red_temp = 0;
else if (hue_prime < 3) {
green_temp = chroma;
blue_temp = uint16_termediate;
}
else if (3 <= hue_prime && hue_prime < 4) {
red_temp = 0;
else if (hue_prime < 4) {
green_temp = uint16_termediate;
blue_temp = chroma;
}
else if (4 <= hue_prime && hue_prime < 5) {
else if (hue_prime < 5) {
red_temp = uint16_termediate;
green_temp = 0;
blue_temp = chroma;
}
else if (5 <= hue_prime && hue_prime < 6) {
else if (hue_prime < 6) {
red_temp = chroma;
green_temp = 0;
blue_temp = uint16_termediate;
}

Expand Down
28 changes: 28 additions & 0 deletions source/mandelbrot/equations.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "equations.hpp"

#include "config.hpp"

namespace fractal {
// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition
std::complex<complex_underlying>
step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant)
{
return z_n * z_n + constant;
}

iteration_count compute_iterations(
std::complex<complex_underlying> z_0, std::complex<complex_underlying> constant,
iteration_count max_iters
)
{
iteration_count iterations = 0;
std::complex<complex_underlying> z_n = z_0;

while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) {
z_n = step(z_n, constant);
++iterations;
}

return iterations;
}
} // namespace fractal
23 changes: 4 additions & 19 deletions source/mandelbrot/equations.hpp
Original file line number Diff line number Diff line change
@@ -1,29 +1,14 @@
#pragma once

#include "config.hpp"
#include "units.hpp"

namespace fractal {
// https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition
inline std::complex<complex_underlying>
step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant)
{
return z_n * z_n + constant;
}
std::complex<complex_underlying>
step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant);

inline iteration_count compute_iterations(
iteration_count compute_iterations(
std::complex<complex_underlying> z_0, std::complex<complex_underlying> constant,
iteration_count max_iters
)
{
iteration_count iterations = 0;
std::complex<complex_underlying> z_n = z_0;

while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) {
z_n = step(z_n, constant);
++iterations;
}

return iterations;
}
);
} // namespace fractal
67 changes: 67 additions & 0 deletions source/mandelbrot/equations_simd.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "config.hpp"
#include "equations.hpp"
#include "units.hpp"

#include <immintrin.h>

namespace fractal {
std::array<iteration_count, 8> compute_iterations(
const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters
)
{
static const auto SQUARED_DIVERGENCE =
MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM;

alignas(64) std::array<double, 8> reals = z_0.real;
alignas(64) std::array<double, 8> imags = z_0.imaginary;
alignas(64) std::array<double, 8> const_reals = constant.real;
alignas(64) std::array<double, 8> const_imags = constant.imaginary;

std::array<iteration_count, 8> solved_its = {0};

__m512d input_vec_real = _mm512_load_pd(reals.data());
__m512d input_vec_imag = _mm512_load_pd(imags.data());
__m512d input_vec_constant_imags = _mm512_load_pd(const_imags.data());
__m512d input_vec_constant_reals = _mm512_load_pd(const_reals.data());
__m512i solved_its_vec = _mm512_loadu_epi16(solved_its.data());

for (iteration_count iterations = 0; iterations < max_iters; iterations++) {
// Square real
__m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real);

// Square imag
__m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag);

// Create imags
__m512d real_x2 = _mm512_mul_pd(input_vec_real, _mm512_set1_pd(2));
input_vec_imag =
_mm512_fmadd_pd(real_x2, input_vec_imag, input_vec_constant_imags);

// Create reals
__m512d subtracted_squared = _mm512_sub_pd(squared_vec_real, squared_vec_imag);
input_vec_real = _mm512_add_pd(subtracted_squared, input_vec_constant_reals);

// Create squared norms
__m512d squared_norms_vec = _mm512_add_pd(squared_vec_real, squared_vec_imag);
__mmask8 solved_mask = _mm512_cmp_pd_mask(
squared_norms_vec, _mm512_set1_pd(SQUARED_DIVERGENCE), _CMP_GT_OS
);

uint32_t solved = _cvtmask8_u32(solved_mask);
solved_its_vec = _mm512_mask_blend_epi16(
solved_mask, solved_its_vec,
_mm512_set1_epi16(static_cast<int16_t>(iterations))
);
if (solved == 0xFF) [[unlikely]]
break;
}

__mmask32 mask = _mm512_cmpeq_epi16_mask(solved_its_vec, _mm512_set1_epi16(0));
solved_its_vec = _mm512_mask_mov_epi16(
solved_its_vec, mask, _mm512_set1_epi16(static_cast<int16_t>(max_iters))
);
_mm512_storeu_epi16(solved_its.data(), solved_its_vec);

return solved_its;
}
} // namespace fractal
11 changes: 11 additions & 0 deletions source/mandelbrot/equations_simd.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

#include "config.hpp"
#include "units.hpp"

namespace fractal {

std::array<iteration_count, 8> compute_iterations(
const avx512_complex& z_0, const avx512_complex& constant, iteration_count max_iters
);
} // namespace fractal
63 changes: 52 additions & 11 deletions source/mandelbrot/mandelbrot_window.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,54 @@

#include "config.hpp"
#include "coordinates.hpp"
#include "equations.hpp"
#include "equations_simd.hpp"
#include "graphics/aspect_ratio/aspect_ratio.hpp"
#include "graphics/color_conversions/color_conversions.hpp"
#include "graphics/display_to_complex.hpp"
#include "units.hpp"

#include <fmt/base.h>
#include <fmt/format.h>
#include <immintrin.h>
#include <SFML/Graphics/Drawable.hpp>
#include <SFML/Graphics/Image.hpp>
#include <SFML/Graphics/Sprite.hpp>
#include <SFML/Graphics/Texture.hpp>

#include <chrono>

#include <future>
#include <memory>
#include <optional>

namespace fractal {
void MandelbrotWindow::draw_coordinate_(
const display_coordinate& display_coord, const complex_coordinate& complex_coord
display_coordinate display_coord, const avx512_complex& complex_coords
)
{
iteration_count iterations =
compute_iterations({0, 0}, complex_coord, MANDELBROT_MAX_ITERATIONS);
static constexpr avx512_complex START{};
alignas(64) std::array<iteration_count, 8> iterations =
compute_iterations(START, complex_coords, MANDELBROT_MAX_ITERATIONS);
alignas(64) std::array<float, 8> ratios{};

__m128i iterations_vec = _mm_loadu_epi16(iterations.data());

__m128i input_low = _mm_unpacklo_epi16(iterations_vec, _mm_setzero_si128());
__m128i input_high = _mm_unpackhi_epi16(iterations_vec, _mm_setzero_si128());

__m128 floats_low = _mm_cvtepi32_ps(input_low);
__m128 floats_high = _mm_cvtepi32_ps(input_high);

float iteration_ratio = static_cast<float>(iterations) / MANDELBROT_MAX_ITERATIONS;
set_pixel_color(display_coord, iteration_ratio);
floats_low = _mm_div_ps(floats_low, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS));
floats_high = _mm_div_ps(floats_high, _mm_set1_ps(MANDELBROT_MAX_ITERATIONS));

_mm_storeu_ps(ratios.begin(), floats_low);
_mm_storeu_ps(ratios.begin() + 4, floats_high);

for (size_t i = 0; i < 8; i++) {
set_pixel_color(display_coord, ratios[i]);
display_coord.first++;
}
}

void MandelbrotWindow::on_resize_(display_domain new_domain_selection)
Expand All @@ -38,22 +62,36 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection)
domain_ = {new_top, new_bottom};
to_complex = {DISPLAY_DOMAIN.end_coordinate, domain_};

auto process_coordinate = [&](const display_coordinate& display_coord) {
auto complex_coord = to_complex.to_complex_projection(display_coord);
draw_coordinate_(display_coord, complex_coord);
auto process_coordinates = [&](display_coordinate start_display_coord) {
std::array<std::complex<complex_underlying>, 8> coords{};
auto t = start_display_coord;
for (size_t i = 0; i < 8; i++) {
coords[i] = to_complex.to_complex_projection(start_display_coord);
start_display_coord.first++;
}
avx512_complex coords2{};
for (size_t i = 0; i < 8; i++) {
coords2.real[i] = coords[i].real();
coords2.imaginary[i] = coords[i].imag();
}
draw_coordinate_(t, coords2);
};

auto process_chunk = [&](display_domain::DisplayCoordinateIterator start,
display_domain::DisplayCoordinateIterator end) {
std::for_each(start, end, process_coordinate);
for (auto it = start; it != end; it += 8) {
process_coordinates(*it);
}
};

uint32_t total = WINDOW_WIDTH * WINDOW_HEIGHT;
uint32_t chunks = 32;
uint32_t chunks = 128;
uint32_t step = total / chunks;

std::vector<std::future<void>> futures;

auto start = std::chrono::high_resolution_clock::now();

for (uint32_t chunk = 0; chunk < chunks; chunk++) {
display_domain::DisplayCoordinateIterator start =
DISPLAY_DOMAIN.begin() + chunk * step;
Expand All @@ -66,6 +104,9 @@ void MandelbrotWindow::on_resize_(display_domain new_domain_selection)
for (const auto& future : futures) {
future.wait();
}
auto end = std::chrono::high_resolution_clock::now();
auto time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout << fmt::format("Time elapsed: {}", time.count()) << "\n";
}

MandelbrotWindow::MandelbrotWindow()
Expand Down
3 changes: 2 additions & 1 deletion source/mandelbrot/mandelbrot_window.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "config.hpp"
#include "coordinates.hpp"
#include "graphics/display_event_observer.hpp"
#include "units.hpp"

#include <SFML/Graphics/Drawable.hpp>
#include <SFML/Graphics/Image.hpp>
Expand All @@ -22,7 +23,7 @@ class MandelbrotWindow : public DisplayEventObserver {

void on_resize_(display_domain new_domain_selection);
void draw_coordinate_(
const display_coordinate& display_coord, const complex_coordinate& complex_coord
display_coordinate display_coord, const avx512_complex& complex_coords
);

public:
Expand Down
12 changes: 11 additions & 1 deletion source/units.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@ using iteration_count = std::uint16_t;
using complex_underlying = boost::multiprecision::number<small_float>;
using complex = boost::multiprecision::complex_adaptor<small_float>;*/

using complex_underlying = __float128;
using complex_underlying = double;

struct avx512_complex {
std::array<complex_underlying, 8> real;
std::array<complex_underlying, 8> imaginary;

std::complex<complex_underlying> get_complex(uint8_t index)
{
return {real[index], imaginary[index]};
}
};

struct color {
uint8_t red;
Expand Down