|
3 | 3 | #include "config.hpp" |
4 | 4 | #include "units.hpp" |
5 | 5 |
|
6 | | -#include <immintrin.h> |
7 | | - |
8 | 6 | namespace fractal { |
9 | 7 | // https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition |
10 | | - |
11 | 8 | inline std::complex<complex_underlying> |
12 | 9 | step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant) |
13 | 10 | { |
14 | 11 | return z_n * z_n + constant; |
15 | 12 | } |
16 | 13 |
|
17 | | -inline std::array<iteration_count, 8> compute_iterations( |
18 | | - std::array<std::complex<complex_underlying>, 8> z_0, |
19 | | - std::array<std::complex<complex_underlying>, 8> constant, iteration_count max_iters |
| 14 | +inline iteration_count compute_iterations( |
| 15 | + std::complex<complex_underlying> z_0, std::complex<complex_underlying> constant, |
| 16 | + iteration_count max_iters |
20 | 17 | ) |
21 | 18 | { |
22 | | - static const auto SQUARED_DIVERGENCE = |
23 | | - MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM; |
24 | | - |
25 | | - alignas(64) std::array<double, 8> reals = {z_0[0].real(), z_0[1].real(), |
26 | | - z_0[2].real(), z_0[3].real(), |
27 | | - z_0[4].real(), z_0[5].real(), |
28 | | - z_0[6].real(), z_0[7].real()}; |
29 | | - alignas(64) std::array<double, 8> imags = {z_0[0].imag(), z_0[1].imag(), |
30 | | - z_0[2].imag(), z_0[3].imag(), |
31 | | - z_0[4].imag(), z_0[5].imag(), |
32 | | - z_0[6].imag(), z_0[7].imag()}; |
33 | | - alignas(64) std::array<double, 8> const_reals = { |
34 | | - constant[0].real(), constant[1].real(), constant[2].real(), constant[3].real(), |
35 | | - constant[4].real(), constant[5].real(), constant[6].real(), constant[7].real() |
36 | | - }; |
37 | | - alignas(64) std::array<double, 8> const_imags = { |
38 | | - constant[0].imag(), constant[1].imag(), constant[2].imag(), constant[3].imag(), |
39 | | - constant[4].imag(), constant[5].imag(), constant[6].imag(), constant[7].imag() |
40 | | - }; |
41 | | - |
42 | | - std::array<iteration_count, 8> solved_its = {0}; |
43 | 19 | iteration_count iterations = 0; |
| 20 | + std::complex<complex_underlying> z_n = z_0; |
44 | 21 |
|
45 | | - __m512d input_vec_real = _mm512_load_pd(reals.data()); |
46 | | - __m512d input_vec_imag = _mm512_load_pd(imags.data()); |
47 | | - __m512d input_vec_constant_imags = _mm512_load_pd(const_imags.data()); |
48 | | - __m512d input_vec_constant_reals = _mm512_load_pd(const_reals.data()); |
49 | | - __m512i solved_its_vec = _mm512_loadu_epi16(solved_its.data()); |
50 | | - |
51 | | - while (iterations < max_iters) { |
52 | | - // Square real |
53 | | - __m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real); |
54 | | - |
55 | | - // Square imag |
56 | | - __m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag); |
57 | | - |
58 | | - // Create imags |
59 | | - __m512d real_x2 = _mm512_mul_pd(input_vec_real, _mm512_set1_pd(2)); |
60 | | - input_vec_imag = |
61 | | - _mm512_fmadd_pd(real_x2, input_vec_imag, input_vec_constant_imags); |
62 | | - |
63 | | - // Create reals |
64 | | - __m512d subtracted_squared = _mm512_sub_pd(squared_vec_real, squared_vec_imag); |
65 | | - input_vec_real = _mm512_add_pd(subtracted_squared, input_vec_constant_reals); |
66 | | - |
67 | | - // Create squared norms |
68 | | - __m512d squared_norms_vec = _mm512_add_pd(squared_vec_real, squared_vec_imag); |
69 | | - __mmask8 solved_mask = _mm512_cmp_pd_mask( |
70 | | - squared_norms_vec, _mm512_set1_pd(SQUARED_DIVERGENCE), _CMP_GT_OS |
71 | | - ); |
72 | | - |
73 | | - uint32_t solved = _cvtmask8_u32(solved_mask); |
74 | | - solved_its_vec = _mm512_mask_blend_epi16( |
75 | | - solved_mask, solved_its_vec, _mm512_set1_epi16(iterations) |
76 | | - ); |
77 | | - if (solved == 0xFF) [[unlikely]] |
78 | | - break; |
79 | | - |
| 22 | + while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { |
| 23 | + z_n = step(z_n, constant); |
80 | 24 | ++iterations; |
81 | 25 | } |
82 | 26 |
|
83 | | - _mm512_storeu_epi16(solved_its.data(), solved_its_vec); |
84 | | - for (int i = 0; i < 8; i++) { |
85 | | - if (solved_its[i] == 0) { |
86 | | - solved_its[i] = max_iters; |
87 | | - } |
88 | | - } |
89 | | - |
90 | | - return solved_its; |
| 27 | + return iterations; |
91 | 28 | } |
92 | 29 | } // namespace fractal |
0 commit comments