|
3 | 3 | #include "config.hpp"
|
4 | 4 | #include "units.hpp"
|
5 | 5 |
|
6 |
| -#include <immintrin.h> |
7 |
| - |
8 | 6 | namespace fractal {
|
9 | 7 | // https://en.wikipedia.org/wiki/Mandelbrot_set#Formal_definition
|
10 |
| - |
11 | 8 | inline std::complex<complex_underlying>
|
12 | 9 | step(std::complex<complex_underlying> z_n, std::complex<complex_underlying> constant)
|
13 | 10 | {
|
14 | 11 | return z_n * z_n + constant;
|
15 | 12 | }
|
16 | 13 |
|
17 |
| -inline std::array<iteration_count, 8> compute_iterations( |
18 |
| - std::array<std::complex<complex_underlying>, 8> z_0, |
19 |
| - std::array<std::complex<complex_underlying>, 8> constant, iteration_count max_iters |
| 14 | +inline iteration_count compute_iterations( |
| 15 | + std::complex<complex_underlying> z_0, std::complex<complex_underlying> constant, |
| 16 | + iteration_count max_iters |
20 | 17 | )
|
21 | 18 | {
|
22 |
| - static const auto SQUARED_DIVERGENCE = |
23 |
| - MANDELBROT_DIVERGENCE_NORM * MANDELBROT_DIVERGENCE_NORM; |
24 |
| - |
25 |
| - alignas(64) std::array<double, 8> reals = {z_0[0].real(), z_0[1].real(), |
26 |
| - z_0[2].real(), z_0[3].real(), |
27 |
| - z_0[4].real(), z_0[5].real(), |
28 |
| - z_0[6].real(), z_0[7].real()}; |
29 |
| - alignas(64) std::array<double, 8> imags = {z_0[0].imag(), z_0[1].imag(), |
30 |
| - z_0[2].imag(), z_0[3].imag(), |
31 |
| - z_0[4].imag(), z_0[5].imag(), |
32 |
| - z_0[6].imag(), z_0[7].imag()}; |
33 |
| - alignas(64) std::array<double, 8> const_reals = { |
34 |
| - constant[0].real(), constant[1].real(), constant[2].real(), constant[3].real(), |
35 |
| - constant[4].real(), constant[5].real(), constant[6].real(), constant[7].real() |
36 |
| - }; |
37 |
| - alignas(64) std::array<double, 8> const_imags = { |
38 |
| - constant[0].imag(), constant[1].imag(), constant[2].imag(), constant[3].imag(), |
39 |
| - constant[4].imag(), constant[5].imag(), constant[6].imag(), constant[7].imag() |
40 |
| - }; |
41 |
| - |
42 |
| - std::array<iteration_count, 8> solved_its = {0}; |
43 | 19 | iteration_count iterations = 0;
|
| 20 | + std::complex<complex_underlying> z_n = z_0; |
44 | 21 |
|
45 |
| - __m512d input_vec_real = _mm512_load_pd(reals.data()); |
46 |
| - __m512d input_vec_imag = _mm512_load_pd(imags.data()); |
47 |
| - __m512d input_vec_constant_imags = _mm512_load_pd(const_imags.data()); |
48 |
| - __m512d input_vec_constant_reals = _mm512_load_pd(const_reals.data()); |
49 |
| - __m512i solved_its_vec = _mm512_loadu_epi16(solved_its.data()); |
50 |
| - |
51 |
| - while (iterations < max_iters) { |
52 |
| - // Square real |
53 |
| - __m512d squared_vec_real = _mm512_mul_pd(input_vec_real, input_vec_real); |
54 |
| - |
55 |
| - // Square imag |
56 |
| - __m512d squared_vec_imag = _mm512_mul_pd(input_vec_imag, input_vec_imag); |
57 |
| - |
58 |
| - // Create imags |
59 |
| - __m512d real_x2 = _mm512_mul_pd(input_vec_real, _mm512_set1_pd(2)); |
60 |
| - input_vec_imag = |
61 |
| - _mm512_fmadd_pd(real_x2, input_vec_imag, input_vec_constant_imags); |
62 |
| - |
63 |
| - // Create reals |
64 |
| - __m512d subtracted_squared = _mm512_sub_pd(squared_vec_real, squared_vec_imag); |
65 |
| - input_vec_real = _mm512_add_pd(subtracted_squared, input_vec_constant_reals); |
66 |
| - |
67 |
| - // Create squared norms |
68 |
| - __m512d squared_norms_vec = _mm512_add_pd(squared_vec_real, squared_vec_imag); |
69 |
| - __mmask8 solved_mask = _mm512_cmp_pd_mask( |
70 |
| - squared_norms_vec, _mm512_set1_pd(SQUARED_DIVERGENCE), _CMP_GT_OS |
71 |
| - ); |
72 |
| - |
73 |
| - uint32_t solved = _cvtmask8_u32(solved_mask); |
74 |
| - solved_its_vec = _mm512_mask_blend_epi16( |
75 |
| - solved_mask, solved_its_vec, _mm512_set1_epi16(iterations) |
76 |
| - ); |
77 |
| - if (solved == 0xFF) [[unlikely]] |
78 |
| - break; |
79 |
| - |
| 22 | + while (iterations < max_iters && std::norm(z_n) < MANDELBROT_DIVERGENCE_NORM) { |
| 23 | + z_n = step(z_n, constant); |
80 | 24 | ++iterations;
|
81 | 25 | }
|
82 | 26 |
|
83 |
| - _mm512_storeu_epi16(solved_its.data(), solved_its_vec); |
84 |
| - for (int i = 0; i < 8; i++) { |
85 |
| - if (solved_its[i] == 0) { |
86 |
| - solved_its[i] = max_iters; |
87 |
| - } |
88 |
| - } |
89 |
| - |
90 |
| - return solved_its; |
| 27 | + return iterations; |
91 | 28 | }
|
92 | 29 | } // namespace fractal
|
0 commit comments