Skip to content

Commit 257f92a

Browse files
committed
Make SLEEF the default for functions on macOS (Accelerate does not provide enough precision)
1 parent 9478199 commit 257f92a

6 files changed

Lines changed: 57 additions & 14 deletions

File tree

README_DEVELOPERS.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ The main [README.md](README.md) keeps the simplest supported build path. This se
9292
- On Emscripten, setting `MINIEXPR_ENABLE_TCC_JIT=ON` enables wasm32 JIT support automatically.
9393
- Setting `MINIEXPR_ENABLE_TCC_JIT=OFF` disables TCC-based JIT backends; on Linux/macOS, the separate `# me:compiler=cc` runtime path may still be available.
9494
- `MINIEXPR_USE_SLEEF=ON` fetches SLEEF and enables SIMD math acceleration; set it to `OFF` to build without SLEEF.
95-
- `MINIEXPR_USE_ACCELERATE=ON` enables the macOS Accelerate/vForce backend; in `auto` mode on macOS it is preferred by default, and unsupported functions still fall back to scalar kernels.
95+
- `MINIEXPR_USE_ACCELERATE=ON` enables the macOS Accelerate/vForce backend; in `auto` mode on macOS, SLEEF is preferred when available and Accelerate remains available as a fallback backend.
9696
- When `ME_SIMD_MATH_BACKEND=accelerate` is active, the `ME_SIMD_ULP_1` / `ME_SIMD_ULP_3_5` distinction does not select different kernels. Those accuracy modes remain meaningful for the SLEEF backend.
9797

9898
### Alternative Build Invocations
@@ -121,7 +121,7 @@ The public/runtime-stable DSL JIT controls remain documented in [README.md](READ
121121

122122
### Internal/Test-Only Environment Variables
123123

124-
- `ME_SIMD_MATH_BACKEND=auto|sleef|accelerate|scalar`: Force the SIMD math backend selection used by `src/functions-simd.c` for benchmarking and debugging. Default: `auto` (`accelerate` on macOS when enabled, otherwise the existing platform backend selection).
124+
- `ME_SIMD_MATH_BACKEND=auto|sleef|accelerate|scalar`: Force the SIMD math backend selection used by `src/functions-simd.c` for benchmarking and debugging. Default: `auto` (prefers SLEEF when available, otherwise falls back to Accelerate on macOS when enabled, then the existing scalar fallback).
125125
- The SIMD math benchmarks print backend-aware columns. For `accelerate` and `scalar`, do not interpret the `ME_SIMD_ULP_1` / `ME_SIMD_ULP_3_5` labels as distinct math implementations.
126126
- `ME_DSL_WHILE_MAX_ITERS=<n>`: Override the runtime safety cap for DSL `while` loops.
127127
- `ME_DSL_JIT_MATH_BRIDGE=0|1`: Enable or disable runtime math-bridge lowering globally. Default: `1`.

RELEASE_NOTES.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ Release notes for miniexpr
44
Changes from 0.2.0 to 0.2.1
55
===========================
66

7-
* macOS SIMD math now prefers Accelerate/vForce by default when enabled at build time.
7+
* macOS SIMD math now prefers SLEEF by default when it is available at build time.
88
- New CMake option: `MINIEXPR_USE_ACCELERATE=ON|OFF` (enabled by default on macOS, off elsewhere).
9-
- In runtime `auto` mode, macOS uses Accelerate first and falls back to scalar kernels for functions without Accelerate coverage.
9+
- In runtime `auto` mode, macOS prefers SLEEF first, then falls back to Accelerate when enabled, then to scalar kernels.
1010
- Added `ME_SIMD_MATH_BACKEND=auto|sleef|accelerate|scalar` for backend forcing during debugging and benchmarking.
1111
- Updated SIMD math benchmarks to report backend-appropriate columns instead of implying fake `U10/U35` distinctions for Accelerate/scalar runs.
1212

bench/benchmark_transcendentals.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ static void benchmark_dtype(const dtype_info_t *info, const int *blocks, int nbl
141141
printf("\n========================================\n");
142142
printf("Transcendentals chain (%s)\n", info->name);
143143
printf("========================================\n");
144+
printf("Units: BlockKiB = input bytes in KiB, throughput columns = GB/s (10^9 bytes/s)\n");
144145
if (report_mode == BENCH_REPORT_U35) {
145146
printf("BlockKiB ME_U10 ME_U35 ME_SCAL C\n");
146147
} else if (report_mode == BENCH_REPORT_ACCELERATE) {
@@ -194,14 +195,15 @@ int main(void) {
194195
{"float32", ME_FLOAT32, sizeof(float)},
195196
{"float64", ME_FLOAT64, sizeof(double)}
196197
};
197-
const int blocks[] = {1024, 4096, 16384, 65536, 262144, 1048576};
198+
const int blocks[] = {4096, 16384, 65536, 262144, 1048576, 4194304};
198199
const int nblocks = (int)(sizeof(blocks) / sizeof(blocks[0]));
199200

200201
printf("========================================\n");
201202
printf("MiniExpr Transcendentals Benchmark (Block Sizes)\n");
202203
printf("========================================\n");
203204
printf("Backend: %s\n", backend ? backend : "auto");
204205
printf("Expression: log(exp(x) + tanh(x) + log1p(abs(x)) + sqrt(abs(x)) + expm1(x))\n");
206+
printf("Units: block size in KiB, throughput in GB/s (10^9 bytes/s)\n");
205207

206208
for (size_t i = 0; i < sizeof(infos) / sizeof(infos[0]); i++) {
207209
benchmark_dtype(&infos[i], blocks, nblocks, report_mode);

src/functions-simd.c

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4176,15 +4176,6 @@ static void me_init_simd(void) {
41764176
#endif
41774177
}
41784178

4179-
#if ME_USE_ACCELERATE && defined(__APPLE__)
4180-
if (me_simd_backend_preference == ME_SIMD_BACKEND_AUTO) {
4181-
me_set_accelerate_backend();
4182-
me_simd_backend = "accelerate";
4183-
me_dsl_trace_simd_init(use_u35);
4184-
return;
4185-
}
4186-
#endif
4187-
41884179
/* Use SLEEF SIMD kernels when the CPU supports them. */
41894180
#if ME_ENABLE_SLEEF_SIMD && (defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64))
41904181
if (me_simd_backend_preference != ME_SIMD_BACKEND_ACCELERATE &&
@@ -4433,6 +4424,10 @@ void me_simd_reset_for_tests(void) {
44334424
me_simd_backend = "scalar";
44344425
}
44354426

4427+
const char *me_simd_backend_for_tests(void) {
4428+
return me_simd_backend;
4429+
}
4430+
44364431
void vec_sin_dispatch(const double* a, double* out, int n) {
44374432
if (me_simd_force_scalar) {
44384433
vec_sin_scalar(a, out, n);

src/functions-simd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ void me_simd_params_pop(const me_simd_params_state *state);
2323
void me_sincos_eval_start(void);
2424
int me_simd_initialized_for_tests(void);
2525
void me_simd_reset_for_tests(void);
26+
const char *me_simd_backend_for_tests(void);
2627

2728
void vec_sin_dispatch(const double* a, double* out, int n);
2829
void vec_cos_dispatch(const double* a, double* out, int n);

tests/test_simd_math.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1738,11 +1738,56 @@ static int test_simd_init(void) {
17381738
return 0;
17391739
}
17401740

1741+
static int test_simd_auto_backend_selection(void) {
1742+
double data[] = {0.1, 0.2, 0.3, 0.4};
1743+
double out[4] = {0};
1744+
const void *vars[] = {data};
1745+
me_variable v[] = {{"x", ME_FLOAT64, data}};
1746+
me_expr *expr = NULL;
1747+
int err = 0;
1748+
const char *backend;
1749+
1750+
if (setenv("ME_SIMD_MATH_BACKEND", "auto", 1) != 0) {
1751+
printf("Failed to set ME_SIMD_MATH_BACKEND=auto\n");
1752+
return 1;
1753+
}
1754+
1755+
me_simd_reset_for_tests();
1756+
if (me_compile("sin(x) + cos(x)", v, 1, ME_FLOAT64, &err, &expr) != ME_COMPILE_SUCCESS) {
1757+
printf("Failed to compile simd auto backend test (err=%d)\n", err);
1758+
unsetenv("ME_SIMD_MATH_BACKEND");
1759+
return 1;
1760+
}
1761+
1762+
if (me_eval(expr, vars, 1, out, 4, NULL) != ME_EVAL_SUCCESS) {
1763+
printf("me_eval failed in simd auto backend test\n");
1764+
me_free(expr);
1765+
unsetenv("ME_SIMD_MATH_BACKEND");
1766+
return 1;
1767+
}
1768+
1769+
backend = me_simd_backend_for_tests();
1770+
#if defined(__APPLE__) && ME_USE_SLEEF && ME_ENABLE_SLEEF_SIMD && \
1771+
(defined(__aarch64__) || defined(_M_ARM64))
1772+
if (strcmp(backend, "advsimd-u10") != 0 && strcmp(backend, "advsimd-u35") != 0) {
1773+
printf("Expected auto backend to prefer SLEEF on macOS arm64, got %s\n", backend);
1774+
me_free(expr);
1775+
unsetenv("ME_SIMD_MATH_BACKEND");
1776+
return 1;
1777+
}
1778+
#endif
1779+
1780+
me_free(expr);
1781+
unsetenv("ME_SIMD_MATH_BACKEND");
1782+
return 0;
1783+
}
1784+
17411785
int main(void) {
17421786
int failures = 0;
17431787
const int n = 1024;
17441788

17451789
failures += test_simd_init();
1790+
failures += test_simd_auto_backend_selection();
17461791
failures += run_unary_pair_f64("abs", fabs, n, -10.0, 10.0, 1e-12);
17471792
failures += run_unary_pair_f64("exp", exp, n, -5.0, 5.0, 1e-12);
17481793
failures += run_unary_pair_f64("expm1", expm1, n, -3.0, 3.0, 1e-12);

0 commit comments

Comments
 (0)