Skip to content

Commit 851e74d

Browse files
committed
Comparisons in expressions now return actual bools
1 parent c90048f commit 851e74d

9 files changed

Lines changed: 1137 additions & 14 deletions

bench/benchmark_comparisons.c

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
/*
2+
* Benchmark for comparison operations with boolean output
3+
*
4+
* Tests various comparison expressions and measures performance:
5+
* - Simple comparisons: a < b, a == b
6+
* - Complex comparisons: a**2 + b**2 < c**2, sqrt(a) < b
7+
* - Compares ME_BOOL output vs ME_FLOAT64 output
8+
*
9+
* This benchmark evaluates the overhead of type conversion when
10+
* outputting boolean results from floating-point comparisons.
11+
*
12+
* Usage: ./benchmark_comparisons
13+
*/
14+
15+
#include <stdio.h>
16+
#include <stdlib.h>
17+
#include <stdbool.h>
18+
#include <string.h>
19+
#include <time.h>
20+
#include <math.h>
21+
#include "miniexpr.h"
22+
23+
/* Configuration */
24+
#define TOTAL_SIZE (10 * 1024 * 1024) /* 10M elements */
25+
#define WARMUP_ITERS 2
26+
#define BENCH_ITERS 10
27+
28+
/*
 * Return the current time in seconds as a double, for interval timing.
 *
 * Uses the POSIX monotonic clock when <time.h> exposes CLOCK_MONOTONIC.
 * When it does not (strict ISO C11 builds, non-POSIX toolchains) it falls
 * back to C11 timespec_get(), which reports calendar time and can therefore
 * jump if the system clock is adjusted during a measurement.
 *
 * Returns 0.0 if the clock cannot be read.
 */
static double get_time_sec(void) {
    struct timespec ts = {0};

#if defined(CLOCK_MONOTONIC)
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        return 0.0;
    }
#else
    /* timespec_get() returns the requested base on success, 0 on failure. */
    if (timespec_get(&ts, TIME_UTC) != TIME_UTC) {
        return 0.0;
    }
#endif

    return (double)ts.tv_sec + (double)ts.tv_nsec / 1e9;
}
33+
34+
/* Outcome of benchmarking one expression with both output types. */
typedef struct {
    const char *name;          /* label used in diagnostics */
    const char *expr;          /* expression text that was compiled */
    int num_vars;              /* number of input variables the expression uses */
    double throughput_bool;    /* Melems/sec with ME_BOOL output */
    double throughput_f64;     /* Melems/sec with ME_FLOAT64 output */
} bench_result_t;
41+
42+
/*
43+
* Benchmark a comparison expression with both bool and float64 output
44+
*/
45+
static void benchmark_comparison(const char *name, const char *expr_str,
46+
double *a, double *b, double *c,
47+
int num_vars, size_t n,
48+
bench_result_t *result) {
49+
int err;
50+
double start, elapsed;
51+
52+
result->name = name;
53+
result->expr = expr_str;
54+
result->num_vars = num_vars;
55+
56+
/* Setup variable definitions with explicit types */
57+
me_variable vars2[] = {{"a", ME_FLOAT64}, {"b", ME_FLOAT64}};
58+
me_variable vars3[] = {{"a", ME_FLOAT64}, {"b", ME_FLOAT64}, {"c", ME_FLOAT64}};
59+
me_variable *vars = (num_vars == 2) ? vars2 : vars3;
60+
61+
const void *ptrs2[] = {a, b};
62+
const void *ptrs3[] = {a, b, c};
63+
const void **ptrs = (num_vars == 2) ? ptrs2 : ptrs3;
64+
65+
/* Allocate output buffers */
66+
bool *result_bool = malloc(n * sizeof(bool));
67+
double *result_f64 = malloc(n * sizeof(double));
68+
69+
if (!result_bool || !result_f64) {
70+
fprintf(stderr, "Failed to allocate result buffers\n");
71+
free(result_bool);
72+
free(result_f64);
73+
return;
74+
}
75+
76+
/*
77+
* Benchmark 1: ME_BOOL output
78+
*/
79+
me_expr *expr_bool = me_compile(expr_str, vars, num_vars, ME_BOOL, &err);
80+
if (!expr_bool) {
81+
fprintf(stderr, "Failed to compile %s with ME_BOOL: error %d\n", name, err);
82+
free(result_bool);
83+
free(result_f64);
84+
return;
85+
}
86+
87+
/* Warmup */
88+
for (int i = 0; i < WARMUP_ITERS; i++) {
89+
me_eval(expr_bool, ptrs, num_vars, result_bool, n);
90+
}
91+
92+
/* Timed iterations */
93+
start = get_time_sec();
94+
for (int i = 0; i < BENCH_ITERS; i++) {
95+
me_eval(expr_bool, ptrs, num_vars, result_bool, n);
96+
}
97+
elapsed = get_time_sec() - start;
98+
result->throughput_bool = (n * BENCH_ITERS / elapsed) / 1e6;
99+
100+
me_free(expr_bool);
101+
102+
/*
103+
* Benchmark 2: ME_FLOAT64 output (for comparison)
104+
*/
105+
me_expr *expr_f64 = me_compile(expr_str, vars, num_vars, ME_FLOAT64, &err);
106+
if (!expr_f64) {
107+
fprintf(stderr, "Failed to compile %s with ME_FLOAT64: error %d\n", name, err);
108+
free(result_bool);
109+
free(result_f64);
110+
return;
111+
}
112+
113+
/* Warmup */
114+
for (int i = 0; i < WARMUP_ITERS; i++) {
115+
me_eval(expr_f64, ptrs, num_vars, result_f64, n);
116+
}
117+
118+
/* Timed iterations */
119+
start = get_time_sec();
120+
for (int i = 0; i < BENCH_ITERS; i++) {
121+
me_eval(expr_f64, ptrs, num_vars, result_f64, n);
122+
}
123+
elapsed = get_time_sec() - start;
124+
result->throughput_f64 = (n * BENCH_ITERS / elapsed) / 1e6;
125+
126+
me_free(expr_f64);
127+
128+
/* Verify results match (spot check) */
129+
int mismatches = 0;
130+
for (size_t i = 0; i < n && mismatches < 5; i += n / 10) {
131+
bool b_val = result_bool[i];
132+
bool f_val = (result_f64[i] != 0.0);
133+
if (b_val != f_val) {
134+
mismatches++;
135+
}
136+
}
137+
if (mismatches > 0) {
138+
fprintf(stderr, "Warning: %d mismatches in %s\n", mismatches, name);
139+
}
140+
141+
free(result_bool);
142+
free(result_f64);
143+
}
144+
145+
int main() {
146+
printf("═══════════════════════════════════════════════════════════════════════\n");
147+
printf(" Comparison Operations Benchmark\n");
148+
printf("═══════════════════════════════════════════════════════════════════════\n");
149+
printf("Configuration:\n");
150+
printf(" - Dataset size: %d elements (%.1f MB per array)\n",
151+
TOTAL_SIZE, TOTAL_SIZE * sizeof(double) / (1024.0 * 1024.0));
152+
printf(" - Warmup iterations: %d\n", WARMUP_ITERS);
153+
printf(" - Benchmark iterations: %d\n", BENCH_ITERS);
154+
printf(" - Comparing ME_BOOL vs ME_FLOAT64 output types\n");
155+
printf("═══════════════════════════════════════════════════════════════════════\n\n");
156+
157+
/* Allocate and initialize input arrays */
158+
double *a = malloc(TOTAL_SIZE * sizeof(double));
159+
double *b = malloc(TOTAL_SIZE * sizeof(double));
160+
double *c = malloc(TOTAL_SIZE * sizeof(double));
161+
162+
if (!a || !b || !c) {
163+
fprintf(stderr, "Failed to allocate input arrays\n");
164+
free(a);
165+
free(b);
166+
free(c);
167+
return 1;
168+
}
169+
170+
/* Initialize with varied data to exercise different comparison outcomes */
171+
for (size_t i = 0; i < TOTAL_SIZE; i++) {
172+
a[i] = (double)(i % 1000) / 100.0; /* 0.00 to 9.99 */
173+
b[i] = (double)((i + 500) % 1000) / 100.0; /* Offset pattern */
174+
/* c[i] such that a**2 == a + c is sometimes true */
175+
c[i] = a[i] * a[i] - a[i]; /* c = a² - a, so a² == a + c */
176+
}
177+
178+
/* Define benchmarks */
179+
bench_result_t results[10];
180+
int num_benchmarks = 0;
181+
182+
printf("Running benchmarks...\n\n");
183+
184+
/* Simple comparisons */
185+
benchmark_comparison("a < b", "a < b", a, b, c, 2, TOTAL_SIZE, &results[num_benchmarks++]);
186+
benchmark_comparison("a <= b", "a <= b", a, b, c, 2, TOTAL_SIZE, &results[num_benchmarks++]);
187+
benchmark_comparison("a == b", "a == b", a, b, c, 2, TOTAL_SIZE, &results[num_benchmarks++]);
188+
benchmark_comparison("a != b", "a != b", a, b, c, 2, TOTAL_SIZE, &results[num_benchmarks++]);
189+
190+
/* Comparisons with arithmetic */
191+
benchmark_comparison("a + b < c", "a + b < c", a, b, c, 3, TOTAL_SIZE, &results[num_benchmarks++]);
192+
benchmark_comparison("a * b == c", "a * b == c", a, b, c, 3, TOTAL_SIZE, &results[num_benchmarks++]);
193+
194+
/* Comparisons with power operations */
195+
benchmark_comparison("a**2 < b", "a**2 < b", a, b, c, 2, TOTAL_SIZE, &results[num_benchmarks++]);
196+
benchmark_comparison("a**2 + b**2 < c", "a**2 + b**2 < c", a, b, c, 3, TOTAL_SIZE, &results[num_benchmarks++]);
197+
198+
/* Complex comparisons */
199+
benchmark_comparison("sqrt(a) < b", "sqrt(a) < b", a, b, c, 2, TOTAL_SIZE, &results[num_benchmarks++]);
200+
benchmark_comparison("a**2 + b**2 < c**2", "a**2 + b**2 < c**2", a, b, c, 3, TOTAL_SIZE, &results[num_benchmarks++]);
201+
202+
/* Print results table */
203+
printf("═══════════════════════════════════════════════════════════════════════\n");
204+
printf("Results:\n");
205+
printf("═══════════════════════════════════════════════════════════════════════\n");
206+
printf("%-22s %12s %12s %10s\n",
207+
"Expression", "Bool (Me/s)", "F64 (Me/s)", "Ratio");
208+
printf("───────────────────────────────────────────────────────────────────────\n");
209+
210+
double total_bool = 0, total_f64 = 0;
211+
int valid_count = 0;
212+
for (int i = 0; i < num_benchmarks; i++) {
213+
double ratio = results[i].throughput_bool / results[i].throughput_f64;
214+
/* Handle potential inf/nan values */
215+
if (results[i].throughput_bool > 1e9 || results[i].throughput_f64 > 1e9) {
216+
printf("%-22s %12s %12s %10s\n",
217+
results[i].expr, "error", "error", "N/A");
218+
} else {
219+
printf("%-22s %12.2f %12.2f %9.2fx\n",
220+
results[i].expr,
221+
results[i].throughput_bool,
222+
results[i].throughput_f64,
223+
ratio);
224+
total_bool += results[i].throughput_bool;
225+
total_f64 += results[i].throughput_f64;
226+
valid_count++;
227+
}
228+
}
229+
230+
printf("───────────────────────────────────────────────────────────────────────\n");
231+
if (valid_count > 0) {
232+
double avg_bool = total_bool / valid_count;
233+
double avg_f64 = total_f64 / valid_count;
234+
printf("%-22s %12.2f %12.2f %9.2fx\n",
235+
"AVERAGE", avg_bool, avg_f64, avg_bool / avg_f64);
236+
}
237+
printf("═══════════════════════════════════════════════════════════════════════\n");
238+
239+
/* Memory bandwidth analysis */
240+
printf("\nMemory Analysis (for simple 'a < b'):\n");
241+
printf(" - Input: 2 × %.1f MB = %.1f MB read\n",
242+
TOTAL_SIZE * sizeof(double) / (1024.0 * 1024.0),
243+
2 * TOTAL_SIZE * sizeof(double) / (1024.0 * 1024.0));
244+
printf(" - Output (bool): %.1f MB written\n",
245+
TOTAL_SIZE * sizeof(bool) / (1024.0 * 1024.0));
246+
printf(" - Output (f64): %.1f MB written\n",
247+
TOTAL_SIZE * sizeof(double) / (1024.0 * 1024.0));
248+
249+
double bw_bool = results[0].throughput_bool * (2 * sizeof(double) + sizeof(bool)) / 1000.0;
250+
double bw_f64 = results[0].throughput_f64 * (3 * sizeof(double)) / 1000.0;
251+
printf(" - Bandwidth (bool): %.2f GB/s\n", bw_bool);
252+
printf(" - Bandwidth (f64): %.2f GB/s\n", bw_f64);
253+
254+
printf("\nKey Observations:\n");
255+
printf(" - ME_BOOL output computes in float64, then converts to bool\n");
256+
printf(" - Ratio > 1.0 means bool output is faster (less memory written)\n");
257+
printf(" - Ratio < 1.0 means conversion overhead exceeds memory savings\n");
258+
printf(" - Complex expressions amortize conversion overhead better\n");
259+
printf("═══════════════════════════════════════════════════════════════════════\n");
260+
261+
free(a);
262+
free(b);
263+
free(c);
264+
265+
return 0;
266+
}

doc/data-types.md

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,10 +229,87 @@ RGB( 64,128, 64) -> Gray=100
229229
RGB( 32,192,128) -> Gray=136
230230
```
231231

232+
## Example 5: Boolean Output from Comparisons
233+
234+
Comparison operators (`==`, `!=`, `<`, `<=`, `>`, `>=`) can output boolean arrays. This is useful for filtering, masking, or conditional operations.
235+
236+
```c
237+
#include <stdio.h>
238+
#include <stdbool.h>
239+
#include "miniexpr.h"
240+
241+
int main() {
242+
// Sample data where a² = a + b for all elements
243+
double a[] = {2.0, 3.0, 4.0, 5.0, 6.0};
244+
double b[] = {2.0, 6.0, 12.0, 20.0, 30.0};
245+
int n = 5;
246+
247+
// Boolean output array
248+
bool is_equal[5];
249+
250+
// Method 1: Explicit variable dtypes with ME_BOOL output
251+
// Use this when input types differ from output type
252+
me_variable vars[] = {
253+
{"a", ME_FLOAT64},
254+
{"b", ME_FLOAT64}
255+
};
256+
257+
int error;
258+
me_expr *expr = me_compile("a ** 2 == (a + b)", vars, 2, ME_BOOL, &error);
259+
260+
if (!expr) {
261+
printf("Parse error at position %d\n", error);
262+
return 1;
263+
}
264+
265+
const void *var_ptrs[] = {a, b};
266+
me_eval(expr, var_ptrs, 2, is_equal, n);
267+
268+
printf("Comparison Results (BOOL):\n");
269+
for (int i = 0; i < n; i++) {
270+
printf("a=%.1f: a² (%.1f) == a+b (%.1f) -> %s\n",
271+
a[i], a[i]*a[i], a[i]+b[i],
272+
is_equal[i] ? "true" : "false");
273+
}
274+
275+
me_free(expr);
276+
return 0;
277+
}
278+
```
279+
280+
### Expected Output
281+
282+
```
283+
Comparison Results (BOOL):
284+
a=2.0: a² (4.0) == a+b (4.0) -> true
285+
a=3.0: a² (9.0) == a+b (9.0) -> true
286+
a=4.0: a² (16.0) == a+b (16.0) -> true
287+
a=5.0: a² (25.0) == a+b (25.0) -> true
288+
a=6.0: a² (36.0) == a+b (36.0) -> true
289+
```
290+
291+
### Two Ways to Get Boolean Output
292+
293+
**Method 1: Explicit variable dtypes with ME_BOOL output**
294+
```c
295+
me_variable vars[] = {{"x", ME_FLOAT64}, {"y", ME_FLOAT64}};
296+
me_expr *expr = me_compile("x < y", vars, 2, ME_BOOL, &error);
297+
```
298+
299+
**Method 2: ME_AUTO output (auto-infers ME_BOOL for comparisons)**
300+
```c
301+
me_variable vars[] = {{"x", ME_FLOAT64}, {"y", ME_FLOAT64}};
302+
me_expr *expr = me_compile("x < y", vars, 2, ME_AUTO, &error);
303+
// me_get_dtype(expr) == ME_BOOL (automatically inferred)
304+
```
305+
306+
Both methods require explicit variable dtypes when the computation type differs from the output type.
307+
232308
## Choosing the Right Data Type
233309

234310
| Type | Use When | Memory per Element |
235311
|------|----------|-------------------|
312+
| `ME_BOOL` | Comparison results, flags, masks | 1 byte |
236313
| `ME_INT8` / `ME_UINT8` | Small integers, flags, pixel values | 1 byte |
237314
| `ME_INT16` / `ME_UINT16` | Medium-range integers | 2 bytes |
238315
| `ME_INT32` / `ME_UINT32` | Standard integers | 4 bytes |
@@ -245,3 +322,4 @@ RGB( 32,192,128) -> Gray=136
245322
- Use unsigned types when values are always non-negative
246323
- Use FLOAT64 for scientific computing requiring high precision
247324
- Use FLOAT32 for graphics or when processing large arrays
325+
- Use ME_BOOL for comparison expressions to get proper boolean output

0 commit comments

Comments
 (0)