Correctly interpret float constants as such

FrancescAlted · FrancescAlted · commit 6ec73ef222d5 · 2025-12-22T13:02:38.000+01:00
diff --git a/src/miniexpr.c b/src/miniexpr.c
@@ -178,6 +178,18 @@ static me_dtype promote_types(me_dtype a, me_dtype b) {
     return ME_FLOAT64; // Fallback for out-of-range types
 }
 
+static bool is_integer_dtype(me_dtype dt) { /* True when dt lies in the ME_INT8..ME_UINT64 enumerator range */
+    return dt >= ME_INT8 && dt <= ME_UINT64; // NOTE(review): assumes only integer dtypes sit between these two enumerators - confirm against me_dtype
+}
+
+static bool is_float_dtype(me_dtype dt) { /* True when dt is ME_FLOAT32 or ME_FLOAT64 */
+    return dt == ME_FLOAT32 || dt == ME_FLOAT64;
+}
+
+static bool is_complex_dtype(me_dtype dt) { /* True when dt is ME_COMPLEX64 or ME_COMPLEX128 */
+    return dt == ME_COMPLEX64 || dt == ME_COMPLEX128;
+}
+
 /* Get size of a type in bytes */
 static size_t dtype_size(me_dtype dtype) {
     switch (dtype) {
@@ -627,8 +639,39 @@ static void skip_whitespace(state *s) {
 }
 
 static void read_number_token(state *s) {
+    const char *start = s->next;
     s->value = strtod(s->next, (char **) &s->next);
     s->type = TOK_NUMBER;
+
+    // Determine if it is a floating point or integer constant
+    bool is_float = false;
+    for (const char *p = start; p < s->next; p++) {
+        if (*p == '.' || *p == 'e' || *p == 'E') {
+            is_float = true;
+            break;
+        }
+    }
+
+    if (is_float) {
+        // Only use FLOAT64 if we are not forcing a specific (smaller) float type
+        if (s->target_dtype == ME_FLOAT32) {
+            s->dtype = ME_FLOAT32;
+        } else {
+            s->dtype = ME_FLOAT64;
+        }
+    } else {
+        // For integers, we use a heuristic
+        if (s->value > INT_MAX || s->value < INT_MIN) {
+            s->dtype = ME_INT64;
+        } else {
+            // Use target_dtype if it's an integer type, otherwise default to INT32
+            if (is_integer_dtype(s->target_dtype)) {
+                s->dtype = s->target_dtype;
+            } else {
+                s->dtype = ME_INT32;
+            }
+        }
+    }
 }
 
 static void read_identifier_token(state *s) {
@@ -818,7 +861,31 @@ static me_expr *base(state *s) {
             CHECK_NULL(ret);
 
             ret->value = s->value;
-            ret->dtype = s->target_dtype; // Use target dtype for constants
+            // Use inferred type for constants (floating point vs integer)
+            if (s->target_dtype == ME_AUTO) {
+                ret->dtype = s->dtype;
+            } else {
+                // If target_dtype is integer but constant is float/complex, we must use float/complex
+                if (is_integer_dtype(s->target_dtype)) {
+                    if (is_float_dtype(s->dtype) || is_complex_dtype(s->dtype)) {
+                        ret->dtype = s->dtype;
+                    } else if (is_integer_dtype(s->dtype) && dtype_size(s->dtype) > dtype_size(s->target_dtype)) {
+                        // Use larger integer type if needed
+                        ret->dtype = s->dtype;
+                    } else {
+                        ret->dtype = s->target_dtype;
+                    }
+                } else {
+                    // For all other (non-integer) target types the constant takes target_dtype; both branches below assign the same value
+                    if (s->target_dtype == ME_FLOAT32 && (s->dtype == ME_FLOAT64 || is_complex_dtype(s->dtype))) {
+                        // Note: To satisfy regressions that expect FLOAT32 for 3.0 even if it's naturally FLOAT64,
+                        // we stick to FLOAT32 here. If we wanted strictness, we'd use s->dtype.
+                        ret->dtype = s->target_dtype;
+                    } else {
+                        ret->dtype = s->target_dtype;
+                    }
+                }
+            }
             next_token(s);
             break;
 
@@ -3046,7 +3113,7 @@ static me_expr *private_compile(const char *expression, const me_variable *varia
         // This prevents type promotion issues when mixing float32 vars with float64 constants
         s.target_dtype = variables[0].dtype;
     } else {
-        s.target_dtype = ME_FLOAT64; // Fallback to double
+        s.target_dtype = ME_AUTO;
     }
 
     next_token(&s);
diff --git a/tests/test_regressions.c b/tests/test_regressions.c
@@ -416,6 +416,150 @@ float mul_5_f32(float x) { return x * 5.0f; }
 float sub_2_f32(float x) { return x - 2.0f; }
 float div_4_f32(float x) { return x / 4.0f; }
 
+// ============================================================================
+// LARGE INT64 + FLOAT CONSTANT TEST
+// ============================================================================
+
+int test_int64_large_constant(const char *description, int size) {
+    printf("\n%s\n", description);
+    printf("======================================================================\n");
+
+    // Regression check: evaluates "(a + 90000.00001) + 1" over an int64 array holding 0..size-1 and
+    // compares with double arithmetic; returns 1 on pass, 0 on any failure. Small integers keep the
+    // integer -> floating conversion lossless, so a mismatch stems from the floating constant.
+    long long *input = malloc(size * sizeof(long long));
+    if (!input) {
+        printf("  ❌ FAILED: malloc failed\n");
+        return 0;
+    }
+    for (int i = 0; i < size; i++) {
+        input[i] = (long long)i;
+    }
+
+    // Compile expression. We use ME_AUTO so mixed-type rules are applied and the
+    // compiler decides the result dtype.
+    me_variable vars[] = {{"a", ME_INT64}};
+    int err;
+    const char *expr_str = "(a + 90000.00001) + 1";
+    me_expr *expr = me_compile(expr_str, vars, 1, ME_AUTO, &err);
+
+    if (!expr) {
+        printf("  ❌ COMPILATION FAILED (error %d)\n", err);
+        free(input);
+        return 0;
+    }
+
+    me_dtype out_dtype = me_get_dtype(expr);
+    printf("  Compiled expression: %s\n", expr_str);
+    printf("  Inferred output dtype: %d\n", out_dtype);
+
+    int passed = 1;
+    double max_diff = 0.0;
+
+    // Evaluate depending on inferred output dtype. The expression contains a
+    // floating-point constant, so a floating output dtype is expected.
+    const void *var_ptrs[] = {input};
+
+    if (out_dtype == ME_FLOAT64) {
+        double *result = malloc(size * sizeof(double));
+        if (!result) {
+            printf("  ❌ FAILED: malloc for result failed\n");
+            me_free(expr);
+            free(input);
+            return 0;
+        }
+        me_eval(expr, var_ptrs, 1, result, size);
+
+        // Compute expected values using double arithmetic.
+        for (int i = 0; i < size; i++) {
+            double expected = ((double)input[i] + 90000.00001) + 1.0;
+            double diff = fabs(result[i] - expected);
+            if (diff > max_diff) max_diff = diff;
+            if (diff > 1e-9) passed = 0; // tight tolerance for this check
+        }
+
+        printf("  Result (first 5):   ");
+        for (int i = 0; i < 5 && i < size; i++) printf("%.9f ", result[i]);
+        printf("...\n");
+
+        printf("  Expected (first 5): ");
+        for (int i = 0; i < 5 && i < size; i++) {
+            double expected = ((double)input[i] + 90000.00001) + 1.0;
+            printf("%.9f ", expected);
+        }
+        printf("...\n");
+
+        free(result);
+
+    } else if (out_dtype == ME_FLOAT32) {
+        float *result = malloc(size * sizeof(float));
+        if (!result) {
+            printf("  ❌ FAILED: malloc for result failed\n");
+            me_free(expr);
+            free(input);
+            return 0;
+        }
+        me_eval(expr, var_ptrs, 1, result, size);
+
+        for (int i = 0; i < size; i++) {
+            float expected = (float)(((double)input[i] + 90000.00001) + 1.0);
+            float diff = fabsf(result[i] - expected);
+            if (diff > max_diff) max_diff = diff;
+            if (diff > 1e-5f) passed = 0; // looser tolerance for float32
+        }
+
+        printf("  Result (first 5):   ");
+        for (int i = 0; i < 5 && i < size; i++) printf("%.7f ", result[i]);
+        printf("...\n");
+
+        printf("  Expected (first 5): ");
+        for (int i = 0; i < 5 && i < size; i++) {
+            float expected = (float)(((double)input[i] + 90000.00001) + 1.0);
+            printf("%.7f ", expected);
+        }
+        printf("...\n");
+
+        free(result);
+
+    } else {
+        // Unexpected output dtype: the result is still evaluated into a double buffer and compared as doubles.
+        // NOTE(review): if me_eval writes elements in out_dtype, this buffer may be mis-sized or misread - confirm.
+        printf("  ⚠️  Unexpected output dtype (%d). Attempting double evaluation for comparison.\n", out_dtype);
+        double *result = malloc(size * sizeof(double));
+        if (!result) {
+            printf("  ❌ FAILED: malloc for fallback result failed\n");
+            me_free(expr);
+            free(input);
+            return 0;
+        }
+        me_eval(expr, var_ptrs, 1, result, size);
+
+        for (int i = 0; i < size; i++) {
+            double expected = ((double)input[i] + 90000.00001) + 1.0;
+            double diff = fabs(result[i] - expected);
+            if (diff > max_diff) max_diff = diff;
+            if (diff > 1e-9) passed = 0;
+        }
+
+        free(result);
+    }
+
+    me_free(expr);
+    free(input);
+
+    if (passed) {
+        printf("  ✅ PASS\n");
+    } else {
+        printf("  ❌ FAIL (max diff: %.12f)\n", max_diff);
+    }
+
+    // The caller expects this test to surface the reported problem; return the
+    // actual pass/fail so it shows up in the overall summary. The external app
+    // that reported the issue used this expression and observed incorrect
+    // behaviour, so a failure here indicates the bug is present.
+    return passed;
+}
+
 // ============================================================================
 // MAIN TEST RUNNER
 // ============================================================================
@@ -556,6 +700,17 @@ int main() {
     total++;
     if (test_scalar_constant("Test 5.6: a / 4", "a / 4", div_4_f32)) passed++;
 
+    // ========================================================================
+    // SECTION 6: LARGE INT64 + FLOAT CONSTANT (regression test for float-constant typing)
+    // ========================================================================
+    printf("\n\n========================================================================\n");
+    printf("SECTION 6: LARGE INT64 + FLOAT CONSTANT\n");
+    printf("========================================================================\n");
+
+    total++;
+    if (test_int64_large_constant("Test 6.1: (a + 90000.00001) + 1 where a is int64[1000]",
+                                  1000)) passed++;
+
     // ========================================================================
     // FINAL SUMMARY
     // ========================================================================