Merge pull request #290 from asmeurer/test_sum-fix

asmeurer · web-flow · commit b905bca5e068 · 2024-10-02T16:26:03.000-06:00
Fix test_sum to be more numerically correct
diff --git a/array_api_tests/pytest_helpers.py b/array_api_tests/pytest_helpers.py
@@ -397,7 +397,7 @@ def assert_scalar_equals(
     kw: dict = {},
 ):
     """
-    Assert a 0d array, convered to a scalar, is as expected, e.g.
+    Assert a 0d array, converted to a scalar, is as expected, e.g.
 
         >>> x = xp.ones(5, dtype=xp.uint8)
         >>> out = xp.sum(x)
@@ -407,6 +407,8 @@ def assert_scalar_equals(
 
         >>> assert int(out) == 5
 
+    NOTE: This function does *exact* comparison, even for floats. For
+    approximate float comparisons use assert_scalar_isclose.
     """
     __tracebackhide__ = True
     repr_name = repr_name if idx == () else f"{repr_name}[{idx}]"
@@ -418,8 +420,40 @@ def assert_scalar_equals(
         msg = f"{repr_name}={out}, but should be {expected} [{f_func}]"
         assert cmath.isnan(out), msg
     else:
-        msg = f"{repr_name}={out}, but should be roughly {expected} [{f_func}]"
-        assert cmath.isclose(out, expected, rel_tol=0.25, abs_tol=1), msg
+        msg = f"{repr_name}={out}, but should be {expected} [{f_func}]"
+        assert out == expected, msg
+
+
+def assert_scalar_isclose(
+    func_name: str,
+    *,
+    rel_tol: float = 0.25,
+    abs_tol: float = 1,
+    type_: ScalarType,
+    idx: Shape,
+    out: Scalar,
+    expected: Scalar,
+    repr_name: str = "out",
+    kw: dict = {},
+):
+    """
+    Assert a 0d array, converted to a scalar, is close to the expected value, e.g.
+
+        >>> x = xp.ones(5, dtype=xp.float64)
+        >>> out = xp.sum(x)
+        >>> assert_scalar_isclose('sum', type_=float, idx=(), out=float(out), expected=5.)
+
+        is equivalent to
+
+        >>> assert math.isclose(float(out), 5., rel_tol=0.25, abs_tol=1)
+
+    """
+    __tracebackhide__ = True
+    repr_name = repr_name if idx == () else f"{repr_name}[{idx}]"
+    f_func = f"{func_name}({fmt_kw(kw)})"
+    msg = f"{repr_name}={out}, but should be roughly {expected} [{f_func}]"
+    assert type_ in [float, complex] # Sanity check
+    assert cmath.isclose(out, expected, rel_tol=rel_tol, abs_tol=abs_tol), msg
 
 
 def assert_fill(
diff --git a/array_api_tests/test_statistical_functions.py b/array_api_tests/test_statistical_functions.py
@@ -17,6 +17,7 @@
 
 
 @pytest.mark.min_version("2023.12")
+@pytest.mark.unvectorized
 @given(
     x=hh.arrays(
         dtype=hh.numeric_dtypes,
@@ -80,10 +81,15 @@ def test_cumulative_sum(x, data):
             if dh.is_int_dtype(out.dtype):
                 m, M = dh.dtype_ranges[out.dtype]
                 assume(m <= expected <= M)
-            ph.assert_scalar_equals("cumulative_sum", type_=scalar_type,
-                                    idx=out_idx.raw, out=out_val,
-                                    expected=expected)
-
+                ph.assert_scalar_equals("cumulative_sum", type_=scalar_type,
+                                        idx=out_idx.raw, out=out_val,
+                                        expected=expected)
+            else:
+                condition_number = _sum_condition_number(elements)
+                assume(condition_number < 1e6)
+                ph.assert_scalar_isclose("cumulative_sum", type_=scalar_type,
+                                         idx=out_idx.raw, out=out_val,
+                                         expected=expected)
 
 def kwarg_dtypes(dtype: DataType) -> st.SearchStrategy[Optional[DataType]]:
     dtypes = [d2 for d1, d2 in dh.promotion_table if d1 == dtype]
@@ -176,6 +182,16 @@ def test_min(x, data):
         ph.assert_scalar_equals("min", type_=scalar_type, idx=out_idx, out=min_, expected=expected)
 
 
+def _prod_condition_number(elements):
+    # Relative condition number using the infinity norm
+    abs_max = max([abs(i) for i in elements])
+    abs_min = min([abs(i) for i in elements])
+
+    if abs_min == 0:
+        return float('inf')
+
+    return abs_max / abs_min
+
 @pytest.mark.unvectorized
 @given(
     x=hh.arrays(
@@ -225,7 +241,13 @@ def test_prod(x, data):
         if dh.is_int_dtype(out.dtype):
             m, M = dh.dtype_ranges[out.dtype]
             assume(m <= expected <= M)
-        ph.assert_scalar_equals("prod", type_=scalar_type, idx=out_idx, out=prod, expected=expected)
+            ph.assert_scalar_equals("prod", type_=scalar_type, idx=out_idx,
+                                    out=prod, expected=expected)
+        else:
+            condition_number = _prod_condition_number(elements)
+            assume(condition_number < 1e15)
+            ph.assert_scalar_isclose("prod", type_=scalar_type, idx=out_idx,
+                                     out=prod, expected=expected)
 
 
 @pytest.mark.skip(reason="flaky")  # TODO: fix!
@@ -264,8 +286,16 @@ def test_std(x, data):
     )
     # We can't easily test the result(s) as standard deviation methods vary a lot
 
+def _sum_condition_number(elements):
+    sum_abs = sum([abs(i) for i in elements])
+    abs_sum = abs(sum(elements))
 
-@pytest.mark.unvectorized
+    if abs_sum == 0:
+        return float('inf')
+
+    return sum_abs / abs_sum
+
+# @pytest.mark.unvectorized
 @given(
     x=hh.arrays(
         dtype=hh.numeric_dtypes,
@@ -314,7 +344,15 @@ def test_sum(x, data):
         if dh.is_int_dtype(out.dtype):
             m, M = dh.dtype_ranges[out.dtype]
             assume(m <= expected <= M)
-        ph.assert_scalar_equals("sum", type_=scalar_type, idx=out_idx, out=sum_, expected=expected)
+            ph.assert_scalar_equals("sum", type_=scalar_type, idx=out_idx,
+                                    out=sum_, expected=expected)
+        else:
+            # Avoid value testing for ill-conditioned summations. See
+            # https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Accuracy and
+            # https://en.wikipedia.org/wiki/Condition_number.
+            condition_number = _sum_condition_number(elements)
+            assume(condition_number < 1e6)
+            ph.assert_scalar_isclose("sum", type_=scalar_type, idx=out_idx, out=sum_, expected=expected)
 
 
 @pytest.mark.unvectorized