From bc9e7c751b9a55710d7c21e849bb77a13e7c04b1 Mon Sep 17 00:00:00 2001
From: Dhruvanshu-Joshi <dhruvanshu.joshi@gmail.com>
Date: Thu, 18 Jul 2024 02:09:35 +0530
Subject: [PATCH 1/4] Add support for numpy like percentile and quantile

---
 pytensor/tensor/math.py | 102 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py
index d1e4dc6195..35032e3a3d 100644
--- a/pytensor/tensor/math.py
+++ b/pytensor/tensor/math.py
@@ -26,8 +26,10 @@
     concatenate,
     constant,
     expand_dims,
+    full_like,
     stack,
     switch,
+    take_along_axis,
 )
 from pytensor.tensor.blockwise import Blockwise, vectorize_node_fallback
 from pytensor.tensor.elemwise import (
@@ -2926,6 +2928,104 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
     return x
 
 
+def percentile(input, q, axis=None):
+    """
+    Computes the percentile along the given axis(es) of a tensor `input` using linear interpolation.
+
+    Parameters
+    ----------
+    input: TensorVariable
+        The input tensor.
+    q: float or list of floats
+        Percentile or sequence of percentiles to compute, which must be between 0 and 100 inclusive.
+    axis: None or int or list of int, optional
+        Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array.
+    """
+    input = as_tensor_variable(input)
+    input_ndim = input.type.ndim
+
+    if axis is None:
+        axis = list(range(input_ndim))
+    elif isinstance(axis, (int | np.integer)):
+        axis = [axis]
+    elif isinstance(axis, np.ndarray) and axis.ndim == 0:
+        axis = [int(axis)]
+    else:
+        axis = [int(a) for a in axis]
+
+    # Compute the shape of the remaining axes
+    new_axes_order = [i for i in range(input.ndim) if i not in axis] + list(axis)
+    input = input.dimshuffle(new_axes_order)
+    input_shape = shape(input)
+    remaining_axis_size = input_shape[: input.ndim - len(axis)]
+    flattened_axis_size = prod(input_shape[input.ndim - len(axis) :])
+    input = input.reshape(concatenate([remaining_axis_size, [flattened_axis_size]]))
+    axis = -1
+
+    # Sort the input tensor along the specified axis
+    sorted_input = input.sort(axis=axis)
+    input_shape = input.shape[axis]
+
+    if isinstance(q, (int | float)):
+        q = [q]
+
+    for percentile in q:
+        if percentile < 0 or percentile > 100:
+            raise ValueError("Percentiles must be in the range [0, 100]")
+
+    result = []
+    for percentile in q:
+        k = (percentile / 100.0) * (input_shape - 1)
+        k_floor = floor(k).astype("int64")
+        k_ceil = ceil(k).astype("int64")
+
+        indices1 = expand_dims(
+            full_like(sorted_input.take(0, axis=axis), k_floor), axis
+        )
+        indices2 = expand_dims(full_like(sorted_input.take(0, axis=axis), k_ceil), axis)
+
+        val1 = take_along_axis(sorted_input, indices1, axis=axis)
+        val2 = take_along_axis(sorted_input, indices2, axis=axis)
+
+        d = k - k_floor
+        percentile_val = val1 + d * (val2 - val1)
+
+        result.append(percentile_val.squeeze(axis=axis))
+
+    if len(result) == 1:
+        result = result[0]
+    else:
+        result = stack(result)
+
+    result.name = "percentile"
+    return result
+
+
+def quantile(input, q, axis=None):
+    """
+    Computes the quantile along the given axis(es) of a tensor `input` using linear interpolation.
+
+    Parameters
+    ----------
+    input: TensorVariable
+        The input tensor.
+    q: float or list of floats
+        Quantile or sequence of quantiles to compute, which must be between 0 and 1 inclusive.
+    axis: None or int or list of int, optional
+        Axis or axes along which the quantiles are computed. The default is to compute the quantile(s) along a flattened version of the array.
+    """
+    if isinstance(q, (int | float)):
+        q = [q]
+
+    for quantile in q:
+        if quantile < 0 or quantile > 1:
+            raise ValueError("Quantiles must be in the range [0, 1]")
+
+    percentiles = [100.0 * x for x in q]
+
+    return percentile(input, percentiles, axis)
+
+
 # NumPy logical aliases
 square = sqr
 
@@ -3080,6 +3180,8 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
     "outer",
     "any",
     "all",
+    "percentile",
+    "quantile",
     "ptp",
     "power",
     "logaddexp",

From 9b7611c0d8a0e7d764ce857f51af68f3d853a1bb Mon Sep 17 00:00:00 2001
From: Dhruvanshu-Joshi <dhruvanshu.joshi@gmail.com>
Date: Tue, 15 Oct 2024 19:28:18 +0530
Subject: [PATCH 2/4] merge median

---
 pytensor/tensor/math.py   | 39 ++++++++++------------
 tests/tensor/test_math.py | 68 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 22 deletions(-)

diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py
index 35032e3a3d..ac9f847de9 100644
--- a/pytensor/tensor/math.py
+++ b/pytensor/tensor/math.py
@@ -26,10 +26,8 @@
     concatenate,
     constant,
     expand_dims,
-    full_like,
     stack,
     switch,
-    take_along_axis,
 )
 from pytensor.tensor.blockwise import Blockwise, vectorize_node_fallback
 from pytensor.tensor.elemwise import (
@@ -2941,11 +2939,11 @@ def percentile(input, q, axis=None):
     axis: None or int or list of int, optional
         Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array.
     """
-    input = as_tensor_variable(input)
-    input_ndim = input.type.ndim
+    x = as_tensor_variable(input)
+    x_ndim = x.type.ndim
 
     if axis is None:
-        axis = list(range(input_ndim))
+        axis = list(range(x_ndim))
     elif isinstance(axis, (int | np.integer)):
         axis = [axis]
     elif isinstance(axis, np.ndarray) and axis.ndim == 0:
@@ -2954,17 +2952,17 @@ def percentile(input, q, axis=None):
         axis = [int(a) for a in axis]
 
     # Compute the shape of the remaining axes
-    new_axes_order = [i for i in range(input.ndim) if i not in axis] + list(axis)
-    input = input.dimshuffle(new_axes_order)
-    input_shape = shape(input)
-    remaining_axis_size = input_shape[: input.ndim - len(axis)]
-    flattened_axis_size = prod(input_shape[input.ndim - len(axis) :])
-    input = input.reshape(concatenate([remaining_axis_size, [flattened_axis_size]]))
-    axis = -1
+    new_axes_order = [i for i in range(x.ndim) if i not in axis] + list(axis)
+    x = x.dimshuffle(new_axes_order)
+    input_shape = shape(x)
+    remaining_axis_size = input_shape[: x.ndim - len(axis)]
+    x = x.reshape((*remaining_axis_size, -1))
 
     # Sort the input tensor along the specified axis
-    sorted_input = input.sort(axis=axis)
-    input_shape = input.shape[axis]
+    sorted_input = x.sort(axis=-1)
+    slices1 = [slice(None)] * sorted_input.ndim
+    slices2 = [slice(None)] * sorted_input.ndim
+    input_shape = x.shape[-1]
 
     if isinstance(q, (int | float)):
         q = [q]
@@ -2979,18 +2977,15 @@ def percentile(input, q, axis=None):
         k_floor = floor(k).astype("int64")
         k_ceil = ceil(k).astype("int64")
 
-        indices1 = expand_dims(
-            full_like(sorted_input.take(0, axis=axis), k_floor), axis
-        )
-        indices2 = expand_dims(full_like(sorted_input.take(0, axis=axis), k_ceil), axis)
-
-        val1 = take_along_axis(sorted_input, indices1, axis=axis)
-        val2 = take_along_axis(sorted_input, indices2, axis=axis)
+        slices1[-1] = slice(k_floor, k_floor + 1)
+        slices2[-1] = slice(k_ceil, k_ceil + 1)
+        val1 = sorted_input[tuple(slices1)]
+        val2 = sorted_input[tuple(slices2)]
 
         d = k - k_floor
         percentile_val = val1 + d * (val2 - val1)
 
-        result.append(percentile_val.squeeze(axis=axis))
+        result.append(percentile_val.squeeze(axis=-1))
 
     if len(result) == 1:
         result = result[0]
diff --git a/tests/tensor/test_math.py b/tests/tensor/test_math.py
index 14bc2614e3..c4de265914 100644
--- a/tests/tensor/test_math.py
+++ b/tests/tensor/test_math.py
@@ -102,9 +102,11 @@
     neg,
     neq,
     outer,
+    percentile,
     polygamma,
     power,
     ptp,
+    quantile,
     rad2deg,
     reciprocal,
     round_half_away_from_zero,
@@ -3766,3 +3768,69 @@ def test_median(ndim, axis):
 
     assert np.allclose(result_odd, expected_odd)
     assert np.allclose(result_even, expected_even)
+
+
+@pytest.mark.parametrize(
+    "ndim, axis, q",
+    [
+        (2, None, 50),
+        (2, 1, 33),
+        (2, (0, 1), 50),
+        (3, (1, 2), 50),
+        (4, (1, 3, 0), 25),
+        (2, None, [25, 50, 75]),
+        (3, (1, 2), [10, 90]),
+        (3, 1, 75),
+        (3, 0, 50),
+    ],
+)
+def test_percentile(ndim, axis, q):
+    shape = tuple(np.arange(1, ndim + 1))
+    data = np.random.rand(*shape)
+    x = tensor(shape=np.array(data).shape)
+    f = function([x], percentile(x, q, axis=axis))
+    result = f(data.astype(x.dtype))
+    expected = np.percentile(data.astype(x.dtype), q, axis=axis)
+    assert np.allclose(result, expected)
+
+
+@pytest.mark.parametrize(
+    "ndim, axis, q",
+    [
+        (2, None, 0.5),
+        (2, None, [0.25, 0.75]),
+        (2, 0, 0.5),
+        (2, (0, 1), 0.5),
+        (3, None, 0.5),
+        (3, None, [0.25, 0.75]),
+        (3, 0, 0.5),
+        (3, (1, 2), 0.5),
+    ],
+)
+def test_quantile(ndim, axis, q):
+    shape = tuple(np.random.randint(2, 6) for _ in range(ndim))
+    data = np.random.rand(*shape)
+
+    x = tensor(dtype="float64", shape=(None,) * ndim)
+    f = function([x], quantile(x, q, axis=axis))
+
+    result = f(data.astype(x.dtype))
+    expected = np.quantile(data.astype(x.dtype), q, axis=axis)
+
+    assert np.allclose(result, expected)
+
+
+@pytest.mark.parametrize(
+    "ndim, axis, q, is_percentile",
+    [
+        (2, None, [50, 120], True),
+        (2, 1, -0.5, False),
+    ],
+)
+def test_invalid_percentile_quantile(ndim, axis, q, is_percentile):
+    x = tensor(dtype="float64", shape=(None,) * ndim)
+    with pytest.raises(ValueError):
+        if is_percentile:
+            percentile(x, q, axis)
+        else:
+            quantile(x, q, axis)

From 17e6aa3b1cabd240b9c4ee282bf7522a2b7478ae Mon Sep 17 00:00:00 2001
From: Dhruvanshu-Joshi <dhruvanshu.joshi@gmail.com>
Date: Thu, 1 Aug 2024 02:31:22 +0530
Subject: [PATCH 3/4] Add quantile and quantile dependent percentile

---
 pytensor/tensor/math.py | 42 ++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py
index ac9f847de9..688add6352 100644
--- a/pytensor/tensor/math.py
+++ b/pytensor/tensor/math.py
@@ -2926,18 +2926,18 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
     return x
 
 
-def percentile(input, q, axis=None):
+def quantile(input, q, axis=None):
     """
-    Computes the percentile along the given axis(es) of a tensor `input` using linear interpolation.
+    Computes the quantile along the given axis(es) of a tensor `input` using linear interpolation.
 
     Parameters
     ----------
     input: TensorVariable
         The input tensor.
     q: float or list of floats
-        Percentile or sequence of percentiles to compute, which must be between 0 and 100 inclusive.
+        Quantile or sequence of quantiles to compute, which must be between 0 and 1 inclusive.
     axis: None or int or list of int, optional
-        Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array.
+        Axis or axes along which the quantiles are computed. The default is to compute the quantile(s) along a flattened version of the array.
     """
     x = as_tensor_variable(input)
     x_ndim = x.type.ndim
@@ -2967,13 +2967,13 @@ def percentile(input, q, axis=None):
     if isinstance(q, (int | float)):
         q = [q]
 
-    for percentile in q:
-        if percentile < 0 or percentile > 100:
-            raise ValueError("Percentiles must be in the range [0, 100]")
+    for quantile in q:
+        if quantile < 0 or quantile > 1:
+            raise ValueError("Quantiles must be in the range [0, 1]")
 
     result = []
-    for percentile in q:
-        k = (percentile / 100.0) * (input_shape - 1)
+    for quantile in q:
+        k = (quantile) * (input_shape - 1)
         k_floor = floor(k).astype("int64")
         k_ceil = ceil(k).astype("int64")
 
@@ -2983,42 +2983,42 @@ def percentile(input, q, axis=None):
         val2 = sorted_input[tuple(slices2)]
 
         d = k - k_floor
-        percentile_val = val1 + d * (val2 - val1)
+        quantile_val = val1 + d * (val2 - val1)
 
-        result.append(percentile_val.squeeze(axis=-1))
+        result.append(quantile_val.squeeze(axis=-1))
 
     if len(result) == 1:
         result = result[0]
     else:
         result = stack(result)
 
-    result.name = "percentile"
+    result.name = "quantile"
     return result
 
 
-def quantile(input, q, axis=None):
+def percentile(input, q, axis=None):
     """
-    Computes the quantile along the given axis(es) of a tensor `input` using linear interpolation.
+    Computes the percentile along the given axis(es) of a tensor `input` using linear interpolation.
 
     Parameters
     ----------
     input: TensorVariable
         The input tensor.
     q: float or list of floats
-        Quantile or sequence of quantiles to compute, which must be between 0 and 1 inclusive.
+        Percentile or sequence of percentiles to compute, which must be between 0 and 100 inclusive.
     axis: None or int or list of int, optional
-        Axis or axes along which the quantiles are computed. The default is to compute the quantile(s) along a flattened version of the array.
+        Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array.
     """
     if isinstance(q, (int | float)):
         q = [q]
 
-    for quantile in q:
-        if quantile < 0 or quantile > 1:
-            raise ValueError("Quantiles must be in the range [0, 1]")
+    for percentile in q:
+        if percentile < 0 or percentile > 100:
+            raise ValueError("Percentiles must be in the range [0, 100]")
 
-    percentiles = [100.0 * x for x in q]
+    quantiles = [x / 100 for x in q]
 
-    return percentile(input, percentiles, axis)
+    return quantile(input, quantiles, axis)
 
 
 # NumPy logical aliases

From dcdeef77c923ffe9fb45a8701507f8030e9390c4 Mon Sep 17 00:00:00 2001
From: Dhruvanshu-Joshi <dhruvanshu.joshi@gmail.com>
Date: Tue, 15 Oct 2024 20:46:53 +0530
Subject: [PATCH 4/4] Vectorised percentile/quantile

---
 pytensor/tensor/math.py   | 118 +++++++++++++++++---------------------
 tests/tensor/test_math.py |   1 +
 2 files changed, 53 insertions(+), 66 deletions(-)

diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py
index 688add6352..aa1fdbdf26 100644
--- a/pytensor/tensor/math.py
+++ b/pytensor/tensor/math.py
@@ -2926,99 +2926,85 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
     return x
 
 
-def quantile(input, q, axis=None):
+def quantile(x: TensorLike, q: float | list[float], axis=None) -> TensorVariable:
     """
-    Computes the quantile along the given axis(es) of a tensor `input` using linear interpolation.
+    Computes the q-th quantile along the given axis(es) of a tensor `input`.
 
     Parameters
     ----------
-    input: TensorVariable
+    x: TensorVariable
         The input tensor.
-    q: float or list of floats
-        Quantile or sequence of quantiles to compute, which must be between 0 and 1 inclusive.
-    axis: None or int or list of int, optional
-        Axis or axes along which the quantiles are computed. The default is to compute the quantile(s) along a flattened version of the array.
+    q: float or list of float
+        The quantile(s) to compute, which must be between 0 and 1 inclusive.
+        0 corresponds to the minimum, 0.5 to the median, and 1 to the maximum.
+    axis: None or int or (list of int) (see `Sum`)
+        Compute the quantile along this axis of the tensor.
+        None means all axes (like numpy).
     """
-    x = as_tensor_variable(input)
-    x_ndim = x.type.ndim
 
+    x = as_tensor_variable(x)
+    q = as_tensor_variable(q)
+    x_ndim = x.type.ndim
     if axis is None:
         axis = list(range(x_ndim))
-    elif isinstance(axis, (int | np.integer)):
-        axis = [axis]
-    elif isinstance(axis, np.ndarray) and axis.ndim == 0:
-        axis = [int(axis)]
     else:
-        axis = [int(a) for a in axis]
-
-    # Compute the shape of the remaining axes
-    new_axes_order = [i for i in range(x.ndim) if i not in axis] + list(axis)
-    x = x.dimshuffle(new_axes_order)
-    input_shape = shape(x)
-    remaining_axis_size = input_shape[: x.ndim - len(axis)]
-    x = x.reshape((*remaining_axis_size, -1))
-
-    # Sort the input tensor along the specified axis
-    sorted_input = x.sort(axis=-1)
-    slices1 = [slice(None)] * sorted_input.ndim
-    slices2 = [slice(None)] * sorted_input.ndim
-    input_shape = x.shape[-1]
+        axis = list(normalize_axis_tuple(axis, x_ndim))
 
-    if isinstance(q, (int | float)):
-        q = [q]
+    non_axis = [i for i in range(x_ndim) if i not in axis]
+    non_axis_shape = [x.shape[i] for i in non_axis]
 
-    for quantile in q:
-        if quantile < 0 or quantile > 1:
-            raise ValueError("Quantiles must be in the range [0, 1]")
+    # Put axis at the end and unravel them
+    x_raveled = x.transpose(*non_axis, *axis)
+    if len(axis) > 1:
+        x_raveled = x_raveled.reshape((*non_axis_shape, -1))
+    raveled_size = x_raveled.shape[-1]
 
-    result = []
-    for quantile in q:
-        k = (quantile) * (input_shape - 1)
-        k_floor = floor(k).astype("int64")
-        k_ceil = ceil(k).astype("int64")
+    # Ensure q is between 0 and 1
+    q = clip(q, 0.0, 1.0)
 
-        slices1[-1] = slice(k_floor, k_floor + 1)
-        slices2[-1] = slice(k_ceil, k_ceil + 1)
-        val1 = sorted_input[tuple(slices1)]
-        val2 = sorted_input[tuple(slices2)]
+    # Compute quantile indices
+    k = (q * (raveled_size - 1)).astype("int64")
+    k_float = q * (raveled_size - 1)
 
-        d = k - k_floor
-        quantile_val = val1 + d * (val2 - val1)
+    # Sort the input tensor along the specified axis
+    x_sorted = x_raveled.sort(axis=-1)
 
-        result.append(quantile_val.squeeze(axis=-1))
+    # Get the values at index k and k + 1 for linear interpolation
+    k_values = x_sorted[..., k]
+    kp1_values = x_sorted[..., minimum(k + 1, raveled_size - 1)]
 
-    if len(result) == 1:
-        result = result[0]
-    else:
-        result = stack(result)
+    # Interpolation between the two values if needed
+    frac = k_float - k.astype(k_float.dtype)
+    quantile_value = (1 - frac) * k_values + frac * kp1_values
 
-    result.name = "quantile"
-    return result
+    return quantile_value
 
 
-def percentile(input, q, axis=None):
+def percentile(x: TensorLike, p: float | list[float], axis=None) -> TensorVariable:
     """
-    Computes the percentile along the given axis(es) of a tensor `input` using linear interpolation.
+    Computes the p-th percentile along the given axis(es) of a tensor `input`.
 
     Parameters
     ----------
-    input: TensorVariable
+    x: TensorVariable
         The input tensor.
-    q: float or list of floats
-        Percentile or sequence of percentiles to compute, which must be between 0 and 100 inclusive.
-    axis: None or int or list of int, optional
-        Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array.
-    """
-    if isinstance(q, (int | float)):
-        q = [q]
-
-    for percentile in q:
-        if percentile < 0 or percentile > 100:
-            raise ValueError("Percentiles must be in the range [0, 100]")
+    p: float or list of float
+        The percentile(s) to compute, which must be between 0 and 100 inclusive.
+        0 corresponds to the minimum, 50 to the median, and 100 to the maximum.
+    axis: None or int or (list of int) (see `Sum`)
+        Compute the percentile along this axis of the tensor.
+        None means all axes (like numpy).
 
-    quantiles = [x / 100 for x in q]
+    Returns
+    -------
+    TensorVariable
+        The computed percentile values.
+    """
+    # Convert percentiles (0-100) to quantiles (0-1)
+    q = as_tensor_variable(p) / 100.0
 
-    return quantile(input, quantiles, axis)
+    # Call the quantile function
+    return quantile(x, q, axis=axis)
 
 
 # NumPy logical aliases
diff --git a/tests/tensor/test_math.py b/tests/tensor/test_math.py
index c4de265914..11a54a6685 100644
--- a/tests/tensor/test_math.py
+++ b/tests/tensor/test_math.py
@@ -3805,6 +3805,7 @@ def test_percentile(ndim, axis, q):
         (3, None, [0.25, 0.75]),
         (3, 0, 0.5),
         (3, (1, 2), 0.5),
+        (3, (1, 2), [0.1, 0.9]),
     ],
 )
 def test_quantile(ndim, axis, q):