From bc9e7c751b9a55710d7c21e849bb77a13e7c04b1 Mon Sep 17 00:00:00 2001 From: Dhruvanshu-Joshi Date: Thu, 18 Jul 2024 02:09:35 +0530 Subject: [PATCH 1/4] Add support for numpy like percentile and quantile --- pytensor/tensor/math.py | 102 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py index d1e4dc6195..35032e3a3d 100644 --- a/pytensor/tensor/math.py +++ b/pytensor/tensor/math.py @@ -26,8 +26,10 @@ concatenate, constant, expand_dims, + full_like, stack, switch, + take_along_axis, ) from pytensor.tensor.blockwise import Blockwise, vectorize_node_fallback from pytensor.tensor.elemwise import ( @@ -2926,6 +2928,104 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None): return x +def percentile(input, q, axis=None): + """ + Computes the percentile along the given axis(es) of a tensor `input` using linear interpolation. + + Parameters + ---------- + input: TensorVariable + The input tensor. + q: float or list of floats + Percentile or sequence of percentiles to compute, which must be between 0 and 100 inclusive. + axis: None or int or list of int, optional + Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array. + """ + input = as_tensor_variable(input) + input_ndim = input.type.ndim + + if axis is None: + axis = list(range(input_ndim)) + elif isinstance(axis, (int | np.integer)): + axis = [axis] + elif isinstance(axis, np.ndarray) and axis.ndim == 0: + axis = [int(axis)] + else: + axis = [int(a) for a in axis] + + # Compute the shape of the remaining axes + new_axes_order = [i for i in range(input.ndim) if i not in axis] + list(axis) + input = input.dimshuffle(new_axes_order) + input_shape = shape(input) + remaining_axis_size = input_shape[: input.ndim - len(axis)] + flattened_axis_size = prod(input_shape[input.ndim - len(axis) :]) + input = input.reshape(concatenate([remaining_axis_size, [flattened_axis_size]])) + axis = -1 + + # Sort the input tensor along the specified axis + sorted_input = input.sort(axis=axis) + input_shape = input.shape[axis] + + if isinstance(q, (int | float)): + q = [q] + + for percentile in q: + if percentile < 0 or percentile > 100: + raise ValueError("Percentiles must be in the range [0, 100]") + + result = [] + for percentile in q: + k = (percentile / 100.0) * (input_shape - 1) + k_floor = floor(k).astype("int64") + k_ceil = ceil(k).astype("int64") + + indices1 = expand_dims( + full_like(sorted_input.take(0, axis=axis), k_floor), axis + ) + indices2 = expand_dims(full_like(sorted_input.take(0, axis=axis), k_ceil), axis) + + val1 = take_along_axis(sorted_input, indices1, axis=axis) + val2 = take_along_axis(sorted_input, indices2, axis=axis) + + d = k - k_floor + percentile_val = val1 + d * (val2 - val1) + + result.append(percentile_val.squeeze(axis=axis)) + + if len(result) == 1: + result = result[0] + else: + result = stack(result) + + result.name = "percentile" + return result + + +def quantile(input, q, axis=None): + """ + Computes the quantile along the given axis(es) of a tensor `input` using linear interpolation. + + Parameters + ---------- + input: TensorVariable + The input tensor. + q: float or list of floats + Quantile or sequence of quantiles to compute, which must be between 0 and 1 inclusive. + axis: None or int or list of int, optional + Axis or axes along which the quantiles are computed. The default is to compute the quantile(s) along a flattened version of the array. + """ + if isinstance(q, (int | float)): + q = [q] + + for quantile in q: + if quantile < 0 or quantile > 1: + raise ValueError("Quantiles must be in the range [0, 1]") + + percentiles = [100.0 * x for x in q] + + return percentile(input, percentiles, axis) + + # NumPy logical aliases square = sqr @@ -3080,6 +3180,8 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None): "outer", "any", "all", + "percentile", + "quantile", "ptp", "power", "logaddexp", From 9b7611c0d8a0e7d764ce857f51af68f3d853a1bb Mon Sep 17 00:00:00 2001 From: Dhruvanshu-Joshi Date: Tue, 15 Oct 2024 19:28:18 +0530 Subject: [PATCH 2/4] merge median --- pytensor/tensor/math.py | 39 ++++++++++------------ tests/tensor/test_math.py | 68 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 22 deletions(-) diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py index 35032e3a3d..ac9f847de9 100644 --- a/pytensor/tensor/math.py +++ b/pytensor/tensor/math.py @@ -26,10 +26,8 @@ concatenate, constant, expand_dims, - full_like, stack, switch, - take_along_axis, ) from pytensor.tensor.blockwise import Blockwise, vectorize_node_fallback from pytensor.tensor.elemwise import ( @@ -2941,11 +2939,11 @@ def percentile(input, q, axis=None): axis: None or int or list of int, optional Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array. """ - input = as_tensor_variable(input) - input_ndim = input.type.ndim + x = as_tensor_variable(input) + x_ndim = x.type.ndim if axis is None: - axis = list(range(input_ndim)) + axis = list(range(x_ndim)) elif isinstance(axis, (int | np.integer)): axis = [axis] elif isinstance(axis, np.ndarray) and axis.ndim == 0: @@ -2954,17 +2952,17 @@ def percentile(input, q, axis=None): axis = [int(a) for a in axis] # Compute the shape of the remaining axes - new_axes_order = [i for i in range(input.ndim) if i not in axis] + list(axis) - input = input.dimshuffle(new_axes_order) - input_shape = shape(input) - remaining_axis_size = input_shape[: input.ndim - len(axis)] - flattened_axis_size = prod(input_shape[input.ndim - len(axis) :]) - input = input.reshape(concatenate([remaining_axis_size, [flattened_axis_size]])) - axis = -1 + new_axes_order = [i for i in range(x.ndim) if i not in axis] + list(axis) + x = x.dimshuffle(new_axes_order) + input_shape = shape(x) + remaining_axis_size = input_shape[: x.ndim - len(axis)] + x = x.reshape((*remaining_axis_size, -1)) # Sort the input tensor along the specified axis - sorted_input = input.sort(axis=axis) - input_shape = input.shape[axis] + sorted_input = x.sort(axis=-1) + slices1 = [slice(None)] * sorted_input.ndim + slices2 = [slice(None)] * sorted_input.ndim + input_shape = x.shape[-1] if isinstance(q, (int | float)): q = [q] @@ -2979,18 +2977,15 @@ def percentile(input, q, axis=None): k_floor = floor(k).astype("int64") k_ceil = ceil(k).astype("int64") - indices1 = expand_dims( - full_like(sorted_input.take(0, axis=axis), k_floor), axis - ) - indices2 = expand_dims(full_like(sorted_input.take(0, axis=axis), k_ceil), axis) - - val1 = take_along_axis(sorted_input, indices1, axis=axis) - val2 = take_along_axis(sorted_input, indices2, axis=axis) + slices1[-1] = slice(k_floor, k_floor + 1) + slices2[-1] = slice(k_ceil, k_ceil + 1) + val1 = sorted_input[tuple(slices1)] + val2 = sorted_input[tuple(slices2)] d = k - k_floor percentile_val = val1 + d * (val2 - val1) - result.append(percentile_val.squeeze(axis=axis)) + result.append(percentile_val.squeeze(axis=-1)) if len(result) == 1: result = result[0] diff --git a/tests/tensor/test_math.py b/tests/tensor/test_math.py index 14bc2614e3..c4de265914 100644 --- a/tests/tensor/test_math.py +++ b/tests/tensor/test_math.py @@ -102,9 +102,11 @@ neg, neq, outer, + percentile, polygamma, power, ptp, + quantile, rad2deg, reciprocal, round_half_away_from_zero, @@ -3766,3 +3768,69 @@ def test_median(ndim, axis): assert np.allclose(result_odd, expected_odd) assert np.allclose(result_even, expected_even) + + +@pytest.mark.parametrize( + "ndim, axis, q", + [ + (2, None, 50), + (2, 1, 33), + (2, (0, 1), 50), + (3, (1, 2), 50), + (4, (1, 3, 0), 25), + (2, None, [25, 50, 75]), + (3, (1, 2), [10, 90]), + (3, 1, 75), + (3, 0, 50), + ], +) +def test_percentile(ndim, axis, q): + shape = tuple(np.arange(1, ndim + 1)) + data = np.random.rand(*shape) + x = tensor(shape=np.array(data).shape) + f = function([x], percentile(x, q, axis=axis)) + result = f(data.astype(x.dtype)) + expected = np.percentile(data.astype(x.dtype), q, axis=axis) + assert np.allclose(result, expected) + + +@pytest.mark.parametrize( + "ndim, axis, q", + [ + (2, None, 0.5), + (2, None, [0.25, 0.75]), + (2, 0, 0.5), + (2, (0, 1), 0.5), + (3, None, 0.5), + (3, None, [0.25, 0.75]), + (3, 0, 0.5), + (3, (1, 2), 0.5), + ], +) +def test_quantile(ndim, axis, q): + shape = tuple(np.random.randint(2, 6) for _ in range(ndim)) + data = np.random.rand(*shape) + + x = tensor(dtype="float64", shape=(None,) * ndim) + f = function([x], quantile(x, q, axis=axis)) + + result = f(data.astype(x.dtype)) + expected = np.quantile(data.astype(x.dtype), q, axis=axis) + + assert np.allclose(result, expected) + + +@pytest.mark.parametrize( + "ndim, axis, q, is_percentile", + [ + (2, None, [50, 120], True), + (2, 1, -0.5, False), + ], +) +def test_invalid_percentile_quantile(ndim, axis, q, is_percentile): + x = tensor(dtype="float64", shape=(None,) * ndim) + with pytest.raises(ValueError): + if is_percentile: + percentile(x, q, axis) + else: + quantile(x, q, axis) From 17e6aa3b1cabd240b9c4ee282bf7522a2b7478ae Mon Sep 17 00:00:00 2001 From: Dhruvanshu-Joshi Date: Thu, 1 Aug 2024 02:31:22 +0530 Subject: [PATCH 3/4] Add quantile and quantile dependent percentile --- pytensor/tensor/math.py | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py index ac9f847de9..688add6352 100644 --- a/pytensor/tensor/math.py +++ b/pytensor/tensor/math.py @@ -2926,18 +2926,18 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None): return x -def percentile(input, q, axis=None): +def quantile(input, q, axis=None): """ - Computes the percentile along the given axis(es) of a tensor `input` using linear interpolation. + Computes the quantile along the given axis(es) of a tensor `input` using linear interpolation. Parameters ---------- input: TensorVariable The input tensor. q: float or list of floats - Percentile or sequence of percentiles to compute, which must be between 0 and 100 inclusive. + Quantile or sequence of quantiles to compute, which must be between 0 and 1 inclusive. axis: None or int or list of int, optional - Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array. + Axis or axes along which the quantiles are computed. The default is to compute the quantile(s) along a flattened version of the array. """ x = as_tensor_variable(input) x_ndim = x.type.ndim @@ -2967,13 +2967,13 @@ def percentile(input, q, axis=None): if isinstance(q, (int | float)): q = [q] - for percentile in q: - if percentile < 0 or percentile > 100: - raise ValueError("Percentiles must be in the range [0, 100]") + for quantile in q: + if quantile < 0 or quantile > 1: + raise ValueError("Quantiles must be in the range [0, 1]") result = [] - for percentile in q: - k = (percentile / 100.0) * (input_shape - 1) + for quantile in q: + k = (quantile) * (input_shape - 1) k_floor = floor(k).astype("int64") k_ceil = ceil(k).astype("int64") @@ -2983,42 +2983,42 @@ def percentile(input, q, axis=None): val2 = sorted_input[tuple(slices2)] d = k - k_floor - percentile_val = val1 + d * (val2 - val1) + quantile_val = val1 + d * (val2 - val1) - result.append(percentile_val.squeeze(axis=-1)) + result.append(quantile_val.squeeze(axis=-1)) if len(result) == 1: result = result[0] else: result = stack(result) - result.name = "percentile" + result.name = "quantile" return result -def quantile(input, q, axis=None): +def percentile(input, q, axis=None): """ - Computes the quantile along the given axis(es) of a tensor `input` using linear interpolation. + Computes the percentile along the given axis(es) of a tensor `input` using linear interpolation. Parameters ---------- input: TensorVariable The input tensor. q: float or list of floats - Quantile or sequence of quantiles to compute, which must be between 0 and 1 inclusive. + Percentile or sequence of percentiles to compute, which must be between 0 and 100 inclusive. axis: None or int or list of int, optional - Axis or axes along which the quantiles are computed. The default is to compute the quantile(s) along a flattened version of the array. + Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array. """ if isinstance(q, (int | float)): q = [q] - for quantile in q: - if quantile < 0 or quantile > 1: - raise ValueError("Quantiles must be in the range [0, 1]") + for percentile in q: + if percentile < 0 or percentile > 100: + raise ValueError("Percentiles must be in the range [0, 100]") - percentiles = [100.0 * x for x in q] + quantiles = [x / 100 for x in q] - return percentile(input, percentiles, axis) + return quantile(input, quantiles, axis) # NumPy logical aliases From dcdeef77c923ffe9fb45a8701507f8030e9390c4 Mon Sep 17 00:00:00 2001 From: Dhruvanshu-Joshi Date: Tue, 15 Oct 2024 20:46:53 +0530 Subject: [PATCH 4/4] Vectorised percentile/quantile --- pytensor/tensor/math.py | 118 +++++++++++++++++--------------------- tests/tensor/test_math.py | 1 + 2 files changed, 53 insertions(+), 66 deletions(-) diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py index 688add6352..aa1fdbdf26 100644 --- a/pytensor/tensor/math.py +++ b/pytensor/tensor/math.py @@ -2926,99 +2926,85 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None): return x -def quantile(input, q, axis=None): +def quantile(x: TensorLike, q: float | list[float], axis=None) -> TensorVariable: """ - Computes the quantile along the given axis(es) of a tensor `input` using linear interpolation. + Computes the q-th quantile along the given axis(es) of a tensor `input`. Parameters ---------- - input: TensorVariable + x: TensorVariable The input tensor. - q: float or list of floats - Quantile or sequence of quantiles to compute, which must be between 0 and 1 inclusive. - axis: None or int or list of int, optional - Axis or axes along which the quantiles are computed. The default is to compute the quantile(s) along a flattened version of the array. + q: float or list of float + The quantile(s) to compute, which must be between 0 and 1 inclusive. + 0 corresponds to the minimum, 0.5 to the median, and 1 to the maximum. + axis: None or int or (list of int) (see `Sum`) + Compute the quantile along this axis of the tensor. + None means all axes (like numpy). """ - x = as_tensor_variable(input) - x_ndim = x.type.ndim + x = as_tensor_variable(x) + q = as_tensor_variable(q) + x_ndim = x.type.ndim if axis is None: axis = list(range(x_ndim)) - elif isinstance(axis, (int | np.integer)): - axis = [axis] - elif isinstance(axis, np.ndarray) and axis.ndim == 0: - axis = [int(axis)] else: - axis = [int(a) for a in axis] - - # Compute the shape of the remaining axes - new_axes_order = [i for i in range(x.ndim) if i not in axis] + list(axis) - x = x.dimshuffle(new_axes_order) - input_shape = shape(x) - remaining_axis_size = input_shape[: x.ndim - len(axis)] - x = x.reshape((*remaining_axis_size, -1)) - - # Sort the input tensor along the specified axis - sorted_input = x.sort(axis=-1) - slices1 = [slice(None)] * sorted_input.ndim - slices2 = [slice(None)] * sorted_input.ndim - input_shape = x.shape[-1] + axis = list(normalize_axis_tuple(axis, x_ndim)) - if isinstance(q, (int | float)): - q = [q] + non_axis = [i for i in range(x_ndim) if i not in axis] + non_axis_shape = [x.shape[i] for i in non_axis] - for quantile in q: - if quantile < 0 or quantile > 1: - raise ValueError("Quantiles must be in the range [0, 1]") + # Put axis at the end and unravel them + x_raveled = x.transpose(*non_axis, *axis) + if len(axis) > 1: + x_raveled = x_raveled.reshape((*non_axis_shape, -1)) + raveled_size = x_raveled.shape[-1] - result = [] - for quantile in q: - k = (quantile) * (input_shape - 1) - k_floor = floor(k).astype("int64") - k_ceil = ceil(k).astype("int64") + # Ensure q is between 0 and 1 + q = clip(q, 0.0, 1.0) - slices1[-1] = slice(k_floor, k_floor + 1) - slices2[-1] = slice(k_ceil, k_ceil + 1) - val1 = sorted_input[tuple(slices1)] - val2 = sorted_input[tuple(slices2)] + # Compute quantile indices + k = (q * (raveled_size - 1)).astype("int64") + k_float = q * (raveled_size - 1) - d = k - k_floor - quantile_val = val1 + d * (val2 - val1) + # Sort the input tensor along the specified axis + x_sorted = x_raveled.sort(axis=-1) - result.append(quantile_val.squeeze(axis=-1)) + # Get the values at index k and k + 1 for linear interpolation + k_values = x_sorted[..., k] + kp1_values = x_sorted[..., minimum(k + 1, raveled_size - 1)] - if len(result) == 1: - result = result[0] - else: - result = stack(result) + # Interpolation between the two values if needed + frac = k_float - k.astype(k_float.dtype) + quantile_value = (1 - frac) * k_values + frac * kp1_values - result.name = "quantile" - return result + return quantile_value -def percentile(input, q, axis=None): +def percentile(x: TensorLike, p: float | list[float], axis=None) -> TensorVariable: """ - Computes the percentile along the given axis(es) of a tensor `input` using linear interpolation. + Computes the p-th percentile along the given axis(es) of a tensor `input`. Parameters ---------- - input: TensorVariable + x: TensorVariable The input tensor. - q: float or list of floats - Percentile or sequence of percentiles to compute, which must be between 0 and 100 inclusive. - axis: None or int or list of int, optional - Axis or axes along which the percentiles are computed. The default is to compute the percentile(s) along a flattened version of the array. - """ - if isinstance(q, (int | float)): - q = [q] - - for percentile in q: - if percentile < 0 or percentile > 100: - raise ValueError("Percentiles must be in the range [0, 100]") + p: float or list of float + The percentile(s) to compute, which must be between 0 and 100 inclusive. + 0 corresponds to the minimum, 50 to the median, and 100 to the maximum. + axis: None or int or (list of int) (see `Sum`) + Compute the percentile along this axis of the tensor. + None means all axes (like numpy). - quantiles = [x / 100 for x in q] + Returns + ------- + TensorVariable + The computed percentile values. + """ + # Convert percentiles (0-100) to quantiles (0-1) + q = as_tensor_variable(p) / 100.0 - return quantile(input, quantiles, axis) + # Call the quantile function + return quantile(x, q, axis=axis) # NumPy logical aliases diff --git a/tests/tensor/test_math.py b/tests/tensor/test_math.py index c4de265914..11a54a6685 100644 --- a/tests/tensor/test_math.py +++ b/tests/tensor/test_math.py @@ -3805,6 +3805,7 @@ def test_percentile(ndim, axis, q): (3, None, [0.25, 0.75]), (3, 0, 0.5), (3, (1, 2), 0.5), + (3, (1, 2), [0.1, 0.9]), ], ) def test_quantile(ndim, axis, q):