
Commit b561c5f

jeyoor authored and twiecki committed
ENH Add beta fragility and gpd risk estimates (#118)
* ENH Add beta_fragility_heuristic and gpd_risk_estimates functions
* Fix formatting according to PEP8/flake8
* Fix PEP8 warning W503 line break before binary operator
* Use numpy.around for consistency between python 2 and 3
* Fix length of zero lists returned from gpd_risk_estimates
* Clarify variable names, fix thresholds, use proper list lengths
* Fix broken merge - old tests work - new tests still failing
* Have tests assume input is aligned
  - fixes TypeError: cannot concatenate a non-NDFrame object
  - also, switch the functions from SIMPLE_STAT_FUNCS to FACTOR_STAT_FUNCS
* Fix formatting to prevent flake8 errors
* remove merge backup files
* remove one more merge backup file
* Switch to np.zeros and fix comment formatting
* fix ReST formatting for notes headers
1 parent 4cb0508 commit b561c5f
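
For orientation, here is a minimal, hypothetical usage sketch of the two estimators this commit adds. The sample data and seed are illustrative, not part of the changeset; gpd_risk_estimates_aligned is called directly because the input here is a single, already-aligned series, and a pandas version that still provides as_matrix (which the new code calls) is assumed.

```python
import numpy as np
import pandas as pd

from empyrical import beta_fragility_heuristic, gpd_risk_estimates_aligned

# illustrative daily return series for a strategy and a benchmark factor
rng = np.random.RandomState(42)
returns = pd.Series(rng.normal(0.0005, 0.01, 250))
factor_returns = pd.Series(rng.normal(0.0004, 0.012, 250))

# scalar heuristic; a negative value flags potential losses when beta moves
fragility = beta_fragility_heuristic(returns, factor_returns)

# [threshold, scale_param, shape_param, var_estimate, es_estimate]
# for the 1% tail (all zeros if no acceptable GPD fit is found)
gpd_result = gpd_risk_estimates_aligned(returns, var_p=0.01)

print(fragility)
print(gpd_result)
```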

File tree

3 files changed: +373 -4 lines changed

empyrical/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -29,6 +29,10 @@
     beta,
     beta_aligned,
     cagr,
+    beta_fragility_heuristic,
+    beta_fragility_heuristic_aligned,
+    gpd_risk_estimates,
+    gpd_risk_estimates_aligned,
     calmar_ratio,
     capture,
     conditional_value_at_risk,

empyrical/stats.py

Lines changed: 322 additions & 3 deletions
@@ -18,8 +18,10 @@
 import math
 import pandas as pd
 import numpy as np
-from scipy import stats
+from math import pow
+from scipy import stats, optimize
 from six import iteritems
+from sys import float_info
 
 from .utils import nanmean, nanstd, nanmin, up, down, roll, rolling_window
 from .periods import ANNUALIZATION_FACTORS, APPROX_BDAYS_PER_YEAR
@@ -1527,8 +1529,7 @@ def tail_ratio(returns):
 
 
 def capture(returns, factor_returns, period=DAILY):
-    """
-    Compute capture ratio.
+    """Compute capture ratio.
 
     Parameters
     ----------
@@ -1561,6 +1562,322 @@ def capture(returns, factor_returns, period=DAILY):
             annual_return(factor_returns, period=period))
 
 
+def beta_fragility_heuristic(returns, factor_returns):
+    """Estimate fragility to drops in beta.
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    factor_returns : pd.Series or np.ndarray
+        Daily noncumulative returns of the factor to which beta is
+        computed. Usually a benchmark such as the market.
+        - This is in the same style as returns.
+
+    Returns
+    -------
+    float, np.nan
+        The beta fragility of the strategy.
+
+    Note
+    ----
+    A negative return value indicates potential losses
+    could follow volatility in beta.
+    The magnitude of the negative value indicates the size of
+    the potential loss.
+    seealso::
+        `A New Heuristic Measure of Fragility and
+        Tail Risks: Application to Stress Testing`
+        https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf
+        An IMF Working Paper describing the heuristic
+    """
+    if len(returns) < 3 or len(factor_returns) < 3:
+        return np.nan
+
+    return beta_fragility_heuristic_aligned(
+        *_aligned_series(returns, factor_returns))
+
+
+def beta_fragility_heuristic_aligned(returns, factor_returns):
+    """Estimate fragility to drops in beta.
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    factor_returns : pd.Series or np.ndarray
+        Daily noncumulative returns of the factor to which beta is
+        computed. Usually a benchmark such as the market.
+        - This is in the same style as returns.
+
+    Returns
+    -------
+    float, np.nan
+        The beta fragility of the strategy.
+
+    Note
+    ----
+    If they are pd.Series, expects returns and factor_returns have already
+    been aligned on their labels. If np.ndarray, these arguments should have
+    the same shape.
+    seealso::
+        `A New Heuristic Measure of Fragility and
+        Tail Risks: Application to Stress Testing`
+        https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf
+        An IMF Working Paper describing the heuristic
+    """
+    if len(returns) < 3 or len(factor_returns) < 3:
+        return np.nan
+
+    # combine returns and factor returns into pairs
+    returns_series = pd.Series(returns)
+    factor_returns_series = pd.Series(factor_returns)
+    pairs = pd.concat([returns_series, factor_returns_series], axis=1)
+    pairs.columns = ['returns', 'factor_returns']
+
+    # exclude any rows where returns are nan
+    pairs = pairs.dropna()
+    # sort by beta
+    pairs = pairs.sort_values(by='factor_returns')
+
+    # find the three vectors, using median of 3
+    start_index = 0
+    mid_index = int(np.around(len(pairs) / 2, 0))
+    end_index = len(pairs) - 1
+
+    (start_returns, start_factor_returns) = pairs.iloc[start_index]
+    (mid_returns, mid_factor_returns) = pairs.iloc[mid_index]
+    (end_returns, end_factor_returns) = pairs.iloc[end_index]
+
+    factor_returns_range = (end_factor_returns - start_factor_returns)
+    start_returns_weight = 0.5
+    end_returns_weight = 0.5
+
+    # find weights for the start and end returns
+    # using a convex combination
+    if not factor_returns_range == 0:
+        start_returns_weight = \
+            (mid_factor_returns - start_factor_returns) / \
+            factor_returns_range
+        end_returns_weight = \
+            (end_factor_returns - mid_factor_returns) / \
+            factor_returns_range
+
+    # calculate fragility heuristic
+    heuristic = (start_returns_weight*start_returns) + \
+        (end_returns_weight*end_returns) - mid_returns
+
+    return heuristic
+
+
+def gpd_risk_estimates(returns, var_p=0.01):
+    """Estimate VaR and ES using the Generalized Pareto Distribution (GPD)
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    var_p : float
+        The percentile to use for estimating the VaR and ES
+
+    Returns
+    -------
+    [threshold, scale_param, shape_param, var_estimate, es_estimate]
+        : list[float]
+        threshold - the threshold used to cut off exceptional tail losses
+        scale_param - a parameter (often denoted by sigma, capturing the
+            scale, related to variance)
+        shape_param - a parameter (often denoted by xi, capturing the shape or
+            type of the distribution)
+        var_estimate - an estimate for the VaR for the given percentile
+        es_estimate - an estimate for the ES for the given percentile
+
+    Note
+    ----
+    seealso::
+        `An Application of Extreme Value Theory for
+        Measuring Risk <https://link.springer.com/article/10.1007/s10614-006-9025-7>`
+        A paper describing how to use the Generalized Pareto
+        Distribution to estimate VaR and ES.
+    """
+    if len(returns) < 3:
+        result = np.zeros(5)
+        if isinstance(returns, pd.Series):
+            result = pd.Series(result)
+        return result
+    return gpd_risk_estimates_aligned(*_aligned_series(returns, var_p))
+
+
+def gpd_risk_estimates_aligned(returns, var_p=0.01):
+    """Estimate VaR and ES using the Generalized Pareto Distribution (GPD)
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    var_p : float
+        The percentile to use for estimating the VaR and ES
+
+    Returns
+    -------
+    [threshold, scale_param, shape_param, var_estimate, es_estimate]
+        : list[float]
+        threshold - the threshold used to cut off exceptional tail losses
+        scale_param - a parameter (often denoted by sigma, capturing the
+            scale, related to variance)
+        shape_param - a parameter (often denoted by xi, capturing the shape or
+            type of the distribution)
+        var_estimate - an estimate for the VaR for the given percentile
+        es_estimate - an estimate for the ES for the given percentile
+
+    Note
+    ----
+    seealso::
+        `An Application of Extreme Value Theory for
+        Measuring Risk <https://link.springer.com/article/10.1007/s10614-006-9025-7>`
+        A paper describing how to use the Generalized Pareto
+        Distribution to estimate VaR and ES.
+    """
+    result = np.zeros(5)
+    if not len(returns) < 3:
+
+        DEFAULT_THRESHOLD = 0.2
+        MINIMUM_THRESHOLD = 0.000000001
+        returns_array = pd.Series(returns).as_matrix()
+        flipped_returns = -1 * returns_array
+        losses = flipped_returns[flipped_returns > 0]
+        threshold = DEFAULT_THRESHOLD
+        finished = False
+        scale_param = 0
+        shape_param = 0
+        while not finished and threshold > MINIMUM_THRESHOLD:
+            losses_beyond_threshold = \
+                losses[losses >= threshold]
+            param_result = \
+                gpd_loglikelihood_minimizer_aligned(losses_beyond_threshold)
+            if (param_result[0] is not False and
+                    param_result[1] is not False):
+                scale_param = param_result[0]
+                shape_param = param_result[1]
+                var_estimate = gpd_var_calculator(threshold, scale_param,
+                                                  shape_param, var_p,
+                                                  len(losses),
+                                                  len(losses_beyond_threshold))
+                # non-negative shape parameter is required for fat tails
+                # non-negative VaR estimate is required for loss of some kind
+                if (shape_param > 0 and var_estimate > 0):
+                    finished = True
+            if (not finished):
+                threshold = threshold / 2
+        if (finished):
+            es_estimate = gpd_es_calculator(var_estimate, threshold,
+                                            scale_param, shape_param)
+            result = np.array([threshold, scale_param, shape_param,
+                               var_estimate, es_estimate])
+        if isinstance(returns, pd.Series):
+            result = pd.Series(result)
+    return result
+
+
+def gpd_es_calculator(var_estimate, threshold, scale_param,
+                      shape_param):
+    result = 0
+    if ((1 - shape_param) != 0):
+        # this formula is from Gilli and Kellezi pg. 8
+        var_ratio = (var_estimate/(1 - shape_param))
+        param_ratio = ((scale_param - (shape_param * threshold)) /
+                       (1 - shape_param))
+        result = var_ratio + param_ratio
+    return result
+
+
+def gpd_var_calculator(threshold, scale_param, shape_param,
+                       probability, total_n, exceedance_n):
+    result = 0
+    if (exceedance_n > 0 and shape_param > 0):
+        # this formula is from Gilli and Kellezi pg. 12
+        param_ratio = scale_param / shape_param
+        prob_ratio = (total_n/exceedance_n) * probability
+        result = threshold + (param_ratio *
+                              (pow(prob_ratio, -shape_param) - 1))
+    return result
+
+
+def gpd_loglikelihood_minimizer_aligned(price_data):
+    result = [False, False]
+    DEFAULT_SCALE_PARAM = 1
+    DEFAULT_SHAPE_PARAM = 1
+    if (len(price_data) > 0):
+        gpd_loglikelihood_lambda = \
+            gpd_loglikelihood_factory(price_data)
+        optimization_results = \
+            optimize.minimize(gpd_loglikelihood_lambda,
+                              [DEFAULT_SCALE_PARAM,
+                               DEFAULT_SHAPE_PARAM],
+                              method='Nelder-Mead')
+        if optimization_results.success:
+            resulting_params = optimization_results.x
+            if len(resulting_params) == 2:
+                result[0] = resulting_params[0]
+                result[1] = resulting_params[1]
+    return result
+
+
+def gpd_loglikelihood_factory(price_data):
+    return lambda params: gpd_loglikelihood(params, price_data)
+
+
+def gpd_loglikelihood(params, price_data):
+    if (params[1] != 0):
+        return -gpd_loglikelihood_scale_and_shape(params[0],
+                                                  params[1],
+                                                  price_data)
+    else:
+        return -gpd_loglikelihood_scale_only(params[0], price_data)
+
+
+def gpd_loglikelihood_scale_and_shape_factory(price_data):
+    # minimizing a function of two variables requires a list of params
+    # we are expecting the lambda below to be called as follows:
+    # parameters = [scale, shape]
+    # the final outer negative is added because scipy only minimizes
+    return lambda params: \
+        -gpd_loglikelihood_scale_and_shape(params[0],
+                                           params[1],
+                                           price_data)
+
+
+def gpd_loglikelihood_scale_and_shape(scale, shape, price_data):
+    n = len(price_data)
+    result = -1 * float_info.max
+    if (scale != 0):
+        param_factor = shape / scale
+        if (shape != 0 and param_factor >= 0 and scale >= 0):
+            result = ((-n * np.log(scale)) -
+                      (((1 / shape) + 1) *
+                       (np.log((shape / scale * price_data) + 1)).sum()))
+    return result
+
+
+def gpd_loglikelihood_scale_only_factory(price_data):
+    # the negative is added because scipy only minimizes
+    return lambda scale: \
+        -gpd_loglikelihood_scale_only(scale, price_data)
+
+
+def gpd_loglikelihood_scale_only(scale, price_data):
+    n = len(price_data)
+    data_sum = price_data.sum()
+    result = -1 * float_info.max
+    if (scale >= 0):
+        result = ((-n*np.log(scale)) - (data_sum/scale))
+    return result
+
+
 def up_capture(returns, factor_returns, **kwargs):
     """
     Compute the capture ratio for periods when the benchmark return is positive
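
For reference, the measure implemented in beta_fragility_heuristic_aligned above reduces to one expression. Writing it out (the notation below is mine, not from the changeset): with the (return, factor return) pairs sorted by factor return, and (r_s, f_s), (r_m, f_m), (r_e, f_e) denoting the start, middle, and end pairs,

$$
w_s = \frac{f_m - f_s}{f_e - f_s}, \qquad
w_e = \frac{f_e - f_m}{f_e - f_s}, \qquad
\text{heuristic} = w_s\, r_s + w_e\, r_e - r_m,
$$

with both weights falling back to 0.5 when f_e = f_s. Read directly from the formula, a negative value means the mid-range return sits above the convex combination of the extreme returns, i.e. the strategy does relatively worse at the extremes of the factor's range, which is the fragility signal the docstring describes.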
@@ -1840,6 +2157,8 @@ def conditional_value_at_risk(returns, cutoff=0.05):
     excess_sharpe,
     alpha,
     beta,
+    beta_fragility_heuristic,
+    gpd_risk_estimates,
     capture,
     up_capture,
     down_capture
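
Likewise, gpd_var_calculator and gpd_es_calculator above implement the Gilli and Kellezi estimators cited in their comments. In the code's terms (the notation below is mine): with threshold u, scale sigma, shape xi, tail probability p (var_p), n total losses, and N_u losses beyond the threshold,

$$
\widehat{\mathrm{VaR}}_p = u + \frac{\sigma}{\xi}\left(\left(\frac{n}{N_u}\, p\right)^{-\xi} - 1\right),
\qquad
\widehat{\mathrm{ES}}_p = \frac{\widehat{\mathrm{VaR}}_p}{1-\xi} + \frac{\sigma - \xi u}{1-\xi}.
$$

The sigma and xi values come from the Nelder-Mead fit of the GPD log-likelihood in gpd_loglikelihood_minimizer_aligned, and the threshold u is halved from 0.2 until the fit yields a positive shape parameter and a positive VaR estimate.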
