
Commit b561c5f

jeyoor authored and twiecki committed
ENH Add beta fragility and gpd risk estimates (#118)
* ENH Add beta_fragility_heuristic and gpd_risk_estimates functions
* Fix formatting according to PEP8/flake8
* Fix PEP8 warning W503 line break before binary operator
* Use numpy.around for consistency between python 2 and 3
* Fix length of zero lists returned from gpd_risk_estimates
* Clarify variable names, fix thresholds, use proper list lengths
* Fix broken merge - old tests work - new tests still failing
* Have tests assume input is aligned
  - fixes TypeError: cannot concatenate a non-NDFrame object
  - also, switch the functions from SIMPLE_STAT_FUNCS to FACTOR_STAT_FUNCS
* Fix formatting to prevent flake8 errors
* remove merge backup files
* remove one more merge backup file
* Switch to np.zeros and fix comment formatting
* fix ReST formatting for notes headers
1 parent 4cb0508 commit b561c5f
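
For orientation, here is a minimal, hypothetical usage sketch of the two estimators this commit adds. The sample data and seed are illustrative, not part of the changeset; gpd_risk_estimates_aligned is called directly because the input here is a single, already-aligned series, and a pandas version that still provides as_matrix (which the new code calls) is assumed.

```python
import numpy as np
import pandas as pd

from empyrical import beta_fragility_heuristic, gpd_risk_estimates_aligned

# illustrative daily return series for a strategy and a benchmark factor
rng = np.random.RandomState(42)
returns = pd.Series(rng.normal(0.0005, 0.01, 250))
factor_returns = pd.Series(rng.normal(0.0004, 0.012, 250))

# scalar heuristic; a negative value flags potential losses when beta moves
fragility = beta_fragility_heuristic(returns, factor_returns)

# [threshold, scale_param, shape_param, var_estimate, es_estimate]
# for the 1% tail (all zeros if no acceptable GPD fit is found)
gpd_result = gpd_risk_estimates_aligned(returns, var_p=0.01)

print(fragility)
print(gpd_result)
```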

File tree

3 files changed: +373 -4 lines changed

empyrical/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -29,6 +29,10 @@
     beta,
     beta_aligned,
     cagr,
+    beta_fragility_heuristic,
+    beta_fragility_heuristic_aligned,
+    gpd_risk_estimates,
+    gpd_risk_estimates_aligned,
     calmar_ratio,
     capture,
     conditional_value_at_risk,

empyrical/stats.py

Lines changed: 322 additions & 3 deletions
@@ -18,8 +18,10 @@
 import math
 import pandas as pd
 import numpy as np
-from scipy import stats
+from math import pow
+from scipy import stats, optimize
 from six import iteritems
+from sys import float_info
 
 from .utils import nanmean, nanstd, nanmin, up, down, roll, rolling_window
 from .periods import ANNUALIZATION_FACTORS, APPROX_BDAYS_PER_YEAR
@@ -1527,8 +1529,7 @@ def tail_ratio(returns):
 
 
 def capture(returns, factor_returns, period=DAILY):
-    """
-    Compute capture ratio.
+    """Compute capture ratio.
 
     Parameters
     ----------
@@ -1561,6 +1562,322 @@ def capture(returns, factor_returns, period=DAILY):
             annual_return(factor_returns, period=period))
 
 
+def beta_fragility_heuristic(returns, factor_returns):
+    """Estimate fragility to drops in beta.
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    factor_returns : pd.Series or np.ndarray
+        Daily noncumulative returns of the factor to which beta is
+        computed. Usually a benchmark such as the market.
+        - This is in the same style as returns.
+
+    Returns
+    -------
+    float, np.nan
+        The beta fragility of the strategy.
+
+    Note
+    ----
+    A negative return value indicates potential losses
+    could follow volatility in beta.
+    The magnitude of the negative value indicates the size of
+    the potential loss.
+    seealso::
+        `A New Heuristic Measure of Fragility and
+        Tail Risks: Application to Stress Testing`
+        https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf
+        An IMF Working Paper describing the heuristic
+    """
+    if len(returns) < 3 or len(factor_returns) < 3:
+        return np.nan
+
+    return beta_fragility_heuristic_aligned(
+        *_aligned_series(returns, factor_returns))
+
+
+def beta_fragility_heuristic_aligned(returns, factor_returns):
+    """Estimate fragility to drops in beta.
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    factor_returns : pd.Series or np.ndarray
+        Daily noncumulative returns of the factor to which beta is
+        computed. Usually a benchmark such as the market.
+        - This is in the same style as returns.
+
+    Returns
+    -------
+    float, np.nan
+        The beta fragility of the strategy.
+
+    Note
+    ----
+    If they are pd.Series, expects returns and factor_returns have already
+    been aligned on their labels. If np.ndarray, these arguments should have
+    the same shape.
+    seealso::
+        `A New Heuristic Measure of Fragility and
+        Tail Risks: Application to Stress Testing`
+        https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf
+        An IMF Working Paper describing the heuristic
+    """
+    if len(returns) < 3 or len(factor_returns) < 3:
+        return np.nan
+
+    # combine returns and factor returns into pairs
+    returns_series = pd.Series(returns)
+    factor_returns_series = pd.Series(factor_returns)
+    pairs = pd.concat([returns_series, factor_returns_series], axis=1)
+    pairs.columns = ['returns', 'factor_returns']
+
+    # exclude any rows where returns are nan
+    pairs = pairs.dropna()
+    # sort by beta
+    pairs = pairs.sort_values(by='factor_returns')
+
+    # find the three vectors, using median of 3
+    start_index = 0
+    mid_index = int(np.around(len(pairs) / 2, 0))
+    end_index = len(pairs) - 1
+
+    (start_returns, start_factor_returns) = pairs.iloc[start_index]
+    (mid_returns, mid_factor_returns) = pairs.iloc[mid_index]
+    (end_returns, end_factor_returns) = pairs.iloc[end_index]
+
+    factor_returns_range = (end_factor_returns - start_factor_returns)
+    start_returns_weight = 0.5
+    end_returns_weight = 0.5
+
+    # find weights for the start and end returns
+    # using a convex combination
+    if not factor_returns_range == 0:
+        start_returns_weight = \
+            (mid_factor_returns - start_factor_returns) / \
+            factor_returns_range
+        end_returns_weight = \
+            (end_factor_returns - mid_factor_returns) / \
+            factor_returns_range
+
+    # calculate fragility heuristic
+    heuristic = (start_returns_weight*start_returns) + \
+        (end_returns_weight*end_returns) - mid_returns
+
+    return heuristic
+
+
+def gpd_risk_estimates(returns, var_p=0.01):
+    """Estimate VaR and ES using the Generalized Pareto Distribution (GPD)
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    var_p : float
+        The percentile to use for estimating the VaR and ES
+
+    Returns
+    -------
+    [threshold, scale_param, shape_param, var_estimate, es_estimate]
+        : list[float]
+        threshold - the threshold used to cut off exceptional tail losses
+        scale_param - a parameter (often denoted by sigma, capturing the
+            scale, related to variance)
+        shape_param - a parameter (often denoted by xi, capturing the shape or
+            type of the distribution)
+        var_estimate - an estimate for the VaR for the given percentile
+        es_estimate - an estimate for the ES for the given percentile
+
+    Note
+    ----
+    seealso::
+        `An Application of Extreme Value Theory for
+        Measuring Risk <https://link.springer.com/article/10.1007/s10614-006-9025-7>`
+        A paper describing how to use the Generalized Pareto
+        Distribution to estimate VaR and ES.
+    """
+    if len(returns) < 3:
+        result = np.zeros(5)
+        if isinstance(returns, pd.Series):
+            result = pd.Series(result)
+        return result
+    return gpd_risk_estimates_aligned(*_aligned_series(returns, var_p))
+
+
+def gpd_risk_estimates_aligned(returns, var_p=0.01):
+    """Estimate VaR and ES using the Generalized Pareto Distribution (GPD)
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    var_p : float
+        The percentile to use for estimating the VaR and ES
+
+    Returns
+    -------
+    [threshold, scale_param, shape_param, var_estimate, es_estimate]
+        : list[float]
+        threshold - the threshold used to cut off exceptional tail losses
+        scale_param - a parameter (often denoted by sigma, capturing the
+            scale, related to variance)
+        shape_param - a parameter (often denoted by xi, capturing the shape or
+            type of the distribution)
+        var_estimate - an estimate for the VaR for the given percentile
+        es_estimate - an estimate for the ES for the given percentile
+
+    Note
+    ----
+    seealso::
+        `An Application of Extreme Value Theory for
+        Measuring Risk <https://link.springer.com/article/10.1007/s10614-006-9025-7>`
+        A paper describing how to use the Generalized Pareto
+        Distribution to estimate VaR and ES.
+    """
+    result = np.zeros(5)
+    if not len(returns) < 3:
+
+        DEFAULT_THRESHOLD = 0.2
+        MINIMUM_THRESHOLD = 0.000000001
+        returns_array = pd.Series(returns).as_matrix()
+        flipped_returns = -1 * returns_array
+        losses = flipped_returns[flipped_returns > 0]
+        threshold = DEFAULT_THRESHOLD
+        finished = False
+        scale_param = 0
+        shape_param = 0
+        while not finished and threshold > MINIMUM_THRESHOLD:
+            losses_beyond_threshold = \
+                losses[losses >= threshold]
+            param_result = \
+                gpd_loglikelihood_minimizer_aligned(losses_beyond_threshold)
+            if (param_result[0] is not False and
+                    param_result[1] is not False):
+                scale_param = param_result[0]
+                shape_param = param_result[1]
+                var_estimate = gpd_var_calculator(threshold, scale_param,
+                                                  shape_param, var_p,
+                                                  len(losses),
+                                                  len(losses_beyond_threshold))
+                # non-negative shape parameter is required for fat tails
+                # non-negative VaR estimate is required for loss of some kind
+                if (shape_param > 0 and var_estimate > 0):
+                    finished = True
+            if (not finished):
+                threshold = threshold / 2
+        if (finished):
+            es_estimate = gpd_es_calculator(var_estimate, threshold,
+                                            scale_param, shape_param)
+            result = np.array([threshold, scale_param, shape_param,
+                               var_estimate, es_estimate])
+        if isinstance(returns, pd.Series):
+            result = pd.Series(result)
+    return result
+
+
+def gpd_es_calculator(var_estimate, threshold, scale_param,
+                      shape_param):
+    result = 0
+    if ((1 - shape_param) != 0):
+        # this formula is from Gilli and Kellezi pg. 8
+        var_ratio = (var_estimate/(1 - shape_param))
+        param_ratio = ((scale_param - (shape_param * threshold)) /
+                       (1 - shape_param))
+        result = var_ratio + param_ratio
+    return result
+
+
+def gpd_var_calculator(threshold, scale_param, shape_param,
+                       probability, total_n, exceedance_n):
+    result = 0
+    if (exceedance_n > 0 and shape_param > 0):
+        # this formula is from Gilli and Kellezi pg. 12
+        param_ratio = scale_param / shape_param
+        prob_ratio = (total_n/exceedance_n) * probability
+        result = threshold + (param_ratio *
+                              (pow(prob_ratio, -shape_param) - 1))
+    return result
+
+
+def gpd_loglikelihood_minimizer_aligned(price_data):
+    result = [False, False]
+    DEFAULT_SCALE_PARAM = 1
+    DEFAULT_SHAPE_PARAM = 1
+    if (len(price_data) > 0):
+        gpd_loglikelihood_lambda = \
+            gpd_loglikelihood_factory(price_data)
+        optimization_results = \
+            optimize.minimize(gpd_loglikelihood_lambda,
+                              [DEFAULT_SCALE_PARAM,
+                               DEFAULT_SHAPE_PARAM],
+                              method='Nelder-Mead')
+        if optimization_results.success:
+            resulting_params = optimization_results.x
+            if len(resulting_params) == 2:
+                result[0] = resulting_params[0]
+                result[1] = resulting_params[1]
+    return result
+
+
+def gpd_loglikelihood_factory(price_data):
+    return lambda params: gpd_loglikelihood(params, price_data)
+
+
+def gpd_loglikelihood(params, price_data):
+    if (params[1] != 0):
+        return -gpd_loglikelihood_scale_and_shape(params[0],
+                                                  params[1],
+                                                  price_data)
+    else:
+        return -gpd_loglikelihood_scale_only(params[0], price_data)
+
+
+def gpd_loglikelihood_scale_and_shape_factory(price_data):
+    # minimizing a function of two variables requires a list of params
+    # we are expecting the lambda below to be called as follows:
+    # parameters = [scale, shape]
+    # the final outer negative is added because scipy only minimizes
+    return lambda params: \
+        -gpd_loglikelihood_scale_and_shape(params[0],
+                                           params[1],
+                                           price_data)
+
+
+def gpd_loglikelihood_scale_and_shape(scale, shape, price_data):
+    n = len(price_data)
+    result = -1 * float_info.max
+    if (scale != 0):
+        param_factor = shape / scale
+        if (shape != 0 and param_factor >= 0 and scale >= 0):
+            result = ((-n * np.log(scale)) -
+                      (((1 / shape) + 1) *
+                       (np.log((shape / scale * price_data) + 1)).sum()))
+    return result
+
+
+def gpd_loglikelihood_scale_only_factory(price_data):
+    # the negative is added because scipy only minimizes
+    return lambda scale: \
+        -gpd_loglikelihood_scale_only(scale, price_data)
+
+
+def gpd_loglikelihood_scale_only(scale, price_data):
+    n = len(price_data)
+    data_sum = price_data.sum()
+    result = -1 * float_info.max
+    if (scale >= 0):
+        result = ((-n*np.log(scale)) - (data_sum/scale))
+    return result
+
+
 def up_capture(returns, factor_returns, **kwargs):
     """
     Compute the capture ratio for periods when the benchmark return is positive
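
For reference, the measure implemented in beta_fragility_heuristic_aligned above reduces to one expression. Writing it out (the notation below is mine, not from the changeset): with the (return, factor return) pairs sorted by factor return, and (r_s, f_s), (r_m, f_m), (r_e, f_e) denoting the start, middle, and end pairs,

$$
w_s = \frac{f_m - f_s}{f_e - f_s}, \qquad
w_e = \frac{f_e - f_m}{f_e - f_s}, \qquad
\text{heuristic} = w_s\, r_s + w_e\, r_e - r_m,
$$

with both weights falling back to 0.5 when f_e = f_s. Read directly from the formula, a negative value means the mid-range return sits above the convex combination of the extreme returns, i.e. the strategy does relatively worse at the extremes of the factor's range, which is the fragility signal the docstring describes.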
@@ -1840,6 +2157,8 @@ def conditional_value_at_risk(returns, cutoff=0.05):
     excess_sharpe,
     alpha,
     beta,
+    beta_fragility_heuristic,
+    gpd_risk_estimates,
     capture,
     up_capture,
     down_capture
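
Likewise, gpd_var_calculator and gpd_es_calculator above implement the Gilli and Kellezi estimators cited in their comments. In the code's terms (the notation below is mine): with threshold u, scale sigma, shape xi, tail probability p (var_p), n total losses, and N_u losses beyond the threshold,

$$
\widehat{\mathrm{VaR}}_p = u + \frac{\sigma}{\xi}\left(\left(\frac{n}{N_u}\, p\right)^{-\xi} - 1\right),
\qquad
\widehat{\mathrm{ES}}_p = \frac{\widehat{\mathrm{VaR}}_p}{1-\xi} + \frac{\sigma - \xi u}{1-\xi}.
$$

The sigma and xi values come from the Nelder-Mead fit of the GPD log-likelihood in gpd_loglikelihood_minimizer_aligned, and the threshold u is halved from 0.2 until the fit yields a positive shape parameter and a positive VaR estimate.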
