 import math
 import pandas as pd
 import numpy as np
-from scipy import stats
+from math import pow
+from scipy import stats, optimize
 from six import iteritems
+from sys import float_info
 
 from .utils import nanmean, nanstd, nanmin, up, down, roll, rolling_window
 from .periods import ANNUALIZATION_FACTORS, APPROX_BDAYS_PER_YEAR
@@ -1527,8 +1529,7 @@ def tail_ratio(returns):
 
 
 def capture(returns, factor_returns, period=DAILY):
-    """
-    Compute capture ratio.
+    """Compute capture ratio.
 
     Parameters
     ----------
@@ -1561,6 +1562,322 @@ def capture(returns, factor_returns, period=DAILY):
             annual_return(factor_returns, period=period))
 
 
+def beta_fragility_heuristic(returns, factor_returns):
+    """Estimate fragility to drops in beta.
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    factor_returns : pd.Series or np.ndarray
+        Daily noncumulative returns of the factor to which beta is
+        computed. Usually a benchmark such as the market.
+        - This is in the same style as returns.
+
+    Returns
+    -------
+    float or np.nan
+        The beta fragility of the strategy.
+
+    Note
+    ----
+    A negative return value indicates that potential losses
+    could follow volatility in beta.
+    The magnitude of the negative value indicates the size of
+    the potential loss.
+
+    .. seealso::
+        `A New Heuristic Measure of Fragility and Tail Risks:
+        Application to Stress Testing
+        <https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf>`_,
+        an IMF working paper describing the heuristic.
+    """
+    if len(returns) < 3 or len(factor_returns) < 3:
+        return np.nan
+
+    return beta_fragility_heuristic_aligned(
+        *_aligned_series(returns, factor_returns))
+
+
+def beta_fragility_heuristic_aligned(returns, factor_returns):
+    """Estimate fragility to drops in beta.
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    factor_returns : pd.Series or np.ndarray
+        Daily noncumulative returns of the factor to which beta is
+        computed. Usually a benchmark such as the market.
+        - This is in the same style as returns.
+
+    Returns
+    -------
+    float or np.nan
+        The beta fragility of the strategy.
+
+    Note
+    ----
+    If they are pd.Series, returns and factor_returns are expected to
+    already be aligned on their labels. If np.ndarray, the arguments
+    should have the same shape.
+
+    .. seealso::
+        `A New Heuristic Measure of Fragility and Tail Risks:
+        Application to Stress Testing
+        <https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf>`_,
+        an IMF working paper describing the heuristic.
+    """
+    if len(returns) < 3 or len(factor_returns) < 3:
+        return np.nan
+
+    # combine returns and factor returns into pairs
+    returns_series = pd.Series(returns)
+    factor_returns_series = pd.Series(factor_returns)
+    pairs = pd.concat([returns_series, factor_returns_series], axis=1)
+    pairs.columns = ['returns', 'factor_returns']
+
+    # exclude any rows where returns are nan
+    pairs = pairs.dropna()
+    # sort by factor returns
+    pairs = pairs.sort_values(by='factor_returns')
+
+    # take the first, median, and last rows of the sorted pairs
+    start_index = 0
+    mid_index = int(np.around(len(pairs) / 2, 0))
+    end_index = len(pairs) - 1
+
+    (start_returns, start_factor_returns) = pairs.iloc[start_index]
+    (mid_returns, mid_factor_returns) = pairs.iloc[mid_index]
+    (end_returns, end_factor_returns) = pairs.iloc[end_index]
+
+    factor_returns_range = end_factor_returns - start_factor_returns
+    start_returns_weight = 0.5
+    end_returns_weight = 0.5
+
+    # find weights for the start and end returns
+    # using a convex combination
+    if factor_returns_range != 0:
+        start_returns_weight = \
+            (mid_factor_returns - start_factor_returns) / \
+            factor_returns_range
+        end_returns_weight = \
+            (end_factor_returns - mid_factor_returns) / \
+            factor_returns_range
+
+    # calculate the fragility heuristic
+    heuristic = (start_returns_weight * start_returns) + \
+        (end_returns_weight * end_returns) - mid_returns
+
+    return heuristic
+
+
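To make the median-of-three convex combination concrete, here is a small worked check of the heuristic. The return series are hypothetical, and the import path assumes the patched module is importable as `empyrical.stats`:

```python
import pandas as pd

# Hypothetical: assumes the patched module is importable as empyrical.stats.
from empyrical.stats import beta_fragility_heuristic_aligned

returns = pd.Series([0.02, -0.01, 0.03, -0.02, 0.01])
factor_returns = pd.Series([0.01, -0.02, 0.02, -0.03, 0.00])

# Sorted by factor returns, the (returns, factor) rows at start/mid/end are
# (-0.02, -0.03), (0.01, 0.00), and (0.03, 0.02), so the weights are
# (0.00 - -0.03) / 0.05 = 0.6 and (0.02 - 0.00) / 0.05 = 0.4, giving
# 0.6 * -0.02 + 0.4 * 0.03 - 0.01 = -0.01 (negative, i.e. fragile).
print(beta_fragility_heuristic_aligned(returns, factor_returns))  # ~ -0.01
```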
+def gpd_risk_estimates(returns, var_p=0.01):
+    """Estimate VaR and ES using the Generalized Pareto Distribution (GPD).
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    var_p : float
+        The tail probability to use for estimating the VaR and ES.
+
+    Returns
+    -------
+    [threshold, scale_param, shape_param, var_estimate, es_estimate] :
+        list[float]
+        threshold - the threshold used to cut off exceedance tail losses
+        scale_param - a parameter (often denoted by sigma, capturing the
+            scale, related to variance)
+        shape_param - a parameter (often denoted by xi, capturing the shape
+            or type of the distribution)
+        var_estimate - an estimate of the VaR for the given probability
+        es_estimate - an estimate of the ES for the given probability
+
+    Note
+    ----
+    .. seealso::
+        `An Application of Extreme Value Theory for Measuring Risk
+        <https://link.springer.com/article/10.1007/s10614-006-9025-7>`_,
+        a paper describing how to use the Generalized Pareto
+        Distribution to estimate VaR and ES.
+    """
+    if len(returns) < 3:
+        result = np.zeros(5)
+        if isinstance(returns, pd.Series):
+            result = pd.Series(result)
+        return result
+    # var_p is a scalar, not a series, so there is nothing to align here
+    return gpd_risk_estimates_aligned(returns, var_p)
+
+
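A usage sketch for the wrapper above, on synthetic fat-tailed data (all values hypothetical; the import path assumes the patched module is importable as `empyrical.stats`). Note that the threshold-halving loop below is not guaranteed to find an acceptable fit, in which case all five outputs come back as zeros:

```python
import numpy as np
import pandas as pd

# Hypothetical: assumes the patched module is importable as empyrical.stats.
from empyrical.stats import gpd_risk_estimates

# Synthetic heavy-tailed daily returns, seeded for repeatability.
rng = np.random.RandomState(42)
returns = pd.Series(rng.standard_t(df=3, size=1000) * 0.01)

threshold, scale, shape, var_99, es_99 = gpd_risk_estimates(returns,
                                                            var_p=0.01)
# VaR and ES come back as positive loss magnitudes, because the estimator
# fits the GPD to the flipped (loss) side of the return distribution.
```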
+def gpd_risk_estimates_aligned(returns, var_p=0.01):
+    """Estimate VaR and ES using the Generalized Pareto Distribution (GPD).
+
+    Parameters
+    ----------
+    returns : pd.Series or np.ndarray
+        Daily returns of the strategy, noncumulative.
+        - See full explanation in :func:`~empyrical.stats.cum_returns`.
+    var_p : float
+        The tail probability to use for estimating the VaR and ES.
+
+    Returns
+    -------
+    [threshold, scale_param, shape_param, var_estimate, es_estimate] :
+        list[float]
+        threshold - the threshold used to cut off exceedance tail losses
+        scale_param - a parameter (often denoted by sigma, capturing the
+            scale, related to variance)
+        shape_param - a parameter (often denoted by xi, capturing the shape
+            or type of the distribution)
+        var_estimate - an estimate of the VaR for the given probability
+        es_estimate - an estimate of the ES for the given probability
+
+    Note
+    ----
+    .. seealso::
+        `An Application of Extreme Value Theory for Measuring Risk
+        <https://link.springer.com/article/10.1007/s10614-006-9025-7>`_,
+        a paper describing how to use the Generalized Pareto
+        Distribution to estimate VaR and ES.
+    """
+    result = np.zeros(5)
+    if len(returns) >= 3:
+        DEFAULT_THRESHOLD = 0.2
+        MINIMUM_THRESHOLD = 1e-9
+        returns_array = pd.Series(returns).values
+        flipped_returns = -1 * returns_array
+        losses = flipped_returns[flipped_returns > 0]
+        threshold = DEFAULT_THRESHOLD
+        finished = False
+        scale_param = 0
+        shape_param = 0
+        while not finished and threshold > MINIMUM_THRESHOLD:
+            losses_beyond_threshold = \
+                losses[losses >= threshold]
+            param_result = \
+                gpd_loglikelihood_minimizer_aligned(losses_beyond_threshold)
+            if (param_result[0] is not False and
+                    param_result[1] is not False):
+                scale_param = param_result[0]
+                shape_param = param_result[1]
+                var_estimate = gpd_var_calculator(
+                    threshold, scale_param, shape_param, var_p,
+                    len(losses), len(losses_beyond_threshold))
+                # a positive shape parameter is required for fat tails
+                # a positive VaR estimate is required for loss of some kind
+                if shape_param > 0 and var_estimate > 0:
+                    finished = True
+            if not finished:
+                # no acceptable fit yet; halve the threshold and retry
+                threshold = threshold / 2
+        if finished:
+            es_estimate = gpd_es_calculator(var_estimate, threshold,
+                                            scale_param, shape_param)
+            result = np.array([threshold, scale_param, shape_param,
+                               var_estimate, es_estimate])
+    if isinstance(returns, pd.Series):
+        result = pd.Series(result)
+    return result
+
+
+def gpd_es_calculator(var_estimate, threshold, scale_param,
+                      shape_param):
+    result = 0
+    if (1 - shape_param) != 0:
+        # this formula is from Gilli and Kellezi pg. 8
+        var_ratio = (var_estimate / (1 - shape_param))
+        param_ratio = ((scale_param - (shape_param * threshold)) /
+                       (1 - shape_param))
+        result = var_ratio + param_ratio
+    return result
+
+
+def gpd_var_calculator(threshold, scale_param, shape_param,
+                       probability, total_n, exceedance_n):
+    result = 0
+    if exceedance_n > 0 and shape_param > 0:
+        # this formula is from Gilli and Kellezi pg. 12
+        param_ratio = scale_param / shape_param
+        prob_ratio = (total_n / exceedance_n) * probability
+        result = threshold + (param_ratio *
+                              (pow(prob_ratio, -shape_param) - 1))
+    return result
+
+
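For reference, the closed forms these two helpers implement (from Gilli and Kellezi), with threshold u, scale sigma, shape xi, N observations, N_u exceedances, and tail probability p, can be checked numerically. The parameter values below are hypothetical, and the import path assumes the patched module is `empyrical.stats`:

```python
# Hypothetical: assumes the patched module is importable as empyrical.stats.
from empyrical.stats import gpd_es_calculator, gpd_var_calculator

# VaR_p = u + (sigma / xi) * ((N / N_u * p) ** (-xi) - 1)
# ES_p  = VaR_p / (1 - xi) + (sigma - xi * u) / (1 - xi)
u, sigma, xi = 0.02, 0.01, 0.25  # hypothetical threshold and GPD fit
N, N_u, p = 1000, 50, 0.01       # observations, exceedances, tail level

var_est = gpd_var_calculator(u, sigma, xi, p, N, N_u)
es_est = gpd_es_calculator(var_est, u, sigma, xi)
assert u < var_est < es_est  # ES dominates VaR beyond the threshold
```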
+def gpd_loglikelihood_minimizer_aligned(price_data):
+    result = [False, False]
+    DEFAULT_SCALE_PARAM = 1
+    DEFAULT_SHAPE_PARAM = 1
+    if len(price_data) > 0:
+        gpd_loglikelihood_lambda = \
+            gpd_loglikelihood_factory(price_data)
+        optimization_results = \
+            optimize.minimize(gpd_loglikelihood_lambda,
+                              [DEFAULT_SCALE_PARAM,
+                               DEFAULT_SHAPE_PARAM],
+                              method='Nelder-Mead')
+        if optimization_results.success:
+            resulting_params = optimization_results.x
+            if len(resulting_params) == 2:
+                result[0] = resulting_params[0]
+                result[1] = resulting_params[1]
+    return result
+
+
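A quick cross-check of the Nelder-Mead fit against scipy's built-in GPD estimator, on exceedances drawn from a known distribution (synthetic data; the import path assumes the patched module is `empyrical.stats`):

```python
import numpy as np
from scipy import stats

# Hypothetical: assumes the patched module is importable as empyrical.stats.
from empyrical.stats import gpd_loglikelihood_minimizer_aligned

# Synthetic exceedances from a known GPD (shape 0.3, scale 0.02).
exceedances = stats.genpareto.rvs(c=0.3, scale=0.02, size=500,
                                  random_state=np.random.RandomState(7))

scale_hat, shape_hat = gpd_loglikelihood_minimizer_aligned(exceedances)
# scipy's reference fit with the location pinned at 0, as this code assumes;
# genpareto.fit returns (shape, loc, scale).
shape_ref, _, scale_ref = stats.genpareto.fit(exceedances, floc=0)
# Both estimates should land near scale=0.02, shape=0.3.
```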
+def gpd_loglikelihood_factory(price_data):
+    return lambda params: gpd_loglikelihood(params, price_data)
+
+
+def gpd_loglikelihood(params, price_data):
+    if params[1] != 0:
+        return -gpd_loglikelihood_scale_and_shape(params[0],
+                                                  params[1],
+                                                  price_data)
+    else:
+        return -gpd_loglikelihood_scale_only(params[0], price_data)
+
+
+def gpd_loglikelihood_scale_and_shape_factory(price_data):
+    # minimizing a function of two variables requires a list of params
+    # we are expecting the lambda below to be called as follows:
+    # parameters = [scale, shape]
+    # the final outer negative is added because scipy only minimizes
+    return lambda params: \
+        -gpd_loglikelihood_scale_and_shape(params[0],
+                                           params[1],
+                                           price_data)
+
+
+def gpd_loglikelihood_scale_and_shape(scale, shape, price_data):
+    n = len(price_data)
+    result = -1 * float_info.max
+    if scale != 0:
+        param_factor = shape / scale
+        if shape != 0 and param_factor >= 0 and scale >= 0:
+            result = ((-n * np.log(scale)) -
+                      (((1 / shape) + 1) *
+                       (np.log((shape / scale * price_data) + 1)).sum()))
+    return result
+
+
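This closed form is the sum of GPD log-densities with the location fixed at 0, which can be verified against scipy (a minimal check; the import path assumes the patched module is `empyrical.stats`):

```python
import numpy as np
from scipy import stats

# Hypothetical: assumes the patched module is importable as empyrical.stats.
from empyrical.stats import gpd_loglikelihood_scale_and_shape

x = np.array([0.01, 0.05, 0.12])
ours = gpd_loglikelihood_scale_and_shape(0.04, 0.2, x)
ref = stats.genpareto.logpdf(x, c=0.2, loc=0, scale=0.04).sum()
assert np.isclose(ours, ref)
```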
+def gpd_loglikelihood_scale_only_factory(price_data):
+    # the negative is added because scipy only minimizes
+    return lambda scale: \
+        -gpd_loglikelihood_scale_only(scale, price_data)
+
+
+def gpd_loglikelihood_scale_only(scale, price_data):
+    n = len(price_data)
+    data_sum = price_data.sum()
+    result = -1 * float_info.max
+    # scale must be strictly positive to avoid dividing (and log) by zero
+    if scale > 0:
+        result = ((-n * np.log(scale)) - (data_sum / scale))
+    return result
+
+
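In the shape -> 0 limit the GPD reduces to an exponential distribution with mean equal to the scale, which is exactly what the scale-only likelihood above encodes; a quick check against scipy's exponential log-density (import path assumed as before):

```python
import numpy as np
from scipy import stats

# Hypothetical: assumes the patched module is importable as empyrical.stats.
from empyrical.stats import gpd_loglikelihood_scale_only

x = np.array([0.01, 0.05, 0.12])
ours = gpd_loglikelihood_scale_only(0.04, x)
ref = stats.expon.logpdf(x, loc=0, scale=0.04).sum()
assert np.isclose(ours, ref)
```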
 def up_capture(returns, factor_returns, **kwargs):
     """
     Compute the capture ratio for periods when the benchmark return is positive
 
@@ -1840,6 +2157,8 @@ def conditional_value_at_risk(returns, cutoff=0.05):
     excess_sharpe,
     alpha,
     beta,
+    beta_fragility_heuristic,
+    gpd_risk_estimates,
     capture,
     up_capture,
     down_capture