diff --git a/doc/source/api.rst b/doc/source/api.rst index 12dc0b0cb50b9..3c7ca6d5c2326 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -194,65 +194,6 @@ Top-level evaluation eval -Standard moving window functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - :toctree: generated/ - - rolling_count - rolling_sum - rolling_mean - rolling_median - rolling_var - rolling_std - rolling_min - rolling_max - rolling_corr - rolling_corr_pairwise - rolling_cov - rolling_skew - rolling_kurt - rolling_apply - rolling_quantile - rolling_window - -.. _api.functions_expanding: - -Standard expanding window functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - :toctree: generated/ - - expanding_count - expanding_sum - expanding_mean - expanding_median - expanding_var - expanding_std - expanding_min - expanding_max - expanding_corr - expanding_corr_pairwise - expanding_cov - expanding_skew - expanding_kurt - expanding_apply - expanding_quantile - -Exponentially-weighted moving window functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - :toctree: generated/ - - ewma - ewmstd - ewmvar - ewmcorr - ewmcov - .. _api.series: Series @@ -260,6 +201,9 @@ Series Constructor ~~~~~~~~~~~ + +.. currentmodule:: pandas + .. autosummary:: :toctree: generated/ @@ -344,14 +288,17 @@ Binary operator functions Series.ne Series.eq -Function application, GroupBy -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Function application, GroupBy & Window +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ Series.apply Series.map Series.groupby + Series.rolling + Series.expanding + Series.ewm .. _api.series.stats: @@ -846,14 +793,17 @@ Binary operator functions DataFrame.combine DataFrame.combine_first -Function application, GroupBy -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Function application, GroupBy & Window +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ DataFrame.apply DataFrame.applymap DataFrame.groupby + DataFrame.rolling + DataFrame.expanding + DataFrame.ewm .. _api.dataframe.stats: @@ -1551,6 +1501,78 @@ Conversion TimedeltaIndex.to_series TimedeltaIndex.round +Window +------ +.. currentmodule:: pandas.core.window + +Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. +Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc. +EWM objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc. + +Standard moving window functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. currentmodule:: pandas.core.window + +.. autosummary:: + :toctree: generated/ + + Rolling.count + Rolling.sum + Rolling.mean + Rolling.median + Rolling.var + Rolling.std + Rolling.min + Rolling.max + Rolling.corr + Rolling.cov + Rolling.skew + Rolling.kurt + Rolling.apply + Rolling.quantile + Window.mean + Window.sum + +.. _api.functions_expanding: + +Standard expanding window functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. currentmodule:: pandas.core.window + +.. autosummary:: + :toctree: generated/ + + Expanding.count + Expanding.sum + Expanding.mean + Expanding.median + Expanding.var + Expanding.std + Expanding.min + Expanding.max + Expanding.corr + Expanding.cov + Expanding.skew + Expanding.kurt + Expanding.apply + Expanding.quantile + +Exponentially-weighted moving window functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. currentmodule:: pandas.core.window + +.. 
autosummary::
   :toctree: generated/

   EWM.mean
   EWM.std
   EWM.var
   EWM.corr
   EWM.cov

GroupBy
-------
.. currentmodule:: pandas.core.groupby

diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index b2fa7f6749379..39587e82731b0 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -21,7 +21,7 @@ Computational tools
 ===================

-Statistical functions
+Statistical Functions
 ---------------------

 .. _computation.pct_change:
@@ -196,90 +196,120 @@ parameter:
 - ``max`` : highest rank in the group
 - ``first`` : ranks assigned in the order they appear in the array

+.. _stats.moments:

-.. currentmodule:: pandas

-.. currentmodule:: pandas.stats.api
+Window Functions
+----------------

-.. _stats.moments:
+.. warning::

-Moving (rolling) statistics / moments
--------------------------------------
+   Prior to version 0.18.0, ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` were module level
+   functions and are now deprecated and replaced by the corresponding method call.

+   The deprecation warning will show the new syntax; see an example :ref:`here `.
+   You can view the previous documentation
+   `here `__.

-For working with time series data, a number of functions are provided for
-computing common *moving* or *rolling* statistics. Among these are count, sum,
+For working with data, a number of window functions are provided for
+computing common *window* or *rolling* statistics. Among these are count, sum,
 mean, median, correlation, variance, covariance, standard deviation, skewness,
-and kurtosis. All of these methods are in the :mod:`pandas` namespace, but
-otherwise they can be found in :mod:`pandas.stats.moments`.
+and kurtosis.

-.. currentmodule:: pandas
+.. currentmodule:: pandas.core.window

-.. csv-table::
-   :header: "Function", "Description"
-   :widths: 20, 80
+.. note::
+
+   The API for window statistics is quite similar to the way one works with ``GroupBy`` objects; see the documentation :ref:`here `.
+
+We work with ``rolling``, ``expanding`` and ``exponentially weighted`` data through the corresponding
+objects, :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expanding` and :class:`~pandas.core.window.EWM`.
+
+.. ipython:: python
+
+   s = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
+   s = s.cumsum()
+   s
+
+These are created from methods on ``Series`` and ``DataFrame``.

-   :func:`rolling_count`, Number of non-null observations
-   :func:`rolling_sum`, Sum of values
-   :func:`rolling_mean`, Mean of values
-   :func:`rolling_median`, Arithmetic median of values
-   :func:`rolling_min`, Minimum
-   :func:`rolling_max`, Maximum
-   :func:`rolling_std`, Unbiased standard deviation
-   :func:`rolling_var`, Unbiased variance
-   :func:`rolling_skew`, Unbiased skewness (3rd moment)
-   :func:`rolling_kurt`, Unbiased kurtosis (4th moment)
-   :func:`rolling_quantile`, Sample quantile (value at %)
-   :func:`rolling_apply`, Generic apply
-   :func:`rolling_cov`, Unbiased covariance (binary)
-   :func:`rolling_corr`, Correlation (binary)
-   :func:`rolling_window`, Moving window function
-
-Generally these methods all have the same interface. The binary operators
-(e.g. :func:`rolling_corr`) take two Series or DataFrames. Otherwise, they all
+.. ipython:: python
+
+   r = s.rolling(window=60)
+   r
+
+Generally these methods all have the same interface.
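+For example, the window parameters are supplied once, up front, and the
+object's repr echoes them back (a small sketch; the arguments shown here are
+described just below):
+
+.. ipython:: python
+
+   s.rolling(window=60, min_periods=10, center=True)
+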
+They all accept the following arguments:

-  - ``window``: size of moving window
-  - ``min_periods``: threshold of non-null data points to require (otherwise
-    result is NA)
-  - ``freq``: optionally specify a :ref:`frequency string `
-    or :ref:`DateOffset ` to pre-conform the data to.
-    Note that prior to pandas v0.8.0, a keyword argument ``time_rule`` was used
-    instead of ``freq`` that referred to the legacy time rule constants
-  - ``how``: optionally specify method for down or re-sampling. Default is
-    is min for :func:`rolling_min`, max for :func:`rolling_max`, median for
-    :func:`rolling_median`, and mean for all other rolling functions. See
-    :meth:`DataFrame.resample`'s how argument for more information.
+- ``window``: size of moving window
+- ``min_periods``: threshold of non-null data points to require (otherwise
+  result is NA)
+- ``center``: boolean, whether to set the labels at the center (default is False)
+
+.. warning::

-These functions can be applied to ndarrays or Series objects:
+   The ``freq`` and ``how`` arguments were part of the API prior to the 0.18.0 changes. They are deprecated in the new API. You can simply resample the input prior to creating a window function.
+
+   For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value on a rolling 5 Day window, one could use ``s.resample('D',how='max').rolling(window=5).max()``, which first resamples the data to daily data, then provides a rolling 5 day window.
+
+We can then call methods on these ``rolling`` objects. These return like-indexed objects:

 .. ipython:: python

-   ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
-   ts = ts.cumsum()
+   r.mean()
+
+.. ipython:: python

-   ts.plot(style='k--')
+   s.plot(style='k--')

    @savefig rolling_mean_ex.png
-   pd.rolling_mean(ts, 60).plot(style='k')
-
-They can also be applied to DataFrame objects. This is really just syntactic
-sugar for applying the moving window operator to all of the DataFrame's columns:
+   r.mean().plot(style='k')

 .. ipython:: python
    :suppress:

    plt.close('all')

+They can also be applied to DataFrame objects. This is really just syntactic
+sugar for applying the moving window operator to all of the DataFrame's columns:
+
 .. ipython:: python

-   df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
-                     columns=['A', 'B', 'C', 'D'])
+   df = pd.DataFrame(np.random.randn(1000, 4), index=s.index,
+                     columns=['A', 'B', 'C', 'D'])
    df = df.cumsum()

    @savefig rolling_mean_frame.png
-   pd.rolling_sum(df, 60).plot(subplots=True)
+   df.rolling(window=60).sum().plot(subplots=True)

+.. _stats.summary:
+
+Method Summary
+~~~~~~~~~~~~~~
+
+We provide a number of the common statistical functions:
+
+.. currentmodule:: pandas.core.window
+
+.. csv-table::
+   :header: "Method", "Description"
+   :widths: 20, 80
+
+   :meth:`~Rolling.count`, Number of non-null observations
+   :meth:`~Rolling.sum`, Sum of values
+   :meth:`~Rolling.mean`, Mean of values
+   :meth:`~Rolling.median`, Arithmetic median of values
+   :meth:`~Rolling.min`, Minimum
+   :meth:`~Rolling.max`, Maximum
+   :meth:`~Rolling.std`, Unbiased standard deviation
+   :meth:`~Rolling.var`, Unbiased variance
+   :meth:`~Rolling.skew`, Unbiased skewness (3rd moment)
+   :meth:`~Rolling.kurt`, Unbiased kurtosis (4th moment)
+   :meth:`~Rolling.quantile`, Sample quantile (value at %)
+   :meth:`~Rolling.apply`, Generic apply
+   :meth:`~Rolling.cov`, Unbiased covariance (binary)
+   :meth:`~Rolling.corr`, Correlation (binary)
+
-The :func:`rolling_apply` function takes an extra ``func`` argument and performs
+The :meth:`~Rolling.apply` function takes an extra ``func`` argument and performs
 generic rolling computations. The ``func`` argument should be a single function
 that produces a single value from an ndarray input. Suppose we wanted to
 compute the mean absolute deviation on a rolling basis:
@@ -288,79 +318,91 @@ compute the mean absolute deviation on a rolling basis:

    mad = lambda x: np.fabs(x - x.mean()).mean()

    @savefig rolling_apply_ex.png
-   pd.rolling_apply(ts, 60, mad).plot(style='k')
-
-The :func:`rolling_window` function performs a generic rolling window computation
-on the input data. The weights used in the window are specified by the ``win_type``
-keyword. The list of recognized types are:
+   s.rolling(window=60).apply(mad).plot(style='k')

-  - ``boxcar``
-  - ``triang``
-  - ``blackman``
-  - ``hamming``
-  - ``bartlett``
-  - ``parzen``
-  - ``bohman``
-  - ``blackmanharris``
-  - ``nuttall``
-  - ``barthann``
-  - ``kaiser`` (needs beta)
-  - ``gaussian`` (needs std)
-  - ``general_gaussian`` (needs power, width)
-  - ``slepian`` (needs width).
+.. _stats.rolling_window:

-.. ipython:: python
+Rolling Windows
+~~~~~~~~~~~~~~~

-   ser = pd.Series(np.random.randn(10), index=pd.date_range('1/1/2000', periods=10))
+Passing ``win_type`` to ``.rolling`` generates a generic rolling window computation
+that is weighted according to the ``win_type``.
+The following methods are available:

-   pd.rolling_window(ser, 5, 'triang')
+.. csv-table::
+   :header: "Method", "Description"
+   :widths: 20, 80

-Note that the ``boxcar`` window is equivalent to :func:`rolling_mean`.
+   :meth:`~Window.sum`, Sum of values
+   :meth:`~Window.mean`, Mean of values
+
+The weights used in the window are specified by the ``win_type`` keyword. The list of
+recognized types is:
+
+- ``boxcar``
+- ``triang``
+- ``blackman``
+- ``hamming``
+- ``bartlett``
+- ``parzen``
+- ``bohman``
+- ``blackmanharris``
+- ``nuttall``
+- ``barthann``
+- ``kaiser`` (needs beta)
+- ``gaussian`` (needs std)
+- ``general_gaussian`` (needs power, width)
+- ``slepian`` (needs width).

 .. ipython:: python

-   pd.rolling_window(ser, 5, 'boxcar')
+   ser = pd.Series(np.random.randn(10), index=pd.date_range('1/1/2000', periods=10))

-   pd.rolling_mean(ser, 5)
+   ser.rolling(window=5, win_type='triang').mean()

-For some windowing functions, additional parameters must be specified:
+Note that the ``boxcar`` window is equivalent to :meth:`~Rolling.mean`.

 .. ipython:: python

-   pd.rolling_window(ser, 5, 'gaussian', std=0.1)
+   ser.rolling(window=5, win_type='boxcar').mean()
+   ser.rolling(window=5).mean()

-By default the labels are set to the right edge of the window, but a
-``center`` keyword is available so the labels can be set at the center.
-This keyword is available in other rolling functions as well.
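+Any of the other recognized window types is passed the same way; for example,
+a Hamming window is just another ``win_type`` (a brief illustration, reusing
+``ser`` from above):
+
+.. ipython:: python
+
+   ser.rolling(window=5, win_type='hamming').mean()
+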
+For some windowing functions, additional parameters must be specified:

 .. ipython:: python

-   pd.rolling_window(ser, 5, 'boxcar')
-
-   pd.rolling_window(ser, 5, 'boxcar', center=True)
-
-   pd.rolling_mean(ser, 5, center=True)
+   ser.rolling(window=5, win_type='gaussian').mean(std=0.1)

 .. _stats.moments.normalization:

 .. note::

-   In rolling sum mode (``mean=False``) there is no normalization done to the
-   weights. Passing custom weights of ``[1, 1, 1]`` will yield a different
+   For ``.sum()`` with a ``win_type``, there is no normalization done to the
+   weights for the window. Passing custom weights of ``[1, 1, 1]`` will yield a different
    result than passing weights of ``[2, 2, 2]``, for example. When passing a
    ``win_type`` instead of explicitly specifying the weights, the weights are
    already normalized so that the largest weight is 1.

-   In contrast, the nature of the rolling mean calculation (``mean=True``)is
+   In contrast, the nature of the ``.mean()`` calculation is
    such that the weights are normalized with respect to each other. Weights
    of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result.

+Centering Windows
+~~~~~~~~~~~~~~~~~
+
+By default the labels are set to the right edge of the window, but a
+``center`` keyword is available so the labels can be set at the center.
+This keyword is available in other rolling functions as well.
+
+.. ipython:: python
+
+   ser.rolling(window=5).mean()
+   ser.rolling(window=5, center=True).mean()
+
 .. _stats.moments.binary:

-Binary rolling moments
-~~~~~~~~~~~~~~~~~~~~~~
+Binary Window Functions
+~~~~~~~~~~~~~~~~~~~~~~~

-:func:`rolling_cov` and :func:`rolling_corr` can compute moving window statistics about
+:meth:`~Rolling.cov` and :meth:`~Rolling.corr` can compute moving window statistics about
 two ``Series`` or any combination of ``DataFrame/Series`` or
 ``DataFrame/DataFrame``. Here is the behavior in each case:
@@ -378,7 +420,7 @@ For example:

 .. ipython:: python

    df2 = df[:20]
-   pd.rolling_corr(df2, df2['B'], window=5)
+   df2.rolling(window=5).corr(df2['B'])

 .. _stats.moments.corr_pairwise:
@@ -403,23 +445,16 @@ can even be omitted:

 .. ipython:: python

-   covs = pd.rolling_cov(df[['B','C','D']], df[['A','B','C']], 50, pairwise=True)
+   covs = df[['B','C','D']].rolling(window=50).cov(df[['A','B','C']], pairwise=True)
    covs[df.index[-50]]

 .. ipython:: python

-   correls = pd.rolling_corr(df, 50)
+   correls = df.rolling(window=50).corr()
    correls[df.index[-50]]

-.. note::
-
-   Prior to version 0.14 this was available through ``rolling_corr_pairwise``
-   which is now simply syntactic sugar for calling ``rolling_corr(...,
-   pairwise=True)`` and deprecated. This is likely to be removed in a future
-   release.
-
 You can efficiently retrieve the time series of correlations between two
-columns using ``ix`` indexing:
+columns using ``.loc`` indexing:

 .. ipython:: python
    :suppress:

    plt.close('all')

 .. ipython:: python

    @savefig rolling_corr_pairwise_ex.png
-   correls.ix[:, 'A', 'C'].plot()
+   correls.loc[:, 'A', 'C'].plot()

+.. _stats.aggregate:
+
+Aggregation
+-----------
+
+Once the ``Rolling``, ``Expanding`` or ``EWM`` objects have been created, several methods are available to
+perform multiple computations on the data. This is very similar to ``.groupby(...).agg(...)``, as seen :ref:`here `.
+
+An obvious one is aggregation via the ``aggregate`` or, equivalently, the ``agg`` method:
+
+.. ipython:: python
+
+   dfa = pd.DataFrame(np.random.randn(1000, 3),
+                      index=s.index,
+                      columns=['A', 'B', 'C'])
+   r = dfa.rolling(window=60, min_periods=1)
+   r
+
+We can aggregate by passing a function to the entire DataFrame, or select a
+Series (or multiple Series) via standard getitem.
+
+.. ipython:: python
+
+   r.aggregate(np.sum)
+
+   r['A'].aggregate(np.sum)
+
+   r[['A','B']].aggregate(np.sum)
+
+As you can see, the result of the aggregation will have the selected columns, or all
+columns if none are selected.
+
+.. _stats.aggregate.multifunc:
+
+Applying multiple functions at once
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+With windowed Series you can also pass a list or dict of functions to do
+aggregation with, outputting a DataFrame:
+
+.. ipython:: python
+
+   r['A'].agg([np.sum, np.mean, np.std])
+
+If a dict is passed, the keys will be used to name the columns. Otherwise the
+function's name (stored in the function object) will be used.
+
+.. ipython:: python
+
+   r['A'].agg({'result1' : np.sum,
+               'result2' : np.mean})
+
+On a windowed DataFrame, you can pass a list of functions to apply to each
+column, which produces an aggregated result with a hierarchical index:
+
+.. ipython:: python
+
+   r.agg([np.sum, np.mean])
+
+Passing a dict of functions has different behavior by default; see the next
+section.
+
+Applying different functions to DataFrame columns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By passing a dict to ``aggregate`` you can apply a different aggregation to the
+columns of a DataFrame:
+
+.. ipython:: python
+
+   r.agg({'A' : np.sum,
+          'B' : lambda x: np.std(x, ddof=1)})
+
+The function names can also be strings. In order for a string to be valid it
+must be implemented on the windowed object.
+
+.. ipython:: python
+
+   r.agg({'A' : 'sum', 'B' : 'std'})
+
+Furthermore, you can pass a nested dict to indicate different aggregations on
+different columns.
+
+.. ipython:: python
+
+   r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] })
+
 .. _stats.moments.expanding:

-Expanding window moment functions
----------------------------------
+Expanding Windows
+-----------------
+
 A common alternative to rolling statistics is to use an *expanding* window,
 which yields the value of the statistic with all the data available up to that
-point in time. As these calculations are a special case of rolling statistics,
+point in time.
+
+These follow a similar interface to ``.rolling``, with the ``.expanding`` method
+returning an :class:`~pandas.core.window.Expanding` object.
+
+As these calculations are a special case of rolling statistics,
 they are implemented in pandas such that the following two calls are equivalent:

 .. ipython:: python

-   pd.rolling_mean(df, window=len(df), min_periods=1)[:5]
+   df.rolling(window=len(df), min_periods=1).mean()[:5]

-   pd.expanding_mean(df)[:5]
+   df.expanding(min_periods=1).mean()[:5]

-Like the ``rolling_`` functions, the following methods are included in the
-``pandas`` namespace or can be located in ``pandas.stats.moments``.
+These have a set of methods similar to the ``.rolling`` methods.
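+
+For example (a quick sketch reusing ``s`` from above; any method from the
+summary table below can be called in the same way):
+
+.. ipython:: python
+
+   s.expanding(min_periods=1).std()[:5]
+
+Method Summary
+~~~~~~~~~~~~~~
+
+.. currentmodule:: pandas.core.window
+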
 .. csv-table::
    :header: "Function", "Description"
    :widths: 20, 80

-   :func:`expanding_count`, Number of non-null observations
-   :func:`expanding_sum`, Sum of values
-   :func:`expanding_mean`, Mean of values
-   :func:`expanding_median`, Arithmetic median of values
-   :func:`expanding_min`, Minimum
-   :func:`expanding_max`, Maximum
-   :func:`expanding_std`, Unbiased standard deviation
-   :func:`expanding_var`, Unbiased variance
-   :func:`expanding_skew`, Unbiased skewness (3rd moment)
-   :func:`expanding_kurt`, Unbiased kurtosis (4th moment)
-   :func:`expanding_quantile`, Sample quantile (value at %)
-   :func:`expanding_apply`, Generic apply
-   :func:`expanding_cov`, Unbiased covariance (binary)
-   :func:`expanding_corr`, Correlation (binary)
+   :meth:`~Expanding.count`, Number of non-null observations
+   :meth:`~Expanding.sum`, Sum of values
+   :meth:`~Expanding.mean`, Mean of values
+   :meth:`~Expanding.median`, Arithmetic median of values
+   :meth:`~Expanding.min`, Minimum
+   :meth:`~Expanding.max`, Maximum
+   :meth:`~Expanding.std`, Unbiased standard deviation
+   :meth:`~Expanding.var`, Unbiased variance
+   :meth:`~Expanding.skew`, Unbiased skewness (3rd moment)
+   :meth:`~Expanding.kurt`, Unbiased kurtosis (4th moment)
+   :meth:`~Expanding.quantile`, Sample quantile (value at %)
+   :meth:`~Expanding.apply`, Generic apply
+   :meth:`~Expanding.cov`, Unbiased covariance (binary)
+   :meth:`~Expanding.corr`, Correlation (binary)

 Aside from not having a ``window`` parameter, these functions have the same
-interfaces as their ``rolling_`` counterpart. Like above, the parameters they
+interfaces as their ``.rolling`` counterparts. Like above, the parameters they
 all accept are:

-  - ``min_periods``: threshold of non-null data points to require. Defaults to
-    minimum needed to compute statistic. No ``NaNs`` will be output once
-    ``min_periods`` non-null data points have been seen.
-  - ``freq``: optionally specify a :ref:`frequency string `
-    or :ref:`DateOffset ` to pre-conform the data to.
-    Note that prior to pandas v0.8.0, a keyword argument ``time_rule`` was used
-    instead of ``freq`` that referred to the legacy time rule constants
+- ``min_periods``: threshold of non-null data points to require. Defaults to
+  minimum needed to compute statistic. No ``NaNs`` will be output once
+  ``min_periods`` non-null data points have been seen.
+- ``center``: boolean, whether to set the labels at the center (default is False)

 .. note::

-   The output of the ``rolling_`` and ``expanding_`` functions do not return a
+   The output of the ``.rolling`` and ``.expanding`` methods will not contain a
    ``NaN`` if there are at least ``min_periods`` non-null values in the current
    window. This differs from ``cumsum``, ``cumprod``, ``cummax``, and
    ``cummin``, which return ``NaN`` in the output wherever a ``NaN`` is
@@ -493,7 +618,7 @@ all accept are:

 An expanding window statistic will be more stable (and less responsive) than
 its rolling window counterpart as the increasing window size decreases the
 relative impact of an individual data point. As an example, here is the
-:func:`expanding_mean` output for the previous time series dataset:
+:meth:`~Expanding.mean` output for the previous time series dataset:

 .. ipython:: python
    :suppress:

    plt.close('all')

 .. ipython:: python

-   ts.plot(style='k--')
+   s.plot(style='k--')

    @savefig expanding_mean_frame.png
-   pd.expanding_mean(ts).plot(style='k')
+   s.expanding().mean().plot(style='k')
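+
+The same comparison can be made numerically: near the end of the series, each
+new point shifts the expanding mean far less than it shifts the rolling mean
+(a quick sketch):
+
+.. ipython:: python
+
+   s.rolling(window=60).mean().tail()
+   s.expanding().mean().tail()
+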
 .. _stats.moments.exponentially_weighted:

-Exponentially weighted moment functions
----------------------------------------
+Exponentially Weighted Windows
+------------------------------

 A related set of functions are exponentially weighted versions of several of
-the above statistics. A number of expanding EW (exponentially weighted)
-functions are provided:
+the above statistics. A similar interface to ``.rolling`` and ``.expanding`` is accessed
+through the ``.ewm`` method to receive a :class:`~pandas.core.window.EWM` object.
+A number of EW (exponentially weighted)
+methods are provided:

-.. currentmodule:: pandas
+.. currentmodule:: pandas.core.window

 .. csv-table::
    :header: "Function", "Description"
    :widths: 20, 80

-   :func:`ewma`, EW moving average
-   :func:`ewmvar`, EW moving variance
-   :func:`ewmstd`, EW moving standard deviation
-   :func:`ewmcorr`, EW moving correlation
-   :func:`ewmcov`, EW moving covariance
+   :meth:`~EWM.mean`, EW moving average
+   :meth:`~EWM.var`, EW moving variance
+   :meth:`~EWM.std`, EW moving standard deviation
+   :meth:`~EWM.corr`, EW moving correlation
+   :meth:`~EWM.cov`, EW moving covariance

 In general, a weighted moving average is calculated as
@@ -621,20 +749,20 @@ Here is an example for a univariate time series:

 .. ipython:: python

-   ts.plot(style='k--')
+   s.plot(style='k--')

    @savefig ewma_ex.png
-   pd.ewma(ts, span=20).plot(style='k')
+   s.ewm(span=20).mean().plot(style='k')

-All the EW functions have a ``min_periods`` argument, which has the same
-meaning it does for all the ``expanding_`` and ``rolling_`` functions:
+EWM has a ``min_periods`` argument, which has the same
+meaning it does for all the ``.expanding`` and ``.rolling`` methods:
 no output values will be set until at least ``min_periods`` non-null values
 are encountered in the (expanding) window.
 (This is a change from versions prior to 0.15.0, in which the ``min_periods``
 argument affected only the ``min_periods`` consecutive entries starting at the
 first non-null value.)

-All the EW functions also have an ``ignore_na`` argument, which deterines how
+EWM also has an ``ignore_na`` argument, which determines how
 intermediate null values affect the calculation of the weights.
 When ``ignore_na=False`` (the default), weights are calculated based on absolute
 positions, so that intermediate null values affect the result.
@@ -653,7 +781,7 @@ Whereas if ``ignore_na=True``, the weighted average would be calculated as

	\frac{(1-\alpha) \cdot 3 + 1 \cdot 5}{(1-\alpha) + 1}.

-The :func:`ewmvar`, :func:`ewmstd`, and :func:`ewmcov` functions have a ``bias`` argument,
+The :meth:`~EWM.var`, :meth:`~EWM.std`, and :meth:`~EWM.cov` methods have a ``bias`` argument,
 specifying whether the result should contain biased or unbiased statistics.
For example, if ``bias=True``, ``ewmvar(x)`` is calculated as ``ewmvar(x) = ewma(x**2) - ewma(x)**2``; diff --git a/doc/source/conf.py b/doc/source/conf.py index 23095b7f4d24b..709d9b32984c0 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -224,16 +224,7 @@ 'pandas.io.pickle.read_pickle', 'pandas.io.pytables.HDFStore.append', 'pandas.io.pytables.HDFStore.get', 'pandas.io.pytables.HDFStore.put', 'pandas.io.pytables.HDFStore.select', 'pandas.io.pytables.read_hdf', 'pandas.io.sql.read_sql', 'pandas.io.sql.read_frame', 'pandas.io.sql.write_frame', - 'pandas.io.stata.read_stata', 'pandas.stats.moments.ewma', 'pandas.stats.moments.ewmcorr', - 'pandas.stats.moments.ewmcov', 'pandas.stats.moments.ewmstd', 'pandas.stats.moments.ewmvar', - 'pandas.stats.moments.expanding_apply', 'pandas.stats.moments.expanding_corr', 'pandas.stats.moments.expanding_count', - 'pandas.stats.moments.expanding_cov', 'pandas.stats.moments.expanding_kurt', 'pandas.stats.moments.expanding_mean', - 'pandas.stats.moments.expanding_median', 'pandas.stats.moments.expanding_quantile', 'pandas.stats.moments.expanding_skew', - 'pandas.stats.moments.expanding_std', 'pandas.stats.moments.expanding_sum', 'pandas.stats.moments.expanding_var', - 'pandas.stats.moments.rolling_apply', 'pandas.stats.moments.rolling_corr', 'pandas.stats.moments.rolling_count', - 'pandas.stats.moments.rolling_cov', 'pandas.stats.moments.rolling_kurt', 'pandas.stats.moments.rolling_mean', - 'pandas.stats.moments.rolling_median', 'pandas.stats.moments.rolling_quantile', 'pandas.stats.moments.rolling_skew', - 'pandas.stats.moments.rolling_std', 'pandas.stats.moments.rolling_sum', 'pandas.stats.moments.rolling_var'] + 'pandas.io.stata.read_stata'] html_additional_pages = {'generated/' + page: 'api_redirect.html' for page in moved_api_pages} diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 92ed85071ecb8..4d6a7457bcf90 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -517,7 +517,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to def Red(x): return functools.reduce(CumRet,x,1.0) - pd.expanding_apply(S, Red) + S.expanding().apply(Red) `Replacing some values with mean of the rest of a group @@ -639,7 +639,7 @@ Create a list of dataframes, split using a delineation based on logic included i df = pd.DataFrame(data={'Case' : ['A','A','A','B','A','A','B','A','A'], 'Data' : np.random.randn(9)}) - dfs = list(zip(*df.groupby(pd.rolling_median((1*(df['Case']=='B')).cumsum(),3,True))))[-1] + dfs = list(zip(*df.groupby((1*(df['Case']=='B')).cumsum().rolling(window=3,min_periods=1).median())))[-1] dfs[0] dfs[1] diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 4ae2ee1927d1a..61f87ebe0db1b 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -519,7 +519,7 @@ to standardize the data within each group: index = pd.date_range('10/1/1999', periods=1100) ts = pd.Series(np.random.normal(0.5, 2, 1100), index) - ts = pd.rolling_mean(ts, 100, 100).dropna() + ts = ts.rolling(window=100,min_periods=100).mean().dropna() ts.head() ts.tail() diff --git a/doc/source/whatsnew/v0.14.0.txt b/doc/source/whatsnew/v0.14.0.txt index e2f96f204edab..67928af30bead 100644 --- a/doc/source/whatsnew/v0.14.0.txt +++ b/doc/source/whatsnew/v0.14.0.txt @@ -170,11 +170,18 @@ API changes :ref:`Computing rolling pairwise covariances and correlations ` in the docs. - .. ipython:: python + .. 
code-block:: python
+
+    In [1]: df = DataFrame(np.random.randn(10,4),columns=list('ABCD'))
+
+    In [4]: covs = pd.rolling_cov(df[['A','B','C']], df[['B','C','D']], 5, pairwise=True)

-      df = DataFrame(np.random.randn(10,4),columns=list('ABCD'))
-      covs = rolling_cov(df[['A','B','C']], df[['B','C','D']], 5, pairwise=True)
-      covs[df.index[-1]]
+    In [5]: covs[df.index[-1]]
+    Out[5]:
+              B         C         D
+    A  0.035310  0.326593 -0.505430
+    B  0.137748 -0.006888 -0.005383
+    C -0.006888  0.861040  0.020762

 - ``Series.iteritems()`` is now lazy (returns an iterator rather than a list).
   This was the documented behavior prior to 0.14. (:issue:`6760`)

diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt
index a33e0f19961ab..9651c1efeff4a 100644
--- a/doc/source/whatsnew/v0.15.0.txt
+++ b/doc/source/whatsnew/v0.15.0.txt
@@ -68,7 +68,7 @@ For full docs, see the :ref:`categorical introduction ` and the

 .. ipython:: python
    :okwarning:
-
+
    df = DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})

    df["grade"] = df["raw_grade"].astype("category")
@@ -353,9 +353,15 @@ Rolling/Expanding Moments improvements

   New behavior

-  .. ipython:: python
+  .. code-block:: python

-     rolling_min(s, window=10, min_periods=5)
+     In [4]: pd.rolling_min(s, window=10, min_periods=5)
+     Out[4]:
+     0   NaN
+     1   NaN
+     2   NaN
+     3   NaN
+     dtype: float64

 - :func:`rolling_max`, :func:`rolling_min`, :func:`rolling_sum`, :func:`rolling_mean`,
   :func:`rolling_median`, :func:`rolling_std`, :func:`rolling_var`,
   :func:`rolling_skew`, :func:`rolling_kurt`, :func:`rolling_quantile`,
@@ -381,9 +387,15 @@ Rolling/Expanding Moments improvements

  New behavior (note final value is ``5 = sum([2, 3, NaN])``):

-  .. ipython:: python
+  .. code-block:: python

-     rolling_sum(Series(range(4)), window=3, min_periods=0, center=True)
+     In [7]: rolling_sum(Series(range(4)), window=3, min_periods=0, center=True)
+     Out[7]:
+     0    1
+     1    3
+     2    6
+     3    5
+     dtype: float64

 - :func:`rolling_window` now normalizes the weights properly in rolling mean mode
   (`mean=True`) so that the calculated weighted means (e.g. 'triang',
   'gaussian') are distributed about the same means as those
@@ -397,20 +409,27 @@ Rolling/Expanding Moments improvements

  .. code-block:: python

-     In [39]: rolling_window(s, window=3, win_type='triang', center=True)
-     Out[39]:
-     0         NaN
-     1    6.583333
-     2    6.883333
-     3    6.683333
-     4         NaN
-     dtype: float64
+     In [39]: rolling_window(s, window=3, win_type='triang', center=True)
+     Out[39]:
+     0         NaN
+     1    6.583333
+     2    6.883333
+     3    6.683333
+     4         NaN
+     dtype: float64

  New behavior

-  .. ipython:: python
+  .. code-block:: python

-     rolling_window(s, window=3, win_type='triang', center=True)
+     In [10]: pd.rolling_window(s, window=3, win_type='triang', center=True)
+     Out[10]:
+     0       NaN
+     1     9.875
+     2    10.325
+     3    10.025
+     4       NaN
+     dtype: float64

 - Removed ``center`` argument from all :func:`expanding_ ` functions (see
   :ref:`list `), as the results produced when ``center=True`` did not make
   much sense. (:issue:`7925`)
@@ -449,9 +468,17 @@ Rolling/Expanding Moments improvements

  New behavior (note values start at index ``4``, the location of the 2nd (since
  ``min_periods=2``) non-empty value):

-  .. ipython:: python
+  .. code-block:: python
+
+     In [2]: pd.ewma(s, com=3., min_periods=2)
+     Out[2]:
+     0         NaN
+     1         NaN
+     2         NaN
+     3         NaN
+     4    1.759644
+     5    2.383784
+     dtype: float64

 - :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr`
   now have an optional ``adjust`` argument, just like :func:`ewma` does,
@@ -465,11 +492,28 @@ Rolling/Expanding Moments improvements
  When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in
  the weights calculation. (:issue:`7543`)

-  .. ipython:: python
+  .. code-block:: python
+
+     In [7]: pd.ewma(Series([None, 1., 8.]), com=2.)
+     Out[7]:
+     0    NaN
+     1    1.0
+     2    5.2
+     dtype: float64
+
+     In [8]: pd.ewma(Series([1., None, 8.]), com=2., ignore_na=True)  # pre-0.15.0 behavior
+     Out[8]:
+     0    1.0
+     1    1.0
+     2    5.2
+     dtype: float64

-     ewma(Series([None, 1., 8.]), com=2.)
-     ewma(Series([1., None, 8.]), com=2., ignore_na=True)  # pre-0.15.0 behavior
-     ewma(Series([1., None, 8.]), com=2., ignore_na=False)  # new default
+     In [9]: pd.ewma(Series([1., None, 8.]), com=2., ignore_na=False)  # new default
+     Out[9]:
+     0    1.000000
+     1    1.000000
+     2    5.846154
+     dtype: float64

 .. warning::
@@ -525,10 +569,23 @@ Rolling/Expanding Moments improvements

  By comparison, the following 0.15.0 results have a ``NaN`` for entry ``0``,
  and the debiasing factors are decreasing (towards 1.25):

-  .. ipython:: python
+  .. code-block:: python

-     ewmvar(s, com=2., bias=False)
-     ewmvar(s, com=2., bias=False) / ewmvar(s, com=2., bias=True)
+     In [14]: pd.ewmvar(s, com=2., bias=False)
+     Out[14]:
+     0         NaN
+     1    0.500000
+     2    1.210526
+     3    4.089069
+     dtype: float64
+
+     In [15]: pd.ewmvar(s, com=2., bias=False) / pd.ewmvar(s, com=2., bias=True)
+     Out[15]:
+     0         NaN
+     1    2.083333
+     2    1.583333
+     3    1.425439
+     dtype: float64

 See :ref:`Exponentially weighted moment functions ` for details. (:issue:`7912`)

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index 86a7be5857035..7f63096d7c045 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -13,6 +13,8 @@ users upgrade to this version.

 Highlights include:

+- Window functions are now methods on ``.groupby``-like objects, see :ref:`here `.
+
 Check the :ref:`API Changes ` and :ref:`deprecations ` before updating.

 .. contents:: What's new in v0.18.0
@@ -25,10 +27,67 @@
 New features
 ~~~~~~~~~~~~

+.. _whatsnew_0180.enhancements.moments:
+
+Window functions are now methods
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Window functions have been refactored to be methods on ``Series/DataFrame`` objects, rather than top-level functions, which are now deprecated. This allows these window-type functions to have a similar API to that of ``.groupby``. See the full documentation :ref:`here ` (:issue:`11603`).
+
+.. ipython:: python
+
+   np.random.seed(1234)
+   df = DataFrame({'A' : range(10), 'B' : np.random.randn(10)})
+   df
+
+Previous Behavior:
+
+.. code-block:: python
+
+   In [8]: pd.rolling_mean(df,window=3)
+   Out[8]:
+      A         B
+   0 NaN       NaN
+   1 NaN       NaN
+   2   1  0.237722
+   3   2 -0.023640
+   4   3  0.133155
+   5   4 -0.048693
+   6   5  0.342054
+   7   6  0.370076
+   8   7  0.079587
+   9   8 -0.954504
+
+New Behavior:
+
+.. ipython:: python
+
+   r = df.rolling(window=3)
+
+These show a descriptive repr, with tab-completion of available methods:
+
+.. ipython:: python
+
+   r
+
+The methods operate on this ``Rolling`` object itself:
+
+.. ipython:: python
+
+   r.mean()
+
+They provide getitem accessors:
+
+.. ipython:: python
+
+   r['A'].mean()
+
+And multiple aggregations:
+
+.. ipython:: python
+
+   r.agg({'A' : ['mean','std'],
+          'B' : ['mean','std']})

 .. _whatsnew_0180.enhancements.other:
@@ -153,18 +212,44 @@ Other API Changes

 Deprecations
 ^^^^^^^^^^^^

+.. _whatsnew_0180.window_deprecations:
+
+- The functions ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` are deprecated and replaced by the corresponding method call. Note that
+  the new suggested syntax includes all of the arguments (even if default) (:issue:`11603`).
+
+  .. code-block:: python
+
+     In [1]: s = Series(range(3))
+
+     In [2]: pd.rolling_mean(s,window=2,min_periods=1)
+     FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with
+     Series.rolling(min_periods=1,window=2,center=False).mean()
+     Out[2]:
+     0    0.0
+     1    0.5
+     2    1.5
+     dtype: float64
+
+     In [3]: pd.rolling_cov(s, s, window=2)
+     FutureWarning: pd.rolling_cov is deprecated for Series and will be removed in a future version, replace with
+     Series.rolling(window=2).cov(other=)
+     Out[3]:
+     0    NaN
+     1    0.5
+     2    0.5
+     dtype: float64
+
+- The ``freq`` and ``how`` arguments to the ``.rolling``, ``.expanding``, and ``.ewm`` (new) functions are deprecated, and will be removed in a future version. You can simply resample the input prior to creating a window function. (:issue:`11603`).
+
+  For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value on a rolling 5 Day window, one could use ``s.resample('D',how='max').rolling(window=5).max()``, which first resamples the data to daily data, then provides a rolling 5 day window.

 .. _whatsnew_0180.prior_deprecations:

 Removal of prior version deprecations/changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-
-
+- Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`)
+- Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`)
@@ -195,6 +280,7 @@ Bug Fixes
 - Bug in ``Period.end_time`` when a multiple of time period is requested (:issue:`11738`)
 - Regression in ``.clip`` with tz-aware datetimes (:issue:`11838`)
 - Bug in ``date_range`` when the boundaries fell on the frequency (:issue:`11804`)
+- Bug in consistency of passing nested dicts to ``.groupby(...).agg(...)`` (:issue:`9052`)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 6996bb06065af..a1e1c20344ea4 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -2,11 +2,13 @@
 """
 Base and utility classes for pandas objects.
""" from pandas import compat +from pandas.compat import builtins import numpy as np from pandas.core import common as com import pandas.core.nanops as nanops import pandas.lib as lib -from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg +from pandas.util.decorators import (Appender, Substitution, + cache_readonly, deprecate_kwarg) from pandas.core.common import AbstractMethodError _shared_docs = dict() @@ -218,6 +220,288 @@ def __delete__(self, instance): raise AttributeError("can't delete attribute") +class GroupByError(Exception): + pass + + +class DataError(GroupByError): + pass + + +class SpecificationError(GroupByError): + pass + + +class SelectionMixin(object): + """ + mixin implementing the selection & aggregation interface on a group-like object + sub-classes need to define: obj, exclusions + """ + _selection = None + _internal_names = ['_cache','__setstate__'] + _internal_names_set = set(_internal_names) + _builtin_table = { + builtins.sum: np.sum, + builtins.max: np.max, + builtins.min: np.min, + } + _cython_table = { + builtins.sum: 'sum', + builtins.max: 'max', + builtins.min: 'min', + np.sum: 'sum', + np.mean: 'mean', + np.prod: 'prod', + np.std: 'std', + np.var: 'var', + np.median: 'median', + np.max: 'max', + np.min: 'min', + np.cumprod: 'cumprod', + np.cumsum: 'cumsum' + } + + @property + def name(self): + if self._selection is None: + return None # 'result' + else: + return self._selection + + @property + def _selection_list(self): + if not isinstance(self._selection, (list, tuple, com.ABCSeries, com.ABCIndex, np.ndarray)): + return [self._selection] + return self._selection + + @cache_readonly + def _selected_obj(self): + + if self._selection is None or isinstance(self.obj, com.ABCSeries): + return self.obj + else: + return self.obj[self._selection] + + @cache_readonly + def _obj_with_exclusions(self): + if self._selection is not None and isinstance(self.obj, com.ABCDataFrame): + return self.obj.reindex(columns=self._selection_list) + + if len(self.exclusions) > 0: + return self.obj.drop(self.exclusions, axis=1) + else: + return self.obj + + def __getitem__(self, key): + if self._selection is not None: + raise Exception('Column(s) %s already selected' % self._selection) + + if isinstance(key, (list, tuple, com.ABCSeries, com.ABCIndex, np.ndarray)): + if len(self.obj.columns.intersection(key)) != len(key): + bad_keys = list(set(key).difference(self.obj.columns)) + raise KeyError("Columns not found: %s" + % str(bad_keys)[1:-1]) + return self._gotitem(list(key), ndim=2) + + elif not getattr(self,'as_index',False): + if key not in self.obj.columns: + raise KeyError("Column not found: %s" % key) + return self._gotitem(key, ndim=2) + + else: + if key not in self.obj: + raise KeyError("Column not found: %s" % key) + return self._gotitem(key, ndim=1) + + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + + """ + raise AbstractMethodError(self) + + _agg_doc = """Aggregate using input function or dict of {column -> function} + +Parameters +---------- +arg : function or dict + Function to use for aggregating groups. If a function, must either + work when passed a DataFrame or when passed to DataFrame.apply. If + passed a dict, the keys must be DataFrame column names. 
+
+    Accepted Combinations are:
+      - string cythonized function name
+      - function
+      - list of functions
+      - dict of columns -> functions
+      - nested dict of names -> dicts of functions
+
+Notes
+-----
+Numpy functions mean/median/prod/sum/std/var are special cased so the
+default behavior is applying the function along axis=0
+(e.g., np.mean(arr_2d, axis=0)) as opposed to
+mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).
+
+Returns
+-------
+aggregated : DataFrame
+"""
+
+    _see_also_template = """
+See also
+--------
+pandas.Series.%(name)s
+pandas.DataFrame.%(name)s
+"""
+
+    def aggregate(self, func, *args, **kwargs):
+        raise AbstractMethodError(self)
+
+    agg = aggregate
+
+    def _aggregate(self, arg, *args, **kwargs):
+        """
+        provide an implementation for the aggregators
+
+        Parameters
+        ----------
+        arg : string, dict, function
+        *args : args to pass on to the function
+        **kwargs : kwargs to pass on to the function
+
+        Returns
+        -------
+        tuple of result, how
+
+        Notes
+        -----
+        how can be a string describing the required post-processing, or
+        None if not required
+        """
+
+        _level = kwargs.pop('_level', None)
+        if isinstance(arg, compat.string_types):
+            return getattr(self, arg)(*args, **kwargs), None
+
+        result = compat.OrderedDict()
+        if isinstance(arg, dict):
+            if self.axis != 0:  # pragma: no cover
+                raise ValueError('Can only pass dict with axis=0')
+
+            obj = self._selected_obj
+
+            # normalize scalar aggregators to lists when any value is a
+            # list/tuple/dict, so all columns produce like-shaped results
+            if any(isinstance(x, (list, tuple, dict)) for x in arg.values()):
+                new_arg = compat.OrderedDict()
+                for k, v in compat.iteritems(arg):
+                    if not isinstance(v, (tuple, list, dict)):
+                        new_arg[k] = [v]
+                    else:
+                        new_arg[k] = v
+                arg = new_arg
+
+            keys = []
+            if self._selection is not None:
+                subset = obj
+
+                for fname, agg_how in compat.iteritems(arg):
+                    colg = self._gotitem(self._selection, ndim=1, subset=subset)
+                    result[fname] = colg.aggregate(agg_how, _level=None)
+                    keys.append(fname)
+            else:
+                for col, agg_how in compat.iteritems(arg):
+                    colg = self._gotitem(col, ndim=1)
+                    result[col] = colg.aggregate(agg_how, _level=None)
+                    keys.append(col)
+
+            if isinstance(list(result.values())[0], com.ABCDataFrame):
+                from pandas.tools.merge import concat
+                result = concat([result[k] for k in keys], keys=keys, axis=1)
+            else:
+                from pandas import DataFrame
+                result = DataFrame(result)
+
+            return result, True
+        elif hasattr(arg, '__iter__'):
+            return self._aggregate_multiple_funcs(arg, _level=_level), None
+        else:
+            result = None
+
+        cy_func = self._is_cython_func(arg)
+        if cy_func and not args and not kwargs:
+            return getattr(self, cy_func)(), None
+
+        # caller can react
+        return result, True
+
+    def _aggregate_multiple_funcs(self, arg, _level):
+        from pandas.tools.merge import concat
+
+        if self.axis != 0:
+            raise NotImplementedError("axis other than 0 is not supported")
+
+        if self._selected_obj.ndim == 1:
+            obj = self._selected_obj
+        else:
+            obj = self._obj_with_exclusions
+
+        results = []
+        keys = []
+
+        # degenerate case
+        if obj.ndim == 1:
+            for a in arg:
+                try:
+                    colg = self._gotitem(obj.name, ndim=1, subset=obj)
+                    results.append(colg.aggregate(a))
+
+                    # make sure we find a good name
+                    name = com._get_callable_name(a) or a
+                    keys.append(name)
+                except (TypeError, DataError):
+                    pass
+                except SpecificationError:
+                    raise
+
+        # multiples
+        else:
+            for col in obj:
+                try:
+                    colg = self._gotitem(col, ndim=1, subset=obj[col])
+                    results.append(colg.aggregate(arg))
+                    keys.append(col)
+                except (TypeError, DataError):
+                    pass
+                except SpecificationError:
+                    raise
+
+        if _level:
+            keys = None
+        result = concat(results, keys=keys, axis=1)
+
+        return result
+
+    def _is_cython_func(self, arg):
+        """ if we define an internal function for this argument, return it """
+        return self._cython_table.get(arg)
+
+    def _is_builtin_func(self, arg):
+        """
+        if we define a builtin function for this argument, return it,
+        otherwise return the arg
+        """
+        return self._builtin_table.get(arg, arg)
+

 class FrozenList(PandasObject, list):
    """
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ff110880d34ba..2fc0786aa1e09 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5149,6 +5149,7 @@ def combineMult(self, other):
 DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0,
                       axes_are_reversed=True, aliases={'rows': 0})
 DataFrame._add_numeric_operations()
+DataFrame._add_series_or_dataframe_operations()

 _EMPTY_SERIES = Series([])

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b75573edc7157..d3cd0840782b4 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -29,7 +29,6 @@
 from pandas.util.decorators import Appender, Substitution, deprecate_kwarg
 from pandas.core import config

-
 # goal is to be able to define the docs close to function, while still being
 # able to share
 _shared_docs = dict()
@@ -4734,6 +4733,35 @@ def nanptp(values, axis=0, skipna=True):
                 method ``ptp``.""", nanptp)

+    @classmethod
+    def _add_series_or_dataframe_operations(cls):
+        """ add the series or dataframe only operations to the cls; evaluate the doc strings again """
+
+        from pandas.core import window as rwindow
+
+        @Appender(rwindow.rolling.__doc__)
+        def rolling(self, window, min_periods=None, freq=None, center=False,
+                    win_type=None, axis=0):
+            axis = self._get_axis_number(axis)
+            return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, center=center,
+                                   win_type=win_type, axis=axis)
+        cls.rolling = rolling
+
+        @Appender(rwindow.expanding.__doc__)
+        def expanding(self, min_periods=1, freq=None, center=False, axis=0):
+            axis = self._get_axis_number(axis)
+            return rwindow.expanding(self, min_periods=min_periods, freq=freq, center=center,
+                                     axis=axis)
+        cls.expanding = expanding
+
+        @Appender(rwindow.ewm.__doc__)
+        def ewm(self, com=None, span=None, halflife=None, min_periods=0, freq=None,
+                adjust=True, ignore_na=False, axis=0):
+            axis = self._get_axis_number(axis)
+            return rwindow.ewm(self, com=com, span=span, halflife=halflife, min_periods=min_periods,
+                               freq=freq, adjust=adjust, ignore_na=ignore_na, axis=axis)
+        cls.ewm = ewm
+

 def _doc_parms(cls):
    """ return a tuple of the doc parms """
    axis_descr = "{%s}" % ', '.join([
@@ -4916,6 +4944,6 @@ def logical_func(self, axis=None, bool_only=None, skipna=None,
         logical_func.__name__ = name
         return logical_func

-# install the indexerse
+# install the indexes
 for _name, _indexer in indexing.get_indexers_list():
     NDFrame._create_indexer(_name, _indexer)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 28d95c40c7294..5428ee5484bfa 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -12,7 +12,7 @@
 )

 from pandas import compat
-from pandas.core.base import PandasObject
+from pandas.core.base import PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError
 from pandas.core.categorical import Categorical
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
@@ -20,8 +20,9 @@
 from pandas.core.internals import BlockManager, make_block
 from pandas.core.series import Series
 from pandas.core.panel import Panel
-from pandas.util.decorators
import (cache_readonly, Appender, make_signature, +from pandas.util.decorators import (cache_readonly, Substitution, Appender, make_signature, deprecate_kwarg) +from textwrap import dedent import pandas.core.algorithms as algos import pandas.core.common as com from pandas.core.common import(_possibly_downcast_to_dtype, isnull, @@ -37,27 +38,14 @@ import pandas.algos as _algos import pandas.hashtable as _hash -_agg_doc = """Aggregate using input function or dict of {column -> function} - -Parameters ----------- -arg : function or dict - Function to use for aggregating groups. If a function, must either - work when passed a DataFrame or when passed to DataFrame.apply. If - passed a dict, the keys must be DataFrame column names. - -Notes ------ -Numpy functions mean/median/prod/sum/std/var are special cased so the -default behavior is applying the function along axis=0 -(e.g., np.mean(arr_2d, axis=0)) as opposed to -mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). - -Returns -------- -aggregated : DataFrame -""" +_doc_template = """ + See also + -------- + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s +""" # special case to prevent duplicate plots when catching exceptions when # forwarding methods from NDFrames @@ -91,20 +79,14 @@ _cython_transforms = frozenset(['cumprod', 'cumsum', 'shift']) -class GroupByError(Exception): - pass - - -class DataError(GroupByError): - pass - - -class SpecificationError(GroupByError): - pass - - def _groupby_function(name, alias, npfunc, numeric_only=True, _convert=False): + + _local_template = "Compute %(f)s of group values" + + @Substitution(name='groupby',f=name) + @Appender(_doc_template) + @Appender(_local_template) def f(self): self._set_selection_from_grouper() try: @@ -117,8 +99,7 @@ def f(self): result = result._convert(datetime=True) return result - f.__doc__ = "Compute %s of group values" % name - f.__name__ = name + f.__name__ = name return f @@ -319,7 +300,7 @@ def f(self): return attr -class GroupBy(PandasObject): +class GroupBy(PandasObject, SelectionMixin): """ Class for grouping and aggregating relational data. See aggregate, @@ -387,8 +368,6 @@ class GroupBy(PandasObject): Number of groups """ _apply_whitelist = _common_apply_whitelist - _internal_names = ['_cache'] - _internal_names_set = set(_internal_names) _group_selection = None def __init__(self, obj, keys=None, axis=0, level=None, @@ -493,19 +472,6 @@ def _get_index(self, name): """ safe get index, translate keys for datelike to underlying repr """ return self._get_indices([name])[0] - @property - def name(self): - if self._selection is None: - return None # 'result' - else: - return self._selection - - @property - def _selection_list(self): - if not isinstance(self._selection, (list, tuple, Series, Index, np.ndarray)): - return [self._selection] - return self._selection - @cache_readonly def _selected_obj(self): @@ -558,9 +524,6 @@ def __getattr__(self, attr): raise AttributeError("%r object has no attribute %r" % (type(self).__name__, attr)) - def __getitem__(self, key): - raise NotImplementedError('Not implemented: %s' % key) - plot = property(GroupByPlot) def _make_wrapper(self, name): @@ -660,6 +623,7 @@ def __iter__(self): """ return self.grouper.get_iterator(self.obj, axis=self.axis) + @Substitution(name='groupby') def apply(self, func, *args, **kwargs): """ Apply function and combine results together in an intelligent way. 
The @@ -695,16 +659,14 @@ def apply(self, func, *args, **kwargs): side-effects, as they will take effect twice for the first group. - See also -------- aggregate, transform + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s""" - Returns - ------- - applied : type depending on grouped object and function - """ - func = _intercept_function(func) + func = self._is_builtin_func(func) @wraps(func) def f(g): @@ -721,13 +683,6 @@ def _python_apply_general(self, f): return self._wrap_applied_output(keys, values, not_indexed_same=mutated) - def aggregate(self, func, *args, **kwargs): - raise AbstractMethodError(self) - - @Appender(_agg_doc) - def agg(self, func, *args, **kwargs): - return self.aggregate(func, *args, **kwargs) - def _iterate_slices(self): yield self.name, self._selected_obj @@ -744,12 +699,16 @@ def irow(self, i): FutureWarning, stacklevel=2) return self.nth(i) + @Substitution(name='groupby') + @Appender(_doc_template) def count(self): - """ Compute count of group, excluding missing values """ + """Compute count of group, excluding missing values""" # defined here for API doc raise NotImplementedError + @Substitution(name='groupby') + @Appender(_doc_template) def mean(self): """ Compute mean of groups, excluding missing values @@ -765,6 +724,8 @@ def mean(self): f = lambda x: x.mean(axis=self.axis) return self._python_agg_general(f) + @Substitution(name='groupby') + @Appender(_doc_template) def median(self): """ Compute median of groups, excluding missing values @@ -784,21 +745,37 @@ def f(x): return x.median(axis=self.axis) return self._python_agg_general(f) + @Substitution(name='groupby') + @Appender(_doc_template) def std(self, ddof=1): """ Compute standard deviation of groups, excluding missing values For multiple groupings, the result index will be a MultiIndex + + Parameters + ---------- + ddof : integer, default 1 + degrees of freedom """ + # todo, implement at cython level? 
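+        # for now, derive std from var so that ``ddof`` is handled in one place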
return np.sqrt(self.var(ddof=ddof)) + @Substitution(name='groupby') + @Appender(_doc_template) def var(self, ddof=1): """ Compute variance of groups, excluding missing values For multiple groupings, the result index will be a MultiIndex + + Parameters + ---------- + ddof : integer, default 1 + degrees of freedom """ + if ddof == 1: return self._cython_agg_general('var') else: @@ -806,19 +783,26 @@ def var(self, ddof=1): f = lambda x: x.var(ddof=ddof) return self._python_agg_general(f) + @Substitution(name='groupby') + @Appender(_doc_template) def sem(self, ddof=1): """ Compute standard error of the mean of groups, excluding missing values For multiple groupings, the result index will be a MultiIndex + + Parameters + ---------- + ddof : integer, default 1 + degrees of freedom """ + return self.std(ddof=ddof)/np.sqrt(self.count()) + @Substitution(name='groupby') + @Appender(_doc_template) def size(self): - """ - Compute group sizes - - """ + """Compute group sizes""" return self.grouper.size() sum = _groupby_function('sum', 'add', np.sum) @@ -830,14 +814,19 @@ def size(self): last = _groupby_function('last', 'last', _last_compat, numeric_only=False, _convert=True) + @Substitution(name='groupby') + @Appender(_doc_template) def ohlc(self): """ Compute sum of values, excluding missing values For multiple groupings, the result index will be a MultiIndex """ + return self._apply_to_column_groupbys( lambda x: x._cython_agg_general('ohlc')) + @Substitution(name='groupby') + @Appender(_doc_template) def nth(self, n, dropna=None): """ Take the nth row from each group if n is an int, or a subset of rows @@ -872,16 +861,16 @@ def nth(self, n, dropna=None): 2 5 6 >>> g.nth(0, dropna='any') B - A + A 1 4 5 6 >>> g.nth(1, dropna='any') # NaNs denote group exhausted when using dropna B - A + A 1 NaN 5 NaN - """ + if isinstance(n, int): nth_values = [n] elif isinstance(n, (set, list, tuple)): @@ -973,6 +962,8 @@ def nth(self, n, dropna=None): return result + @Substitution(name='groupby') + @Appender(_doc_template) def cumcount(self, ascending=True): """ Number each item in each group from 0 to the length of that group - 1. @@ -1015,37 +1006,44 @@ def cumcount(self, ascending=True): 4 0 5 0 dtype: int64 - """ + self._set_selection_from_grouper() index = self._selected_obj.index cumcounts = self._cumcount_array(ascending=ascending) return Series(cumcounts, index) + @Substitution(name='groupby') + @Appender(_doc_template) def cumprod(self, axis=0): - """ - Cumulative product for each group - - """ + """Cumulative product for each group""" if axis != 0: return self.apply(lambda x: x.cumprod(axis=axis)) return self._cython_transform('cumprod') + @Substitution(name='groupby') + @Appender(_doc_template) def cumsum(self, axis=0): - """ - Cumulative sum for each group - - """ + """Cumulative sum for each group""" if axis != 0: return self.apply(lambda x: x.cumprod(axis=axis)) return self._cython_transform('cumsum') + @Substitution(name='groupby') + @Appender(_doc_template) def shift(self, periods=1, freq=None, axis=0): """ Shift each group by periods observations + + Parameters + ---------- + periods : integer, default 1 + number of periods to shift + freq : frequency string + axis : axis to shift, default 0 """ if freq is not None or axis != 0: @@ -1062,6 +1060,8 @@ def shift(self, periods=1, freq=None, axis=0): return self._wrap_transformed_output(output) + @Substitution(name='groupby') + @Appender(_doc_template) def head(self, n=5): """ Returns first n rows of each group. 
@@ -1073,7 +1073,7 @@ def head(self, n=5): -------- >>> df = DataFrame([[1, 2], [1, 4], [5, 6]], - columns=['A', 'B']) + columns=['A', 'B']) >>> df.groupby('A', as_index=False).head(1) A B 0 1 2 @@ -1082,13 +1082,15 @@ def head(self, n=5): A B 0 1 2 2 5 6 - """ + obj = self._selected_obj in_head = self._cumcount_array() < n head = obj[in_head] return head + @Substitution(name='groupby') + @Appender(_doc_template) def tail(self, n=5): """ Returns last n rows of each group @@ -1100,7 +1102,7 @@ def tail(self, n=5): -------- >>> df = DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], - columns=['A', 'B']) + columns=['A', 'B']) >>> df.groupby('A').tail(1) A B 1 a 2 @@ -1109,8 +1111,8 @@ def tail(self, n=5): A B 0 a 1 2 b 1 - """ + obj = self._selected_obj rng = np.arange(0, -self.grouper._max_groupsize, -1, dtype='int64') in_tail = self._cumcount_array(rng, ascending=False) > -n @@ -1121,8 +1123,10 @@ def _cumcount_array(self, arr=None, ascending=True): """ arr is where cumcount gets its values from - note: this is currently implementing sort=False (though the default is sort=True) - for groupby in general + Note + ---- + this is currently implementing sort=False (though the default is sort=True) + for groupby in general """ if arr is None: arr = np.arange(self.grouper._max_groupsize, dtype='int64') @@ -1217,7 +1221,7 @@ def _cython_agg_general(self, how, numeric_only=True): return self._wrap_aggregated_output(output, names) def _python_agg_general(self, func, *args, **kwargs): - func = _intercept_function(func) + func = self._is_builtin_func(func) f = lambda x: func(x, *args, **kwargs) # iterate through "columns" ex exclusions to populate output dict @@ -1733,7 +1737,7 @@ def agg_series(self, obj, func): return self._aggregate_series_pure_python(obj, func) def _aggregate_series_fast(self, obj, func): - func = _intercept_function(func) + func = self._is_builtin_func(func) if obj.index._has_complex_internals: raise TypeError('Incompatible index for Cython grouper') @@ -2421,13 +2425,14 @@ def aggregate(self, func_or_funcs, *args, **kwargs): ------- Series or DataFrame """ + _level = kwargs.pop('_level',None) if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) if hasattr(func_or_funcs, '__iter__'): ret = self._aggregate_multiple_funcs(func_or_funcs) else: - cyfunc = _intercept_cython(func_or_funcs) + cyfunc = self._is_cython_func(func_or_funcs) if cyfunc and not args and not kwargs: return getattr(self, cyfunc)() @@ -2447,6 +2452,8 @@ def aggregate(self, func_or_funcs, *args, **kwargs): return ret + agg = aggregate + def _aggregate_multiple_funcs(self, arg): if isinstance(arg, dict): columns = list(arg.keys()) @@ -2470,11 +2477,18 @@ def _aggregate_multiple_funcs(self, arg): results = {} for name, func in arg: + obj = self if name in results: raise SpecificationError('Function names must be unique, ' 'found multiple named %s' % name) - results[name] = self.aggregate(func) + # reset the cache so that we + # only include the named selection + if name in self._selected_obj: + obj = copy.copy(obj) + obj._reset_cache() + obj._selection = name + results[name] = obj.aggregate(func) return DataFrame(results, columns=columns) @@ -2559,7 +2573,7 @@ def transform(self, func, *args, **kwargs): transformed : Series """ - func = _intercept_cython(func) or func + func = self._is_cython_func(func) or func # if string function if isinstance(func, compat.string_types): @@ -2912,68 +2926,16 @@ def _post_process_cython_aggregate(self, obj): obj = obj.swapaxes(0, 
1) return obj - @cache_readonly - def _obj_with_exclusions(self): - if self._selection is not None: - return self.obj.reindex(columns=self._selection_list) - - if len(self.exclusions) > 0: - return self.obj.drop(self.exclusions, axis=1) - else: - return self.obj - - @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): - if isinstance(arg, compat.string_types): - return getattr(self, arg)(*args, **kwargs) - - result = OrderedDict() - if isinstance(arg, dict): - if self.axis != 0: # pragma: no cover - raise ValueError('Can only pass dict with axis=0') - obj = self._selected_obj + _level = kwargs.pop('_level',None) + result, how = self._aggregate(arg, _level=_level, *args, **kwargs) + if how is None: + return result - if any(isinstance(x, (list, tuple, dict)) for x in arg.values()): - new_arg = OrderedDict() - for k, v in compat.iteritems(arg): - if not isinstance(v, (tuple, list, dict)): - new_arg[k] = [v] - else: - new_arg[k] = v - arg = new_arg - - keys = [] - if self._selection is not None: - subset = obj - if isinstance(subset, DataFrame): - raise NotImplementedError("Aggregating on a DataFrame is " - "not supported") - - for fname, agg_how in compat.iteritems(arg): - colg = SeriesGroupBy(subset, selection=self._selection, - grouper=self.grouper) - result[fname] = colg.aggregate(agg_how) - keys.append(fname) - else: - for col, agg_how in compat.iteritems(arg): - colg = SeriesGroupBy(obj[col], selection=col, - grouper=self.grouper) - result[col] = colg.aggregate(agg_how) - keys.append(col) - - if isinstance(list(result.values())[0], DataFrame): - from pandas.tools.merge import concat - result = concat([result[k] for k in keys], keys=keys, axis=1) - else: - result = DataFrame(result) - elif isinstance(arg, list): - return self._aggregate_multiple_funcs(arg) - else: - cyfunc = _intercept_cython(arg) - if cyfunc and not args and not kwargs: - return getattr(self, cyfunc)() + if result is None: + # grouper specific aggregations if self.grouper.nkeys > 1: return self._python_agg_general(arg, *args, **kwargs) else: @@ -2981,7 +2943,7 @@ def aggregate(self, arg, *args, **kwargs): # try to treat as if we are passing a list try: assert not args and not kwargs - result = self._aggregate_multiple_funcs([arg]) + result = self._aggregate_multiple_funcs([arg], _level=_level) result.columns = Index(result.columns.levels[0], name=self._selected_obj.columns.name) except: @@ -2993,29 +2955,7 @@ def aggregate(self, arg, *args, **kwargs): return result._convert(datetime=True) - def _aggregate_multiple_funcs(self, arg): - from pandas.tools.merge import concat - - if self.axis != 0: - raise NotImplementedError("axis other than 0 is not supported") - - obj = self._obj_with_exclusions - - results = [] - keys = [] - for col in obj: - try: - colg = SeriesGroupBy(obj[col], selection=col, - grouper=self.grouper) - results.append(colg.aggregate(arg)) - keys.append(col) - except (TypeError, DataError): - pass - except SpecificationError: - raise - result = concat(results, keys=keys, axis=1) - - return result + agg = aggregate def _aggregate_generic(self, func, *args, **kwargs): if self.grouper.nkeys != 1: @@ -3318,7 +3258,7 @@ def transform(self, func, *args, **kwargs): """ # optimized transforms - func = _intercept_cython(func) or func + func = self._is_cython_func(func) or func if isinstance(func, compat.string_types): if func in _cython_transforms: # cythonized transform @@ -3463,35 +3403,42 @@ class DataFrameGroupBy(NDFrameGroupBy): _block_agg_axis = 1 - def __getitem__(self, key): - if self._selection 
is not None: - raise Exception('Column(s) %s already selected' % self._selection) + @Substitution(name='groupby') + @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) + def aggregate(self, arg, *args, **kwargs): + return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs) - if isinstance(key, (list, tuple, Series, Index, np.ndarray)): - if len(self.obj.columns.intersection(key)) != len(key): - bad_keys = list(set(key).difference(self.obj.columns)) - raise KeyError("Columns not found: %s" - % str(bad_keys)[1:-1]) - return DataFrameGroupBy(self.obj, self.grouper, selection=key, - grouper=self.grouper, - exclusions=self.exclusions, - as_index=self.as_index) + agg = aggregate - elif not self.as_index: - if key not in self.obj.columns: - raise KeyError("Column not found: %s" % key) - return DataFrameGroupBy(self.obj, self.grouper, selection=key, + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + + if ndim == 2: + if subset is None: + subset = self.obj + return DataFrameGroupBy(subset, self.grouper, selection=key, grouper=self.grouper, exclusions=self.exclusions, as_index=self.as_index) + elif ndim == 1: + if subset is None: + subset = self.obj[key] + return SeriesGroupBy(subset, selection=key, + grouper=self.grouper) - else: - if key not in self.obj: - raise KeyError("Column not found: %s" % key) - # kind of a kludge - return SeriesGroupBy(self.obj[key], selection=key, - grouper=self.grouper, - exclusions=self.exclusions) + raise AssertionError("invalid ndim for _gotitem") def _wrap_generic_output(self, result, obj): result_index = self.grouper.levels[0] @@ -3627,6 +3574,14 @@ def count(self): class PanelGroupBy(NDFrameGroupBy): + @Substitution(name='groupby') + @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) + def aggregate(self, arg, *args, **kwargs): + return super(PanelGroupBy, self).aggregate(arg, *args, **kwargs) + + agg = aggregate + def _iterate_slices(self): if self.axis == 0: # kludge @@ -4162,38 +4117,6 @@ def _reorder_by_uniques(uniques, labels): return uniques, labels -_func_table = { - builtins.sum: np.sum, - builtins.max: np.max, - builtins.min: np.min -} - - -_cython_table = { - builtins.sum: 'sum', - builtins.max: 'max', - builtins.min: 'min', - np.sum: 'sum', - np.mean: 'mean', - np.prod: 'prod', - np.std: 'std', - np.var: 'var', - np.median: 'median', - np.max: 'max', - np.min: 'min', - np.cumprod: 'cumprod', - np.cumsum: 'cumsum' -} - - -def _intercept_function(func): - return _func_table.get(func, func) - - -def _intercept_cython(func): - return _cython_table.get(func) - - def _groupby_indices(values): return _algos.groupby_indices(_values_from_object(com._ensure_object(values))) diff --git a/pandas/core/series.py b/pandas/core/series.py index ca55a834a33d2..d6eb18396e14c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2765,6 +2765,7 @@ def _dir_additions(self): aliases={'rows': 0}) Series._add_numeric_operations() Series._add_series_only_operations() +Series._add_series_or_dataframe_operations() _INDEX_TYPES = ndarray, Index, list, tuple #------------------------------------------------------------------------------ diff --git a/pandas/core/window.py b/pandas/core/window.py new file mode 100644 index 0000000000000..4bbdf444ac2a7 --- /dev/null +++ b/pandas/core/window.py 
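The new ``pandas/core/window.py`` module added below provides the ``Window``, ``Rolling``, ``Expanding`` and ``EWM`` classes that back the deprecated module-level functions. As a minimal sketch of the replacement syntax (illustrative data only):

.. ipython:: python

   s = pd.Series(np.random.randn(10))
   s.rolling(window=3).mean()        # replaces pd.rolling_mean(s, 3)
   s.expanding(min_periods=1).sum()  # replaces pd.expanding_sum(s)
   s.ewm(span=5).mean()              # replaces pd.ewma(s, span=5)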
@@ -0,0 +1,1364 @@ +""" + +provide a generic structure to support window functions, +similar to how we have a GroupBy object + + +""" +from __future__ import division + +import warnings +import numpy as np +from functools import wraps +from collections import defaultdict + +import pandas as pd +from pandas.lib import isscalar +from pandas.core.base import PandasObject, SelectionMixin, AbstractMethodError +import pandas.core.common as com +import pandas.algos as algos +from pandas import compat +from pandas.util.decorators import Substitution, Appender +from textwrap import dedent + +_shared_docs = dict() +_doc_template = """ + +Returns +------- +same type as input + +See also +-------- +pandas.Series.%(name)s +pandas.DataFrame.%(name)s +""" + +class _Window(PandasObject, SelectionMixin): + _attributes = ['window','min_periods','freq','center','win_type','axis'] + exclusions = set() + + def __init__(self, obj, window=None, min_periods=None, freq=None, center=False, + win_type=None, axis=0): + + if freq is not None: + warnings.warn("The freq kw is deprecated and will be removed in a future version. You can resample prior " + "to passing to a window function", + FutureWarning, stacklevel=3) + + self.blocks = [] + self.obj = obj + self.window = window + self.min_periods = min_periods + self.freq = freq + self.center = center + self.win_type = win_type + self.axis = axis + self._setup() + + @property + def _constructor(self): + return Window + + def _setup(self): + pass + + def _convert_freq(self, how=None): + """ resample according to the how, return a new object """ + + obj = self._selected_obj + if self.freq is not None and isinstance(obj, (com.ABCSeries, com.ABCDataFrame)): + if how is not None: + warnings.warn("The how kw argument is deprecated and will be removed in a future version.
You can resample prior " + "to passing to a window function", + FutureWarning, stacklevel=6) + + obj = obj.resample(self.freq, how=how) + return obj + + def _create_blocks(self, how): + """ split data into blocks & return conformed data """ + + obj = self._convert_freq(how) + return obj.as_blocks(copy=False).values(), obj + + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + + # create a new object to prevent aliasing + if subset is None: + subset = self.obj + self = self._shallow_copy(subset) + self._reset_cache() + if subset.ndim==2: + if isscalar(key) and key in subset or com.is_list_like(key): + self._selection = key + return self + + def __getattr__(self, attr): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self.obj: + return self[attr] + + raise AttributeError("%r object has no attribute %r" % + (type(self).__name__, attr)) + + def _dir_additions(self): + return self.obj._dir_additions() + + def _get_window(self, other=None): + return self.window + + def __unicode__(self): + """ provide a nice str repr of our rolling object """ + + attrs = [ "{k}={v}".format(k=k,v=getattr(self,k)) \ + for k in self._attributes if getattr(self,k,None) is not None ] + return "{klass} [{attrs}]".format(klass=self.__class__.__name__, + attrs=','.join(attrs)) + + def _shallow_copy(self, obj=None, **kwargs): + """ return a new object with the replacement attributes """ + if obj is None: + obj = self._selected_obj.copy() + if isinstance(obj, self.__class__): + obj = obj.obj + for attr in self._attributes: + if attr not in kwargs: + kwargs[attr] = getattr(self,attr) + return self._constructor(obj, **kwargs) + + def _prep_values(self, values=None, kill_inf=True, how=None): + + if values is None: + values = getattr(self._selected_obj,'values',self._selected_obj) + + # coerce dtypes as appropriate + if com.is_float_dtype(values.dtype): + pass + elif com.is_integer_dtype(values.dtype): + values = values.astype(float) + elif com.is_timedelta64_dtype(values.dtype): + values = values.view('i8').astype(float) + else: + try: + values = values.astype(float) + except (ValueError, TypeError): + raise TypeError("cannot handle this type -> {0}".format(values.dtype)) + + if kill_inf: + values = values.copy() + values[np.isinf(values)] = np.NaN + + return values + + def _wrap_result(self, result, block=None, obj=None): + """ wrap a single result """ + + if obj is None: + obj = self._selected_obj + if isinstance(result, np.ndarray): + + # coerce if necessary + if block is not None: + if com.is_timedelta64_dtype(block.values.dtype): + result = pd.to_timedelta(result.ravel(),unit='ns').values.reshape(result.shape) + + if result.ndim == 1: + from pandas import Series + return Series(result, obj.index, name=obj.name) + + return type(obj)(result, + index=obj.index, + columns=block.columns) + return result + + def _wrap_results(self, results, blocks, obj): + """ + wrap the results + + Parameters + ---------- + results : list of ndarrays + blocks : list of blocks + obj : conformed data (may be resampled) + """ + + final = [] + for result, block in zip(results, blocks): + + result = self._wrap_result(result, block=block, obj=obj) + if result.ndim == 1: + return result + final.append(result) + + if not len(final): + return obj.astype('float64') + return
pd.concat(final,axis=1).reindex(columns=obj.columns) + + def _center_window(self, result, window): + """ center the result in the window """ + if self.axis > result.ndim-1: + raise ValueError("Requested axis is larger than no. of argument " + "dimensions") + + from pandas import Series, DataFrame + offset = _offset(window, True) + if offset > 0: + if isinstance(result, (Series, DataFrame)): + result = result.slice_shift(-offset, axis=self.axis) + else: + lead_indexer = [slice(None)] * result.ndim + lead_indexer[self.axis] = slice(offset, None) + result = np.copy(result[tuple(lead_indexer)]) + return result + + def aggregate(self, arg, *args, **kwargs): + result, how = self._aggregate(arg, *args, **kwargs) + if result is None: + return self.apply(arg, args=args, kwargs=kwargs) + return result + + agg = aggregate + + _shared_docs['sum'] = dedent(""" + %(name)s sum + + Parameters + ---------- + how : string, default None (DEPRECATED) + Method for down- or re-sampling""") + + _shared_docs['mean'] = dedent(""" + %(name)s mean + + Parameters + ---------- + how : string, default None (DEPRECATED) + Method for down- or re-sampling""") + +class Window(_Window): + """ + Provides rolling transformations. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + window : int + Size of the moving window. This is the number of observations used for + calculating the statistic. + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) (DEPRECATED) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. + win_type : string, default None + Provide a window type, see the notes below + axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + + The recognized window types are: + + * ``boxcar`` + * ``triang`` + * ``blackman`` + * ``hamming`` + * ``bartlett`` + * ``parzen`` + * ``bohman`` + * ``blackmanharris`` + * ``nuttall`` + * ``barthann`` + * ``kaiser`` (needs beta) + * ``gaussian`` (needs std) + * ``general_gaussian`` (needs power, width) + * ``slepian`` (needs width). +""" + + def _prep_window(self, **kwargs): + """ provide validation for our window type, return the window """ + window = self._get_window() + + if isinstance(window, (list, tuple, np.ndarray)): + return com._asarray_tuplesafe(window).astype(float) + elif com.is_integer(window): + try: + import scipy.signal as sig + except ImportError: + raise ImportError('Please install scipy to generate window weight') + win_type = _validate_win_type(self.win_type, kwargs) # may pop from kwargs + return sig.get_window(win_type, window).astype(float) + + raise ValueError('Invalid window %s' % str(window)) + + def _apply_window(self, mean=True, how=None, **kwargs): + """ + Applies a moving window of type ``window_type`` on the data.
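+ For example, ``win_type='gaussian'`` requires a ``std`` argument; the extra + keyword is validated and popped from ``kwargs`` by ``_validate_win_type``.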
+ + Parameters + ---------- + mean : boolean, default True + If True computes weighted mean, else weighted sum + how : string, default to None (DEPRECATED) + how to resample + + Returns + ------- + y : type of input argument + + """ + window = self._prep_window(**kwargs) + center = self.center + + blocks, obj = self._create_blocks(how=how) + results = [] + for b in blocks: + try: + values = self._prep_values(b.values) + except TypeError: + results.append(b.values.copy()) + continue + + if values.size == 0: + results.append(values.copy()) + continue + + offset = _offset(window, center) + additional_nans = np.array([np.NaN] * offset) + def f(arg, *args, **kwargs): + minp = _use_window(self.min_periods, len(window)) + return algos.roll_window(np.concatenate((arg, additional_nans)) if center else arg, + window, minp, avg=mean) + + result = np.apply_along_axis(f, self.axis, values) + + if center: + result = self._center_window(result, window) + results.append(result) + + return self._wrap_results(results, blocks, obj) + + @Substitution(name='rolling') + @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) + def aggregate(self, arg, *args, **kwargs): + result, how = self._aggregate(arg, *args, **kwargs) + if result is None: + + # these must apply directly + result = arg(self) + + return result + + agg = aggregate + + @Substitution(name='window') + @Appender(_doc_template) + @Appender(_shared_docs['sum']) + def sum(self, **kwargs): + return self._apply_window(mean=False, **kwargs) + + @Substitution(name='window') + @Appender(_doc_template) + @Appender(_shared_docs['mean']) + def mean(self, **kwargs): + return self._apply_window(mean=True, **kwargs) + +class _Rolling(_Window): + + @property + def _constructor(self): + return Rolling + + def _apply(self, func, window=None, center=None, check_minp=None, how=None, **kwargs): + """ + Rolling statistical measure using supplied function. Designed to be + used with passed-in Cython array-based functions. 
+ + Parameters + ---------- + func : string/callable to apply + window : int/array, default to _get_window() + center : boolean, default to self.center + check_minp : function, default to _use_window + how : string, default to None (DEPRECATED) + how to resample + + Returns + ------- + y : type of input + """ + if center is None: + center = self.center + if window is None: + window = self._get_window() + + if check_minp is None: + check_minp = _use_window + + blocks, obj = self._create_blocks(how=how) + results = [] + for b in blocks: + try: + values = self._prep_values(b.values) + except TypeError: + results.append(b.values.copy()) + continue + + if values.size == 0: + results.append(values.copy()) + continue + + # if we have a string function name, wrap it + if isinstance(func, compat.string_types): + if not hasattr(algos, func): + raise ValueError("we do not support this function algos.{0}".format(func)) + + cfunc = getattr(algos, func) + def func(arg, window, min_periods=None): + minp = check_minp(min_periods, window) + return cfunc(arg, window, minp, **kwargs) + + # calculation function + if center: + offset = _offset(window, center) + additional_nans = np.array([np.NaN] * offset) + def calc(x): + return func(np.concatenate((x, additional_nans)), + window, min_periods=self.min_periods) + else: + def calc(x): + return func(x,window, min_periods=self.min_periods) + + if values.ndim > 1: + result = np.apply_along_axis(calc, self.axis, values) + else: + result = calc(values) + + if center: + result = self._center_window(result, window) + + results.append(result) + + return self._wrap_results(results, blocks, obj) + +class _Rolling_and_Expanding(_Rolling): + + _shared_docs['count'] = """%(name)s count of number of non-NaN observations inside provided window.""" + def count(self): + obj = self._convert_freq() + window = self._get_window() + window = min(window, len(obj)) if not self.center else window + try: + converted = np.isfinite(obj).astype(float) + except TypeError: + converted = np.isfinite(obj.astype(float)).astype(float) + result = self._constructor(converted, + window=window, + min_periods=0, + center=self.center).sum() + + result[result.isnull()] = 0 + return result + + _shared_docs['apply'] = dedent(""" + %(name)s function apply + + Parameters + ---------- + func : function + Must produce a single value from an ndarray input + *args and **kwargs are passed to the function""") + + def apply(self, func, args=(), kwargs={}): + _level = kwargs.pop('_level',None) + window = self._get_window() + offset = _offset(window, self.center) + def f(arg, window, min_periods): + minp = _use_window(min_periods, window) + return algos.roll_generic(arg, window, minp, offset, func, args, kwargs) + + return self._apply(f, center=False) + + def sum(self, **kwargs): + return self._apply('roll_sum', **kwargs) + + _shared_docs['max'] = dedent(""" + %(name)s maximum + + Parameters + ---------- + how : string, default 'max' (DEPRECATED) + Method for down- or re-sampling""") + def max(self, how=None, **kwargs): + if self.freq is not None and how is None: + how = 'max' + return self._apply('roll_max', how=how, **kwargs) + + _shared_docs['min'] = dedent(""" + %(name)s minimum + + Parameters + ---------- + how : string, default 'min' (DEPRECATED) + Method for down- or re-sampling""") + def min(self, how=None, **kwargs): + if self.freq is not None and how is None: + how = 'min' + return self._apply('roll_min', how=how, **kwargs) + + def mean(self, **kwargs): + return self._apply('roll_mean', **kwargs) + + 
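+ # note: the string names used above ('roll_sum', 'roll_max', ...) resolve via + # _apply to cython kernels in pandas.algos, e.g. + # Series([1., 2., 3.]).rolling(2).sum() -> [NaN, 3.0, 5.0]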
_shared_docs['median'] = dedent(""" + %(name)s median + + Parameters + ---------- + how : string, default 'median' (DEPRECATED) + Method for down- or re-sampling""") + def median(self, how=None, **kwargs): + if self.freq is not None and how is None: + how = 'median' + return self._apply('roll_median_c', how=how, **kwargs) + + _shared_docs['std'] = dedent(""" + %(name)s standard deviation + + Parameters + ---------- + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""") + + def std(self, ddof=1, **kwargs): + window = self._get_window() + def f(arg, *args, **kwargs): + minp = _require_min_periods(1)(self.min_periods, window) + return _zsqrt(algos.roll_var(arg, window, minp, ddof)) + + return self._apply(f, check_minp=_require_min_periods(1), **kwargs) + + _shared_docs['var'] = dedent(""" + %(name)s variance + + Parameters + ---------- + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""") + + def var(self, ddof=1, **kwargs): + return self._apply('roll_var', + check_minp=_require_min_periods(1), + ddof=ddof, + **kwargs) + + _shared_docs['skew'] = """Unbiased %(name)s skewness""" + def skew(self, **kwargs): + return self._apply('roll_skew', + check_minp=_require_min_periods(3), + **kwargs) + + _shared_docs['kurt'] = """Unbiased %(name)s kurtosis""" + def kurt(self, **kwargs): + return self._apply('roll_kurt', + check_minp=_require_min_periods(4), + **kwargs) + + _shared_docs['quantile'] = dedent(""" + %(name)s quantile + + Parameters + ---------- + quantile : float + 0 <= quantile <= 1""") + + def quantile(self, quantile, **kwargs): + window = self._get_window() + def f(arg, *args, **kwargs): + minp = _use_window(self.min_periods, window) + return algos.roll_quantile(arg, window, minp, quantile) + + return self._apply(f, **kwargs) + + _shared_docs['cov'] = dedent(""" + %(name)s sample covariance + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default None + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. 
The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""") + + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + if other is None: + other = self._selected_obj + pairwise = True if pairwise is None else pairwise # only default unset + other = self._shallow_copy(other) + window = self._get_window(other) + + def _get_cov(X, Y): + mean = lambda x: x.rolling(window, self.min_periods, center=self.center).mean(**kwargs) + count = (X+Y).rolling(window=window, center=self.center).count(**kwargs) + bias_adj = count / (count - ddof) + return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)) + + _shared_docs['corr'] = dedent(""" + %(name)s sample correlation + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output + pairwise : bool, default None + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used.""") + + def corr(self, other=None, pairwise=None, **kwargs): + if other is None: + other = self._selected_obj + pairwise = True if pairwise is None else pairwise # only default unset + other = self._shallow_copy(other) + window = self._get_window(other) + + def _get_corr(a, b): + a = a.rolling(window=window, + min_periods=self.min_periods, + freq=self.freq, + center=self.center) + b = b.rolling(window=window, + min_periods=self.min_periods, + freq=self.freq, + center=self.center) + + return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)) + +class Rolling(_Rolling_and_Expanding): + """ + Provides rolling window calculations. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + window : int + Size of the moving window. This is the number of observations used for + calculating the statistic. + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) (DEPRECATED) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. + axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
+ """ + + @Substitution(name='rolling') + @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) + def aggregate(self, arg, *args, **kwargs): + return super(Rolling, self).aggregate(arg, *args, **kwargs) + + agg = aggregate + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['count']) + def count(self): + return super(Rolling, self).count() + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['apply']) + def apply(self, func, args=(), kwargs={}): + return super(Rolling, self).apply(func, args=args, kwargs=kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['sum']) + def sum(self, **kwargs): + return super(Rolling, self).sum(**kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['max']) + def max(self, **kwargs): + return super(Rolling, self).max(**kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['min']) + def min(self, **kwargs): + return super(Rolling, self).min(**kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['mean']) + def mean(self, **kwargs): + return super(Rolling, self).mean(**kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['median']) + def median(self, **kwargs): + return super(Rolling, self).median(**kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['std']) + def std(self, ddof=1, **kwargs): + return super(Rolling, self).std(ddof=ddof, **kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['var']) + def var(self, ddof=1, **kwargs): + return super(Rolling, self).var(ddof=ddof, **kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['skew']) + def skew(self, **kwargs): + return super(Rolling, self).skew(**kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['kurt']) + def kurt(self, **kwargs): + return super(Rolling, self).kurt(**kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['quantile']) + def quantile(self, quantile, **kwargs): + return super(Rolling, self).quantile(quantile=quantile, **kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['cov']) + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + return super(Rolling, self).cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) + + @Substitution(name='rolling') + @Appender(_doc_template) + @Appender(_shared_docs['corr']) + def corr(self, other=None, pairwise=None, **kwargs): + return super(Rolling, self).corr(other=other, pairwise=pairwise, **kwargs) + +class Expanding(_Rolling_and_Expanding): + """ + Provides expanding transformations. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : string or DateOffset object, optional (default None) (DEPRECATED) + Frequency to conform the data to before computing the statistic. Specified + as a frequency string or DateOffset object. + center : boolean, default False + Set the labels at the center of the window. 
+ axis : int, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + """ + + _attributes = ['min_periods','freq','center','axis'] + + def __init__(self, obj, min_periods=1, freq=None, center=False, axis=0, **kwargs): + return super(Expanding, self).__init__(obj=obj, min_periods=min_periods, freq=freq, center=center, axis=axis) + + @property + def _constructor(self): + return Expanding + + def _get_window(self, other=None): + obj = self._selected_obj + if other is None: + return max(len(obj), self.min_periods) if self.min_periods else len(obj) + return max((len(obj) + len(obj)), self.min_periods) if self.min_periods else (len(obj) + len(obj)) + + @Substitution(name='expanding') + @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) + def aggregate(self, arg, *args, **kwargs): + return super(Expanding, self).aggregate(arg, *args, **kwargs) + + agg = aggregate + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['count']) + def count(self, **kwargs): + return super(Expanding, self).count(**kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['apply']) + def apply(self, func, args=(), kwargs={}): + return super(Expanding, self).apply(func, args=args, kwargs=kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['sum']) + def sum(self, **kwargs): + return super(Expanding, self).sum(**kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['max']) + def max(self, **kwargs): + return super(Expanding, self).max(**kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['min']) + def min(self, **kwargs): + return super(Expanding, self).min(**kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['mean']) + def mean(self, **kwargs): + return super(Expanding, self).mean(**kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['median']) + def median(self, **kwargs): + return super(Expanding, self).median(**kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['std']) + def std(self, ddof=1, **kwargs): + return super(Expanding, self).std(ddof=ddof, **kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['var']) + def var(self, ddof=1, **kwargs): + return super(Expanding, self).var(ddof=ddof, **kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['skew']) + def skew(self, **kwargs): + return super(Expanding, self).skew(**kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['kurt']) + def kurt(self, **kwargs): + return super(Expanding, self).kurt(**kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['quantile']) + def quantile(self, quantile, **kwargs): + return super(Expanding, self).quantile(quantile=quantile, **kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + 
@Appender(_shared_docs['cov']) + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + return super(Expanding, self).cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) + + @Substitution(name='expanding') + @Appender(_doc_template) + @Appender(_shared_docs['corr']) + def corr(self, other=None, pairwise=None, **kwargs): + return super(Expanding, self).corr(other=other, pairwise=pairwise, **kwargs) + +_bias_template = """ + +Parameters +---------- +bias : boolean, default False + Use a standard estimation bias correction +""" + +_pairwise_template = """ + +Parameters +---------- +other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output +pairwise : bool, default None + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. +bias : boolean, default False + Use a standard estimation bias correction +""" + +class EWM(_Rolling): + """ + Provides exponential weighted functions + + .. versionadded:: 0.18.0 + + Parameters + ---------- + com : float, optional + Center of mass: :math:`\alpha = 1 / (1 + com)` + span : float, optional + Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)` + halflife : float, optional + Specify decay in terms of halflife, :math:`\alpha = 1 - exp(log(0.5) / halflife)` + min_periods : int, default 0 + Minimum number of observations in window required to have a value + (otherwise result is NA). + freq : None or string alias / date offset object, default=None (DEPRECATED) + Frequency to conform to before computing statistic + adjust : boolean, default True + Divide by decaying adjustment factor in beginning periods to account for + imbalance in relative weightings (viewing EWMA as a moving average) + ignore_na : boolean, default False + Ignore missing values when calculating weights; + specify True to reproduce pre-0.15.0 behavior + + Returns + ------- + a Window sub-classed for the particular operation + + Notes + ----- + Either center of mass, span or halflife must be specified + + EWMA is sometimes specified using a "span" parameter `s`; the decay + parameter :math:`\alpha` is related to the span as + :math:`\alpha = 2 / (s + 1) = 1 / (1 + c)` + + where `c` is the center of mass. Given a span, the associated center of mass is + :math:`c = (s - 1) / 2` + + So a "20-day EWMA" would have center 9.5. + + The `freq` keyword is used to conform time series data to a specified + frequency by resampling the data. This is done with the default parameters + of :meth:`~pandas.Series.resample` (i.e. using the `mean`). + + When adjust is True (default), weighted averages are calculated using weights + (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. + + When adjust is False, weighted averages are calculated recursively as: + weighted_average[0] = arg[0]; + weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. + + When ignore_na is False (default), weights are based on absolute positions. + For example, the weights of x and y used in calculating the final weighted + average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and + (1-alpha)**2 and alpha (if adjust is False). + + When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based on + relative positions.
For example, the weights of x and y used in calculating + the final weighted average of [x, None, y] are 1-alpha and 1 (if adjust is + True), and 1-alpha and alpha (if adjust is False). + + More details can be found at + http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions + """ + _attributes = ['com','min_periods','freq','adjust','ignore_na','axis'] + + def __init__(self, obj, com=None, span=None, halflife=None, min_periods=0, freq=None, + adjust=True, ignore_na=False, axis=0): + self.obj = obj + self.com = _get_center_of_mass(com, span, halflife) + self.min_periods = min_periods + self.freq = freq + self.adjust = adjust + self.ignore_na = ignore_na + self.axis = axis + + @property + def _constructor(self): + return EWM + + @Substitution(name='ewm') + @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) + def aggregate(self, arg, *args, **kwargs): + return super(EWM, self).aggregate(arg, *args, **kwargs) + + agg = aggregate + + def _apply(self, func, how=None, **kwargs): + """Rolling statistical measure using supplied function. Designed to be + used with passed-in Cython array-based functions. + + Parameters + ---------- + func : string/callable to apply + how : string, default to None (DEPRECATED) + how to resample + + Returns + ------- + y : type of input argument + + """ + blocks, obj = self._create_blocks(how=how) + results = [] + for b in blocks: + try: + values = self._prep_values(b.values) + except TypeError: + results.append(b.values.copy()) + continue + + if values.size == 0: + results.append(values.copy()) + continue + + # if we have a string function name, wrap it + if isinstance(func, compat.string_types): + if not hasattr(algos, func): + raise ValueError("we do not support this function algos.{0}".format(func)) + + cfunc = getattr(algos, func) + def func(arg): + return cfunc(arg, self.com, int(self.adjust), int(self.ignore_na), int(self.min_periods)) + + results.append(np.apply_along_axis(func, self.axis, values)) + + return self._wrap_results(results, blocks, obj) + + @Substitution(name='ewm') + @Appender(_doc_template) + def mean(self, **kwargs): + """exponential weighted moving average""" + return self._apply('ewma', **kwargs) + + @Substitution(name='ewm') + @Appender(_doc_template) + @Appender(_bias_template) + def std(self, bias=False, **kwargs): + """exponential weighted moving stddev""" + return _zsqrt(self.var(bias=bias, **kwargs)) + vol=std + + @Substitution(name='ewm') + @Appender(_doc_template) + @Appender(_bias_template) + def var(self, bias=False, **kwargs): + """exponential weighted moving variance""" + def f(arg): + return algos.ewmcov(arg, + arg, + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias)) + + return self._apply(f, **kwargs) + + @Substitution(name='ewm') + @Appender(_doc_template) + @Appender(_pairwise_template) + def cov(self, other=None, pairwise=None, bias=False, **kwargs): + """exponential weighted sample covariance""" + if other is None: + other = self._selected_obj + pairwise = True if pairwise is None else pairwise # only default unset + other = self._shallow_copy(other) + + def _get_cov(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + cov = algos.ewmcov(X._prep_values(), + Y._prep_values(), + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias)) + return X._wrap_result(cov) + + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, 
pairwise=bool(pairwise)) + + @Substitution(name='ewm') + @Appender(_doc_template) + @Appender(_pairwise_template) + def corr(self, other=None, pairwise=None, **kwargs): + """exponential weighted sample correlation""" + if other is None: + other = self._selected_obj + pairwise = True if pairwise is None else pairwise # only default unset + other = self._shallow_copy(other) + + def _get_corr(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + def _cov(x, y): + return algos.ewmcov(x, y, self.com, int(self.adjust), int(self.ignore_na), int(self.min_periods), 1) + + x_values = X._prep_values() + y_values = Y._prep_values() + cov = _cov(x_values, y_values) + x_var = _cov(x_values, x_values) + y_var = _cov(y_values, y_values) + corr = cov / _zsqrt(x_var * y_var) + return X._wrap_result(corr) + + return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)) + +######################## +##### Helper Funcs ##### +######################## + +def _flex_binary_moment(arg1, arg2, f, pairwise=False): + from pandas import Series, DataFrame, Panel + if not (isinstance(arg1,(np.ndarray, Series, DataFrame)) and + isinstance(arg2,(np.ndarray, Series, DataFrame))): + raise TypeError("arguments to moment function must be of type " + "np.ndarray/Series/DataFrame") + + if isinstance(arg1, (np.ndarray, Series)) and \ + isinstance(arg2, (np.ndarray,Series)): + X, Y = _prep_binary(arg1, arg2) + return f(X, Y) + + elif isinstance(arg1, DataFrame): + def dataframe_from_int_dict(data, frame_template): + result = DataFrame(data, index=frame_template.index) + if len(result.columns) > 0: + result.columns = frame_template.columns[result.columns] + return result + + results = {} + if isinstance(arg2, DataFrame): + if pairwise is False: + if arg1 is arg2: + # special case in order to handle duplicate column names + for i, col in enumerate(arg1.columns): + results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) + return dataframe_from_int_dict(results, arg1) + else: + if not arg1.columns.is_unique: + raise ValueError("'arg1' columns are not unique") + if not arg2.columns.is_unique: + raise ValueError("'arg2' columns are not unique") + X, Y = arg1.align(arg2, join='outer') + X = X + 0 * Y + Y = Y + 0 * X + res_columns = arg1.columns.union(arg2.columns) + for col in res_columns: + if col in X and col in Y: + results[col] = f(X[col], Y[col]) + return DataFrame(results, index=X.index, columns=res_columns) + elif pairwise is True: + results = defaultdict(dict) + for i, k1 in enumerate(arg1.columns): + for j, k2 in enumerate(arg2.columns): + if j < i and arg2 is arg1: + # Symmetric case + results[i][j] = results[j][i] + else: + results[i][j] = f(*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])) + p = Panel.from_dict(results).swapaxes(0, 1) + if len(p.major_axis) > 0: + p.major_axis = arg1.columns[p.major_axis] + if len(p.minor_axis) > 0: + p.minor_axis = arg2.columns[p.minor_axis] + return p + else: + raise ValueError("'pairwise' is not True/False") + else: + results = {} + for i, col in enumerate(arg1.columns): + results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2)) + return dataframe_from_int_dict(results, arg1) + + else: + return _flex_binary_moment(arg2, arg1, f) + +def _get_center_of_mass(com, span, halflife): + valid_count = len([x for x in [com, span, halflife] if x is not None]) + if valid_count > 1: + raise Exception("com, span, and halflife are mutually exclusive") + + if span is not None: + # convert span to center of mass + com = (span - 1) / 2.
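+ # e.g. span=20 gives com=9.5, consistent with alpha = 2 / (span + 1) = 1 / (1 + com)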
+ elif halflife is not None: + # convert halflife to center of mass + decay = 1 - np.exp(np.log(0.5) / halflife) + com = 1 / decay - 1 + elif com is None: + raise Exception("Must pass one of com, span, or halflife") + + return float(com) + +def _offset(window, center): + if not com.is_integer(window): + window = len(window) + offset = (window - 1) / 2. if center else 0 + try: + return int(offset) + except: + return offset.astype(int) + +def _require_min_periods(p): + def _check_func(minp, window): + if minp is None: + return window + else: + return max(p, minp) + return _check_func + +def _use_window(minp, window): + if minp is None: + return window + else: + return minp + +def _zsqrt(x): + result = np.sqrt(x) + mask = x < 0 + + from pandas import DataFrame + if isinstance(x, DataFrame): + if mask.values.any(): + result[mask] = 0 + else: + if mask.any(): + result[mask] = 0 + + return result + +def _prep_binary(arg1, arg2): + if not isinstance(arg2, type(arg1)): + raise Exception('Input arrays must be of the same type!') + + # mask out values, this also makes a common index... + X = arg1 + 0 * arg2 + Y = arg2 + 0 * arg1 + + return X, Y + +def _validate_win_type(win_type, kwargs): + # may pop from kwargs + arg_map = {'kaiser': ['beta'], + 'gaussian': ['std'], + 'general_gaussian': ['power', 'width'], + 'slepian': ['width']} + if win_type in arg_map: + return tuple([win_type] + + _pop_args(win_type, arg_map[win_type], kwargs)) + return win_type + + +def _pop_args(win_type, arg_names, kwargs): + msg = '%s window requires %%s' % win_type + all_args = [] + for n in arg_names: + if n not in kwargs: + raise ValueError(msg % n) + all_args.append(kwargs.pop(n)) + return all_args + +############################# +##### top-level exports ##### +############################# + +def rolling(obj, win_type=None, **kwds): + from pandas import Series, DataFrame + if not isinstance(obj, (Series, DataFrame)): + raise TypeError('invalid type: %s' % type(obj)) + + if win_type is not None: + return Window(obj, win_type=win_type, **kwds) + + return Rolling(obj, **kwds) +rolling.__doc__ = Window.__doc__ + +def expanding(obj, **kwds): + from pandas import Series, DataFrame + if not isinstance(obj, (Series, DataFrame)): + raise TypeError('invalid type: %s' % type(obj)) + + return Expanding(obj, **kwds) +expanding.__doc__ = Expanding.__doc__ + +def ewm(obj, **kwds): + from pandas import Series, DataFrame + if not isinstance(obj, (Series, DataFrame)): + raise TypeError('invalid type: %s' % type(obj)) + + return EWM(obj, **kwds) +ewm.__doc__ = EWM.__doc__ diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 3cddae45e7516..28f35cf26e582 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -4,29 +4,23 @@ """ from __future__ import division -from functools import wraps -from collections import defaultdict - -from numpy import NaN +import warnings import numpy as np - -from pandas.core.api import DataFrame, Series, Panel, notnull -import pandas.algos as algos -import pandas.core.common as pdcom - +from pandas import lib +from pandas.core.api import DataFrame, Series from pandas.util.decorators import Substitution, Appender __all__ = ['rolling_count', 'rolling_max', 'rolling_min', 'rolling_sum', 'rolling_mean', 'rolling_std', 'rolling_cov', 'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt', 'rolling_quantile', 'rolling_median', 'rolling_apply', - 'rolling_corr_pairwise', 'rolling_window', + 'rolling_window', 'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov', 
'expanding_count', 'expanding_max', 'expanding_min', 'expanding_sum', 'expanding_mean', 'expanding_std', 'expanding_cov', 'expanding_corr', 'expanding_var', 'expanding_skew', 'expanding_kurt', 'expanding_quantile', - 'expanding_median', 'expanding_apply', 'expanding_corr_pairwise'] + 'expanding_median', 'expanding_apply' ] #------------------------------------------------------------------------------ # Docs @@ -179,8 +173,72 @@ Use a standard estimation bias correction """ +def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): + """ + wrapper function to dispatch to the appropriate window functions + wraps/unwraps ndarrays for compat + + can be removed when ndarray support is removed + """ + is_ndarray = isinstance(arg, np.ndarray) + if is_ndarray: + if arg.ndim == 1: + arg = Series(arg) + elif arg.ndim == 2: + arg = DataFrame(arg) + else: + raise AssertionError("cannot support ndim > 2 for ndarray compat") + + warnings.warn("pd.{dispatch}_{name} is deprecated for ndarrays and will be removed " + "in a future version".format(dispatch=dispatch,name=name), + FutureWarning, stacklevel=3) + + # get the functional keywords here + if func_kw is None: + func_kw = [] + kwds = {} + for k in func_kw: + value = kwargs.pop(k,None) + if value is not None: + kwds[k] = value + + # how is a keyword that if not-None should be in kwds + how = kwargs.pop('how',None) + if how is not None: + kwds['how'] = how + + r = getattr(arg,dispatch)(**kwargs) + + if not is_ndarray: + + # give a helpful deprecation message + # with copy-pastable arguments + pargs = ','.join([ "{a}={b}".format(a=a,b=b) for a,b in kwargs.items() if b is not None ]) + aargs = ','.join(args) + if len(aargs): + aargs += ',' + + def f(a,b): + if lib.isscalar(b): + return "{a}={b}".format(a=a,b=b) + return "{a}=<{b}>".format(a=a,b=type(b).__name__) + aargs = ','.join([ f(a,b) for a,b in kwds.items() if b is not None ]) + warnings.warn("pd.{dispatch}_{name} is deprecated for {klass} " + "and will be removed in a future version, replace with " + "\n\t{klass}.{dispatch}({pargs}).{name}({aargs})".format(klass=type(arg).__name__, + pargs=pargs, + aargs=aargs, + dispatch=dispatch, + name=name), + FutureWarning, stacklevel=3) + + result = getattr(r,name)(*args, **kwds) + + if is_ndarray: + result = result.values + return result -def rolling_count(arg, window, freq=None, center=False, how=None): +def rolling_count(arg, window, **kwargs): """ Rolling count of number of non-NaN observations inside provided window. @@ -208,26 +266,12 @@ def rolling_count(arg, window, freq=None, center=False, how=None): frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - arg = _conv_timerule(arg, freq, how) - if not center: - window = min(window, len(arg)) - - return_hook, values = _process_data_structure(arg, kill_inf=False) - - converted = np.isfinite(values).astype(float) - result = rolling_sum(converted, window, min_periods=0, - center=center) # already converted - - # putmask here? 
- result[np.isnan(result)] = 0 - return return_hook(result) - + return ensure_compat('rolling', 'count', arg, window=window, **kwargs) @Substitution("Unbiased moving covariance.", _binary_arg_flex, _roll_kw%'None'+_pairwise_kw+_ddof_kw, _flex_retval, _roll_notes) @Appender(_doc_template) -def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, - center=False, pairwise=None, how=None, ddof=1): +def rolling_cov(arg1, arg2=None, window=None, pairwise=None, **kwargs): if window is None and isinstance(arg2, (int, float)): window = arg2 arg2 = arg1 @@ -235,23 +279,19 @@ def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, elif arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise # only default unset - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - - def _get_cov(X, Y): - mean = lambda x: rolling_mean(x, window, min_periods, center=center) - count = rolling_count(X + Y, window, center=center) - bias_adj = count / (count - ddof) - return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj - rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise)) - return rs - + return ensure_compat('rolling', + 'cov', + arg1, + other=arg2, + window=window, + pairwise=pairwise, + func_kw=['other','pairwise','ddof'], + **kwargs) @Substitution("Moving sample correlation.", _binary_arg_flex, _roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes) @Appender(_doc_template) -def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None, - center=False, pairwise=None, how=None): +def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): if window is None and isinstance(arg2, (int, float)): window = arg2 arg2 = arg1 @@ -259,259 +299,74 @@ def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None, elif arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise # only default unset - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - - def _get_corr(a, b): - num = rolling_cov(a, b, window, min_periods, freq=freq, - center=center) - den = (rolling_std(a, window, min_periods, freq=freq, - center=center) * - rolling_std(b, window, min_periods, freq=freq, - center=center)) - return num / den - - return _flex_binary_moment(arg1, arg2, _get_corr, pairwise=bool(pairwise)) - - -def _flex_binary_moment(arg1, arg2, f, pairwise=False): - if not (isinstance(arg1,(np.ndarray, Series, DataFrame)) and - isinstance(arg2,(np.ndarray, Series, DataFrame))): - raise TypeError("arguments to moment function must be of type " - "np.ndarray/Series/DataFrame") - - if isinstance(arg1, (np.ndarray, Series)) and \ - isinstance(arg2, (np.ndarray,Series)): - X, Y = _prep_binary(arg1, arg2) - return f(X, Y) - - elif isinstance(arg1, DataFrame): - def dataframe_from_int_dict(data, frame_template): - result = DataFrame(data, index=frame_template.index) - if len(result.columns) > 0: - result.columns = frame_template.columns[result.columns] - return result - - results = {} - if isinstance(arg2, DataFrame): - if pairwise is False: - if arg1 is arg2: - # special case in order to handle duplicate column names - for i, col in enumerate(arg1.columns): - results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) - return dataframe_from_int_dict(results, arg1) - else: - if not arg1.columns.is_unique: - raise ValueError("'arg1' columns are not unique") - if not arg2.columns.is_unique: - raise ValueError("'arg2' columns are not unique") - X, Y = arg1.align(arg2, join='outer') - X 
= X + 0 * Y - Y = Y + 0 * X - res_columns = arg1.columns.union(arg2.columns) - for col in res_columns: - if col in X and col in Y: - results[col] = f(X[col], Y[col]) - return DataFrame(results, index=X.index, columns=res_columns) - elif pairwise is True: - results = defaultdict(dict) - for i, k1 in enumerate(arg1.columns): - for j, k2 in enumerate(arg2.columns): - if j < i and arg2 is arg1: - # Symmetric case - results[i][j] = results[j][i] - else: - results[i][j] = f(*_prep_binary(arg1.iloc[:, i], - arg2.iloc[:, j])) - p = Panel.from_dict(results).swapaxes('items', 'major') - if len(p.major_axis) > 0: - p.major_axis = arg1.columns[p.major_axis] - if len(p.minor_axis) > 0: - p.minor_axis = arg2.columns[p.minor_axis] - return p - else: - raise ValueError("'pairwise' is not True/False") - else: - results = {} - for i, col in enumerate(arg1.columns): - results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2)) - return dataframe_from_int_dict(results, arg1) - - else: - return _flex_binary_moment(arg2, arg1, f) - - -@Substitution("Deprecated. Use rolling_corr(..., pairwise=True) instead.\n\n" - "Pairwise moving sample correlation", _pairwise_arg, - _roll_kw%'None', _pairwise_retval, _roll_notes) -@Appender(_doc_template) -def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None, - freq=None, center=False): - import warnings - msg = "rolling_corr_pairwise is deprecated, use rolling_corr(..., pairwise=True)" - warnings.warn(msg, FutureWarning, stacklevel=2) - return rolling_corr(df1, df2, window=window, min_periods=min_periods, - freq=freq, center=center, - pairwise=True) - - -def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False, - how=None, args=(), kwargs={}, **kwds): - """ - Rolling statistical measure using supplied function. Designed to be - used with passed-in Cython array-based functions. - - Parameters - ---------- - arg : DataFrame or numpy ndarray-like - window : Number of observations used for calculating statistic - func : Cython function to compute rolling statistic on raw series - minp : int - Minimum number of observations required to have a value - axis : int, default 0 - freq : None or string alias / date offset object, default=None - Frequency to conform to before computing statistic - center : boolean, default False - Whether the label should correspond with center of window - how : string, default 'mean' - Method for down- or re-sampling - args : tuple - Passed on to func - kwargs : dict - Passed on to func - - Returns - ------- - y : type of input - """ - arg = _conv_timerule(arg, freq, how) - - return_hook, values = _process_data_structure(arg) - - if values.size == 0: - result = values.copy() - else: - # actually calculate the moment. Faster way to do this? - offset = int((window - 1) / 2.) if center else 0 - additional_nans = np.array([np.NaN] * offset) - calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x, - window, minp=minp, args=args, kwargs=kwargs, - **kwds) - if values.ndim > 1: - result = np.apply_along_axis(calc, axis, values) - else: - result = calc(values) + return ensure_compat('rolling', + 'corr', + arg1, + other=arg2, + window=window, + pairwise=pairwise, + func_kw=['other','pairwise'], + **kwargs) - if center: - result = _center_window(result, window, axis) - - return return_hook(result) - - -def _center_window(rs, window, axis): - if axis > rs.ndim-1: - raise ValueError("Requested axis is larger then no. of argument " - "dimensions") - - offset = int((window - 1) / 2.)
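# A minimal sketch, assuming pandas >= 0.18: for an odd window on a
# Series, the centering logic above is equivalent to computing the
# trailing statistic and shifting the labels back by (window - 1) // 2.
import numpy as np
import pandas as pd

s = pd.Series(np.arange(10, dtype=float))
window = 5
offset = (window - 1) // 2
centered = s.rolling(window=window, center=True).mean()
shifted = s.rolling(window=window).mean().shift(-offset)
assert centered.equals(shifted)  # same windows, same labels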
- if offset > 0: - if isinstance(rs, (Series, DataFrame, Panel)): - rs = rs.slice_shift(-offset, axis=axis) - else: - lead_indexer = [slice(None)] * rs.ndim - lead_indexer[axis] = slice(offset, None) - rs = np.copy(rs[tuple(lead_indexer)]) - return rs - - -def _process_data_structure(arg, kill_inf=True): - if isinstance(arg, DataFrame): - return_hook = lambda v: type(arg)(v, index=arg.index, - columns=arg.columns) - values = arg.values - elif isinstance(arg, Series): - values = arg.values - return_hook = lambda v: Series(v, arg.index, name=arg.name) - else: - return_hook = lambda v: v - values = arg - - if not issubclass(values.dtype.type, float): - values = values.astype(float) - - if kill_inf: - values = values.copy() - values[np.isinf(values)] = np.NaN - - return return_hook, values #------------------------------------------------------------------------------ # Exponential moving moments -def _get_center_of_mass(com, span, halflife): - valid_count = len([x for x in [com, span, halflife] if x is not None]) - if valid_count > 1: - raise Exception("com, span, and halflife are mutually exclusive") - - if span is not None: - # convert span to center of mass - com = (span - 1) / 2. - elif halflife is not None: - # convert halflife to center of mass - decay = 1 - np.exp(np.log(0.5) / halflife) - com = 1 / decay - 1 - elif com is None: - raise Exception("Must pass one of com, span, or halflife") - - return float(com) - - @Substitution("Exponentially-weighted moving average", _unary_arg, _ewm_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, adjust=True, how=None, ignore_na=False): - arg = _conv_timerule(arg, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _ewma(v): - return algos.ewma(v, com, int(adjust), int(ignore_na), int(min_periods)) - - return_hook, values = _process_data_structure(arg) - if values.size == 0: - output = values.copy() - else: - output = np.apply_along_axis(_ewma, 0, values) - return return_hook(output) - + return ensure_compat('ewm', + 'mean', + arg, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + adjust=adjust, + how=how, + ignore_na=ignore_na) @Substitution("Exponentially-weighted moving variance", _unary_arg, _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, freq=None, how=None, ignore_na=False, adjust=True): - arg = _conv_timerule(arg, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _ewmvar(v): - return algos.ewmcov(v, v, com, int(adjust), int(ignore_na), int(min_periods), int(bias)) - - return_hook, values = _process_data_structure(arg) - if values.size == 0: - output = values.copy() - else: - output = np.apply_along_axis(_ewmvar, 0, values) - return return_hook(output) - + return ensure_compat('ewm', + 'var', + arg, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + adjust=adjust, + how=how, + ignore_na=ignore_na, + bias=bias, + func_kw=['bias']) @Substitution("Exponentially-weighted moving std", _unary_arg, _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, - ignore_na=False, adjust=True): - result = ewmvar(arg, com=com, span=span, halflife=halflife, - min_periods=min_periods, bias=bias, adjust=adjust, ignore_na=ignore_na) - return _zsqrt(result) + 
freq=None, how=None, ignore_na=False, adjust=True): + return ensure_compat('ewm', + 'std', + arg, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + adjust=adjust, + how=how, + ignore_na=ignore_na, + bias=bias, + func_kw=['bias']) ewmvol = ewmstd @@ -528,21 +383,22 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, com = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _get_ewmcov(X, Y): - # X and Y have the same structure (and NaNs) when called from _flex_binary_moment() - return_hook, x_values = _process_data_structure(X) - return_hook, y_values = _process_data_structure(Y) - cov = algos.ewmcov(x_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), int(bias)) - return return_hook(cov) - - result = _flex_binary_moment(arg1, arg2, _get_ewmcov, - pairwise=bool(pairwise)) - return result + return ensure_compat('ewm', + 'cov', + arg1, + other=arg2, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + bias=bias, + freq=freq, + how=how, + ignore_na=ignore_na, + adjust=adjust, + pairwise=pairwise, + func_kw=['other','pairwise','bias']) @Substitution("Exponentially-weighted moving correlation", _binary_arg_flex, _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) @@ -556,80 +412,26 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, com = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - arg1 = _conv_timerule(arg1, freq, how) - arg2 = _conv_timerule(arg2, freq, how) - com = _get_center_of_mass(com, span, halflife) - - def _get_ewmcorr(X, Y): - # X and Y have the same structure (and NaNs) when called from _flex_binary_moment() - return_hook, x_values = _process_data_structure(X) - return_hook, y_values = _process_data_structure(Y) - cov = algos.ewmcov(x_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), 1) - x_var = algos.ewmcov(x_values, x_values, com, int(adjust), int(ignore_na), int(min_periods), 1) - y_var = algos.ewmcov(y_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), 1) - corr = cov / _zsqrt(x_var * y_var) - return return_hook(corr) - - result = _flex_binary_moment(arg1, arg2, _get_ewmcorr, - pairwise=bool(pairwise)) - return result - - -def _zsqrt(x): - result = np.sqrt(x) - mask = x < 0 - - if isinstance(x, DataFrame): - if mask.values.any(): - result[mask] = 0 - else: - if mask.any(): - result[mask] = 0 - - return result - - -def _prep_binary(arg1, arg2): - if not isinstance(arg2, type(arg1)): - raise Exception('Input arrays must be of the same type!') - - # mask out values, this also makes a common index... - X = arg1 + 0 * arg2 - Y = arg2 + 0 * arg1 - - return X, Y + return ensure_compat('ewm', + 'corr', + arg1, + other=arg2, + com=com, + span=span, + halflife=halflife, + min_periods=min_periods, + freq=freq, + how=how, + ignore_na=ignore_na, + adjust=adjust, + pairwise=pairwise, + func_kw=['other','pairwise']) #---------------------------------------------------------------------- # Python interface to Cython functions -def _conv_timerule(arg, freq, how): - - types = (DataFrame, Series) - if freq is not None and isinstance(arg, types): - # Conform to whatever frequency needed. 
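# A minimal sketch, assuming pandas >= 0.18: _conv_timerule (deleted
# above) resampled before computing; calling .resample() explicitly is
# the forward-compatible spelling of the deprecated freq/how arguments.
import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(100),
              index=pd.date_range('2000-01-01', periods=100, freq='12H'))
# old: pd.rolling_mean(s, 5, freq='D'); new:
out = s.resample('D').mean().rolling(window=5).mean()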
- arg = arg.resample(freq, how=how) - - return arg - - -def _require_min_periods(p): - def _check_func(minp, window): - if minp is None: - return window - else: - return max(p, minp) - return _check_func - - -def _use_window(minp, window): - if minp is None: - return window - else: - return minp - - -def _rolling_func(func, desc, check_minp=_use_window, how=None, additional_kw=''): +def _rolling_func(name, desc, how=None, func_kw=None, additional_kw=''): if how is None: how_arg_str = 'None' else: @@ -638,36 +440,33 @@ def _rolling_func(func, desc, check_minp=_use_window, how=None, additional_kw='' @Substitution(desc, _unary_arg, _roll_kw%how_arg_str + additional_kw, _type_of_input_retval, _roll_notes) @Appender(_doc_template) - @wraps(func) - def f(arg, window, min_periods=None, freq=None, center=False, how=how, + def f(arg, window, min_periods=None, freq=None, center=False, **kwargs): - def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): - minp = check_minp(minp, window) - return func(arg, window, minp, **kwds) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - center=center, how=how, **kwargs) + return ensure_compat('rolling', + name, + arg, + window=window, + min_periods=min_periods, + freq=freq, + center=center, + func_kw=func_kw, + **kwargs) return f -rolling_max = _rolling_func(algos.roll_max, 'Moving maximum.', how='max') -rolling_min = _rolling_func(algos.roll_min, 'Moving minimum.', how='min') -rolling_sum = _rolling_func(algos.roll_sum, 'Moving sum.') -rolling_mean = _rolling_func(algos.roll_mean, 'Moving mean.') -rolling_median = _rolling_func(algos.roll_median_c, 'Moving median.', - how='median') - -_ts_std = lambda *a, **kw: _zsqrt(algos.roll_var(*a, **kw)) -rolling_std = _rolling_func(_ts_std, 'Moving standard deviation.', - check_minp=_require_min_periods(1), +rolling_max = _rolling_func('max', 'Moving maximum.', how='max') +rolling_min = _rolling_func('min', 'Moving minimum.', how='min') +rolling_sum = _rolling_func('sum', 'Moving sum.') +rolling_mean = _rolling_func('mean', 'Moving mean.') +rolling_median = _rolling_func('median', 'Moving median.', how='median') +rolling_std = _rolling_func('std', 'Moving standard deviation.', + func_kw=['ddof'], additional_kw=_ddof_kw) -rolling_var = _rolling_func(algos.roll_var, 'Moving variance.', - check_minp=_require_min_periods(1), +rolling_var = _rolling_func('var', 'Moving variance.', + func_kw=['ddof'], additional_kw=_ddof_kw) -rolling_skew = _rolling_func(algos.roll_skew, 'Unbiased moving skewness.', - check_minp=_require_min_periods(3)) -rolling_kurt = _rolling_func(algos.roll_kurt, 'Unbiased moving kurtosis.', - check_minp=_require_min_periods(4)) - +rolling_skew = _rolling_func('skew', 'Unbiased moving skewness.') +rolling_kurt = _rolling_func('kurt', 'Unbiased moving kurtosis.') def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, center=False): @@ -703,12 +502,15 @@ def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
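# A minimal sketch, assuming pandas >= 0.18: each module-level function
# generated here forwards to the matching method, with extra keywords
# such as ddof routed through func_kw.
import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(30))
sample_std = s.rolling(window=5).std()            # ddof=1 by default
population_std = s.rolling(window=5).std(ddof=0)  # ddof passed through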
""" - - def call_cython(arg, window, minp, args=(), kwargs={}): - minp = _use_window(minp, window) - return algos.roll_quantile(arg, window, minp, quantile) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - center=center) + return ensure_compat('rolling', + 'quantile', + arg, + window=window, + freq=freq, + center=center, + min_periods=min_periods, + func_kw=['quantile'], + quantile=quantile) def rolling_apply(arg, window, func, min_periods=None, freq=None, @@ -749,12 +551,17 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - offset = int((window - 1) / 2.) if center else 0 - def call_cython(arg, window, minp, args, kwargs): - minp = _use_window(minp, window) - return algos.roll_generic(arg, window, minp, offset, func, args, kwargs) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - center=False, args=args, kwargs=kwargs) + return ensure_compat('rolling', + 'apply', + arg, + window=window, + freq=freq, + center=center, + min_periods=min_periods, + func_kw=['func','args','kwargs'], + func=func, + args=args, + kwargs=kwargs) def rolling_window(arg, window=None, win_type=None, min_periods=None, @@ -816,97 +623,47 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - if isinstance(window, (list, tuple, np.ndarray)): - if win_type is not None: - raise ValueError(('Do not specify window type if using custom ' - 'weights')) - window = pdcom._asarray_tuplesafe(window).astype(float) - elif pdcom.is_integer(window): # window size - if win_type is None: - raise ValueError('Must specify window type') - try: - import scipy.signal as sig - except ImportError: - raise ImportError('Please install scipy to generate window weight') - win_type = _validate_win_type(win_type, kwargs) # may pop from kwargs - window = sig.get_window(win_type, window).astype(float) - else: - raise ValueError('Invalid window %s' % str(window)) - - minp = _use_window(min_periods, len(window)) - - arg = _conv_timerule(arg, freq, how) - return_hook, values = _process_data_structure(arg) - - if values.size == 0: - result = values.copy() - else: - offset = int((len(window) - 1) / 2.) 
if center else 0 - additional_nans = np.array([np.NaN] * offset) - f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x, - window, minp, avg=mean) - result = np.apply_along_axis(f, axis, values) - - if center: - result = _center_window(result, len(window), axis) - - return return_hook(result) - - -def _validate_win_type(win_type, kwargs): - # may pop from kwargs - arg_map = {'kaiser': ['beta'], - 'gaussian': ['std'], - 'general_gaussian': ['power', 'width'], - 'slepian': ['width']} - if win_type in arg_map: - return tuple([win_type] + - _pop_args(win_type, arg_map[win_type], kwargs)) - return win_type - - -def _pop_args(win_type, arg_names, kwargs): - msg = '%s window requires %%s' % win_type - all_args = [] - for n in arg_names: - if n not in kwargs: - raise ValueError(msg % n) - all_args.append(kwargs.pop(n)) - return all_args - - -def _expanding_func(func, desc, check_minp=_use_window, additional_kw=''): + func = 'mean' if mean else 'sum' + return ensure_compat('rolling', + func, + arg, + window=window, + win_type=win_type, + freq=freq, + center=center, + min_periods=min_periods, + axis=axis, + func_kw=kwargs.keys(), + **kwargs) + +def _expanding_func(name, desc, func_kw=None, additional_kw=''): @Substitution(desc, _unary_arg, _expanding_kw + additional_kw, _type_of_input_retval, "") @Appender(_doc_template) - @wraps(func) def f(arg, min_periods=1, freq=None, **kwargs): - window = max(len(arg), min_periods) if min_periods else len(arg) - - def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): - minp = check_minp(minp, window) - return func(arg, window, minp, **kwds) - return _rolling_moment(arg, window, call_cython, min_periods, freq=freq, - **kwargs) - + return ensure_compat('expanding', + name, + arg, + min_periods=min_periods, + freq=freq, + func_kw=func_kw, + **kwargs) return f -expanding_max = _expanding_func(algos.roll_max, 'Expanding maximum.') -expanding_min = _expanding_func(algos.roll_min, 'Expanding minimum.') -expanding_sum = _expanding_func(algos.roll_sum, 'Expanding sum.') -expanding_mean = _expanding_func(algos.roll_mean, 'Expanding mean.') -expanding_median = _expanding_func(algos.roll_median_c, 'Expanding median.') +expanding_max = _expanding_func('max', 'Expanding maximum.') +expanding_min = _expanding_func('min', 'Expanding minimum.') +expanding_sum = _expanding_func('sum', 'Expanding sum.') +expanding_mean = _expanding_func('mean', 'Expanding mean.') +expanding_median = _expanding_func('median', 'Expanding median.') -expanding_std = _expanding_func(_ts_std, 'Expanding standard deviation.', - check_minp=_require_min_periods(1), +expanding_std = _expanding_func('std', 'Expanding standard deviation.', + func_kw=['ddof'], additional_kw=_ddof_kw) -expanding_var = _expanding_func(algos.roll_var, 'Expanding variance.', - check_minp=_require_min_periods(1), +expanding_var = _expanding_func('var', 'Expanding variance.', + func_kw=['ddof'], additional_kw=_ddof_kw) -expanding_skew = _expanding_func(algos.roll_skew, 'Unbiased expanding skewness.', - check_minp=_require_min_periods(3)) -expanding_kurt = _expanding_func(algos.roll_kurt, 'Unbiased expanding kurtosis.', - check_minp=_require_min_periods(4)) +expanding_skew = _expanding_func('skew', 'Unbiased expanding skewness.') +expanding_kurt = _expanding_func('kurt', 'Unbiased expanding kurtosis.') def expanding_count(arg, freq=None): @@ -930,7 +687,7 @@ def expanding_count(arg, freq=None): frequency by resampling the data. 
This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - return rolling_count(arg, len(arg), freq=freq) + return ensure_compat('expanding', 'count', arg, freq=freq) def expanding_quantile(arg, quantile, min_periods=1, freq=None): @@ -958,9 +715,13 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None): frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). """ - return rolling_quantile(arg, len(arg), quantile, min_periods=min_periods, - freq=freq) - + return ensure_compat('expanding', + 'quantile', + arg, + freq=freq, + min_periods=min_periods, + func_kw=['quantile'], + quantile=quantile) @Substitution("Unbiased expanding covariance.", _binary_arg_flex, _expanding_kw+_pairwise_kw+_ddof_kw, _flex_retval, "") @@ -973,10 +734,15 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None, ddof min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2)) - return rolling_cov(arg1, arg2, window, - min_periods=min_periods, freq=freq, - pairwise=pairwise, ddof=ddof) + return ensure_compat('expanding', + 'cov', + arg1, + other=arg2, + min_periods=min_periods, + pairwise=pairwise, + freq=freq, + ddof=ddof, + func_kw=['other','pairwise','ddof']) @Substitution("Expanding sample correlation.", _binary_arg_flex, @@ -990,23 +756,14 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2)) - return rolling_corr(arg1, arg2, window, - min_periods=min_periods, - freq=freq, pairwise=pairwise) - - -@Substitution("Deprecated. Use expanding_corr(..., pairwise=True) instead.\n\n" - "Pairwise expanding sample correlation", _pairwise_arg, - _expanding_kw, _pairwise_retval, "") -@Appender(_doc_template) -def expanding_corr_pairwise(df1, df2=None, min_periods=1, freq=None): - import warnings - msg = "expanding_corr_pairwise is deprecated, use expanding_corr(..., pairwise=True)" - warnings.warn(msg, FutureWarning, stacklevel=2) - return expanding_corr(df1, df2, min_periods=min_periods, - freq=freq, pairwise=True) - + return ensure_compat('expanding', + 'corr', + arg1, + other=arg2, + min_periods=min_periods, + pairwise=pairwise, + freq=freq, + func_kw=['other','pairwise','ddof']) def expanding_apply(arg, func, min_periods=1, freq=None, args=(), kwargs={}): @@ -1038,6 +795,12 @@ def expanding_apply(arg, func, min_periods=1, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
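# A minimal sketch, assuming pandas >= 0.18: an expanding window is a
# rolling window as wide as the whole series, so both spellings agree.
import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(20))
a = s.expanding(min_periods=2).mean()
b = s.rolling(window=len(s), min_periods=2).mean()
assert a.equals(b)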
""" - window = max(len(arg), min_periods) if min_periods else len(arg) - return rolling_apply(arg, window, func, min_periods=min_periods, freq=freq, - args=args, kwargs=kwargs) + return ensure_compat('expanding', + 'apply', + arg, + freq=freq, + min_periods=min_periods, + func_kw=['func','args','kwargs'], + func=func, + args=args, + kwargs=kwargs) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index bd21053f37568..d067b2fd7b969 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1443,6 +1443,48 @@ def test_frame_set_name_single(self): result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) self.assertEqual(result.index.name, 'A') + def test_aggregate_api_consistency(self): + # GH 9052 + # make sure that the aggregates via dict + # are consistent + + + def compare(result, expected): + # if we ar passin dicts then ordering is not guaranteed for output columns + assert_frame_equal(result.reindex_like(expected), expected) + + + df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B' : ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C' : np.random.randn(8), + 'D' : np.random.randn(8)}) + + grouped = df.groupby(['A', 'B']) + result = grouped[['D','C']].agg({'r':np.sum, 'r2':np.mean}) + expected = pd.concat([grouped[['D','C']].sum(), + grouped[['D','C']].mean()], + keys=['r','r2'], + axis=1).stack(level=1) + compare(result, expected) + + result = grouped[['D','C']].agg({'r': { 'C' : np.sum }, 'r2' : { 'D' : np.mean }}) + expected = pd.concat([grouped[['C']].sum(), + grouped[['D']].mean()], + axis=1) + expected.columns = MultiIndex.from_tuples([('r','C'),('r2','D')]) + compare(result, expected) + + result = grouped[['D','C']].agg([np.sum, np.mean]) + expected = pd.concat([grouped['D'].sum(), + grouped['D'].mean(), + grouped['C'].sum(), + grouped['C'].mean()], + axis=1) + expected.columns = MultiIndex.from_product([['D','C'],['sum','mean']]) + compare(result, expected) + def test_multi_iter(self): s = Series(np.arange(6)) k1 = np.array(['a', 'a', 'a', 'b', 'b', 'b']) diff --git a/pandas/stats/tests/test_moments.py b/pandas/tests/test_window.py similarity index 57% rename from pandas/stats/tests/test_moments.py rename to pandas/tests/test_window.py index b9efa875735d2..4d7f9292705ad 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/tests/test_window.py @@ -9,12 +9,14 @@ import numpy as np from distutils.version import LooseVersion +import pandas as pd from pandas import Series, DataFrame, Panel, bdate_range, isnull, notnull, concat from pandas.util.testing import ( assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_index_equal ) import pandas.core.datetools as datetools import pandas.stats.moments as mom +import pandas.core.window as rwindow import pandas.util.testing as tm from pandas.compat import range, zip, PY3, StringIO @@ -33,41 +35,267 @@ def _create_data(self): self.arr = arr self.rng = bdate_range(datetime(2009, 1, 1), periods=N) - self.series = Series(arr.copy(), index=self.rng) - self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K)) +class TestApi(Base): + + def setUp(self): + self._create_data() + + def test_getitem(self): + + r = self.frame.rolling(window=5) + tm.assert_index_equal(r._selected_obj.columns,self.frame.columns) + + r = self.frame.rolling(window=5)[1] + self.assertEqual(r._selected_obj.name,self.frame.columns[1]) + + # technically this is allowed + r = self.frame.rolling(window=5)[1,3] + 
tm.assert_index_equal(r._selected_obj.columns,self.frame.columns[[1,3]]) + + r = self.frame.rolling(window=5)[[1,3]] + tm.assert_index_equal(r._selected_obj.columns,self.frame.columns[[1,3]]) + + def test_select_bad_cols(self): + df = DataFrame([[1, 2]], columns=['A', 'B']) + g = df.rolling(window=5) + self.assertRaises(KeyError, g.__getitem__, ['C']) # g[['C']] + + self.assertRaises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] + with tm.assertRaisesRegexp(KeyError, '^[^A]+$'): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[['A', 'C']] + + def test_attribute_access(self): + + df = DataFrame([[1, 2]], columns=['A', 'B']) + r = df.rolling(window=5) + tm.assert_series_equal(r.A.sum(),r['A'].sum()) + self.assertRaises(AttributeError, lambda : r.F) + + def tests_skip_nuisance(self): + + df = DataFrame({'A' : range(5), 'B' : range(5,10), 'C' : 'foo'}) + + r = df.rolling(window=3) + result = r[['A','B']].sum() + expected = DataFrame({'A' : [np.nan,np.nan,3,6,9], + 'B' : [np.nan,np.nan,18,21,24]}, + columns=list('AB')) + assert_frame_equal(result, expected) + + expected = pd.concat([r[['A','B']].sum(),df[['C']]],axis=1) + result = r.sum() + assert_frame_equal(result, expected) + + def test_timedeltas(self): + + df = DataFrame({'A' : range(5), 'B' : pd.timedelta_range('1 day',periods=5)}) + r = df.rolling(window=3) + result = r.sum() + expected = DataFrame({'A' : [np.nan,np.nan,3,6,9], + 'B' : pd.to_timedelta([pd.NaT,pd.NaT,'6 days','9 days','12 days'])}, + columns=list('AB')) + assert_frame_equal(result, expected) + + def test_agg(self): + df = DataFrame({'A' : range(5), + 'B' : range(0,10,2)}) + + r = df.rolling(window=3) + a_mean = r['A'].mean() + a_std = r['A'].std() + a_sum = r['A'].sum() + b_mean = r['B'].mean() + b_std = r['B'].std() + b_sum = r['B'].sum() + + def compare(result, expected): + # if we are using dicts, the ordering is not guaranteed + assert_frame_equal(result.reindex_like(expected), expected) + + result = r.aggregate([np.mean, np.std]) + expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1) + expected.columns = pd.MultiIndex.from_product([['A','B'],['mean','std']]) + assert_frame_equal(result, expected) + + result = r.aggregate({'A': np.mean, + 'B': np.std}) + expected = pd.concat([a_mean,b_std],axis=1) + compare(result, expected) + + result = r.aggregate({'A': ['mean','std']}) + expected = pd.concat([a_mean,a_std],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','std')]) + assert_frame_equal(result, expected) + + result = r['A'].aggregate(['mean','sum']) + expected = pd.concat([a_mean,a_sum],axis=1) + expected.columns = ['mean','sum'] + assert_frame_equal(result, expected) + + result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' } }) + expected = pd.concat([a_mean,a_sum],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','sum')]) + compare(result, expected) + + result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' }, + 'B': { 'mean2' : 'mean', 'sum2' : 'sum' }}) + expected = pd.concat([a_mean,a_sum,b_mean,b_sum],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','sum'), + ('B','mean2'),('B','sum2')]) + compare(result, expected) + + result = r.aggregate({'A': ['mean','std'], + 'B': ['mean','std']}) + expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','std'), + ('B','mean'),('B','std')]) + compare(result, expected) + + result = r.aggregate({'r1' : {
'A' : ['mean','sum'] }, + 'r2' : { 'B' : ['mean','sum'] }}) + expected = pd.concat([a_mean,a_sum,b_mean,b_sum],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('r1','A','mean'),('r1','A','sum'), + ('r2','B','mean'),('r2','B','sum')]) + compare(result, expected) + + result = r.agg({'A' : {'ra' : ['mean','std']}, + 'B' : {'rb' : ['mean','std']}}) + expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A','ra','mean'),('A','ra','std'), + ('B','rb','mean'),('B','rb','std')]) + compare(result, expected) + + + # passed lambda + result = r.agg({'A' : np.sum, + 'B' : lambda x: np.std(x, ddof=1)}) + rcustom = r['B'].apply(lambda x: np.std(x,ddof=1)) + expected = pd.concat([a_sum,rcustom],axis=1) + compare(result, expected) + + def test_agg_consistency(self): + + df = DataFrame({'A' : range(5), + 'B' : range(0,10,2)}) + r = df.rolling(window=3) + + result = r.agg([np.sum, np.mean]).columns + expected = pd.MultiIndex.from_product([list('AB'),['sum','mean']]) + tm.assert_index_equal(result, expected) + + result = r['A'].agg([np.sum, np.mean]).columns + expected = pd.Index(['sum','mean']) + tm.assert_index_equal(result, expected) + + result = r.agg({'A' : [np.sum, np.mean]}).columns + expected = pd.MultiIndex.from_tuples([('A','sum'),('A','mean')]) + tm.assert_index_equal(result, expected) + + def test_window_with_args(self): + tm._skip_if_no_scipy() + + # make sure that we are aggregating window functions correctly with arg + r = Series(np.random.randn(100)).rolling(window=10,min_periods=1,win_type='gaussian') + expected = pd.concat([r.mean(std=10),r.mean(std=.01)],axis=1) + expected.columns = ['<lambda>','<lambda>'] + result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=.01)]) + assert_frame_equal(result, expected) + + def a(x): + return x.mean(std=10) + def b(x): + return x.mean(std=0.01) + expected = pd.concat([r.mean(std=10),r.mean(std=.01)],axis=1) + expected.columns = ['a','b'] + result = r.aggregate([a,b]) + assert_frame_equal(result, expected) + + def test_preserve_metadata(self): + # GH 10565 + s = Series(np.arange(100), name='foo') + + s2 = s.rolling(30).sum() + s3 = s.rolling(20).sum() + self.assertEqual(s2.name, 'foo') + self.assertEqual(s3.name, 'foo') + + def test_how_compat(self): + # in prior versions, we would allow how to be used in the resample + # now that it's deprecated, we need to handle this in the actual + # aggregation functions + s = pd.Series(np.random.randn(20), index=pd.date_range('1/1/2000', periods=20, freq='12H')) + + for how in ['min','max','median']: + for op in ['mean','sum','std','var','kurt','skew']: + for t in ['rolling','expanding']: + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + + dfunc = getattr(pd,"{0}_{1}".format(t,op)) + if dfunc is None: + continue + + if t == 'rolling': + kwargs = {'window' : 5} + else: + kwargs = {} + result = dfunc(s, freq='D', how=how, **kwargs) + + expected = getattr(getattr(s,t)(freq='D', **kwargs),op)(how=how) + assert_series_equal(result, expected) + +class TestDeprecations(Base): + """ test that we are catching deprecation warnings """ + + def setUp(self): + self._create_data() + + + def test_deprecations(self): + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + mom.rolling_mean(np.ones(10),3,center=True ,axis=0) + mom.rolling_mean(Series(np.ones(10)),3,center=True ,axis=0) + class TestMoments(Base): def setUp(self): self._create_data() def test_centered_axis_validation(self): + # ok -
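# A minimal sketch, assuming scipy is installed: win_type selects a
# weighted window (as in test_window_with_args above), and parameters
# such as the gaussian std are passed to the aggregation call itself.
import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(100))
r = s.rolling(window=10, min_periods=1, win_type='gaussian')
wide = r.mean(std=10)      # flat weights, heavy smoothing
narrow = r.mean(std=0.01)  # weight concentrated at the window center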
mom.rolling_mean(Series(np.ones(10)),3,center=True ,axis=0) + Series(np.ones(10)).rolling(window=3,center=True ,axis=0).mean() + # bad axis - self.assertRaises(ValueError, mom.rolling_mean,Series(np.ones(10)),3,center=True ,axis=1) + self.assertRaises(ValueError, lambda : Series(np.ones(10)).rolling(window=3,center=True ,axis=1).mean()) # ok ok - mom.rolling_mean(DataFrame(np.ones((10,10))),3,center=True ,axis=0) - mom.rolling_mean(DataFrame(np.ones((10,10))),3,center=True ,axis=1) + DataFrame(np.ones((10,10))).rolling(window=3,center=True ,axis=0).mean() + DataFrame(np.ones((10,10))).rolling(window=3,center=True ,axis=1).mean() + # bad axis - self.assertRaises(ValueError, mom.rolling_mean,DataFrame(np.ones((10,10))),3,center=True ,axis=2) + self.assertRaises(ValueError, lambda : DataFrame(np.ones((10,10))).rolling(window=3,center=True ,axis=2).mean()) def test_rolling_sum(self): - self._check_moment_func(mom.rolling_sum, np.sum) + self._check_moment_func(mom.rolling_sum, np.sum, name='sum') def test_rolling_count(self): counter = lambda x: np.isfinite(x).astype(float).sum() self._check_moment_func(mom.rolling_count, counter, + name='count', has_min_periods=False, preserve_nan=False, fill_value=0) def test_rolling_mean(self): - self._check_moment_func(mom.rolling_mean, np.mean) + self._check_moment_func(mom.rolling_mean, np.mean, name='mean') def test_cmov_mean(self): # GH 8238 @@ -78,11 +306,12 @@ def test_cmov_mean(self): xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516, 12.818, 12.952, np.nan, np.nan]) - rs = mom.rolling_mean(vals, 5, center=True) - assert_almost_equal(xp, rs) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_mean(vals, 5, center=True) + assert_almost_equal(xp, rs) xp = Series(rs) - rs = mom.rolling_mean(Series(vals), 5, center=True) + rs = Series(vals).rolling(5, center=True).mean() assert_series_equal(xp, rs) def test_cmov_window(self): @@ -94,11 +323,12 @@ def test_cmov_window(self): xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516, 12.818, 12.952, np.nan, np.nan]) - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - assert_almost_equal(xp, rs) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_window(vals, 5, 'boxcar', center=True) + assert_almost_equal(xp, rs) xp = Series(rs) - rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True) + rs = Series(vals).rolling(5, win_type='boxcar', center=True).mean() assert_series_equal(xp, rs) def test_cmov_window_corner(self): @@ -108,19 +338,22 @@ def test_cmov_window_corner(self): # all nan vals = np.empty(10, dtype=float) vals.fill(np.nan) - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - self.assertTrue(np.isnan(rs).all()) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_window(vals, 5, 'boxcar', center=True) + self.assertTrue(np.isnan(rs).all()) # empty vals = np.array([]) - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - self.assertEqual(len(rs), 0) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_window(vals, 5, 'boxcar', center=True) + self.assertEqual(len(rs), 0) # shorter than window vals = np.random.randn(5) - rs = mom.rolling_window(vals, 10, 'boxcar') - self.assertTrue(np.isnan(rs).all()) - self.assertEqual(len(rs), 5) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + rs = mom.rolling_window(vals, 10, 'boxcar') + self.assertTrue(np.isnan(rs).all()) + 
self.assertEqual(len(rs), 5) def test_cmov_window_frame(self): # Gh 8238 @@ -149,7 +382,25 @@ def test_cmov_window_frame(self): [ np.nan, np.nan]]) # DataFrame - rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True) + rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).mean() + assert_frame_equal(DataFrame(xp), rs) + + # invalid method + self.assertRaises(AttributeError, lambda : DataFrame(vals).rolling(5, win_type='boxcar', center=True).std()) + + # sum + xp = np.array([[ np.nan, np.nan], + [ np.nan, np.nan], + [ 46.26, 46.96], + [ 43.22, 49.53], + [ 44.35, 51.04], + [ 34.05, 42.94], + [ 38.96, 43.22], + [ 45.25, 39.12], + [ np.nan, np.nan], + [ np.nan, np.nan]]) + + rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).sum() assert_frame_equal(DataFrame(xp), rs) def test_cmov_window_na_min_periods(self): @@ -160,9 +411,8 @@ def test_cmov_window_na_min_periods(self): vals[4] = np.nan vals[8] = np.nan - xp = mom.rolling_mean(vals, 5, min_periods=4, center=True) - rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True) - + xp = vals.rolling(5, min_periods=4, center=True).mean() + rs = vals.rolling(5, win_type='boxcar', min_periods=4, center=True).mean() assert_series_equal(xp, rs) def test_cmov_window_regular(self): @@ -194,7 +444,7 @@ def test_cmov_window_regular(self): for wt in win_types: xp = Series(xps[wt]) - rs = mom.rolling_window(Series(vals), 5, wt, center=True) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean() assert_series_equal(xp, rs) def test_cmov_window_regular_linear_range(self): @@ -211,7 +461,7 @@ def test_cmov_window_regular_linear_range(self): xp = Series(xp) for wt in win_types: - rs = mom.rolling_window(Series(vals), 5, wt, center=True) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean() assert_series_equal(xp, rs) def test_cmov_window_regular_missing_data(self): @@ -245,7 +495,7 @@ def test_cmov_window_regular_missing_data(self): for wt in win_types: xp = Series(xps[wt]) - rs = mom.rolling_window(Series(vals), 5, wt, min_periods=3) + rs = Series(vals).rolling(5, win_type=wt, min_periods=3).mean() assert_series_equal(xp, rs) def test_cmov_window_special(self): @@ -273,9 +523,7 @@ def test_cmov_window_special(self): for wt, k in zip(win_types, kwds): xp = Series(xps[wt]) - - rs = mom.rolling_window(Series(vals), 5, wt, center=True, - **k) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) assert_series_equal(xp, rs) def test_cmov_window_special_linear_range(self): @@ -293,32 +541,36 @@ def test_cmov_window_special_linear_range(self): xp = Series(xp) for wt, k in zip(win_types, kwds): - rs = mom.rolling_window(Series(vals), 5, wt, center=True, - **k) + rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) assert_series_equal(xp, rs) def test_rolling_median(self): - self._check_moment_func(mom.rolling_median, np.median) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self._check_moment_func(mom.rolling_median, np.median, name='median') def test_rolling_min(self): - self._check_moment_func(mom.rolling_min, np.min) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self._check_moment_func(mom.rolling_min, np.min, name='min') - a = np.array([1, 2, 3, 4, 5]) - b = mom.rolling_min(a, window=100, min_periods=1) - assert_almost_equal(b, np.ones(len(a))) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + a = np.array([1, 2, 3, 4, 5]) + b = mom.rolling_min(a, window=100, min_periods=1) + 
assert_almost_equal(b, np.ones(len(a))) - self.assertRaises(ValueError, mom.rolling_min, np.array([1, - 2, 3]), window=3, min_periods=5) + self.assertRaises(ValueError, mom.rolling_min, + np.array([1,2, 3]), window=3, min_periods=5) def test_rolling_max(self): - self._check_moment_func(mom.rolling_max, np.max) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self._check_moment_func(mom.rolling_max, np.max, name='max') - a = np.array([1, 2, 3, 4, 5]) - b = mom.rolling_max(a, window=100, min_periods=1) - assert_almost_equal(a, b) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + a = np.array([1, 2, 3, 4, 5]) + b = mom.rolling_max(a, window=100, min_periods=1) + assert_almost_equal(a, b) - self.assertRaises(ValueError, mom.rolling_max, np.array([1, - 2, 3]), window=3, min_periods=5) + self.assertRaises(ValueError, mom.rolling_max, np.array([1,2, 3]), + window=3, min_periods=5) def test_rolling_quantile(self): qs = [.1, .5, .9] @@ -330,8 +582,8 @@ def scoreatpercentile(a, per): return values[int(idx)] for q in qs: - def f(x, window, min_periods=None, freq=None, center=False): - return mom.rolling_quantile(x, window, q, + def f(x, window, quantile, min_periods=None, freq=None, center=False): + return mom.rolling_quantile(x, window, quantile, min_periods=min_periods, freq=freq, center=center) @@ -339,7 +591,7 @@ def f(x, window, min_periods=None, freq=None, center=False): def alt(x): return scoreatpercentile(x, q) - self._check_moment_func(f, alt) + self._check_moment_func(f, alt, name='quantile', quantile=q) def test_rolling_apply(self): # suppress warnings about empty slices, as we are deliberately testing with a 0-length Series @@ -347,52 +599,65 @@ def test_rolling_apply(self): warnings.filterwarnings("ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning) ser = Series([]) - assert_series_equal(ser, mom.rolling_apply(ser, 10, lambda x: x.mean())) + assert_series_equal(ser, ser.rolling(10).apply(lambda x: x.mean())) - def roll_mean(x, window, min_periods=None, freq=None, center=False): - return mom.rolling_apply(x, window, - lambda x: x[np.isfinite(x)].mean(), + f = lambda x: x[np.isfinite(x)].mean() + def roll_mean(x, window, min_periods=None, freq=None, center=False, **kwargs): + return mom.rolling_apply(x, + window, + func=f, min_periods=min_periods, freq=freq, center=center) - self._check_moment_func(roll_mean, np.mean) + self._check_moment_func(roll_mean, np.mean, name='apply', func=f) # GH 8080 s = Series([None, None, None]) - result = mom.rolling_apply(s, 2, lambda x: len(x), min_periods=0) + result = s.rolling(2,min_periods=0).apply(lambda x: len(x)) expected = Series([1., 2., 2.]) assert_series_equal(result, expected) + result = s.rolling(2, min_periods=0).apply(len) + assert_series_equal(result, expected) + def test_rolling_apply_out_of_bounds(self): # #1850 arr = np.arange(4) # it works! 
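# A minimal sketch, assuming pandas >= 0.18: .apply receives each
# window as an ndarray, and min_periods controls how early a value
# appears when NaNs are present.
import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.0, np.nan, 4.0])
out = s.rolling(window=2, min_periods=1).apply(lambda x: np.nanmax(x))
# expected: [1.0, 2.0, 2.0, 4.0]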
- result = mom.rolling_apply(arr, 10, np.sum) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_apply(arr, 10, np.sum) self.assertTrue(isnull(result).all()) - result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) assert_almost_equal(result, result) def test_rolling_std(self): self._check_moment_func(mom.rolling_std, - lambda x: np.std(x, ddof=1)) - self._check_moment_func(functools.partial(mom.rolling_std, ddof=0), - lambda x: np.std(x, ddof=0)) + lambda x: np.std(x, ddof=1), + name='std') + self._check_moment_func(mom.rolling_std, + lambda x: np.std(x, ddof=0), + name='std', + ddof=0) def test_rolling_std_1obs(self): - result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), + 1, min_periods=1) expected = np.array([np.nan] * 5) assert_almost_equal(result, expected) - result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1, ddof=0) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), + 1, min_periods=1, ddof=0) expected = np.zeros(5) assert_almost_equal(result, expected) - result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), - 3, min_periods=2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), + 3, min_periods=2) self.assertTrue(np.isnan(result[2])) def test_rolling_std_neg_sqrt(self): @@ -405,18 +670,23 @@ def test_rolling_std_neg_sqrt(self): 0.00028718669878572767, 0.00028718669878572767, 0.00028718669878572767]) - b = mom.rolling_std(a, window=3) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + b = mom.rolling_std(a, window=3) self.assertTrue(np.isfinite(b[2:]).all()) - b = mom.ewmstd(a, span=3) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + b = mom.ewmstd(a, span=3) self.assertTrue(np.isfinite(b[2:]).all()) def test_rolling_var(self): self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=1), - test_stable=True) - self._check_moment_func(functools.partial(mom.rolling_var, ddof=0), - lambda x: np.var(x, ddof=0)) + test_stable=True, + name='var') + self._check_moment_func(mom.rolling_var, + lambda x: np.var(x, ddof=0), + name='var', + ddof=0) def test_rolling_skew(self): try: @@ -424,7 +694,8 @@ def test_rolling_skew(self): except ImportError: raise nose.SkipTest('no scipy') self._check_moment_func(mom.rolling_skew, - lambda x: skew(x, bias=False)) + lambda x: skew(x, bias=False), + name='skew') def test_rolling_kurt(self): try: @@ -432,7 +703,8 @@ def test_rolling_kurt(self): except ImportError: raise nose.SkipTest('no scipy') self._check_moment_func(mom.rolling_kurt, - lambda x: kurtosis(x, bias=False)) + lambda x: kurtosis(x, bias=False), + name='kurt') def test_fperr_robustness(self): # TODO: remove this once python 2.5 out of picture @@ -446,53 +718,79 @@ def test_fperr_robustness(self): if sys.byteorder != "little": arr = arr.byteswap().newbyteorder() - result = mom.rolling_sum(arr, 2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_sum(arr, 2) self.assertTrue((result[1:] >= 0).all()) - result = mom.rolling_mean(arr, 2) + with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_mean(arr, 2) self.assertTrue((result[1:] >= 0).all()) - result = mom.rolling_var(arr, 2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_var(arr, 2) self.assertTrue((result[1:] >= 0).all()) # #2527, ugh arr = np.array([0.00012456, 0.0003, 0]) - result = mom.rolling_mean(arr, 1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_mean(arr, 1) self.assertTrue(result[-1] >= 0) - result = mom.rolling_mean(-arr, 1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.rolling_mean(-arr, 1) self.assertTrue(result[-1] <= 0) - def _check_moment_func(self, func, static_comp, window=50, + def _check_moment_func(self, f, static_comp, + name=None, + window=50, has_min_periods=True, has_center=True, has_time_rule=True, preserve_nan=True, fill_value=None, - test_stable=False): - - self._check_ndarray(func, static_comp, window=window, - has_min_periods=has_min_periods, - preserve_nan=preserve_nan, - has_center=has_center, - fill_value=fill_value, - test_stable=test_stable) - - self._check_structures(func, static_comp, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - fill_value=fill_value, - has_center=has_center) - - def _check_ndarray(self, func, static_comp, window=50, + test_stable=False, + **kwargs): + + with warnings.catch_warnings(record=True): + self._check_ndarray(f, static_comp, window=window, + has_min_periods=has_min_periods, + preserve_nan=preserve_nan, + has_center=has_center, + fill_value=fill_value, + test_stable=test_stable, + **kwargs) + + with warnings.catch_warnings(record=True): + self._check_structures(f, static_comp, + has_min_periods=has_min_periods, + has_time_rule=has_time_rule, + fill_value=fill_value, + has_center=has_center, + **kwargs) + + # new API + if name is not None: + self._check_structures(f, static_comp, + name=name, + has_min_periods=has_min_periods, + has_time_rule=has_time_rule, + fill_value=fill_value, + has_center=has_center, + **kwargs) + + def _check_ndarray(self, f, static_comp, window=50, has_min_periods=True, preserve_nan=True, has_center=True, fill_value=None, test_stable=False, - test_window=True): + test_window=True, + **kwargs): + + def get_result(arr, window, min_periods=None, center=False): + return f(arr, window, min_periods=min_periods, center=center, **kwargs) - result = func(self.arr, window) + result = get_result(self.arr, window) assert_almost_equal(result[-1], static_comp(self.arr[-50:])) @@ -505,11 +803,11 @@ def _check_ndarray(self, func, static_comp, window=50, arr[-10:] = np.NaN if has_min_periods: - result = func(arr, 50, min_periods=30) + result = get_result(arr, 50, min_periods=30) assert_almost_equal(result[-1], static_comp(arr[10:-10])) # min_periods is working correctly - result = func(arr, 20, min_periods=15) + result = get_result(arr, 20, min_periods=15) self.assertTrue(np.isnan(result[23])) self.assertFalse(np.isnan(result[24])) @@ -517,31 +815,31 @@ def _check_ndarray(self, func, static_comp, window=50, self.assertTrue(np.isnan(result[-5])) arr2 = randn(20) - result = func(arr2, 10, min_periods=5) + result = get_result(arr2, 10, min_periods=5) self.assertTrue(isnull(result[3])) self.assertTrue(notnull(result[4])) # min_periods=0 - result0 = func(arr, 20, min_periods=0) - result1 = func(arr, 20, min_periods=1) + result0 = get_result(arr, 20, min_periods=0) + result1 = get_result(arr, 20, 
min_periods=1) assert_almost_equal(result0, result1) else: - result = func(arr, 50) + result = get_result(arr, 50) assert_almost_equal(result[-1], static_comp(arr[10:-10])) # GH 7925 if has_center: if has_min_periods: - result = func(arr, 20, min_periods=15, center=True) - expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20, min_periods=15)[9:] + result = get_result(arr, 20, min_periods=15, center=True) + expected = get_result(np.concatenate((arr, np.array([np.NaN] * 9))), 20, min_periods=15)[9:] else: - result = func(arr, 20, center=True) - expected = func(np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:] + result = get_result(arr, 20, center=True) + expected = get_result(np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:] self.assert_numpy_array_equal(result, expected) if test_stable: - result = func(self.arr + 1e9, window) + result = get_result(self.arr + 1e9, window) assert_almost_equal(result[-1], static_comp(self.arr[-50:] + 1e9)) @@ -549,16 +847,16 @@ def _check_ndarray(self, func, static_comp, window=50, if test_window: if has_min_periods: for minp in (0, len(self.arr)-1, len(self.arr)): - result = func(self.arr, len(self.arr)+1, min_periods=minp) - expected = func(self.arr, len(self.arr), min_periods=minp) + result = get_result(self.arr, len(self.arr)+1, min_periods=minp) + expected = get_result(self.arr, len(self.arr), min_periods=minp) nan_mask = np.isnan(result) self.assertTrue(np.array_equal(nan_mask, np.isnan(expected))) nan_mask = ~nan_mask assert_almost_equal(result[nan_mask], expected[nan_mask]) else: - result = func(self.arr, len(self.arr)+1) - expected = func(self.arr, len(self.arr)) + result = get_result(self.arr, len(self.arr)+1) + expected = get_result(self.arr, len(self.arr)) nan_mask = np.isnan(result) self.assertTrue(np.array_equal(nan_mask, np.isnan(expected))) nan_mask = ~nan_mask @@ -567,15 +865,40 @@ def _check_ndarray(self, func, static_comp, window=50, - def _check_structures(self, func, static_comp, + def _check_structures(self, f, static_comp, + name=None, has_min_periods=True, has_time_rule=True, has_center=True, - fill_value=None): + fill_value=None, + **kwargs): + + def get_result(obj, window, min_periods=None, freq=None, center=False): + + # check via the API calls if name is provided + if name is not None: + + # catch a freq deprecation warning if freq is provided and not None + w = FutureWarning if freq is not None else None + with tm.assert_produces_warning(w, check_stacklevel=False): + r = obj.rolling(window=window, + min_periods=min_periods, + freq=freq, + center=center) + return getattr(r,name)(**kwargs) + + # check via the moments API + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + return f(obj, + window=window, + min_periods=min_periods, + freq=freq, + center=center, + **kwargs) + + series_result = get_result(self.series, window=50) + frame_result = get_result(self.frame, window=50) - series_result = func(self.series, 50) tm.assertIsInstance(series_result, Series) - - frame_result = func(self.frame, 50) self.assertEqual(type(frame_result), DataFrame) # check time_rule works @@ -584,13 +907,11 @@ def _check_structures(self, func, static_comp, minp = 10 if has_min_periods: - series_result = func(self.series[::2], win, min_periods=minp, - freq='B') - frame_result = func(self.frame[::2], win, min_periods=minp, - freq='B') + series_result = get_result(self.series[::2], window=win, min_periods=minp, freq='B') + frame_result = get_result(self.frame[::2], window=win, min_periods=minp, freq='B') else: - 
series_result = func(self.series[::2], win, freq='B') - frame_result = func(self.frame[::2], win, freq='B') + series_result = get_result(self.series[::2], window=win, freq='B') + frame_result = get_result(self.frame[::2], window=win, freq='B') last_date = series_result.index[-1] prev_date = last_date - 24 * datetools.bday @@ -605,22 +926,41 @@ def _check_structures(self, func, static_comp, # GH 7925 if has_center: + + # shifter index + s = ['x%d'%x for x in range(12)] + if has_min_periods: minp = 10 - series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.series.index) - frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25, min_periods=minp).shift(-12).reindex(self.frame.index) - series_rs = func(self.series, 25, min_periods=minp, - center=True) - frame_rs = func(self.frame, 25, min_periods=minp, - center=True) + series_xp = get_result(self.series.reindex(list(self.series.index)+s), + window=25, + min_periods=minp).shift(-12).reindex(self.series.index) + frame_xp = get_result(self.frame.reindex(list(self.frame.index)+s), + window=25, + min_periods=minp).shift(-12).reindex(self.frame.index) + + series_rs = get_result(self.series, + window=25, + min_periods=minp, + center=True) + frame_rs = get_result(self.frame, + window=25, + min_periods=minp, + center=True) else: - series_xp = func(self.series.reindex(list(self.series.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.series.index) - frame_xp = func(self.frame.reindex(list(self.frame.index)+['x%d'%x for x in range(12)]), 25).shift(-12).reindex(self.frame.index) - - series_rs = func(self.series, 25, center=True) - frame_rs = func(self.frame, 25, center=True) + series_xp = get_result(self.series.reindex(list(self.series.index)+s), + window=25).shift(-12).reindex(self.series.index) + frame_xp = get_result(self.frame.reindex(list(self.frame.index)+s), + window=25).shift(-12).reindex(self.frame.index) + + series_rs = get_result(self.series, + window=25, + center=True) + frame_rs = get_result(self.frame, + window=25, + center=True) if fill_value is not None: series_xp = series_xp.fillna(fill_value) @@ -629,38 +969,39 @@ def _check_structures(self, func, static_comp, assert_frame_equal(frame_xp, frame_rs) def test_ewma(self): - self._check_ew(mom.ewma) + self._check_ew(mom.ewma,name='mean') arr = np.zeros(1000) arr[5] = 1 - result = mom.ewma(arr, span=100, adjust=False).sum() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = mom.ewma(arr, span=100, adjust=False).sum() self.assertTrue(np.abs(result - 1) < 1e-2) s = Series([1.0, 2.0, 4.0, 8.0]) expected = Series([1.0, 1.6, 2.736842, 4.923077]) - for f in [lambda s: mom.ewma(s, com=2.0, adjust=True), - lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False), - lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=True), - ]: + for f in [lambda s: s.ewm(com=2.0, adjust=True).mean(), + lambda s: s.ewm(com=2.0, adjust=True, ignore_na=False).mean(), + lambda s: s.ewm(com=2.0, adjust=True, ignore_na=True).mean(), + ]: result = f(s) assert_series_equal(result, expected) expected = Series([1.0, 1.333333, 2.222222, 4.148148]) - for f in [lambda s: mom.ewma(s, com=2.0, adjust=False), - lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=False), - lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=True), + for f in [lambda s: s.ewm(com=2.0, adjust=False).mean(), + lambda s: s.ewm(com=2.0, adjust=False, ignore_na=False).mean(), 
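# A minimal sketch: with adjust=False the mean follows the recursion
# y[t] = (1 - alpha) * y[t-1] + alpha * x[t], alpha = 1 / (1 + com),
# which reproduces the adjust=False expectation above for com=2.0.
import pandas as pd

s = pd.Series([1.0, 2.0, 4.0, 8.0])
alpha = 1.0 / (1.0 + 2.0)
y = [s.iloc[0]]
for x in s.iloc[1:]:
    y.append((1 - alpha) * y[-1] + alpha * x)
# y == [1.0, 1.333333..., 2.222222..., 4.148148...]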
+ lambda s: s.ewm(com=2.0, adjust=False, ignore_na=True).mean(), ]: result = f(s) assert_series_equal(result, expected) def test_ewma_nan_handling(self): s = Series([1.] + [np.nan] * 5 + [1.]) - result = mom.ewma(s, com=5) + result = s.ewm(com=5).mean() assert_almost_equal(result, [1.] * len(s)) s = Series([np.nan] * 2 + [1.] + [np.nan] * 2 + [1.]) - result = mom.ewma(s, com=5) + result = s.ewm(com=5).mean() assert_almost_equal(result, [np.nan] * 2 + [1.] * 4) # GH 7603 @@ -693,58 +1034,55 @@ def simple_wma(s, w): (s3, False, True, [(1. - alpha)**2, np.nan, (1. - alpha) * alpha, alpha]), ]: expected = simple_wma(s, Series(w)) - result = mom.ewma(s, com=com, adjust=adjust, ignore_na=ignore_na) + result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() + assert_series_equal(result, expected) if ignore_na is False: # check that ignore_na defaults to False - result = mom.ewma(s, com=com, adjust=adjust) + result = s.ewm(com=com, adjust=adjust).mean() assert_series_equal(result, expected) def test_ewmvar(self): - self._check_ew(mom.ewmvar) + self._check_ew(mom.ewmvar, name='var') def test_ewmvol(self): - self._check_ew(mom.ewmvol) + self._check_ew(mom.ewmvol, name='vol') def test_ewma_span_com_args(self): - A = mom.ewma(self.arr, com=9.5) - B = mom.ewma(self.arr, span=20) - assert_almost_equal(A, B) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + A = mom.ewma(self.arr, com=9.5) + B = mom.ewma(self.arr, span=20) + assert_almost_equal(A, B) - self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20) - self.assertRaises(Exception, mom.ewma, self.arr) + self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20) + self.assertRaises(Exception, mom.ewma, self.arr) def test_ewma_halflife_arg(self): - A = mom.ewma(self.arr, com=13.932726172912965) - B = mom.ewma(self.arr, halflife=10.0) - assert_almost_equal(A, B) - - self.assertRaises(Exception, mom.ewma, self.arr, span=20, halflife=50) - self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, halflife=50) - self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20, halflife=50) - self.assertRaises(Exception, mom.ewma, self.arr) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + A = mom.ewma(self.arr, com=13.932726172912965) + B = mom.ewma(self.arr, halflife=10.0) + assert_almost_equal(A, B) - def test_moment_preserve_series_name(self): - # GH 10565 - s = Series(np.arange(100), name='foo') - s2 = mom.rolling_mean(s, 30) - s3 = mom.rolling_sum(s, 20) - self.assertEqual(s2.name, 'foo') - self.assertEqual(s3.name, 'foo') + self.assertRaises(Exception, mom.ewma, self.arr, span=20, halflife=50) + self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, halflife=50) + self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20, halflife=50) + self.assertRaises(Exception, mom.ewma, self.arr) def test_ew_empty_arrays(self): arr = np.array([], dtype=np.float64) funcs = [mom.ewma, mom.ewmvol, mom.ewmvar] for f in funcs: - result = f(arr, 3) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = f(arr, 3) assert_almost_equal(result, arr) - def _check_ew(self, func): - self._check_ew_ndarray(func) - self._check_ew_structures(func) + def _check_ew(self, func, name=None): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self._check_ew_ndarray(func, name=name) + self._check_ew_structures(func, name=name) - def _check_ew_ndarray(self, func, preserve_nan=False): + def _check_ew_ndarray(self, func, preserve_nan=False, 
name=None): result = func(self.arr, com=10) if preserve_nan: assert(np.isnan(result[self._nan_locs]).all()) @@ -787,10 +1125,11 @@ def _check_ew_ndarray(self, func, preserve_nan=False): result2 = func(np.arange(50), span=10) self.assertEqual(result2.dtype, np.float_) - def _check_ew_structures(self, func): - series_result = func(self.series, com=10) + def _check_ew_structures(self, func, name): + series_result = getattr(self.series.ewm(com=10),name)() tm.assertIsInstance(series_result, Series) - frame_result = func(self.frame, com=10) + + frame_result = getattr(self.frame.ewm(com=10),name)() self.assertEqual(type(frame_result), DataFrame) # create the data only once as we are not setting it @@ -1044,7 +1383,7 @@ def _variance_debiasing_factors(s, com, adjust, ignore_na): def _ewma(s, com, min_periods, adjust, ignore_na): weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) result = s.multiply(weights).cumsum().divide(weights.cumsum()).fillna(method='ffill') - result[mom.expanding_count(s) < (max(min_periods, 1) if min_periods else 1)] = np.nan + result[s.expanding().count() < (max(min_periods, 1) if min_periods else 1)] = np.nan return result com = 3. @@ -1054,16 +1393,16 @@ def _ewma(s, com, min_periods, adjust, ignore_na): # test consistency between different ewm* moments self._test_moments_consistency( min_periods=min_periods, - count=mom.expanding_count, - mean=lambda x: mom.ewma(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).mean(), mock_mean=lambda x: _ewma(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), - corr=lambda x, y: mom.ewmcorr(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), - var_unbiased=lambda x: mom.ewmvar(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=False), - std_unbiased=lambda x: mom.ewmstd(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=False), - cov_unbiased=lambda x, y: mom.ewmcov(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=False), - var_biased=lambda x: mom.ewmvar(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), - std_biased=lambda x: mom.ewmstd(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), - cov_biased=lambda x, y: mom.ewmcov(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), + corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).corr(y), + var_unbiased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).var(bias=False), + std_unbiased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).std(bias=False), + cov_unbiased=lambda x, y: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).cov(y, bias=False), + var_biased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).var(bias=True), + std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).std(bias=True), + cov_biased=lambda x, y: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).cov(y, bias=True), var_debiasing_factors=lambda x: _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na)) @slow @@ -1078,17 +1417,17 @@ def 
test_expanding_consistency(self): # test consistency between different expanding_* moments self._test_moments_consistency( min_periods=min_periods, - count=mom.expanding_count, - mean=lambda x: mom.expanding_mean(x, min_periods=min_periods), - mock_mean=lambda x: mom.expanding_sum(x, min_periods=min_periods) / mom.expanding_count(x), - corr=lambda x, y: mom.expanding_corr(x, y, min_periods=min_periods), - var_unbiased=lambda x: mom.expanding_var(x, min_periods=min_periods), - std_unbiased=lambda x: mom.expanding_std(x, min_periods=min_periods), - cov_unbiased=lambda x, y: mom.expanding_cov(x, y, min_periods=min_periods), - var_biased=lambda x: mom.expanding_var(x, min_periods=min_periods, ddof=0), - std_biased=lambda x: mom.expanding_std(x, min_periods=min_periods, ddof=0), - cov_biased=lambda x, y: mom.expanding_cov(x, y, min_periods=min_periods, ddof=0), - var_debiasing_factors=lambda x: mom.expanding_count(x) / (mom.expanding_count(x) - 1.).replace(0., np.nan) + count=lambda x: x.expanding().count(), + mean=lambda x: x.expanding(min_periods=min_periods).mean(), + mock_mean=lambda x: x.expanding(min_periods=min_periods).sum() / x.expanding().count(), + corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), + var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), + std_unbiased=lambda x: x.expanding(min_periods=min_periods).std(), + cov_unbiased=lambda x, y: x.expanding(min_periods=min_periods).cov(y), + var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), + std_biased=lambda x: x.expanding(min_periods=min_periods).std(ddof=0), + cov_biased=lambda x, y: x.expanding(min_periods=min_periods).cov(y, ddof=0), + var_debiasing_factors=lambda x: x.expanding().count() / (x.expanding().count() - 1.).replace(0., np.nan) ) # test consistency between expanding_xyz() and either (a) expanding_apply of Series.xyz(), @@ -1101,117 +1440,120 @@ def test_expanding_consistency(self): if no_nans: functions = self.base_functions + self.no_nan_functions for (f, require_min_periods, name) in functions: - expanding_f = getattr(mom,'expanding_{0}'.format(name)) + expanding_f = getattr(x.expanding(min_periods=min_periods),name) if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): continue - if expanding_f is mom.expanding_count: - expanding_f_result = expanding_f(x) - expanding_apply_f_result = mom.expanding_apply(x, func=f, min_periods=0) + if name == 'count': + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding(min_periods=0).apply(func=f) else: - if expanding_f in [mom.expanding_cov, mom.expanding_corr]: - expanding_f_result = expanding_f(x, min_periods=min_periods, pairwise=False) + if name in ['cov','corr']: + expanding_f_result = expanding_f(pairwise=False) else: - expanding_f_result = expanding_f(x, min_periods=min_periods) - expanding_apply_f_result = mom.expanding_apply(x, func=f, min_periods=min_periods) + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding(min_periods=min_periods).apply(func=f) if not tm._incompat_bottleneck_version(name): assert_equal(expanding_f_result, expanding_apply_f_result) - if (expanding_f in [mom.expanding_cov, mom.expanding_corr]) and isinstance(x, DataFrame): + if (name in ['cov','corr']) and isinstance(x, DataFrame): # test pairwise=True - expanding_f_result = expanding_f(x, x, min_periods=min_periods, pairwise=True) + expanding_f_result = expanding_f(x, pairwise=True) expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) for 
i, _ in enumerate(x.columns): for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = expanding_f(x.iloc[:, i], x.iloc[:, j], min_periods=min_periods) + expected.iloc[:, i, j] = getattr(x.iloc[:, i].expanding(min_periods=min_periods),name)(x.iloc[:, j]) assert_panel_equal(expanding_f_result, expected) @slow def test_rolling_consistency(self): - for window in [1, 2, 3, 10, 20]: - for min_periods in set([0, 1, 2, 3, 4, window]): - if min_periods and (min_periods > window): - continue - for center in [False, True]: + # suppress warnings about empty slices, as we are deliberately testing with empty/0-length Series/DataFrames + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - # test consistency between different rolling_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: mom.rolling_count(x, window=window, center=center), - mean=lambda x: mom.rolling_mean(x, window=window, min_periods=min_periods, center=center), - mock_mean=lambda x: mom.rolling_sum(x, window=window, min_periods=min_periods, center=center).divide( - mom.rolling_count(x, window=window, center=center)), - corr=lambda x, y: mom.rolling_corr(x, y, window=window, min_periods=min_periods, center=center), - var_unbiased=lambda x: mom.rolling_var(x, window=window, min_periods=min_periods, center=center), - std_unbiased=lambda x: mom.rolling_std(x, window=window, min_periods=min_periods, center=center), - cov_unbiased=lambda x, y: mom.rolling_cov(x, y, window=window, min_periods=min_periods, center=center), - var_biased=lambda x: mom.rolling_var(x, window=window, min_periods=min_periods, center=center, ddof=0), - std_biased=lambda x: mom.rolling_std(x, window=window, min_periods=min_periods, center=center, ddof=0), - cov_biased=lambda x, y: mom.rolling_cov(x, y, window=window, min_periods=min_periods, center=center, ddof=0), - var_debiasing_factors=lambda x: mom.rolling_count(x, window=window, center=center).divide( - (mom.rolling_count(x, window=window, center=center) - 1.).replace(0., np.nan)), - ) - - # test consistency between rolling_xyz() and either (a) rolling_apply of Series.xyz(), - # or (b) rolling_apply of np.nanxyz() - for (x, is_constant, no_nans) in self.data: - - assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - rolling_f = getattr(mom,'rolling_{0}'.format(name)) - - if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): - continue - - if rolling_f is mom.rolling_count: - rolling_f_result = rolling_f(x, window=window, center=center) - rolling_apply_f_result = mom.rolling_apply(x, window=window, func=f, - min_periods=0, center=center) - else: - if rolling_f in [mom.rolling_cov, mom.rolling_corr]: - rolling_f_result = rolling_f(x, window=window, min_periods=min_periods, center=center, pairwise=False) + for window in [1, 2, 3, 10, 20]: + for min_periods in set([0, 1, 2, 3, 4, window]): + if min_periods and (min_periods > window): + continue + for center in [False, True]: + + # test consistency between different rolling_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.rolling(window=window, center=center).count(), + mean=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).mean(), + 
mock_mean=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).sum().divide( + x.rolling(window=window, min_periods=min_periods, center=center).count()), + corr=lambda x, y: x.rolling(window=window, min_periods=min_periods, center=center).corr(y), + var_unbiased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).var(), + std_unbiased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).std(), + cov_unbiased=lambda x, y: x.rolling(window=window, min_periods=min_periods, center=center).cov(y), + var_biased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).var(ddof=0), + std_biased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).std(ddof=0), + cov_biased=lambda x, y: x.rolling(window=window, min_periods=min_periods, center=center).cov(y, ddof=0), + var_debiasing_factors=lambda x: x.rolling(window=window, center=center).count().divide( + (x.rolling(window=window, center=center).count() - 1.).replace(0., np.nan)), + ) + + # test consistency between rolling_xyz() and either (a) rolling_apply of Series.xyz(), + # or (b) rolling_apply of np.nanxyz() + for (x, is_constant, no_nans) in self.data: + + assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr(x.rolling(window=window, center=center, min_periods=min_periods),name) + + if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): + continue + + if name == 'count': + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling(window=window, + min_periods=0, center=center).apply(func=f) else: - rolling_f_result = rolling_f(x, window=window, min_periods=min_periods, center=center) - rolling_apply_f_result = mom.rolling_apply(x, window=window, func=f, - min_periods=min_periods, center=center) - if not tm._incompat_bottleneck_version(name): - assert_equal(rolling_f_result, rolling_apply_f_result) - - if (rolling_f in [mom.rolling_cov, mom.rolling_corr]) and isinstance(x, DataFrame): - # test pairwise=True - rolling_f_result = rolling_f(x, x, window=window, min_periods=min_periods, - center=center, pairwise=True) - expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) - for i, _ in enumerate(x.columns): - for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = rolling_f(x.iloc[:, i], x.iloc[:, j], - window=window, min_periods=min_periods, center=center) - assert_panel_equal(rolling_f_result, expected) + if name in ['cov','corr']: + rolling_f_result = rolling_f(pairwise=False) + else: + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling(window=window, + min_periods=min_periods, center=center).apply(func=f) + if not tm._incompat_bottleneck_version(name): + assert_equal(rolling_f_result, rolling_apply_f_result) + + if (name in ['cov','corr']) and isinstance(x, DataFrame): + # test pairwise=True + rolling_f_result = rolling_f(x, pairwise=True) + expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) + for i, _ in enumerate(x.columns): + for j, _ in enumerate(x.columns): + expected.iloc[:, i, j] = getattr(x.iloc[:, i].rolling( + window=window, min_periods=min_periods, center=center),name)(x.iloc[:, j]) + assert_panel_equal(rolling_f_result, expected) # binary moments def test_rolling_cov(self): A = self.series B 
= A + randn(len(A)) - result = mom.rolling_cov(A, B, 50, min_periods=25) + result = A.rolling(window=50, min_periods=25).cov(B) assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) def test_rolling_cov_pairwise(self): - self._check_pairwise_moment(mom.rolling_cov, 10, min_periods=5) + self._check_pairwise_moment('rolling','cov', window=10, min_periods=5) def test_rolling_corr(self): A = self.series B = A + randn(len(A)) - result = mom.rolling_corr(A, B, 50, min_periods=25) + result = A.rolling(window=50, min_periods=25).corr(B) assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) # test for correct bias correction @@ -1220,24 +1562,27 @@ def test_rolling_corr(self): a[:5] = np.nan b[:10] = np.nan - result = mom.rolling_corr(a, b, len(a), min_periods=1) + result = a.rolling(window=len(a), min_periods=1).corr(b) assert_almost_equal(result[-1], a.corr(b)) def test_rolling_corr_pairwise(self): - self._check_pairwise_moment(mom.rolling_corr, 10, min_periods=5) + self._check_pairwise_moment('rolling', 'corr', window=10, min_periods=5) + + def _check_pairwise_moment(self, dispatch, name, **kwargs): - def _check_pairwise_moment(self, func, *args, **kwargs): - panel = func(self.frame, *args, **kwargs) + def get_result(obj, obj2=None): + return getattr(getattr(obj,dispatch)(**kwargs),name)(obj2) + panel = get_result(self.frame) actual = panel.ix[:, 1, 5] - expected = func(self.frame[1], self.frame[5], *args, **kwargs) + expected = get_result(self.frame[1], self.frame[5]) tm.assert_series_equal(actual, expected, check_names=False) self.assertEqual(actual.name, 5) def test_flex_binary_moment(self): # GH3155 # don't blow the stack - self.assertRaises(TypeError, mom._flex_binary_moment,5,6,None) + self.assertRaises(TypeError, rwindow._flex_binary_moment,5,6,None) def test_corr_sanity(self): #GH 3155 @@ -1251,13 +1596,13 @@ def test_corr_sanity(self): [ 0.78369152, 0.63919667]]) ) - res = mom.rolling_corr(df[0],df[1],5,center=True) + res = df[0].rolling(5,center=True).corr(df[1]) self.assertTrue(all([np.abs(np.nan_to_num(x)) <=1 for x in res])) # and some fuzzing for i in range(10): df = DataFrame(np.random.rand(30,2)) - res = mom.rolling_corr(df[0],df[1],5,center=True) + res = df[0].rolling(5,center=True).corr(df[1]) try: self.assertTrue(all([np.abs(np.nan_to_num(x)) <=1 for x in res])) except: @@ -1268,9 +1613,9 @@ def test_flex_binary_frame(self): def _check(method): series = self.frame[1] - res = method(series, self.frame, 10) - res2 = method(self.frame, series, 10) - exp = self.frame.apply(lambda x: method(series, x, 10)) + res = getattr(series.rolling(window=10),method)(self.frame) + res2 = getattr(self.frame.rolling(window=10),method)(series) + exp = self.frame.apply(lambda x: getattr(series.rolling(window=10),method)(x)) tm.assert_frame_equal(res, exp) tm.assert_frame_equal(res2, exp) @@ -1278,28 +1623,32 @@ def _check(method): frame2 = self.frame.copy() frame2.values[:] = np.random.randn(*frame2.shape) - res3 = method(self.frame, frame2, 10) - exp = DataFrame(dict((k, method(self.frame[k], frame2[k], 10)) + res3 = getattr(self.frame.rolling(window=10),method)(frame2) + exp = DataFrame(dict((k, getattr(self.frame[k].rolling(window=10),method)(frame2[k])) for k in self.frame)) tm.assert_frame_equal(res3, exp) - methods = [mom.rolling_corr, mom.rolling_cov] + methods = ['corr','cov'] for meth in methods: _check(meth) def test_ewmcov(self): - self._check_binary_ew(mom.ewmcov) + self._check_binary_ew('cov') def test_ewmcov_pairwise(self): - 
self._check_pairwise_moment(mom.ewmcov, span=10, min_periods=5)
+        self._check_pairwise_moment('ewm','cov', span=10, min_periods=5)
 
     def test_ewmcorr(self):
-        self._check_binary_ew(mom.ewmcorr)
+        self._check_binary_ew('corr')
 
     def test_ewmcorr_pairwise(self):
-        self._check_pairwise_moment(mom.ewmcorr, span=10, min_periods=5)
+        self._check_pairwise_moment('ewm','corr', span=10, min_periods=5)
+
+    def _check_binary_ew(self, name):
+
+        def func(A, B, com, **kwargs):
+            return getattr(A.ewm(com, **kwargs),name)(B)
 
-    def _check_binary_ew(self, func):
         A = Series(randn(50), index=np.arange(50))
         B = A[2:] + randn(48)
 
@@ -1329,7 +1678,7 @@ def _check_binary_ew(self, func):
 
     def test_expanding_apply(self):
         ser = Series([])
-        assert_series_equal(ser, mom.expanding_apply(ser, lambda x: x.mean()))
+        assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean()))
 
         def expanding_mean(x, min_periods=1, freq=None):
             return mom.expanding_apply(x,
@@ -1340,7 +1689,7 @@ def expanding_mean(x, min_periods=1, freq=None):
 
         # GH 8080
         s = Series([None, None, None])
-        result = mom.expanding_apply(s, lambda x: len(x), min_periods=0)
+        result = s.expanding(min_periods=0).apply(lambda x: len(x))
         expected = Series([1., 2., 3.])
         assert_series_equal(result, expected)
 
@@ -1350,36 +1699,34 @@ def mean_w_arg(x, const):
 
         df = DataFrame(np.random.rand(20, 3))
 
-        expected = mom.expanding_apply(df, np.mean) + 20.
+        expected = df.expanding().apply(np.mean) + 20.
 
-        assert_frame_equal(mom.expanding_apply(df, mean_w_arg, args=(20,)),
-                           expected)
-        assert_frame_equal(mom.expanding_apply(df, mean_w_arg,
-                                               kwargs={'const' : 20}),
+        assert_frame_equal(df.expanding().apply(mean_w_arg, args=(20,)),
                            expected)
+        assert_frame_equal(df.expanding().apply(mean_w_arg,
+                                                kwargs={'const' : 20}),
+                           expected)
 
     def test_expanding_corr(self):
         A = self.series.dropna()
         B = (A + randn(len(A)))[:-5]
 
-        result = mom.expanding_corr(A, B)
+        result = A.expanding().corr(B)
 
-        rolling_result = mom.rolling_corr(A, B, len(A), min_periods=1)
+        rolling_result = A.rolling(window=len(A),min_periods=1).corr(B)
 
         assert_almost_equal(rolling_result, result)
 
     def test_expanding_count(self):
-        result = mom.expanding_count(self.series)
-        assert_almost_equal(result, mom.rolling_count(self.series,
-                                                      len(self.series)))
+        result = self.series.expanding().count()
+        assert_almost_equal(result, self.series.rolling(window=len(self.series)).count())
 
     def test_expanding_quantile(self):
-        result = mom.expanding_quantile(self.series, 0.5)
+        result = self.series.expanding().quantile(0.5)
 
-        rolling_result = mom.rolling_quantile(self.series,
-                                              len(self.series),
-                                              0.5, min_periods=1)
+        rolling_result = self.series.rolling(
+            window=len(self.series),min_periods=1).quantile(0.5)
 
         assert_almost_equal(result, rolling_result)
 
@@ -1387,9 +1734,9 @@ def test_expanding_cov(self):
         A = self.series
         B = (A + randn(len(A)))[:-5]
 
-        result = mom.expanding_cov(A, B)
+        result = A.expanding().cov(B)
 
-        rolling_result = mom.rolling_cov(A, B, len(A), min_periods=1)
+        rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)
 
         assert_almost_equal(rolling_result, result)
 
@@ -1397,19 +1744,17 @@ def test_expanding_max(self):
         self._check_expanding(mom.expanding_max, np.max, preserve_nan=False)
 
     def test_expanding_cov_pairwise(self):
-        result = mom.expanding_cov(self.frame)
+        result = self.frame.expanding().cov()
 
-        rolling_result = mom.rolling_cov(self.frame, len(self.frame),
-                                         min_periods=1)
+        rolling_result = self.frame.rolling(window=len(self.frame),min_periods=1).cov()
 
         for i in result.items:
assert_almost_equal(result[i], rolling_result[i]) def test_expanding_corr_pairwise(self): - result = mom.expanding_corr(self.frame) + result = self.frame.expanding().corr() - rolling_result = mom.rolling_corr(self.frame, len(self.frame), - min_periods=1) + rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).corr() for i in result.items: assert_almost_equal(result[i], rolling_result[i]) @@ -1418,17 +1763,17 @@ def test_expanding_cov_diff_index(self): # GH 7512 s1 = Series([1, 2, 3], index=[0, 1, 2]) s2 = Series([1, 3], index=[0, 2]) - result = mom.expanding_cov(s1, s2) + result = s1.expanding().cov(s2) expected = Series([None, None, 2.0]) assert_series_equal(result, expected) s2a = Series([1, None, 3], index=[0, 1, 2]) - result = mom.expanding_cov(s1, s2a) + result = s1.expanding().cov(s2a) assert_series_equal(result, expected) s1 = Series([7, 8, 10], index=[0, 1, 3]) s2 = Series([7, 9, 10], index=[0, 2, 3]) - result = mom.expanding_cov(s1, s2) + result = s1.expanding().cov(s2) expected = Series([None, None, None, 4.5]) assert_series_equal(result, expected) @@ -1436,17 +1781,17 @@ def test_expanding_corr_diff_index(self): # GH 7512 s1 = Series([1, 2, 3], index=[0, 1, 2]) s2 = Series([1, 3], index=[0, 2]) - result = mom.expanding_corr(s1, s2) + result = s1.expanding().corr(s2) expected = Series([None, None, 1.0]) assert_series_equal(result, expected) s2a = Series([1, None, 3], index=[0, 1, 2]) - result = mom.expanding_corr(s1, s2a) + result = s1.expanding().corr(s2a) assert_series_equal(result, expected) s1 = Series([7, 8, 10], index=[0, 1, 3]) s2 = Series([7, 9, 10], index=[0, 2, 3]) - result = mom.expanding_corr(s1, s2) + result = s1.expanding().corr(s2) expected = Series([None, None, None, 1.]) assert_series_equal(result, expected) @@ -1454,24 +1799,24 @@ def test_rolling_cov_diff_length(self): # GH 7512 s1 = Series([1, 2, 3], index=[0, 1, 2]) s2 = Series([1, 3], index=[0, 2]) - result = mom.rolling_cov(s1, s2, window=3, min_periods=2) + result = s1.rolling(window=3, min_periods=2).cov(s2) expected = Series([None, None, 2.0]) assert_series_equal(result, expected) s2a = Series([1, None, 3], index=[0, 1, 2]) - result = mom.rolling_cov(s1, s2a, window=3, min_periods=2) + result = s1.rolling(window=3, min_periods=2).cov(s2a) assert_series_equal(result, expected) def test_rolling_corr_diff_length(self): # GH 7512 s1 = Series([1, 2, 3], index=[0, 1, 2]) s2 = Series([1, 3], index=[0, 2]) - result = mom.rolling_corr(s1, s2, window=3, min_periods=2) + result = s1.rolling(window=3, min_periods=2).corr(s2) expected = Series([None, None, 1.0]) assert_series_equal(result, expected) s2a = Series([1, None, 3], index=[0, 1, 2]) - result = mom.rolling_corr(s1, s2a, window=3, min_periods=2) + result = s1.rolling(window=3, min_periods=2).corr(s2a) assert_series_equal(result, expected) def test_rolling_functions_window_non_shrinkage(self): @@ -1482,20 +1827,20 @@ def test_rolling_functions_window_non_shrinkage(self): df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) df_expected_panel = Panel(items=df.index, major_axis=df.columns, minor_axis=df.columns) - functions = [lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5), - lambda x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5), - lambda x: mom.rolling_max(x, window=10, min_periods=5), - lambda x: mom.rolling_min(x, window=10, min_periods=5), - lambda x: mom.rolling_sum(x, window=10, min_periods=5), - lambda x: mom.rolling_mean(x, window=10, min_periods=5), - lambda x: 
mom.rolling_std(x, window=10, min_periods=5), - lambda x: mom.rolling_var(x, window=10, min_periods=5), - lambda x: mom.rolling_skew(x, window=10, min_periods=5), - lambda x: mom.rolling_kurt(x, window=10, min_periods=5), - lambda x: mom.rolling_quantile(x, quantile=0.5, window=10, min_periods=5), - lambda x: mom.rolling_median(x, window=10, min_periods=5), - lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5), - lambda x: mom.rolling_window(x, win_type='boxcar', window=10, min_periods=5), + functions = [lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).max(), + lambda x: x.rolling(window=10, min_periods=5).min(), + lambda x: x.rolling(window=10, min_periods=5).sum(), + lambda x: x.rolling(window=10, min_periods=5).mean(), + lambda x: x.rolling(window=10, min_periods=5).std(), + lambda x: x.rolling(window=10, min_periods=5).var(), + lambda x: x.rolling(window=10, min_periods=5).skew(), + lambda x: x.rolling(window=10, min_periods=5).kurt(), + lambda x: x.rolling(window=10, min_periods=5).quantile(quantile=0.5), + lambda x: x.rolling(window=10, min_periods=5).median(), + lambda x: x.rolling(window=10, min_periods=5).apply(sum), + lambda x: x.rolling(win_type='boxcar', window=10, min_periods=5).mean(), ] for f in functions: try: @@ -1509,8 +1854,8 @@ def test_rolling_functions_window_non_shrinkage(self): # scipy needed for rolling_window continue - functions = [lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5), - lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5), + functions = [lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=True), + lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=True), ] for f in functions: df_result_panel = f(df) @@ -1528,35 +1873,35 @@ def test_moment_functions_zero_length(self): df2_expected = df2 df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, minor_axis=df2.columns) - functions = [lambda x: mom.expanding_count(x), - lambda x: mom.expanding_cov(x, x, pairwise=False, min_periods=5), - lambda x: mom.expanding_corr(x, x, pairwise=False, min_periods=5), - lambda x: mom.expanding_max(x, min_periods=5), - lambda x: mom.expanding_min(x, min_periods=5), - lambda x: mom.expanding_sum(x, min_periods=5), - lambda x: mom.expanding_mean(x, min_periods=5), - lambda x: mom.expanding_std(x, min_periods=5), - lambda x: mom.expanding_var(x, min_periods=5), - lambda x: mom.expanding_skew(x, min_periods=5), - lambda x: mom.expanding_kurt(x, min_periods=5), - lambda x: mom.expanding_quantile(x, quantile=0.5, min_periods=5), - lambda x: mom.expanding_median(x, min_periods=5), - lambda x: mom.expanding_apply(x, func=sum, min_periods=5), - lambda x: mom.rolling_count(x, window=10), - lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5), - lambda x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5), - lambda x: mom.rolling_max(x, window=10, min_periods=5), - lambda x: mom.rolling_min(x, window=10, min_periods=5), - lambda x: mom.rolling_sum(x, window=10, min_periods=5), - lambda x: mom.rolling_mean(x, window=10, min_periods=5), - lambda x: mom.rolling_std(x, window=10, min_periods=5), - lambda x: mom.rolling_var(x, window=10, min_periods=5), - lambda x: mom.rolling_skew(x, window=10, min_periods=5), - lambda x: mom.rolling_kurt(x, window=10, min_periods=5), - lambda x: mom.rolling_quantile(x, quantile=0.5, 
window=10, min_periods=5),
-                     lambda x: mom.rolling_median(x, window=10, min_periods=5),
-                     lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5),
-                     lambda x: mom.rolling_window(x, win_type='boxcar', window=10, min_periods=5),
+        functions = [lambda x: x.expanding().count(),
+                     lambda x: x.expanding(min_periods=5).cov(x, pairwise=False),
+                     lambda x: x.expanding(min_periods=5).corr(x, pairwise=False),
+                     lambda x: x.expanding(min_periods=5).max(),
+                     lambda x: x.expanding(min_periods=5).min(),
+                     lambda x: x.expanding(min_periods=5).sum(),
+                     lambda x: x.expanding(min_periods=5).mean(),
+                     lambda x: x.expanding(min_periods=5).std(),
+                     lambda x: x.expanding(min_periods=5).var(),
+                     lambda x: x.expanding(min_periods=5).skew(),
+                     lambda x: x.expanding(min_periods=5).kurt(),
+                     lambda x: x.expanding(min_periods=5).quantile(0.5),
+                     lambda x: x.expanding(min_periods=5).median(),
+                     lambda x: x.expanding(min_periods=5).apply(sum),
+                     lambda x: x.rolling(window=10).count(),
+                     lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
+                     lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
+                     lambda x: x.rolling(window=10, min_periods=5).max(),
+                     lambda x: x.rolling(window=10, min_periods=5).min(),
+                     lambda x: x.rolling(window=10, min_periods=5).sum(),
+                     lambda x: x.rolling(window=10, min_periods=5).mean(),
+                     lambda x: x.rolling(window=10, min_periods=5).std(),
+                     lambda x: x.rolling(window=10, min_periods=5).var(),
+                     lambda x: x.rolling(window=10, min_periods=5).skew(),
+                     lambda x: x.rolling(window=10, min_periods=5).kurt(),
+                     lambda x: x.rolling(window=10, min_periods=5).quantile(0.5),
+                     lambda x: x.rolling(window=10, min_periods=5).median(),
+                     lambda x: x.rolling(window=10, min_periods=5).apply(sum),
+                     lambda x: x.rolling(win_type='boxcar', window=10, min_periods=5).mean(),
                      ]
         for f in functions:
             try:
@@ -1573,10 +1918,10 @@ def test_moment_functions_zero_length(self):
                 # scipy needed for rolling_window
                 continue
 
-        functions = [lambda x: mom.expanding_cov(x, x, pairwise=True, min_periods=5),
-                     lambda x: mom.expanding_corr(x, x, pairwise=True, min_periods=5),
-                     lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5),
-                     lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5),
+        functions = [lambda x: x.expanding(min_periods=5).cov(x, pairwise=True),
+                     lambda x: x.expanding(min_periods=5).corr(x, pairwise=True),
+                     lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=True),
+                     lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=True),
                      ]
         for f in functions:
             df1_result_panel = f(df1)
@@ -1591,10 +1936,10 @@ def test_expanding_cov_pairwise_diff_length(self):
         df1a = DataFrame([[1,5], [3,9]], index=[0,2], columns=['A','B'])
         df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y'])
         df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y'])
-        result1 = mom.expanding_cov(df1, df2, pairwise=True)[2]
-        result2 = mom.expanding_cov(df1, df2a, pairwise=True)[2]
-        result3 = mom.expanding_cov(df1a, df2, pairwise=True)[2]
-        result4 = mom.expanding_cov(df1a, df2a, pairwise=True)[2]
+        result1 = df1.expanding().cov(df2, pairwise=True)[2]
+        result2 = df1.expanding().cov(df2a, pairwise=True)[2]
+        result3 = df1a.expanding().cov(df2, pairwise=True)[2]
+        result4 = df1a.expanding().cov(df2a, pairwise=True)[2]
        expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A','B'], columns=['X','Y'])
         assert_frame_equal(result1, expected)
         assert_frame_equal(result2, expected)
@@ -1607,10 +1952,10 @@ def
test_expanding_corr_pairwise_diff_length(self): df1a = DataFrame([[1,2], [3,4]], index=[0,2], columns=['A','B']) df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y']) df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y']) - result1 = mom.expanding_corr(df1, df2, pairwise=True)[2] - result2 = mom.expanding_corr(df1, df2a, pairwise=True)[2] - result3 = mom.expanding_corr(df1a, df2, pairwise=True)[2] - result4 = mom.expanding_corr(df1a, df2a, pairwise=True)[2] + result1 = df1.expanding().corr(df2, pairwise=True)[2] + result2 = df1.expanding().corr(df2a, pairwise=True)[2] + result3 = df1a.expanding().corr(df2, pairwise=True)[2] + result4 = df1a.expanding().corr(df2a, pairwise=True)[2] expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A','B'], columns=['X','Y']) assert_frame_equal(result1, expected) assert_frame_equal(result2, expected) @@ -1650,12 +1995,12 @@ def test_pairwise_stats_column_names_order(self): self.assert_numpy_array_equal(result, results[0]) # DataFrame with itself, pairwise=True - for f in [lambda x: mom.expanding_cov(x, pairwise=True), - lambda x: mom.expanding_corr(x, pairwise=True), - lambda x: mom.rolling_cov(x, window=3, pairwise=True), - lambda x: mom.rolling_corr(x, window=3, pairwise=True), - lambda x: mom.ewmcov(x, com=3, pairwise=True), - lambda x: mom.ewmcorr(x, com=3, pairwise=True), + for f in [lambda x: x.expanding().cov(pairwise=True), + lambda x: x.expanding().corr(pairwise=True), + lambda x: x.rolling(window=3).cov(pairwise=True), + lambda x: x.rolling(window=3).corr(pairwise=True), + lambda x: x.ewm(com=3).cov(pairwise=True), + lambda x: x.ewm(com=3).corr(pairwise=True), ]: results = [f(df) for df in df1s] for (df, result) in zip(df1s, results): @@ -1667,12 +2012,12 @@ def test_pairwise_stats_column_names_order(self): self.assert_numpy_array_equal(result, results[0]) # DataFrame with itself, pairwise=False - for f in [lambda x: mom.expanding_cov(x, pairwise=False), - lambda x: mom.expanding_corr(x, pairwise=False), - lambda x: mom.rolling_cov(x, window=3, pairwise=False), - lambda x: mom.rolling_corr(x, window=3, pairwise=False), - lambda x: mom.ewmcov(x, com=3, pairwise=False), - lambda x: mom.ewmcorr(x, com=3, pairwise=False), + for f in [lambda x: x.expanding().cov(pairwise=False), + lambda x: x.expanding().corr(pairwise=False), + lambda x: x.rolling(window=3).cov(pairwise=False), + lambda x: x.rolling(window=3).corr(pairwise=False), + lambda x: x.ewm(com=3).cov(pairwise=False), + lambda x: x.ewm(com=3).corr(pairwise=False), ]: results = [f(df) for df in df1s] for (df, result) in zip(df1s, results): @@ -1683,12 +2028,12 @@ def test_pairwise_stats_column_names_order(self): self.assert_numpy_array_equal(result, results[0]) # DataFrame with another DataFrame, pairwise=True - for f in [lambda x, y: mom.expanding_cov(x, y, pairwise=True), - lambda x, y: mom.expanding_corr(x, y, pairwise=True), - lambda x, y: mom.rolling_cov(x, y, window=3, pairwise=True), - lambda x, y: mom.rolling_corr(x, y, window=3, pairwise=True), - lambda x, y: mom.ewmcov(x, y, com=3, pairwise=True), - lambda x, y: mom.ewmcorr(x, y, com=3, pairwise=True), + for f in [lambda x, y: x.expanding().cov(y, pairwise=True), + lambda x, y: x.expanding().corr(y, pairwise=True), + lambda x, y: x.rolling(window=3).cov(y, pairwise=True), + lambda x, y: x.rolling(window=3).corr(y, pairwise=True), + lambda x, y: x.ewm(com=3).cov(y, pairwise=True), + lambda x, y: x.ewm(com=3).corr(y, pairwise=True), ]: results = [f(df, df2) for df in df1s] for (df, result) in zip(df1s, 
results): @@ -1700,12 +2045,12 @@ def test_pairwise_stats_column_names_order(self): self.assert_numpy_array_equal(result, results[0]) # DataFrame with another DataFrame, pairwise=False - for f in [lambda x, y: mom.expanding_cov(x, y, pairwise=False), - lambda x, y: mom.expanding_corr(x, y, pairwise=False), - lambda x, y: mom.rolling_cov(x, y, window=3, pairwise=False), - lambda x, y: mom.rolling_corr(x, y, window=3, pairwise=False), - lambda x, y: mom.ewmcov(x, y, com=3, pairwise=False), - lambda x, y: mom.ewmcorr(x, y, com=3, pairwise=False), + for f in [lambda x, y: x.expanding().cov(y, pairwise=False), + lambda x, y: x.expanding().corr(y, pairwise=False), + lambda x, y: x.rolling(window=3).cov(y, pairwise=False), + lambda x, y: x.rolling(window=3).corr(y, pairwise=False), + lambda x, y: x.ewm(com=3).cov(y, pairwise=False), + lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ]: results = [f(df, df2) if df.columns.is_unique else None for df in df1s] for (df, result) in zip(df1s, results): @@ -1719,12 +2064,12 @@ def test_pairwise_stats_column_names_order(self): tm.assertRaisesRegexp(ValueError, "'arg2' columns are not unique", f, df2, df) # DataFrame with a Series - for f in [lambda x, y: mom.expanding_cov(x, y), - lambda x, y: mom.expanding_corr(x, y), - lambda x, y: mom.rolling_cov(x, y, window=3), - lambda x, y: mom.rolling_corr(x, y, window=3), - lambda x, y: mom.ewmcov(x, y, com=3), - lambda x, y: mom.ewmcorr(x, y, com=3), + for f in [lambda x, y: x.expanding().cov(y), + lambda x, y: x.expanding().corr(y), + lambda x, y: x.rolling(window=3).cov(y), + lambda x, y: x.rolling(window=3).corr(y), + lambda x, y: x.ewm(com=3).cov(y), + lambda x, y: x.ewm(com=3).corr(y), ]: results = [f(df, s) for df in df1s] + [f(s, df) for df in df1s] for (df, result) in zip(df1s, results): @@ -1740,12 +2085,12 @@ def test_rolling_skew_edge_cases(self): # yields all NaN (0 variance) d = Series([1] * 5) - x = mom.rolling_skew(d, window=5) + x = d.rolling(window=5).skew() assert_series_equal(all_nan, x) # yields all NaN (window too small) d = Series(np.random.randn(5)) - x = mom.rolling_skew(d, window=2) + x = d.rolling(window=2).skew() assert_series_equal(all_nan, x) # yields [NaN, NaN, NaN, 0.177994, 1.548824] @@ -1753,7 +2098,7 @@ def test_rolling_skew_edge_cases(self): 1.73508164, 0.41941401]) expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824]) - x = mom.rolling_skew(d, window=4) + x = d.rolling(window=4).skew() assert_series_equal(expected, x) def test_rolling_kurt_edge_cases(self): @@ -1762,12 +2107,12 @@ def test_rolling_kurt_edge_cases(self): # yields all NaN (0 variance) d = Series([1] * 5) - x = mom.rolling_kurt(d, window=5) + x = d.rolling(window=5).kurt() assert_series_equal(all_nan, x) # yields all NaN (window too small) d = Series(np.random.randn(5)) - x = mom.rolling_kurt(d, window=3) + x = d.rolling(window=3).kurt() assert_series_equal(all_nan, x) # yields [NaN, NaN, NaN, 1.224307, 2.671499] @@ -1775,7 +2120,7 @@ def test_rolling_kurt_edge_cases(self): 1.73508164, 0.41941401]) expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499]) - x = mom.rolling_kurt(d, window=4) + x = d.rolling(window=4).kurt() assert_series_equal(expected, x) def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, @@ -1822,11 +2167,13 @@ def _check_expanding_structures(self, func): def _check_expanding(self, func, static_comp, has_min_periods=True, has_time_rule=True, preserve_nan=True): - self._check_expanding_ndarray(func, static_comp, - has_min_periods=has_min_periods, - 
has_time_rule=has_time_rule, - preserve_nan=preserve_nan) - self._check_expanding_structures(func) + with warnings.catch_warnings(record=True): + self._check_expanding_ndarray(func, static_comp, + has_min_periods=has_min_periods, + has_time_rule=has_time_rule, + preserve_nan=preserve_nan) + with warnings.catch_warnings(record=True): + self._check_expanding_structures(func) def test_rolling_max_gh6297(self): """Replicate result expected in GH #6297""" @@ -1843,7 +2190,8 @@ def test_rolling_max_gh6297(self): expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_max(series, window=1, freq='D') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').max() assert_series_equal(expected, x) def test_rolling_max_how_resample(self): @@ -1862,14 +2210,16 @@ def test_rolling_max_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, 20.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_max(series, window=1, freq='D') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').max() assert_series_equal(expected, x) # Now specify median (10.0) expected = Series([0.0, 1.0, 2.0, 3.0, 10.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_max(series, window=1, freq='D', how='median') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').max(how='median') assert_series_equal(expected, x) # Now specify mean (4+10+20)/3 @@ -1877,8 +2227,9 @@ def test_rolling_max_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, v], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_max(series, window=1, freq='D', how='mean') - assert_series_equal(expected, x) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').max(how='mean') + assert_series_equal(expected, x) def test_rolling_min_how_resample(self): @@ -1897,8 +2248,9 @@ def test_rolling_min_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_min(series, window=1, freq='D') - assert_series_equal(expected, x) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + r = series.rolling(window=1, freq='D') + assert_series_equal(expected, r.min()) def test_rolling_median_how_resample(self): @@ -1916,14 +2268,15 @@ def test_rolling_median_how_resample(self): expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - x = mom.rolling_median(series, window=1, freq='D') - assert_series_equal(expected, x) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + x = series.rolling(window=1, freq='D').median() + assert_series_equal(expected, x) def test_rolling_median_memory_error(self): # GH11722 n = 20000 - mom.rolling_median(Series(np.random.randn(n)), window=2, center=False) - mom.rolling_median(Series(np.random.randn(n)), window=2, center=False) + Series(np.random.randn(n)).rolling(window=2, center=False).median() + Series(np.random.randn(n)).rolling(window=2, center=False).median() if __name__ == '__main__': import nose diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index a6aa5ff66576c..5c3cb573766d7 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -2,6 +2,7 @@ from pandas.lib import cache_readonly 
import sys import warnings +from textwrap import dedent from functools import wraps @@ -180,7 +181,7 @@ def __call__(self, func): func.__doc__ = func.__doc__ if func.__doc__ else '' self.addendum = self.addendum if self.addendum else '' docitems = [func.__doc__, self.addendum] - func.__doc__ = self.join.join(docitems) + func.__doc__ = dedent(self.join.join(docitems)) return func
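
Taken together, the test changes above are a mechanical translation from the deprecated
module-level moment functions to the method-based window API. As a rough summary of the
mapping the diff applies throughout (a minimal sketch, assuming pandas >= 0.18.0; the
series names here are illustrative, not taken from the patch):

    import numpy as np
    import pandas as pd

    s = pd.Series(np.random.randn(100))
    s2 = s + np.random.randn(100)

    # unary moments: pd.rolling_mean(s, window=10, min_periods=5)
    s.rolling(window=10, min_periods=5).mean()

    # expanding variants: pd.expanding_sum(s, min_periods=5)
    s.expanding(min_periods=5).sum()

    # exponentially weighted: pd.ewma(s, com=2.0)
    s.ewm(com=2.0).mean()

    # binary moments move the second argument into the method call:
    # pd.rolling_corr(s, s2, window=10, min_periods=5)
    s.rolling(window=10, min_periods=5).corr(s2)

    # keyword-only arguments such as bias/ddof move onto the method:
    # pd.ewmvar(s, com=2.0, bias=False)
    s.ewm(com=2.0).var(bias=False)

The deprecated spellings keep working during the deprecation period but emit a
FutureWarning, which is why the tests that still call them are wrapped in
tm.assert_produces_warning(FutureWarning, check_stacklevel=False).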
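
The small pandas/util/decorators.py change is what lets shared window docstrings be
written as indented triple-quoted templates: Appender now runs the joined docstring
through textwrap.dedent. A minimal sketch of that behavior (the decorator and the
decorated function here are hypothetical stand-ins, not the pandas implementation):

    from textwrap import dedent

    def appender(addendum, join=''):
        # simplified stand-in for pandas.util.decorators.Appender as patched above
        def decorate(func):
            docitems = [func.__doc__ or '', addendum or '']
            # dedent strips the whitespace prefix common to all lines, so addenda
            # written as indented class-level strings render flush-left in help()
            func.__doc__ = dedent(join.join(docitems))
            return func
        return decorate

    @appender("""
    Returns
    -------
    same type as input
    """)
    def rolling_demo(x):
        """
        Compute a window statistic.
        """
        return x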