Skip to content

Commit 3c23dc9

Browse files
committed
API: provide Rolling/Expanding/EWM objects for deferred rolling type calculations, xref #10702
1 parent 1357321 commit 3c23dc9

File tree

8 files changed

+1945
-761
lines changed

8 files changed

+1945
-761
lines changed

pandas/core/base.py

+261
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Base and utility classes for pandas objects.
33
"""
44
from pandas import compat
5+
from pandas.compat import builtins
56
import numpy as np
67
from pandas.core import common as com
78
import pandas.core.nanops as nanops
@@ -218,6 +219,266 @@ def __delete__(self, instance):
218219
raise AttributeError("can't delete attribute")
219220

220221

222+
class GroupByError(Exception):
    """Base class for the selection / aggregation errors defined below.

    ``DataError`` and ``SpecificationError`` both derive from it, so callers
    can catch the whole family with a single ``except GroupByError``.
    """
224+
225+
226+
class DataError(GroupByError):
    """Error that ``SelectionMixin._aggregate_multiple_funcs`` tolerates.

    When a per-column / per-function aggregation raises this (or a
    ``TypeError``), that piece is skipped instead of aborting the whole
    aggregation.
    NOTE(review): presumably raised when the data cannot be aggregated by the
    requested function -- the raising sites are not part of this file.
    """
228+
229+
230+
class SpecificationError(GroupByError):
    """Error that is never swallowed during multi-function aggregation.

    Unlike ``DataError`` it always propagates out of
    ``SelectionMixin._aggregate_multiple_funcs``.
    NOTE(review): presumably signals an invalid aggregation specification --
    the raising sites are not part of this file.
    """
232+
233+
234+
class SelectionMixin(object):
235+
"""
236+
mixin implementing the selection & aggregation interface on a group-like object
237+
sub-classes need to define: obj, exclusions
238+
"""
239+
_selection = None
240+
_internal_names = ['_cache']
241+
_internal_names_set = set(_internal_names)
242+
_builtin_table = {
243+
builtins.sum: np.sum,
244+
builtins.max: np.max,
245+
builtins.min: np.min,
246+
}
247+
_cython_table = {
248+
builtins.sum: 'sum',
249+
builtins.max: 'max',
250+
builtins.min: 'min',
251+
np.sum: 'sum',
252+
np.mean: 'mean',
253+
np.prod: 'prod',
254+
np.std: 'std',
255+
np.var: 'var',
256+
np.median: 'median',
257+
np.max: 'max',
258+
np.min: 'min',
259+
np.cumprod: 'cumprod',
260+
np.cumsum: 'cumsum'
261+
}
262+
263+
@property
264+
def name(self):
265+
if self._selection is None:
266+
return None # 'result'
267+
else:
268+
return self._selection
269+
270+
@property
271+
def _selection_list(self):
272+
if not isinstance(self._selection, (list, tuple, com.ABCSeries, com.ABCIndex, np.ndarray)):
273+
return [self._selection]
274+
return self._selection
275+
276+
@cache_readonly
277+
def _selected_obj(self):
278+
279+
if self._selection is None or isinstance(self.obj, com.ABCSeries):
280+
return self.obj
281+
else:
282+
return self.obj[self._selection]
283+
284+
@cache_readonly
285+
def _obj_with_exclusions(self):
286+
if self._selection is not None and isinstance(self.obj, com.ABCDataFrame):
287+
return self.obj.reindex(columns=self._selection_list)
288+
289+
if len(self.exclusions) > 0:
290+
return self.obj.drop(self.exclusions, axis=1)
291+
else:
292+
return self.obj
293+
294+
def __getitem__(self, key):
295+
if self._selection is not None:
296+
raise Exception('Column(s) %s already selected' % self._selection)
297+
298+
if isinstance(key, (list, tuple, com.ABCSeries, com.ABCIndex, np.ndarray)):
299+
if len(self.obj.columns.intersection(key)) != len(key):
300+
bad_keys = list(set(key).difference(self.obj.columns))
301+
raise KeyError("Columns not found: %s"
302+
% str(bad_keys)[1:-1])
303+
return self._gotitem(list(key), ndim=2)
304+
305+
elif not getattr(self,'as_index',False):
306+
if key not in self.obj.columns:
307+
raise KeyError("Column not found: %s" % key)
308+
return self._gotitem(key, ndim=2)
309+
310+
else:
311+
if key not in self.obj:
312+
raise KeyError("Column not found: %s" % key)
313+
return self._gotitem(key, ndim=1)
314+
315+
def _gotitem(self, key, ndim, subset=None):
316+
"""
317+
sub-classes to define
318+
return a sliced object
319+
320+
Parameters
321+
----------
322+
key : string / list of selections
323+
ndim : 1,2
324+
requested ndim of result
325+
subset : object, default None
326+
subset to act on
327+
328+
"""
329+
raise AbstractMethodError(self)
330+
331+
_agg_doc = """Aggregate using input function or dict of {column -> function}
332+
333+
Parameters
334+
----------
335+
arg : function or dict
336+
Function to use for aggregating groups. If a function, must either
337+
work when passed a DataFrame or when passed to DataFrame.apply. If
338+
passed a dict, the keys must be DataFrame column names.
339+
340+
Accepted Combinations are:
341+
- string cythonized function name
342+
- function
343+
- list of functions
344+
- dict of columns -> functions
345+
- nested dict of names -> dicts of functions
346+
347+
Notes
348+
-----
349+
Numpy functions mean/median/prod/sum/std/var are special cased so the
350+
default behavior is applying the function along axis=0
351+
(e.g., np.mean(arr_2d, axis=0)) as opposed to
352+
mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).
353+
354+
Returns
355+
-------
356+
aggregated : DataFrame
357+
"""
358+
359+
@Appender(_agg_doc)
360+
def agg(self, func, *args, **kwargs):
361+
return self.aggregate(func, *args, **kwargs)
362+
363+
@Appender(_agg_doc)
364+
def aggregate(self, func, *args, **kwargs):
365+
raise AbstractMethodError(self)
366+
367+
def _aggregate(self, arg, *args, **kwargs):
368+
"""
369+
provide an implementation for the aggregators
370+
371+
Returns
372+
-------
373+
tuple of result, how
374+
375+
Notes
376+
-----
377+
how can be a string describe the required post-processing, or
378+
None if not required
379+
"""
380+
381+
if isinstance(arg, compat.string_types):
382+
return getattr(self, arg)(*args, **kwargs), None
383+
384+
result = compat.OrderedDict()
385+
if isinstance(arg, dict):
386+
if self.axis != 0: # pragma: no cover
387+
raise ValueError('Can only pass dict with axis=0')
388+
389+
obj = self._selected_obj
390+
391+
if any(isinstance(x, (list, tuple, dict)) for x in arg.values()):
392+
new_arg = compat.OrderedDict()
393+
for k, v in compat.iteritems(arg):
394+
if not isinstance(v, (tuple, list, dict)):
395+
new_arg[k] = [v]
396+
else:
397+
new_arg[k] = v
398+
arg = new_arg
399+
400+
keys = []
401+
if self._selection is not None:
402+
subset = obj
403+
404+
for fname, agg_how in compat.iteritems(arg):
405+
colg = self._gotitem(self._selection, ndim=1, subset=subset)
406+
result[fname] = colg.aggregate(agg_how)
407+
keys.append(fname)
408+
else:
409+
for col, agg_how in compat.iteritems(arg):
410+
colg = self._gotitem(col, ndim=1)
411+
result[col] = colg.aggregate(agg_how)
412+
keys.append(col)
413+
414+
if isinstance(list(result.values())[0], com.ABCDataFrame):
415+
from pandas.tools.merge import concat
416+
result = concat([result[k] for k in keys], keys=keys, axis=1)
417+
else:
418+
from pandas import DataFrame
419+
result = DataFrame(result)
420+
421+
return result, True
422+
elif hasattr(arg, '__iter__'):
423+
return self._aggregate_multiple_funcs(arg), None
424+
else:
425+
result = None
426+
427+
cy_func = self._is_cython_func(arg)
428+
if cy_func and not args and not kwargs:
429+
return getattr(self, cy_func)(), None
430+
431+
# caller can react
432+
return result, True
433+
434+
def _aggregate_multiple_funcs(self, arg):
435+
from pandas.tools.merge import concat
436+
437+
if self.axis != 0:
438+
raise NotImplementedError("axis other than 0 is not supported")
439+
440+
obj = self._obj_with_exclusions
441+
results = []
442+
keys = []
443+
444+
# degenerate case
445+
if obj.ndim == 1:
446+
for a in arg:
447+
try:
448+
colg = self._gotitem(obj.name, ndim=1, subset=obj)
449+
results.append(colg.aggregate(a))
450+
keys.append(getattr(a,'name',a))
451+
except (TypeError, DataError):
452+
pass
453+
except SpecificationError:
454+
raise
455+
456+
# multiples
457+
else:
458+
for col in obj:
459+
try:
460+
colg = self._gotitem(col, ndim=1, subset=obj[col])
461+
results.append(colg.aggregate(arg))
462+
keys.append(col)
463+
except (TypeError, DataError):
464+
pass
465+
except SpecificationError:
466+
raise
467+
result = concat(results, keys=keys, axis=1)
468+
469+
return result
470+
471+
def _is_cython_func(self, arg):
472+
""" if we define an internal function for this argument, return it """
473+
return self._cython_table.get(arg)
474+
475+
def _is_builtin_func(self, arg):
476+
"""
477+
if we define an builtin function for this argument, return it,
478+
otherwise return the arg
479+
"""
480+
return self._builtin_table.get(arg, arg)
481+
221482
class FrozenList(PandasObject, list):
222483

223484
"""

pandas/core/frame.py

+1
Original file line numberDiff line numberDiff line change
@@ -5149,6 +5149,7 @@ def combineMult(self, other):
51495149
DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0,
51505150
axes_are_reversed=True, aliases={'rows': 0})
51515151
DataFrame._add_numeric_operations()
5152+
DataFrame._add_series_or_dataframe_operations()
51525153

51535154
_EMPTY_SERIES = Series([])
51545155

pandas/core/generic.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
from pandas.util.decorators import Appender, Substitution, deprecate_kwarg
3030
from pandas.core import config
3131

32-
3332
# goal is to be able to define the docs close to function, while still being
3433
# able to share
3534
_shared_docs = dict()
@@ -4734,6 +4733,36 @@ def nanptp(values, axis=0, skipna=True):
47344733
method ``ptp``.""", nanptp)
47354734

47364735

4736+
@classmethod
def _add_series_or_dataframe_operations(cls):
    """
    Install the ``rolling``, ``expanding`` and ``ewm`` methods on ``cls``.

    Each installed method resolves ``axis`` with ``_get_axis_number`` and
    forwards its arguments to the same-named factory in
    ``pandas.core.window``.  The factory's docstring is attached with
    ``Appender`` every time this runs, so the inner functions deliberately
    carry no docstring of their own.
    """

    # imported here rather than at module level
    # NOTE(review): presumably to avoid a circular import -- confirm
    from pandas.core import window as rwindow

    def rolling(self, window, min_periods=None, freq=None, center=False,
                how=None, win_type=None, axis=0):
        axis_number = self._get_axis_number(axis)
        return rwindow.rolling(
            self,
            window=window,
            min_periods=min_periods,
            freq=freq,
            center=center,
            how=how,
            win_type=win_type,
            axis=axis_number,
        )

    def expanding(self, min_periods=None, freq=None, center=False,
                  how=None, axis=0):
        axis_number = self._get_axis_number(axis)
        return rwindow.expanding(
            self,
            min_periods=min_periods,
            freq=freq,
            center=center,
            how=how,
            axis=axis_number,
        )

    def ewm(self, com=None, span=None, halflife=None, min_periods=0,
            freq=None, adjust=True, how=None, ignore_na=False, axis=0):
        axis_number = self._get_axis_number(axis)
        return rwindow.ewm(
            self,
            com=com,
            span=span,
            halflife=halflife,
            min_periods=min_periods,
            freq=freq,
            adjust=adjust,
            how=how,
            ignore_na=ignore_na,
            axis=axis_number,
        )

    # attach the factory docstrings, then publish the methods on the class
    cls.rolling = Appender(rwindow.rolling.__doc__)(rolling)
    cls.expanding = Appender(rwindow.expanding.__doc__)(expanding)
    cls.ewm = Appender(rwindow.ewm.__doc__)(ewm)
4765+
47374766
def _doc_parms(cls):
47384767
""" return a tuple of the doc parms """
47394768
axis_descr = "{%s}" % ', '.join([
@@ -4916,6 +4945,6 @@ def logical_func(self, axis=None, bool_only=None, skipna=None,
49164945
logical_func.__name__ = name
49174946
return logical_func
49184947

4919-
# install the indexerse
4948+
# install the indexers
49204949
for _name, _indexer in indexing.get_indexers_list():
49214950
NDFrame._create_indexer(_name, _indexer)

0 commit comments

Comments
 (0)