Skip to content

Commit b4dfbc5

Browse files
committed
fix according to comments
1 parent e243f18 commit b4dfbc5

File tree

8 files changed

+304
-51
lines changed

8 files changed

+304
-51
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1773,6 +1773,7 @@ Computations / Descriptive Stats
17731773
:toctree: generated/
17741774

17751775
Resampler.count
1776+
Resampler.nunique
17761777
Resampler.first
17771778
Resampler.last
17781779
Resampler.max

doc/source/timeseries.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -1295,7 +1295,7 @@ For upsampling, you can specify a way to upsample and the ``limit`` parameter to
12951295
12961296
# from secondly to every 250 milliseconds
12971297
1298-
ts[:2].resample('250L').reindex()
1298+
ts[:2].resample('250L').asfreq()
12991299
13001300
ts[:2].resample('250L').ffill()
13011301
@@ -1398,7 +1398,7 @@ must be implemented on the Resampled object
13981398
13991399
r.agg({'A' : 'sum', 'B' : 'std'})
14001400
1401-
Furthermore you can pass a nested dict to indicate different aggregations on different columns.
1401+
Furthermore, you can also specify multiple aggregation functions for each column separately.
14021402

14031403
.. ipython:: python
14041404

pandas/core/base.py

+25-13
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,6 @@ def _aggregate(self, arg, *args, **kwargs):
423423
if isinstance(arg, compat.string_types):
424424
return getattr(self, arg)(*args, **kwargs), None
425425

426-
result = compat.OrderedDict()
427426
if isinstance(arg, dict):
428427

429428
# aggregate based on the passed dict
@@ -483,22 +482,33 @@ def _agg_2dim(name, how):
483482
subset=obj)
484483
return colg.aggregate(how, _level=None)
485484

485+
def _agg(arg, func):
486+
"""
487+
run the aggregations over the arg with func
488+
return an OrderedDict
489+
"""
490+
result = compat.OrderedDict()
491+
for fname, agg_how in compat.iteritems(arg):
492+
result[fname] = func(fname, agg_how)
493+
return result
494+
486495
# set the final keys
487496
keys = list(compat.iterkeys(arg))
497+
result = compat.OrderedDict()
488498

489499
# nested renamer
490500
if is_nested_renamer:
491-
results = [_agg_1dim(k, v) for k, v in compat.iteritems(arg)]
501+
result = list(_agg(arg, _agg_1dim).values())
492502

493-
if all(isinstance(r, dict) for r in results):
503+
if all(isinstance(r, dict) for r in result):
494504

505+
result, results = compat.OrderedDict(), result
495506
for r in results:
496507
result.update(r)
497508
keys = list(compat.iterkeys(result))
498509

499510
else:
500511

501-
result = results
502512
if self._selection is not None:
503513
keys = None
504514

@@ -511,27 +521,29 @@ def _agg_2dim(name, how):
511521
# but may have multiple aggregations
512522
if len(sl) == 1:
513523

514-
for fname, agg_how in compat.iteritems(arg):
515-
result[fname] = _agg_1dim(self._selection,
516-
agg_how)
524+
result = _agg(arg, lambda fname,
525+
agg_how: _agg_1dim(self._selection, agg_how))
517526

518527
# we are selecting the same set as we are aggregating
519528
elif not len(sl - set(compat.iterkeys(arg))):
520529

521-
for fname, agg_how in compat.iteritems(arg):
522-
result[fname] = _agg_1dim(fname, agg_how)
530+
result = _agg(arg, _agg_1dim)
523531

524532
# we are a DataFrame, with possibly multiple aggregations
525533
else:
526534

527-
for fname, agg_how in compat.iteritems(arg):
528-
result[fname] = _agg_2dim(fname, agg_how)
535+
result = _agg(arg, _agg_2dim)
529536

530537
# no selection
531538
else:
532539

533-
for col, agg_how in compat.iteritems(arg):
534-
result[col] = _agg_1dim(col, agg_how)
540+
try:
541+
result = _agg(arg, _agg_1dim)
542+
except SpecificationError:
543+
544+
# we are aggregating expecting all 1d-returns
545+
# but we have 2d
546+
result = _agg(arg, _agg_2dim)
535547

536548
# combine results
537549
if isinstance(result, list):

pandas/core/generic.py

+30-18
Original file line numberDiff line numberDiff line change
@@ -3627,7 +3627,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
36273627
36283628
Upsample the series into 30 second bins.
36293629
3630-
>>> series.resample('30S').upsample()[0:5] #select first 5 rows
3630+
>>> series.resample('30S').asfreq()[0:5] #select first 5 rows
36313631
2000-01-01 00:00:00 0
36323632
2000-01-01 00:00:30 NaN
36333633
2000-01-01 00:01:00 1
@@ -3677,20 +3677,9 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
36773677
fill_method=fill_method, convention=convention,
36783678
limit=limit, base=base)
36793679

3680-
# deprecation warning
3681-
# but call the method anyhow
3682-
if fill_method is not None:
3683-
args = "limit={0}".format(limit) if limit is not None else ""
3684-
warnings.warn("fill_method is deprecated to .resample()\n"
3685-
"the new syntax is .resample(...)."
3686-
"{fill_method}({args})".format(
3687-
fill_method=fill_method,
3688-
args=args),
3689-
FutureWarning, stacklevel=2)
3690-
return r.aggregate(fill_method, limit=limit)
3680+
# deprecation warnings
3681+
# but call methods anyhow
36913682

3692-
# deprecation warning
3693-
# but call the method anyhow
36943683
if how is not None:
36953684

36963685
# .resample(..., how='sum')
@@ -3701,11 +3690,34 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
37013690
else:
37023691
method = ".apply(<func>)"
37033692

3704-
warnings.warn("how in .resample() is deprecated\n"
3705-
"the new syntax is .resample(...).{method}".format(
3706-
method=method),
3693+
# only warn about how on its own when no fill_method is given;
3694+
# if both are passed, the combined warning is shown further below
3695+
if fill_method is None:
3696+
warnings.warn("how in .resample() is deprecated\n"
3697+
"the new syntax is "
3698+
".resample(...).{method}".format(
3699+
method=method),
3700+
FutureWarning, stacklevel=2)
3701+
r = r.aggregate(how)
3702+
3703+
if fill_method is not None:
3704+
3705+
# show the prior function call
3706+
method = '.' + method if how is not None else ''
3707+
3708+
args = "limit={0}".format(limit) if limit is not None else ""
3709+
warnings.warn("fill_method is deprecated to .resample()\n"
3710+
"the new syntax is .resample(...){method}"
3711+
".{fill_method}({args})".format(
3712+
method=method,
3713+
fill_method=fill_method,
3714+
args=args),
37073715
FutureWarning, stacklevel=2)
3708-
return r.aggregate(how)
3716+
3717+
if how is not None:
3718+
r = getattr(r, fill_method)(limit=limit)
3719+
else:
3720+
r = r.aggregate(fill_method, limit=limit)
37093721

37103722
return r
37113723

pandas/core/groupby.py

+10
Original file line numberDiff line numberDiff line change
@@ -355,18 +355,28 @@ def __unicode__(self):
355355
# TODO: Better unicode/repr for GroupBy object
356356
return object.__repr__(self)
357357

358+
def _assure_grouper(self):
359+
"""
360+
we create the grouper on instantiation
361+
sub-classes may have a different policy
362+
"""
363+
pass
364+
358365
@property
359366
def groups(self):
360367
""" dict {group name -> group labels} """
368+
self._assure_grouper()
361369
return self.grouper.groups
362370

363371
@property
364372
def ngroups(self):
373+
self._assure_grouper()
365374
return self.grouper.ngroups
366375

367376
@property
368377
def indices(self):
369378
""" dict {group name -> group indices} """
379+
self._assure_grouper()
370380
return self.grouper.indices
371381

372382
def _get_indices(self, names):

pandas/tests/test_groupby.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -1501,8 +1501,24 @@ def test_aggregate_api_consistency(self):
15011501
['sum', 'mean']])
15021502
assert_frame_equal(result, expected, check_like=True)
15031503

1504-
result = grouped[['D', 'C']].agg(OrderedDict([('r', np.sum),
1505-
('r2', np.mean)]))
1504+
result = grouped.agg({'C': 'mean', 'D': 'sum'})
1505+
expected = pd.concat([d_sum,
1506+
c_mean],
1507+
axis=1)
1508+
assert_frame_equal(result, expected, check_like=True)
1509+
1510+
result = grouped.agg({'C': ['mean', 'sum'],
1511+
'D': ['mean', 'sum']})
1512+
expected = pd.concat([c_mean,
1513+
c_sum,
1514+
d_mean,
1515+
d_sum],
1516+
axis=1)
1517+
expected.columns = MultiIndex.from_product([['C', 'D'],
1518+
['mean', 'sum']])
1519+
1520+
result = grouped[['D', 'C']].agg({'r': np.sum,
1521+
'r2': np.mean})
15061522
expected = pd.concat([d_sum,
15071523
c_sum,
15081524
d_mean,

0 commit comments

Comments
 (0)