49
49
from pandas .formats .printing import pprint_thing
50
50
from pandas .util .validators import validate_kwargs
51
51
52
+ from pandas .tools import weightby
52
53
import pandas .core .algorithms as algos
53
54
import pandas .core .common as com
54
55
from pandas .core .config import option_context
@@ -791,15 +792,21 @@ def _cython_transform(self, how, numeric_only=True):
791
792
792
793
return self ._wrap_transformed_output (output , names )
793
794
794
- def _cython_agg_general (self , how , numeric_only = True ):
795
+ def _cython_agg_general (self , how , weights = None , numeric_only = True ):
796
if weights is not None:
    # Resolve the user-supplied ``weights`` against the grouped object.
    # ``self.axis`` is used here (as in ``_cython_agg_blocks``): this
    # method has no ``axis`` parameter or local, so a bare ``axis``
    # raised NameError on every weighted call.
    # TODO: need to integrate this with the exclusions
    _, weights = weightby.weightby(self.obj, weights=weights,
                                   axis=self.axis)
800
+
795
801
output = {}
796
802
for name , obj in self ._iterate_slices ():
797
803
is_numeric = is_numeric_dtype (obj .dtype )
798
804
if numeric_only and not is_numeric :
799
805
continue
800
806
807
+ values = weightby .weight (obj .values , weights )
801
808
try :
802
- result , names = self .grouper .aggregate (obj . values , how )
809
+ result , names = self .grouper .aggregate (values , how )
803
810
except AssertionError as e :
804
811
raise GroupByError (str (e ))
805
812
output [name ] = self ._try_cast (result , obj )
@@ -1006,6 +1013,26 @@ def count(self):
1006
1013
# defined here for API doc
1007
1014
raise NotImplementedError
1008
1015
1016
@Substitution(name='groupby')
@Appender(_doc_template)
def sum(self, *args, **kwargs):
    """
    Compute sum of groups, excluding missing values

    For multiple groupings, the result index will be a MultiIndex
    """

    # TODO: this is slightly different from other cythonized functions
    # (e.g. mean) to accommodate np.sum functionality
    # Only 'weights' and 'numeric_only' are passed as permitted keywords.
    nv.validate_groupby_func('sum', args, kwargs,
                             ('weights', 'numeric_only'))
    self._set_group_selection()
    try:
        # Fast path: cython 'add' aggregation, forwarding the keywords.
        return self._cython_agg_general('add', **kwargs)
    except AssertionError as e:
        raise SpecificationError(str(e))
    except Exception:  # pragma: no cover
        if kwargs.get('weights') is not None:
            # The pure-Python fallback below ignores weights; returning
            # an unweighted sum for a weighted request would be silently
            # wrong, so surface the original failure instead.
            raise
        return self.aggregate(lambda x: np.sum(x, axis=self.axis))
1035
+
1009
1036
@Substitution(name='groupby')
@Appender(_doc_template)
def mean(self, *args, **kwargs):
    """
    Compute mean of groups, excluding missing values

    For multiple groupings, the result index will be a MultiIndex
    """
    allowed = ('weights', 'numeric_only')
    nv.validate_groupby_func('mean', args, kwargs, allowed)

    try:
        # Fast path: cython aggregation, forwarding the keywords.
        return self._cython_agg_general('mean', **kwargs)
    except GroupByError:
        # Group-by specific failures propagate unchanged.
        raise
    except Exception:  # pragma: no cover
        # Fallback: call each group's own ``mean`` along the grouping
        # axis, forwarding the caller's keywords.
        self._set_group_selection()
        fallback_kwargs = dict(kwargs, axis=self.axis)
        return self._python_agg_general(
            lambda x: x.mean(**fallback_kwargs))
1026
1054
1027
1055
@Substitution (name = 'groupby' )
@@ -1107,7 +1135,6 @@ def size(self):
1107
1135
"""Compute group sizes"""
1108
1136
return self .grouper .size ()
1109
1137
1110
- sum = _groupby_function ('sum' , 'add' , np .sum )
1111
1138
prod = _groupby_function ('prod' , 'prod' , np .prod )
1112
1139
min = _groupby_function ('min' , 'min' , np .min , numeric_only = False )
1113
1140
max = _groupby_function ('max' , 'max' , np .max , numeric_only = False )
@@ -3134,9 +3161,9 @@ def _iterate_slices(self):
3134
3161
continue
3135
3162
yield val , slicer (val )
3136
3163
3137
def _cython_agg_general(self, how, **kwargs):
    """
    Aggregate block-wise with the cython routine named by ``how``.

    All keyword arguments are forwarded untouched to
    ``_cython_agg_blocks``; the (items, blocks) pair it returns is
    passed to ``_wrap_agged_blocks`` and that result is returned.
    """
    agged = self._cython_agg_blocks(how, **kwargs)
    # Unpack explicitly: exactly two values are expected.
    items, blocks = agged
    return self._wrap_agged_blocks(items, blocks)
3141
3168
3142
3169
def _wrap_agged_blocks (self , items , blocks ):
@@ -3162,18 +3189,26 @@ def _wrap_agged_blocks(self, items, blocks):
3162
3189
3163
3190
_block_agg_axis = 0
3164
3191
3165
- def _cython_agg_blocks (self , how , numeric_only = True ):
3192
+ def _cython_agg_blocks (self , how , weights = None , numeric_only = True ):
3166
3193
data , agg_axis = self ._get_data_to_aggregate ()
3167
3194
3195
+ if weights is not None :
3196
+
3197
+ # TODO, need to integrate this with the exclusions
3198
+ _ , weights = weightby .weightby (self .obj ,
3199
+ weights = weights ,
3200
+ axis = self .axis )
3201
+
3168
3202
new_blocks = []
3169
3203
3170
3204
if numeric_only :
3171
3205
data = data .get_numeric_data (copy = False )
3172
3206
3173
3207
for block in data .blocks :
3174
3208
3209
+ values = weightby .weight (block .values , weights )
3175
3210
result , _ = self .grouper .aggregate (
3176
- block . values , how , axis = agg_axis )
3211
+ values , how , axis = agg_axis )
3177
3212
3178
3213
# see if we can cast the block back to the original dtype
3179
3214
result = block ._try_coerce_and_cast_result (result )
0 commit comments