Skip to content

Commit 13ebddd

Browse files
authored
feat: added aggregation operators (#29)
* feat: added aggregation operators * fix: fixed c code for agg * fix: removed zip json
1 parent 4ca289d commit 13ebddd

File tree

5 files changed

+35
-190
lines changed

5 files changed

+35
-190
lines changed

src/hydrodiy/data/c_dutils.c

+17-7
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
/**
44
* Aggregate inputs based on the aggindex
5-
* The operator applied to the aggregation is defined by op:
6-
* oper = 0 : sum
7-
* oper = 1 : mean
5+
* The operator applied to the aggregation is defined by the operator argument:
6+
* operator = 0 : sum
7+
* operator = 1 : mean
8+
* operator = 2 : max
9+
* operator = 3 : tail
810
**/
9-
int c_aggregate(int nval, int oper, int maxnan, int * aggindex,
11+
int c_aggregate(int nval, int operator, int maxnan, int * aggindex,
1012
double * inputs, double * outputs, int * iend)
1113
{
1214
int i, nagg, nagg_nan, count, ia, iaprev;
@@ -35,7 +37,7 @@ int c_aggregate(int nval, int oper, int maxnan, int * aggindex,
3537
if(ia != iaprev)
3638
{
3739
/* Mean instead of agg */
38-
if(oper == 1 && nagg>0)
40+
if(operator == 1 && nagg>0)
3941
agg/=nagg;
4042

4143
/* Store outputs */
@@ -65,11 +67,19 @@ int c_aggregate(int nval, int oper, int maxnan, int * aggindex,
6567
} else
6668
nagg ++;
6769

68-
agg += inp;
70+
if(operator<=1) {
71+
agg += inp;
72+
}
73+
else if (operator == 2){
74+
agg = inp > agg ? inp : agg;
75+
}
76+
else if (operator == 3){
77+
agg = inp;
78+
}
6979
}
7080

7181
/* Final step */
72-
if(oper == 1 && nagg>0)
82+
if(operator == 1 && nagg>0)
7383
agg/=nagg;
7484

7585
if(nagg_nan > maxnan)

src/hydrodiy/data/dutils.py

+2
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ def aggregate(aggindex, inputs, operator=0, maxnan=0):
163163
Aggregation operator:
164164
0 = sum
165165
1 = mean
166+
2 = max
167+
3 = tail (last valid value)
166168
maxnan : int
167169
Maximum number of nan in inputs for each
168170
aggregation index

src/hydrodiy/data/tests/test_hydata_dutils.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,17 @@
2323
# Utility function to aggregate data
2424
# using various versions of pandas
2525
def agg_d2m(x, fun="mean"):
26-
assert fun in ["mean", "sum"]
26+
assert fun in ["mean", "sum", "max", "tail"]
2727

2828
# Define aggregation function
2929
if fun == "mean":
3030
aggfun = lambda y: np.mean(y.values)
31-
else:
31+
elif fun == "sum":
3232
aggfun = lambda y: np.sum(y.values)
33+
elif fun == "max":
34+
aggfun = lambda y: np.nanmax(y.values)
35+
else:
36+
aggfun = lambda y: y.values[~np.isnan(y.values)][-1]
3337

3438
# Run aggregation
3539
try:
@@ -191,6 +195,14 @@ def test_aggregate(allclose):
191195
obsm2 = dutils.aggregate(aggindex, obs.values, operator=1)
192196
assert allclose(obsm.values, obsm2)
193197

198+
obsm = agg_d2m(obs, fun="max")
199+
obsm2 = dutils.aggregate(aggindex, obs.values, operator=2)
200+
assert allclose(obsm.values, obsm2)
201+
202+
obsm = agg_d2m(obs, fun="tail")
203+
obsm2 = dutils.aggregate(aggindex, obs.values, operator=3)
204+
assert allclose(obsm.values, obsm2)
205+
194206
kk = np.random.choice(range(nval), nval//10, replace=False)
195207
obs[kk] = np.nan
196208
obsm = obs.resample("MS").apply(lambda x: np.sum(x.values))
@@ -209,9 +221,8 @@ def test_aggregate(allclose):
209221
def test_aggregate_error(allclose):
210222
dt = pd.date_range("1990-01-01", "2000-12-31")
211223
nval = len(dt)
212-
obs = pd.Series(np.random.uniform(0, 1, nval), \
213-
index=dt)
214-
224+
obs = pd.Series(np.random.uniform(0, 1, nval),
225+
index=dt)
215226
aggindex = dt.year * 100 + dt.month
216227

217228
try:

src/hydrodiy/io/tests/test_hyio_zipjson.py

-55
This file was deleted.

src/hydrodiy/io/zipjson.py

-123
This file was deleted.

0 commit comments

Comments
 (0)