@@ -6,6 +6,7 @@ from libc.math cimport (
6
6
sqrt,
7
7
)
8
8
from libcpp.deque cimport deque
9
+ from libcpp.stack cimport stack
9
10
from libcpp.unordered_map cimport unordered_map
10
11
11
12
from pandas._libs.algos cimport TiebreakEnumType
@@ -991,36 +992,24 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
991
992
# Moving maximum / minimum code taken from Bottleneck
992
993
# Licence at LICENSES/BOTTLENECK_LICENCE
993
994
995
cdef int64_t bisect_left(
    deque[int64_t]& a,
    int64_t x,
    int64_t lo=0,
    int64_t hi=-1
) nogil:
    """
    Return the leftmost insertion point for ``x`` within ``a[lo:hi]``.

    Mirrors the contract of Python's ``bisect.bisect_left``: assuming ``a``
    is sorted ascending over the searched range, every element left of the
    returned index is ``< x`` and every element from it onwards is ``>= x``.

    Parameters
    ----------
    a : deque[int64_t]&
        Container to search (not modified).
    x : int64_t
        Value to locate.
    lo : int64_t, default 0
        First position to consider.
    hi : int64_t, default -1
        One past the last position to consider; ``-1`` is a sentinel meaning
        "use ``a.size()``".

    Returns
    -------
    int64_t
        Index in ``[lo, hi]`` at which ``x`` would be inserted.
    """
    cdef int64_t probe

    if hi == -1:
        hi = a.size()

    while lo < hi:
        probe = (lo + hi) // 2
        if x > a.at(probe):
            # ``probe`` and everything before it are smaller than ``x``.
            lo = probe + 1
            continue
        hi = probe

    return lo
994
1011
995
- cdef float64_t init_mm(float64_t ai, Py_ssize_t * nobs, bint is_max) noexcept nogil:
996
-
997
- if ai == ai:
998
- nobs[0 ] = nobs[0 ] + 1
999
- elif is_max:
1000
- ai = MINfloat64
1001
- else :
1002
- ai = MAXfloat64
1003
-
1004
- return ai
1005
-
1006
-
1007
- cdef void remove_mm(float64_t aold, Py_ssize_t * nobs) noexcept nogil:
1008
- """ remove a value from the mm calc """
1009
- if aold == aold:
1010
- nobs[0 ] = nobs[0 ] - 1
1011
-
1012
-
1013
- cdef float64_t calc_mm(int64_t minp, Py_ssize_t nobs,
1014
- float64_t value) noexcept nogil:
1015
- cdef:
1016
- float64_t result
1017
-
1018
- if nobs >= minp:
1019
- result = value
1020
- else :
1021
- result = NaN
1022
-
1023
- return result
1012
+ from libc.math cimport isnan
1024
1013
1025
1014
1026
1015
def roll_max (ndarray[float64_t] values , ndarray[int64_t] start ,
@@ -1068,69 +1057,124 @@ def roll_min(ndarray[float64_t] values, ndarray[int64_t] start,
1068
1057
return _roll_min_max(values , start , end , minp , is_max = 0 )
1069
1058
1070
1059
1071
def _roll_min_max(
    ndarray[float64_t] values,
    ndarray[int64_t] start,
    ndarray[int64_t] end,
    int64_t minp,
    bint is_max
):
    """
    Compute the rolling maximum or minimum of ``values`` over the windows
    ``[start[i], end[i])``.

    Parameters
    ----------
    values : ndarray[float64]
        Input data. NaN entries are never selected as a result and do not
        count towards ``minp``.
    start : ndarray[int64]
        Inclusive first position of each window.
    end : ndarray[int64]
        Exclusive last position of each window. Must be non-decreasing;
        where two consecutive entries are equal, the corresponding ``start``
        entries must be non-decreasing as well.
    minp : int64
        Minimum number of non-NaN observations a window needs in order to
        produce a value. Anything below 1 is treated as 1.
    is_max : bint
        Nonzero selects the maximum, zero the minimum.

    Returns
    -------
    ndarray[float64]
        One entry per window (``len(start)`` entries); NaN where the window
        does not hold at least ``minp`` non-NaN values.

    Raises
    ------
    ValueError
        If the ordering requirement on ``start`` / ``end`` is violated.
    """
    cdef:
        Py_ssize_t i, i_next, k, valid_start, last_end, last_start, N = len(start)
        # Indices into ``values`` of the extremum candidates, in increasing
        # index order. Newer values evict older candidates they beat or tie,
        # so the front is the extremum of everything still retained.
        deque candidates[int64_t]
        # Indices of windows that start earlier than the window before them.
        stack dominators[int64_t]
        ndarray[float64_t, ndim=1] output

        # ideally want these in the i-loop scope
        Py_ssize_t this_start, this_end, stash_start
        int64_t q_idx

    output = np.empty(N, dtype=np.float64)
    candidates = deque[int64_t]()
    dominators = stack[int64_t]()

    # This function was "ported" / translated from sliding_min_max()
    # in /pandas/core/_numba/kernels/min_max_.py. (See there for detailed
    # comments and credits.)
    # Code translation assumptions/rules:
    # - min_periods --> minp
    # - deque[0] --> front()
    # - deque[-1] --> back()
    # - stack[-1] --> top()
    # - bool(stack/deque) --> !empty()
    # - deque.append() --> push_back()
    # - stack.append() --> push()
    # - deque.popleft --> pop_front()
    # - deque.pop() --> pop_back()

    with nogil:
        if minp < 1:
            minp = 1

        # Walk the windows back to front and record each window that starts
        # before its predecessor, provided it also starts before the most
        # recently recorded one (the stack top). The top therefore always
        # refers to the next such window in forward order.
        if N > 2:
            i_next = N - 1
            for i in range(N - 2, -1, -1):
                if start[i_next] < start[i] and (
                    dominators.empty()
                    or start[dominators.top()] > start[i_next]
                ):
                    dominators.push(i_next)
                i_next = i

        # Latest window start from which at least ``minp`` non-NaN values
        # have been consumed so far; negative until ``minp`` were seen.
        valid_start = -minp

        last_end = 0
        last_start = -1

        for i in range(N):
            this_start = start[i]
            this_end = end[i]

            if not dominators.empty() and dominators.top() == i:
                dominators.pop()

            if not (this_end > last_end
                    or (this_end == last_end and this_start >= last_start)):
                raise ValueError(
                    "Start/End ordering requirement is violated at index {}".format(i))

            # Candidates may only be dropped if neither this window nor the
            # next recorded earlier-starting window can still need them.
            if dominators.empty():
                stash_start = this_start
            else:
                stash_start = min(this_start, start[dominators.top()])

            while not candidates.empty() and candidates.front() < stash_start:
                candidates.pop_front()

            # Consume only the values not yet seen by a previous window.
            for k in range(last_end, this_end):
                if not isnan(values[k]):
                    valid_start += 1
                    # Skip NaN positions so valid_start rests on a real value.
                    while valid_start >= 0 and isnan(values[valid_start]):
                        valid_start += 1

                    # The max/min loops are duplicated on purpose: dispatching
                    # through a generic comparison function pointer, i.e.
                    #   while not Q.empty() and cmp(values[k], values[Q.back()])
                    # was measured to be more than 15% slower.
                    if is_max:
                        while (not candidates.empty()
                                and values[k] >= values[candidates.back()]):
                            candidates.pop_back()
                    else:
                        while (not candidates.empty()
                                and values[k] <= values[candidates.back()]):
                            candidates.pop_back()
                    candidates.push_back(k)

            if candidates.empty() or this_start > valid_start:
                output[i] = NaN
            elif candidates.front() >= this_start:
                output[i] = values[candidates.front()]
            else:
                # The front was retained for another window and lies before
                # this window's start; find the first candidate inside it.
                q_idx = bisect_left(candidates, this_start, lo=1)
                output[i] = values[candidates[q_idx]]
            last_end = this_end
            last_start = this_start

    return output
1136
1180
0 commit comments