Skip to content

Commit 0823f43

Browse files
authored
gh-115532: Minor tweaks to kde() (gh-117897)
1 parent 10f1a26 commit 0823f43

File tree

2 files changed

+25
-12
lines changed

2 files changed

+25
-12
lines changed

Doc/library/statistics.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -1163,7 +1163,7 @@ accurately approximated inverse cumulative distribution function.
11631163
.. testcode::
11641164

11651165
from random import choice, random, seed
1166-
from math import sqrt, log, pi, tan, asin
1166+
from math import sqrt, log, pi, tan, asin, cos, acos
11671167
from statistics import NormalDist
11681168

11691169
kernel_invcdfs = {
@@ -1172,6 +1172,7 @@ accurately approximated inverse cumulative distribution function.
11721172
'sigmoid': lambda p: log(tan(p * pi/2)),
11731173
'rectangular': lambda p: 2*p - 1,
11741174
'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
1175+
'parabolic': lambda p: 2 * cos((acos(2*p-1) + pi) / 3),
11751176
'cosine': lambda p: 2*asin(2*p - 1)/pi,
11761177
}
11771178

Lib/statistics.py

+23-11
Original file line numberDiff line numberDiff line change
@@ -919,53 +919,53 @@ def kde(data, h, kernel='normal', *, cumulative=False):
919919
sqrt2pi = sqrt(2 * pi)
920920
sqrt2 = sqrt(2)
921921
K = lambda t: exp(-1/2 * t * t) / sqrt2pi
922-
I = lambda t: 1/2 * (1.0 + erf(t / sqrt2))
922+
W = lambda t: 1/2 * (1.0 + erf(t / sqrt2))
923923
support = None
924924

925925
case 'logistic':
926926
# 1.0 / (exp(t) + 2.0 + exp(-t))
927927
K = lambda t: 1/2 / (1.0 + cosh(t))
928-
I = lambda t: 1.0 - 1.0 / (exp(t) + 1.0)
928+
W = lambda t: 1.0 - 1.0 / (exp(t) + 1.0)
929929
support = None
930930

931931
case 'sigmoid':
932932
# (2/pi) / (exp(t) + exp(-t))
933933
c1 = 1 / pi
934934
c2 = 2 / pi
935935
K = lambda t: c1 / cosh(t)
936-
I = lambda t: c2 * atan(exp(t))
936+
W = lambda t: c2 * atan(exp(t))
937937
support = None
938938

939939
case 'rectangular' | 'uniform':
940940
K = lambda t: 1/2
941-
I = lambda t: 1/2 * t + 1/2
941+
W = lambda t: 1/2 * t + 1/2
942942
support = 1.0
943943

944944
case 'triangular':
945945
K = lambda t: 1.0 - abs(t)
946-
I = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2
946+
W = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2
947947
support = 1.0
948948

949949
case 'parabolic' | 'epanechnikov':
950950
K = lambda t: 3/4 * (1.0 - t * t)
951-
I = lambda t: -1/4 * t**3 + 3/4 * t + 1/2
951+
W = lambda t: -1/4 * t**3 + 3/4 * t + 1/2
952952
support = 1.0
953953

954954
case 'quartic' | 'biweight':
955955
K = lambda t: 15/16 * (1.0 - t * t) ** 2
956-
I = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2
956+
W = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2
957957
support = 1.0
958958

959959
case 'triweight':
960960
K = lambda t: 35/32 * (1.0 - t * t) ** 3
961-
I = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2
961+
W = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2
962962
support = 1.0
963963

964964
case 'cosine':
965965
c1 = pi / 4
966966
c2 = pi / 2
967967
K = lambda t: c1 * cos(c2 * t)
968-
I = lambda t: 1/2 * sin(c2 * t) + 1/2
968+
W = lambda t: 1/2 * sin(c2 * t) + 1/2
969969
support = 1.0
970970

971971
case _:
@@ -974,27 +974,39 @@ def kde(data, h, kernel='normal', *, cumulative=False):
974974
if support is None:
975975

976976
def pdf(x):
977+
n = len(data)
977978
return sum(K((x - x_i) / h) for x_i in data) / (n * h)
978979

979980
def cdf(x):
980-
return sum(I((x - x_i) / h) for x_i in data) / n
981+
982+
n = len(data)
983+
return sum(W((x - x_i) / h) for x_i in data) / n
984+
981985

982986
else:
983987

984988
sample = sorted(data)
985989
bandwidth = h * support
986990

987991
def pdf(x):
992+
nonlocal n, sample
993+
if len(data) != n:
994+
sample = sorted(data)
995+
n = len(data)
988996
i = bisect_left(sample, x - bandwidth)
989997
j = bisect_right(sample, x + bandwidth)
990998
supported = sample[i : j]
991999
return sum(K((x - x_i) / h) for x_i in supported) / (n * h)
9921000

9931001
def cdf(x):
1002+
nonlocal n, sample
1003+
if len(data) != n:
1004+
sample = sorted(data)
1005+
n = len(data)
9941006
i = bisect_left(sample, x - bandwidth)
9951007
j = bisect_right(sample, x + bandwidth)
9961008
supported = sample[i : j]
997-
return sum((I((x - x_i) / h) for x_i in supported), i) / n
1009+
return sum((W((x - x_i) / h) for x_i in supported), i) / n
9981010

9991011
if cumulative:
10001012
cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}'

0 commit comments

Comments
 (0)