Skip to content

Commit

Permalink
fix: violinplot speed for large number of points
Browse files Browse the repository at this point in the history
  • Loading branch information
jlerat committed Mar 7, 2025
1 parent 2172e12 commit 49f284a
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 5 deletions.
25 changes: 25 additions & 0 deletions src/hydrodiy/plot/tests/test_hyplot_violinplot.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path
import os
import math
import time
import pytest

import numpy as np
Expand Down Expand Up @@ -144,6 +145,30 @@ def test_violin_censored():
fig.savefig(fp)


def test_violin_large():
    """Check that drawing a violin plot scales acceptably with data size.

    A Violin is built and drawn for a 100-row and a 100000-row random
    data frame (5 columns each). The test passes if the large case takes
    less than 100 times as long as the small one.
    """
    plt.close("all")
    fig, axs = plt.subplots(ncols=2)

    def _timed_draw(nrows, ax):
        # Time only the Violin construction and drawing, not data creation.
        df = pd.DataFrame(np.random.normal(size=(nrows, 5)))
        # perf_counter is monotonic and high resolution, unlike time.time,
        # so short durations are measured reliably.
        start = time.perf_counter()
        vl = Violin(data=df)
        vl.draw(ax=ax)
        return time.perf_counter() - start

    delta0 = _timed_draw(100, axs[0])
    delta1 = _timed_draw(100000, axs[1])

    # Save before asserting so the figure is available when the test fails.
    fp = FIMG / "violin_large.png"
    fig.savefig(fp)

    # Multiplicative form avoids ZeroDivisionError if delta0 rounds to zero.
    assert delta1 < 100 * delta0


def test_violin_allnan():
plt.close("all")
df = DATA1.copy()
Expand Down
21 changes: 16 additions & 5 deletions src/hydrodiy/plot/violinplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import pandas as pd
from scipy.stats import gaussian_kde
from numpy.polynomial import Chebyshev

import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
Expand Down Expand Up @@ -32,7 +33,9 @@ def __init__(self, data,
col_ref_others="tab:blue",
brightening_factor_light=-0.5,
brightening_factor_superlight=-1.0,
npoints_kde=None, **kwargs):
npoints_kde=None,
nresample_kde=500,
**kwargs):
""" Draw boxplots with labels and defined colors
Parameters
Expand All @@ -57,6 +60,9 @@ def __init__(self, data,
0.25 and 0.75 quantiles. Can use 'bfsl' abbreviation.
npoints_kde: int
Number points used in kde density estimation
nresample_kde: int
Number resampled data points used in kde density estimation
to accelerate. Ignored if number of data points is smaller.
"""
# Check input data
try:
Expand All @@ -68,10 +74,12 @@ def __init__(self, data,

# initialise objects
if npoints_kde is None:
self.npoints_kde = 2*len(data)
self.npoints_kde = max(100, min(500, len(data)))
else:
self.npoints_kde = int(npoints_kde)

self.nresample_kde = int(nresample_kde)

self._ax = None
self._data = data
self._kde_x = None
Expand Down Expand Up @@ -241,10 +249,13 @@ def _compute(self):
kernel = gaussian_kde(values[selected])

# blend regular spacing and ecdf spacing
x = np.linspace(x0, x1, (npts-len(sen)))
err = 1e-6*np.random.uniform(-1, 1, len(sen))
x = np.sort(np.concatenate([x, sen.values+err]))
q = np.linspace(0, 1, npts//2)
err = 1e-6 * np.random.uniform(-1, 1, len(q))
x = np.concatenate([np.linspace(x0, x1, npts // 2),
sen.quantile(q) + err])
x = np.sort(x)
y = kernel(x)

y = (y-y.min())/(y.max()-y.min())
kde_x.loc[:, cn] = x
kde_y.loc[:, cn] = y
Expand Down

0 comments on commit 49f284a

Please sign in to comment.