From 2172e12fd61fd7c4bcaeba2ed4bf75aa64ca8471 Mon Sep 17 00:00:00 2001
From: Julien Lerat
Date: Fri, 7 Mar 2025 09:45:48 +1100
Subject: [PATCH] fix: fixed clipping of kde for censored values in violinplot (#38)

Values clipped to a censoring threshold pile up at the sample minimum
or maximum and distort the gaussian kde used to draw the violin. Keep a
single point at each bound before fitting the kernel.

The interior mask (irest) is computed before the bound masks (ilow,
ihigh) are thinned. If it is computed afterwards, the duplicates that
were just dropped from ilow/ihigh are picked up again as interior
points, every value ends up selected and the kde is unchanged.

The evaluation grid (x) still uses all non-null values.
---
 .../plot/tests/test_hyplot_violinplot.py      | 12 ++++++++++
 src/hydrodiy/plot/violinplot.py               | 22 +++++++++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/hydrodiy/plot/tests/test_hyplot_violinplot.py b/src/hydrodiy/plot/tests/test_hyplot_violinplot.py
index f7e98e9..6dcae9b 100644
--- a/src/hydrodiy/plot/tests/test_hyplot_violinplot.py
+++ b/src/hydrodiy/plot/tests/test_hyplot_violinplot.py
@@ -132,6 +132,18 @@ def test_violin_missing():
     fig.savefig(fp)
 
 
+def test_violin_censored():
+    plt.close("all")
+    df = DATA1.copy()
+    df.data2 = df.data2.clip(-np.inf, df.data2.quantile(0.3))
+    df.data3 = df.data3.clip(df.data3.quantile(0.3))
+    vl = Violin(data=df)
+    fig, ax = plt.subplots()
+    vl.draw(ax=ax)
+    fp = FIMG / "violin_censored.png"
+    fig.savefig(fp)
+
+
 def test_violin_allnan():
     plt.close("all")
     df = DATA1.copy()
diff --git a/src/hydrodiy/plot/violinplot.py b/src/hydrodiy/plot/violinplot.py
index 6890189..405196e 100644
--- a/src/hydrodiy/plot/violinplot.py
+++ b/src/hydrodiy/plot/violinplot.py
@@ -219,10 +219,28 @@ def _compute(self):
                 continue
 
             sen = se[notnull]
-            kernel = gaussian_kde(sen.values)
+            values = sen.values
+            x0, x1 = sen.min(), sen.max()
+
+            # reduce impact of censored data: keep one point per bound
+            ilow = np.abs(values-x0) < 1e-10
+            ihigh = np.abs(values-x1) < 1e-10
+            # interior mask is taken before ilow/ihigh are thinned
+            irest = ~ilow & ~ihigh
+
+            if ilow.sum() > 1:
+                idx = np.where(ilow)[0][1:]
+                ilow[idx] = False
+
+            if ihigh.sum() > 1:
+                idx = np.where(ihigh)[0][1:]
+                ihigh[idx] = False
+
+            selected = irest | ilow | ihigh
+            # Run kde estimate
+            kernel = gaussian_kde(values[selected])
 
             # blend regular spacing and ecdf spacing
-            x0, x1 = sen.min(), sen.max()
             x = np.linspace(x0, x1, (npts-len(sen)))
             err = 1e-6*np.random.uniform(-1, 1, len(sen))
             x = np.sort(np.concatenate([x, sen.values+err]))