Skip to content

Commit ab2e684

Browse files
authored
Merge pull request #19 from singjc/patch/spectrum_bin_peaks
Patch/spectrum bin peaks
2 parents 5d06317 + afb7c28 commit ab2e684

File tree

2 files changed

+106
-4
lines changed

2 files changed

+106
-4
lines changed

pyopenms_viz/_core.py

+64-4
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
import types
77
import re
88

9-
from pandas import cut
9+
from pandas import cut, merge
1010
from pandas.core.frame import DataFrame
1111
from pandas.core.dtypes.generic import ABCDataFrame
1212
from pandas.core.dtypes.common import is_integer
1313
from pandas.util._decorators import Appender
1414

15-
from numpy import log1p
15+
from numpy import ceil, log1p, log2
1616

1717
from ._config import LegendConfig, FeatureConfig, _BasePlotConfig
18-
from ._misc import ColorGenerator
18+
from ._misc import ColorGenerator, sturges_rule, freedman_diaconis_rule
1919

2020

2121
_common_kinds = ("line", "vline", "scatter")
@@ -566,6 +566,9 @@ def __init__(
566566
reference_spectrum: DataFrame | None = None,
567567
mirror_spectrum: bool = False,
568568
relative_intensity: bool = False,
569+
bin_peaks: Union[Literal["auto"], bool] = "auto",
570+
bin_method: Literal['none', 'sturges', 'freedman-diaconis'] = 'freedman-diaconis',
571+
num_x_bins: int = 50,
569572
peak_color: str | None = None,
570573
annotate_top_n_peaks: int | None | Literal["all"] = 5,
571574
annotate_mz: bool = True,
@@ -584,6 +587,17 @@ def __init__(
584587
self.reference_spectrum = reference_spectrum
585588
self.mirror_spectrum = mirror_spectrum
586589
self.relative_intensity = relative_intensity
590+
self.bin_peaks = bin_peaks
591+
self.bin_method = bin_method
592+
if self.bin_peaks == "auto":
593+
if self.bin_method == 'sturges':
594+
self.num_x_bins = sturges_rule(data, x)
595+
elif self.bin_method == 'freedman-diaconis':
596+
self.num_x_bins = freedman_diaconis_rule(data, x)
597+
elif self.bin_method == 'none':
598+
self.num_x_bins = num_x_bins
599+
else:
600+
self.num_x_bins = num_x_bins
587601
self.peak_color = peak_color
588602
self.annotate_top_n_peaks = annotate_top_n_peaks
589603
self.annotate_mz = annotate_mz
@@ -599,9 +613,10 @@ def __init__(
599613

600614
def plot(self, x, y, **kwargs):
601615
"""Standard spectrum plot with m/z on x-axis, intensity on y-axis and optional mirror spectrum."""
616+
602617
# Prepare data
603618
spectrum, reference_spectrum = self._prepare_data(
604-
self.data, y, self.reference_spectrum
619+
self.data, x, y, self.reference_spectrum
605620
)
606621
kwargs.pop("fig", None) # remove figure from **kwargs if exists
607622

@@ -672,9 +687,46 @@ def plot(self, x, y, **kwargs):
672687

673688
self._modify_y_range((min_value, max_value), padding=(min_padding, max_padding))
674689

690+
def _bin_peaks(
691+
self,
692+
data: DataFrame,
693+
x: str,
694+
y: str
695+
) -> DataFrame:
696+
"""
697+
Bin peaks based on x-axis values.
698+
699+
Args:
700+
data (DataFrame): The data to bin.
701+
x (str): The column name for the x-axis data.
702+
y (str): The column name for the y-axis data.
703+
704+
Returns:
705+
DataFrame: The binned data.
706+
"""
707+
data[x] = cut(data[x], bins=self.num_x_bins)
708+
if self.by is not None:
709+
# Group by x bin and by column and calculate the mean intensity within each bin
710+
data = (
711+
data.groupby([x, self.by], observed=True)
712+
.agg({y: "mean"})
713+
.reset_index()
714+
)
715+
else:
716+
# Group by x bins and calculate the mean intensity within each bin
717+
data = (
718+
data.groupby([x], observed=True)
719+
.agg({y: "mean"})
720+
.reset_index()
721+
)
722+
data[x] = data[x].apply(lambda interval: interval.mid).astype(float)
723+
data = data.fillna(0)
724+
return data
725+
675726
def _prepare_data(
676727
self,
677728
spectrum: DataFrame,
729+
x: str,
678730
y: str,
679731
reference_spectrum: Union[DataFrame, None],
680732
) -> tuple[list, list]:
@@ -693,6 +745,14 @@ def _prepare_data(
693745
reference_spectrum[y] = (
694746
reference_spectrum[y] / reference_spectrum[y].max() * 100
695747
)
748+
749+
# Bin peaks if required
750+
if self.bin_peaks == True or (self.bin_peaks == "auto"
751+
):
752+
spectrum = self._bin_peaks(spectrum, x, y)
753+
if reference_spectrum is not None:
754+
reference_spectrum = self._bin_peaks(reference_spectrum, x, y)
755+
696756
return spectrum, reference_spectrum
697757

698758
def _get_colors(

pyopenms_viz/_misc.py

+42
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,45 @@ def __next__(self):
225225
Returns the next shape in the shape cycle.
226226
"""
227227
return next(self.shape_cycle)
228+
229+
230+
def sturges_rule(df, value):
    """
    Calculate the number of bins using Sturges' rule.

    The bin count is ``ceil(1 + log2(n))`` where ``n`` is the number of
    entries in ``df[value]``. An empty column yields a single bin instead
    of failing on ``log2(0)``.

    Args:
        df (pd.DataFrame): A pandas DataFrame containing the data.
        value (str): The column name of the data.

    Returns:
        int: The number of bins (always >= 1).
    """
    # Local import keeps the helper independent of module-level aliases.
    import math

    n = len(df[value])
    if n < 1:
        # log2(0) is -inf and cannot be converted to int.
        return 1
    return int(math.ceil(1 + math.log2(n)))
244+
245+
def freedman_diaconis_rule(df, value):
    """
    Calculate the number of bins using the Freedman-Diaconis rule.

    The bin width is ``2 * IQR / n ** (1/3)`` and the bin count is the data
    range divided by that width, truncated to an integer.

    Degenerate inputs are handled so the result is always usable as a bin
    count:

    * empty data, or data whose range is zero or not finite -> 1 bin;
    * zero (or non-finite) bin width, i.e. IQR == 0 while the range is
      positive -> fall back to Sturges' rule, ``ceil(1 + log2(n))``;
    * a computed count that truncates to 0 -> 1 bin.

    Args:
        df (pd.DataFrame): A pandas DataFrame containing the data.
        value (str): The column name of the data.

    Returns:
        int: The number of bins (always >= 1).
    """
    # Local import keeps the helper independent of module-level aliases.
    import math

    values = df[value]

    # Number of observations
    n = len(df)
    if n == 0:
        return 1

    data_range = values.max() - values.min()
    # ``not (… > 0)`` also catches NaN (e.g. a column that is entirely NaN).
    if not (data_range > 0 and math.isfinite(data_range)):
        return 1

    # Calculate IQR
    iqr = values.quantile(0.75) - values.quantile(0.25)

    # Calculate bin width using the Freedman-Diaconis rule
    bin_width = 2 * iqr / (n ** (1 / 3))
    if not (bin_width > 0 and math.isfinite(bin_width)):
        # More than half of the values are identical, so the IQR collapses;
        # dividing by it would give inf/NaN. Use Sturges' rule instead.
        return int(math.ceil(1 + math.log2(n)))

    # Calculate the number of bins; truncation may give 0, which is not a
    # valid bin count.
    return max(1, int(data_range / bin_width))

0 commit comments

Comments
 (0)