6
6
import types
7
7
import re
8
8
9
- from pandas import cut
9
+ from pandas import cut , merge
10
10
from pandas .core .frame import DataFrame
11
11
from pandas .core .dtypes .generic import ABCDataFrame
12
12
from pandas .core .dtypes .common import is_integer
13
13
from pandas .util ._decorators import Appender
14
14
15
- from numpy import log1p
15
+ from numpy import ceil , log1p , log2
16
16
17
17
from ._config import LegendConfig , FeatureConfig , _BasePlotConfig
18
- from ._misc import ColorGenerator
18
+ from ._misc import ColorGenerator , sturges_rule , freedman_diaconis_rule
19
19
20
20
21
21
_common_kinds = ("line" , "vline" , "scatter" )
@@ -566,6 +566,9 @@ def __init__(
566
566
reference_spectrum : DataFrame | None = None ,
567
567
mirror_spectrum : bool = False ,
568
568
relative_intensity : bool = False ,
569
+ bin_peaks : Union [Literal ["auto" ], bool ] = "auto" ,
570
+ bin_method : Literal ['none' , 'sturges' , 'freedman-diaconis' ] = 'freedman-diaconis' ,
571
+ num_x_bins : int = 50 ,
569
572
peak_color : str | None = None ,
570
573
annotate_top_n_peaks : int | None | Literal ["all" ] = 5 ,
571
574
annotate_mz : bool = True ,
@@ -584,6 +587,17 @@ def __init__(
584
587
self .reference_spectrum = reference_spectrum
585
588
self .mirror_spectrum = mirror_spectrum
586
589
self .relative_intensity = relative_intensity
590
+ self .bin_peaks = bin_peaks
591
+ self .bin_method = bin_method
592
+ if self .bin_peaks == "auto" :
593
+ if self .bin_method == 'sturges' :
594
+ self .num_x_bins = sturges_rule (data , x )
595
+ elif self .bin_method == 'freedman-diaconis' :
596
+ self .num_x_bins = freedman_diaconis_rule (data , x )
597
+ elif self .bin_method == 'none' :
598
+ self .num_x_bins = num_x_bins
599
+ else :
600
+ self .num_x_bins = num_x_bins
587
601
self .peak_color = peak_color
588
602
self .annotate_top_n_peaks = annotate_top_n_peaks
589
603
self .annotate_mz = annotate_mz
@@ -599,9 +613,10 @@ def __init__(
599
613
600
614
def plot (self , x , y , ** kwargs ):
601
615
"""Standard spectrum plot with m/z on x-axis, intensity on y-axis and optional mirror spectrum."""
616
+
602
617
# Prepare data
603
618
spectrum , reference_spectrum = self ._prepare_data (
604
- self .data , y , self .reference_spectrum
619
+ self .data , x , y , self .reference_spectrum
605
620
)
606
621
kwargs .pop ("fig" , None ) # remove figure from **kwargs if exists
607
622
@@ -672,9 +687,46 @@ def plot(self, x, y, **kwargs):
672
687
673
688
self ._modify_y_range ((min_value , max_value ), padding = (min_padding , max_padding ))
674
689
690
def _bin_peaks(
    self,
    data: DataFrame,
    x: str,
    y: str
) -> DataFrame:
    """
    Bin peaks based on x-axis values.

    The x column is cut into ``self.num_x_bins`` bins, the y values falling
    into each bin (and, when ``self.by`` is set, each ``self.by`` group) are
    averaged, and every bin is then represented by its midpoint.

    The input frame is left untouched; binning is done on a copy so that the
    caller's data (e.g. ``self.data``) can be binned again on a later call.

    Args:
        data (DataFrame): The data to bin.
        x (str): The column name for the x-axis data.
        y (str): The column name for the y-axis data.

    Returns:
        DataFrame: The binned data, with ``x`` holding float bin midpoints
            and ``y`` holding the mean value per bin. Missing values are
            replaced with 0.
    """
    # Nothing to bin: `cut` raises on an empty array, so hand back a copy.
    if data.empty:
        return data.copy()

    # Copy first: assigning the interval categories onto the caller's frame
    # would corrupt it and make any subsequent binning of the same data fail.
    binned = data.copy()
    binned[x] = cut(binned[x], bins=self.num_x_bins)

    # Group by the x bin, plus the `by` column when one is configured.
    group_keys = [x] if self.by is None else [x, self.by]

    # observed=True keeps only the bins that actually contain peaks.
    binned = (
        binned.groupby(group_keys, observed=True)
        .agg({y: "mean"})
        .reset_index()
    )

    # Replace each interval with its midpoint so x is numeric again.
    binned[x] = binned[x].apply(lambda interval: interval.mid).astype(float)
    return binned.fillna(0)
725
+
675
726
def _prepare_data (
676
727
self ,
677
728
spectrum : DataFrame ,
729
+ x : str ,
678
730
y : str ,
679
731
reference_spectrum : Union [DataFrame , None ],
680
732
) -> tuple [list , list ]:
@@ -693,6 +745,14 @@ def _prepare_data(
693
745
reference_spectrum [y ] = (
694
746
reference_spectrum [y ] / reference_spectrum [y ].max () * 100
695
747
)
748
+
749
+ # Bin peaks if required
750
+ if self .bin_peaks == True or (self .bin_peaks == "auto"
751
+ ):
752
+ spectrum = self ._bin_peaks (spectrum , x , y )
753
+ if reference_spectrum is not None :
754
+ reference_spectrum = self ._bin_peaks (reference_spectrum , x , y )
755
+
696
756
return spectrum , reference_spectrum
697
757
698
758
def _get_colors (
0 commit comments