Waveform noise adder (#2847)

moralejo · kosack · maxnoe · web-flow · commit 2a4b8810098a · 2026-03-30T11:30:45.000+02:00
* Class to add noise in waveforms * Completed init function * Added preliminary __call__ function * Fixed case of gain-selected R1 * Added documentation * Added test function * total_noise not a traitlet * Simplified code * Added filter to use only pedestal events, and some comments on the possible use of real data as source of NSB waveforms. * Use EventTypeFilter * Event types are now configurable * Merge strings * parent=self in EventTypeFilter * Use of context manager in test function Co-authored-by: Karl Kosack <kosack@users.noreply.github.com> * Use context manager in WaveformModifier::init * Improved docstring * Use rng.choice instead of permutation. Updated test std dev values * Class NoiseEventTypeFilter so that we can set the proper default * Introduced @maxnoe's suggestion for doing the pixel-wise noise generation using numba. Now this is the default option, but one can choose to shuffle "full cameras" of noise (like before), which is about 5 times faster. Implemented other reviewers' suggestions * Added comment * Fixed typo & improved class description * Three new functions to do the init stuff * Simplified code in WaveformModifier.__call__ * numba cache Co-authored-by: Maximilian Linhoff <maximilian.linhoff@cta-observatory.org> * Merge remote-tracking branch 'origin/main' into waveform_noise_adder * Changelog * Typo * Add changelog * Introduce needs_atmosphere_profile property to reconstructors * Removed obsolete needs_atmosphere_profile (it was back there after an attempt to clean the history of this branch) * Changed variable shuffle_full_cameras to sample_pixels_independently, which shows its meaning more clearly --------- Co-authored-by: Karl Kosack <kosack@users.noreply.github.com> Co-authored-by: Maximilian Linhoff <maximilian.linhoff@cta-observatory.org> Co-authored-by: Jonas Hackfeld <jonas.hackfeld@ruhr-uni-bochum.de>
diff --git a/docs/changes/2847.feature.rst b/docs/changes/2847.feature.rst
@@ -0,0 +1 @@
+Component to add noise in waveforms, intended to adapt the noise level in simulations to that in a given real observation.
diff --git a/src/ctapipe/image/__init__.py b/src/ctapipe/image/__init__.py
@@ -37,7 +37,11 @@
 from .image_processor import ImageProcessor
 from .invalid_pixels import InvalidPixelHandler, NeighborAverage
 from .leakage import leakage_parameters
-from .modifications import ImageModifier
+from .modifications import (
+    ImageModifier,
+    NoiseEventTypeFilter,
+    WaveformModifier,
+)
 from .morphology import (
     brightest_island,
     largest_island,
@@ -69,6 +73,8 @@
 from .timing import timing_parameters
 
 __all__ = [
+    "NoiseEventTypeFilter",
+    "WaveformModifier",
     "ImageModifier",
     "ImageProcessor",
     "hillas_parameters",
diff --git a/src/ctapipe/image/modifications.py b/src/ctapipe/image/modifications.py
@@ -1,10 +1,22 @@
+from collections import defaultdict
+
 import numpy as np
 from numba import njit
+from traitlets import default
 
+from ..containers import EventType
 from ..core import TelescopeComponent
 from ..core.env import CTAPIPE_DISABLE_NUMBA_CACHE
-from ..core.traits import BoolTelescopeParameter, FloatTelescopeParameter, Int
+from ..core.traits import (
+    Bool,
+    BoolTelescopeParameter,
+    FloatTelescopeParameter,
+    Int,
+    Path,
+)
 from ..instrument import SubarrayDescription
+from ..io import EventSource
+from ..utils import EventTypeFilter
 
 __all__ = [
     "ImageModifier",
@@ -100,6 +112,317 @@ def _smear_psf_randomly(
     return new_image
 
 
+class NoiseEventTypeFilter(EventTypeFilter):
+    """
+    EventTypeFilter used to pick the noise events for MC tuning.
+
+    Its only difference with respect to the base class is the default
+    value of ``allowed_types``, which here is SKY_PEDESTAL events only.
+    """
+
+    @default("allowed_types")
+    def allowed_types_default(self):
+        noise_event_types = {EventType.SKY_PEDESTAL}
+        return noise_event_types
+
+
+@njit(cache=not CTAPIPE_DISABLE_NUMBA_CACHE)
+def build_wf_noise_pixelwise(
+    waveforms, n_noise_realizations, nsb_level, rng, sample_pixels_independently
+):
+    """
+    Build total noise waveforms by summing randomly chosen elemental ones.
+
+    Parameters
+    ----------
+    waveforms : array (nevents, ngains, npixels, nsamples)
+        The elemental noise waveforms.
+    n_noise_realizations : int
+        Number of total noise waveforms to generate.
+    nsb_level : int
+        Number of elemental noise waveforms that are summed to produce
+        each total noise waveform.
+    rng : random number generator
+        Must provide ``permutation``.
+    sample_pixels_independently : bool
+        If True, a separate random set of elemental noise events is drawn
+        for every pixel of every realization. If False, a single random
+        set is drawn per realization and shared by all of its pixels.
+
+    Returns
+    -------
+    array (n_noise_realizations, ngains, npixels, nsamples), float32
+        The total noise waveforms.
+    """
+    n_events, n_gains, n_pixels, n_samples = waveforms.shape
+    out_shape = (n_noise_realizations, n_gains, n_pixels, n_samples)
+    total = np.zeros(out_shape, dtype=np.float32)
+
+    for realization in range(n_noise_realizations):
+        if not sample_pixels_independently:
+            # Full-camera mode: one draw of events serves every pixel.
+            picked = rng.permutation(n_events)[:nsb_level]
+            for event in picked:
+                total[realization] += waveforms[event]
+            continue
+
+        for pixel in range(n_pixels):
+            # rng.choice(n_events, nsb_level) would be faster (especially
+            # for n_events much larger than nsb_level), but it does not
+            # currently work with numba, hence the truncated permutation.
+            picked = rng.permutation(n_events)[:nsb_level]
+            for event in picked:
+                total[realization, :, pixel] += waveforms[event, :, pixel]
+
+    return total
+
+
+class WaveformModifier(TelescopeComponent):
+    """
+    Component to add NSB noise to R1 waveforms.
+
+    This component is in principle meant to be applied to MC shower
+    simulations, to make them closer to real data in terms of noise level.
+
+    There are two possibilities:
+        1. The "showers MC" file has dark-NSB settings and electronic noise (
+        waveform baseline fluctuations), and the input NSB file is a
+        dedicated sim_telarray file, which must be produced with the same
+        telescope array configuration (and other simulation settings) as the
+        showers MC to which the noise is to be added, but containing only NSB
+        noise (electronic fluctuations of the baseline should be switched off)
+
+        2. The showers MC file is produced with no noise (baseline
+        fluctuations) of any kind (electronic or NSB), just the Cherenkov
+        signal (with the appropriate single-p.e.-response fluctuations),
+        whereas the nsb file is a real data DL0 file from which only the
+        interleaved pedestals are used (all gain channels must be present
+        for all pixels). In that case, nsb_level must be 1 (to
+        match the MC to the data) and sample_pixels_independently=False
+        (we do not want e.g. to duplicate stars in the FoV).
+
+
+    In case (1), the number of available noise events per telescope in the NSB
+    file must be at least twice the number of waveforms ("nsb_level") from
+    that file that we want to add up. If the NSB file is produced with a
+    level of 25% of dark NSB, and we want to simulate 10x dark NSB,
+    then nsb_level=40 (=10/0.25) and the file must contain at least 80
+    events. This is to guarantee that the different noise waveforms are not
+    too correlated.
+
+    The noise realizations are built once, in the constructor; calling the
+    instance adds one of them, chosen at random, to the given waveforms.
+    """
+
+    nsb_file = Path(
+        default_value=None,
+        help="Path to a dedicated NSB-only file (e.g. from sim_telarray)",
+    ).tag(config=True)
+
+    nsb_level = Int(
+        default_value=1,
+        help=(
+            "Number of random instances of the NSB waveforms from the "
+            "NSB file to be added up to the waveform"
+        ),
+    ).tag(config=True)
+
+    n_noise_realizations = Int(
+        default_value=100,
+        help=(
+            "Number of different realizations of the total NSB waveform to "
+            "be created per pixel"
+        ),
+    ).tag(config=True)
+
+    sample_pixels_independently = Bool(
+        default_value=True,
+        help=(
+            "If True, each pixel uses a different "
+            "random combination of the input noise "
+            "events. "
+            "If False, all pixels use the same random "
+            "combination of noise events. That is, noise "
+            "events will be combined as full cameras"
+        ),
+    ).tag(config=True)
+
+    rng_seed = Int(default_value=1, help="Seed for the random number generator").tag(
+        config=True
+    )
+
+    def __init__(
+        self,
+        subarray: SubarrayDescription,
+        config=None,
+        parent=None,
+        **kwargs,
+    ):
+        """
+        Read the noise events and precompute the total noise realizations.
+
+        Parameters
+        ----------
+        subarray: SubarrayDescription
+            Description of the subarray. Provides information about the
+            camera which are useful in calibration. Also required for
+            configuring the TelescopeParameter traitlets.
+        config: traitlets.loader.Config
+            Configuration specified by config file or cmdline arguments.
+            Used to set traitlet values.
+            This is mutually exclusive with passing a ``parent``.
+        parent: ctapipe.core.Component or ctapipe.core.Tool
+            Parent of this component in the configuration hierarchy,
+            this is mutually exclusive with passing ``config``
+
+        Raises
+        ------
+        ValueError
+            If ``check_noise_statistics`` reports that the events read from
+            ``nsb_file`` are not sufficient.
+        """
+
+        super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)
+        self.rng = np.random.default_rng(self.rng_seed)
+
+        self.event_type_filter = NoiseEventTypeFilter(parent=self)
+
+        # One key per tel_id, and each of them is an array of shape
+        # [n_noise_realizations, ngains, npixels, nsamples]
+        self.total_noise = {}
+
+        # Read in the waveforms in the NSB-only file. Store in a dictionary
+        # with one key per telescope, containing an array [n_events, n_gains,
+        # n_pixels, n_samples]
+        nsb_database = self.read_nsb_database()
+
+        # Check if noise statistics is sufficient:
+        stats_ok = self.check_noise_statistics(nsb_database)
+        if not stats_ok:
+            raise ValueError("Please use an input nsb_file with more events!")
+
+        # Now shift the waveforms so that they have mean=0 and do not introduce
+        # any bias (just fluctuations)
+        self.zero_baseline(nsb_database)
+
+        # Add up waveforms selected at random to obtain different
+        # realizations of the total noise that will be added
+        for tel_id in nsb_database:
+            self.total_noise[tel_id] = build_wf_noise_pixelwise(
+                nsb_database[tel_id],
+                self.n_noise_realizations,
+                self.nsb_level,
+                self.rng,
+                self.sample_pixels_independently,
+            )
+
+    def read_nsb_database(self):
+        """
+        Reads in R1 noise waveforms from the input file self.nsb_file.
+
+        Only events accepted by self.event_type_filter are used.
+
+        Returns
+        -------
+        nsb_database : dict
+            Dictionary with one key per telescope, containing an array
+            [n_events, n_gains, n_pixels, n_samples] (noise waveforms).
+            It is empty if no event was accepted.
+        """
+        nsb_database = defaultdict(list)
+        with EventSource(
+            input_url=self.nsb_file, skip_calibration_events=False
+        ) as source:
+            for event in source:
+                if not self.event_type_filter(event):
+                    continue
+                for tel_id, r1 in event.r1.tel.items():
+                    nsb_database[tel_id].append(r1.waveform)
+
+        # Turn each per-telescope list of waveforms into a single array with
+        # the event index as first axis
+        nsb_database = {
+            tel_id: np.stack(waveforms) for tel_id, waveforms in nsb_database.items()
+        }
+
+        return nsb_database
+
+    def check_noise_statistics(self, nsb_database):
+        """
+        Check that we have enough NSB-only events for all telescopes. We
+        require that the number of NSB events for any telescope is at
+        least two times the number of waveforms (=nsb_level) that we will add
+        up. This is to avoid excessive correlation among the waveforms.
+        A database without any telescope is also considered insufficient.
+
+        Parameters
+        ----------
+        nsb_database: dict
+            Dictionary with one key per telescope, containing an array
+            [n_events, n_gains, n_pixels, n_samples] (noise waveforms)
+
+        Returns
+        -------
+        stats_ok : bool
+            True if statistics of noise events is considered sufficient
+        """
+
+        if not nsb_database:
+            # Without this guard the loop below would accept an empty
+            # database, and __call__ would only fail later with a KeyError.
+            self.log.error(
+                f"No noise events were read from nsb_file {self.nsb_file}."
+            )
+            return False
+
+        stats_ok = True
+        for tel_id in nsb_database:
+            nevents = nsb_database[tel_id].shape[0]
+            if nevents >= 2 * self.nsb_level:
+                continue
+            # Keep looping after a failure, so that every telescope with
+            # insufficient statistics gets reported
+            self.log.error(
+                f"Not enough NSB events available for tel_"
+                f"id {tel_id}. "
+                f"For nsb_level = {self.nsb_level}, at least "
+                f"{2 * self.nsb_level} events are needed ({nevents} "
+                f"were found)."
+            )
+            stats_ok = False
+
+        return stats_ok
+
+    def zero_baseline(self, nsb_database):
+        """
+        For each telescope and gain we average the waveform values over all
+        events, pixels and samples, and subtract that average from the
+        waveforms.
+        In this way we make sure we do not introduce any net average charge,
+        but only increase the fluctuations.
+
+        Parameters
+        ----------
+        nsb_database: dict
+            Dictionary with one key per telescope, containing an array
+            [n_events, n_gains, n_pixels, n_samples] (noise waveforms).
+            The arrays are modified in place.
+
+        Returns
+        -------
+        None
+        """
+
+        for waveforms in nsb_database.values():
+            for channel in range(waveforms.shape[1]):
+                # In-place subtraction on a view of the stored array
+                waveforms[:, channel] -= np.mean(waveforms[:, channel])
+
+    def __call__(self, tel_id, waveforms, selected_gain_channel=None):
+        """
+        Add one randomly chosen noise realization to the given waveforms.
+
+        Parameters
+        ----------
+        tel_id
+            Telescope id, used as key to look up the noise realizations
+        waveforms: ndarray [ngains, npixels, nsamples] (ngains=1 if
+            gain-selected)
+        selected_gain_channel: ndarray[npixels] or None if no gain selection
+
+        Returns
+        -------
+        ndarray, same shape as waveforms: original waveforms plus added NSB
+
+        """
+
+        # Note: MC waveforms passed to this function should contain data in all
+        # pixels (not DVR'ed - obviously DVR depends on noise level, it does not
+        # make sense to add the noise after DVR was applied)
+        noise = self.total_noise[tel_id][self.rng.integers(self.n_noise_realizations)]
+        if selected_gain_channel is not None:
+            # For every pixel keep only the noise of its selected gain
+            # channel. The result is [npixels, nsamples], which broadcasts
+            # against the gain-selected waveforms in the sum below.
+            noise = noise[selected_gain_channel, np.arange(waveforms.shape[1])]
+
+        return waveforms + noise
+
+
 class ImageModifier(TelescopeComponent):
     """
     Component to tune simulated background to
diff --git a/src/ctapipe/image/tests/test_modifications.py b/src/ctapipe/image/tests/test_modifications.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Component to add noise in waveforms, intended to adapt the noise level in simulations to that in a given real observation.`