cta-observatory · TjarkMiener · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/docs/changes/2996.feature.rst b/docs/changes/2996.feature.rst
@@ -0,0 +1 @@
+Added a new class HistogramAggregator to compute histograms along a specified axis, and updated the documentation to reflect this new functionality. The documentation includes examples of how to use the HistogramsAggregator in practice.
diff --git a/examples/tutorials/histogram_aggregation.py b/examples/tutorials/histogram_aggregation.py
@@ -0,0 +1,248 @@
+"""
+Histogram aggregation with HistogramAggregator
+==============================================
+
+This tutorial shows how to:
+
+1. Build an event table with camera-like data (images and peak times) and some invalid values.
+2. Configure and run HistogramAggregator in chunks.
+3. Access histogram counts, bin edges, summary statistics, and valid-event counts (n_events).
+4. Plot one pixel histogram from the selected chunks and both gain channels for both image and peak_time columns.
+5. Overlay mean, median, and std on top of the histogram curves.
+6. Plot the same histogram using Hist's built-in plotting functionality after filling a Hist object with the aggregated histogram counts and variances.
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import hist
+from astropy.table import Table
+from astropy.time import Time
+from traitlets.config import Config
+
+from ctapipe.monitoring.aggregator import HistogramAggregator
+from hist import Hist
+
+
+# -------------------------------------------------------------------
+# Create synthetic event-wise camera data
+# -------------------------------------------------------------------
+rng = np.random.default_rng(42)
+
+n_events = 2000
+n_channels = 2
+n_pixels = 100
+
+times = Time(
+    np.linspace(60117.911, 60117.9258, num=n_events),
+    scale="tai",
+    format="mjd",
+)
+event_ids = np.arange(n_events)
+images = rng.normal(loc=85.0, scale=10.0, size=(n_events, n_channels, n_pixels))
+images[:, 1, :] -= 15  # Simulate lower gain channel by shifting the mean down by 15
+peak_time = rng.normal(loc=20.0, scale=2.0, size=(n_events, n_channels, n_pixels))
+
+# Add a few invalid values to demonstrate n_events behavior.
+images[3, 0, 10] = np.nan
+images[15, 0, 10] = np.nan
+peak_time[5, 0, 10] = np.nan
+peak_time[35, 1, 10] = np.nan
+
+# Optional static mask over sample dimensions (channel, pixel).
+# Here we exclude channel 1, pixel 99 for all events.
+masked_elements_of_sample = np.zeros((n_channels, n_pixels), dtype=bool)
+masked_elements_of_sample[1, 99] = True
+
+table = Table(
+    [times, event_ids, images, peak_time],
+    names=("time", "event_id", "image", "peak_time"),
+)
+
+
+# -------------------------------------------------------------------
+# Configure and run histogram aggregation
+# -------------------------------------------------------------------
+config_image = Config(
+    {
+        "HistogramAggregator": {
+            "chunking_type": "SizeChunking",
+            "axis_definition": {
+                "class_name": "Regular",
+                "bins": 50,
+                "start": 40.0,
+                "stop": 110.0,
+            },
+        },
+        "SizeChunking": {"chunk_size": 1000},
+    }
+)
+
+aggregator_image = HistogramAggregator(config=config_image)
+result = aggregator_image(
+    table=table,
+    col_name="image",
+    masked_elements_of_sample=masked_elements_of_sample,
+)
+
+config_peak_time = Config(
+    {
+        "HistogramAggregator": {
+            "chunking_type": "SizeChunking",
+            "axis_definition": {
+                "class_name": "Regular",
+                "bins": 50,
+                "start": 2.0,
+                "stop": 38.0,
+            },
+        },
+        "SizeChunking": {"chunk_size": 1000},
+    }
+)
+
+aggregator_peak_time = HistogramAggregator(config=config_peak_time)
+result_peak_time = aggregator_peak_time(
+    table=table,
+    col_name="peak_time",
+    masked_elements_of_sample=masked_elements_of_sample,
+)
+
+print(f"Number of chunks: {len(result)}")
+print(f"histogram shape per chunk: {result[0]['histogram'].shape}")
+print(f"bin edges shape per chunk: {result[0].meta['bin_edges'].shape}")
+print(f"bin centers shape per chunk: {result[0].meta['bin_centers'].shape}")
+print(f"n_events shape per chunk: {result[0]['n_events'].shape}")
+
+
+# -------------------------------------------------------------------
+# Plot the histograms for one pixel with two gain channels
+# -------------------------------------------------------------------
+# We aggreagted the histograms in two chunks of 1000 events each, so we have two histograms per gain channel
+# for the selected pixel. We will plot both chunks for the selected pixel and gain channels
+# on the same axes for comparison, and then do the same for the peak_time column in a separate figure.
+
+pixel_index = 10
+gain_label = {0: "High Gain", 1: "Low Gain"}
+
+fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
+for chunk_index, ax in enumerate(axes):
+    bin_edges = result[chunk_index].meta["bin_edges"]
+    bin_centers = result[chunk_index].meta["bin_centers"]
+    channel_handles = []
+
+    for channel_index in range(n_channels):
+        counts = result[chunk_index]["histogram"][:, channel_index, pixel_index]
+        valid_events = result[chunk_index]["n_events"][channel_index, pixel_index]
+
+        line = ax.step(
+            bin_edges[:-1],
+            counts,
+            where="post",
+            label=f"{gain_label[channel_index]} (n_events={valid_events})",
+        )[0]
+        channel_handles.append(line)
+        color = line.get_color()
+
+        # Plot bin variances as error bars (use sqrt of variance for error) at bin centers
+        bin_errors = np.sqrt(counts)
+        ax.errorbar(
+            bin_centers,
+            counts,
+            yerr=bin_errors,
+            fmt="none",
+            color=color,
+            elinewidth=1.0,
+            capsize=3,
+            alpha=0.6,
+        )
+
+    ax.set_title(f"Chunk {chunk_index}, pixel {pixel_index}")
+    ax.set_xlabel("image value")
+    ax.set_ylabel("Counts")
+
+    ax.legend(
+        handles=channel_handles,
+        loc="upper left",
+        fontsize=8,
+    )
+
+plt.show()
+
+
+# -------------------------------------------------------------------
+# Plot peak_time histograms in a separate figure
+# -------------------------------------------------------------------
+fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
+for chunk_index, ax in enumerate(axes):
+    bin_edges = result_peak_time[chunk_index].meta["bin_edges"]
+    bin_centers = result_peak_time[chunk_index].meta["bin_centers"]
+
+    channel_handles = []
+
+    for channel_index in range(n_channels):
+        counts = result_peak_time[chunk_index]["histogram"][
+            :, channel_index, pixel_index
+        ]
+        valid_events = result_peak_time[chunk_index]["n_events"][
+            channel_index, pixel_index
+        ]
+
+        line = ax.step(
+            bin_edges[:-1],
+            counts,
+            where="post",
+            label=f"{gain_label[channel_index]} (n_events={valid_events})",
+        )[0]
+        channel_handles.append(line)
+        color = line.get_color()
+
+        # Plot bin variances as error bars (use sqrt of variance for error) at bin centers
+        bin_errors = np.sqrt(counts)
+        ax.errorbar(
+            bin_centers,
+            counts,
+            yerr=bin_errors,
+            fmt="none",
+            color=color,
+            elinewidth=1.0,
+            capsize=3,
+            alpha=0.6,
+        )
+
+    ax.set_title(f"Peak Time - Chunk {chunk_index}, pixel {pixel_index}")
+    ax.set_xlabel("peak_time value")
+    ax.set_ylabel("Counts")
+    ax.legend(
+        handles=channel_handles,
+        loc="upper left",
+        fontsize=8,
+    )
+
+plt.show()
+
+
+# -------------------------------------------------------------------
+# Initialize hist, fill it and plot via Hist functionality
+# -------------------------------------------------------------------
+
+# Create a Hist object with the same binning as the aggregator
+bin_edges = result[0].meta["bin_edges"]
+h = Hist(
+    hist.axis.Regular(len(bin_edges) - 1, bin_edges[0], bin_edges[-1], name="value")
+)
+
+# Get the histogram counts and variances for the selected pixel and channel
+chunk_index = 0
+counts = result[0]["histogram"][:, chunk_index, pixel_index]
+
+# Set the histogram values using the view interface
+h.view(flow=False)[:] = counts
+
+# Plot the histogram with error bars using Hist's built-in plotting functionality
+# Requires 'hist[plot]' to be installed in the environment.
+h.plot(yerr=True)
+plt.title(
+    f"Chunk {chunk_index}, Pixel {pixel_index} (High Gain) histogram from Hist object"
+)
+plt.xlabel("image value")
+plt.ylabel("Counts")
+plt.show()
diff --git a/pyproject.toml b/pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
     "joblib",
     "numba >=0.59.0",
     "numpy >=1.26,<3.0.0",
+    "hist",
     "packaging",
     "psutil",
     "pyyaml >=5.1",
@@ -79,6 +80,7 @@ docs = [
     "ctapipe[all]",
     "ffmpeg-python",
     "graphviz",
+    "hist[plot]",
     "ipython",
     "jupyter",
     "nbsphinx",

diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
@@ -63,6 +63,7 @@
     "StatisticsContainer",
     "ChunkContainer",
     "ChunkStatisticsContainer",
+    "ChunkHistogramsContainer",
     "ImageStatisticsContainer",
     "IntensityStatisticsContainer",
     "PeakTimeStatisticsContainer",
@@ -1267,6 +1268,12 @@ class ChunkStatisticsContainer(ChunkContainer):
     std = Field(None, "standard deviation of the chunk distribution")
 
 
+class ChunkHistogramsContainer(ChunkContainer):
+    """Container for histograms of the chunk distribution"""
+
+    histogram = Field(None, "histogram of the chunk distribution")
+
+
 class PixelStatisticsContainer(Container):
     """
     Container for pixel statistics from flat-field and sky pedestal events

diff --git a/src/ctapipe/io/datawriter.py b/src/ctapipe/io/datawriter.py
@@ -44,8 +44,9 @@ def _get_tel_index(event, tel_id):
 #   (meaning readers need to update scripts)
 # - increase the minor number if new columns or datasets are added
 # - increase the patch number if there is a small bugfix to the model.
-DATA_MODEL_VERSION = "v7.5.0"
+DATA_MODEL_VERSION = "v7.6.0"
 DATA_MODEL_CHANGE_HISTORY = """
+- v7.6.0: - Add new monitoring group for pixel histograms: DL1_PIXEL_HISTOGRAMS_GROUP.
 - v7.5.0: - Add new field pixel_time_shift in R1CameraContainer and DL0CameraContainer
 - v7.4.0: - Add new data quality and top-level monitoring groups for DL0 and DL1 data.
 - v7.3.0: - Add possibility to attach monitoring data to the event HDF5 file.

diff --git a/src/ctapipe/io/hdf5dataformat.py b/src/ctapipe/io/hdf5dataformat.py
@@ -147,6 +147,9 @@
 DL1_FLATFIELD_PEAK_TIME_GROUP = (
     "/dl1/monitoring/telescope/calibration/camera/pixel_statistics/flatfield_peak_time"
 )
+DL1_PIXEL_HISTOGRAMS_GROUP = (
+    "/dl1/monitoring/telescope/calibration/camera/pixel_histograms"
+)
 DL2_SUBARRAY_MONITORING_GROUP = "/dl2/monitoring/subarray"
 DL2_SUBARRAY_INTER_CALIBRATION_GROUP = "/dl2/monitoring/subarray/inter_calibration"
 DL2_SUBARRAY_CROSS_CALIBRATION_GROUP = "/dl2/monitoring/subarray/cross_calibration"

diff --git a/src/ctapipe/io/hdf5eventsource.py b/src/ctapipe/io/hdf5eventsource.py
@@ -108,6 +108,7 @@
     "v7.3.0",
     "v7.4.0",
     "v7.5.0",
+    "v7.6.0",
 ]
 
 

diff --git a/src/ctapipe/io/hdf5merger.py b/src/ctapipe/io/hdf5merger.py
@@ -20,6 +20,7 @@
     DL1_CAMERA_COEFFICIENTS_GROUP,
     DL1_COLUMN_NAMES,
     DL1_IMAGE_STATISTICS_TABLE,
+    DL1_PIXEL_HISTOGRAMS_GROUP,
     DL1_PIXEL_STATISTICS_GROUP,
     DL1_SUBARRAY_POINTING_GROUP,
     DL1_SUBARRAY_QUALITY_GROUP,
@@ -59,6 +60,7 @@
     "v7.3.0",
     "v7.4.0",
     "v7.5.0",
+    "v7.6.0",
 ]
 
 
@@ -95,6 +97,7 @@ class NodeType(enum.Enum):
     DL1_SUBARRAY_POINTING_GROUP: NodeType.TABLE,
     DL1_TEL_POINTING_GROUP: NodeType.TEL_GROUP,
     DL1_PIXEL_STATISTICS_GROUP: NodeType.ITER_TEL_GROUP,
+    DL1_PIXEL_HISTOGRAMS_GROUP: NodeType.ITER_TEL_GROUP,
     DL1_CAMERA_COEFFICIENTS_GROUP: NodeType.TEL_GROUP,
     DL1_TEL_MUON_THROUGHPUT_GROUP: NodeType.TEL_GROUP,
     DL2_TEL_GROUP: NodeType.ITER_TEL_GROUP,
@@ -485,6 +488,7 @@ def _append_monitoring_data(self, other):
         if self.telescope_events:
             self._append_monitoring_telescope_groups(other)
             self._append_pixel_statistics(other)
+            self._append_pixel_histograms(other)
             self._append_quality_telescope_groups(other)
 
     def _append_monitoring_subarray_groups(self, other):
@@ -531,6 +535,16 @@ def _append_pixel_statistics(self, other):
                         other, other.root[key], once=self.single_ob
                     )
 
+    def _append_pixel_histograms(self, other):
+        """Append pixel histogram monitoring data."""
+        for dl1_colname in DL1_COLUMN_NAMES:
+            for event_type in EventType:
+                key = f"{DL1_PIXEL_HISTOGRAMS_GROUP}/{event_type.name.lower()}_{dl1_colname}"
+                if key in other.root:
+                    self._append_table_group(
+                        other, other.root[key], once=self.single_ob
+                    )
+
     def _append_quality_telescope_groups(self, other):
         """Append quality telescope groups.
 

diff --git a/src/ctapipe/io/tests/test_hdf5eventsource.py b/src/ctapipe/io/tests/test_hdf5eventsource.py
@@ -31,7 +31,7 @@ def test_is_compatible(compatible_file, request):
 def test_metadata(dl1_file):
     with HDF5EventSource(input_url=dl1_file) as source:
         assert source.is_simulation
-        assert source.datamodel_version == (7, 5, 0)
+        assert source.datamodel_version == (7, 6, 0)
         assert set(source.datalevels) == {
             DataLevel.DL1_IMAGES,
             DataLevel.DL1_PARAMETERS,
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Added a new class HistogramAggregator to compute histograms along a specified axis, and updated the documentation to reflect this new functionality. The documentation includes examples of how to use the HistogramsAggregator in practice.