Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b19566a
added HistogramsAggregator
TjarkMiener Apr 20, 2026
0a13a75
fix docs
TjarkMiener Apr 20, 2026
cfa8e35
ensure same binnnig for each chunk
TjarkMiener Apr 20, 2026
ed35129
remove density from docstring
TjarkMiener Apr 20, 2026
a29981b
use boosting histograms for the aggregation
TjarkMiener Apr 20, 2026
4348f4b
make assert for hist object more meanigful
TjarkMiener Apr 20, 2026
4fb91e6
make HistogramAggregator a subclass of the stats one and calculate al…
TjarkMiener Apr 22, 2026
acb1ff5
fix table meta reading in docs example
TjarkMiener Apr 22, 2026
aad09a0
also compute histo variances
TjarkMiener Apr 23, 2026
819f417
bump data format version in test
TjarkMiener Apr 23, 2026
358d028
add to the docs a small code snippet to use basic hist plotting funcs
TjarkMiener Apr 23, 2026
958a8f2
added step in tutorial description
TjarkMiener Apr 23, 2026
397ffbb
fix docs
TjarkMiener Apr 23, 2026
bc12780
fix underscore for tutorial title
TjarkMiener Apr 23, 2026
94fd377
do not store vars from histograms
TjarkMiener Apr 23, 2026
d3c8dbf
fix typo
TjarkMiener Apr 23, 2026
e23ca0a
polish configuration of histo agg
TjarkMiener Apr 23, 2026
69b54b0
fix metadata passing of _add_result_columns()
TjarkMiener Apr 24, 2026
bf20fe0
polish comment in histogram aggregation tutorial
TjarkMiener Apr 24, 2026
2d8d092
make HistoAgg inherit from BaseAgg
TjarkMiener Apr 24, 2026
d79757c
make table group class attribute
TjarkMiener Apr 24, 2026
320eb75
fix axis name bug
TjarkMiener Apr 27, 2026
ec1403e
only calc histograms in HistogramAggregatorr; do not assess stats des…
TjarkMiener Apr 27, 2026
cd47153
fix import
TjarkMiener Apr 27, 2026
3eacb9b
fix tests for pix stats tool
TjarkMiener Apr 27, 2026
35d1918
rename conotainer for chunked histograms and fix inheritance
TjarkMiener Apr 27, 2026
408f51e
implemented latest comments
TjarkMiener Apr 27, 2026
62d4b60
make aggregator and chunk/stats containers independent from the camer…
TjarkMiener Apr 27, 2026
3e5f00e
polish compute_histograms() function to make it generic and independe…
TjarkMiener Apr 27, 2026
aa3483a
polish testing for random shapes and check that introduced shift betw…
TjarkMiener Apr 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changes/2996.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added a new class HistogramAggregator to compute histograms along a specified axis, and updated the documentation to reflect this new functionality. The documentation includes examples of how to use the HistogramsAggregator in practice.
248 changes: 248 additions & 0 deletions examples/tutorials/histogram_aggregation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
"""
Histogram aggregation with HistogramAggregator
==============================================

This tutorial shows how to:

1. Build an event table with camera-like data (images and peak times) and some invalid values.
2. Configure and run HistogramAggregator in chunks.
3. Access histogram counts, bin edges, summary statistics, and valid-event counts (n_events).
4. Plot one pixel histogram from the selected chunks and both gain channels for both image and peak_time columns.
5. Overlay mean, median, and std on top of the histogram curves.
6. Plot the same histogram using Hist's built-in plotting functionality after filling a Hist object with the aggregated histogram counts and variances.
"""

import matplotlib.pyplot as plt
import numpy as np
import hist
from astropy.table import Table
from astropy.time import Time
from traitlets.config import Config

from ctapipe.monitoring.aggregator import HistogramAggregator
from hist import Hist


# -------------------------------------------------------------------
# Create synthetic event-wise camera data
# -------------------------------------------------------------------
rng = np.random.default_rng(42)

n_events = 2000
n_channels = 2
n_pixels = 100

times = Time(
np.linspace(60117.911, 60117.9258, num=n_events),
scale="tai",
format="mjd",
)
event_ids = np.arange(n_events)
images = rng.normal(loc=85.0, scale=10.0, size=(n_events, n_channels, n_pixels))
images[:, 1, :] -= 15 # Simulate lower gain channel by shifting the mean down by 15
peak_time = rng.normal(loc=20.0, scale=2.0, size=(n_events, n_channels, n_pixels))

# Add a few invalid values to demonstrate n_events behavior.
images[3, 0, 10] = np.nan
images[15, 0, 10] = np.nan
peak_time[5, 0, 10] = np.nan
peak_time[35, 1, 10] = np.nan

# Optional static mask over sample dimensions (channel, pixel).
# Here we exclude channel 1, pixel 99 for all events.
masked_elements_of_sample = np.zeros((n_channels, n_pixels), dtype=bool)
masked_elements_of_sample[1, 99] = True

table = Table(
[times, event_ids, images, peak_time],
names=("time", "event_id", "image", "peak_time"),
)


# -------------------------------------------------------------------
# Configure and run histogram aggregation
# -------------------------------------------------------------------
config_image = Config(
{
"HistogramAggregator": {
"chunking_type": "SizeChunking",
"axis_definition": {
"class_name": "Regular",
"bins": 50,
"start": 40.0,
"stop": 110.0,
},
},
"SizeChunking": {"chunk_size": 1000},
}
)

aggregator_image = HistogramAggregator(config=config_image)
result = aggregator_image(
table=table,
col_name="image",
masked_elements_of_sample=masked_elements_of_sample,
)

config_peak_time = Config(
{
"HistogramAggregator": {
"chunking_type": "SizeChunking",
"axis_definition": {
"class_name": "Regular",
"bins": 50,
"start": 2.0,
"stop": 38.0,
},
},
"SizeChunking": {"chunk_size": 1000},
}
)

aggregator_peak_time = HistogramAggregator(config=config_peak_time)
result_peak_time = aggregator_peak_time(
table=table,
col_name="peak_time",
masked_elements_of_sample=masked_elements_of_sample,
)

print(f"Number of chunks: {len(result)}")
print(f"histogram shape per chunk: {result[0]['histogram'].shape}")
print(f"bin edges shape per chunk: {result[0].meta['bin_edges'].shape}")
print(f"bin centers shape per chunk: {result[0].meta['bin_centers'].shape}")
print(f"n_events shape per chunk: {result[0]['n_events'].shape}")


# -------------------------------------------------------------------
# Plot the histograms for one pixel with two gain channels
# -------------------------------------------------------------------
# We aggreagted the histograms in two chunks of 1000 events each, so we have two histograms per gain channel
# for the selected pixel. We will plot both chunks for the selected pixel and gain channels
# on the same axes for comparison, and then do the same for the peak_time column in a separate figure.

pixel_index = 10
gain_label = {0: "High Gain", 1: "Low Gain"}

fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
for chunk_index, ax in enumerate(axes):
bin_edges = result[chunk_index].meta["bin_edges"]
bin_centers = result[chunk_index].meta["bin_centers"]
channel_handles = []

for channel_index in range(n_channels):
counts = result[chunk_index]["histogram"][:, channel_index, pixel_index]
valid_events = result[chunk_index]["n_events"][channel_index, pixel_index]

line = ax.step(
bin_edges[:-1],
counts,
where="post",
label=f"{gain_label[channel_index]} (n_events={valid_events})",
)[0]
channel_handles.append(line)
color = line.get_color()

# Plot bin variances as error bars (use sqrt of variance for error) at bin centers
bin_errors = np.sqrt(counts)
ax.errorbar(
bin_centers,
counts,
yerr=bin_errors,
fmt="none",
color=color,
elinewidth=1.0,
capsize=3,
alpha=0.6,
)

ax.set_title(f"Chunk {chunk_index}, pixel {pixel_index}")
ax.set_xlabel("image value")
ax.set_ylabel("Counts")

ax.legend(
handles=channel_handles,
loc="upper left",
fontsize=8,
)

plt.show()


# -------------------------------------------------------------------
# Plot peak_time histograms in a separate figure
# -------------------------------------------------------------------
fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
for chunk_index, ax in enumerate(axes):
bin_edges = result_peak_time[chunk_index].meta["bin_edges"]
bin_centers = result_peak_time[chunk_index].meta["bin_centers"]

channel_handles = []

for channel_index in range(n_channels):
counts = result_peak_time[chunk_index]["histogram"][
:, channel_index, pixel_index
]
valid_events = result_peak_time[chunk_index]["n_events"][
channel_index, pixel_index
]

line = ax.step(
bin_edges[:-1],
counts,
where="post",
label=f"{gain_label[channel_index]} (n_events={valid_events})",
)[0]
channel_handles.append(line)
color = line.get_color()

# Plot bin variances as error bars (use sqrt of variance for error) at bin centers
bin_errors = np.sqrt(counts)
ax.errorbar(
bin_centers,
counts,
yerr=bin_errors,
fmt="none",
color=color,
elinewidth=1.0,
capsize=3,
alpha=0.6,
)

ax.set_title(f"Peak Time - Chunk {chunk_index}, pixel {pixel_index}")
ax.set_xlabel("peak_time value")
ax.set_ylabel("Counts")
ax.legend(
handles=channel_handles,
loc="upper left",
fontsize=8,
)

plt.show()


# -------------------------------------------------------------------
# Initialize hist, fill it and plot via Hist functionality
# -------------------------------------------------------------------

# Create a Hist object with the same binning as the aggregator
bin_edges = result[0].meta["bin_edges"]
h = Hist(
hist.axis.Regular(len(bin_edges) - 1, bin_edges[0], bin_edges[-1], name="value")
)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See prev comment: a helper function to do this would be nice.


# Get the histogram counts and variances for the selected pixel and channel
chunk_index = 0
counts = result[0]["histogram"][:, chunk_index, pixel_index]

# Set the histogram values using the view interface
h.view(flow=False)[:] = counts

# Plot the histogram with error bars using Hist's built-in plotting functionality
# Requires 'hist[plot]' to be installed in the environment.
h.plot(yerr=True)
plt.title(
f"Chunk {chunk_index}, Pixel {pixel_index} (High Gain) histogram from Hist object"
)
plt.xlabel("image value")
plt.ylabel("Counts")
plt.show()
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"joblib",
"numba >=0.59.0",
"numpy >=1.26,<3.0.0",
"hist",
"packaging",
"psutil",
"pyyaml >=5.1",
Expand Down Expand Up @@ -79,6 +80,7 @@ docs = [
"ctapipe[all]",
"ffmpeg-python",
"graphviz",
"hist[plot]",
"ipython",
"jupyter",
"nbsphinx",
Expand Down
7 changes: 7 additions & 0 deletions src/ctapipe/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
"StatisticsContainer",
"ChunkContainer",
"ChunkStatisticsContainer",
"ChunkHistogramsContainer",
"ImageStatisticsContainer",
"IntensityStatisticsContainer",
"PeakTimeStatisticsContainer",
Expand Down Expand Up @@ -1267,6 +1268,12 @@ class ChunkStatisticsContainer(ChunkContainer):
std = Field(None, "standard deviation of the chunk distribution")


class ChunkHistogramsContainer(ChunkContainer):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd name this singular ChunkHistogramContainer to match how we name others. It only stores one "histogram" (even if it's one with categories per pixel and channel)

"""Container for histograms of the chunk distribution"""

histogram = Field(None, "histogram of the chunk distribution")

Copy link
Copy Markdown
Member

@kosack kosack Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would be nice to have a helper function to convert this Container back to a Hist object. Here is an example:

def hist_from_container(
    cont: ChunkHistogramsContainer, axis_names=["pedestal", "channel", "pixel"]
) -> Hist:
    """Returns a Hist constructed from a stored ChunkHistogramsContainer."""

    bin_edges = cont.meta["bin_edges"]
    axes = [hist.axis.Variable(edges=bin_edges, name=axis_names[0])]

    # the rest of the dimensions
    for name, n_bins in zip(axis_names[1:], cont.histogram.shape[1:]):
        if n_bins == 2:
            axes.append(hist.axis.IntCategory(categories=np.arange(2), name=name))
        else:
            axes.append(
                hist.axis.Regular(bins=n_bins, start=0, stop=n_bins - 1, name=name)
            )

    h = Hist(*axes)
    h[...] = cont.histogram[...]
    return h

Then I can do things like:

with HDF5TableReader("stats.h5") as reader:
    for i, container in enumerate(
        reader.read(
            table_name="/dl1/monitoring/telescope/calibration/camera/pixel_histograms/sky_pedestal_image/tel_001",
            containers=ChunkHistogramsContainer,
            prefixes=[""],
        )
    ):
        h = hist_from_container(container)

        fig, ax = plt.subplots(1, 3, figsize=(10, 3), layout="constrained")
        fig.suptitle(f"Chunk {i}")
        h[:, 0, :].plot(ax=ax[0], norm="log")
        h[:, 1, :].plot(ax=ax[1], norm="log")
        h.integrate("pixel").stack("channel").plot(ax=ax[2], legend=True)

        ax[0].set_title(f"Channel 0")
        ax[1].set_title(f"Channel 1")
        ax[2].set_title("Integral oval all pixels")
image

Ideally, you could also store the axis names (["pedestal", "channel_id", "pixel_id"]) in the metadata, that way making one of these plots is trivial from any file.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What we did in datapipe-testbench is to serialize all the necessary Hist info in the metadata: the axis names, the axis types (e.g. Regular, Variable, Category), units, etc. That would work well here as well (and in the future I could use this class directly).

Copy link
Copy Markdown
Member

@kosack kosack Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For column-wise access, you could also do a similar hist_from_table() that would then give back a 4d histogram, with an added time dimension (maybe use the mean-time of the chunk) . Then you can easily do many plots like "plot pixel 3's pedestal over time"

Copy link
Copy Markdown
Member

@kosack kosack Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One more nice demo of using this if it's a Hist:

fig, ax = plt.subplots(1,2, figsize=(10,3))
disp1 = CameraDisplay(geom, image=h[:,0,:].profile("pedestal").values(), ax=ax[0])
disp2 = CameraDisplay(geom, image=h[:,0,:].profile("pedestal").variances(), ax=ax[1])

ax[0].set_title(ax[0].get_title() +  " Pedestal")
ax[1].set_title(ax[0].get_title() +  " Pedestal Variance")

disp1.add_colorbar()
disp2.add_colorbar()
image

There, you don't even need the stats, however, as @maxnoe pointed out above, computing the mean and variance from a histogram isn't as precise as doing it from the events.


class PixelStatisticsContainer(Container):
"""
Container for pixel statistics from flat-field and sky pedestal events
Expand Down
3 changes: 2 additions & 1 deletion src/ctapipe/io/datawriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ def _get_tel_index(event, tel_id):
# (meaning readers need to update scripts)
# - increase the minor number if new columns or datasets are added
# - increase the patch number if there is a small bugfix to the model.
DATA_MODEL_VERSION = "v7.5.0"
DATA_MODEL_VERSION = "v7.6.0"
DATA_MODEL_CHANGE_HISTORY = """
- v7.6.0: - Add new monitoring group for pixel histograms: DL1_PIXEL_HISTOGRAMS_GROUP.
- v7.5.0: - Add new field pixel_time_shift in R1CameraContainer and DL0CameraContainer
- v7.4.0: - Add new data quality and top-level monitoring groups for DL0 and DL1 data.
- v7.3.0: - Add possibility to attach monitoring data to the event HDF5 file.
Expand Down
3 changes: 3 additions & 0 deletions src/ctapipe/io/hdf5dataformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@
DL1_FLATFIELD_PEAK_TIME_GROUP = (
"/dl1/monitoring/telescope/calibration/camera/pixel_statistics/flatfield_peak_time"
)
DL1_PIXEL_HISTOGRAMS_GROUP = (
"/dl1/monitoring/telescope/calibration/camera/pixel_histograms"
)
DL2_SUBARRAY_MONITORING_GROUP = "/dl2/monitoring/subarray"
DL2_SUBARRAY_INTER_CALIBRATION_GROUP = "/dl2/monitoring/subarray/inter_calibration"
DL2_SUBARRAY_CROSS_CALIBRATION_GROUP = "/dl2/monitoring/subarray/cross_calibration"
Expand Down
1 change: 1 addition & 0 deletions src/ctapipe/io/hdf5eventsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
"v7.3.0",
"v7.4.0",
"v7.5.0",
"v7.6.0",
]


Expand Down
14 changes: 14 additions & 0 deletions src/ctapipe/io/hdf5merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
DL1_CAMERA_COEFFICIENTS_GROUP,
DL1_COLUMN_NAMES,
DL1_IMAGE_STATISTICS_TABLE,
DL1_PIXEL_HISTOGRAMS_GROUP,
DL1_PIXEL_STATISTICS_GROUP,
DL1_SUBARRAY_POINTING_GROUP,
DL1_SUBARRAY_QUALITY_GROUP,
Expand Down Expand Up @@ -59,6 +60,7 @@
"v7.3.0",
"v7.4.0",
"v7.5.0",
"v7.6.0",
]


Expand Down Expand Up @@ -95,6 +97,7 @@ class NodeType(enum.Enum):
DL1_SUBARRAY_POINTING_GROUP: NodeType.TABLE,
DL1_TEL_POINTING_GROUP: NodeType.TEL_GROUP,
DL1_PIXEL_STATISTICS_GROUP: NodeType.ITER_TEL_GROUP,
DL1_PIXEL_HISTOGRAMS_GROUP: NodeType.ITER_TEL_GROUP,
DL1_CAMERA_COEFFICIENTS_GROUP: NodeType.TEL_GROUP,
DL1_TEL_MUON_THROUGHPUT_GROUP: NodeType.TEL_GROUP,
DL2_TEL_GROUP: NodeType.ITER_TEL_GROUP,
Expand Down Expand Up @@ -485,6 +488,7 @@ def _append_monitoring_data(self, other):
if self.telescope_events:
self._append_monitoring_telescope_groups(other)
self._append_pixel_statistics(other)
self._append_pixel_histograms(other)
self._append_quality_telescope_groups(other)

def _append_monitoring_subarray_groups(self, other):
Expand Down Expand Up @@ -531,6 +535,16 @@ def _append_pixel_statistics(self, other):
other, other.root[key], once=self.single_ob
)

def _append_pixel_histograms(self, other):
"""Append pixel histogram monitoring data."""
for dl1_colname in DL1_COLUMN_NAMES:
for event_type in EventType:
key = f"{DL1_PIXEL_HISTOGRAMS_GROUP}/{event_type.name.lower()}_{dl1_colname}"
if key in other.root:
self._append_table_group(
other, other.root[key], once=self.single_ob
)

def _append_quality_telescope_groups(self, other):
"""Append quality telescope groups.

Expand Down
2 changes: 1 addition & 1 deletion src/ctapipe/io/tests/test_hdf5eventsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_is_compatible(compatible_file, request):
def test_metadata(dl1_file):
with HDF5EventSource(input_url=dl1_file) as source:
assert source.is_simulation
assert source.datamodel_version == (7, 5, 0)
assert source.datamodel_version == (7, 6, 0)
assert set(source.datalevels) == {
DataLevel.DL1_IMAGES,
DataLevel.DL1_PARAMETERS,
Expand Down
Loading
Loading