Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions docs/changes/2928.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
Introduces the `~ctapipe.io.EventPreprocessor` class that can generically
transform an event table by applying the following steps:

* Generate new or rename existing columns with a `~ctapipe.core.FeatureGenerator`
* Select "good" event rows with a `~ctapipe.core.QualityQuery`
* Select which columns to output (by setting the ``features`` configuration
attribute of the `~ctapipe.io.EventPreprocessor`)

This is useful for doing the final steps of DL2 processing, and will eventually
replace what is in `~ctapipe.io.DL2EventPreprocessor` and `~ctapipe.io.DL2EventLoader`, which will be deprecated in a future release.

The `~ctapipe.io.EventPreprocessor` also includes the ability to pre-configure
itself for specific use cases by setting the ``feature_set`` option. Currently
only two are implemented: ``feature_set=dl2_irf``, which defines the transforms,
event selection, and output features for processing simulated DL2 events, and
``feature_set=custom``, which has no pre-configuration and requires all
parameters to be set by the user in a config file. More can be added by adding
to the registry.

The functionality of `~ctapipe.io.DL2EventLoader` can be mimicked with the following:

.. code-block:: python

from ctapipe.io import TableLoader, EventPreprocessor
from astropy.table import vstack

DL2FILE = "some_dl2_file.h5"
with TableLoader(DL2FILE, dl2=True, simulated=True, observation_info=True) as loader:
preprocess = EventPreprocessor(feature_set="dl2_irf")
events = vstack(
[
preprocess(QTable(c.data))
for c in loader.read_subarray_events_chunked(chunk_size=100_000)
]
)


This also introduces a helper function `~ctapipe.coordinates.altaz_to_nominal`
to convert columns of alt/az coordinates to FOV coordinates in the
`~ctapipe.coordinates.NominalFrame`, which works with the
`~ctapipe.core.FeatureGenerator`.
7 changes: 6 additions & 1 deletion src/ctapipe/coordinates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@
from .impact_distance import impact_distance, shower_impact_distance
from .nominal_frame import NominalFrame
from .telescope_frame import TelescopeFrame
from .utils import altaz_to_righthanded_cartesian, get_point_on_shower_axis
from .utils import (
altaz_to_nominal,
altaz_to_righthanded_cartesian,
get_point_on_shower_axis,
)

__all__ = [
"TelescopeFrame",
Expand All @@ -37,6 +41,7 @@
"impact_distance",
"shower_impact_distance",
"get_point_on_shower_axis",
"altaz_to_nominal",
]


Expand Down
15 changes: 15 additions & 0 deletions src/ctapipe/coordinates/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,18 @@ def test_single_telescope(subarray_prod5_paranal):
# 10 km is around the shower maximum, should be around 1 degree from the source
with pytest.warns(MissingFrameAttributeWarning):
assert u.isclose(source.separation(point), 1.0 * u.deg, atol=0.1 * u.deg)


def test_altaz_to_nominal():
from ctapipe.coordinates import altaz_to_nominal

column = altaz_to_nominal(
az=[220.0, 220.2] * u.deg,
alt=[80.0, 79.2] * u.deg,
pointing_az=[220.0, 220.0] * u.deg,
pointing_alt=[80.0, 80.0] * u.deg,
)

assert column.unit == u.deg
assert np.allclose(column[0].value, 0)
assert np.allclose(column[1].value, [0.03747984, -0.79993558])
25 changes: 24 additions & 1 deletion src/ctapipe/coordinates/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import astropy.units as u
import numpy as np
from astropy.coordinates import AltAz
from astropy.coordinates import AltAz, SkyCoord
from erfa.ufunc import p2s as cartesian_to_spherical
from erfa.ufunc import s2p as spherical_to_cartesian

from .ground_frames import _get_xyz
from .nominal_frame import NominalFrame

__all__ = [
"altaz_to_righthanded_cartesian",
"get_point_on_shower_axis",
"altaz_to_nominal",
]


Expand Down Expand Up @@ -80,3 +82,24 @@ def get_point_on_shower_axis(core_x, core_y, alt, az, telescope_position, distan
cartesian = point[np.newaxis, :] - _get_xyz(telescope_position).T
lon, lat, _ = cartesian_to_spherical(cartesian)
return AltAz(alt=lat, az=-lon, copy=False)


def altaz_to_nominal(az, alt, pointing_az, pointing_alt) -> u.Quantity:
"""
Compute nominal (FOV) coordinates from alt/az coordinates.

This can be used in a FeatureGenerator or ExpressionEngine to get a single
column with fov_lon, fov_lat coordinates.

Returns
-------
u.Quantity:
2D array of coordinates with 2 columns: fov_lon, fov_lat
"""
pointing_coord = SkyCoord(az=pointing_az, alt=pointing_alt, frame="altaz")
event_coord = SkyCoord(az=az, alt=alt, frame="altaz", origin=pointing_coord)
nominal_coord = event_coord.transform_to(NominalFrame)

return u.Quantity(
np.column_stack((nominal_coord.fov_lon.deg, nominal_coord.fov_lat.deg)), u.deg
)
2 changes: 2 additions & 0 deletions src/ctapipe/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .astropy_helpers import read_table, write_table # noqa: I001
from .datalevels import DataLevel
from .dl2_tables_preprocessing import DL2EventPreprocessor, DL2EventLoader
from .event_preprocessor import EventPreprocessor
from .eventsource import EventSource
from .eventseeker import EventSeeker
from .tableio import TableReader, TableWriter
Expand Down Expand Up @@ -46,4 +47,5 @@
"DL2EventPreprocessor",
"DL2EventLoader",
"get_hdf5_monitoring_types",
"EventPreprocessor",
]
217 changes: 217 additions & 0 deletions src/ctapipe/io/event_preprocessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
"""Module containing classes related to event loading and preprocessing"""

from astropy.coordinates import angular_separation

from ..coordinates import altaz_to_nominal
from ..core import (
Component,
FeatureGenerator,
QualityQuery,
ToolConfigurationError,
traits,
)

__all__ = ["EventPreprocessor"]


from typing import Callable


class FeatureSetRegistry:
"""Registry for custom feature set configurations."""

_registry = {}

@classmethod
def register(cls, name: str):
"""Register a feature set configuration.

Examples
--------
>>> @FeatureSetRegistry.register("my_analysis")
... def my_config(preprocessor):
... return {
... "features_to_generate": [("custom", "col_a / col_b")],
... "quality_criteria": [("cut", "custom > 0.5")],
... "output_features": ["event_id", "custom"]
... }
"""

def decorator(func: Callable):
cls._registry[name] = func
return func

return decorator

@classmethod
def get(cls, name: str):
"""Get a registered configuration function."""
return cls._registry.get(name)

@classmethod
def list_available(cls):
"""List all registered feature set names."""
return list(cls._registry.keys())


@FeatureSetRegistry.register("dl2_irf")
def _dl2_irf_config(preprocessor):
"""Built-in configuration for DL2 IRF generation."""
return {
"features_to_generate": [
("reco_energy", f"{preprocessor.energy_reconstructor}_energy"),
("reco_alt", f"{preprocessor.geometry_reconstructor}_alt"),
("reco_az", f"{preprocessor.geometry_reconstructor}_az"),
("gh_score", f"{preprocessor.gammaness_reconstructor}_prediction"),
("theta", "angular_separation(reco_az, reco_alt, true_az, true_alt)"),
(
"reco_fov_coord",
"altaz_to_nominal(reco_az, reco_alt, subarray_pointing_lon, subarray_pointing_lat)",
),
(
"reco_fov_lon",
"reco_fov_coord[:,0]",
), # note: GADF IRFs use the negative of this
("reco_fov_lat", "reco_fov_coord[:,1]"),
(
"true_fov_coord",
"altaz_to_nominal(true_az, true_alt, subarray_pointing_lon, subarray_pointing_lat)",
),
(
"true_fov_lon",
"true_fov_coord[:,0]",
), # note: GADF IRFs use the negative of this
("true_fov_lat", "true_fov_coord[:,1]"),
(
"true_fov_offset",
"angular_separation(true_fov_lon, true_fov_lat, 0*u.deg, 0*u.deg)",
),
(
"reco_fov_offset",
"angular_separation(reco_fov_lon, reco_fov_lat, 0*u.deg, 0*u.deg)",
),
(
"multiplicity",
f"np.count_nonzero({preprocessor.gammaness_reconstructor}_telescopes,axis=1)",
),
],
"quality_criteria": [
("Valid geometry", f"{preprocessor.geometry_reconstructor}_is_valid"),
("valid energy", f"{preprocessor.energy_reconstructor}_is_valid"),
("valid gammaness", f"{preprocessor.gammaness_reconstructor}_is_valid"),
("sufficient multiplicity", "multiplicity >= 4"),
],
"output_features": [
"event_id",
"obs_id",
"reco_energy",
"reco_alt",
"reco_az",
"gh_score",
"true_energy",
"true_alt",
"true_az",
"true_fov_offset",
"reco_fov_offset",
"theta",
"reco_fov_lat",
"true_fov_lat",
"reco_fov_lon",
"true_fov_lon",
"multiplicity",
],
}


class EventPreprocessor(Component):
"""
Selects or generates features and filters tables of events.

In normal use, one only has to specify the ``feature_set`` option, which
will generate features supports standard use cases. For advanced usage, you
can set ``feature_set=custom`` and pass in a configured
`~ctapipe.core.FeatureGenerator` and set the ``features`` property of this
class with the columns you to retain in the output table.

In the `~ctapipe.core.FeatureGenerator` used internally, you have access to
several additional functions useful for DL2 processing:

- `~astropy.coordinates.angular_separation`
- `~ctapipe.coordinates.altaz_to_nominal`
"""

energy_reconstructor = traits.Unicode(
default_value="RandomForestRegressor",
help="Prefix of the reco `_energy` column",
).tag(config=True)

geometry_reconstructor = traits.Unicode(
default_value="HillasReconstructor",
help="Prefix of the `_alt` and `_az` reco geometry columns",
).tag(config=True)

gammaness_reconstructor = traits.Unicode(
default_value="RandomForestClassifier",
help="Prefix of the classifier `_prediction` column",
).tag(config=True)

feature_set = traits.CaselessStrEnum(
["custom"] + FeatureSetRegistry.list_available(),
default_value="custom",
help=(
"Set up the FeatureGenerator.features, output features, and quality criteria "
"based on standard use cases."
"Specify 'custom' if you want to set your own in your config file. If this is set to "
"any value other than 'custom', the feature properties of the configuration "
"file you pass in will be overridden."
),
).tag(config=True)

features = traits.List(
traits.Unicode(),
help=(
"Features (columns) to retain in the output. "
"These can include columns generated by the FeatureGenerator. "
"If you set these, make sure feature_set=custom."
),
).tag(config=True)

def __init__(self, config=None, parent=None, **kwargs):
super().__init__(config=config, parent=parent, **kwargs)
if self.feature_set == "custom":
self.feature_generator = FeatureGenerator(parent=self)
self.quality_query = QualityQuery(parent=self)
else: # use a pre-registered feature set
feature_set = FeatureSetRegistry.get(self.feature_set)(self)
self.feature_generator = FeatureGenerator(
parent=self, features=feature_set["features_to_generate"]
)
self.quality_query = QualityQuery(
parent=self, quality_criteria=feature_set["quality_criteria"]
)
self.features = feature_set["output_features"]
# sanity checks:
if len(self.features) == 0:
raise ToolConfigurationError(
"DL2EventPreprocessor has no output features configured."
"You have set `feature_set=custom`, but did not provide the list "
"of features in the configuration (DL2EventPreprocessor.features)."
)

def __call__(self, table):
"""Return new table with only the columns in features."""

# generate new features, which includes renaming columns:
generated = self.feature_generator(
table,
angular_separation=angular_separation,
altaz_to_nominal=altaz_to_nominal,
)
Comment thread
maxnoe marked this conversation as resolved.

# apply event selection on the resulting table

selected_mask = self.quality_query.get_table_mask(generated)

# return only the columns specified in `self.features`, and rows in
# `selected_mask`
return generated[self.features][selected_mask]
Loading