Skip to content

Commit b64697a

Browse files
authored
Merge pull request #1068 from MetOffice/1065_update_collapse_by_hour
Update collapse_by_hour for multiple cases
2 parents 649faec + 7836992 commit b64697a

File tree

5 files changed

+191
-58
lines changed

5 files changed

+191
-58
lines changed

src/CSET/operators/_utils.py

+34-36
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,38 @@ def fully_equalise_attributes(cubes: iris.cube.CubeList):
191191
return cubes
192192

193193

194+
def is_time_aggregatable(cube: iris.cube.Cube) -> bool:
195+
"""Determine whether a cube can be aggregated in time.
196+
197+
If a cube is aggregatable it will contain both a 'forecast_reference_time'
198+
and 'forecast_period' coordinate as dimensional coordinates.
199+
200+
Arguments
201+
---------
202+
cube: iris.cube.Cube
203+
An iris cube which will be checked to see if it is aggregatable based
204+
on a set of pre-defined dimensional time coordinates:
205+
'forecast_period' and 'forecast_reference_time'.
206+
207+
Returns
208+
-------
209+
bool
210+
If true, then the cube is aggregatable and contains dimensional
211+
coordinates including both 'forecast_reference_time' and
212+
'forecast_period'.
213+
"""
214+
# Acceptable time coordinate names for aggregatable cube.
215+
TEMPORAL_COORD_NAMES = ["forecast_period", "forecast_reference_time"]
216+
217+
# Coordinate names for the cube.
218+
coord_names = [coord.name() for coord in cube.coords(dim_coords=True)]
219+
220+
# Check which temporal coordinates we have.
221+
temporal_coords = [coord for coord in coord_names if coord in TEMPORAL_COORD_NAMES]
222+
# Return whether both coordinates are in the temporal coordinates.
223+
return len(temporal_coords) == 2
224+
225+
194226
def ensure_aggregatable_across_cases(
195227
cube: iris.cube.Cube | iris.cube.CubeList,
196228
) -> iris.cube.Cube:
@@ -199,8 +231,8 @@ def ensure_aggregatable_across_cases(
199231
Arguments
200232
---------
201233
cube: iris.cube.Cube | iris.cube.CubeList
202-
If a Cube is provided a sub-operator is called to determine if the
203-
cube has the necessary dimensional coordinates to be aggregateable.
234+
If a Cube is provided it is checked to determine if it has the
235+
necessary dimensional coordinates to be aggregatable.
204236
These necessary coordinates are 'forecast_period' and
205237
'forecast_reference_time'. If a CubeList is provided a Cube is created
206238
by slicing over all time coordinates and the resulting list is merged
@@ -219,40 +251,6 @@ def ensure_aggregatable_across_cases(
219251
raised. The user should then provide a CubeList to be turned into an
220252
aggregatable cube to allow aggregation across multiple cases to occur.
221253
"""
222-
223-
def is_time_aggregatable(cube: iris.cube.Cube) -> bool:
224-
"""Determine whether a cube can be aggregated in time.
225-
226-
If a cube is aggregatable it will contain both a 'forecast_reference_time'
227-
and 'forecast_period' coordinate as dimensional coordinates.
228-
229-
Arguments
230-
---------
231-
cube: iris.cube.Cube
232-
An iris cube which will be checked to see if it is aggregatable based
233-
on a set of pre-defined dimensional time coordinates:
234-
'forecast_period' and 'forecast_reference_time'.
235-
236-
Returns
237-
-------
238-
bool
239-
If true, then the cube is aggregatable and contains dimensional
240-
coordinates including both 'forecast_reference_time' and
241-
'forecast_period'.
242-
"""
243-
# Acceptable time coordinate names for aggregatable cube.
244-
TEMPORAL_COORD_NAMES = ["forecast_period", "forecast_reference_time"]
245-
246-
# Coordinate names for the cube.
247-
coord_names = [coord.name() for coord in cube.coords(dim_coords=True)]
248-
249-
# Check which temporal coordinates we have.
250-
temporal_coords = [
251-
coord for coord in coord_names if coord in TEMPORAL_COORD_NAMES
252-
]
253-
# Return whether both coordinates are in the temporal coordinates.
254-
return len(temporal_coords) == 2
255-
256254
# Check to see if a cube is input and if that cube is iterable.
257255
if isinstance(cube, iris.cube.Cube):
258256
if is_time_aggregatable(cube):

src/CSET/operators/collapse.py

+68-9
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020
import iris.analysis
2121
import iris.coord_categorisation
2222
import iris.cube
23+
import iris.util
2324

24-
from CSET.operators._utils import ensure_aggregatable_across_cases
25+
from CSET.operators._utils import ensure_aggregatable_across_cases, is_time_aggregatable
2526

2627

2728
def collapse(
@@ -88,6 +89,7 @@ def collapse_by_lead_time(
8889
8990
First checks if the data can be aggregated by lead time easily. Then
9091
collapses by lead time for a specified method using the collapse function.
92+
This operator provides the average of all T+1, T+2, etc.
9193
9294
Arguments
9395
---------
@@ -116,31 +118,50 @@ def collapse_by_lead_time(
116118
if method == "PERCENTILE":
117119
collapsed_cube = collapse(
118120
cube_to_collapse,
119-
"forecast_period",
121+
"forecast_reference_time",
120122
method,
121123
additional_percent=additional_percent,
122124
)
123125
else:
124-
collapsed_cube = collapse(cube_to_collapse, "forecast_period", method)
126+
collapsed_cube = collapse(cube_to_collapse, "forecast_reference_time", method)
125127
return collapsed_cube
126128

127129

128130
def collapse_by_hour_of_day(
129-
cube: iris.cube.Cube,
131+
cube: iris.cube.Cube | iris.cube.CubeList,
130132
method: str,
131133
additional_percent: float = None,
134+
multi_case: bool = True,
132135
**kwargs,
133136
) -> iris.cube.Cube:
134137
"""Collapse a cube by hour of the day.
135138
139+
Collapses a cube by hour of the day in the time coordinates provided by the
140+
model. It is useful for creating diurnal cycle plots. It aggregates all
141+
00 UTC together regardless of lead time.
142+
136143
Arguments
137144
---------
138-
cube: iris.cube.Cube
139-
Cube to collapse and iterate over one dimension. It should contain only
140-
one time dimension.
145+
cube: iris.cube.Cube | iris.cube.CubeList
146+
Cube to collapse and iterate over one dimension or CubeList to
147+
convert to a cube and then collapse prior to aggregating by hour.
148+
If a CubeList is provided multi_case must be set to True as the CubeList
149+
should only contain cubes of multiple dates and not different variables
150+
or models. A cube that only contains one time dimension must have
151+
multi_case set to False as it contains only one forecast. A cube
152+
containing two time dimensions, e.g., 'forecast_reference_time' and
153+
'forecast_period' must have multi_case set to True as it will contain
154+
multiple forecasts.
141155
method: str
142156
Type of collapse i.e. method: 'MEAN', 'MAX', 'MIN', 'MEDIAN',
143157
'PERCENTILE'. For 'PERCENTILE' the additional_percent must be specified.
158+
multi_case: boolean, optional
159+
Default is True. If True multiple cases will be aggregated by hour of
160+
day; if False a single forecast will be aggregated by hour of day.
161+
Information around the usage of multi_case is provided above under the
162+
description for the cube argument. It is kept as an argument rather
163+
than being automatically generated to maintain traceability for the
164+
user's actions.
144165
145166
Returns
146167
-------
@@ -151,6 +172,10 @@ def collapse_by_hour_of_day(
151172
------
152173
ValueError
153174
If additional_percent wasn't supplied while using PERCENTILE method.
175+
TypeError
176+
If a CubeList is given and multi_case is not True;
177+
if a Cube is given and contains two time dimensions and multi_case is not True;
178+
if a Cube is given and contains one time dimension and multi_case is not False.
154179
155180
Notes
156181
-----
@@ -163,10 +188,36 @@ def collapse_by_hour_of_day(
163188
To apply this operator successfully there must only be one time dimension.
164189
Should a MultiDim exception be raised the user first needs to apply the
165190
collapse operator to reduce the time dimensions before applying this
166-
operator.
191+
operator. If multi_case is true the collapse_by_lead_time operator is
192+
applied and performs this step.
167193
"""
168194
if method == "PERCENTILE" and additional_percent is None:
169195
raise ValueError("Must specify additional_percent")
196+
elif (
197+
isinstance(cube, iris.cube.Cube)
198+
and is_time_aggregatable(cube)
199+
and not multi_case
200+
):
201+
raise TypeError(
202+
"multi_case must be true for a cube containing two time dimensions"
203+
)
204+
elif (
205+
isinstance(cube, iris.cube.Cube)
206+
and not is_time_aggregatable(cube)
207+
and multi_case
208+
):
209+
raise TypeError(
210+
"multi_case must be false for a cube containing one time dimension"
211+
)
212+
elif isinstance(cube, iris.cube.CubeList) and not multi_case:
213+
raise TypeError("multi_case must be true for a CubeList")
214+
215+
if multi_case:
216+
# Collapse by lead time to get a single time dimension.
217+
cube = collapse_by_lead_time(
218+
cube, method, additional_percent=additional_percent
219+
)
220+
170221
# Categorise the time coordinate by hour of the day.
171222
iris.coord_categorisation.add_hour(cube, "time", name="hour")
172223
# Aggregate by the new category coordinate.
@@ -176,10 +227,18 @@ def collapse_by_hour_of_day(
176227
)
177228
else:
178229
collapsed_cube = cube.aggregated_by("hour", getattr(iris.analysis, method))
230+
179231
# Remove unnecessary time coordinates.
180232
collapsed_cube.remove_coord("time")
181-
collapsed_cube.remove_coord("forecast_reference_time")
182233
collapsed_cube.remove_coord("forecast_period")
234+
# Remove forecast_reference_time if a single case, as collapse_by_lead_time
235+
# will have effectively done this if multi_case is True.
236+
if not multi_case:
237+
collapsed_cube.remove_coord("forecast_reference_time")
238+
239+
# Promote "hour" to dim_coord if monotonic.
240+
if collapsed_cube.coord("hour").is_monotonic():
241+
iris.util.promote_aux_coord_to_dim_coord(collapsed_cube, "hour")
183242
return collapsed_cube
184243

185244

tests/operators/test_collapse.py

+79-13
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ def long_forecast_many_cubes() -> iris.cube.Cube:
4646
)
4747

4848

49+
@pytest.fixture()
50+
def medium_forecast() -> iris.cube.Cube:
51+
"""Get medium forecast with monotonic time coordinate."""
52+
return iris.load_cube(
53+
"tests/test_data/medium_forecast_air_temp_monotonic.nc", "air_temperature"
54+
)
55+
56+
4957
def test_collapse(cube):
5058
"""Reduces dimension of cube."""
5159
# Test collapsing a single coordinate.
@@ -80,28 +88,78 @@ def test_collapse_percentile(cube):
8088

8189
def test_collapse_by_hour_of_day(long_forecast):
8290
"""Convert and aggregates time dimension by hour of day."""
83-
# Test collapsing a long forecast.
84-
collapsed_cube = collapse.collapse_by_hour_of_day(long_forecast, "MEAN")
91+
collapsed_cube = collapse.collapse_by_hour_of_day(
92+
long_forecast, "MEAN", multi_case=False
93+
)
8594
expected_cube = "<iris 'Cube' of air_temperature / (K) (-- : 24; grid_latitude: 3; grid_longitude: 3)>"
8695
assert repr(collapsed_cube) == expected_cube
8796

8897

98+
def test_collapse_by_hour_of_day_fail(long_forecast):
99+
"""Test failing due to multi_case set to True."""
100+
with pytest.raises(TypeError):
101+
collapse.collapse_by_hour_of_day(long_forecast, "MEAN")
102+
103+
89104
def test_collapse_by_hour_of_day_percentile(long_forecast):
90105
"""Convert and aggregate time dimension by hour of day with percentiles."""
91-
with pytest.raises(ValueError):
92-
collapse.collapse_by_hour_of_day(long_forecast, "PERCENTILE")
93106
# Test collapsing long forecast.
94107
collapsed_cube = collapse.collapse_by_hour_of_day(
95-
long_forecast, "PERCENTILE", additional_percent=[25, 75]
108+
long_forecast, "PERCENTILE", additional_percent=[25, 75], multi_case=False
96109
)
97110
expected_cube = "<iris 'Cube' of air_temperature / (K) (percentile_over_hour: 2; -- : 24; grid_latitude: 3; grid_longitude: 3)>"
98111
assert repr(collapsed_cube) == expected_cube
99112

100113

114+
def test_collapse_by_hour_of_day_percentile_fail(long_forecast):
115+
"""Test failing due to non-specified additional_percent."""
116+
with pytest.raises(ValueError):
117+
collapse.collapse_by_hour_of_day(long_forecast, "PERCENTILE", multi_case=False)
118+
119+
120+
def test_collapse_by_hour_of_day_multi_forecast_cube(long_forecast_multi_day):
121+
"""Convert and aggregates time dimension by hour of day for a multi day cube."""
122+
collapsed_cube = collapse.collapse_by_hour_of_day(long_forecast_multi_day, "MEAN")
123+
expected_cube = "<iris 'Cube' of air_temperature / (K) (-- : 24; grid_latitude: 3; grid_longitude: 3)>"
124+
assert repr(collapsed_cube) == expected_cube
125+
126+
127+
def test_collapse_by_hour_of_day_multi_forecast_cube_fail(long_forecast_multi_day):
128+
"""Test failing due to multi_case set to False."""
129+
with pytest.raises(TypeError):
130+
collapse.collapse_by_hour_of_day(
131+
long_forecast_multi_day, "MEAN", multi_case=False
132+
)
133+
134+
135+
def test_collapse_by_hour_of_day_multi_forecast_cubelist(long_forecast_many_cubes):
136+
"""Convert and aggregates time dimension by hour of day for a CubeList."""
137+
collapsed_cube = collapse.collapse_by_hour_of_day(long_forecast_many_cubes, "MEAN")
138+
expected_cube = "<iris 'Cube' of air_temperature / (K) (-- : 24; grid_latitude: 3; grid_longitude: 3)>"
139+
assert repr(collapsed_cube) == expected_cube
140+
141+
142+
def test_collapse_by_hour_of_day_multi_forecast_cubelist_fail(long_forecast_many_cubes):
143+
"""Test failing due to multi_case set to False."""
144+
with pytest.raises(TypeError):
145+
collapse.collapse_by_hour_of_day(
146+
long_forecast_many_cubes, "MEAN", multi_case=False
147+
)
148+
149+
150+
def test_collapse_by_hour_of_day_monotonic_coords(medium_forecast):
151+
"""Convert and aggregates time dimension by hour of day with monotonic coordinates."""
152+
collapsed_cube = collapse.collapse_by_hour_of_day(
153+
medium_forecast, "MEAN", multi_case=False
154+
)
155+
expected_cube = "<iris 'Cube' of air_temperature / (K) (hour: 24; grid_latitude: 3; grid_longitude: 3)>"
156+
assert repr(collapsed_cube) == expected_cube
157+
158+
101159
def test_collapse_by_lead_time_single_cube(long_forecast_multi_day):
102160
"""Check cube collapse by lead time."""
103161
calculated_cube = collapse.collapse(
104-
long_forecast_multi_day, "forecast_period", "MEAN"
162+
long_forecast_multi_day, "forecast_reference_time", "MEAN"
105163
)
106164
assert np.allclose(
107165
calculated_cube.data,
@@ -116,7 +174,7 @@ def test_collapse_by_lead_time_cube_list(
116174
):
117175
"""Check CubeList is made into an aggregatable cube and collapses by lead time."""
118176
calculated_cube = collapse.collapse(
119-
long_forecast_multi_day, "forecast_period", "MEAN"
177+
long_forecast_multi_day, "forecast_reference_time", "MEAN"
120178
)
121179
assert np.allclose(
122180
calculated_cube.data,
@@ -129,10 +187,11 @@ def test_collapse_by_lead_time_cube_list(
129187
def test_collapse_by_lead_time_single_cube_percentile(long_forecast_multi_day):
130188
"""Check Cube collapse by lead time with percentiles."""
131189
calculated_cube = collapse.collapse(
132-
long_forecast_multi_day, "forecast_period", "PERCENTILE", additional_percent=75
190+
long_forecast_multi_day,
191+
"forecast_reference_time",
192+
"PERCENTILE",
193+
additional_percent=75,
133194
)
134-
with pytest.raises(ValueError):
135-
collapse.collapse_by_lead_time(long_forecast_multi_day, "PERCENTILE")
136195
assert np.allclose(
137196
calculated_cube.data,
138197
collapse.collapse_by_lead_time(
@@ -143,15 +202,22 @@ def test_collapse_by_lead_time_single_cube_percentile(long_forecast_multi_day):
143202
)
144203

145204

205+
def test_collapse_by_lead_time_single_cube_percentile_fail(long_forecast_multi_day):
206+
"""Test fail by not setting additional percent."""
207+
with pytest.raises(ValueError):
208+
collapse.collapse_by_lead_time(long_forecast_multi_day, "PERCENTILE")
209+
210+
146211
def test_collapse_by_lead_time_cube_list_percentile(
147212
long_forecast_multi_day, long_forecast_many_cubes
148213
):
149214
"""Check CubeList is made into an aggregatable cube and collapses by lead time with percentiles."""
150215
calculated_cube = collapse.collapse(
151-
long_forecast_multi_day, "forecast_period", "PERCENTILE", additional_percent=75
216+
long_forecast_multi_day,
217+
"forecast_reference_time",
218+
"PERCENTILE",
219+
additional_percent=75,
152220
)
153-
with pytest.raises(ValueError):
154-
collapse.collapse_by_lead_time(long_forecast_many_cubes, "PERCENTILE")
155221
assert np.allclose(
156222
calculated_cube.data,
157223
collapse.collapse_by_lead_time(

0 commit comments

Comments
 (0)