From ba38213e816e521aa98fde973a1554670148580a Mon Sep 17 00:00:00 2001
From: daflack
Date: Wed, 22 Jan 2025 13:35:40 +0000
Subject: [PATCH 1/4] Adds collapse by validity time operator.

Fixes #1030
---
 src/CSET/operators/collapse.py   | 75 ++++++++++++++++++++++++++++++++
 tests/operators/test_collapse.py | 40 +++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/src/CSET/operators/collapse.py b/src/CSET/operators/collapse.py
index 75a4566ef..1e5d9e3a7 100644
--- a/src/CSET/operators/collapse.py
+++ b/src/CSET/operators/collapse.py
@@ -183,6 +183,81 @@ def collapse_by_hour_of_day(
     return collapsed_cube
 
 
+def collapse_by_validity_time(
+    cube: iris.cube.Cube | iris.cube.CubeList,
+    method: str,
+    additional_percent: float = None,
+    **kwargs,
+) -> iris.cube.Cube:
+    """Collapse a cube around validity time for multiple cases.
+
+    First checks if the data can be aggregated easily. Then creates a new cube
+    by slicing over the time dimensions, removing the time dimensions,
+    re-merging the data, and creating a new time coordinate. It then collapses
+    by the new time coordinate for a specified method using the collapse
+    function.
+
+    Arguments
+    ---------
+    cube: iris.cube.Cube | iris.cube.CubeList
+        Cube to collapse by validity time or CubeList that will be converted
+        to a cube before collapsing by validity time.
+    method: str
+        Type of collapse i.e. method: 'MEAN', 'MAX', 'MIN', 'MEDIAN',
+        'PERCENTILE'. For 'PERCENTILE' the additional_percent must be specified.
+
+    Returns
+    -------
+    cube: iris.cube.Cube
+        Single variable collapsed by validity time based on chosen method.
+
+    Raises
+    ------
+    ValueError
+        If additional_percent wasn't supplied while using PERCENTILE method.
+    """
+    if method == "PERCENTILE" and additional_percent is None:
+        raise ValueError("Must specify additional_percent")
+    # Ensure the cube can be aggregated over multiple times.
+    cube_to_collapse = ensure_aggregatable_across_cases(cube)
+    # Convert to a cube that is split by validity time.
+    new_cubelist = iris.cube.CubeList()
+    # Slice over cube by both time dimensions to create a cube list.
+    for sub_cube in cube_to_collapse.slives_over(
+        ["forecast_period", "forecast_reference_time"]
+    ):
+        new_cubelist.append(sub_cube)
+    # Remove forecast_period and forecast_reference_time coordinates.
+    for sub_cube in new_cubelist:
+        sub_cube.remove_coord("forecast_period")
+        sub_cube.remove_coord("forecast_reference_time")
+    # Create new CubeList by merging with unique = False to produce a validity
+    # time cube.
+    merged_list_1 = new_cubelist.merge(unique=False)
+    # Create a new "fake" coordinate and apply to each remaining cube to allow
+    # final merging to take place into a single cube.
+    y = 0
+    for x in merged_list_1:
+        fake_time_coord = iris.coords.AuxCoord(y)
+        fake_time_coord.units = "1"
+        fake_time_coord.rename("fake_time_coord")
+        x.add_aux_coord(fake_time_coord)
+        y += 1
+    # Merge CubeList to create final cube.
+    final_cube = merged_list_1.merge()
+    # Collapse over the new coordinate to represent collapsing over validity time.
+    if method == "PERCENTILE":
+        collapsed_cube = collapse(
+            final_cube,
+            "fake_time_coord",
+            method,
+            additional_percent=additional_percent,
+        )
+    else:
+        collapsed_cube = collapse(final_cube, "fake_time_coord", method)
+    return collapsed_cube
+
+
 # TODO
 # Collapse function that calculates means, medians etc across members of an
 # ensemble or stratified groups. Need to allow collapse over realisation
diff --git a/tests/operators/test_collapse.py b/tests/operators/test_collapse.py
index dcd7f528f..d4acc0a40 100644
--- a/tests/operators/test_collapse.py
+++ b/tests/operators/test_collapse.py
@@ -160,3 +160,43 @@ def test_collapse_by_lead_time_cube_list_percentile(
         rtol=1e-06,
         atol=1e-02,
     )
+
+
+def test_collapse_by_validity_time(long_forecast_multi_day):
+    """Reduce a dimension of a cube by validity time."""
+    collapsed_cube = collapse.collapse_by_validity_time(long_forecast_multi_day, "MEAN")
+    expected_cube = ""
+    assert repr(collapsed_cube) == expected_cube
+
+
+def test_collapse_by_validity_time_cubelist(long_forecast_many_cubes):
+    """Convert to cube and reduce a dimension by validity time."""
+    collapsed_cube = collapse.collapse_by_validity_time(
+        long_forecast_many_cubes, "MEAN"
+    )
+    expected_cube = ""
+    assert repr(collapsed_cube) == expected_cube
+
+
+def test_collapse_by_validity_time_percentile(long_forecast_multi_day):
+    """Reduce by validity time with percentiles."""
+    with pytest.raises(ValueError):
+        collapse.collapse_by_validity_time(long_forecast_multi_day, "PERCENTILE")
+    # Test successful collapsing by validity time.
+    collapsed_cube = collapse.collapse_by_validity_time(
+        long_forecast_multi_day, "PERCENTILE", additional_percent=[25, 75]
+    )
+    expected_cube = ""
+    assert repr(collapsed_cube) == expected_cube
+
+
+def test_collapse_by_validity_time_cubelist_percentile(long_forecast_many_cubes):
+    """Convert to cube and reduce by validity time with percentiles."""
+    with pytest.raises(ValueError):
+        collapse.collapse_by_validity_time(long_forecast_many_cubes, "PERCENTILE")
+    # Test successful collapsing by validity time.
+    collapsed_cube = collapse.collapse_by_validity_time(
+        long_forecast_many_cubes, "PERCENTILE", additional_percent=[25, 75]
+    )
+    expected_cube = ""
+    assert repr(collapsed_cube) == expected_cube
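A minimal usage sketch of the new operator, not part of the patch. It assumes
several case-study forecasts of the same diagnostic on disk; the file glob and
variable name are hypothetical.

    import iris

    from CSET.operators import collapse

    # Load the same diagnostic from multiple forecasts (hypothetical files).
    forecasts = iris.load("case_study_*.nc", "air_temperature")
    # Mean across cases at each validity time.
    mean_cube = collapse.collapse_by_validity_time(forecasts, "MEAN")
    # PERCENTILE requires additional_percent, otherwise ValueError is raised.
    quartiles = collapse.collapse_by_validity_time(
        forecasts, "PERCENTILE", additional_percent=[25, 75]
    )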
From 65a6dfa0e2e40b52058a2bfca9264e3dc28b8035 Mon Sep 17 00:00:00 2001
From: daflack
Date: Wed, 22 Jan 2025 16:30:04 +0000
Subject: [PATCH 2/4] Fixes tests

---
 src/CSET/operators/collapse.py   | 14 +++++++-------
 tests/operators/test_collapse.py |  8 ++++----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/CSET/operators/collapse.py b/src/CSET/operators/collapse.py
index 1e5d9e3a7..78f3a7c3d 100644
--- a/src/CSET/operators/collapse.py
+++ b/src/CSET/operators/collapse.py
@@ -223,7 +223,7 @@ def collapse_by_validity_time(
     # Convert to a cube that is split by validity time.
     new_cubelist = iris.cube.CubeList()
     # Slice over cube by both time dimensions to create a cube list.
-    for sub_cube in cube_to_collapse.slives_over(
+    for sub_cube in cube_to_collapse.slices_over(
         ["forecast_period", "forecast_reference_time"]
     ):
         new_cubelist.append(sub_cube)
@@ -236,15 +236,15 @@ def collapse_by_validity_time(
     merged_list_1 = new_cubelist.merge(unique=False)
     # Create a new "fake" coordinate and apply to each remaining cube to allow
     # final merging to take place into a single cube.
-    y = 0
-    for x in merged_list_1:
-        fake_time_coord = iris.coords.AuxCoord(y)
+    fake_time = 0
+    for sub_cube in merged_list_1:
+        fake_time_coord = iris.coords.AuxCoord(fake_time)
         fake_time_coord.units = "1"
         fake_time_coord.rename("fake_time_coord")
-        x.add_aux_coord(fake_time_coord)
-        y += 1
+        sub_cube.add_aux_coord(fake_time_coord)
+        fake_time += 1
     # Merge CubeList to create final cube.
-    final_cube = merged_list_1.merge()
+    final_cube = merged_list_1.merge_cube()
     # Collapse over the new coordinate to represent collapsing over validity time.
     if method == "PERCENTILE":
         collapsed_cube = collapse(
diff --git a/tests/operators/test_collapse.py b/tests/operators/test_collapse.py
index d4acc0a40..9c344234d 100644
--- a/tests/operators/test_collapse.py
+++ b/tests/operators/test_collapse.py
@@ -165,7 +165,7 @@ def test_collapse_by_lead_time_cube_list_percentile(
 def test_collapse_by_validity_time(long_forecast_multi_day):
     """Reduce a dimension of a cube by validity time."""
     collapsed_cube = collapse.collapse_by_validity_time(long_forecast_multi_day, "MEAN")
-    expected_cube = ""
+    expected_cube = ""
     assert repr(collapsed_cube) == expected_cube
 
 
@@ -174,7 +174,7 @@ def test_collapse_by_validity_time_cubelist(long_forecast_many_cubes):
     collapsed_cube = collapse.collapse_by_validity_time(
         long_forecast_many_cubes, "MEAN"
     )
-    expected_cube = ""
+    expected_cube = ""
     assert repr(collapsed_cube) == expected_cube
 
 
@@ -186,7 +186,7 @@ def test_collapse_by_validity_time_percentile(long_forecast_multi_day):
     collapsed_cube = collapse.collapse_by_validity_time(
         long_forecast_multi_day, "PERCENTILE", additional_percent=[25, 75]
     )
-    expected_cube = ""
+    expected_cube = ""
     assert repr(collapsed_cube) == expected_cube
 
 
@@ -198,5 +198,5 @@ def test_collapse_by_validity_time_cubelist_percentile(long_forecast_many_cubes)
     collapsed_cube = collapse.collapse_by_validity_time(
         long_forecast_many_cubes, "PERCENTILE", additional_percent=[25, 75]
     )
-    expected_cube = ""
+    expected_cube = ""
     assert repr(collapsed_cube) == expected_cube
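A note on the merge_cube() fix above: CubeList.merge() returns another
CubeList, which may still hold several cubes, whereas CubeList.merge_cube()
guarantees a single Cube and raises a descriptive MergeError when the inputs
cannot be combined. A toy sketch of the merge behaviour, independent of the
patch:

    import iris.coords
    import iris.cube
    import numpy as np

    cubes = iris.cube.CubeList()
    for case in range(3):
        cube = iris.cube.Cube(np.zeros(4), long_name="example")
        # A distinct scalar coordinate per cube lets merging build a new dimension.
        cube.add_aux_coord(iris.coords.AuxCoord(case, long_name="case"))
        cubes.append(cube)

    merged = cubes.merge_cube()
    print(merged.shape)  # (3, 4): "case" has been promoted to a dimension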
From 6a53f88b7c856ee675c855e6864554252e97a365 Mon Sep 17 00:00:00 2001
From: David Flack <77390156+daflack@users.noreply.github.com>
Date: Thu, 23 Jan 2025 11:40:37 +0000
Subject: [PATCH 3/4] Apply suggestions from code review

Co-authored-by: James Frost
---
 src/CSET/operators/collapse.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/CSET/operators/collapse.py b/src/CSET/operators/collapse.py
index 78f3a7c3d..2e73a9cf0 100644
--- a/src/CSET/operators/collapse.py
+++ b/src/CSET/operators/collapse.py
@@ -221,12 +221,10 @@ def collapse_by_validity_time(
     # Ensure the cube can be aggregated over multiple times.
     cube_to_collapse = ensure_aggregatable_across_cases(cube)
     # Convert to a cube that is split by validity time.
-    new_cubelist = iris.cube.CubeList()
-    # Slice over cube by both time dimensions to create a cube list.
-    for sub_cube in cube_to_collapse.slices_over(
-        ["forecast_period", "forecast_reference_time"]
-    ):
-        new_cubelist.append(sub_cube)
+    # Slice over cube by both time dimensions to create a CubeList.
+    new_cubelist = iris.cube.CubeList(
+        cube_to_collapse.slices_over(["forecast_period", "forecast_reference_time"])
+    )
     # Remove forecast_period and forecast_reference_time coordinates.
     for sub_cube in new_cubelist:
         sub_cube.remove_coord("forecast_period")
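The review simplification above leans on iris.cube.CubeList accepting any
iterable of cubes, so the generator from slices_over() feeds the constructor
directly, one sub-cube per combination of points on the named coordinates. A
self-contained sketch with a toy cube (data and coordinate values hypothetical):

    import iris.coords
    import iris.cube
    import numpy as np

    # Toy cube: 2 lead times x 2 initialisation times.
    cube = iris.cube.Cube(np.arange(4.0).reshape(2, 2), long_name="example")
    cube.add_dim_coord(
        iris.coords.DimCoord([1, 2], long_name="forecast_period", units="hours"), 0
    )
    cube.add_dim_coord(
        iris.coords.DimCoord([0, 12], long_name="forecast_reference_time", units="hours"), 1
    )

    # One sub-cube per (lead time, initialisation time) pair.
    sub_cubes = iris.cube.CubeList(
        cube.slices_over(["forecast_period", "forecast_reference_time"])
    )
    print(len(sub_cubes))  # 4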
From de1d67179f954b514751bbe748bcc52a015f69b6 Mon Sep 17 00:00:00 2001
From: daflack
Date: Thu, 23 Jan 2025 11:55:18 +0000
Subject: [PATCH 4/4] Update tests following review comments

---
 src/CSET/operators/collapse.py   | 19 ++++++++++---------
 tests/operators/test_collapse.py | 15 ++++++++-------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/CSET/operators/collapse.py b/src/CSET/operators/collapse.py
index 2e73a9cf0..5b4bb435e 100644
--- a/src/CSET/operators/collapse.py
+++ b/src/CSET/operators/collapse.py
@@ -234,25 +234,26 @@ def collapse_by_validity_time(
     merged_list_1 = new_cubelist.merge(unique=False)
     # Create a new "fake" coordinate and apply to each remaining cube to allow
     # final merging to take place into a single cube.
-    fake_time = 0
-    for sub_cube in merged_list_1:
-        fake_time_coord = iris.coords.AuxCoord(fake_time)
-        fake_time_coord.units = "1"
-        fake_time_coord.rename("fake_time_coord")
-        sub_cube.add_aux_coord(fake_time_coord)
-        fake_time += 1
+    equalised_validity_time = iris.coords.AuxCoord(
+        points=0, long_name="equalised_validity_time", units="1"
+    )
+    for sub_cube, eq_valid_time in zip(
+        merged_list_1, range(len(merged_list_1)), strict=True
+    ):
+        sub_cube.add_aux_coord(equalised_validity_time.copy(points=eq_valid_time))
+
     # Merge CubeList to create final cube.
     final_cube = merged_list_1.merge_cube()
     # Collapse over the new coordinate to represent collapsing over validity time.
     if method == "PERCENTILE":
         collapsed_cube = collapse(
             final_cube,
-            "fake_time_coord",
+            "equalised_validity_time",
             method,
             additional_percent=additional_percent,
         )
     else:
-        collapsed_cube = collapse(final_cube, "fake_time_coord", method)
+        collapsed_cube = collapse(final_cube, "equalised_validity_time", method)
     return collapsed_cube
 
 
diff --git a/tests/operators/test_collapse.py b/tests/operators/test_collapse.py
index 9c344234d..9938c3d6c 100644
--- a/tests/operators/test_collapse.py
+++ b/tests/operators/test_collapse.py
@@ -180,23 +180,24 @@ def test_collapse_by_validity_time_cubelist(long_forecast_many_cubes):
 
 def test_collapse_by_validity_time_percentile(long_forecast_multi_day):
     """Reduce by validity time with percentiles."""
-    with pytest.raises(ValueError):
-        collapse.collapse_by_validity_time(long_forecast_multi_day, "PERCENTILE")
     # Test successful collapsing by validity time.
     collapsed_cube = collapse.collapse_by_validity_time(
         long_forecast_multi_day, "PERCENTILE", additional_percent=[25, 75]
     )
-    expected_cube = ""
+    expected_cube = ""
     assert repr(collapsed_cube) == expected_cube
 
 
+def test_collapse_by_validity_time_percentile_fail(long_forecast_multi_day):
+    """Test not specifying additional percent fails."""
+    with pytest.raises(ValueError):
+        collapse.collapse_by_validity_time(long_forecast_multi_day, "PERCENTILE")
+
+
 def test_collapse_by_validity_time_cubelist_percentile(long_forecast_many_cubes):
     """Convert to cube and reduce by validity time with percentiles."""
-    with pytest.raises(ValueError):
-        collapse.collapse_by_validity_time(long_forecast_many_cubes, "PERCENTILE")
-    # Test successful collapsing by validity time.
     collapsed_cube = collapse.collapse_by_validity_time(
         long_forecast_many_cubes, "PERCENTILE", additional_percent=[25, 75]
     )
-    expected_cube = ""
+    expected_cube = ""
     assert repr(collapsed_cube) == expected_cube
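A closing observation on the final design, not part of the patch: stamping each
cube with a copy of one template AuxCoord, each copy carrying a distinct point,
is what lets merge_cube() promote equalised_validity_time to a new dimension,
and zip(..., strict=True) (Python 3.10+) raises if the paired lengths ever
disagree. Using the names from the patch, the zip-over-range pairing is
equivalent to the arguably more idiomatic enumerate():

    # Equivalent loop using enumerate(); behaviour is unchanged.
    for eq_valid_time, sub_cube in enumerate(merged_list_1):
        sub_cube.add_aux_coord(equalised_validity_time.copy(points=eq_valid_time))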