From a698e68671b802a5365350adf30d0d23bbd63787 Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Mon, 14 Oct 2024 14:44:42 +0200 Subject: [PATCH] Removed special handling for `rh_cor` values in `value_clipping.py` The logic for handling `rh` values has been simplified to align with the general clipping rules. This change removes the exception cases previously applied to `rh`, ensuring consistent value clipping across all variables. --- src/pypromice/process/value_clipping.py | 18 ++------- tests/unit/test_value_clippping.py | 53 +++++++++++++++---------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/src/pypromice/process/value_clipping.py b/src/pypromice/process/value_clipping.py index 020e7317..f65cad52 100644 --- a/src/pypromice/process/value_clipping.py +++ b/src/pypromice/process/value_clipping.py @@ -41,20 +41,10 @@ def clip_values( if var not in list(ds.variables): continue - # This is a special case for rh_u_cor and rh_l_cor where values are clipped to 0 and 100. - if var in ["rh_u_cor", "rh_l_cor"]: - # Nan inputs should stay nan - was_nan = ds[var].isnull() - if ~np.isnan(row.lo): - ds[var] = ds[var].where(ds[var] >= row.lo, other=0) - if ~np.isnan(row.hi): - ds[var] = ds[var].where( ds[var] <= row.hi, other=100) - ds[var] = ds[var].where(~was_nan) - else: - if ~np.isnan(row.lo): - ds[var] = ds[var].where(ds[var] >= row.lo) - if ~np.isnan(row.hi): - ds[var] = ds[var].where(ds[var] <= row.hi) + if ~np.isnan(row.lo): + ds[var] = ds[var].where(ds[var] >= row.lo) + if ~np.isnan(row.hi): + ds[var] = ds[var].where(ds[var] <= row.hi) # Flag dependents as NaN if parent is NaN for o in row.dependents_closure: diff --git a/tests/unit/test_value_clippping.py b/tests/unit/test_value_clippping.py index 063242fc..52f33234 100644 --- a/tests/unit/test_value_clippping.py +++ b/tests/unit/test_value_clippping.py @@ -198,37 +198,48 @@ def test_circular_dependencies(self): check_dtype=True, ) - def test_rh_corrected_case(self): - """ - The rh corrected variables are treated differently in the clipping function. - """ - data_index = pd.RangeIndex(2) - rh_u = pd.Series(index=data_index, data=[0, 54], name="rh_u") - rh_u_cor = pd.Series(index=data_index, data=[0, np.nan], name="rh_u_cor") - rh_l = pd.Series(index=data_index, data=[-20, 54], name="rh_l") - rh_l_cor = pd.Series(index=data_index, data=[0, 254], name="rh_l_cor") - data = pd.concat([rh_u, rh_u_cor, rh_l, rh_l_cor], axis=1) + def test_rh_corrected(self): variable_config = pd.DataFrame( columns=["field", "lo", "hi", "OOL"], data=[ - ["rh_u", 0, 100, "rh_u_cor"], + ["rh_u", 0, 150, "rh_u_cor"], ["rh_u_cor", 0, 150, ""], - ["rh_l_cor", np.nan, np.nan, ""], - ["rh_l", 0, 100, "rh_l_cor"], ], ).set_index("field") - data_set = xr.Dataset(data) + rows_input = [] + rows_expected = [] + # All values are within the expected range + rows_input.append(dict(rh_u=42, rh_u_cor=43)) + rows_expected.append(dict(rh_u=42, rh_u_cor=43)) + # rh_u is below range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship + rows_input.append(dict(rh_u=-10, rh_u_cor=3)) + rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan)) + # rh_u is within range, but rh_u_cor is below range; rh_u_cor should be flagged + rows_input.append(dict(rh_u=54, rh_u_cor=-4)) + rows_expected.append(dict(rh_u=54, rh_u_cor=np.nan)) + # rh_u is above range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship + rows_input.append(dict(rh_u=160, rh_u_cor=120)) + rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan)) + # rh_u is within range, but rh_u_cor is above range; rh_u_cor should be flagged + rows_input.append(dict(rh_u=100, rh_u_cor=255)) + rows_expected.append(dict(rh_u=100, rh_u_cor=np.nan)) + + # Prepare the data + df_input = pd.DataFrame(rows_input, dtype=float) + df_expected = pd.DataFrame(rows_expected, dtype=float) + data_set = xr.Dataset(df_input) + + # Run the function data_set_out = clip_values(data_set, variable_config) - # Convert to dataframe for easier comparison data_frame_out = data_set_out.to_dataframe() - # The value of rh_u_cor should be nan since the input was nan - self.assertTrue(np.isnan(data_frame_out.iloc[1]["rh_u_cor"])) - # The value of rh_l_cor should not be changed since the hi threshold is not nan - self.assertEqual(data_frame_out.iloc[1]["rh_l_cor"], 254) - # The value of rh_l_cor should be nan since rh_l is below its threshold - self.assertTrue(np.isnan(data_frame_out.iloc[0]["rh_l_cor"])) + pd.testing.assert_frame_equal( + data_frame_out, + df_expected, + check_names=False, + check_dtype=True, + ) def test_nan_input(self): """