Skip to content

Commit

Permalink
Fixed bug in persistence QC where initial repetitions were ignored
Browse files Browse the repository at this point in the history
* Relocated unit persistence tests
* Added explicit test for `get_duration_consecutive_true`
* Renamed `duration_consecutive_true` to `get_duration_consecutive_true` for imperative clarity
  • Loading branch information
ladsmund committed Aug 8, 2024
1 parent f29f54b commit d598395
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 15 deletions.
20 changes: 12 additions & 8 deletions src/pypromice/qc/persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"persistence_qc",
"find_persistent_regions",
"count_consecutive_persistent_values",
"duration_consecutive_true",
"get_duration_consecutive_true",
]

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -152,19 +152,21 @@ def count_consecutive_persistent_values(
) -> pd.Series:
diff = data.ffill().diff().abs() # forward filling all NaNs!
mask: pd.Series = diff < max_diff
return duration_consecutive_true(mask)
return get_duration_consecutive_true(mask)


def duration_consecutive_true(
def get_duration_consecutive_true(
series: pd.Series,
) -> pd.Series:
"""
From a boolean series, calculates the duration, in hours, of the periods with concecutive true values.
The first value will be set to NaN, as it is not possible to calculate the duration of a single value.
Examples
--------
>>> duration_consecutive_true(pd.Series([False, True, False, False, True, True, True, False, True]))
pd.Series([0, 1, 0, 0, 1, 2, 3, 0, 1])
>>> get_duration_consecutive_true(pd.Series([False, True, False, False, True, True, True, False, True]))
pd.Series([np.nan, 1, 0, 0, 1, 2, 3, 0, 1])
Parameters
----------
Expand All @@ -177,9 +179,11 @@ def duration_consecutive_true(
Integer pandas Series or DataFrame with values representing the number of connective true values.
"""
# assert series.dtype == bool
cumsum = ((series.index - series.index[0]).total_seconds()/3600).to_series(index=series.index)
is_first = series.astype("int").diff() == 1
offset = (is_first * cumsum).replace(0, np.nan).ffill().fillna(0)
delta_time = (series.index.diff().total_seconds() / 3600).to_series(
index=series.index
)
cumsum = delta_time.cumsum()
offset = (is_first * (cumsum - delta_time)).replace(0, np.nan).ffill().fillna(0)

return (cumsum - offset) * series
Empty file added tests/unit/qc/__init__.py
Empty file.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import unittest

import numpy as np
import numpy.testing
import pandas as pd

from pypromice.qc import persistence
from pypromice.qc.persistence import find_persistent_regions


Expand Down Expand Up @@ -32,7 +32,9 @@ def _test_1_hour_repeat(self, index: int):
input_series, min_repeats=min_repeats, max_diff=0.001
)

pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False)
pd.testing.assert_series_equal(
expected_output, persistent_mask, check_names=False
)

def test_no_persistent_period(self):
time_range = pd.date_range(
Expand All @@ -46,7 +48,9 @@ def test_no_persistent_period(self):
input_series, min_repeats=min_repeats, max_diff=0.001
)

pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False)
pd.testing.assert_series_equal(
expected_output, persistent_mask, check_names=False
)

def test_persistent_period_longer_than_period_threshold(self):
time_range = pd.date_range(
Expand All @@ -66,7 +70,9 @@ def test_persistent_period_longer_than_period_threshold(self):
input_series, min_repeats=min_repeats, max_diff=0.001
)

pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False)
pd.testing.assert_series_equal(
expected_output, persistent_mask, check_names=False
)

def test_period_threshold_longer_than_persistent_period(self):
time_range = pd.date_range(
Expand All @@ -83,7 +89,9 @@ def test_period_threshold_longer_than_persistent_period(self):
input_series, min_repeats=min_repeats, max_diff=0.001
)

pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False)
pd.testing.assert_series_equal(
expected_output, persistent_mask, check_names=False
)

def test_persistent_period_at_the_end(self):
time_range = pd.date_range(
Expand All @@ -101,7 +109,9 @@ def test_persistent_period_at_the_end(self):
input_series, min_repeats=min_repeats, max_diff=0.001
)

pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False)
pd.testing.assert_series_equal(
expected_output, persistent_mask, check_names=False
)

def test_dont_filter_nan_values(self):
time_range = pd.date_range(
Expand All @@ -123,7 +133,9 @@ def test_dont_filter_nan_values(self):
input_series, min_repeats=min_repeats, max_diff=0.001
)

pd.testing.assert_series_equal(expected_output, persistent_mask, check_names=False)
pd.testing.assert_series_equal(
expected_output, persistent_mask, check_names=False
)

def test_series_with_nan_values_between_persistent_values(self):
time_range = pd.date_range(
Expand All @@ -145,6 +157,40 @@ def test_series_with_nan_values_between_persistent_values(self):

np.testing.assert_equal(expected_mask, output_mask)

def test_get_duration_consecutive_true(self):
delta_time_hours = np.random.random(24) * 2
time_range = pd.to_datetime("2023-01-25") + pd.to_timedelta(
delta_time_hours.cumsum(), unit="h"
)
values = time_range == False
values[0:2] = True
values[6] = True
values[10:14] = True
values[-3:] = True
series = pd.Series(index=time_range, data=values)

duration_consecutive_true = persistence.get_duration_consecutive_true(series)

self.assertTrue(
np.isnan(duration_consecutive_true[0]), "The first index should be ignored"
)
np.testing.assert_almost_equal(
duration_consecutive_true[1],
delta_time_hours[1],
)
np.testing.assert_almost_equal(
duration_consecutive_true[6],
delta_time_hours[6],
)
np.testing.assert_almost_equal(
duration_consecutive_true[10:14],
delta_time_hours[10:14].cumsum(),
)
np.testing.assert_almost_equal(
duration_consecutive_true[-3:],
delta_time_hours[-3:].cumsum(),
)


if __name__ == "__main__":
unittest.main()

0 comments on commit d598395

Please sign in to comment.