Skip to content

Commit 8ad7bb9

Browse files
Fixed bug where division lead to nan or inf values (#33)
1 parent bcb2d48 commit 8ad7bb9

File tree

4 files changed

+54
-17
lines changed

4 files changed

+54
-17
lines changed

Makefile

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@ pre-commit:
4545

4646
## Create the changelog
4747
##
48-
## Generate the changelog
49-
##
5048
changelog:
5149
docker run -it --rm \
5250
-v "$(pwd)":/usr/local/src/python-cmethods \

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ The documentation is available at: [https://python-cmethods.readthedocs.io/en/st
4141

4242
## 1. About
4343

44-
These programs and data structures are designed to help minimize discrepancies between modeled and observed climate data. Data from past periods are used to adjust variables from current and future time series so that their distributional properties approximate possible actual values.
44+
These programs and data structures are developed with the aim of reducing discrepancies between modeled and observed climate data. Historical data is utilized to calibrate variables from current and future time series to achieve distributional properties that closely resemble the possible actual values.
4545

4646
<figure>
4747
<img
@@ -51,7 +51,7 @@ These programs and data structures are designed to help minimize discrepancies b
5151
<figcaption>Figure 1: Schematic representation of a bias adjustment procedure</figcaption>
5252
</figure>
5353

54-
In this way, for example, modeled data, which on average represent values that are too cold, can be adjusted by applying an adjustment procedure. The following figure shows the observed, the modeled, and the adjusted values. It is directly visible that the delta adjusted time series ($T^{\*DM}_{sim,p}$) are much more similar to the observed data ($T_{obs,p}$) than the raw modeled data ($T_{sim,p}$).
54+
For instance, modeled data typically indicate values that are colder than the actual values. To address this issue, an adjustment procedure is employed. The figure below illustrates the observed, modeled, and adjusted values, revealing that the delta adjusted time series ($T^{*DM}_{sim,p}$) are significantly more similar to the observed data ($T{obs,p}$) than the raw modeled data ($T_{sim,p}$).
5555

5656
<figure>
5757
<img
@@ -136,7 +136,7 @@ qdm_result = cm.adjust_3d( # 3d = 2 spatial and 1 time dimension
136136
Notes:
137137

138138
- When using the `adjust_3d` method you have to specify the method by name.
139-
- For the multiplicative linear scaling and the delta method as well as the variance scaling method a maximum scaling factor of 10 is defined. This can be changed by the parameter `max_scaling_factor`.
139+
- For the multiplicative techniques a maximum scaling factor of 10 is defined. This can be changed by the attribute `max_scaling_factor`.
140140

141141
## Examples (see repository on [GitHub](https://github.com/btschwertfeger/python-cmethods))
142142

cmethods/__init__.py

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ def linear_scaling(
462462
return np.array(simp) + (np.nanmean(obs) - np.nanmean(simh)) # Eq. 1
463463
if kind in cls.MULTIPLICATIVE:
464464
adj_scaling_factor = cls.get_adjusted_scaling_factor(
465-
np.nanmean(obs) / np.nanmean(simh),
465+
cls.ensure_devidable(np.nanmean(obs), np.nanmean(simh)),
466466
kwargs.get("max_scaling_factor", cls.MAX_SCALING_FACTOR),
467467
)
468468
return np.array(simp) * adj_scaling_factor # Eq. 2
@@ -585,7 +585,7 @@ def variance_scaling(
585585
VS_1_simp = LS_simp - np.nanmean(LS_simp) # Eq. 4
586586

587587
adj_scaling_factor = cls.get_adjusted_scaling_factor(
588-
np.std(np.array(obs)) / np.std(VS_1_simh),
588+
cls.ensure_devidable(np.std(np.array(obs)), np.std(VS_1_simh)),
589589
kwargs.get("max_scaling_factor", cls.MAX_SCALING_FACTOR),
590590
)
591591

@@ -700,7 +700,7 @@ def delta_method(
700700
return np.array(obs) + (np.nanmean(simp) - np.nanmean(simh)) # Eq. 1
701701
if kind in cls.MULTIPLICATIVE:
702702
adj_scaling_factor = cls.get_adjusted_scaling_factor(
703-
np.nanmean(simp) / np.nanmean(simh),
703+
cls.ensure_devidable(np.nanmean(simp), np.nanmean(simh)),
704704
kwargs.get("max_scaling_factor", cls.MAX_SCALING_FACTOR),
705705
)
706706
return np.array(obs) * adj_scaling_factor # Eq. 2
@@ -864,14 +864,14 @@ def quantile_mapping(
864864

865865
elif kind in cls.MULTIPLICATIVE:
866866
epsilon = np.interp( # Eq. 2
867-
(m_simh_mean * m_simp) / m_simp_mean,
867+
cls.ensure_devidable((m_simh_mean * m_simp), m_simp_mean),
868868
xbins,
869869
cdf_simh,
870870
left=kwargs.get("val_min", 0.0),
871871
right=kwargs.get("val_max", None),
872872
)
873873
X = np.interp(epsilon, cdf_obs, xbins) * (
874-
m_simp_mean / m_simh_mean
874+
cls.ensure_devidable(m_simp_mean, m_simh_mean)
875875
) # Eq. 2
876876
for i, idx in enumerate(idxs):
877877
res[idx] = X[i]
@@ -1006,7 +1006,7 @@ def quantile_delta_mapping(
10061006
.. math::
10071007
10081008
\Delta(i) & = \frac{ F^{-1}_{sim,p}\left[\varepsilon(i)\right] }{ F^{-1}_{sim,h}\left[\varepsilon(i)\right] } \\[1pt]
1009-
& = \frac{ X_{sim,p}(i) }{ F^{-1}_{sim,h}\left\{F^{}_{sim,p}\left[X_{sim,p}(i)\right]\right\} }
1009+
& = \frac{ X_{sim,p}(i) }{ F^{-1}_{sim,h}\left\{F^_{sim,p}\left[X_{sim,p}(i)\right]\right\} }
10101010
10111011
10121012
**(2.4)** The relative change between the modeled data of the control and scenario period is than
@@ -1089,17 +1089,46 @@ def quantile_delta_mapping(
10891089
epsilon = np.interp(simp, xbins, cdf_simp) # Eq. 1.1
10901090
QDM1 = cls.get_inverse_of_cdf(cdf_obs, epsilon, xbins) # Eq. 1.2
10911091

1092-
with np.errstate(divide="ignore", invalid="ignore"):
1093-
delta = simp / cls.get_inverse_of_cdf(
1094-
cdf_simh, epsilon, xbins
1095-
) # Eq. 2.3
1096-
delta[np.isnan(delta)] = 0
1097-
1092+
delta = cls.ensure_devidable(
1093+
simp, cls.get_inverse_of_cdf(cdf_simh, epsilon, xbins)
1094+
) # Eq. 2.3
10981095
return QDM1 * delta # Eq. 2.4
10991096
raise NotImplementedError(
11001097
f"{kind} not available for quantile_delta_mapping. Use '+' or '*' instead."
11011098
)
11021099

1100+
@classmethod
1101+
def ensure_devidable(
1102+
cls, numerator: Union[float, np.array], denominator: Union[float, np.array]
1103+
) -> np.array:
1104+
"""
1105+
Ensures that the arrays can be devided. The numerator will be multiplied by
1106+
the maximum scaling factor of the CMethods class.
1107+
1108+
:param numerator: Numerator to use
1109+
:type numerator: np.array
1110+
:param denominator: Denominator that can be zero
1111+
:type denominator: np.array
1112+
:return: Zero-ensured devision
1113+
:rtype: np.array
1114+
"""
1115+
with np.errstate(divide="ignore", invalid="ignore"):
1116+
result = numerator / denominator
1117+
1118+
if isinstance(numerator, np.ndarray):
1119+
mask_inf = np.isinf(result)
1120+
result[mask_inf] = numerator[mask_inf] * cls.MAX_SCALING_FACTOR
1121+
1122+
mask_nan = np.isnan(result)
1123+
result[mask_nan] = 0
1124+
else:
1125+
if np.isinf(result):
1126+
result = numerator * cls.MAX_SCALING_FACTOR
1127+
elif np.isnan(result):
1128+
result = 0
1129+
1130+
return result
1131+
11031132
@staticmethod
11041133
def get_pdf(x: Union[list, np.array], xbins: Union[list, np.array]) -> np.array:
11051134
r"""

tests/test_methods.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,8 @@ def test_quantile_delta_mapping(self) -> None:
222222
)
223223

224224
assert isinstance(qdm_result, (np.ndarray, np.generic))
225+
if np.isnan(qdm_result).any():
226+
raise ValueError(qdm_result)
225227
assert mean_squared_error(
226228
qdm_result, self.data[kind]["obsp"][:, 0, 0], squared=False
227229
) < mean_squared_error(
@@ -397,6 +399,14 @@ def test_get_adjusted_scaling_factor(self) -> None:
397399
assert cm.get_adjusted_scaling_factor(-10, -11) == -10
398400
assert cm.get_adjusted_scaling_factor(-11, -10) == -10
399401

402+
def test_ensure_devidable(self) -> None:
403+
assert np.array_equal(
404+
cm.ensure_devidable(
405+
np.array((1, 2, 3, 4, 5, 0)), np.array((0, 1, 0, 2, 3, 0))
406+
),
407+
np.array((10, 2, 30, 2, 5 / 3, 0)),
408+
)
409+
400410

401411
if __name__ == "__main__":
402412
unittest.main()

0 commit comments

Comments
 (0)