Fixed bug where division lead to nan or inf values (#33)

btschwertfeger · web-flow · commit 8ad7bb928c7c · 2023-04-17T17:38:08.000+02:00
diff --git a/Makefile b/Makefile
@@ -45,8 +45,6 @@ pre-commit:
 
 ## 		Create the changelog
 ##
-##		Generate the changelog
-##
 changelog:
 	docker run -it --rm \
 		-v "$(pwd)":/usr/local/src/python-cmethods \
diff --git a/README.md b/README.md
@@ -41,7 +41,7 @@ The documentation is available at: [https://python-cmethods.readthedocs.io/en/st
 
 ## 1. About
 
-These programs and data structures are designed to help minimize discrepancies between modeled and observed climate data. Data from past periods are used to adjust variables from current and future time series so that their distributional properties approximate possible actual values.
+These programs and data structures are developed with the aim of reducing discrepancies between modeled and observed climate data. Historical data is utilized to calibrate variables from current and future time series to achieve distributional properties that closely resemble the possible actual values.
 
 <figure>
   <img
@@ -51,7 +51,7 @@ These programs and data structures are designed to help minimize discrepancies b
   <figcaption>Figure 1: Schematic representation of a bias adjustment procedure</figcaption>
 </figure>
 
-In this way, for example, modeled data, which on average represent values that are too cold, can be adjusted by applying an adjustment procedure. The following figure shows the observed, the modeled, and the adjusted values. It is directly visible that the delta adjusted time series ($T^{\*DM}_{sim,p}$) are much more similar to the observed data ($T_{obs,p}$) than the raw modeled data ($T_{sim,p}$).
+For instance, modeled data typically indicate values that are colder than the actual values. To address this issue, an adjustment procedure is employed. The figure below illustrates the observed, modeled, and adjusted values, revealing that the delta adjusted time series ($T^{*DM}_{sim,p}$) are significantly more similar to the observed data ($T{obs,p}$) than the raw modeled data ($T_{sim,p}$).
 
 <figure>
   <img
@@ -136,7 +136,7 @@ qdm_result = cm.adjust_3d( # 3d = 2 spatial and 1 time dimension
 Notes:
 
 - When using the `adjust_3d` method you have to specify the method by name.
-- For the multiplicative linear scaling and the delta method as well as the variance scaling method a maximum scaling factor of 10 is defined. This can be changed by the parameter `max_scaling_factor`.
+- For the multiplicative techniques a maximum scaling factor of 10 is defined. This can be changed by the attribute `max_scaling_factor`.
 
 ## Examples (see repository on [GitHub](https://github.com/btschwertfeger/python-cmethods))
 
diff --git a/cmethods/__init__.py b/cmethods/__init__.py
@@ -462,7 +462,7 @@ def linear_scaling(
             return np.array(simp) + (np.nanmean(obs) - np.nanmean(simh))  # Eq. 1
         if kind in cls.MULTIPLICATIVE:
             adj_scaling_factor = cls.get_adjusted_scaling_factor(
-                np.nanmean(obs) / np.nanmean(simh),
+                cls.ensure_devidable(np.nanmean(obs), np.nanmean(simh)),
                 kwargs.get("max_scaling_factor", cls.MAX_SCALING_FACTOR),
             )
             return np.array(simp) * adj_scaling_factor  # Eq. 2
@@ -585,7 +585,7 @@ def variance_scaling(
             VS_1_simp = LS_simp - np.nanmean(LS_simp)  # Eq. 4
 
             adj_scaling_factor = cls.get_adjusted_scaling_factor(
-                np.std(np.array(obs)) / np.std(VS_1_simh),
+                cls.ensure_devidable(np.std(np.array(obs)), np.std(VS_1_simh)),
                 kwargs.get("max_scaling_factor", cls.MAX_SCALING_FACTOR),
             )
 
@@ -700,7 +700,7 @@ def delta_method(
             return np.array(obs) + (np.nanmean(simp) - np.nanmean(simh))  # Eq. 1
         if kind in cls.MULTIPLICATIVE:
             adj_scaling_factor = cls.get_adjusted_scaling_factor(
-                np.nanmean(simp) / np.nanmean(simh),
+                cls.ensure_devidable(np.nanmean(simp), np.nanmean(simh)),
                 kwargs.get("max_scaling_factor", cls.MAX_SCALING_FACTOR),
             )
             return np.array(obs) * adj_scaling_factor  # Eq. 2
@@ -864,14 +864,14 @@ def quantile_mapping(
 
                 elif kind in cls.MULTIPLICATIVE:
                     epsilon = np.interp(  # Eq. 2
-                        (m_simh_mean * m_simp) / m_simp_mean,
+                        cls.ensure_devidable((m_simh_mean * m_simp), m_simp_mean),
                         xbins,
                         cdf_simh,
                         left=kwargs.get("val_min", 0.0),
                         right=kwargs.get("val_max", None),
                     )
                     X = np.interp(epsilon, cdf_obs, xbins) * (
-                        m_simp_mean / m_simh_mean
+                        cls.ensure_devidable(m_simp_mean, m_simh_mean)
                     )  # Eq. 2
                 for i, idx in enumerate(idxs):
                     res[idx] = X[i]
@@ -1006,7 +1006,7 @@ def quantile_delta_mapping(
                 .. math::
 
                     \Delta(i) & = \frac{ F^{-1}_{sim,p}\left[\varepsilon(i)\right] }{ F^{-1}_{sim,h}\left[\varepsilon(i)\right] } \\[1pt]
-                              & = \frac{ X_{sim,p}(i) }{ F^{-1}_{sim,h}\left\{F^{}_{sim,p}\left[X_{sim,p}(i)\right]\right\} }
+                              & = \frac{ X_{sim,p}(i) }{ F^{-1}_{sim,h}\left\{F^_{sim,p}\left[X_{sim,p}(i)\right]\right\} }
 
 
             **(2.4)** The relative change between the modeled data of the control and scenario period is than
@@ -1089,17 +1089,46 @@ def quantile_delta_mapping(
             epsilon = np.interp(simp, xbins, cdf_simp)  # Eq. 1.1
             QDM1 = cls.get_inverse_of_cdf(cdf_obs, epsilon, xbins)  # Eq. 1.2
 
-            with np.errstate(divide="ignore", invalid="ignore"):
-                delta = simp / cls.get_inverse_of_cdf(
-                    cdf_simh, epsilon, xbins
-                )  # Eq. 2.3
-            delta[np.isnan(delta)] = 0
-
+            delta = cls.ensure_devidable(
+                simp, cls.get_inverse_of_cdf(cdf_simh, epsilon, xbins)
+            )  # Eq. 2.3
             return QDM1 * delta  # Eq. 2.4
         raise NotImplementedError(
             f"{kind} not available for quantile_delta_mapping. Use '+' or '*' instead."
         )
 
+    @classmethod
+    def ensure_devidable(
+        cls, numerator: Union[float, np.array], denominator: Union[float, np.array]
+    ) -> np.array:
+        """
+        Ensures that the arrays can be devided. The numerator will be multiplied by
+        the maximum scaling factor of the CMethods class.
+
+        :param numerator: Numerator to use
+        :type numerator: np.array
+        :param denominator: Denominator that can be zero
+        :type denominator: np.array
+        :return: Zero-ensured devision
+        :rtype: np.array
+        """
+        with np.errstate(divide="ignore", invalid="ignore"):
+            result = numerator / denominator
+
+        if isinstance(numerator, np.ndarray):
+            mask_inf = np.isinf(result)
+            result[mask_inf] = numerator[mask_inf] * cls.MAX_SCALING_FACTOR
+
+            mask_nan = np.isnan(result)
+            result[mask_nan] = 0
+        else:
+            if np.isinf(result):
+                result = numerator * cls.MAX_SCALING_FACTOR
+            elif np.isnan(result):
+                result = 0
+
+        return result
+
     @staticmethod
     def get_pdf(x: Union[list, np.array], xbins: Union[list, np.array]) -> np.array:
         r"""
diff --git a/tests/test_methods.py b/tests/test_methods.py
@@ -222,6 +222,8 @@ def test_quantile_delta_mapping(self) -> None:
             )
 
             assert isinstance(qdm_result, (np.ndarray, np.generic))
+            if np.isnan(qdm_result).any():
+                raise ValueError(qdm_result)
             assert mean_squared_error(
                 qdm_result, self.data[kind]["obsp"][:, 0, 0], squared=False
             ) < mean_squared_error(
@@ -397,6 +399,14 @@ def test_get_adjusted_scaling_factor(self) -> None:
         assert cm.get_adjusted_scaling_factor(-10, -11) == -10
         assert cm.get_adjusted_scaling_factor(-11, -10) == -10
 
+    def test_ensure_devidable(self) -> None:
+        assert np.array_equal(
+            cm.ensure_devidable(
+                np.array((1, 2, 3, 4, 5, 0)), np.array((0, 1, 0, 2, 3, 0))
+            ),
+            np.array((10, 2, 30, 2, 5 / 3, 0)),
+        )
+
 
 if __name__ == "__main__":
     unittest.main()