Commit 769125b

Merge branch 'feature/risk_trajectory' into feature/cb_refactoring
2 parents 8413f35 + 02244cb commit 769125b

4 files changed, +772 −48 lines changed

climada/trajectories/risk_trajectory.py

Lines changed: 153 additions & 12 deletions
@@ -16,6 +16,9 @@

 ---

+This file implements risk trajectory objects, to allow a better evaluation
+of risk in between two points in time (snapshots).
+
 """

 import datetime
@@ -106,6 +109,13 @@ def _reset_metrics(self):

     @property
     def default_rp(self):
+        """The default return period values to use when computing risk period metrics.
+
+        Notes
+        -----
+
+        Changing its value resets the corresponding metric.
+        """
         return self._default_rp

     @default_rp.setter
@@ -120,7 +130,14 @@ def default_rp(self, value):

     @property
     def risk_transf_cover(self):
-        """The risk transfer coverage."""
+        """The risk transfer coverage.
+
+        Notes
+        -----
+
+        Changing its value resets the risk metrics.
+        """
+
         return self._risk_transf_cover

     @risk_transf_cover.setter
@@ -131,7 +148,14 @@ def risk_transf_cover(self, value):

     @property
     def risk_transf_attach(self):
-        """The risk transfer attachment."""
+        """The risk transfer attachment.
+
+        Notes
+        -----
+
+        Changing its value resets the risk metrics.
+        """
+
         return self._risk_transf_attach

     @risk_transf_attach.setter
@@ -190,11 +214,27 @@ def pairwise(container: list):
         ]

     @classmethod
-    def npv_transform(cls, df: pd.DataFrame, risk_disc) -> pd.DataFrame:
+    def npv_transform(cls, df: pd.DataFrame, risk_disc: DiscRates) -> pd.DataFrame:
+        """Apply discount rate to a metric `DataFrame`.
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            The `DataFrame` of the metric to discount.
+        risk_disc : DiscRates
+            The discount rate to apply.
+
+        Returns
+        -------
+        pd.DataFrame
+            The discounted risk metric.
+
+        """
+
         def _npv_group(group, disc):
             start_date = group.index.get_level_values("date").min()
-            end_date = group.index.get_level_values("date").max()
-            return calc_npv_cash_flows(group, start_date, end_date, disc)
+            return calc_npv_cash_flows(group, start_date, disc)

         df = df.set_index("date")
         grouper = cls._grouper
@@ -234,7 +274,11 @@ def _generic_metrics(
             tmp.append(getattr(calc_period, metric_meth)(**kwargs))

         tmp = pd.concat(tmp)
-        tmp.drop_duplicates(inplace=True)
+        tmp = tmp.set_index(["date", "group", "measure", "metric"])
+        tmp = tmp[
+            ~tmp.index.duplicated(keep="last")
+        ]  # We want to avoid overlap when more than 2 snapshots
+        tmp = tmp.reset_index()
         tmp["group"] = tmp["group"].fillna(self._all_groups_name)
         columns_to_front = ["group", "date", "measure", "metric"]
         tmp = tmp[
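The index-based deduplication introduced above keeps only the last value for any (date, group, measure, metric) combination that appears in more than one risk period, since the boundary date of two consecutive snapshots shows up in both periods. A minimal pandas sketch of the same pattern, with invented data:

import pandas as pd

# Two consecutive periods share the boundary date 2030-01-01.
period_a = pd.DataFrame(
    {"date": ["2025-01-01", "2030-01-01"], "group": ["all"] * 2,
     "measure": ["none"] * 2, "metric": ["aai"] * 2, "risk": [100.0, 120.0]}
)
period_b = pd.DataFrame(
    {"date": ["2030-01-01", "2035-01-01"], "group": ["all"] * 2,
     "measure": ["none"] * 2, "metric": ["aai"] * 2, "risk": [125.0, 140.0]}
)

tmp = pd.concat([period_a, period_b])
tmp = tmp.set_index(["date", "group", "measure", "metric"])
tmp = tmp[~tmp.index.duplicated(keep="last")]  # keep period_b's value for 2030-01-01
tmp = tmp.reset_index()
# -> three rows remain: 2025-01-01 (100.0), 2030-01-01 (125.0), 2035-01-01 (140.0)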
@@ -272,11 +316,39 @@ def _compute_metrics(
         return df

     def eai_metrics(self, npv: bool = True, **kwargs):
+        """Return the estimated annual impacts at each exposure point for each date.
+
+        This method computes and returns a `GeoDataFrame` with the eai metric
+        (for each exposure point) for each date.
+
+        Parameters
+        ----------
+        npv : bool
+            Whether to apply the (risk) discount rate if it is defined.
+            Defaults to `True`.
+
+        Notes
+        -----
+
+        This computation may become quite expensive for large areas with high resolution.
+
+        """
         return self._compute_metrics(
             npv=npv, metric_name="eai", metric_meth="calc_eai_gdf", **kwargs
         )

     def aai_metrics(self, npv: bool = True, **kwargs):
+        """Return the average annual impacts for each date.
+
+        This method computes and returns a `DataFrame` with the aai metric for each date.
+
+        Parameters
+        ----------
+        npv : bool
+            Whether to apply the (risk) discount rate if it is defined.
+            Defaults to `True`.
+        """
+
         return self._compute_metrics(
             npv=npv, metric_name="aai", metric_meth="calc_aai_metric", **kwargs
         )
@@ -291,6 +363,18 @@ def return_periods_metrics(self, return_periods, npv: bool = True, **kwargs):
         )

     def aai_per_group_metrics(self, npv: bool = True, **kwargs):
+        """Return the average annual impacts for each exposure group ID.
+
+        This method computes and returns a `DataFrame` with the aai metric for each
+        of the exposure groups defined by a group ID, for each date.
+
+        Parameters
+        ----------
+        npv : bool
+            Whether to apply the (risk) discount rate if it is defined.
+            Defaults to `True`.
+        """
+
         return self._compute_metrics(
             npv=npv,
             metric_name="aai_per_group",
@@ -299,6 +383,24 @@ def aai_per_group_metrics(self, npv: bool = True, **kwargs):
         )

     def risk_components_metrics(self, npv: bool = True, **kwargs):
+        """Return the "components" of the change in future risk (Exposure and Hazard).
+
+        This method returns the components of the change in risk at each date:
+
+        - The base risk, i.e., the risk without change in hazard or exposure, compared to the trajectory's earliest date.
+        - The "delta from exposure", i.e., the additional risk that comes with the change in exposure.
+        - The "delta from hazard", i.e., the additional risk that comes with the change in hazard.
+
+        Due to how the computations are done, the "delta from exposure" corresponds to the change in risk caused by the change in exposure while the hazard remains at the "baseline hazard", whereas the "delta from hazard" corresponds to the change in risk caused by the change in hazard while the exposure remains at the **future** exposure.
+
+        Parameters
+        ----------
+        npv : bool
+            Whether to apply the (risk) discount rate if it is defined.
+            Defaults to `True`.
+
+        """
+
         return self._compute_metrics(
             npv=npv,
             metric_name="risk_components",
@@ -312,6 +414,30 @@ def per_date_risk_metrics(
         return_periods: list[int] | None = None,
         npv: bool = True,
     ) -> pd.DataFrame | pd.Series:
+        """Return a DataFrame of risk metrics for each date.
+
+        This method collects (and if needed computes) the `metrics`
+        (defaulting to "aai", "return_periods" and "aai_per_group").
+
+        Parameters
+        ----------
+        metrics : list[str], optional
+            The list of metrics to return (defaults to
+            ["aai", "return_periods", "aai_per_group"]).
+        return_periods : list[int], optional
+            The return periods to consider for the return periods metric
+            (defaults to the value of the `.default_rp` attribute).
+        npv : bool
+            Whether to apply the (risk) discount rate if it was defined
+            when instantiating the trajectory. Defaults to `True`.
+
+        Returns
+        -------
+        pd.DataFrame | pd.Series
+            A tidy DataFrame with metric values for all possible dates.
+
+        """
+
         metrics_df = []
         metrics = (
             ["aai", "return_periods", "aai_per_group"] if metrics is None else metrics
@@ -578,20 +704,35 @@ def plot_waterfall(
 def calc_npv_cash_flows(
     cash_flows: pd.DataFrame,
     start_date: datetime.date,
-    end_date: datetime.date | None = None,
     disc: DiscRates | None = None,
 ):
-    # If no discount rates are provided, return the cash flows as is
+    """Apply a discount rate to cash flows.
+
+    If it is defined, applies the discount rate `disc` to the given cash flow
+    `cash_flows`, assuming the present year corresponds to `start_date`.
+
+    Parameters
+    ----------
+    cash_flows : pd.DataFrame
+        The cash flow to apply the discount rate to.
+    start_date : datetime.date
+        The date representing the present.
+    disc : DiscRates, optional
+        The discount rate to apply.
+
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame (copy) of `cash_flows` where values are discounted according to `disc`.
+    """
+
     if not disc:
         return cash_flows

     if not isinstance(cash_flows.index, pd.DatetimeIndex):
         raise ValueError("cash_flows must be a pandas Series with a datetime index")

-    # Determine the end date if not provided
-    if end_date is None:
-        end_date = cash_flows.index[-1]
-
     df = cash_flows.to_frame(name="cash_flow")
     df["year"] = df.index.year

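The docstring above describes discounting a date-indexed cash flow back to the year of `start_date`. The standalone sketch below illustrates the idea with a flat yearly rate; it is not CLIMADA's `DiscRates` machinery, and the function name and rate are assumptions made for the example:

import pandas as pd


def discount_flat_rate(cash_flows: pd.Series, start_year: int, rate: float = 0.02) -> pd.Series:
    """Discount a yearly, date-indexed cash flow back to start_year at a flat rate."""
    offsets = cash_flows.index.year.to_numpy() - start_year  # years elapsed since the "present"
    factors = (1.0 + rate) ** offsets
    return cash_flows / factors


flows = pd.Series(
    [100.0, 100.0, 100.0],
    index=pd.to_datetime(["2025-01-01", "2026-01-01", "2027-01-01"]),
)
print(discount_flat_rate(flows, start_year=2025))  # ≈ 100.00, 98.04, 96.12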

climada/trajectories/riskperiod.py

Lines changed: 23 additions & 5 deletions
@@ -16,7 +16,13 @@

 ---

-This modules implements the Snapshot and SnapshotsCollection classes.
+This module implements the CalcRiskPeriod class.
+
+CalcRiskPeriod objects are used to compute risk metrics (and intermediate requirements)
+between two snapshots.
+
+As these computations are not always required and can become "heavy", a so-called "lazy"
+approach is used: a computation is only done when required, and its result is then stored.

 """

@@ -39,6 +45,10 @@

 LOGGER = logging.getLogger(__name__)

+logging.getLogger("climada.util.coordinates").setLevel(logging.WARNING)
+logging.getLogger("climada.entity.exposures.base").setLevel(logging.WARNING)
+logging.getLogger("climada.engine.impact_calc").setLevel(logging.WARNING)
+

 def lazy_property(method):
     # This function is used as a decorator for properties
@@ -105,7 +115,7 @@ def __init__(
         risk_transf_cover: float | None = None,
         calc_residual: bool = False,
     ):
-        LOGGER.info("Instantiating new CalcRiskPeriod.")
+        LOGGER.debug("Instantiating new CalcRiskPeriod.")
         self._snapshot0 = snapshot0
         self._snapshot1 = snapshot1
         self.date_idx = CalcRiskPeriod._set_date_idx(
@@ -124,8 +134,16 @@ def __init__(
         self.calc_residual = calc_residual
         self.measure = None  # Only possible to set with apply_measure to make sure snapshots are consistent

-        self._group_id_E0 = self.snapshot0.exposure.gdf["group_id"].values
-        self._group_id_E1 = self.snapshot1.exposure.gdf["group_id"].values
+        self._group_id_E0 = (
+            self.snapshot0.exposure.gdf["group_id"].values
+            if "group_id" in self.snapshot0.exposure.gdf.columns
+            else np.array([])
+        )
+        self._group_id_E1 = (
+            self.snapshot1.exposure.gdf["group_id"].values
+            if "group_id" in self.snapshot1.exposure.gdf.columns
+            else np.array([])
+        )

     def _reset_impact_data(self):
         self._impacts_arrays = None
@@ -212,7 +230,7 @@ def date_idx(self, value, /):
         if not isinstance(value, pd.DatetimeIndex):
             raise ValueError("Not a DatetimeIndex")

-        self._date_idx = value.normalize()
+        self._date_idx = value.normalize()  # Avoids weird hourly data
         self._time_points = len(self.date_idx)
         self._interval_freq = pd.infer_freq(self.date_idx)
         self._prop_H1 = np.linspace(0, 1, num=self.time_points)
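The new module docstring describes a "lazy" pattern: a value is computed only on first access and then stored for reuse. The `lazy_property` helper visible as context in the second hunk above presumably implements something along these lines; the sketch below is a generic version of that pattern, not the body from this commit:

import functools


def lazy_property(method):
    # Cache the result on the instance under a private attribute; recompute
    # only if that attribute has been reset to None (e.g. when inputs change).
    attr_name = "_" + method.__name__

    @property
    @functools.wraps(method)
    def wrapper(self):
        if getattr(self, attr_name, None) is None:
            setattr(self, attr_name, method(self))  # the heavy computation runs here, once
        return getattr(self, attr_name)

    return wrapper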

climada/trajectories/snapshot.py

Lines changed: 4 additions & 1 deletion
@@ -16,7 +16,10 @@

 ---

-This modules implements the Snapshot and SnapshotsCollection classes.
+This module implements the Snapshot class.
+
+Snapshots are used to store a snapshot of Exposure, Hazard, and Vulnerability
+at a specific date.

 """

