Merge branch 'develop' into feature/costum_grid_litpop

emanuel-schmid · emanuel-schmid · commit 6bf947858443 · 2025-05-19T11:24:58.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,18 +15,24 @@ Removed:
 - `pandas-datareader`
 
 ### Added
+- Added optional parameter `mask_distance` to `geo_im_from_array`, `plot_from_gdf`, `plot_rp_imp`, `plot_rp_intensity`,
+`plot_intensity`, `plot_fraction`, `_event_plot` to mask plotted regions that are too far from data points [#1047](https://github.com/CLIMADA-project/climada_python/pull/1047). To recreate previous plots (no masking), set the parameter to `None`.
 - Added instructions to install Climada petals on Euler cluster in `doc.guide.Guide_Euler.ipynb` [#1029](https://github.com/CLIMADA-project/climada_python/pull/1029)
 
 ### Changed
+
 - `Hazard.local_exceedance_intensity`, `Hazard.local_return_period` and `Impact.local_exceedance_impact`, `Impact.local_return_period`, using the `climada.util.interpolation` module: New default (no binning), binning on decimals, and faster implementation [#1012](https://github.com/CLIMADA-project/climada_python/pull/1012)
 - World Bank indicator data is now downloaded directly from their API via the function `download_world_bank_indicator`, instead of relying on the `pandas-datareader` package [#1033](https://github.com/CLIMADA-project/climada_python/pull/1033)
+- `Exposures.write_hdf5` pickles geometry data in WKB format, which is faster and more sustainable. [#1051](https://github.com/CLIMADA-project/climada_python/pull/1051)
 
 ### Fixed
+
 - NaN plotting issues in `geo_im_from_array`[#1038](https://github.com/CLIMADA-project/climada_python/pull/1038)
 
 ### Deprecated
 
 ### Removed
+
 - `climada.util.interpolation.round_to_sig_digits` [#1012](https://github.com/CLIMADA-project/climada_python/pull/1012)
 
 ## 6.0.1
diff --git a/climada/engine/impact.py b/climada/engine/impact.py
@@ -1178,6 +1178,7 @@ def plot_rp_imp(
         return_periods=(25, 50, 100, 250),
         log10_scale=True,
         axis=None,
+        mask_distance=0.01,
         kwargs_local_exceedance_impact=None,
         **kwargs,
     ):
@@ -1194,6 +1195,11 @@ def plot_rp_imp(
             plot impact as log10(impact). Default: True
         smooth : bool, optional
             smooth plot to plot.RESOLUTIONxplot.RESOLUTION. Default: True
+        mask_distance: float, optional
+            Only regions are plotted that are closer to any of the data points than this distance,
+            relative to overall plot size. For instance, to only plot values
+            at the centroids, use mask_distance=0.01. If None, the plot is not masked.
+            Default is 0.01.
         kwargs_local_exceedance_impact: dict
             Dictionary of keyword arguments for the method impact.local_exceedance_impact.
         kwargs : dict, optional
@@ -1242,7 +1248,12 @@ def plot_rp_imp(
             )
 
         axis = u_plot.plot_from_gdf(
-            impacts_stats, title, column_labels, axis=axis, **kwargs
+            impacts_stats,
+            title,
+            column_labels,
+            axis=axis,
+            mask_distance=mask_distance,
+            **kwargs,
         )
         return axis, impacts_stats_vals
 
diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
@@ -29,6 +29,7 @@
 
 import cartopy.crs as ccrs
 import contextily as ctx
+import geopandas as gpd
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@@ -1131,10 +1132,8 @@ def write_hdf5(self, file_name):
         """
         LOGGER.info("Writing %s", file_name)
         store = pd.HDFStore(file_name, mode="w")
-        pandas_df = pd.DataFrame(self.gdf)
-        for col in pandas_df.columns:
-            if str(pandas_df[col].dtype) == "geometry":
-                pandas_df[col] = np.asarray(self.gdf[col])
+        geocols = self.data.columns[self.data.dtypes == "geometry"].to_list()
+        pandas_df = self.data.to_wkb()
 
         # Avoid pandas PerformanceWarning when writing HDF5 data
         with warnings.catch_warnings():
@@ -1146,6 +1145,7 @@ def write_hdf5(self, file_name):
         for var in type(self)._metadata:
             var_meta[var] = getattr(self, var)
         var_meta["crs"] = self.crs
+        var_meta["wkb_columns"] = geocols
         store.get_storer("exposures").attrs.metadata = var_meta
 
         store.close()
@@ -1184,7 +1184,15 @@ def from_hdf5(cls, file_name):
             crs = metadata.get("crs", metadata.get("_crs"))
             if crs is None and metadata.get("meta"):
                 crs = metadata["meta"].get("crs")
-            exp = cls(store["exposures"], crs=crs)
+            data = pd.DataFrame(store["exposures"])
+
+            wkb_columns = (
+                metadata.pop("wkb_columns") if "wkb_columns" in metadata else []
+            )
+            for col in wkb_columns:
+                data[col] = gpd.GeoSeries.from_wkb(data[col])
+
+            exp = cls(data, crs=crs)
             for key, val in metadata.items():
                 if key in type(exp)._metadata:  # pylint: disable=protected-access
                     setattr(exp, key, val)
diff --git a/climada/entity/exposures/test/test_base.py b/climada/entity/exposures/test/test_base.py
@@ -378,11 +378,14 @@ def test_read_template_pass(self):
 
     def test_io_hdf5_pass(self):
         """write and read hdf5"""
-        exp_df = Exposures(pd.read_excel(ENT_TEMPLATE_XLS), crs="epsg:32632")
-        exp_df.check()
+        exp = Exposures(pd.read_excel(ENT_TEMPLATE_XLS), crs="epsg:32632")
+
         # set metadata
-        exp_df.ref_year = 2020
-        exp_df.value_unit = "XSD"
+        exp.ref_year = 2020
+        exp.value_unit = "XSD"
+
+        # add another geometry column
+        exp.data["geocol2"] = exp.data.geometry.copy(deep=True)
 
         file_name = DATA_DIR.joinpath("test_hdf5_exp.h5")
 
@@ -392,46 +395,51 @@ def test_io_hdf5_pass(self):
 
         with warnings.catch_warnings():
             warnings.simplefilter("error", category=pd.errors.PerformanceWarning)
-            exp_df.write_hdf5(file_name)
+            exp.write_hdf5(file_name=file_name)
 
         exp_read = Exposures.from_hdf5(file_name)
 
-        self.assertEqual(exp_df.ref_year, exp_read.ref_year)
-        self.assertEqual(exp_df.value_unit, exp_read.value_unit)
-        self.assertEqual(exp_df.description, exp_read.description)
-        np.testing.assert_array_equal(exp_df.latitude, exp_read.latitude)
-        np.testing.assert_array_equal(exp_df.longitude, exp_read.longitude)
-        np.testing.assert_array_equal(exp_df.value, exp_read.value)
+        self.assertEqual(exp.ref_year, exp_read.ref_year)
+        self.assertEqual(exp.value_unit, exp_read.value_unit)
+        self.assertEqual(exp.description, exp_read.description)
+        np.testing.assert_array_equal(exp.latitude, exp_read.latitude)
+        np.testing.assert_array_equal(exp.longitude, exp_read.longitude)
+        np.testing.assert_array_equal(exp.value, exp_read.value)
         np.testing.assert_array_equal(
-            exp_df.data["deductible"].values, exp_read.data["deductible"].values
+            exp.data["deductible"].values, exp_read.data["deductible"].values
         )
         np.testing.assert_array_equal(
-            exp_df.data["cover"].values, exp_read.data["cover"].values
+            exp.data["cover"].values, exp_read.data["cover"].values
         )
         np.testing.assert_array_equal(
-            exp_df.data["region_id"].values, exp_read.data["region_id"].values
+            exp.data["region_id"].values, exp_read.data["region_id"].values
         )
         np.testing.assert_array_equal(
-            exp_df.data["category_id"].values, exp_read.data["category_id"].values
+            exp.data["category_id"].values, exp_read.data["category_id"].values
         )
         np.testing.assert_array_equal(
-            exp_df.data["impf_TC"].values, exp_read.data["impf_TC"].values
+            exp.data["impf_TC"].values, exp_read.data["impf_TC"].values
         )
         np.testing.assert_array_equal(
-            exp_df.data["centr_TC"].values, exp_read.data["centr_TC"].values
+            exp.data["centr_TC"].values, exp_read.data["centr_TC"].values
         )
         np.testing.assert_array_equal(
-            exp_df.data["impf_FL"].values, exp_read.data["impf_FL"].values
+            exp.data["impf_FL"].values, exp_read.data["impf_FL"].values
         )
         np.testing.assert_array_equal(
-            exp_df.data["centr_FL"].values, exp_read.data["centr_FL"].values
+            exp.data["centr_FL"].values, exp_read.data["centr_FL"].values
         )
 
         self.assertTrue(
-            u_coord.equal_crs(exp_df.crs, exp_read.crs),
-            f"{exp_df.crs} and {exp_read.crs} are different",
+            u_coord.equal_crs(exp.crs, exp_read.crs),
+            f"{exp.crs} and {exp_read.crs} are different",
+        )
+        self.assertTrue(u_coord.equal_crs(exp.data.crs, exp_read.data.crs))
+
+        self.assertTrue(exp_read.data["geocol2"].dtype == "geometry")
+        np.testing.assert_array_equal(
+            exp.data["geocol2"].geometry, exp_read.data["geocol2"].values
         )
-        self.assertTrue(u_coord.equal_crs(exp_df.gdf.crs, exp_read.gdf.crs))
 
 
 class TestAddSea(unittest.TestCase):
diff --git a/climada/hazard/plot.py b/climada/hazard/plot.py
@@ -40,6 +40,7 @@ def plot_rp_intensity(
         self,
         return_periods=(25, 50, 100, 250),
         axis=None,
+        mask_distance=0.01,
         kwargs_local_exceedance_intensity=None,
         **kwargs,
     ):
@@ -56,6 +57,11 @@ def plot_rp_intensity(
             axis to use
         kwargs_local_exceedance_intensity: dict
             Dictionary of keyword arguments for the method hazard.local_exceedance_intensity.
+        mask_distance: float, optional
+            Only regions are plotted that are closer to any of the data points than this distance,
+            relative to overall plot size. For instance, to only plot values
+            at the centroids, use mask_distance=0.01. If None, the plot is not masked.
+            Default is 0.01.
         kwargs: optional
             arguments for pcolormesh matplotlib function used in event plots
 
@@ -89,7 +95,12 @@ def plot_rp_intensity(
         )
 
         axis = u_plot.plot_from_gdf(
-            inten_stats, title, column_labels, axis=axis, **kwargs
+            inten_stats,
+            title,
+            column_labels,
+            axis=axis,
+            mask_distance=mask_distance,
+            **kwargs,
         )
         return axis, inten_stats.values[:, 1:].T.astype(float)
 
@@ -100,6 +111,7 @@ def plot_intensity(
         smooth=True,
         axis=None,
         adapt_fontsize=True,
+        mask_distance=0.01,
         **kwargs,
     ):
         """Plot intensity values for a selected event or centroid.
@@ -123,6 +135,11 @@ def plot_intensity(
             in module `climada.util.plot`)
         axis: matplotlib.axes._subplots.AxesSubplot, optional
             axis to use
+        mask_distance: float, optional
+            Only regions are plotted that are closer to any of the data points than this distance,
+            relative to overall plot size. For instance, to only plot values
+            at the centroids, use mask_distance=0.01. If None, the plot is not masked.
+            Default is 0.01.
         kwargs: optional
             arguments for pcolormesh matplotlib function
             used in event plots or for plot function used in centroids plots
@@ -148,6 +165,7 @@ def plot_intensity(
                 crs_epsg,
                 axis,
                 adapt_fontsize=adapt_fontsize,
+                mask_distance=mask_distance,
                 **kwargs,
             )
         if centr is not None:
@@ -157,7 +175,15 @@ def plot_intensity(
 
         raise ValueError("Provide one event id or one centroid id.")
 
-    def plot_fraction(self, event=None, centr=None, smooth=True, axis=None, **kwargs):
+    def plot_fraction(
+        self,
+        event=None,
+        centr=None,
+        smooth=True,
+        axis=None,
+        mask_distance=0.01,
+        **kwargs,
+    ):
         """Plot fraction values for a selected event or centroid.
 
         Parameters
@@ -179,6 +205,11 @@ def plot_fraction(self, event=None, centr=None, smooth=True, axis=None, **kwargs
             in module `climada.util.plot`)
         axis: matplotlib.axes._subplots.AxesSubplot, optional
             axis to use
+        mask_distance: float, optional
+            Relative distance (with respect to maximal map extent in longitude or latitude) to data
+            points above which plot should not display values. For instance, to only plot values
+            at the centroids, use mask_distance=0.01. If None, the plot is not masked.
+            Default is 0.01.
         kwargs: optional
             arguments for pcolormesh matplotlib function
             used in event plots or for plot function used in centroids plots
@@ -196,7 +227,13 @@ def plot_fraction(self, event=None, centr=None, smooth=True, axis=None, **kwargs
             if isinstance(event, str):
                 event = self.get_event_id(event)
             return self._event_plot(
-                event, self.fraction, col_label, smooth, axis, **kwargs
+                event,
+                self.fraction,
+                col_label,
+                smooth,
+                axis,
+                mask_distance=mask_distance,
+                **kwargs,
             )
         if centr is not None:
             if isinstance(centr, tuple):
@@ -215,6 +252,7 @@ def _event_plot(
         axis=None,
         figsize=(9, 13),
         adapt_fontsize=True,
+        mask_distance=0.01,
         **kwargs,
     ):
         """Plot an event of the input matrix.
@@ -236,6 +274,11 @@ def _event_plot(
             axis to use
         figsize: tuple, optional
             figure size for plt.subplots
+        mask_distance: float, optional
+            Only regions are plotted that are closer to any of the data points than this distance,
+            relative to overall plot size. For instance, to only plot values
+            at the centroids, use mask_distance=0.01. If None, the plot is not masked.
+            Default is 0.01.
         kwargs: optional
             arguments for pcolormesh matplotlib function
 
@@ -283,6 +326,7 @@ def _event_plot(
             figsize=figsize,
             proj=crs_espg,
             adapt_fontsize=adapt_fontsize,
+            mask_distance=mask_distance,
             **kwargs,
         )
 
diff --git a/climada/util/plot.py b/climada/util/plot.py