From 922b2b7924085d91410ecfdc86e950373477a4d6 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Mon, 27 Jun 2022 12:04:23 +0200 Subject: [PATCH 1/8] Upgraded pymatgen and matminer requirements --- README.md | 6 ------ modnet/featurizers/featurizers.py | 8 ++++---- modnet/preprocessing.py | 10 +++++----- setup.py | 8 ++++---- 4 files changed, 13 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index d4ffd405..72a4b761 100644 --- a/README.md +++ b/README.md @@ -45,12 +45,6 @@ activate the environment: conda activate modnet ``` -Then, install pymatgen v2020.8.13 with conda, which will bundle several pre-built dependencies (e.g., numpy, scipy): - -```shell -conda install -c conda-forge pymatgen=2020.8.13 -``` - Finally, install MODNet from PyPI with pip: ```bash diff --git a/modnet/featurizers/featurizers.py b/modnet/featurizers/featurizers.py index 0835668c..0fd3ec77 100644 --- a/modnet/featurizers/featurizers.py +++ b/modnet/featurizers/featurizers.py @@ -70,7 +70,7 @@ def featurize(self, df: pd.DataFrame) -> pd.DataFrame: Arguments: df: the input dataframe with a `"structure"` column - containing `pymatgen.Structure` objects. + containing `pymatgen.core.structure.Structure` objects. Returns: The featurized DataFrame. @@ -137,7 +137,7 @@ def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame: Arguments: df: the input dataframe with a `"structure"` column - containing `pymatgen.Structure` objects. + containing `pymatgen.core.structure.Structure` objects. Returns: pandas.DataFrame: the decorated DataFrame, or an empty @@ -184,7 +184,7 @@ def featurize_structure(self, df: pd.DataFrame) -> pd.DataFrame: Arguments: df: the input dataframe with a `"structure"` column - containing `pymatgen.Structure` objects. + containing `pymatgen.core.structure.Structure` objects. Returns: pandas.DataFrame: the decorated DataFrame. 
@@ -206,7 +206,7 @@ def featurize_site( Arguments: df: the input dataframe with a `"structure"` column - containing `pymatgen.Structure` objects. + containing `pymatgen.core.structure.Structure` objects. aliases: optional dictionary to map matminer output column names to new aliases, mostly used for backwards-compatibility. diff --git a/modnet/preprocessing.py b/modnet/preprocessing.py index 8cf3bed5..7b888eee 100644 --- a/modnet/preprocessing.py +++ b/modnet/preprocessing.py @@ -13,7 +13,7 @@ from typing import Dict, List, Union, Optional, Callable, Hashable, Iterable, Tuple from functools import partial -from pymatgen import Structure, Composition +from pymatgen.core import Structure, Composition from sklearn.feature_selection import mutual_info_regression, mutual_info_classif from sklearn.utils import resample @@ -539,14 +539,14 @@ def merge_ranked(lists: List[List[Hashable]]) -> List[Hashable]: class MODData: -    """The MODData class takes takes a list of `pymatgen.Structure` +    """The MODData class takes a list of `pymatgen.core.structure.Structure` objects and creates a `pandas.DataFrame` that contains many matminer features per structure. It then uses mutual information between features and targets, and between the features themselves, to perform feature selection using relevance-redundancy indices. Attributes: -    df_structure (pd.DataFrame): dataframe storing the `pymatgen.Structure` +    df_structure (pd.DataFrame): dataframe storing the `pymatgen.core.structure.Structure` representations for each structured, indexed by ID. df_targets (pd.Dataframe): dataframe storing the prediction targets per structure, indexed by ID. 
@@ -906,12 +906,12 @@ def rebalance(self): @property def structures(self) -> List[Union[Structure, CompositionContainer]]: -        """Returns the list of `pymatgen.Structure` objects.""" +        """Returns the list of `pymatgen.core.structure.Structure` objects.""" return list(self.df_structure["structure"]) @property def compositions(self) -> List[Union[Structure, CompositionContainer]]: -        """Returns the list of materials as`pymatgen.Composition` objects.""" +        """Returns the list of materials as `pymatgen.core.composition.Composition` objects.""" return [s.composition for s in self.df_structure["structure"]] @property diff --git a/setup.py b/setup.py index 45d311d3..bb05700a 100644 --- a/setup.py +++ b/setup.py @@ -37,10 +37,10 @@ "pandas>=0.25.3", "tensorflow>=2.4", "tensorflow-probability>=0.12", - "pymatgen>=2020,<2020.9", - "matminer>=0.6.2", - "numpy>=1.18.3", - "scikit-learn>=0.23,<0.24", + "pymatgen>=2022.5.17", + "matminer>=0.7.6", + "numpy>=1.22.3", + "scikit-learn>=1.1.0", ], tests_require=tests_require, test_suite="modnet.tests", From 62c482571b9a1ba0d6f3a825f095204475f20153 Mon Sep 17 00:00:00 2001 From: ppdebreuck Date: Tue, 11 Jul 2023 16:57:26 +0200 Subject: [PATCH 2/8] backward compatibility warning --- modnet/models/vanilla.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py index 77119444..bb145796 100644 --- a/modnet/models/vanilla.py +++ b/modnet/models/vanilla.py @@ -846,6 +846,11 @@ def _restore_model(self): fill_value=-1, ).fit(np.zeros((1, self.n_feat))), ) + if not hasattr(self, "targets_groups"): + self.targets_groups = [x for subl in self.targets for x in subl] + LOG.warning( + "Installed modnet version (v>=0.4.0) does not match loaded model (v<0.4.0) and may result in errors. Please retrain or change your modnet version !" 
+ ) def save(self, filename: str) -> None: """Save the `MODNetModel` to filename: From a408f0ac02120fc63b9f1987d618deb2583958f6 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Fri, 14 Jul 2023 09:04:03 +0200 Subject: [PATCH 3/8] Possibility to remove all NaNs features or not after featurization. --- modnet/featurizers/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modnet/featurizers/utils.py b/modnet/featurizers/utils.py index 2297b6ef..3d54863e 100644 --- a/modnet/featurizers/utils.py +++ b/modnet/featurizers/utils.py @@ -3,12 +3,13 @@ __all__ = ("clean_df",) -def clean_df(df): +def clean_df(df, drop_allnan: bool = True): """Cleans dataframe by dropping missing values, replacing NaN's and infinities and selecting only columns containing numerical data. Args: df (pd.DataFrame): the dataframe to clean. + drop_allnan: if True, clean_df will remove features that are fully NaNs. Returns: pandas.DataFrame: the cleaned dataframe. @@ -16,7 +17,8 @@ def clean_df(df): """ df = df.select_dtypes(include="number") - df = df.dropna(axis=1, how="all") + if drop_allnan: + df = df.dropna(axis=1, how="all") df = df.replace([np.inf, -np.inf, np.nan], np.nan) return df From b9700c721e303f09ae455109ddc86d413d924cd6 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Fri, 14 Jul 2023 09:09:48 +0200 Subject: [PATCH 4/8] Arg in featurize. 
--- modnet/preprocessing.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modnet/preprocessing.py b/modnet/preprocessing.py index d8f5e2e7..f34914ba 100644 --- a/modnet/preprocessing.py +++ b/modnet/preprocessing.py @@ -706,7 +706,9 @@ def __init__( self.df_structure = pd.DataFrame({"id": structure_ids, "structure": materials}) self.df_structure.set_index("id", inplace=True) - def featurize(self, fast: bool = False, db_file=None, n_jobs=None): + def featurize( + self, fast: bool = False, db_file=None, n_jobs=None, drop_allnan: bool = True + ): """For the input structures, construct many matminer features and save a featurized dataframe. If `db_file` is specified, this method will try to load previous feature calculations for each @@ -720,6 +722,7 @@ def featurize(self, fast: bool = False, db_file=None, n_jobs=None): Note : The database will be downloaded in this case, and takes around 2GB of space on your drive ! db_file: Deprecated. Do Not use this anymore. + drop_allnan: if True, features that are fully NaNs will be removed. """ @@ -778,7 +781,7 @@ def featurize(self, fast: bool = False, db_file=None, n_jobs=None): df_final = self.featurizer.featurize(self.df_structure) # replace infinite values by nan that are handled during the fit - df_final = clean_df(df_final) + df_final = clean_df(df_final, drop_allnan=drop_allnan) self.df_featurized = df_final LOG.info("Data has successfully been featurized!") From 7f41fd795c3a42edc737bcbcffb4e166050a79d9 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Fri, 14 Jul 2023 09:36:24 +0200 Subject: [PATCH 5/8] Arg in preset because there are clean_df there as well. 
--- .../featurizers/presets/matminer_all_2023.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/modnet/featurizers/presets/matminer_all_2023.py b/modnet/featurizers/presets/matminer_all_2023.py index 2e2a4e4b..3e26d60f 100644 --- a/modnet/featurizers/presets/matminer_all_2023.py +++ b/modnet/featurizers/presets/matminer_all_2023.py @@ -16,7 +16,12 @@ class MatminerAll2023Featurizer(modnet.featurizers.MODFeaturizer): """ - def __init__(self, fast_oxid: bool = False, continuous_only: bool = False): + def __init__( + self, + fast_oxid: bool = False, + continuous_only: bool = False, + drop_allnan: bool = True, + ): """Creates the featurizer and imports all featurizer functions. Parameters: @@ -28,12 +33,14 @@ def __init__(self, fast_oxid: bool = False, continuous_only: bool = False): continuous_only: Whether to keep only the features that are continuous with respect to the composition (only for composition featurizers). Discontinuous features may lead to discontinuities in the model predictions. + drop_allnan: if True, features that are fully NaNs will be removed. 
""" super().__init__() self.fast_oxid = fast_oxid self.continuous_only = continuous_only + self.drop_allnan = drop_allnan self.load_featurizers() def load_featurizers(self): @@ -323,7 +330,7 @@ def featurize_composition(self, df): if self.oxid_composition_featurizers: df.drop(columns=["IonProperty|max ionic char"], inplace=True) - return modnet.featurizers.clean_df(df) + return modnet.featurizers.clean_df(df, drop_allnan=self.drop_allnan) def featurize_structure(self, df): """Applies the preset structural featurizers to the input dataframe, @@ -359,7 +366,7 @@ def _int_map(x): "GlobalSymmetryFeatures|is_centrosymmetric" ].map(_int_map) - return modnet.featurizers.clean_df(df) + return modnet.featurizers.clean_df(df, drop_allnan=self.drop_allnan) def featurize_site(self, df): """Applies the preset site featurizers to the input dataframe, @@ -376,7 +383,7 @@ def featurize_site(self, df): df = super().featurize_site(df, aliases=aliases) df = df.loc[:, (df != 0).any(axis=0)] - return modnet.featurizers.clean_df(df) + return modnet.featurizers.clean_df(df, drop_allnan=self.drop_allnan) class CompositionOnlyMatminerAll2023Featurizer(MatminerAll2023Featurizer): @@ -391,9 +398,14 @@ def __init__( self, continuous_only: bool = False, oxidation_featurizers: bool = False, + drop_allnan: bool = True, fast_oxid: bool = False, ): - super().__init__(fast_oxid=fast_oxid, continuous_only=continuous_only) + super().__init__( + fast_oxid=fast_oxid, + continuous_only=continuous_only, + drop_allnan=drop_allnan, + ) self.fast_oxid = fast_oxid self.structure_featurizers = () self.site_featurizers = () From 5874ef9e71a2b750e96f194901c1b2d2c2afca0f Mon Sep 17 00:00:00 2001 From: gbrunin Date: Fri, 14 Jul 2023 12:15:15 +0200 Subject: [PATCH 6/8] Easier setting of drop_allnan. 
--- modnet/featurizers/featurizers.py | 7 ++++++- modnet/featurizers/presets/debreuck_2020.py | 6 +++--- modnet/featurizers/presets/matminer_2023.py | 6 +++--- modnet/featurizers/presets/matminer_all_2023.py | 5 ----- modnet/preprocessing.py | 2 ++ 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/modnet/featurizers/featurizers.py b/modnet/featurizers/featurizers.py index 4422c570..49bbcca0 100644 --- a/modnet/featurizers/featurizers.py +++ b/modnet/featurizers/featurizers.py @@ -48,16 +48,18 @@ class MODFeaturizer(abc.ABC): site_stats: Tuple[str] = ("mean", "std_dev") featurizer_mode: str = "multi" - def __init__(self, n_jobs=None): + def __init__(self, n_jobs=None, drop_allnan: bool = True): """Initialise the MODFeaturizer object with a requested number of threads to use during featurization. Arguments: n_jobs: The number of threads to use. If `None`, matminer will use `multiprocessing.cpu_count()` by default. + drop_allnan: if True, features that are fully NaNs will be removed. """ self.set_n_jobs(n_jobs) + self.set_drop_allnan(drop_allnan) def set_n_jobs(self, n_jobs: Optional[int]): """Set the no. of threads to pass to matminer for featurizer @@ -70,6 +72,9 @@ def set_n_jobs(self, n_jobs: Optional[int]): """ self._n_jobs = n_jobs + def set_drop_allnan(self, drop_allnan: bool = True): + self.drop_allnan = drop_allnan + def featurize(self, df: pd.DataFrame) -> pd.DataFrame: """Run all of the preset featurizers on the input dataframe. 
diff --git a/modnet/featurizers/presets/debreuck_2020.py b/modnet/featurizers/presets/debreuck_2020.py index dd588f14..065506bf 100644 --- a/modnet/featurizers/presets/debreuck_2020.py +++ b/modnet/featurizers/presets/debreuck_2020.py @@ -175,7 +175,7 @@ def featurize_composition(self, df): lambda x: -1 if not isinstance(x, str) else Element(x).Z ) - return modnet.featurizers.clean_df(df) + return modnet.featurizers.clean_df(df, drop_allnan=self.drop_allnan) def featurize_structure(self, df): """Applies the preset structural featurizers to the input dataframe, @@ -226,7 +226,7 @@ def _int_map(x): "GlobalSymmetryFeatures|is_centrosymmetric" ].map(_int_map) - return modnet.featurizers.clean_df(df) + return modnet.featurizers.clean_df(df, drop_allnan=self.drop_allnan) def featurize_site(self, df): """Applies the preset site featurizers to the input dataframe, @@ -243,7 +243,7 @@ def featurize_site(self, df): df = super().featurize_site(df, aliases=aliases) df = df.loc[:, (df != 0).any(axis=0)] - return modnet.featurizers.clean_df(df) + return modnet.featurizers.clean_df(df, drop_allnan=self.drop_allnan) class CompositionOnlyFeaturizer(DeBreuck2020Featurizer): diff --git a/modnet/featurizers/presets/matminer_2023.py b/modnet/featurizers/presets/matminer_2023.py index 0d67a2a6..7e557b69 100644 --- a/modnet/featurizers/presets/matminer_2023.py +++ b/modnet/featurizers/presets/matminer_2023.py @@ -179,7 +179,7 @@ def featurize_composition(self, df): else: df.drop(columns=["IonProperty|max ionic char"], inplace=True) - return modnet.featurizers.clean_df(df) + return modnet.featurizers.clean_df(df, drop_allnan=self.drop_allnan) def featurize_structure(self, df): """Applies the preset structural featurizers to the input dataframe, @@ -215,7 +215,7 @@ def _int_map(x): "GlobalSymmetryFeatures|is_centrosymmetric" ].map(_int_map) - return modnet.featurizers.clean_df(df) + return modnet.featurizers.clean_df(df, drop_allnan=self.drop_allnan) def featurize_site(self, df): 
"""Applies the preset site featurizers to the input dataframe, @@ -232,7 +232,7 @@ def featurize_site(self, df): df = super().featurize_site(df, aliases=aliases) df = df.loc[:, (df != 0).any(axis=0)] - return modnet.featurizers.clean_df(df) + return modnet.featurizers.clean_df(df, drop_allnan=self.drop_allnan) class CompositionOnlyMatminer2023Featurizer(Matminer2023Featurizer): diff --git a/modnet/featurizers/presets/matminer_all_2023.py b/modnet/featurizers/presets/matminer_all_2023.py index 3e26d60f..bfdfdc9e 100644 --- a/modnet/featurizers/presets/matminer_all_2023.py +++ b/modnet/featurizers/presets/matminer_all_2023.py @@ -20,7 +20,6 @@ def __init__( self, fast_oxid: bool = False, continuous_only: bool = False, - drop_allnan: bool = True, ): """Creates the featurizer and imports all featurizer functions. @@ -33,14 +32,12 @@ def __init__( continuous_only: Whether to keep only the features that are continuous with respect to the composition (only for composition featurizers). Discontinuous features may lead to discontinuities in the model predictions. - drop_allnan: if True, features that are fully NaNs will be removed. 
""" super().__init__() self.fast_oxid = fast_oxid self.continuous_only = continuous_only - self.drop_allnan = drop_allnan self.load_featurizers() def load_featurizers(self): @@ -398,13 +395,11 @@ def __init__( self, continuous_only: bool = False, oxidation_featurizers: bool = False, - drop_allnan: bool = True, fast_oxid: bool = False, ): super().__init__( fast_oxid=fast_oxid, continuous_only=continuous_only, - drop_allnan=drop_allnan, ) self.fast_oxid = fast_oxid self.structure_featurizers = () diff --git a/modnet/preprocessing.py b/modnet/preprocessing.py index f34914ba..f7690d02 100644 --- a/modnet/preprocessing.py +++ b/modnet/preprocessing.py @@ -740,6 +740,8 @@ def featurize( if n_jobs is not None: self.featurizer.set_n_jobs(n_jobs) + self.featurizer.set_drop_allnan(drop_allnan) + if self.df_featurized is not None: raise RuntimeError("Not overwriting existing featurized dataframe.") From bdc8cf15c57a5b01b89a36e4af6403e4771852b2 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Fri, 14 Jul 2023 12:21:18 +0200 Subject: [PATCH 7/8] Let this for another PR. --- modnet/models/vanilla.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modnet/models/vanilla.py b/modnet/models/vanilla.py index bb145796..77119444 100644 --- a/modnet/models/vanilla.py +++ b/modnet/models/vanilla.py @@ -846,11 +846,6 @@ def _restore_model(self): fill_value=-1, ).fit(np.zeros((1, self.n_feat))), ) - if not hasattr(self, "targets_groups"): - self.targets_groups = [x for subl in self.targets for x in subl] - LOG.warning( - "Installed modnet version (v>=0.4.0) does not match loaded model (v<0.4.0) and may result in errors. Please retrain or change your modnet version !" 
- ) def save(self, filename: str) -> None: """Save the `MODNetModel` to filename: From 476b93a800b1d4e948ce4c52a7827bf2bb2a4a55 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Aug 2023 09:51:53 +0000 Subject: [PATCH 8/8] Bump tensorflow from 2.11.0 to 2.13.0 Bumps [tensorflow](https://github.com/tensorflow/tensorflow) from 2.11.0 to 2.13.0. - [Release notes](https://github.com/tensorflow/tensorflow/releases) - [Changelog](https://github.com/tensorflow/tensorflow/blob/master/RELEASE.md) - [Commits](https://github.com/tensorflow/tensorflow/compare/v2.11.0...v2.13.0) --- updated-dependencies: - dependency-name: tensorflow dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f82c7c1d..2a70e616 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -tensorflow==2.11.0 +tensorflow==2.13.0 tensorflow-probability==0.19.0 pandas==1.5.2 pymatgen==2023.7.20