Skip to content

Commit bdeb35b

Browse files
committed
Merge branch 'master' of https://github.com/ppdebreuck/modnet
2 parents c89f179 + b4e38de commit bdeb35b

File tree

13 files changed

+872
-130
lines changed

13 files changed

+872
-130
lines changed

.readthedocs.yaml

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,14 @@ version: 2
33

44
sphinx:
55
configuration: docs/source/conf.py
6+
67
python:
7-
version: 3.8
88
install:
99
- requirements: docs/requirements.txt
1010
- method: setuptools
1111
path: docs/
12+
13+
build:
14+
os: ubuntu-22.04
15+
tools:
16+
python: "3.11"

modnet/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.2.1"
1+
__version__ = "0.3.0"

modnet/featurizers/featurizers.py

Lines changed: 35 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from typing import Optional, Iterable, Tuple, Dict
33

44
import pandas as pd
5+
from pymatgen.core import Composition
56

67
from matminer.featurizers.base import MultipleFeaturizer, BaseFeaturizer
78
from matminer.featurizers.structure import SiteStatsFingerprint
@@ -204,14 +205,41 @@ def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame:
204205

205206
if self.oxid_composition_featurizers:
206207
LOG.info("Applying oxidation state featurizers...")
208+
# Get integer composition if some are not
209+
col_comp = "composition"
210+
if not all(
211+
all(amt == int(amt) for amt in comp.values())
212+
for comp in df["composition"].values
213+
):
214+
LOG.info(
215+
"There are non-integer compositions in the dataset, and featurizers that need them. "
216+
"Computing..."
217+
)
218+
df["integer_composition"] = [
219+
Composition(
220+
comp.get_integer_formula_and_factor(
221+
max_denominator=10
222+
if getattr(self, "fast_oxid", False)
223+
else 100
224+
)[0]
225+
)
226+
for comp in df["composition"].values
227+
]
228+
# df["integer_composition"] = df["composition"].apply(
229+
# lambda c: c.get_integer_formula_and_factor(
230+
# max_denominator=10 if getattr(self, "fast_oxid", False) else 100
231+
# )[0]
232+
# )
233+
234+
col_comp = "integer_composition"
207235
if getattr(self, "fast_oxid", False):
208236
df = CompositionToOxidComposition(
209237
all_oxi_states=False, max_sites=-1
210-
).featurize_dataframe(df, "composition")
238+
).featurize_dataframe(df, col_id=col_comp)
211239
else:
212-
df = CompositionToOxidComposition().featurize_dataframe(
213-
df, "composition"
214-
)
240+
df = CompositionToOxidComposition(
241+
max_sites=-1 if getattr(self, "continuous_only", False) else None
242+
).featurize_dataframe(df, col_id=col_comp, ignore_errors=True)
215243
df = self._fit_apply_featurizers(
216244
df,
217245
self.oxid_composition_featurizers,
@@ -271,14 +299,16 @@ def featurize_site(
271299
df.columns = ["Input data|" + x for x in df.columns]
272300

273301
for fingerprint in self.site_featurizers:
302+
fingerprint_name = fingerprint.__class__.__name__
303+
if fingerprint_name == "SOAP":
304+
fingerprint.fit(df["Input data|structure"])
274305
site_stats_fingerprint = SiteStatsFingerprint(
275306
fingerprint, stats=self.site_stats
276307
)
277308
df = site_stats_fingerprint.featurize_dataframe(
278309
df, "Input data|structure", multiindex=False, ignore_errors=True
279310
)
280311

281-
fingerprint_name = fingerprint.__class__.__name__
282312
if aliases:
283313
fingerprint_name = aliases.get(fingerprint_name, fingerprint_name)
284314
if "|" not in fingerprint_name:

modnet/featurizers/presets/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,10 @@
77
from typing import Dict, Type
88
from .debreuck_2020 import DeBreuck2020Featurizer, CompositionOnlyFeaturizer
99
from .matminer_2023 import Matminer2023Featurizer, CompositionOnlyMatminer2023Featurizer
10+
from .matminer_all_2023 import (
11+
MatminerAll2023Featurizer,
12+
CompositionOnlyMatminerAll2023Featurizer,
13+
)
1014
from modnet.featurizers import MODFeaturizer
1115

1216
DEFAULT_FEATURIZER: str = "Matminer2023"
@@ -16,5 +20,7 @@
1620
"DeBreuck2020": DeBreuck2020Featurizer,
1721
"CompositionOnly": CompositionOnlyFeaturizer,
1822
"Matminer2023": Matminer2023Featurizer,
23+
"MatminerAll2023": MatminerAll2023Featurizer,
1924
"CompositionOnlyMatminer2023": CompositionOnlyMatminer2023Featurizer,
25+
"CompositionOnlyMatminerAll2023": CompositionOnlyMatminerAll2023Featurizer,
2026
}

modnet/featurizers/presets/matminer_2023.py

Lines changed: 50 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ class Matminer2023Featurizer(modnet.featurizers.MODFeaturizer):
1515
1616
"""
1717

18-
def __init__(self, fast_oxid: bool = False):
18+
def __init__(self, fast_oxid: bool = False, continuous_only: bool = False):
1919
"""Creates the featurizer and imports all featurizer functions.
2020
2121
Parameters:
@@ -28,8 +28,9 @@ def __init__(self, fast_oxid: bool = False):
2828
"""
2929

3030
super().__init__()
31-
self.load_featurizers()
31+
self.continuous_only = continuous_only
3232
self.fast_oxid = fast_oxid
33+
self.load_featurizers()
3334

3435
def load_featurizers(self):
3536
with contextlib.redirect_stdout(None):
@@ -82,19 +83,33 @@ def load_featurizers(self):
8283
VoronoiFingerprint,
8384
)
8485

85-
self.composition_featurizers = (
86-
AtomicOrbitals(),
87-
AtomicPackingEfficiency(),
88-
BandCenter(),
89-
ElementFraction(),
90-
ElementProperty.from_preset("magpie"),
91-
IonProperty(),
92-
Miedema(),
93-
Stoichiometry(),
94-
TMetalFraction(),
95-
ValenceOrbital(),
96-
YangSolidSolution(),
97-
)
86+
if self.continuous_only:
87+
magpie_featurizer = ElementProperty.from_preset("magpie")
88+
magpie_featurizer.stats = ["mean", "avg_dev"]
89+
90+
self.composition_featurizers = (
91+
BandCenter(),
92+
ElementFraction(),
93+
magpie_featurizer,
94+
IonProperty(fast=self.fast_oxid),
95+
Stoichiometry(p_list=[2, 3, 5, 7, 10]),
96+
TMetalFraction(),
97+
ValenceOrbital(props=["frac"]),
98+
)
99+
else:
100+
self.composition_featurizers = (
101+
AtomicOrbitals(),
102+
AtomicPackingEfficiency(),
103+
BandCenter(),
104+
ElementFraction(),
105+
ElementProperty.from_preset("magpie"),
106+
IonProperty(),
107+
Miedema(),
108+
Stoichiometry(),
109+
TMetalFraction(),
110+
ValenceOrbital(),
111+
YangSolidSolution(),
112+
)
98113

99114
self.oxid_composition_featurizers = (
100115
ElectronegativityDiff(),
@@ -145,20 +160,24 @@ def featurize_composition(self, df):
145160

146161
df = super().featurize_composition(df)
147162

148-
_orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
149-
df["AtomicOrbitals|HOMO_character"] = df["AtomicOrbitals|HOMO_character"].map(
150-
_orbitals
151-
)
152-
df["AtomicOrbitals|LUMO_character"] = df["AtomicOrbitals|LUMO_character"].map(
153-
_orbitals
154-
)
155-
156-
df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply(
157-
lambda x: -1 if not isinstance(x, str) else Element(x).Z
158-
)
159-
df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply(
160-
lambda x: -1 if not isinstance(x, str) else Element(x).Z
161-
)
163+
if not self.continuous_only:
164+
_orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
165+
df["AtomicOrbitals|HOMO_character"] = df[
166+
"AtomicOrbitals|HOMO_character"
167+
].map(_orbitals)
168+
df["AtomicOrbitals|LUMO_character"] = df[
169+
"AtomicOrbitals|LUMO_character"
170+
].map(_orbitals)
171+
172+
df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply(
173+
lambda x: -1 if not isinstance(x, str) else Element(x).Z
174+
)
175+
df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply(
176+
lambda x: -1 if not isinstance(x, str) else Element(x).Z
177+
)
178+
179+
else:
180+
df.drop(columns=["IonProperty|max ionic char"], inplace=True)
162181

163182
return modnet.featurizers.clean_df(df)
164183

@@ -224,8 +243,8 @@ class CompositionOnlyMatminer2023Featurizer(Matminer2023Featurizer):
224243
225244
"""
226245

227-
def __init__(self):
228-
super().__init__()
246+
def __init__(self, continuous_only: bool = False, fast_oxid: bool = False):
247+
super().__init__(fast_oxid=fast_oxid, continuous_only=continuous_only)
229248
self.oxid_composition_featurizers = ()
230249
self.structure_featurizers = ()
231250
self.site_featurizers = ()

0 commit comments

Comments
 (0)