
Commit fdb1301

Merge branch 'master' of github.com:ppdebreuck/modnet
2 parents 6afc37f + f0e2e99

9 files changed: +64 -26 lines

modnet/__init__.py (+1 -1)

@@ -1 +1 @@
-__version__ = "0.4.2"
+__version__ = "0.4.3"

modnet/featurizers/featurizers.py (+17 -4)

@@ -141,7 +141,10 @@ def _fit_apply_featurizers(
             _featurizers.set_n_jobs(self._n_jobs)

             return _featurizers.featurize_dataframe(
-                df, column, multiindex=True, ignore_errors=True
+                df,
+                column,
+                multiindex=True,
+                ignore_errors=getattr(self, "ignore_errors", True),
             )
         elif mode == "single":

@@ -164,7 +167,10 @@ def _fit_apply_featurizers(
                 )
                 start = time.monotonic_ns()
                 df = featurizer.featurize_dataframe(
-                    df, column, multiindex=True, ignore_errors=True
+                    df,
+                    column,
+                    multiindex=True,
+                    ignore_errors=getattr(self, "ignore_errors", True),
                 )
                 LOG.info(
                     f"Applied featurizer {featurizer.__class__.__name__} to column {column!r} in {(time.monotonic_ns() - start) * 1e-9} seconds"
@@ -244,7 +250,11 @@ def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame:
         else:
             df = CompositionToOxidComposition(
                 max_sites=-1 if getattr(self, "continuous_only", False) else None
-            ).featurize_dataframe(df, col_id=col_comp, ignore_errors=True)
+            ).featurize_dataframe(
+                df,
+                col_id=col_comp,
+                ignore_errors=getattr(self, "ignore_errors", True),
+            )
         df = self._fit_apply_featurizers(
             df,
             self.oxid_composition_featurizers,
@@ -311,7 +321,10 @@ def featurize_site(
             fingerprint, stats=self.site_stats
         )
         df = site_stats_fingerprint.featurize_dataframe(
-            df, "Input data|structure", multiindex=False, ignore_errors=True
+            df,
+            "Input data|structure",
+            multiindex=False,
+            ignore_errors=getattr(self, "ignore_errors", True),
         )

         if aliases:
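
Net effect of the featurizer changes: the hard-coded `ignore_errors=True` becomes `getattr(self, "ignore_errors", True)`, so a featurizer instance can opt back into loud failures by setting that attribute. Below is a minimal sketch of how a caller might use this; the `DeBreuck2020Featurizer` preset, the toy structure, and the `MODData` arguments are illustrative assumptions rather than part of this commit.

from pymatgen.core import Lattice, Structure
from modnet.preprocessing import MODData
from modnet.featurizers.presets import DeBreuck2020Featurizer  # assumed preset path

# A toy rock-salt structure, just to have something featurizable.
structure = Structure(Lattice.cubic(5.64), ["Na", "Cl"], [[0, 0, 0], [0.5, 0.5, 0.5]])

featurizer = DeBreuck2020Featurizer()
# New in this commit: if the featurizer instance defines `ignore_errors`,
# it is forwarded to matminer's featurize_dataframe. Setting it to False
# makes featurization errors raise instead of silently yielding NaN rows.
featurizer.ignore_errors = False

data = MODData(
    materials=[structure],
    targets=[0.0],
    target_names=["dummy"],
    featurizer=featurizer,
)
data.featurize(n_jobs=1)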

modnet/models/bayesian.py (+1 -1)

@@ -15,7 +15,7 @@
 try:
     import tensorflow_probability as tfp
 except ImportError:
-    raise RuntimeError(
+    raise ImportError(
         "`tensorflow-probability` is required for Bayesian models: install modnet[bayesian]."
     )
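
Because the guard now raises `ImportError` rather than `RuntimeError`, downstream code can treat the missing optional dependency like any other absent module. A small sketch, assuming the Bayesian model class is exposed as `modnet.models.bayesian.BayesianMODNetModel`:

try:
    from modnet.models.bayesian import BayesianMODNetModel
except ImportError:
    # Raised when tensorflow-probability (the modnet[bayesian] extra) is not
    # installed; previously this surfaced as a RuntimeError, which a plain
    # `except ImportError` would not catch.
    BayesianMODNetModel = None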

modnet/models/vanilla.py (+3 -1)

@@ -825,7 +825,9 @@ def evaluate(
                     f"Loss {loss} not recognized. Use mae, mse or a callable."
                 )
             else:
-                score.append(loss(y_true, y_pred[i]))
+                pass
+
+            score.append(loss(y_true, y_pred[i]))

         return np.mean(score)
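
The dedent moves `score.append(...)` out of the `else` branch, so scores are now collected even when `loss` arrives as a string and is resolved to a callable inside the `if` branch. A self-contained mimic of the pre-fix control flow (the helper below is illustrative, not modnet code) shows why the old version could return NaN:

import numpy as np

def buggy_mean_score(loss, y_true, y_pred):
    """Mimics the pre-fix control flow: the append only lives in the else."""
    score = []
    if isinstance(loss, str):
        if loss == "mae":
            loss = lambda a, b: np.mean(np.abs(a - b))
        elif loss == "mse":
            loss = lambda a, b: np.mean((a - b) ** 2)
        else:
            raise ValueError(f"Loss {loss} not recognized. Use mae, mse or a callable.")
    else:
        score.append(loss(y_true, y_pred))  # only ever reached for callables
    return np.mean(score)

# A string loss resolves correctly but nothing is appended, so the mean of an
# empty list is nan -- exactly what the new asserts in test_model.py guard against.
print(buggy_mean_score("mae", np.array([1.0]), np.array([2.0])))  # nan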

modnet/tests/conftest.py (+20 -1)

@@ -1,7 +1,9 @@
 import pytest
 from pathlib import Path
+from modnet.preprocessing import CompositionContainer

 from modnet.utils import get_hash_of_file
+from pymatgen.core import Structure


 _TEST_DATA_HASHES = {
@@ -41,7 +43,24 @@ def _load_moddata(filename):
     # what it was when created
     assert get_hash_of_file(data_file) == _TEST_DATA_HASHES[filename]

-    return MODData.load(data_file)
+    moddata = MODData.load(data_file)
+    # For forwards compatibility with pymatgen, we have to patch our old test data to have the following attributes
+    # to allow for depickling
+    # This is hopefully only a temporary solution, and in future, we should serialize pymatgen objects
+    # with Monty's `from_dict`/`to_dict` to avoid having to hack this private interface
+    for ind, s in enumerate(moddata.structures):
+        if isinstance(s, Structure):
+            # assume all previous data was periodic
+            moddata.structures[ind].lattice._pbc = [True, True, True]
+            for jnd, site in enumerate(s.sites):
+                # assume all of our previous data had ordered sites
+                moddata.structures[ind].sites[jnd].label = str(next(iter(site.species)))
+                # required for the global structure.is_ordered to work
+                moddata.structures[ind].sites[jnd].species._n_atoms = 1.0
+        elif isinstance(s, CompositionContainer):
+            moddata.structures[ind].composition._n_atoms = s.composition._natoms
+
+    return moddata


 @pytest.fixture(scope="function")
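
The comment in the patched fixture points at the longer-term fix: round-tripping pymatgen objects through their dict representation rather than pickling private attributes. In pymatgen this MSONable interface is spelled `as_dict`/`from_dict`; a minimal sketch:

from pymatgen.core import Lattice, Structure

structure = Structure(Lattice.cubic(5.64), ["Na", "Cl"], [[0, 0, 0], [0.5, 0.5, 0.5]])

# Serialize to a plain, JSON-friendly dict and rebuild it; this survives
# pymatgen version changes that break pickled private attributes such as
# lattice._pbc or species._n_atoms.
as_dict = structure.as_dict()
restored = Structure.from_dict(as_dict)
assert restored == structure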

modnet/tests/test_model.py (+13)

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 import pytest
+import numpy as np


 def test_train_small_model_single_target(subset_moddata, tf_session):
@@ -21,6 +22,7 @@ def test_train_small_model_single_target(subset_moddata, tf_session):

     model.fit(data, epochs=2)
     model.predict(data)
+    assert not np.isnan(model.evaluate(data))


 def test_train_small_model_single_target_classif(subset_moddata, tf_session):
@@ -49,6 +51,7 @@ def is_metal(egap):
     )

     model.fit(data, epochs=2)
+    assert not np.isnan(model.evaluate(data))


 def test_train_small_model_multi_target(subset_moddata, tf_session):
@@ -70,6 +73,7 @@ def test_train_small_model_multi_target(subset_moddata, tf_session):

     model.fit(data, epochs=2)
     model.predict(data)
+    assert not np.isnan(model.evaluate(data))


 def test_train_small_model_presets(subset_moddata, tf_session):
@@ -109,6 +113,7 @@ def test_train_small_model_presets(subset_moddata, tf_session):
     models = results[0]
     assert len(models) == len(modified_presets)
     assert len(models[0]) == num_nested
+    assert not np.isnan(model.evaluate(data))


 def test_model_integration(subset_moddata, tf_session):
@@ -134,6 +139,7 @@ def test_model_integration(subset_moddata, tf_session):
     loaded_model = MODNetModel.load("test")

     assert model.predict(data).equals(loaded_model.predict(data))
+    assert not np.isnan(model.evaluate(data))


 def test_train_small_bayesian_single_target(subset_moddata, tf_session):
@@ -156,6 +162,7 @@ def test_train_small_bayesian_single_target(subset_moddata, tf_session):
     model.fit(data, epochs=2)
     model.predict(data)
     model.predict(data, return_unc=True)
+    assert not np.isnan(model.evaluate(data))


 def test_train_small_bayesian_single_target_classif(subset_moddata, tf_session):
@@ -186,6 +193,7 @@ def is_metal(egap):
     model.fit(data, epochs=2)
     model.predict(data)
     model.predict(data, return_unc=True)
+    assert not np.isnan(model.evaluate(data))


 def test_train_small_bayesian_multi_target(subset_moddata, tf_session):
@@ -208,6 +216,7 @@ def test_train_small_bayesian_multi_target(subset_moddata, tf_session):
     model.fit(data, epochs=2)
     model.predict(data)
     model.predict(data, return_unc=True)
+    assert not np.isnan(model.evaluate(data))


 def test_train_small_bootstrap_single_target(subset_moddata, tf_session):
@@ -232,6 +241,7 @@ def test_train_small_bootstrap_single_target(subset_moddata, tf_session):
     model.fit(data, epochs=2)
     model.predict(data)
     model.predict(data, return_unc=True)
+    assert not np.isnan(model.evaluate(data))


 def test_train_small_bootstrap_single_target_classif(small_moddata, tf_session):
@@ -264,6 +274,7 @@ def is_metal(egap):
     model.fit(data, epochs=2)
     model.predict(data)
     model.predict(data, return_unc=True)
+    assert not np.isnan(model.evaluate(data))


 def test_train_small_bootstrap_multi_target(small_moddata, tf_session):
@@ -333,3 +344,5 @@ def test_train_small_bootstrap_presets(small_moddata, tf_session):
     models = results[0]
     assert len(models) == len(modified_presets)
     assert len(models[0]) == num_nested
+
+    assert not np.isnan(model.evaluate(data))

modnet/tests/test_preprocessing.py (+7 -16)

@@ -12,8 +12,14 @@ def check_column_values(new: MODData, reference: MODData, tolerance=0.03):
     Allows for some columns to be checked more loosely (see inline comment below).

     """
+    new_cols = set(new.df_featurized.columns)
+    old_cols = set(reference.df_featurized.columns)
+
+    # Check that the new df only adds new columns and is not missing anything
+    assert not (old_cols - new_cols)
+
     error_cols = set()
-    for col in new.df_featurized.columns:
+    for col in old_cols:
         if not (
             np.absolute(
                 (
@@ -349,14 +355,6 @@ def test_small_moddata_featurization(small_moddata_2023, featurizer_mode):
     featurizer.featurizer_mode = featurizer_mode
     new = MODData(structures, targets, target_names=names, featurizer=featurizer)
     new.featurize(fast=False, n_jobs=1)
-
-    new_cols = sorted(new.df_featurized.columns.tolist())
-    old_cols = sorted(old.df_featurized.columns.tolist())
-
-    for i in range(len(old_cols)):
-        assert new_cols[i] == old_cols[i]
-
-    np.testing.assert_array_equal(old_cols, new_cols)
     check_column_values(new, old, tolerance=0.03)


@@ -376,13 +374,6 @@ def test_small_moddata_composition_featurization(
     new = MODData(materials=compositions, featurizer=featurizer)
     new.featurize(fast=False, n_jobs=1)

-    new_cols = sorted(new.df_featurized.columns.tolist())
-    ref_cols = sorted(reference.df_featurized.columns.tolist())
-
-    for i in range(len(ref_cols)):
-        # print(new_cols[i], ref_cols[i])
-        assert new_cols[i] == ref_cols[i]
-
     # assert relative error below 3 percent
     check_column_values(new, reference, tolerance=0.03)
requirements.txt (+1 -1)

@@ -3,5 +3,5 @@ pandas==1.5.2
 scikit-learn==1.3.2
 matminer==0.9.2
 numpy>=1.25
-pymatgen==2023.11.12
+pymatgen==2024.3.1
 scikit-learn==1.3.2

setup.py (+1 -1)

@@ -33,7 +33,7 @@
     packages=setuptools.find_packages(),
     install_requires=[
         "pandas~=1.5",
-        "tensorflow~=2.10",
+        "tensorflow~=2.10,<2.12",
         "pymatgen>=2023",
         "matminer~=0.9",
         "numpy>=1.24",
