Skip to content

Commit 4c50516

Browse files
committed
Merge branch 'master' of https://github.com/ppdebreuck/modnet
2 parents bdeb35b + d4cfcc4 commit 4c50516

File tree

8 files changed

+515
-139
lines changed

8 files changed

+515
-139
lines changed

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
<div align="center">
2+
<img src="img/modnet_logo.svg" alt="modnet-logo" width=200>
3+
<br>
4+
15
# MODNet: Material Optimal Descriptor Network
26

37
[![arXiv](https://img.shields.io/badge/arXiv-2004.14766-brightgreen)](https://arxiv.org/abs/2004.14766) [![Build Status](https://img.shields.io/github/actions/workflow/status/ppdebreuck/modnet/ci.yml?logo=github&branch=main)](https://github.com/ppdebreuck/modnet/actions?query=branch%3Amaster+) [![Read the Docs](https://img.shields.io/readthedocs/modnet)](https://modnet.readthedocs.io/en/latest/)
48

5-
<p align="center">
6-
<img src="img/modnet_logo.svg" alt="modnet-logo" width=200>
7-
<br>
8-
</p>
9+
</div>
910

1011
<a name="introduction"></a>
1112
## Introduction
Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Training the elastic properties\n",
8+
"\n",
9+
"This notebook goes through the multi-target usage of MODNet. Either (1) an n-dimensional tree-like m-MODNet model can be created, or (2) a simple n-dimensional vector output model."
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": null,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"# notebook dependencies\n",
19+
"from modnet.models import MODNetModel\n",
20+
"from modnet.preprocessing import MODData\n",
21+
"from modnet.hyper_opt import FitGenetic\n",
22+
"from modnet.models import MODNetModel\n",
23+
"from sklearn.model_selection import train_test_split\n",
24+
"import numpy as np\n",
25+
"import time"
26+
]
27+
},
28+
{
29+
"cell_type": "markdown",
30+
"metadata": {},
31+
"source": [
32+
"## 1. Loading the dataset and creating the MODData instance\n",
33+
"\n",
34+
"The elastic properties from matminer datasets is used in this example"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": null,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"from matminer.datasets import load_dataset\n",
44+
"df = load_dataset(\"elastic_tensor_2015\")\n",
45+
"compositions = [s.composition for s in df[\"structure\"]]\n",
46+
"G_VRH = df[\"G_VRH\"].values\n",
47+
"K_VRH = df[\"K_VRH\"].values\n",
48+
"poisson = df[\"poisson_ratio\"].values"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": null,
54+
"metadata": {},
55+
"outputs": [],
56+
"source": [
57+
"data = MODData(materials = compositions,\n",
58+
" targets = np.array([G_VRH,K_VRH,poisson]).T, # one property per column\n",
59+
" target_names = [\"G_VRH\",\"K_VRH\",\"p\"]\n",
60+
" )\n",
61+
"data.featurize()\n",
62+
"\n",
63+
"idx_split = train_test_split(range(len(compositions)), test_size=0.2)\n",
64+
"train_data, test_data = data.split(idx_split)"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": null,
70+
"metadata": {},
71+
"outputs": [],
72+
"source": [
73+
"train_data.feature_selection(n_jobs=4, use_precomputed_cross_nmi=True)\n",
74+
"train_data.save(\"data/multi_prop_traindata\")\n",
75+
"test_data.save(\"data/multi_prop_testdata\")"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": null,
81+
"metadata": {},
82+
"outputs": [],
83+
"source": [
84+
"train_data.df_targets.describe()"
85+
]
86+
},
87+
{
88+
"cell_type": "markdown",
89+
"metadata": {},
90+
"source": [
91+
"# 2. Tree MODNetModel"
92+
]
93+
},
94+
{
95+
"cell_type": "code",
96+
"execution_count": null,
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"# loading train and test data\n",
101+
"train_data = MODData.load(\"data/multi_prop_traindata\")\n",
102+
"test_data = MODData.load(\"data/multi_prop_testdata\")"
103+
]
104+
},
105+
{
106+
"cell_type": "markdown",
107+
"metadata": {},
108+
"source": [
109+
"### Model\n",
110+
"Each property is put in a different inner list: \n",
111+
"\n",
112+
"targets = [[[\"G_VRH\"],[\"K_VRH\"],[\"p\"]]] \n",
113+
"\n",
114+
"This guarantees that the architecture will split on those properties, with multiple scalar output layers!\n"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"metadata": {},
121+
"outputs": [],
122+
"source": [
123+
"# model creation - carefully observe the architecture that contains multiple output layers\n",
124+
"model = MODNetModel([[[\"G_VRH\"],[\"K_VRH\"],[\"p\"]]], weights={\"G_VRH\":1, \"K_VRH\":1, \"p\":1})\n",
125+
"model.model.summary()"
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": null,
131+
"metadata": {},
132+
"outputs": [],
133+
"source": [
134+
"# fitting\n",
135+
"model.fit(train_data)"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"metadata": {},
142+
"outputs": [],
143+
"source": [
144+
"# train - test predictions\n",
145+
"train_preds = model.predict(train_data)\n",
146+
"test_preds = model.predict(test_data)\n",
147+
"train_mae = (train_preds - train_data.df_targets).abs().mean()\n",
148+
"test_mae = (test_preds - test_data.df_targets).abs().mean()\n",
149+
"print(\"-> train mae\\n{}\\n-> test mae\\n{}\".format(train_mae, test_mae))"
150+
]
151+
},
152+
{
153+
"cell_type": "markdown",
154+
"metadata": {},
155+
"source": [
156+
"## 3. Vector MODNet"
157+
]
158+
},
159+
{
160+
"cell_type": "code",
161+
"execution_count": null,
162+
"metadata": {},
163+
"outputs": [],
164+
"source": [
165+
"# loading train and test data\n",
166+
"\n",
167+
"train_data = MODData.load(\"data/multi_prop_traindata\")\n",
168+
"test_data = MODData.load(\"data/multi_prop_testdata\")"
169+
]
170+
},
171+
{
172+
"cell_type": "markdown",
173+
"metadata": {},
174+
"source": [
175+
"### Model\n",
176+
"All properties are put in the same inner list: \n",
177+
"\n",
178+
"targets = [[[\"G_VRH\", \"K_VRH\", \"p\"]]] \n",
179+
"\n",
180+
"This guarantees that the architecture will have a single output vector!"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": null,
186+
"metadata": {},
187+
"outputs": [],
188+
"source": [
189+
"# model creation - carefully observe the architecture that is fully sequential\n",
190+
"model = MODNetModel([[[\"G_VRH\",\"K_VRH\",\"p\"]]], weights={\"G_VRH\":1})\n",
191+
"model.model.summary()"
192+
]
193+
},
194+
{
195+
"cell_type": "code",
196+
"execution_count": null,
197+
"metadata": {},
198+
"outputs": [],
199+
"source": [
200+
"# fitting\n",
201+
"model.fit(train_data)"
202+
]
203+
},
204+
{
205+
"cell_type": "code",
206+
"execution_count": null,
207+
"metadata": {},
208+
"outputs": [],
209+
"source": [
210+
"# train - test predictions\n",
211+
"train_preds = model.predict(train_data)\n",
212+
"test_preds = model.predict(test_data)\n",
213+
"train_mae = (train_preds - train_data.df_targets).abs().mean()\n",
214+
"test_mae = (test_preds - test_data.df_targets).abs().mean()\n",
215+
"print(\"-> train mae\\n{}\\n-> test mae\\n{}\".format(train_mae, test_mae))"
216+
]
217+
},
218+
{
219+
"cell_type": "markdown",
220+
"metadata": {},
221+
"source": [
222+
"# 4. Hyperparameter optimization\n",
223+
"More realistically, you will use the FitGenetic class to optimize hyperparameters.\n",
224+
"This class contains the targets argument that lets you decide whether a single vector model, or a multiple scalar output model is desired.\n",
225+
"\n",
226+
"*Note 1*\n",
227+
"\n",
228+
"It is also possible to have multiple vector output layers, e.g. targets = [[[\"p0\",\"p1\",\"p2\"],[\"p3\",\"p4\"]]]\n",
229+
"\n",
230+
"Or any combination: [[[\"p0\",\"p1\",\"p2\"],[\"p3\",\"p4\"]],[[\"p5\"]]]\n",
231+
"\n",
232+
"*Note 2*\n",
233+
"When dealing with many properties, gathering them in inner lists (i.e. vector architecture) is recommended, as it will result in faster training times!\n",
234+
"Example:"
235+
]
236+
},
237+
{
238+
"cell_type": "code",
239+
"execution_count": null,
240+
"metadata": {},
241+
"outputs": [],
242+
"source": [
243+
"train_data = MODData.load(\"data/multi_prop_traindata\")\n",
244+
"test_data = MODData.load(\"data/multi_prop_testdata\")"
245+
]
246+
},
247+
{
248+
"cell_type": "code",
249+
"execution_count": null,
250+
"metadata": {},
251+
"outputs": [],
252+
"source": [
253+
"# GA vector output\n",
254+
"ga = FitGenetic(train_data, targets = [[[\"G_VRH\",\"K_VRH\",\"p\"]]]) # single vector output architecture\n",
255+
"start_t = time.time()\n",
256+
"model = ga.run(nested=0, size_pop=10, num_generations=3, n_jobs = 8, refit=1) # small GA, use larger values for better optimization\n",
257+
"stop_t = time.time()\n",
258+
"\n",
259+
"train_preds = model.predict(train_data)\n",
260+
"test_preds = model.predict(test_data)\n",
261+
"train_mae = (train_preds - train_data.df_targets).abs().mean()\n",
262+
"test_mae = (test_preds - test_data.df_targets).abs().mean()\n",
263+
"print(\"-> train mae\\n{}\\n-> test mae\\n{}\".format(train_mae, test_mae))\n",
264+
"print(\"Hyperopt duration: {}\".format(stop_t-start_t))"
265+
]
266+
},
267+
{
268+
"cell_type": "code",
269+
"execution_count": null,
270+
"metadata": {},
271+
"outputs": [],
272+
"source": [
273+
"# GA multi scalar output\n",
274+
"ga = FitGenetic(train_data, targets = [[[\"G_VRH\"],[\"K_VRH\"],[\"p\"]]]) # multiple scalar output architecture\n",
275+
"\n",
276+
"start_t = time.time()\n",
277+
"model = ga.run(nested=0, size_pop=10, num_generations=3, n_jobs = 8, refit=1)\n",
278+
"stop_t = time.time()\n",
279+
"\n",
280+
"train_preds = model.predict(train_data)\n",
281+
"test_preds = model.predict(test_data)\n",
282+
"train_mae = (train_preds - train_data.df_targets).abs().mean()\n",
283+
"test_mae = (test_preds - test_data.df_targets).abs().mean()\n",
284+
"print(\"-> train mae\\n{}\\n-> test mae\\n{}\".format(train_mae, test_mae))\n",
285+
"print(\"Hyperopt duration: {}\".format(stop_t-start_t))"
286+
]
287+
},
288+
{
289+
"cell_type": "code",
290+
"execution_count": null,
291+
"metadata": {},
292+
"outputs": [],
293+
"source": []
294+
}
295+
],
296+
"metadata": {
297+
"interpreter": {
298+
"hash": "65cdb1bf34a883aa160cb191d1776d32605e2c21ff3abc3b3101a9562d1e4c9d"
299+
},
300+
"kernelspec": {
301+
"display_name": "Python (modnet-develop)",
302+
"language": "python",
303+
"name": "python3"
304+
},
305+
"language_info": {
306+
"codemirror_mode": {
307+
"name": "ipython",
308+
"version": 3
309+
},
310+
"file_extension": ".py",
311+
"mimetype": "text/x-python",
312+
"name": "python",
313+
"nbconvert_exporter": "python",
314+
"pygments_lexer": "ipython3",
315+
"version": "3.8.12"
316+
}
317+
},
318+
"nbformat": 4,
319+
"nbformat_minor": 4
320+
}

modnet/hyper_opt/fit_genetic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,7 @@ def run(
659659
else:
660660
ensemble = []
661661
for m in models[ranking[:refit]]:
662-
ensemble += m.model
662+
ensemble += m.models
663663
self.best_model = EnsembleMODNetModel(models=ensemble)
664664

665665
self.results = self.best_individual.genes

0 commit comments

Comments
 (0)