Skip to content

Commit 4c50516

Browse files
committed
Merge branch 'master' of https://github.com/ppdebreuck/modnet
2 parents bdeb35b + d4cfcc4 commit 4c50516

File tree

8 files changed

+515
-139
lines changed

8 files changed

+515
-139
lines changed

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
<div align="center">
2+
<img src="img/modnet_logo.svg" alt="modnet-logo" width=200>
3+
<br>
4+
15
# MODNet: Material Optimal Descriptor Network
26

37
[![arXiv](https://img.shields.io/badge/arXiv-2004.14766-brightgreen)](https://arxiv.org/abs/2004.14766) [![Build Status](https://img.shields.io/github/actions/workflow/status/ppdebreuck/modnet/ci.yml?logo=github&branch=main)](https://github.com/ppdebreuck/modnet/actions?query=branch%3Amaster+) [![Read the Docs](https://img.shields.io/readthedocs/modnet)](https://modnet.readthedocs.io/en/latest/)
48

5-
<p align="center">
6-
<img src="img/modnet_logo.svg" alt="modnet-logo" width=200>
7-
<br>
8-
</p>
9+
</div>
910

1011
<a name="introduction"></a>
1112
## Introduction
Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Training the elastic properties\n",
8+
"\n",
9+
"This notebook goes through the multi-target usage of MODNet. Either (1) an n-dimensional tree-like m-MODNet model can be created, or (2) a simple n-dimensional vector output model."
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": null,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"# notebook dependencies\n",
19+
"from modnet.models import MODNetModel\n",
20+
"from modnet.preprocessing import MODData\n",
21+
"from modnet.hyper_opt import FitGenetic\n",
22+
"from modnet.models import MODNetModel\n",
23+
"from sklearn.model_selection import train_test_split\n",
24+
"import numpy as np\n",
25+
"import time"
26+
]
27+
},
28+
{
29+
"cell_type": "markdown",
30+
"metadata": {},
31+
"source": [
32+
"## 1. Loading the dataset and creating the MODData instance\n",
33+
"\n",
34+
"The elastic properties from matminer datasets is used in this example"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": null,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"from matminer.datasets import load_dataset\n",
44+
"df = load_dataset(\"elastic_tensor_2015\")\n",
45+
"compositions = [s.composition for s in df[\"structure\"]]\n",
46+
"G_VRH = df[\"G_VRH\"].values\n",
47+
"K_VRH = df[\"K_VRH\"].values\n",
48+
"poisson = df[\"poisson_ratio\"].values"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": null,
54+
"metadata": {},
55+
"outputs": [],
56+
"source": [
57+
"data = MODData(materials = compositions,\n",
58+
" targets = np.array([G_VRH,K_VRH,poisson]).T, # one property per column\n",
59+
" target_names = [\"G_VRH\",\"K_VRH\",\"p\"]\n",
60+
" )\n",
61+
"data.featurize()\n",
62+
"\n",
63+
"idx_split = train_test_split(range(len(compositions)), test_size=0.2)\n",
64+
"train_data, test_data = data.split(idx_split)"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": null,
70+
"metadata": {},
71+
"outputs": [],
72+
"source": [
73+
"train_data.feature_selection(n_jobs=4, use_precomputed_cross_nmi=True)\n",
74+
"train_data.save(\"data/multi_prop_traindata\")\n",
75+
"test_data.save(\"data/multi_prop_testdata\")"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": null,
81+
"metadata": {},
82+
"outputs": [],
83+
"source": [
84+
"train_data.df_targets.describe()"
85+
]
86+
},
87+
{
88+
"cell_type": "markdown",
89+
"metadata": {},
90+
"source": [
91+
"# 2. Tree MODNetModel"
92+
]
93+
},
94+
{
95+
"cell_type": "code",
96+
"execution_count": null,
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"# loading train and test data\n",
101+
"train_data = MODData.load(\"data/multi_prop_traindata\")\n",
102+
"test_data = MODData.load(\"data/multi_prop_testdata\")"
103+
]
104+
},
105+
{
106+
"cell_type": "markdown",
107+
"metadata": {},
108+
"source": [
109+
"### Model\n",
110+
"Each property is put in a different inner list: \n",
111+
"\n",
112+
"targets = [[[\"G_VRH\"],[\"K_VRH\"],[\"p\"]]] \n",
113+
"\n",
114+
"This guarantees that the architecture will split on those properties, with multiple scalar output layers!\n"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"metadata": {},
121+
"outputs": [],
122+
"source": [
123+
"# model creation - carefully observe the architecture that contains multiple output layers\n",
124+
"model = MODNetModel([[[\"G_VRH\"],[\"K_VRH\"],[\"p\"]]], weights={\"G_VRH\":1, \"K_VRH\":1, \"p\":1})\n",
125+
"model.model.summary()"
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": null,
131+
"metadata": {},
132+
"outputs": [],
133+
"source": [
134+
"# fitting\n",
135+
"model.fit(train_data)"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"metadata": {},
142+
"outputs": [],
143+
"source": [
144+
"# train - test predictions\n",
145+
"train_preds = model.predict(train_data)\n",
146+
"test_preds = model.predict(test_data)\n",
147+
"train_mae = (train_preds - train_data.df_targets).abs().mean()\n",
148+
"test_mae = (test_preds - test_data.df_targets).abs().mean()\n",
149+
"print(\"-> train mae\\n{}\\n-> test mae\\n{}\".format(train_mae, test_mae))"
150+
]
151+
},
152+
{
153+
"cell_type": "markdown",
154+
"metadata": {},
155+
"source": [
156+
"## 3. Vector MODNet"
157+
]
158+
},
159+
{
160+
"cell_type": "code",
161+
"execution_count": null,
162+
"metadata": {},
163+
"outputs": [],
164+
"source": [
165+
"# loading train and test data\n",
166+
"\n",
167+
"train_data = MODData.load(\"data/multi_prop_traindata\")\n",
168+
"test_data = MODData.load(\"data/multi_prop_testdata\")"
169+
]
170+
},
171+
{
172+
"cell_type": "markdown",
173+
"metadata": {},
174+
"source": [
175+
"### Model\n",
176+
"All properties are put in the same inner list: \n",
177+
"\n",
178+
"targets = [[[\"G_VRH\", \"K_VRH\", \"p\"]]] \n",
179+
"\n",
180+
"This guarantees that the architecture will have a single output vector!"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": null,
186+
"metadata": {},
187+
"outputs": [],
188+
"source": [
189+
"# model creation - carefully observe the architecture that is fully sequential\n",
190+
"model = MODNetModel([[[\"G_VRH\",\"K_VRH\",\"p\"]]], weights={\"G_VRH\":1})\n",
191+
"model.model.summary()"
192+
]
193+
},
194+
{
195+
"cell_type": "code",
196+
"execution_count": null,
197+
"metadata": {},
198+
"outputs": [],
199+
"source": [
200+
"# fitting\n",
201+
"model.fit(train_data)"
202+
]
203+
},
204+
{
205+
"cell_type": "code",
206+
"execution_count": null,
207+
"metadata": {},
208+
"outputs": [],
209+
"source": [
210+
"# train - test predictions\n",
211+
"train_preds = model.predict(train_data)\n",
212+
"test_preds = model.predict(test_data)\n",
213+
"train_mae = (train_preds - train_data.df_targets).abs().mean()\n",
214+
"test_mae = (test_preds - test_data.df_targets).abs().mean()\n",
215+
"print(\"-> train mae\\n{}\\n-> test mae\\n{}\".format(train_mae, test_mae))"
216+
]
217+
},
218+
{
219+
"cell_type": "markdown",
220+
"metadata": {},
221+
"source": [
222+
"# 4. Hyperparameter optimization\n",
223+
"More realistically, you will use the FitGenetic class to optimize hyperparameters.\n",
224+
"This class contains the targets argument that lets you decide whether a single vector model, or a multiple scalar output model is desired.\n",
225+
"\n",
226+
"*Note 1*\n",
227+
"\n",
228+
"It is also possible to have multiple vector output layers, e.g. targets = [[[\"p0\",\"p1\",\"p2\"],[\"p3\",\"p4\"]]]\n",
229+
"\n",
230+
"Or any combination: [[[\"p0\",\"p1\",\"p2\"],[\"p3\",\"p4\"]],[[\"p5\"]]]\n",
231+
"\n",
232+
"*Note 2*\n",
233+
"When dealing with many properties, gathering them in inner lists (i.e. vector architecture) is recommended, as it will result in faster training times!\n",
234+
"Example:"
235+
]
236+
},
237+
{
238+
"cell_type": "code",
239+
"execution_count": null,
240+
"metadata": {},
241+
"outputs": [],
242+
"source": [
243+
"train_data = MODData.load(\"data/multi_prop_traindata\")\n",
244+
"test_data = MODData.load(\"data/multi_prop_testdata\")"
245+
]
246+
},
247+
{
248+
"cell_type": "code",
249+
"execution_count": null,
250+
"metadata": {},
251+
"outputs": [],
252+
"source": [
253+
"# GA vector output\n",
254+
"ga = FitGenetic(train_data, targets = [[[\"G_VRH\",\"K_VRH\",\"p\"]]]) # single vector output architecture\n",
255+
"start_t = time.time()\n",
256+
"model = ga.run(nested=0, size_pop=10, num_generations=3, n_jobs = 8, refit=1) # small GA, use larger values for better optimization\n",
257+
"stop_t = time.time()\n",
258+
"\n",
259+
"train_preds = model.predict(train_data)\n",
260+
"test_preds = model.predict(test_data)\n",
261+
"train_mae = (train_preds - train_data.df_targets).abs().mean()\n",
262+
"test_mae = (test_preds - test_data.df_targets).abs().mean()\n",
263+
"print(\"-> train mae\\n{}\\n-> test mae\\n{}\".format(train_mae, test_mae))\n",
264+
"print(\"Hyperopt duration: {}\".format(stop_t-start_t))"
265+
]
266+
},
267+
{
268+
"cell_type": "code",
269+
"execution_count": null,
270+
"metadata": {},
271+
"outputs": [],
272+
"source": [
273+
"# GA multi scalar output\n",
274+
"ga = FitGenetic(train_data, targets = [[[\"G_VRH\"],[\"K_VRH\"],[\"p\"]]]) # multiple scalar output architecture\n",
275+
"\n",
276+
"start_t = time.time()\n",
277+
"model = ga.run(nested=0, size_pop=10, num_generations=3, n_jobs = 8, refit=1)\n",
278+
"stop_t = time.time()\n",
279+
"\n",
280+
"train_preds = model.predict(train_data)\n",
281+
"test_preds = model.predict(test_data)\n",
282+
"train_mae = (train_preds - train_data.df_targets).abs().mean()\n",
283+
"test_mae = (test_preds - test_data.df_targets).abs().mean()\n",
284+
"print(\"-> train mae\\n{}\\n-> test mae\\n{}\".format(train_mae, test_mae))\n",
285+
"print(\"Hyperopt duration: {}\".format(stop_t-start_t))"
286+
]
287+
},
288+
{
289+
"cell_type": "code",
290+
"execution_count": null,
291+
"metadata": {},
292+
"outputs": [],
293+
"source": []
294+
}
295+
],
296+
"metadata": {
297+
"interpreter": {
298+
"hash": "65cdb1bf34a883aa160cb191d1776d32605e2c21ff3abc3b3101a9562d1e4c9d"
299+
},
300+
"kernelspec": {
301+
"display_name": "Python (modnet-develop)",
302+
"language": "python",
303+
"name": "python3"
304+
},
305+
"language_info": {
306+
"codemirror_mode": {
307+
"name": "ipython",
308+
"version": 3
309+
},
310+
"file_extension": ".py",
311+
"mimetype": "text/x-python",
312+
"name": "python",
313+
"nbconvert_exporter": "python",
314+
"pygments_lexer": "ipython3",
315+
"version": "3.8.12"
316+
}
317+
},
318+
"nbformat": 4,
319+
"nbformat_minor": 4
320+
}

modnet/hyper_opt/fit_genetic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,7 @@ def run(
659659
else:
660660
ensemble = []
661661
for m in models[ranking[:refit]]:
662-
ensemble += m.model
662+
ensemble += m.models
663663
self.best_model = EnsembleMODNetModel(models=ensemble)
664664

665665
self.results = self.best_individual.genes

0 commit comments

Comments
 (0)