manashgoswami
diff --git a/‎NBSETUP.md
Lines changed: 3 additions & 1 deletion b/‎NBSETUP.md
Lines changed: 3 additions & 1 deletion
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎configuration.ipynb
Lines changed: 2 additions & 2 deletions b/‎configuration.ipynb
Lines changed: 2 additions & 2 deletions
diff --git a/‎how-to-use-azureml/automated-machine-learning/README.md
Lines changed: 3 additions & 0 deletions b/‎how-to-use-azureml/automated-machine-learning/README.md
Lines changed: 3 additions & 0 deletions
diff --git a/‎how-to-use-azureml/automated-machine-learning/automl_env.yml
Lines changed: 0 additions & 12 deletions b/‎how-to-use-azureml/automated-machine-learning/automl_env.yml
Lines changed: 0 additions & 12 deletions
diff --git a/‎how-to-use-azureml/automated-machine-learning/automl_env_mac.yml
Lines changed: 0 additions & 12 deletions b/‎how-to-use-azureml/automated-machine-learning/automl_env_mac.yml
Lines changed: 0 additions & 12 deletions
diff --git a/‎how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb
Lines changed: 0 additions & 24 deletions b/‎how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb
Lines changed: 0 additions & 24 deletions
diff --git a/‎how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb
Lines changed: 1 addition & 25 deletions b/‎how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb
Lines changed: 1 addition & 25 deletions
diff --git a/‎how-to-use-azureml/automated-machine-learning/subsampling/auto-ml-subsampling-local.ipynb
Lines changed: 218 additions & 0 deletions b/‎how-to-use-azureml/automated-machine-learning/subsampling/auto-ml-subsampling-local.ipynb
Lines changed: 218 additions & 0 deletions
@@ -101,4 +101,6 @@ pip install azureml-sdk[explain]
 
 # install the core SDK and experimental components
 pip install azureml-sdk[contrib]
-```
+```
+Drag and Drop
+The image will be downloaded by Fatkun
@@ -11,7 +11,7 @@ and maintaining the complete data science workflow from the cloud.
 ```sh
 pip install azureml-sdk
 ```
-Read more detailed instructions on [how to set up your environment](./NBSETUP.md).
+Read more detailed instructions on [how to set up your environment](./NBSETUP.md) using Azure Notebook service, your own Jupyter notebook server, or Docker.
 
 ## How to navigate and use the example notebooks?
 You should always run the [Configuration](./configuration.ipynb) notebook first when setting up a notebook library on a new machine or in a new environment. It configures your notebook library to connect to an Azure Machine Learning workspace, and sets up your workspace and compute to be used by many of the other examples. 
 
@@ -96,7 +96,7 @@
       "source": [
         "import azureml.core\n",
         "\n",
-        "print(\"This notebook was created using version 1.0.10 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.0.6 of the Azure ML SDK\")\n",
         "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
       ]
     },
@@ -373,4 +373,4 @@
   },
   "nbformat": 4,
   "nbformat_minor": 2
-}
+}
@@ -169,6 +169,9 @@ bash automl_setup_linux.sh
     - How to specify sample_weight
     - The difference that it makes to test results
 
+- [auto-ml-subsampling-local.ipynb](subsampling/auto-ml-subsampling-local.ipynb)
+    - How to enable subsampling
+
 - [auto-ml-dataprep.ipynb](dataprep/auto-ml-dataprep.ipynb)
     - Using DataPrep for reading data
 
 
@@ -13,19 +13,7 @@ dependencies:
 - pandas>=0.22.0,<0.23.0
 - tensorflow>=1.12.0
 
-# Required for azuremlftk
-- dill
-- pyodbc
-- statsmodels 
-- numexpr  
-- keras
-- distributed>=1.21.5,<1.24
-  
 - pip:
-
-  # Required for azuremlftk
-  - https://azuremlpackages.blob.core.windows.net/forecasting/azuremlftk-0.1.18323.5a1-py3-none-any.whl
-
   # Required packages for AzureML execution, history, and data preparation.
   - azureml-sdk[automl,notebooks,explain]
   - pandas_ml
 
@@ -13,19 +13,7 @@ dependencies:
 - pandas>=0.22.0,<0.23.0
 - tensorflow>=1.12.0
 
-# Required for azuremlftk
-- dill
-- pyodbc
-- statsmodels 
-- numexpr  
-- keras
-- distributed>=1.21.5,<1.24
-  
 - pip:
-
-  # Required for azuremlftk
-  - https://azuremlpackages.blob.core.windows.net/forecasting/azuremlftk-0.1.18323.5a1-py3-none-any.whl
-
   # Required packages for AzureML execution, history, and data preparation.
   - azureml-sdk[automl,notebooks,explain]
   - pandas_ml
 
@@ -47,30 +47,6 @@
         "## Setup\n"
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "To use the *forecasting* task in AutoML, you need to have the **azuremlftk** package installed in your environment. The following cell tests whether this package is installed locally and, if not, gives you instructions for installing it.  "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "try:\n",
-        "    import ftk\n",
-        "    print('Using FTK version ' + ftk.__version__)\n",
-        "except ImportError:\n",
-        "    print(\"Unable to import forecasting package. This notebook does not work without this package.\\n\"\n",
-        "          + \"Please open a command prompt and run `pip install azuremlftk` to install the package. \\n\"\n",
-        "          + \"Make sure you install the package into AutoML's Python environment.\\n\\n\"\n",
-        "          + \"For instance, if AutoML is installed in a conda environment called `python36`, run:\\n\"\n",
-        "          + \"> activate python36\\n> pip install azuremlftk\")"
-      ]
-    },
     {
       "cell_type": "code",
       "execution_count": null,
 
@@ -38,7 +38,7 @@
         "3. Find and train a forecasting model using local compute\n",
         "4. Evaluate the performance of the model\n",
         "\n",
-        "The examples in the follow code samples use the [University of Chicago's Dominick's Finer Foods dataset](https://research.chicagobooth.edu/kilts/marketing-databases/dominicks) to forecast orange juice sales. Dominick's was a grocery chain in the Chicago metropolitan area."
+        "The examples in the following code samples use the University of Chicago's Dominick's Finer Foods dataset to forecast orange juice sales. Dominick's was a grocery chain in the Chicago metropolitan area."
       ]
     },
     {
@@ -48,30 +48,6 @@
         "## Setup"
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "To use the *forecasting* task in AutoML, you need to have the **azuremlftk** package installed in your environment. The following cell tests whether this package is installed locally and, if not, gives you instructions for installing it."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "try:\n",
-        "    import ftk\n",
-        "    print('Using FTK version ' + ftk.__version__)\n",
-        "except ImportError:\n",
-        "    print(\"Unable to import forecasting package. This notebook does not work without this package.\\n\"\n",
-        "          + \"Please open a command prompt and run `pip install azuremlftk` to install the package. \\n\"\n",
-        "          + \"Make sure you install the package into AutoML's Python environment.\\n\\n\"\n",
-        "          + \"For instance, if AutoML is installed in a conda environment called `python36`, run:\\n\"\n",
-        "          + \"> activate python36\\n> pip install azuremlftk\")"
-      ]
-    },
     {
       "cell_type": "code",
       "execution_count": null,
 
@@ -0,0 +1,218 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Copyright (c) Microsoft Corporation. All rights reserved.\n",
+        "\n",
+        "Licensed under the MIT License."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Automated Machine Learning\n",
+        "_**Classification with Local Compute**_\n",
+        "\n",
+        "## Contents\n",
+        "1. [Introduction](#Introduction)\n",
+        "1. [Setup](#Setup)\n",
+        "1. [Data](#Data)\n",
+        "1. [Train](#Train)\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Introduction\n",
+        "\n",
+        "In this example we will explore AutoML's subsampling feature. This is useful for training on large datasets to speed up the convergence.\n",
+        "\n",
+        "The setup is quite similar to a normal classification, with the exception of the `enable_subsampling` option. Keep in mind that even with the `enable_subsampling` flag set, subsampling will only be run for large datasets (>= 50k rows) and when the iteration limit is large (>= 85) or not set.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Setup\n",
+        "\n",
+        "As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import logging\n",
+        "\n",
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "\n",
+        "import azureml.core\n",
+        "from azureml.core.experiment import Experiment\n",
+        "from azureml.core.workspace import Workspace\n",
+        "from azureml.train.automl import AutoMLConfig\n",
+        "from azureml.train.automl.run import AutoMLRun"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ws = Workspace.from_config()\n",
+        "\n",
+        "# Choose a name for the experiment and specify the project folder.\n",
+        "experiment_name = 'automl-subsampling'\n",
+        "project_folder = './sample_projects/automl-subsampling'\n",
+        "\n",
+        "experiment = Experiment(ws, experiment_name)\n",
+        "\n",
+        "output = {}\n",
+        "output['SDK version'] = azureml.core.VERSION\n",
+        "output['Subscription ID'] = ws.subscription_id\n",
+        "output['Workspace Name'] = ws.name\n",
+        "output['Resource Group'] = ws.resource_group\n",
+        "output['Location'] = ws.location\n",
+        "output['Project Directory'] = project_folder\n",
+        "output['Experiment Name'] = experiment.name\n",
+        "pd.set_option('display.max_colwidth', -1)\n",
+        "pd.DataFrame(data = output, index = ['']).T"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Opt-in diagnostics for better experience, quality, and security of future releases."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from azureml.telemetry import set_diagnostics_collection\n",
+        "set_diagnostics_collection(send_diagnostics = True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Data\n",
+        "\n",
+        "We will create a simple dataset using the numpy sin function just for this example. We need just over 50k rows."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "base = np.arange(60000)\n",
+        "cos = np.cos(base)\n",
+        "y = np.round(np.sin(base)).astype('int')\n",
+        "\n",
+        "# Use every row for training; this example does not hold out a test set.\n",
+        "X_train = np.hstack((base.reshape(-1, 1), cos.reshape(-1, 1)))\n",
+        "y_train = y"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Train\n",
+        "\n",
+        "Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
+        "\n",
+        "|Property|Description|\n",
+        "|-|-|\n",
+        "|**enable_subsampling**|This enables subsampling as an option. However it does not guarantee subsampling will be used. It also depends on how large the dataset is and how many iterations it's expected to run at a minimum.|\n",
+        "|**iterations**|Number of iterations. Subsampling runs many iterations on smaller percentages of the data, so `iterations` must be set to a high number for subsampling to be used.|\n",
+        "|**experiment_timeout_minutes**|The experiment timeout in minutes. It is set to 5 here to shorten the demo, but it should probably be higher if we want to finish all the iterations.|\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "automl_config = AutoMLConfig(task = 'classification',\n",
+        "                             debug_log = 'automl_errors.log',\n",
+        "                             primary_metric = 'accuracy',\n",
+        "                             iterations = 85,\n",
+        "                             experiment_timeout_minutes = 5,\n",
+        "                             n_cross_validations = 2,\n",
+        "                             verbosity = logging.INFO,\n",
+        "                             X = X_train, \n",
+        "                             y = y_train,\n",
+        "                             enable_subsampling=True,\n",
+        "                             path = project_folder)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
+        "In this example, we specify `show_output = True` to print currently running iterations to the console."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "local_run = experiment.submit(automl_config, show_output = True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "authors": [
+      {
+        "name": "rogehe"
+      }
+    ],
+    "kernelspec": {
+      "display_name": "Python 3.6",
+      "language": "python",
+      "name": "python36"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.6"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+}