|
210 | 210 | "metadata": {},
|
211 | 211 | "source": [
|
212 | 212 | "## Data Ingestion Pipeline \n",
|
213 |
| - "For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You can replace this with your own dataset, or you can skip this pipeline if you already have a time-series based `TabularDataset`.\n", |
| 213 | + "For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You can replace this with your own dataset, or you can skip this pipeline if you already have a time-series based `TabularDataset`.\n" |
| 214 | + ] |
| 215 | + }, |
| 216 | + { |
| 217 | + "cell_type": "code", |
| 218 | + "execution_count": null, |
| 219 | + "metadata": {}, |
| 220 | + "outputs": [], |
| 221 | + "source": [ |
| 222 | + "# The name and target column of the Dataset to create \n", |
| 223 | + "dataset = \"NOAA-Weather-DS4\"\n", |
| 224 | + "target_column_name = \"temperature\"" |
| 225 | + ] |
| 226 | + }, |
| 227 | + { |
| 228 | + "cell_type": "markdown", |
| 229 | + "metadata": {}, |
| 230 | + "source": [ |
214 | 231 | "\n",
|
215 | 232 | "### Upload Data Step\n",
|
216 | 233 | "The data ingestion pipeline has a single step with a script to query the latest weather data and upload it to the blob store. During the first run, the script will create and register a time-series based `TabularDataset` with the past one week of weather data. For each subsequent run, the script will create a partition in the blob store by querying NOAA for new weather data since the last modified time of the dataset (`dataset.data_changed_time`) and creating a data.csv file."
|
|
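The `upload_weather_data.py` script referenced by this step is not included in the diff. The sketch below is only an illustration of the logic described above, assuming the `azureml-opendatasets` package, the workspace default datastore, and a hypothetical `--ds_name` argument, folder layout, and timestamp column; none of these details are confirmed by the diff itself.

```python
# Illustrative sketch only -- not the actual upload_weather_data.py from this repo.
import argparse
from datetime import datetime, timedelta

from azureml.core import Run, Dataset
from azureml.opendatasets import NoaaIsdWeather  # assumes azureml-opendatasets is installed

parser = argparse.ArgumentParser()
parser.add_argument("--ds_name", type=str)  # hypothetical argument name
args = parser.parse_args()

run = Run.get_context()
ws = run.experiment.workspace
dstor = ws.get_default_datastore()

try:
    # Subsequent runs: only fetch data newer than the dataset's last change.
    existing = Dataset.get_by_name(ws, args.ds_name)
    start = existing.data_changed_time
except Exception:
    # First run: seed the dataset with the past week of weather data.
    start = datetime.utcnow() - timedelta(days=7)
end = datetime.utcnow()

# Query NOAA ISD weather from Azure Open Datasets and write one csv partition.
df = NoaaIsdWeather(start, end).to_pandas_dataframe()
df.to_csv("data.csv", index=False)
partition = end.strftime("%Y-%m-%d-%H")  # hypothetical partition layout
dstor.upload_files(["data.csv"],
                   target_path=f"weather-data/{partition}",  # hypothetical folder
                   overwrite=True)

# (Re)register the time-series TabularDataset over all partitions.
tab_ds = Dataset.Tabular.from_delimited_files(
    path=[(dstor, "weather-data/**/*.csv")])
tab_ds = tab_ds.with_timestamp_columns(timestamp="datetime")  # column name assumed
tab_ds.register(ws, name=args.ds_name, create_new_version=True)
```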
225 | 242 | "from azureml.pipeline.core import Pipeline, PipelineParameter\n",
|
226 | 243 | "from azureml.pipeline.steps import PythonScriptStep\n",
|
227 | 244 | "\n",
|
228 |
| - "# The name of the Dataset to create \n", |
229 |
| - "dataset = \"NOAA-Weather-DS4\"\n", |
230 | 245 | "ds_name = PipelineParameter(name=\"ds_name\", default_value=dataset)\n",
|
231 | 246 | "upload_data_step = PythonScriptStep(script_name=\"upload_weather_data.py\", \n",
|
232 | 247 | " allow_reuse=False,\n",
|
|
272 | 287 | "## Training Pipeline\n",
|
273 | 288 | "### Prepare Training Data Step\n",
|
274 | 289 | "\n",
|
275 |
| - "Script to bring data into common X,y format. We need to set allow_reuse flag to False to allow the pipeline to run even when inputs don't change. We also need the name of the model to check the time the model was last trained." |
| 290 | + "Script to check if new data is available since the model was last trained. If no new data is available, we cancel the remaining pipeline steps. We need to set allow_reuse flag to False to allow the pipeline to run even when inputs don't change. We also need the name of the model to check the time the model was last trained." |
276 | 291 | ]
|
277 | 292 | },
|
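Like the upload script, `check_data.py` is not included in this diff. A minimal sketch of the check it describes might look like the following, assuming the dataset's `data_changed_time` is compared against the registered model's `created_time` and that cancelling the parent pipeline run is how the remaining steps are stopped; both are assumptions, not confirmed by the diff.

```python
# Illustrative sketch only -- not the actual check_data.py from this repo.
import argparse

from azureml.core import Run, Dataset
from azureml.core.model import Model

parser = argparse.ArgumentParser()
parser.add_argument("--ds_name", type=str)
parser.add_argument("--model_name", type=str)
args = parser.parse_args()

run = Run.get_context()
ws = run.experiment.workspace

dataset = Dataset.get_by_name(ws, args.ds_name)

try:
    model = Model(ws, name=args.model_name)
except Exception:
    model = None  # no model registered yet, so always continue to training

if model is not None and dataset.data_changed_time <= model.created_time:
    # No new data since the last training run: stop the rest of the pipeline.
    print("No new data found; cancelling remaining pipeline steps.")
    run.parent.cancel()
else:
    print("New data found; continuing to the training step.")
```

Because the step is created with `allow_reuse=False`, this check runs on every scheduled pipeline execution instead of being satisfied from a cached result.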
278 | 293 | {
|
|
283 | 298 | "source": [
|
284 | 299 | "from azureml.pipeline.core import PipelineData\n",
|
285 | 300 | "\n",
|
286 |
| - "target_column = PipelineParameter(\"target_column\", default_value=\"y\")\n", |
287 | 301 | "# The model name with which to register the trained model in the workspace.\n",
|
288 |
| - "model_name = PipelineParameter(\"model_name\", default_value=\"y\")\n", |
289 |
| - "output_x = PipelineData(\"output_x\", datastore=dstor)\n", |
290 |
| - "output_y = PipelineData(\"output_y\", datastore=dstor)" |
| 302 | + "model_name = PipelineParameter(\"model_name\", default_value=\"noaaweatherds\")" |
291 | 303 | ]
|
292 | 304 | },
|
293 | 305 | {
|
|
299 | 311 | "data_prep_step = PythonScriptStep(script_name=\"check_data.py\", \n",
|
300 | 312 | " allow_reuse=False,\n",
|
301 | 313 | " name=\"check_data\",\n",
|
302 |
| - " arguments=[\"--target_column\", target_column,\n", |
303 |
| - " \"--output_x\", output_x,\n", |
304 |
| - " \"--output_y\", output_y,\n", |
305 |
| - " \"--ds_name\", ds_name,\n", |
306 |
| - " \"--model_name\", model_name],\n", |
307 |
| - " outputs=[output_x, output_y], \n", |
| 314 | + " arguments=[\"--ds_name\", ds_name,\n", |
| 315 | + " \"--model_name\", model_name],\n", |
308 | 316 | " compute_target=compute_target, \n",
|
309 | 317 | " runconfig=conda_run_config)"
|
310 | 318 | ]
|
311 | 319 | },
|
| 320 | + { |
| 321 | + "cell_type": "code", |
| 322 | + "execution_count": null, |
| 323 | + "metadata": {}, |
| 324 | + "outputs": [], |
| 325 | + "source": [ |
| 326 | + "from azureml.core import Dataset\n", |
| 327 | + "train_ds = Dataset.get_by_name(ws, dataset)\n", |
| 328 | + "train_ds = train_ds.drop_columns([\"partition_date\"])" |
| 329 | + ] |
| 330 | + }, |
312 | 331 | {
|
313 | 332 | "cell_type": "markdown",
|
314 | 333 | "metadata": {},
|
|
324 | 343 | "outputs": [],
|
325 | 344 | "source": [
|
326 | 345 | "from azureml.train.automl import AutoMLConfig\n",
|
327 |
| - "from azureml.train.automl.runtime import AutoMLStep\n", |
| 346 | + "from azureml.train.automl import AutoMLStep\n", |
328 | 347 | "\n",
|
329 | 348 | "automl_settings = {\n",
|
330 |
| - " \"iteration_timeout_minutes\": 20,\n", |
331 |
| - " \"experiment_timeout_minutes\": 30,\n", |
| 349 | + " \"iteration_timeout_minutes\": 10,\n", |
| 350 | + " \"experiment_timeout_minutes\": 10,\n", |
332 | 351 | " \"n_cross_validations\": 3,\n",
|
333 | 352 | " \"primary_metric\": 'r2_score',\n",
|
334 | 353 | " \"preprocess\": True,\n",
|
|
342 | 361 | " debug_log = 'automl_errors.log',\n",
|
343 | 362 | " path = \".\",\n",
|
344 | 363 | " compute_target=compute_target,\n",
|
345 |
| - " run_configuration=conda_run_config,\n", |
346 |
| - " data_script = \"get_data.py\",\n", |
| 364 | + " training_data = train_ds,\n", |
| 365 | + " label_column_name = target_column_name,\n", |
347 | 366 | " **automl_settings\n",
|
348 | 367 | " )"
|
349 | 368 | ]
|
|
378 | 397 | "automl_step = AutoMLStep(\n",
|
379 | 398 | " name='automl_module',\n",
|
380 | 399 | " automl_config=automl_config,\n",
|
381 |
| - " inputs=[output_x, output_y],\n", |
382 | 400 | " outputs=[metirics_data, model_data],\n",
|
383 | 401 | " allow_reuse=False)"
|
384 | 402 | ]
|
|
432 | 450 | "outputs": [],
|
433 | 451 | "source": [
|
434 | 452 | "training_pipeline_run = experiment.submit(training_pipeline, pipeline_parameters={\n",
|
435 |
| - " \"target_column\": \"temperature\", \"ds_name\": dataset, \"model_name\": \"noaaweatherds\"})" |
| 453 | + " \"ds_name\": dataset, \"model_name\": \"noaaweatherds\"})" |
436 | 454 | ]
|
437 | 455 | },
|
438 | 456 | {
|
|
475 | 493 | "source": [
|
476 | 494 | "from azureml.pipeline.core import Schedule\n",
|
477 | 495 | "schedule = Schedule.create(workspace=ws, name=\"RetrainingSchedule\",\n",
|
478 |
| - " pipeline_parameters={\"target_column\": \"temperature\",\"ds_name\": dataset, \"model_name\": \"noaaweatherds\"},\n", |
| 496 | + " pipeline_parameters={\"ds_name\": dataset, \"model_name\": \"noaaweatherds\"},\n", |
479 | 497 | " pipeline_id=published_pipeline.id, \n",
|
480 | 498 | " experiment_name=experiment_name, \n",
|
481 | 499 | " datastore=dstor,\n",
|
|