69 | 69 |   "metadata": {},
70 | 70 |   "outputs": [],
71 | 71 |   "source": [
72 |    | - "import json\n",
73 | 72 |   "import logging\n",
74 | 73 |   "\n",
75 | 74 |   "from matplotlib import pyplot as plt\n",
76 |    | - "import numpy as np\n",
77 | 75 |   "import pandas as pd\n",
78 | 76 |   "import os\n",
79 |    | - "from sklearn import datasets\n",
80 |    | - "import azureml.dataprep as dprep\n",
81 |    | - "from sklearn.model_selection import train_test_split\n",
82 | 77 |   "\n",
83 | 78 |   "import azureml.core\n",
84 | 79 |   "from azureml.core.experiment import Experiment\n",
85 | 80 |   "from azureml.core.workspace import Workspace\n",
86 |    | - "from azureml.train.automl import AutoMLConfig\n",
87 |    | - "from azureml.train.automl.run import AutoMLRun"
   | 81 | + "from azureml.core.dataset import Dataset\n",
   | 82 | + "from azureml.train.automl import AutoMLConfig"
88 | 83 |   ]
89 | 84 | },
90 | 85 | {
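For context, a minimal sketch of how the post-change import block is typically exercised together; the config lookup and experiment name below are illustrative assumptions, not taken from this notebook:

    import logging

    import azureml.core
    from azureml.core.experiment import Experiment
    from azureml.core.workspace import Workspace
    from azureml.core.dataset import Dataset
    from azureml.train.automl import AutoMLConfig

    # Workspace.from_config() reads config.json from the working directory by default.
    ws = Workspace.from_config()
    experiment = Experiment(ws, 'automl-bankmarketing')  # hypothetical experiment name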
⋮
155 | 150 |   "    # Create the cluster.\n",
156 | 151 |   "    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
157 | 152 |   " \n",
158 |     | - "    # Can poll for a minimum number of nodes and for a specific timeout.\n",
159 |     | - "    # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
160 |     | - "    compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
    | 153 | + "print('Checking cluster status...')\n",
    | 154 | + "# Can poll for a minimum number of nodes and for a specific timeout.\n",
    | 155 | + "# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
    | 156 | + "compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
161 | 157 |   " \n",
162 |     | - "    # For a more detailed view of current AmlCompute status, use get_status()."
    | 158 | + "# For a more detailed view of current AmlCompute status, use get_status()."
163 | 159 |   ]
164 | 160 | },
165 | 161 | {
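A hedged sketch of the surrounding provisioning pattern this hunk plugs into, assuming the usual AmlCompute flow; the cluster name and VM size are illustrative:

    from azureml.core.compute import AmlCompute, ComputeTarget

    amlcompute_cluster_name = 'automlcl'  # illustrative name
    if amlcompute_cluster_name in ws.compute_targets:
        compute_target = ws.compute_targets[amlcompute_cluster_name]
    else:
        provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                                    max_nodes=4)
        # Create the cluster.
        compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)

    print('Checking cluster status...')
    # De-indented as in the hunk above, the wait now runs whether the cluster
    # was just created or already existed.
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
    print(compute_target.get_status().serialize())  # detailed view of current state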
⋮
200 | 196 |   "# Set compute target to AmlCompute\n",
201 | 197 |   "conda_run_config.target = compute_target\n",
202 | 198 |   "conda_run_config.environment.docker.enabled = True\n",
203 |     | - "conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
204 |     | - "\n",
205 |     | - "dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
206 | 199 |   "\n",
207 |     | - "cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n",
    | 200 | + "cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n",
208 | 201 |   "conda_run_config.environment.python.conda_dependencies = cd"
209 | 202 |   ]
210 | 203 | },
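For reference, a self-contained version of the trimmed run configuration; with TabularDataset replacing dataprep, the azureml-dataprep pip pin and the base-image override are no longer set here:

    from azureml.core.runconfig import RunConfiguration
    from azureml.core.conda_dependencies import CondaDependencies

    conda_run_config = RunConfiguration(framework='python')
    conda_run_config.target = compute_target
    conda_run_config.environment.docker.enabled = True

    # Conda packages only; no pip_packages argument is passed anymore.
    cd = CondaDependencies.create(conda_packages=['numpy', 'py-xgboost<=0.80'])
    conda_run_config.environment.python.conda_dependencies = cd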
⋮
224 | 217 |   "outputs": [],
225 | 218 |   "source": [
226 | 219 |   "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\"\n",
227 |     | - "dflow = dprep.read_csv(data, infer_column_types=True)\n",
228 |     | - "dflow.get_profile()\n",
229 |     | - "X_train = dflow.drop_columns(columns=['y'])\n",
230 |     | - "y_train = dflow.keep_columns(columns=['y'], validate_column_exists=True)\n",
231 |     | - "dflow.head()"
    | 220 | + "dataset = Dataset.Tabular.from_delimited_files(data)\n",
    | 221 | + "X_train = dataset.drop_columns(columns=['y'])\n",
    | 222 | + "y_train = dataset.keep_columns(columns=['y'], validate=True)\n",
    | 223 | + "dataset.take(5).to_pandas_dataframe()"
232 | 224 |   ]
233 | 225 | },
234 | 226 | {
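Downstream, these lazy TabularDatasets feed AutoMLConfig directly. A hedged sketch in the X/y style this SDK vintage accepted; the metric, iteration count, and submit call are illustrative, not lifted from the notebook:

    automl_config = AutoMLConfig(task='classification',
                                 primary_metric='AUC_weighted',
                                 X=X_train,
                                 y=y_train,
                                 iterations=10,
                                 run_configuration=conda_run_config)
    remote_run = experiment.submit(automl_config, show_output=True)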
⋮
406 | 398 |   "def run(rawdata):\n",
407 | 399 |   "    try:\n",
408 | 400 |   "        data = json.loads(rawdata)['data']\n",
409 |     | - "        data = numpy.array(data)\n",
    | 401 | + "        data = np.array(data)\n",
410 | 402 |   "        result = model.predict(data)\n",
411 | 403 |   "    except Exception as e:\n",
412 | 404 |   "        result = str(e)\n",
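This fix presupposes that the generated scoring script imports numpy under the np alias. A minimal sketch of the shape such a score script takes; the model name is a hypothetical placeholder:

    import json
    import numpy as np
    from azureml.core.model import Model
    from sklearn.externals import joblib  # joblib lived here in scikit-learn of this era

    def init():
        global model
        model_path = Model.get_model_path(model_name='automl_model')  # hypothetical name
        model = joblib.load(model_path)

    def run(rawdata):
        try:
            data = json.loads(rawdata)['data']
            data = np.array(data)  # np, matching the alias imported above
            result = model.predict(data)
        except Exception as e:
            return json.dumps({'error': str(e)})
        return json.dumps({'result': result.tolist()})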
⋮
443 | 435 |   "metadata": {},
444 | 436 |   "outputs": [],
445 | 437 |   "source": [
446 |     | - "for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n",
    | 438 | + "for p in ['azureml-train-automl', 'azureml-core']:\n",
447 | 439 |   "    print('{}\\t{}'.format(p, dependencies[p]))"
448 | 440 |   ]
449 | 441 | },
⋮
453 | 445 |   "metadata": {},
454 | 446 |   "outputs": [],
455 | 447 |   "source": [
456 |     | - "from azureml.core.conda_dependencies import CondaDependencies\n",
457 |     | - "\n",
458 | 448 |   "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n",
459 |     | - "                                 pip_packages=['azureml-sdk[automl]'])\n",
    | 449 | + "                                 pip_packages=['azureml-train-automl'])\n",
460 | 450 |   "\n",
461 | 451 |   "conda_env_file_name = 'myenv.yml'\n",
462 | 452 |   "myenv.save_to_file('.', conda_env_file_name)"
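Downstream, myenv.yml is what the container image build consumes; a sketch of the typical wiring, where the score.py file name is an assumption rather than something shown in this diff:

    from azureml.core.image import ContainerImage

    image_config = ContainerImage.image_configuration(execution_script='score.py',  # assumed name
                                                      runtime='python',
                                                      conda_file=conda_env_file_name)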
⋮
476 | 466 |   "    content = cefr.read()\n",
477 | 467 |   "\n",
478 | 468 |   "with open(conda_env_file_name, 'w') as cefw:\n",
479 |     | - "    cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
    | 469 | + "    cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n",
480 | 470 |   "\n",
481 | 471 |   "# Substitute the actual model id in the script file.\n",
482 | 472 |   "\n",
⋮
618 | 608 |   "outputs": [],
619 | 609 |   "source": [
620 | 610 |   "# Load the bank marketing datasets.\n",
621 |     | - "from sklearn.datasets import load_diabetes\n",
622 |     | - "from sklearn.model_selection import train_test_split\n",
623 | 611 |   "from numpy import array"
624 | 612 |   ]
625 | 613 | },
⋮
630 | 618 |   "outputs": [],
631 | 619 |   "source": [
632 | 620 |   "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_validate.csv\"\n",
633 |     | - "dflow = dprep.read_csv(data, infer_column_types=True)\n",
634 |     | - "dflow.get_profile()\n",
635 |     | - "X_test = dflow.drop_columns(columns=['y'])\n",
636 |     | - "y_test = dflow.keep_columns(columns=['y'], validate_column_exists=True)\n",
637 |     | - "dflow.head()"
    | 621 | + "dataset = Dataset.Tabular.from_delimited_files(data)\n",
    | 622 | + "X_test = dataset.drop_columns(columns=['y'])\n",
    | 623 | + "y_test = dataset.keep_columns(columns=['y'], validate=True)\n",
    | 624 | + "dataset.take(5).to_pandas_dataframe()"
638 | 625 |   ]
639 | 626 | },
640 | 627 | {