From c3ce932029167080fcd40c13e80925c50befd764 Mon Sep 17 00:00:00 2001 From: Roope Astala Date: Mon, 8 Apr 2019 15:34:51 -0400 Subject: [PATCH] version 1.0.23 --- .../automated-machine-learning/README.md | 12 + ...to-ml-classification-with-deployment.ipynb | 3 +- .../auto-ml-classification-with-onnx.ipynb | 284 + ...-ml-classification-with-whitelisting.ipynb | 19 +- .../auto-ml-classification.ipynb | 2 - .../dataprep/auto-ml-dataprep.ipynb | 3 +- ...ing-data-blacklist-early-termination.ipynb | 2 - ...oml-databricks-local-with-deployment.ipynb | 66 +- .../production-deploy-to-aks.ipynb | 106 +- ...er-model-create-image-deploy-service.ipynb | 27 +- .../machine-learning-pipelines/README.md | 23 +- .../aml-pipelines-data-transfer.ipynb | 2 +- ...l-pipelines-how-to-use-estimatorstep.ipynb | 281 + ...nes-use-databricks-as-compute-target.ipynb | 2 +- ...with-automated-machine-learning-step.ipynb | 542 ++ .../intro-to-pipelines/dummy_train.py | 30 + .../distributed-tensorflow-with-horovod.ipynb | 7 +- .../how-to-use-estimator.ipynb | 9 +- ...yperparameter-tune-deploy-with-keras.ipynb | 8 +- ...arameter-tune-deploy-with-tensorflow.ipynb | 10 +- .../training/logging-api/logging-api.ipynb | 1056 ++-- .../training/manage-runs/manage-runs.ipynb | 1186 ++-- .../train-on-local/train-on-local.ipynb | 2 +- .../train-on-remote-vm.ipynb | 2 +- .../train-within-notebook.ipynb | 2 +- tutorials/dflows.dprep | 5513 ++++++++--------- .../img-classification-part1-training.ipynb | 4 +- .../img-classification-part2-deploy.ipynb | 2 +- tutorials/regression-part1-data-prep.ipynb | 5 +- tutorials/regression-part2-automated-ml.ipynb | 3 +- 30 files changed, 5162 insertions(+), 4051 deletions(-) create mode 100644 how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb create mode 100644 how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-estimatorstep.ipynb create mode 100644 
how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb create mode 100644 how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/dummy_train.py diff --git a/how-to-use-azureml/automated-machine-learning/README.md b/how-to-use-azureml/automated-machine-learning/README.md index 0d44cbc7c..e3d565176 100644 --- a/how-to-use-azureml/automated-machine-learning/README.md +++ b/how-to-use-azureml/automated-machine-learning/README.md @@ -189,6 +189,11 @@ jupyter notebook - Dataset: [Dominick's grocery sales of orange juice](forecasting-b/dominicks_OJ.csv) - Example of training an AutoML forecasting model on multiple time-series +- [auto-ml-classification-with-onnx.ipynb](classification-with-onnx/auto-ml-classification-with-onnx.ipynb) + - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) + - Simple example of using Auto ML for classification with ONNX models + - Uses local compute for training + See [Configure automated machine learning experiments](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train) to learn how more about the the settings and features available for automated machine learning experiments. @@ -233,6 +238,13 @@ If a sample notebook fails with an error that property, method or library does n ## Numpy import fails on Windows Some Windows environments see an error loading numpy with the latest Python version 3.6.8. If you see this issue, try with Python version 3.6.7. +## Numpy import fails +Check the tensorflow version in the automated ml conda environment. Supported versions are < 1.13. 
Uninstall tensorflow from the environment if version is >= 1.13 +You may check the version of tensorflow and uninstall as follows: +1) Start a command shell and activate the conda environment where the automated ml packages are installed. +2) Enter `pip freeze` and look for `tensorflow`; if found, the version listed should be < 1.13. +3) If the listed version is not a supported version, run `pip uninstall tensorflow` in the command shell and enter y for confirmation. + ## Remote run: DsvmCompute.create fails There are several reasons why the DsvmCompute.create can fail. The reason is usually in the error message but you have to look at the end of the error message for the detailed reason. Some common reasons are: 1) `Compute name is invalid, it should start with a letter, be between 2 and 16 character, and only include letters (a-zA-Z), numbers (0-9) and \'-\'.` Note that underscore is not allowed in the name. diff --git a/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb b/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb index f6093b481..8e6c662c6 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb @@ -139,7 +139,6 @@ " primary_metric = 'AUC_weighted',\n", " iteration_timeout_minutes = 20,\n", " iterations = 10,\n", - " n_cross_validations = 2,\n", " verbosity = logging.INFO,\n", " X = X_train, \n", " y = y_train,\n", @@ -263,7 +262,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. 
Details about retrieving the versions can be found in notebook [12.auto-ml-retrieve-the-training-sdk-versions](12.auto-ml-retrieve-the-training-sdk-versions.ipynb)." + "To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. The following cells create a file, myenv.yml, which specifies the dependencies from the run." ] }, { diff --git a/how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb b/how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb new file mode 100644 index 000000000..53351dc2b --- /dev/null +++ b/how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb @@ -0,0 +1,284 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Automated Machine Learning\n", + "_**Classification with Local Compute**_\n", + "\n", + "## Contents\n", + "1. [Introduction](#Introduction)\n", + "1. [Setup](#Setup)\n", + "1. [Data](#Data)\n", + "1. [Train](#Train)\n", + "1. [Results](#Results)\n", + "1. 
[Test](#Test)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "In this example we use scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n", + "\n", + "Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n", + "\n", + "Please find the ONNX-related documentation [here](https://github.com/onnx/onnx).\n", + "\n", + "In this notebook you will learn how to:\n", + "1. Create an `Experiment` in an existing `Workspace`.\n", + "2. Configure AutoML using `AutoMLConfig`.\n", + "3. Train the model using local compute with ONNX-compatible models enabled.\n", + "4. Explore the results and save the ONNX model." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "\n", + "from matplotlib import pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# Choose a name for the experiment and specify the project folder.\n", + "experiment_name = 'automl-classification-onnx'\n", + "project_folder = './sample_projects/automl-classification-onnx'\n", + "\n", + "experiment = Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "outputDf = pd.DataFrame(data = output, index = [''])\n", + "outputDf.T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data\n", + "\n", + "This uses scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) method." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "\n", + "# Exclude the first 100 rows from training so that they can be used for test.\n", + "X_train = digits.data[100:,:]\n", + "y_train = digits.target[100:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train with ONNX-compatible models enabled\n", + "\n", + "Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n", + "\n", + "Set the parameter enable_onnx_compatible_models=True, if you also want to generate the ONNX compatible models. Please note, the forecasting task and TensorFlow models are not ONNX compatible yet.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**task**|classification or regression|\n", + "|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics:
accuracy
AUC_weighted
average_precision_score_weighted
norm_macro_recall
precision_score_weighted|\n", + "|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n", + "|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n", + "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", + "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", + "|**enable_onnx_compatible_models**|Enable the ONNX compatible models in the experiment.|\n", + "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " primary_metric = 'AUC_weighted',\n", + " iteration_timeout_minutes = 60,\n", + " iterations = 10,\n", + " verbosity = logging.INFO,\n", + " X = X_train, \n", + " y = y_train,\n", + " enable_onnx_compatible_models=True,\n", + " path = project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n", + "In this example, we specify `show_output = True` to print currently running iterations to the console." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_config, show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for Monitoring Runs\n", + "\n", + "The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "RunDetails(local_run).show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best ONNX Model\n", + "\n", + "Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*.\n", + "\n", + "Set the parameter return_onnx_model=True to retrieve the best ONNX model, instead of the Python model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, onnx_mdl = local_run.get_output(return_onnx_model=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the best ONNX model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.automl._vendor.automl.client.core.common.onnx_convert import OnnxConverter\n", + "onnx_fl_path = \"./best_model.onnx\"\n", + "OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "savitam" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb b/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb index 1b20301a2..61c6fde05 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb @@ -71,11 +71,17 @@ "import azureml.core\n", "from azureml.core.experiment import Experiment\n", "from azureml.core.workspace import Workspace\n", - "try:\n", - " import tensorflow as tf1\n", - "except ImportError:\n", - " from pip._internal import main\n", - " main(['install', 'tensorflow>=1.10.0,<=1.12.0'])\n", + "import 
sys\n", + "whitelist_models=[\"LightGBM\"]\n", + "if \"3.7\" != sys.version[0:3]:\n", + " try:\n", + " import tensorflow as tf1\n", + " except ImportError:\n", + " from pip._internal import main\n", + " main(['install', 'tensorflow>=1.10.0,<=1.12.0'])\n", + " logging.getLogger().setLevel(logging.ERROR)\n", + " whitelist_models=[\"TensorFlowLinearClassifier\", \"TensorFlowDNN\"]\n", + "\n", "from azureml.train.automl import AutoMLConfig" ] }, @@ -160,12 +166,11 @@ " primary_metric = 'AUC_weighted',\n", " iteration_timeout_minutes = 60,\n", " iterations = 10,\n", - " n_cross_validations = 3,\n", " verbosity = logging.INFO,\n", " X = X_train, \n", " y = y_train,\n", " enable_tf=True,\n", - " whitelist_models=[\"TensorFlowLinearClassifier\", \"TensorFlowDNN\"],\n", + " whitelist_models=whitelist_models,\n", " path = project_folder)" ] }, diff --git a/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb b/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb index 03d9c8eb5..2f59b39c7 100644 --- a/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb @@ -135,7 +135,6 @@ "|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics:
accuracy
AUC_weighted
average_precision_score_weighted
norm_macro_recall
precision_score_weighted|\n", "|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n", "|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n", - "|**n_cross_validations**|Number of cross validation splits.|\n", "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", "|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n", "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|" @@ -152,7 +151,6 @@ " primary_metric = 'AUC_weighted',\n", " iteration_timeout_minutes = 60,\n", " iterations = 25,\n", - " n_cross_validations = 3,\n", " verbosity = logging.INFO,\n", " X = X_train, \n", " y = y_train,\n", diff --git a/how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.ipynb b/how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.ipynb index 22c9a89dc..2836d1d71 100644 --- a/how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.ipynb +++ b/how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.ipynb @@ -163,8 +163,7 @@ " \"iterations\" : 2,\n", " \"primary_metric\" : 'AUC_weighted',\n", " \"preprocess\" : False,\n", - " \"verbosity\" : logging.INFO,\n", - " \"n_cross_validations\": 3\n", + " \"verbosity\" : logging.INFO\n", "}" ] }, diff --git a/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb b/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb index 83a6bfa8c..542a33bba 100644 --- a/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb +++ 
b/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb @@ -154,7 +154,6 @@ "|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics:
accuracy
AUC_weighted
average_precision_score_weighted
norm_macro_recall
precision_score_weighted|\n", "|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n", "|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n", - "|**n_cross_validations**|Number of cross validation splits.|\n", "|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n", "|**experiment_exit_score**|*double* value indicating the target for *primary_metric*.
Once the target is surpassed the run terminates.|\n", "|**blacklist_models**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.

Allowed values for **Classification**
LogisticRegression
SGD
MultinomialNaiveBayes
BernoulliNaiveBayes
SVM
LinearSVM
KNN
DecisionTree
RandomForest
ExtremeRandomTrees
LightGBM
GradientBoosting
TensorFlowDNN
TensorFlowLinearClassifier

Allowed values for **Regression**
ElasticNet
GradientBoosting
DecisionTree
KNN
LassoLars
SGD
RandomForest
ExtremeRandomTrees
LightGBM
TensorFlowLinearRegressor
TensorFlowDNN|\n", @@ -174,7 +173,6 @@ " primary_metric = 'AUC_weighted',\n", " iteration_timeout_minutes = 60,\n", " iterations = 20,\n", - " n_cross_validations = 5,\n", " preprocess = True,\n", " experiment_exit_score = 0.9984,\n", " blacklist_models = ['KNN','LinearSVM'],\n", diff --git a/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb b/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb index 0ca2ddce8..97e35029b 100644 --- a/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb +++ b/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb @@ -207,6 +207,7 @@ "import os\n", "import random\n", "import time\n", + "import json\n", "\n", "from matplotlib import pyplot as plt\n", "from matplotlib.pyplot import imshow\n", @@ -295,7 +296,7 @@ " datastore_name = datastore_name, \n", " container_name = container_name, \n", " account_name = account_name,\n", - " overwrite = True\n", + " overwrite = True\n", ")" ] }, @@ -427,7 +428,7 @@ " debug_log = 'automl_errors.log',\n", " primary_metric = 'AUC_weighted',\n", " iteration_timeout_minutes = 10,\n", - " iterations = 30,\n", + " iterations = 5,\n", " preprocess = True,\n", " n_cross_validations = 10,\n", " max_concurrent_iterations = 2, #change it based on number of worker nodes\n", @@ -591,22 +592,21 @@ "%%writefile score.py\n", "import pickle\n", "import json\n", - "import numpy\n", + "import numpy as np\n", "import azureml.train.automl\n", "from sklearn.externals import joblib\n", "from azureml.core.model import Model\n", - "\n", + "import pandas as pd\n", "\n", "def init():\n", " global model\n", - " model_path = Model.get_model_path(model_name = '<>') # this name is model.id of model that we want to deploy\n", + " model_path = Model.get_model_path(model_name = '<>') # this name is model.id of model that we want to deploy\n", " # deserialize the model file back into a 
sklearn model\n", " model = joblib.load(model_path)\n", "\n", - "def run(rawdata):\n", + "def run(raw_data):\n", " try:\n", - " data = json.loads(rawdata)['data']\n", - " data = numpy.array(data)\n", + " data = (pd.DataFrame(np.array(json.loads(raw_data)['data']), columns=[str(i) for i in range(0,64)]))\n", " result = model.predict(data)\n", " except Exception as e:\n", " result = str(e)\n", @@ -614,6 +614,22 @@ " return json.dumps({\"result\":result.tolist()})" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Replace <>\n", + "content = \"\"\n", + "with open(\"score.py\", \"r\") as fo:\n", + " content = fo.read()\n", + "\n", + "new_content = content.replace(\"<>\", local_run.model_id)\n", + "with open(\"score.py\", \"w\") as fw:\n", + " fw.write(new_content)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -672,16 +688,19 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# this will take 10-15 minutes to finish\n", "\n", - "service_name = \"<>\"\n", + "import uuid\n", + "from azureml.core.image import ContainerImage\n", + "\n", + "guid = str(uuid.uuid4()).split(\"-\")[0]\n", + "service_name = \"myservice-{}\".format(guid)\n", + "print(\"Creating service with name: {}\".format(service_name))\n", "runtime = \"spark-py\" \n", "driver_file = \"score.py\"\n", "my_conda_file = \"mydeployenv.yml\"\n", "\n", "# image creation\n", - "from azureml.core.image import ContainerImage\n", "myimage_config = ContainerImage.image_configuration(execution_script = driver_file, \n", " runtime = runtime, \n", " conda_file = 'mydeployenv.yml')\n", @@ -744,18 +763,39 @@ "metadata": {}, "outputs": [], "source": [ + "import json\n", "# Randomly select digits and test.\n", "for index in np.random.choice(len(y_test), 2, replace = False):\n", " print(index)\n", - " predicted = fitted_model.predict(X_test[index:index + 1])[0]\n", + " test_sample = json.dumps({'data':X_test[index:index + 1].values.tolist()})\n", + " 
predicted = myservice.run(input_data = test_sample)\n", " label = y_test.values[index]\n", - " title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n", + " predictedDict = json.loads(predicted)\n", + " title = \"Label value = %d Predicted value = %s \" % ( label,predictedDict['result'][0]) \n", " fig = plt.figure(3, figsize = (5,5))\n", " ax1 = fig.add_axes((0,0,.8,.8))\n", " ax1.set_title(title)\n", " plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n", " display(fig)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Delete the service" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "myservice.delete()" + ] } ], "metadata": { diff --git a/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb b/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb index 628813ade..4539f6ba7 100644 --- a/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb +++ b/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb @@ -216,56 +216,6 @@ " provisioning_configuration = prov_config)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create AKS Cluster in an existing virtual network (optional)\n", - "See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-enable-virtual-network#use-azure-kubernetes-service) for more details." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "'''\n", - "from azureml.core.compute import ComputeTarget, AksCompute\n", - "\n", - "# Create the compute configuration and set virtual network information\n", - "config = AksCompute.provisioning_configuration(location=\"eastus2\")\n", - "config.vnet_resourcegroup_name = \"mygroup\"\n", - "config.vnet_name = \"mynetwork\"\n", - "config.subnet_name = \"default\"\n", - "config.service_cidr = \"10.0.0.0/16\"\n", - "config.dns_service_ip = \"10.0.0.10\"\n", - "config.docker_bridge_cidr = \"172.17.0.1/16\"\n", - "\n", - "# Create the compute target\n", - "aks_target = ComputeTarget.create(workspace = ws,\n", - " name = \"myaks\",\n", - " provisioning_configuration = config)\n", - "'''" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Enable SSL on the AKS Cluster (optional)\n", - "See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-secure-web-service) for more details" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# provisioning_config = AksCompute.provisioning_configuration(ssl_cert_pem_file=\"cert.pem\", ssl_key_pem_file=\"key.pem\", ssl_cname=\"www.contoso.com\")" - ] - }, { "cell_type": "code", "execution_count": null, @@ -345,9 +295,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Test the web service using run method\n", - "We test the web sevice by passing data.\n", - "Run() method retrieves API keys behind the scenes to make sure that call is authenticated." + "# Test the web service\n", + "We test the web sevice by passing data." 
] }, { @@ -369,57 +318,6 @@ "print(prediction)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test the web service using raw HTTP request (optional)\n", - "Alternatively you can construct a raw HTTP request and send it to the service. In this case you need to explicitly pass the HTTP header. This process is shown in the next 2 cells." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# retreive the API keys. AML generates two keys.\n", - "'''\n", - "key1, Key2 = aks_service.get_keys()\n", - "print(key1)\n", - "'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# construct raw HTTP request and send to the service\n", - "'''\n", - "%%time\n", - "\n", - "import requests\n", - "\n", - "import json\n", - "\n", - "test_sample = json.dumps({'data': [\n", - " [1,2,3,4,5,6,7,8,9,10], \n", - " [10,9,8,7,6,5,4,3,2,1]\n", - "]})\n", - "test_sample = bytes(test_sample,encoding = 'utf8')\n", - "\n", - "# Don't forget to add key to the HTTP header.\n", - "headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n", - "\n", - "resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n", - "\n", - "\n", - "print(\"prediction:\", resp.text)\n", - "'''" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb b/how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb index e1bebf46f..0d9bce06c 100644 --- a/how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb +++ b/how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb @@ -261,6 +261,31 @@ 
"image.wait_for_creation(show_output = True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Use a custom Docker image\n", + "\n", + "You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n", + "\n", + "Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n", + "```python\n", + "# use an image available in public Container Registry without authentication\n", + "image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n", + "\n", + "# or, use an image available in a private Container Registry\n", + "image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n", + "image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n", + "image_config.base_image_registry.username = \"username\"\n", + "image_config.base_image_registry.password = \"password\"\n", + "\n", + "# or, use an image built during training.\n", + "image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n", + "```\n", + "You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). 
\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -395,7 +420,7 @@ "metadata": { "authors": [ { - "name": "raymondl" + "name": "aashishb" } ], "kernelspec": { diff --git a/how-to-use-azureml/machine-learning-pipelines/README.md b/how-to-use-azureml/machine-learning-pipelines/README.md index 093bc73e0..796aa0ce5 100644 --- a/how-to-use-azureml/machine-learning-pipelines/README.md +++ b/how-to-use-azureml/machine-learning-pipelines/README.md @@ -38,18 +38,19 @@ In this directory, there are two types of notebooks: * The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. These notebooks below belong in this category, and are designed to go in sequence; they're all located in the "intro-to-pipelines" folder: -1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started) -2. [aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep) -3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep) -4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans) -5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks) -6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla) -7. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive) -8. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch) -9. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule) -10. [aml-pipelines-with-automated-machine-learning-step.ipynb](https://aka.ms/pl-automl) +1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started): Start with this notebook to understand the concepts of using Azure Machine Learning Pipelines. This notebook will show you how to run steps in parallel and in sequence. +2. 
[aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep): This notebook shows how to connect steps in your pipeline using data. Data produced by one step is used by subsequent steps to force an explicit dependency between steps. +3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep): Once you are satisfied with your iterative runs, you can publish your pipeline to get a REST endpoint which can be invoked from non-Python clients as well. +4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans): This notebook shows how to transfer data between supported datastores. +5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks): This notebook shows how you can use Pipelines to send your compute payload to Azure Databricks. +6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla): This notebook shows how you can use Azure Data Lake Analytics (ADLA) as a compute target. +7. [aml-pipelines-how-to-use-estimatorstep.ipynb](https://aka.ms/pl-estimator): This notebook shows how to use the EstimatorStep. +8. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive): HyperDriveStep in Pipelines shows how you can do hyperparameter tuning using Pipelines. +9. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch): AzureBatchStep can be used to run your custom code in an Azure Batch cluster. +10. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule): Once you publish a Pipeline, you can schedule it to trigger based on an interval or on data change in a defined datastore. +11. [aml-pipelines-with-automated-machine-learning-step.ipynb](https://aka.ms/pl-automl): AutoMLStep in Pipelines shows how you can do automated machine learning using Pipelines. * The second type of notebooks illustrate more sophisticated scenarios, and are independent of each other.
These notebooks include: -1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score) +1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score): This notebook demonstrates how to run a batch scoring job using Azure Machine Learning pipelines. 2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans) diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb index 40415dfb2..f4ddfb92e 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb @@ -141,7 +141,7 @@ " print(\"registered blob datastore with name: %s\" % blob_datastore_name)\n", "\n", "# CLI:\n", - "# az ml datastore register-blob -n -a -c -k [-t ]" + "# az ml datastore attach-blob -n -a -c -k [-t ]" ] }, { diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-estimatorstep.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-estimatorstep.ipynb new file mode 100644 index 000000000..9b57a8e9c --- /dev/null +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-estimatorstep.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to use EstimatorStep in AML Pipeline\n", + "\n", + "This notebook shows how to use the EstimatorStep with Azure Machine Learning Pipelines. 
Estimator is a convenient object in Azure Machine Learning that wraps run configuration information to help simplify the tasks of specifying how a script is executed.\n", + "\n", + "\n", + "## Prerequisite:\n", + "* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n", + "* Go through the [configuration notebook](../../../configuration.ipynb) to:\n", + " * install the AML SDK\n", + " * create a workspace and its configuration file (`config.json`)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's get started. First let's import some Python libraries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import azureml.core\n", + "# check core SDK version number\n", + "print(\"Azure ML SDK Version: \", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create or Attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n", + "1. create the configuration (this step is local and only takes a second)\n", + "2. create the cluster (this step will take about **20 seconds**)\n", + "3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. 
Please make sure to wait until the call returns before moving to the next cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "cluster_name = \"cpucluster\"\n", + "\n", + "try:\n", + " cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing compute target')\n", + "except ComputeTargetException:\n", + " print('Creating a new compute target...')\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', max_nodes=4)\n", + "\n", + " # create the cluster\n", + " cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it uses the scale settings for the cluster\n", + " cpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + "\n", + "# use get_status() to get a detailed status for the current cluster. \n", + "print(cpu_cluster.get_status().serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'cpucluster' of type `AmlCompute`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use a simple script\n", + "We have already created a simple \"hello world\" script. This is the script that we will submit through the estimator pattern. It prints a hello-world message, and if Azure ML SDK is installed, it will also log an array of values ([Fibonacci numbers](https://en.wikipedia.org/wiki/Fibonacci_number))."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build an Estimator object\n", + "Estimator by default will attempt to use Docker-based execution. You can also enable Docker and let estimator pick the default CPU image supplied by Azure ML for execution. You can target an AmlCompute cluster (or any other supported compute target types). You can also customize the conda environment by adding conda and/or pip packages.\n", + "\n", + "> Note: The arguments to the entry script used in the Estimator object should be specified as *list* using\n", + " 'estimator_entry_script_arguments' parameter when instantiating EstimatorStep. Estimator object's parameter\n", + " 'script_params' accepts a dictionary. However 'estimator_entry_script_arguments' parameter expects arguments as\n", + " a list.\n", + "\n", + "> Estimator object initialization involves specifying a list of DataReference objects in its 'inputs' parameter.\n", + " In Pipelines, a step can take another step's output or DataReferences as input. So when creating an EstimatorStep,\n", + " the parameters 'inputs' and 'outputs' need to be set explicitly and that will override 'inputs' parameter\n", + " specified in the Estimator object." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Datastore\n", + "from azureml.data.data_reference import DataReference\n", + "from azureml.pipeline.core import PipelineData\n", + "\n", + "def_blob_store = Datastore(ws, \"workspaceblobstore\")\n", + "\n", + "input_data = DataReference(\n", + " datastore=def_blob_store,\n", + " data_reference_name=\"input_data\",\n", + " path_on_datastore=\"20newsgroups/20news.pkl\")\n", + "\n", + "output = PipelineData(\"output\", datastore=def_blob_store)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.estimator import Estimator\n", + "\n", + "est = Estimator(source_directory='.', \n", + " compute_target=cpu_cluster, \n", + " entry_script='dummy_train.py', \n", + " conda_packages=['scikit-learn'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an EstimatorStep\n", + "[EstimatorStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py) adds a step to run Estimator in a Pipeline.\n", + "\n", + "- **name:** Name of the step\n", + "- **estimator:** Estimator object\n", + "- **estimator_entry_script_arguments:** \n", + "- **runconfig_pipeline_params:** Override runconfig properties at runtime using key-value pairs each with name of the runconfig property and PipelineParameter for that property\n", + "- **inputs:** Inputs\n", + "- **outputs:** Output is list of PipelineData\n", + "- **compute_target:** Compute target to use \n", + "- **allow_reuse:** Whether the step should reuse previous results when run with the same settings/inputs. 
If this is false, a new run will always be generated for this step during pipeline execution.\n", + "- **version:** Optional version tag to denote a change in functionality for the step" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.steps import EstimatorStep\n", + "\n", + "est_step = EstimatorStep(name=\"Estimator_Train\", \n", + " estimator=est, \n", + " estimator_entry_script_arguments=[\"--datadir\", input_data, \"--output\", output],\n", + " runconfig_pipeline_params=None, \n", + " inputs=[input_data], \n", + " outputs=[output], \n", + " compute_target=cpu_cluster)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build and Submit the Experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core import Pipeline\n", + "from azureml.core import Experiment\n", + "pipeline = Pipeline(workspace=ws, steps=[est_step])\n", + "pipeline_run = Experiment(ws, 'Estimator_sample').submit(pipeline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## View Run Details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "RunDetails(pipeline_run).show()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "sanpil" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git 
a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb index 4b92d2111..9826b3ea3 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb @@ -168,7 +168,7 @@ "metadata": {}, "source": [ "## Data Connections with Inputs and Outputs\n", - "The DatabricksStep supports Azure Bloband ADLS for inputs and outputs. You also will need to define a [Secrets](https://docs.azuredatabricks.net/user-guide/secrets/index.html) scope to enable authentication to external data sources such as Blob and ADLS from Databricks.\n", + "The DatabricksStep supports Azure Blob and ADLS for inputs and outputs. You also will need to define a [Secrets](https://docs.azuredatabricks.net/user-guide/secrets/index.html) scope to enable authentication to external data sources such as Blob and ADLS from Databricks.\n", "\n", "- Databricks documentation on [Azure Blob](https://docs.azuredatabricks.net/spark/latest/data-sources/azure/azure-storage.html)\n", "- Databricks documentation on [ADLS](https://docs.databricks.com/spark/latest/data-sources/azure/azure-datalake.html)\n", diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb new file mode 100644 index 000000000..34b2ab72d --- /dev/null +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb @@ -0,0 +1,542 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright 
(c) Microsoft Corporation. All rights reserved. \n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Azure Machine Learning Pipeline with AutoMLStep\n", + "This notebook demonstrates the use of AutoMLStep in Azure Machine Learning Pipeline." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n", + "\n", + "Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Create an `Experiment` in an existing `Workspace`.\n", + "2. Create or Attach existing AmlCompute to a workspace.\n", + "3. Configure AutoML using `AutoMLConfig`.\n", + "4. Use AutoMLStep\n", + "5. Train the model using AmlCompute\n", + "6. Explore the results.\n", + "7. 
Test the best fitted model.\n", + "\n", + "In addition this notebook showcases the following features\n", + "- **Parallel** executions for iterations\n", + "- **Asynchronous** tracking of progress\n", + "- Retrieving models for any iteration or logged metric\n", + "- Specifying AutoML settings as `**kwargs`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Azure Machine Learning and Pipeline SDK-specific imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import csv\n", + "\n", + "from matplotlib import pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.core.compute import AmlCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "from azureml.train.automl import AutoMLStep\n", + "\n", + "# Check core SDK version number\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "Initialize a workspace object from persisted configuration. 
Make sure the config file is present at .\\config.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an Azure ML experiment\n", + "Let's create an experiment named \"automlstep-classification\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Choose a name for the run history container in the workspace.\n", + "experiment_name = 'automlstep-classification'\n", + "project_folder = './project'\n", + "\n", + "experiment = Experiment(ws, experiment_name)\n", + "experiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create or Attach existing AmlCompute\n", + "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. In this tutorial, you create `AmlCompute` as your training compute resource.\n", + "\n", + "**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Choose a name for your cluster.\n", + "amlcompute_cluster_name = \"cpucluster\"\n", + "\n", + "found = False\n", + "# Check if this compute target already exists in the workspace.\n", + "cts = ws.compute_targets\n", + "if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n", + " found = True\n", + " print('Found existing compute target.')\n", + " compute_target = cts[amlcompute_cluster_name]\n", + " \n", + "if not found:\n", + " print('Creating a new compute target...')\n", + " provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n", + " #vm_priority = 'lowpriority', # optional\n", + " max_nodes = 4)\n", + "\n", + " # Create the cluster.\n", + " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", + " \n", + " # Can poll for a minimum number of nodes and for a specific timeout.\n", + " # If no min_node_count is provided, it will use the scale settings for the cluster.\n", + " compute_target.wait_for_completion(show_output = True, min_node_count = 1, timeout_in_minutes = 10)\n", + " \n", + " # For a more detailed view of current AmlCompute status, use get_status()." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare and Point to Data\n", + "For remote executions, you need to make the data accessible from the remote compute.\n", + "This can be done by uploading the data to DataStore.\n", + "In this example, we upload scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_train = datasets.load_digits()\n", + "\n", + "if not os.path.isdir('data'):\n", + " os.mkdir('data')\n", + " \n", + "if not os.path.exists(project_folder):\n", + " os.makedirs(project_folder)\n", + " \n", + "pd.DataFrame(data_train.data).to_csv(\"data/X_train.tsv\", index=False, header=False, quoting=csv.QUOTE_ALL, sep=\"\\t\")\n", + "pd.DataFrame(data_train.target).to_csv(\"data/y_train.tsv\", index=False, header=False, sep=\"\\t\")\n", + "\n", + "ds = ws.get_default_datastore()\n", + "ds.upload(src_dir='./data', target_path='bai_data', overwrite=True, show_progress=True)\n", + "\n", + "from azureml.data.data_reference import DataReference \n", + "input_data = DataReference(datastore=ds, \n", + " data_reference_name=\"input_data_reference\",\n", + " path_on_datastore='bai_data',\n", + " mode='download',\n", + " path_on_compute='/tmp/azureml_runs',\n", + " overwrite=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a new RunConfig object\n", + "conda_run_config = RunConfiguration(framework=\"python\")\n", + "\n", + "# Set compute target to AmlCompute\n", + "#conda_run_config.target = compute_target\n", + "\n", + "conda_run_config.environment.docker.enabled = True\n", + "conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", + "\n", + "cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy'], pin_sdk_version=False)\n", + "conda_run_config.environment.python.conda_dependencies = cd\n", + "\n", + "print('run config is ready')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $project_folder/get_data.py\n", + "\n", + "import pandas as pd\n", + "\n", + "def get_data():\n", + " X_train = 
pd.read_csv(\"/tmp/azureml_runs/bai_data/X_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n", + " y_train = pd.read_csv(\"/tmp/azureml_runs/bai_data/y_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n", + "\n", + " return { \"X\" : X_train.values, \"y\" : y_train[0].values }\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up AutoMLConfig for Training\n", + "\n", + "You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local executions too.\n", + "\n", + "**Note:** When using AmlCompute, you can't pass Numpy arrays directly to the fit method.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics:
accuracy
AUC_weighted
average_precision_score_weighted
norm_macro_recall
precision_score_weighted|\n", + "|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n", + "|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n", + "|**n_cross_validations**|Number of cross validation splits.|\n", + "|**max_concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_settings = {\n", + " \"iteration_timeout_minutes\": 5,\n", + " \"iterations\": 20,\n", + " \"n_cross_validations\": 5,\n", + " \"primary_metric\": 'AUC_weighted',\n", + " \"preprocess\": False,\n", + " \"max_concurrent_iterations\": 3,\n", + " \"verbosity\": logging.INFO\n", + "}\n", + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " path = project_folder,\n", + " compute_target=compute_target,\n", + " run_configuration=conda_run_config,\n", + " data_script = project_folder + \"/get_data.py\",\n", + " **automl_settings\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n", + "In this example, we specify `show_output = False` to suppress console output while the run is in progress." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define AutoMLStep" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_step = AutoMLStep(\n", + " name='automl_module',\n", + " experiment=experiment,\n", + " automl_config=automl_config,\n", + " inputs=[input_data],\n", + " allow_reuse=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core import Pipeline\n", + "pipeline = Pipeline(\n", + " description=\"pipeline_with_automlstep\",\n", + " workspace=ws, \n", + " steps=[automl_step])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_run = experiment.submit(pipeline)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "RunDetails(pipeline_run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_run.wait_for_completion()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Examine Results\n", + "\n", + "#### Loading executed runs\n", + "In case you need to load a previously executed run, enable the cell below and replace the `run_id` value." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.automl.run import AutoMLRun\n", + "\n", + "# only one step exists in this pipeline\n", + "run_id = None\n", + "step_runs = pipeline_run.get_children()\n", + "for run in step_runs:\n", + " run_id=run._run_id\n", + " \n", + "automl_run = AutoMLRun(experiment = experiment, run_id=run_id)\n", + "automl_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Retrieve All Child Runs\n", + "You can also use SDK methods to fetch all the child runs and see individual metrics that we log." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(automl_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n", + " metricslist[int(properties['iteration'])] = metrics\n", + "\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "rundata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = automl_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model Based on Any Other Metric\n", + "Show the run and the model which has the smallest `log_loss` value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup_metric = \"log_loss\"\n", + "best_run, fitted_model = automl_run.get_output(metric = lookup_metric)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Model from a Specific Iteration\n", + "Show the run and the model from the third iteration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 3\n", + "third_run, third_model = automl_run.get_output(iteration=iteration)\n", + "print(third_run)\n", + "print(third_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test the Model\n", + "\n", + "### Load Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "X_test = digits.data[:10, :]\n", + "y_test = digits.target[:10]\n", + "images = digits.images[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing Our Best Fitted Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Randomly select digits and test.\n", + "for index in np.random.choice(len(y_test), 3, replace = False):\n", + " print(index)\n", + " predicted = fitted_model.predict(X_test[index:index + 1])[0]\n", + " label = y_test[index]\n", + " title = \"Label value = %d Predicted value = %d 
\" % (label, predicted)\n", + " fig = plt.figure(1, figsize=(3,3))\n", + " ax1 = fig.add_axes((0,0,.8,.8))\n", + " ax1.set_title(title)\n", + " plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n", + " plt.show()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "sanpil" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/dummy_train.py b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/dummy_train.py new file mode 100644 index 000000000..0ad3b5ff7 --- /dev/null +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/dummy_train.py @@ -0,0 +1,30 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+import argparse +import os + +print("*********************************************************") +print("Hello Azure ML!") + +parser = argparse.ArgumentParser() +parser.add_argument('--datadir', type=str, help="data directory") +parser.add_argument('--output', type=str, help="output") +args = parser.parse_args() + +print("Argument 1: %s" % args.datadir) +print("Argument 2: %s" % args.output) + +if not (args.output is None): + os.makedirs(args.output, exist_ok=True) + print("%s created" % args.output) + +try: + from azureml.core import Run + run = Run.get_context() + print("Log Fibonacci numbers.") + run.log_list('Fibonacci numbers', [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]) + run.complete() +except: + print("Warning: you need to install Azure ML SDK in order to log metrics.") + +print("*********************************************************") diff --git a/how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb b/how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb index c5cc399ee..6515e44d5 100644 --- a/how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb +++ b/how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb @@ -285,7 +285,9 @@ "metadata": {}, "source": [ "### Create a TensorFlow estimator\n", - "The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow)." + "The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. 
For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow).\n", + "\n", + "The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release." ] }, { @@ -307,7 +309,8 @@ " node_count=2,\n", " process_count_per_node=1,\n", " distributed_backend='mpi',\n", - " use_gpu=True)" + " use_gpu=True, \n", + " framework_version='1.12')" ] }, { diff --git a/how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/how-to-use-estimator.ipynb b/how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/how-to-use-estimator.ipynb index 55fed3a60..01c06e791 100644 --- a/how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/how-to-use-estimator.ipynb +++ b/how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/how-to-use-estimator.ipynb @@ -291,7 +291,7 @@ "outputs": [], "source": [ "# use a custom Docker image\n", - "from azureml.core.runconfig import ContainerRegistry\n", + "from azureml.core.container_registry import ContainerRegistry\n", "\n", "# this is an image available in Docker Hub\n", "image_name = 'continuumio/miniconda3'\n", @@ -309,7 +309,8 @@ "est = Estimator(source_directory='.', compute_target='local', \n", " entry_script='dummy_train.py',\n", " custom_docker_image=image_name,\n", - " image_registry_details=image_registry_details,\n", + " # uncomment below line to use your private ACR\n", + " #image_registry_details=image_registry_details,\n", " user_managed=user_managed_dependencies\n", " )\n", "\n", @@ -336,7 +337,7 @@ "metadata": { "authors": 
[ { - "name": "minxia" + "name": "maxluk" } ], "kernelspec": { @@ -356,7 +357,7 @@ "pygments_lexer": "ipython3", "version": "3.6.8" }, - "msauthor": "haining" + "msauthor": "minxia" }, "nbformat": 4, "nbformat_minor": 2 diff --git a/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb b/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb index 1713c5c0e..145044b42 100644 --- a/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb +++ b/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb @@ -396,7 +396,7 @@ "est = TensorFlow(source_directory=script_folder,\n", " script_params=script_params,\n", " compute_target=compute_target, \n", - " conda_packages=['keras', 'matplotlib'],\n", + " pip_packages=['keras', 'matplotlib'],\n", " entry_script='keras_mnist.py', \n", " use_gpu=True)" ] @@ -792,7 +792,7 @@ "outputs": [], "source": [ "best_run = hdr.get_best_run_by_primary_metric()\n", - "print(best_run.get_details()['runDefinition']['Arguments'])" + "print(best_run.get_details()['runDefinition']['arguments'])" ] }, { @@ -1144,7 +1144,7 @@ "metadata": { "authors": [ { - "name": "haining" + "name": "maxluk" } ], "kernelspec": { @@ -1164,7 +1164,7 @@ "pygments_lexer": "ipython3", "version": "3.6.7" }, - "msauthor": "haining" + "msauthor": "maxluk" }, "nbformat": 4, "nbformat_minor": 2 diff --git a/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb b/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb index 78a30c1e4..274c1615d 100644 
--- a/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb +++ b/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb @@ -396,7 +396,10 @@ "source": [ "## Create TensorFlow estimator\n", "Next, we construct an `azureml.train.dnn.TensorFlow` estimator object, use the Batch AI cluster as compute target, and pass the mount-point of the datastore to the training code as a parameter.\n", - "The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker." + "\n", + "The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker.\n", + "\n", + "The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release." 
] }, { @@ -419,7 +422,8 @@ " script_params=script_params,\n", " compute_target=compute_target,\n", " entry_script='tf_mnist.py', \n", - " use_gpu=True)" + " use_gpu=True, \n", + " framework_version='1.12')" ] }, { @@ -1158,7 +1162,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.6.6" }, "msauthor": "minxia" }, diff --git a/how-to-use-azureml/training/logging-api/logging-api.ipynb b/how-to-use-azureml/training/logging-api/logging-api.ipynb index 690779910..f662c7a14 100644 --- a/how-to-use-azureml/training/logging-api/logging-api.ipynb +++ b/how-to-use-azureml/training/logging-api/logging-api.ipynb @@ -1,530 +1,530 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright (c) Microsoft Corporation. All rights reserved.\n", - "\n", - "Licensed under the MIT License." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Logging\n", - "\n", - "_**This notebook showcases various ways to use the Azure Machine Learning service run logging APIs, and view the results in the Azure portal.**_\n", - "\n", - "---\n", - "---\n", - "\n", - "## Table of Contents\n", - "\n", - "1. [Introduction](#Introduction)\n", - "1. [Setup](#Setup)\n", - " 1. Validate Azure ML SDK installation\n", - " 1. Initialize workspace\n", - " 1. Set experiment\n", - "1. [Logging](#Logging)\n", - " 1. Starting a run\n", - " 1. Viewing a run in the portal\n", - " 1. Viewing the experiment in the portal\n", - " 1. Logging metrics\n", - " 1. Logging string metrics\n", - " 1. Logging numeric metrics\n", - " 1. Logging vectors\n", - " 1. Logging tables\n", - " 1. Uploading files\n", - "1. [Analyzing results](#Analyzing-results)\n", - " 1. Tagging a run\n", - "1. 
[Next steps](#Next-steps)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Introduction\n", - "\n", - "Logging metrics from runs in your experiments allows you to track results from one run to another, determining trends in your outputs and understand how your inputs correspond to your model and script performance. Azure Machine Learning services (AzureML) allows you to track various types of metrics including images and arbitrary files in order to understand, analyze, and audit your experimental progress. \n", - "\n", - "Typically you should log all parameters for your experiment and all numerical and string outputs of your experiment. This will allow you to analyze the performance of your experiments across multiple runs, correlate inputs to outputs, and filter runs based on interesting criteria.\n", - "\n", - "The experiment's Run History report page automatically creates a report that can be customized to show the KPI's, charts, and column sets that are interesting to you. \n", - "\n", - "| ![Run Details](./img/run_details.PNG) | ![Run History](./img/run_history.PNG) |\n", - "|:--:|:--:|\n", - "| *Run Details* | *Run History* |\n", - "\n", - "---\n", - "\n", - "## Setup\n", - "\n", - "Make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't. 
Also make sure you have tqdm and matplotlib installed in the current kernel.\n", - "\n", - "```\n", - "(myenv) $ conda install -y tqdm matplotlib\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Validate Azure ML SDK installation and get version number for debugging purposes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "install" - ] - }, - "outputs": [], - "source": [ - "from azureml.core import Experiment, Workspace, Run\n", - "import azureml.core\n", - "import numpy as np\n", - "from tqdm import tqdm\n", - "\n", - "# Check core SDK version number\n", - "\n", - "print(\"This notebook was created using SDK version AZUREML-SDK-VERSION, you are currently running version\", azureml.core.VERSION)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Initialize workspace\n", - "\n", - "Initialize a workspace object from persisted configuration." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "create workspace" - ] - }, - "outputs": [], - "source": [ - "ws = Workspace.from_config()\n", - "print('Workspace name: ' + ws.name, \n", - " 'Azure region: ' + ws.location, \n", - " 'Subscription id: ' + ws.subscription_id, \n", - " 'Resource group: ' + ws.resource_group, sep='\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set experiment\n", - "Create a new experiment (or get the one with the specified name). An *experiment* is a container for an arbitrary set of *runs*. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "experiment = Experiment(workspace=ws, name='logging-api-test')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## Logging\n", - "In this section we will explore the various logging mechanisms.\n", - "\n", - "### Starting a run\n", - "\n", - "A *run* is a singular experimental trial. In this notebook we will create a run directly on the experiment by calling `run = exp.start_logging()`. If you were experimenting by submitting a script file as an experiment using ``experiment.submit()``, you would call `run = Run.get_context()` in your script to access the run context of your code. In either case, the logging methods on the returned run object work the same.\n", - "\n", - "This cell also stores the run id for use later in this notebook. The run_id is not necessary for logging." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# start logging for the run\n", - "run = experiment.start_logging()\n", - "\n", - "# access the run id for use later\n", - "run_id = run.id\n", - "\n", - "# change the scale factor on different runs to see how you can compare multiple runs\n", - "scale_factor = 2\n", - "\n", - "# change the category on different runs to see how to organize data in reports\n", - "category = 'Red'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Viewing a run in the Portal\n", - "Once a run is started you can see the run in the portal by simply typing ``run``. Clicking on the \"Link to Portal\" link will take you to the Run Details page that shows the metrics you have logged and other run properties. You can refresh this page after each logging statement to see the updated results." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Viewing an experiment in the portal\n", - "You can also view an experiement similarly by typing `experiment`. The portal link will take you to the experiment's Run History page that shows all runs and allows you to analyze trends across multiple runs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "experiment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Logging metrics\n", - "Metrics are visible in the run details page in the AzureML portal and also can be analyzed in experiment reports. The run details page looks as below and contains tabs for Details, Outputs, Logs, and Snapshot. \n", - "* The Details page displays attributes about the run, plus logged metrics and images. Metrics that are vectors appear as charts. \n", - "* The Outputs page contains any files, such as models, you uploaded into the \"outputs\" directory from your run into storage. If you place files in the \"outputs\" directory locally, the files are automatically uploaded on your behald when the run is completed.\n", - "* The Logs page allows you to view any log files created by your run. Logging runs created in notebooks typically do not generate log files.\n", - "* The Snapshot page contains a snapshot of the directory specified in the ''start_logging'' statement, plus the notebook at the time of the ''start_logging'' call. This snapshot and notebook can be downloaded from the Run Details page to continue or reproduce an experiment.\n", - "\n", - "### Logging string metrics\n", - "The following cell logs a string metric. A string metric is simply a string value associated with a name. A string metric String metrics are useful for labelling runs and to organize your data. 
Typically you should log all string parameters as metrics for later analysis - even information such as paths can help to understand how individual experiements perform differently.\n", - "\n", - "String metrics can be used in the following ways:\n", - "* Plot in hitograms\n", - "* Group by indicators for numerical plots\n", - "* Filtering runs\n", - "\n", - "String metrics appear in the **Tracked Metrics** section of the Run Details page and can be added as a column in Run History reports." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# log a string metric\n", - "run.log(name='Category', value=category)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Logging numerical metrics\n", - "The following cell logs some numerical metrics. Numerical metrics can include metrics such as AUC or MSE. You should log any parameter or significant output measure in order to understand trends across multiple experiments. Numerical metrics appear in the **Tracked Metrics** section of the Run Details page, and can be used in charts or KPI's in experiment Run History reports." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# log numerical values\n", - "run.log(name=\"scale factor\", value = scale_factor)\n", - "run.log(name='Magic Number', value=42 * scale_factor)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Logging vectors\n", - "Vectors are good for recording information such as loss curves. You can log a vector by create a list of numbers and call ``log_list()`` and supply a name and the list, or by repeatedly logging a value using the same name.\n", - "\n", - "Vectors are presented in Run Details as a chart, and are directly comparable in experiment reports when placed in a chart. **Note:** vectors logged into the run are expected to be relatively small. 
Logging very large vectors into Azure ML can result in reduced performance. If you need to store large amounts of data associated with the run, you can write the data to file that will be uploaded." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fibonacci_values = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]\n", - "scaled_values = (i * scale_factor for i in fibonacci_values)\n", - "\n", - "# Log a list of values. Note this will generate a single-variable line chart.\n", - "run.log_list(name='Fibonacci', value=scaled_values)\n", - "\n", - "for i in tqdm(range(-10, 10)):\n", - " # log a metric value repeatedly, this will generate a single-variable line chart.\n", - " run.log(name='Sigmoid', value=1 / (1 + np.exp(-i)))\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Logging tables\n", - "Tables are good for recording related sets of information such as accuracy tables, confusion matrices, etc. \n", - "You can log a table in two ways:\n", - "* Create a dictionary of lists where each list represents a column in the table and call ``log_table()``\n", - "* Repeatedly call ``log_row()`` providing the same table name with a consistent set of named args as the column values\n", - "\n", - "Tables are presented in Run Details as a chart using the first two columns of the table **Note:** tables logged into the run are expected to be relatively small. Logging very large tables into Azure ML can result in reduced performance. If you need to store large amounts of data associated with the run, you can write the data to file that will be uploaded." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# create a dictionary to hold a table of values\n", - "sines = {}\n", - "sines['angle'] = []\n", - "sines['sine'] = []\n", - "\n", - "for i in tqdm(range(-10, 10)):\n", - " angle = i / 2.0 * scale_factor\n", - " \n", - " # log a 2 (or more) values as a metric repeatedly. This will generate a 2-variable line chart if you have 2 numerical columns.\n", - " run.log_row(name='Cosine Wave', angle=angle, cos=np.cos(angle))\n", - " \n", - " sines['angle'].append(angle)\n", - " sines['sine'].append(np.sin(angle))\n", - "\n", - "# log a dictionary as a table, this will generate a 2-variable chart if you have 2 numerical columns\n", - "run.log_table(name='Sine Wave', value=sines)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Logging images\n", - "You can directly log _matplotlib_ plots and arbitrary images to your run record. This code logs a _matplotlib_ pyplot object. Images show up in the run details page in the Azure ML Portal." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "\n", - "# Create a plot\n", - "import matplotlib.pyplot as plt\n", - "angle = np.linspace(-3, 3, 50) * scale_factor\n", - "plt.plot(angle,np.tanh(angle), label='tanh')\n", - "plt.legend(fontsize=12)\n", - "plt.title('Hyperbolic Tangent', fontsize=16)\n", - "plt.grid(True)\n", - "\n", - "# Log the plot to the run. To log an arbitrary image, use the form run.log_image(name, path='./image_path.png')\n", - "run.log_image(name='Hyperbolic Tangent', plot=plt)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Uploading files\n", - "\n", - "Any files that are placed in the ``.\\outputs`` directory are automatically uploaded when the run is completed. These files are also visible in the *Outputs* tab of the Run Details page. 
Files can also be uploaded explicitly and stored as artifacts along with the run record.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile .\\outputs\\myfile.txt\n", - "\n", - "This is an output file that will be automatically uploaded." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Completing the run\n", - "\n", - "Calling `run.complete()` marks the run as completed and triggers the output file collection. If for any reason you need to indicate the run failed or simply need to cancel the run you can call `run.fail()` or `run.cancel()`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run.complete()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## Analyzing results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can refresh the run in the Azure portal to see all of your results. In many cases you will want to analyze runs that were performed previously to inspect the contents or compare results. Runs can be fetched from their parent Experiment object using the ``Run()`` constructor or the ``experiment.get_runs()`` method. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fetched_run = Run(experiment, run_id)\n", - "fetched_run" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Call ``run.get_metrics()`` to retrieve all the metrics from a run." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fetched_run.get_metrics()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "See the files uploaded for this run by calling ``run.get_file_names()``" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fetched_run.get_file_names()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once you know the file names in a run, you can download the files using the ``run.download_file()`` method" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.makedirs('files', exist_ok=True)\n", - "\n", - "for f in run.get_file_names():\n", - " dest = os.path.join('files', f.split('/')[-1])\n", - " print('Downloading file {} to {}...'.format(f, dest))\n", - " fetched_run.download_file(f, dest) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Tagging a run\n", - "Often when you analyze the results of a run, you may need to tag that run with important personal or external information. You can add a tag to a run using the ``run.tag()`` method. AzureML supports valueless and valued tags." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fetched_run.tag(\"My Favorite Run\")\n", - "fetched_run.tag(\"Competition Rank\", 1)\n", - "\n", - "fetched_run.get_tags()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Next steps\n", - "To experiment more with logging and to understand how metrics can be visualized, go back to the *Start a run* section, try changing the category and scale_factor values and going through the notebook several times. 
Play with the KPI, charting, and column selection options on the experiment's Run History reports page to see how the various metrics can be combined and visualized.\n", - "\n", - "After learning about all of the logging options, go to the [train on remote vm](..\\train_on_remote_vm\\train_on_remote_vm.ipnyb) notebook and experiment with logging from remote compute contexts." - ] - } - ], - "metadata": { - "authors": [ - { - "name": "roastala" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Logging\n", + "\n", + "_**This notebook showcases various ways to use the Azure Machine Learning service run logging APIs, and view the results in the Azure portal.**_\n", + "\n", + "---\n", + "---\n", + "\n", + "## Table of Contents\n", + "\n", + "1. [Introduction](#Introduction)\n", + "1. [Setup](#Setup)\n", + " 1. Validate Azure ML SDK installation\n", + " 1. Initialize workspace\n", + " 1. Set experiment\n", + "1. [Logging](#Logging)\n", + " 1. Starting a run\n", + " 1. Viewing a run in the portal\n", + " 1. Viewing the experiment in the portal\n", + " 1. Logging metrics\n", + " 1. Logging string metrics\n", + " 1. Logging numeric metrics\n", + " 1. Logging vectors\n", + " 1. Logging tables\n", + " 1. Uploading files\n", + "1. [Analyzing results](#Analyzing-results)\n", + " 1. Tagging a run\n", + "1. [Next steps](#Next-steps)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "Logging metrics from runs in your experiments allows you to track results from one run to another, determining trends in your outputs and understand how your inputs correspond to your model and script performance. 
Azure Machine Learning services (AzureML) allows you to track various types of metrics including images and arbitrary files in order to understand, analyze, and audit your experimental progress. \n", + "\n", + "Typically you should log all parameters for your experiment and all numerical and string outputs of your experiment. This will allow you to analyze the performance of your experiments across multiple runs, correlate inputs to outputs, and filter runs based on interesting criteria.\n", + "\n", + "The experiment's Run History report page automatically creates a report that can be customized to show the KPI's, charts, and column sets that are interesting to you. \n", + "\n", + "| ![Run Details](./img/run_details.PNG) | ![Run History](./img/run_history.png) |\n", + "|:--:|:--:|\n", + "| *Run Details* | *Run History* |\n", + "\n", + "---\n", + "\n", + "## Setup\n", + "\n", + "Make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't. Also make sure you have tqdm and matplotlib installed in the current kernel.\n", + "\n", + "```\n", + "(myenv) $ conda install -y tqdm matplotlib\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Validate Azure ML SDK installation and get version number for debugging purposes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "install" + ] + }, + "outputs": [], + "source": [ + "from azureml.core import Experiment, Workspace, Run\n", + "import azureml.core\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "\n", + "# Check core SDK version number\n", + "\n", + "print(\"This notebook was created using SDK version 1.0.23, you are currently running version\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set experiment\n", + "Create a new experiment (or get the one with the specified name). An *experiment* is a container for an arbitrary set of *runs*. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment = Experiment(workspace=ws, name='logging-api-test')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Logging\n", + "In this section we will explore the various logging mechanisms.\n", + "\n", + "### Starting a run\n", + "\n", + "A *run* is a singular experimental trial. In this notebook we will create a run directly on the experiment by calling `run = experiment.start_logging()`. If you were experimenting by submitting a script file as an experiment using ``experiment.submit()``, you would call `run = Run.get_context()` in your script to access the run context of your code. In either case, the logging methods on the returned run object work the same.\n", + "\n", + "This cell also stores the run id for use later in this notebook. The run_id is not necessary for logging."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# start logging for the run\n", + "run = experiment.start_logging()\n", + "\n", + "# access the run id for use later\n", + "run_id = run.id\n", + "\n", + "# change the scale factor on different runs to see how you can compare multiple runs\n", + "scale_factor = 2\n", + "\n", + "# change the category on different runs to see how to organize data in reports\n", + "category = 'Red'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Viewing a run in the Portal\n", + "Once a run is started you can see the run in the portal by simply typing ``run``. Clicking on the \"Link to Portal\" link will take you to the Run Details page that shows the metrics you have logged and other run properties. You can refresh this page after each logging statement to see the updated results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Viewing an experiment in the portal\n", + "You can also view an experiment similarly by typing `experiment`. The portal link will take you to the experiment's Run History page that shows all runs and allows you to analyze trends across multiple runs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Logging metrics\n", + "Metrics are visible in the run details page in the AzureML portal and also can be analyzed in experiment reports. The run details page looks as below and contains tabs for Details, Outputs, Logs, and Snapshot. \n", + "* The Details page displays attributes about the run, plus logged metrics and images. Metrics that are vectors appear as charts.
\n", + "* The Outputs page contains any files, such as models, you uploaded into the \"outputs\" directory from your run into storage. If you place files in the \"outputs\" directory locally, the files are automatically uploaded on your behalf when the run is completed.\n", + "* The Logs page allows you to view any log files created by your run. Logging runs created in notebooks typically do not generate log files.\n", + "* The Snapshot page contains a snapshot of the directory specified in the ``start_logging`` statement, plus the notebook at the time of the ``start_logging`` call. This snapshot and notebook can be downloaded from the Run Details page to continue or reproduce an experiment.\n", + "\n", + "### Logging string metrics\n", + "The following cell logs a string metric. A string metric is simply a string value associated with a name. String metrics are useful for labelling runs and organizing your data. Typically you should log all string parameters as metrics for later analysis - even information such as paths can help to understand how individual experiments perform differently.\n", + "\n", + "String metrics can be used in the following ways:\n", + "* Plot in histograms\n", + "* Group by indicators for numerical plots\n", + "* Filtering runs\n", + "\n", + "String metrics appear in the **Tracked Metrics** section of the Run Details page and can be added as a column in Run History reports." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# log a string metric\n", + "run.log(name='Category', value=category)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Logging numerical metrics\n", + "The following cell logs some numerical metrics. Numerical metrics can include metrics such as AUC or MSE. You should log any parameter or significant output measure in order to understand trends across multiple experiments.
Numerical metrics appear in the **Tracked Metrics** section of the Run Details page, and can be used in charts or KPIs in experiment Run History reports." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# log numerical values\n", + "run.log(name=\"scale factor\", value = scale_factor)\n", + "run.log(name='Magic Number', value=42 * scale_factor)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Logging vectors\n", + "Vectors are good for recording information such as loss curves. You can log a vector by creating a list of numbers and calling ``log_list()`` with a name and the list, or by repeatedly logging a value using the same name.\n", + "\n", + "Vectors are presented in Run Details as a chart, and are directly comparable in experiment reports when placed in a chart. **Note:** vectors logged into the run are expected to be relatively small. Logging very large vectors into Azure ML can result in reduced performance. If you need to store large amounts of data associated with the run, you can write the data to a file that will be uploaded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fibonacci_values = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]\n", + "scaled_values = (i * scale_factor for i in fibonacci_values)\n", + "\n", + "# Log a list of values. Note this will generate a single-variable line chart.\n", + "run.log_list(name='Fibonacci', value=scaled_values)\n", + "\n", + "for i in tqdm(range(-10, 10)):\n", + " # log a metric value repeatedly, this will generate a single-variable line chart.\n", + " run.log(name='Sigmoid', value=1 / (1 + np.exp(-i)))\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Logging tables\n", + "Tables are good for recording related sets of information such as accuracy tables, confusion matrices, etc.
\n", + "You can log a table in two ways:\n", + "* Create a dictionary of lists where each list represents a column in the table and call ``log_table()``\n", + "* Repeatedly call ``log_row()`` providing the same table name with a consistent set of named args as the column values\n", + "\n", + "Tables are presented in Run Details as a chart using the first two columns of the table. **Note:** tables logged into the run are expected to be relatively small. Logging very large tables into Azure ML can result in reduced performance. If you need to store large amounts of data associated with the run, you can write the data to a file that will be uploaded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a dictionary to hold a table of values\n", + "sines = {}\n", + "sines['angle'] = []\n", + "sines['sine'] = []\n", + "\n", + "for i in tqdm(range(-10, 10)):\n", + " angle = i / 2.0 * scale_factor\n", + " \n", + " # log a 2 (or more) values as a metric repeatedly. This will generate a 2-variable line chart if you have 2 numerical columns.\n", + " run.log_row(name='Cosine Wave', angle=angle, cos=np.cos(angle))\n", + " \n", + " sines['angle'].append(angle)\n", + " sines['sine'].append(np.sin(angle))\n", + "\n", + "# log a dictionary as a table, this will generate a 2-variable chart if you have 2 numerical columns\n", + "run.log_table(name='Sine Wave', value=sines)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Logging images\n", + "You can directly log _matplotlib_ plots and arbitrary images to your run record. This code logs a _matplotlib_ pyplot object. Images show up in the run details page in the Azure ML Portal."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "# Create a plot\n", + "import matplotlib.pyplot as plt\n", + "angle = np.linspace(-3, 3, 50) * scale_factor\n", + "plt.plot(angle,np.tanh(angle), label='tanh')\n", + "plt.legend(fontsize=12)\n", + "plt.title('Hyperbolic Tangent', fontsize=16)\n", + "plt.grid(True)\n", + "\n", + "# Log the plot to the run. To log an arbitrary image, use the form run.log_image(name, path='./image_path.png')\n", + "run.log_image(name='Hyperbolic Tangent', plot=plt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Uploading files\n", + "\n", + "Any files that are placed in the ``.\\outputs`` directory are automatically uploaded when the run is completed. These files are also visible in the *Outputs* tab of the Run Details page. Files can also be uploaded explicitly and stored as artifacts along with the run record.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile .\\outputs\\myfile.txt\n", + "\n", + "This is an output file that will be automatically uploaded." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Completing the run\n", + "\n", + "Calling `run.complete()` marks the run as completed and triggers the output file collection. If for any reason you need to indicate the run failed or simply need to cancel the run you can call `run.fail()` or `run.cancel()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.complete()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Analyzing results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can refresh the run in the Azure portal to see all of your results. 
In many cases you will want to analyze runs that were performed previously to inspect the contents or compare results. Runs can be fetched from their parent Experiment object using the ``Run()`` constructor or the ``experiment.get_runs()`` method. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fetched_run = Run(experiment, run_id)\n", + "fetched_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Call ``run.get_metrics()`` to retrieve all the metrics from a run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fetched_run.get_metrics()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See the files uploaded for this run by calling ``run.get_file_names()``" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fetched_run.get_file_names()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you know the file names in a run, you can download the files using the ``run.download_file()`` method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.makedirs('files', exist_ok=True)\n", + "\n", + "for f in run.get_file_names():\n", + " dest = os.path.join('files', f.split('/')[-1])\n", + " print('Downloading file {} to {}...'.format(f, dest))\n", + " fetched_run.download_file(f, dest) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tagging a run\n", + "Often when you analyze the results of a run, you may need to tag that run with important personal or external information. You can add a tag to a run using the ``run.tag()`` method. AzureML supports valueless and valued tags." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fetched_run.tag(\"My Favorite Run\")\n", + "fetched_run.tag(\"Competition Rank\", 1)\n", + "\n", + "fetched_run.get_tags()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next steps\n", + "To experiment more with logging and to understand how metrics can be visualized, go back to the *Starting a run* section, try changing the category and scale_factor values and going through the notebook several times. Play with the KPI, charting, and column selection options on the experiment's Run History reports page to see how the various metrics can be combined and visualized.\n", + "\n", + "After learning about all of the logging options, go to the [train on remote vm](../train-on-remote-vm/train-on-remote-vm.ipynb) notebook and experiment with logging from remote compute contexts." + ] + } ], - "kernelspec": { - "display_name": "Python 3.6", - "language": "python", - "name": "python36" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "metadata": { + "authors": [ + { + "name": "roastala" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/training/manage-runs/manage-runs.ipynb b/how-to-use-azureml/training/manage-runs/manage-runs.ipynb index f183daf62..9536518ae 100644
--- a/how-to-use-azureml/training/manage-runs/manage-runs.ipynb +++ b/how-to-use-azureml/training/manage-runs/manage-runs.ipynb @@ -1,595 +1,595 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright (c) Microsoft Corporation. All rights reserved.\n", - "\n", - "Licensed under the MIT License." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Manage runs\n", - "\n", - "## Table of contents\n", - "\n", - "1. [Introduction](#Introduction)\n", - "1. [Setup](#Setup)\n", - "1. [Start, monitor and complete a run](#Start,-monitor-and-complete-a-run)\n", - "1. [Add properties and tags](#Add-properties-and-tags)\n", - "1. [Query properties and tags](#Query-properties-and-tags)\n", - "1. [Start and query child runs](#Start-and-query-child-runs)\n", - "1. [Cancel or fail runs](#Cancel-or-fail-runs)\n", - "1. [Reproduce a run](#Reproduce-a-run)\n", - "1. [Next steps](#Next-steps)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Introduction\n", - "\n", - "When you're building enterprise-grade machine learning models, it is important to track, organize, monitor and reproduce your training runs. For example, you might want to trace the lineage behind a model deployed to production, and re-run the training experiment to troubleshoot issues. \n", - "\n", - "This notebooks shows examples how to use Azure Machine Learning services to manage your training runs." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "Make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't. 
Also, if you're new to Azure ML, we recommend that you go through [the tutorial](https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-train-models-with-aml) first to learn the basic concepts.\n", - "\n", - "Let's first import required packages, check Azure ML SDK version, connect to your workspace and create an Experiment to hold the runs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import azureml.core\n", - "from azureml.core import Workspace, Experiment, Run\n", - "from azureml.core import ScriptRunConfig\n", - "\n", - "print(azureml.core.VERSION)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ws = Workspace.from_config()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "exp = Experiment(workspace=ws, name=\"explore-runs\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Start, monitor and complete a run\n", - "\n", - "A run is an unit of execution, typically to train a model, but for other purposes as well, such as loading or transforming data. Runs are tracked by Azure ML service, and can be instrumented with metrics and artifact logging.\n", - "\n", - "A simplest way to start a run in your interactive Python session is to call *Experiment.start_logging* method. You can then log metrics from within the run." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "notebook_run = exp.start_logging()\n", - "\n", - "notebook_run.log(name=\"message\", value=\"Hello from run!\")\n", - "\n", - "print(notebook_run.get_status())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use *get_status method* to get the status of the run." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(notebook_run.get_status())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Also, you can simply enter the run to get a link to Azure Portal details" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "notebook_run" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Method *get_details* gives you more details on the run." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "notebook_run.get_details()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use *complete* method to end the run." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "notebook_run.complete()\n", - "print(notebook_run.get_status())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also use Python's *with...as* pattern. The run will automatically complete when moving out of scope. This way you don't need to manually complete the run." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with exp.start_logging() as notebook_run:\n", - " notebook_run.log(name=\"message\", value=\"Hello from run!\")\n", - " print(\"Is it still running?\",notebook_run.get_status())\n", - " \n", - "print(\"Has it completed?\",notebook_run.get_status())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, let's look at submitting a run as a separate Python process. To keep the example simple, we submit the run on local computer. Other targets could include remote VMs and Machine Learning Compute clusters in your Azure ML Workspace.\n", - "\n", - "We use *hello.py* script as an example. 
To perform logging, we need to get a reference to the Run instance from within the scope of the script. We do this using *Run.get_context* method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!more hello.py" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's submit the run on a local computer. A standard pattern in Azure ML SDK is to create run configuration, and then use *Experiment.submit* method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run_config = ScriptRunConfig(source_directory='.', script='hello.py')\n", - "\n", - "local_script_run = exp.submit(run_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can view the status of the run as before" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(local_script_run.get_status())\n", - "local_script_run" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Submitted runs have additional log files you can inspect using *get_details_with_logs*." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_script_run.get_details_with_logs()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use *wait_for_completion* method to block the local execution until remote run is complete." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_script_run.wait_for_completion(show_output=True)\n", - "print(local_script_run.get_status())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Add properties and tags\n", - "\n", - "Properties and tags help you organize your runs. 
You can use them to describe, for example, who authored the run, what the results were, and what machine learning approach was used. And as you'll later learn, properties and tags can be used to query the history of your runs to find the important ones.\n", - "\n", - "For example, let's add \"author\" property to the run:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_script_run.add_properties({\"author\":\"azureml-user\"})\n", - "print(local_script_run.get_properties())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Properties are immutable. Once you assign a value it cannot be changed, making them useful as a permanent record for auditing purposes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " local_script_run.add_properties({\"author\":\"different-user\"})\n", - "except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Tags on the other hand can be changed:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_script_run.tag(\"quality\", \"great run\")\n", - "print(local_script_run.get_tags())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_script_run.tag(\"quality\", \"fantastic run\")\n", - "print(local_script_run.get_tags())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also add a simple string tag. 
It appears in the tag dictionary with value of None" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_script_run.tag(\"worth another look\")\n", - "print(local_script_run.get_tags())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Query properties and tags\n", - "\n", - "You can quary runs within an experiment that match specific properties and tags. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "list(exp.get_runs(properties={\"author\":\"azureml-user\"},tags={\"quality\":\"fantastic run\"}))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "list(exp.get_runs(properties={\"author\":\"azureml-user\"},tags=\"worth another look\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Start and query child runs" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can use child runs to group together related runs, for example different hyperparameter tuning iterations.\n", - "\n", - "Let's use *hello_with_children* script to create a batch of 5 child runs from within a submitted run." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!more hello_with_children.py" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run_config = ScriptRunConfig(source_directory='.', script='hello_with_children.py')\n", - "\n", - "local_script_run = exp.submit(run_config)\n", - "local_script_run.wait_for_completion(show_output=True)\n", - "print(local_script_run.get_status())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can start child runs one by one. 
Note that this is less efficient than submitting a batch of runs, because each creation results in a network call.\n", - "\n", - "Child runs too complete automatically as they move out of scope." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with exp.start_logging() as parent_run:\n", - " for c,count in enumerate(range(5)):\n", - " with parent_run.child_run() as child:\n", - " child.log(name=\"Hello from child run\", value=c)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To query the child runs belonging to specific parent, use *get_children* method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "list(parent_run.get_children())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cancel or fail runs\n", - "\n", - "Sometimes, you realize that the run is not performing as intended, and you want to cancel it instead of waiting for it to complete.\n", - "\n", - "As an example, let's create a Python script with a delay in the middle." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!more hello_with_delay.py" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can use *cancel* method to cancel a run." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run_config = ScriptRunConfig(source_directory='.', script='hello_with_delay.py')\n", - "\n", - "local_script_run = exp.submit(run_config)\n", - "print(\"Did the run start?\",local_script_run.get_status())\n", - "local_script_run.cancel()\n", - "print(\"Did the run cancel?\",local_script_run.get_status())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also mark an unsuccessful run as failed." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_script_run = exp.submit(run_config)\n", - "local_script_run.fail()\n", - "print(local_script_run.get_status())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Reproduce a run\n", - "\n", - "When updating or troubleshooting on a model deployed to production, you sometimes need to revisit the original training run that produced the model. To help you with this, Azure ML service by default creates snapshots of your scripts a the time of run submission:\n", - "\n", - "You can use *restore_snapshot* to obtain a zip package of the latest snapshot of the script folder. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_script_run.restore_snapshot(path=\"snapshots\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can then extract the zip package, examine the code, and submit your run again." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Next steps\n", - "\n", - " * To learn more about logging APIs, see [logging API notebook](./logging-api/logging-api.ipynb)\n", - " * To learn more about remote runs, see [train on AML compute notebook](./train-on-amlcompute/train-on-amlcompute.ipynb)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "authors": [ - { - "name": "roastala" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Manage runs\n", + "\n", + "## Table of contents\n", + "\n", + "1. [Introduction](#Introduction)\n", + "1. [Setup](#Setup)\n", + "1. 
[Start, monitor and complete a run](#Start,-monitor-and-complete-a-run)\n", + "1. [Add properties and tags](#Add-properties-and-tags)\n", + "1. [Query properties and tags](#Query-properties-and-tags)\n", + "1. [Start and query child runs](#Start-and-query-child-runs)\n", + "1. [Cancel or fail runs](#Cancel-or-fail-runs)\n", + "1. [Reproduce a run](#Reproduce-a-run)\n", + "1. [Next steps](#Next-steps)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "When you're building enterprise-grade machine learning models, it is important to track, organize, monitor and reproduce your training runs. For example, you might want to trace the lineage behind a model deployed to production, and re-run the training experiment to troubleshoot issues. \n", + "\n", + "This notebook shows examples of how to use Azure Machine Learning services to manage your training runs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "Make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't. Also, if you're new to Azure ML, we recommend that you go through [the tutorial](https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-train-models-with-aml) first to learn the basic concepts.\n", + "\n", + "Let's first import required packages, check Azure ML SDK version, connect to your workspace and create an Experiment to hold the runs."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import azureml.core\n", + "from azureml.core import Workspace, Experiment, Run\n", + "from azureml.core import ScriptRunConfig\n", + "\n", + "print(azureml.core.VERSION)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "exp = Experiment(workspace=ws, name=\"explore-runs\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start, monitor and complete a run\n", + "\n", + "A run is a unit of execution, typically to train a model, but for other purposes as well, such as loading or transforming data. Runs are tracked by Azure ML service, and can be instrumented with metrics and artifact logging.\n", + "\n", + "The simplest way to start a run in your interactive Python session is to call the *Experiment.start_logging* method. You can then log metrics from within the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "notebook_run = exp.start_logging()\n", + "\n", + "notebook_run.log(name=\"message\", value=\"Hello from run!\")\n", + "\n", + "print(notebook_run.get_status())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the *get_status* method to get the status of the run."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(notebook_run.get_status())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Also, you can simply enter the run to get a link to Azure Portal details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "notebook_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Method *get_details* gives you more details on the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "notebook_run.get_details()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use *complete* method to end the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "notebook_run.complete()\n", + "print(notebook_run.get_status())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also use Python's *with...as* pattern. The run will automatically complete when moving out of scope. This way you don't need to manually complete the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with exp.start_logging() as notebook_run:\n", + " notebook_run.log(name=\"message\", value=\"Hello from run!\")\n", + " print(\"Is it still running?\",notebook_run.get_status())\n", + " \n", + "print(\"Has it completed?\",notebook_run.get_status())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, let's look at submitting a run as a separate Python process. To keep the example simple, we submit the run on local computer. Other targets could include remote VMs and Machine Learning Compute clusters in your Azure ML Workspace.\n", + "\n", + "We use *hello.py* script as an example. 
To perform logging, we need to get a reference to the Run instance from within the scope of the script. We do this using *Run.get_context* method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!more hello.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's submit the run on a local computer. A standard pattern in Azure ML SDK is to create run configuration, and then use *Experiment.submit* method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_config = ScriptRunConfig(source_directory='.', script='hello.py')\n", + "\n", + "local_script_run = exp.submit(run_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can view the status of the run as before" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(local_script_run.get_status())\n", + "local_script_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Submitted runs have additional log files you can inspect using *get_details_with_logs*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_script_run.get_details_with_logs()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use *wait_for_completion* method to block the local execution until remote run is complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_script_run.wait_for_completion(show_output=True)\n", + "print(local_script_run.get_status())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add properties and tags\n", + "\n", + "Properties and tags help you organize your runs. 
You can use them to describe, for example, who authored the run, what the results were, and what machine learning approach was used. And as you'll later learn, properties and tags can be used to query the history of your runs to find the important ones.\n", + "\n", + "For example, let's add \"author\" property to the run:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_script_run.add_properties({\"author\":\"azureml-user\"})\n", + "print(local_script_run.get_properties())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Properties are immutable. Once you assign a value it cannot be changed, making them useful as a permanent record for auditing purposes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " local_script_run.add_properties({\"author\":\"different-user\"})\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tags on the other hand can be changed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_script_run.tag(\"quality\", \"great run\")\n", + "print(local_script_run.get_tags())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_script_run.tag(\"quality\", \"fantastic run\")\n", + "print(local_script_run.get_tags())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also add a simple string tag. 
It appears in the tag dictionary with value of None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_script_run.tag(\"worth another look\")\n", + "print(local_script_run.get_tags())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query properties and tags\n", + "\n", + "You can query runs within an experiment that match specific properties and tags. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list(exp.get_runs(properties={\"author\":\"azureml-user\"},tags={\"quality\":\"fantastic run\"}))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list(exp.get_runs(properties={\"author\":\"azureml-user\"},tags=\"worth another look\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start and query child runs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use child runs to group together related runs, for example different hyperparameter tuning iterations.\n", + "\n", + "Let's use *hello_with_children* script to create a batch of 5 child runs from within a submitted run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!more hello_with_children.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_config = ScriptRunConfig(source_directory='.', script='hello_with_children.py')\n", + "\n", + "local_script_run = exp.submit(run_config)\n", + "local_script_run.wait_for_completion(show_output=True)\n", + "print(local_script_run.get_status())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can start child runs one by one. 
Note that this is less efficient than submitting a batch of runs, because each creation results in a network call.\n", + "\n", + "Child runs too complete automatically as they move out of scope." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with exp.start_logging() as parent_run:\n", + " for c,count in enumerate(range(5)):\n", + " with parent_run.child_run() as child:\n", + " child.log(name=\"Hello from child run\", value=c)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To query the child runs belonging to specific parent, use *get_children* method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list(parent_run.get_children())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cancel or fail runs\n", + "\n", + "Sometimes, you realize that the run is not performing as intended, and you want to cancel it instead of waiting for it to complete.\n", + "\n", + "As an example, let's create a Python script with a delay in the middle." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!more hello_with_delay.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use *cancel* method to cancel a run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_config = ScriptRunConfig(source_directory='.', script='hello_with_delay.py')\n", + "\n", + "local_script_run = exp.submit(run_config)\n", + "print(\"Did the run start?\",local_script_run.get_status())\n", + "local_script_run.cancel()\n", + "print(\"Did the run cancel?\",local_script_run.get_status())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also mark an unsuccessful run as failed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_script_run = exp.submit(run_config)\n", + "local_script_run.fail()\n", + "print(local_script_run.get_status())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reproduce a run\n", + "\n", + "When updating or troubleshooting on a model deployed to production, you sometimes need to revisit the original training run that produced the model. To help you with this, Azure ML service by default creates snapshots of your scripts at the time of run submission:\n", + "\n", + "You can use *restore_snapshot* to obtain a zip package of the latest snapshot of the script folder. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_script_run.restore_snapshot(path=\"snapshots\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can then extract the zip package, examine the code, and submit your run again." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next steps\n", + "\n", + " * To learn more about logging APIs, see [logging API notebook](./logging-api/logging-api.ipynb)\n", + " * To learn more about remote runs, see [train on AML compute notebook](./train-on-amlcompute/train-on-amlcompute.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } ], - "kernelspec": { - "display_name": "Python 3.6", - "language": "python", - "name": "python36" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "metadata": { + "authors": [ + { + "name": "roastala" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/training/train-on-local/train-on-local.ipynb b/how-to-use-azureml/training/train-on-local/train-on-local.ipynb index 9048a42d0..e2a7c4c80 100644 --- a/how-to-use-azureml/training/train-on-local/train-on-local.ipynb +++ b/how-to-use-azureml/training/train-on-local/train-on-local.ipynb @@ -488,7 +488,7 @@ "metadata": { "authors": [ { - "name": "haining" + "name": "roastala" } ], "kernelspec": { diff --git a/how-to-use-azureml/training/train-on-remote-vm/train-on-remote-vm.ipynb b/how-to-use-azureml/training/train-on-remote-vm/train-on-remote-vm.ipynb index 9042bc187..5fb586e11 100644 --- 
a/how-to-use-azureml/training/train-on-remote-vm/train-on-remote-vm.ipynb +++ b/how-to-use-azureml/training/train-on-remote-vm/train-on-remote-vm.ipynb @@ -615,7 +615,7 @@ "metadata": { "authors": [ { - "name": "haining" + "name": "roastala" } ], "kernelspec": { diff --git a/how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb b/how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb index 45d7aa37e..76abee73e 100644 --- a/how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb +++ b/how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb @@ -673,7 +673,7 @@ "metadata": { "authors": [ { - "name": "haining" + "name": "roastala" } ], "kernelspec": { diff --git a/tutorials/dflows.dprep b/tutorials/dflows.dprep index 4ffa7c86e..2ff862f2a 100644 --- a/tutorials/dflows.dprep +++ b/tutorials/dflows.dprep @@ -1,309 +1,461 @@ { - "schemaVersion": 63, - "id": "6af23880-a2ee-4f5a-b466-fa2e5c4da9bc", - "activities": [ + "blocks": [ { - "id": "6d218b59-f9fa-44d1-a6ea-6c517e78e7aa", - "name": "dataflow", - "blocks": [ - { - "id": "49fe4a0c-a2ac-4251-9f68-391769e3b93a", - "type": "Microsoft.DPrep.GetFilesBlock", - "arguments": { - "isArchive": false, - "path": { - "target": 1, - "resourceDetails": [ - { - "path": "https://dprepdata.blob.core.windows.net/demo/green-small/*", - "sas": null, - "storageAccountName": null, - "storageAccountKey": null - } - ] + "id": "01111501-eb7e-49e1-9f50-4b1cfa86a785", + "type": "Microsoft.DPrep.GetFilesBlock", + "arguments": { + "isArchive": false, + "path": { + "target": 1, + "resourceDetails": [ + { + "path": "https://dprepdata.blob.core.windows.net/demo/green-small/*", + "sas": null, + "storageAccountName": null, + "storageAccountKey": null } - }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null + ] + } + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": 
"90f226e9-b393-4690-b85c-f90aeb6b3afc", + "type": "Microsoft.DPrep.ParseDelimitedBlock", + "arguments": { + "columnHeadersMode": 2, + "fileEncoding": 0, + "handleQuotedLineBreaks": false, + "preview": false, + "separator": ",", + "skipRows": 0, + "skipRowsMode": 0 + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "4292a198-4a35-4e88-be22-cec950aa7586", + "type": "Microsoft.DPrep.DropColumnsBlock", + "arguments": { + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "Path" + ] + } + } + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "4e59beb2-ea20-4528-86cd-1ed4ca98ee7d", + "type": "Microsoft.DPrep.ReplaceNaBlock", + "arguments": { + "columns": { + "type": 1, + "details": { + "ignoreCase": false, + "term": ".*", + "useRegex": true, + "matchWholeWord": false, + "invert": false + } }, - { - "id": "11e29fa4-5020-454a-99ef-946130ff11d8", - "type": "Microsoft.DPrep.ParseDelimitedBlock", - "arguments": { - "columnHeadersMode": 2, - "fileEncoding": 0, - "handleQuotedLineBreaks": false, - "preview": false, - "separator": ",", - "skipRows": 0, - "skipRowsMode": 0 + "useDefaultNaList": true, + "useEmptyStringAsNa": true, + "useNanAsNa": true + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "a8fa1654-7a5a-43b1-8354-4f98288f9b64", + "type": "Microsoft.DPrep.DropNullsBlock", + "arguments": { + "columnRelationship": 0, + "columns": { + "type": 1, + "details": { + "ignoreCase": false, + "term": ".*", + "useRegex": true, + "matchWholeWord": false, + "invert": false + } + } + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "e95b3a50-3bea-4548-9972-f2be1d30737b", + "type": "Microsoft.DPrep.RenameColumnsBlock", + "arguments": { + "columnPairs": [ + { + "column": { + "type": 2, + "details": { + "selectedColumn": "VendorID" + } + }, + "newColumnId": "vendor" }, - "localData": 
{}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "8f05426b-938f-4c99-bad4-58d26923f4ba", - "type": "Microsoft.DPrep.DropColumnsBlock", - "arguments": { - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "Path" - ] + "selectedColumn": "lpep_pickup_datetime" } - } + }, + "newColumnId": "pickup_datetime" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "a93911be-db18-4517-9e17-0e8ca76dbb97", - "type": "Microsoft.DPrep.ReplaceNaBlock", - "arguments": { - "columns": { - "type": 1, + { + "column": { + "type": 2, "details": { - "ignoreCase": false, - "term": ".*", - "useRegex": true, - "matchWholeWord": false, - "invert": false + "selectedColumn": "Lpep_dropoff_datetime" } }, - "useDefaultNaList": true, - "useEmptyStringAsNa": true, - "useNanAsNa": true + "newColumnId": "dropoff_datetime" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "3964eb98-15ea-49aa-b1a4-d749f4855ce6", - "type": "Microsoft.DPrep.DropNullsBlock", - "arguments": { - "columnRelationship": 0, - "columns": { - "type": 1, + { + "column": { + "type": 2, "details": { - "ignoreCase": false, - "term": ".*", - "useRegex": true, - "matchWholeWord": false, - "invert": false + "selectedColumn": "lpep_dropoff_datetime" } - } + }, + "newColumnId": "dropoff_datetime" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "ee9f3a23-6bc5-4822-8a8b-da412e7ab691", - "type": "Microsoft.DPrep.RenameColumnsBlock", - "arguments": { - "columnPairs": [ - { - "column": { - "type": 2, - "details": { - "selectedColumn": "VendorID" - } - }, - "newColumnId": "vendor" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "lpep_pickup_datetime" - } - }, - "newColumnId": "pickup_datetime" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Lpep_dropoff_datetime" - } - }, - 
"newColumnId": "dropoff_datetime" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "lpep_dropoff_datetime" - } - }, - "newColumnId": "dropoff_datetime" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Store_and_fwd_flag" - } - }, - "newColumnId": "store_forward" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "store_and_fwd_flag" - } - }, - "newColumnId": "store_forward" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Pickup_longitude" - } - }, - "newColumnId": "pickup_longitude" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Pickup_latitude" - } - }, - "newColumnId": "pickup_latitude" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Dropoff_longitude" - } - }, - "newColumnId": "dropoff_longitude" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Dropoff_latitude" - } - }, - "newColumnId": "dropoff_latitude" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Passenger_count" - } - }, - "newColumnId": "passengers" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Fare_amount" - } - }, - "newColumnId": "cost" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Trip_distance" - } - }, - "newColumnId": "distance" + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Store_and_fwd_flag" } - ] + }, + "newColumnId": "store_forward" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "10118a40-ab49-476d-8d66-d34631c8671a", - "type": "Microsoft.DPrep.KeepColumnsBlock", - "arguments": { - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "cost", - "distance", - "dropoff_datetime", - "dropoff_latitude", - "dropoff_longitude", - "passengers", - "pickup_datetime", - "pickup_latitude", - "pickup_longitude", - "store_forward", - "vendor" - ] + 
"selectedColumn": "store_and_fwd_flag" } - } + }, + "newColumnId": "store_forward" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "c7ffca78-04ec-45b6-a0bc-d1ec17cc7113", - "type": "Microsoft.DPrep.AppendRowsBlock", - "arguments": { - "dataflows": [ + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Pickup_longitude" + } + }, + "newColumnId": "pickup_longitude" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Pickup_latitude" + } + }, + "newColumnId": "pickup_latitude" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Dropoff_longitude" + } + }, + "newColumnId": "dropoff_longitude" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Dropoff_latitude" + } + }, + "newColumnId": "dropoff_latitude" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Passenger_count" + } + }, + "newColumnId": "passengers" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Fare_amount" + } + }, + "newColumnId": "cost" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Trip_distance" + } + }, + "newColumnId": "distance" + } + ] + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "51c214cd-d34c-46b6-8f22-c38b983e5574", + "type": "Microsoft.DPrep.KeepColumnsBlock", + "arguments": { + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "cost", + "distance", + "dropoff_datetime", + "dropoff_latitude", + "dropoff_longitude", + "passengers", + "pickup_datetime", + "pickup_latitude", + "pickup_longitude", + "store_forward", + "vendor" + ] + } + } + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "48f6db5e-335b-4ce1-8477-e831e8254a33", + "type": "Microsoft.DPrep.AppendRowsBlock", + "arguments": { + "dataflows": [ + { + "referenceType": 2, + "referenceContainerPath": null, + "referencedStep": null, + 
"anonymousSteps": [ { - "referenceType": 2, - "referenceContainerPath": null, - "referencedActivityId": null, - "referencedBranchId": null, - "referencedStep": null, - "anonymousSteps": [ + "id": "34e25357-2ca3-43a1-ab24-0bf90688b18a", + "description": { + "localDataProperties": [], + "blockGroup": 0, + "supportedTargets": [ + 4 + ], + "supportedFieldTypes": null, + "supportedMessages": [], + "requiresInitialization": false, + "allowDefaultAdd": true, + "documentation": "Expands the path specified by reading globs and files in folders and outputs one record per file found.", + "type": "Microsoft.DPrep.GetFilesBlock", + "propertyDescriptions": [ + { + "name": "path", + "type": 11, + "multipleValues": false, + "domain": { + "type": 7, + "details": { + "allowMultiple": true, + "allowedSources": [ + 0, + 1, + 2 + ], + "allowedSourceType": 2 + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The path or paths to expand." + }, + { + "name": "isArchive", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": false, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Whether the path or paths point to archive files." 
+ }, + { + "name": "archiveOptions", + "type": 7, + "multipleValues": false, + "domain": { + "type": 0, + "details": [ + { + "name": "archiveType", + "type": 6, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "ArchiveType", + "enumType": "Microsoft.DPrep.Engine.ArchiveType, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0 + ] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "The type of archive file" + }, + { + "name": "entryGlob", + "type": 1, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "The glob pattern for entries in archive file" + } + ] + }, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": { + "condition": { + "property": { + "propertyPath": [ + "isArchive" + ] + }, + "comparison": 0, + "value": true + }, + "aggregate": null + }, + "documentation": "Options for archive file" + } + ], + "dataEffectDetails": { + "dataEffect": 9 + } + }, + "type": "Microsoft.DPrep.GetFilesBlock", + "dataEffectDetails": { + "dataEffect": 9 + }, + "propertyDescriptions": [ { - "id": "2be4b61e-5d90-4fbb-a71c-72c243d2660b", - "description": { - "localDataProperties": [], - "blockGroup": 0, - "supportedTargets": [ - 4 - ], - "supportedFieldTypes": null, - "supportedMessages": [], - "allowDefaultAdd": true, - "documentation": "Expands the path specified by reading globs and files in folders and outputs one record per file found.", - "type": "Microsoft.DPrep.GetFilesBlock", - "propertyDescriptions": [ - { - "name": "path", - "type": 11, + 
"name": "path", + "type": 11, + "multipleValues": false, + "domain": { + "type": 7, + "details": { + "allowMultiple": true, + "allowedSources": [ + 0, + 1, + 2 + ], + "allowedSourceType": 2 + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The path or paths to expand." + }, + { + "name": "isArchive", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": false, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Whether the path or paths point to archive files." + }, + { + "name": "archiveOptions", + "type": 7, + "multipleValues": false, + "domain": { + "type": 0, + "details": [ + { + "name": "archiveType", + "type": 6, "multipleValues": false, "domain": { - "type": 7, + "type": 1, "details": { - "allowMultiple": true, - "allowedSources": [ - 0, - 1, - 2 - ], - "allowedSourceType": 2 + "enumName": "ArchiveType", + "enumType": "Microsoft.DPrep.Engine.ArchiveType, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0 + ] } }, "multiValueDetails": null, @@ -311,402 +463,800 @@ "origin": 1, "defaultValue": null, "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The path or paths to expand." - }, - { - "name": "isArchive", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": false, - "initializeFromProperty": null, "telemetryStrategy": 0, "condition": null, - "documentation": "Whether the path or paths point to archive files." 
+ "documentation": "The type of archive file" }, { - "name": "archiveOptions", - "type": 7, + "name": "entryGlob", + "type": 1, "multipleValues": false, - "domain": { - "type": 0, - "details": [ - { - "name": "archiveType", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "ArchiveType", - "enumType": "Microsoft.DPrep.Engine.ArchiveType, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0 - ] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "The type of archive file" - }, - { - "name": "entryGlob", - "type": 1, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "The glob pattern for entries in archive file" - } - ] - }, + "domain": null, "multiValueDetails": null, "isRequired": false, - "origin": 1, + "origin": 0, "defaultValue": null, "initializeFromProperty": null, "telemetryStrategy": 0, - "condition": { - "condition": { - "property": { - "propertyPath": [ - "isArchive" - ] - }, - "comparison": 0, - "value": true - }, - "aggregate": null - }, - "documentation": "Options for archive file" + "condition": null, + "documentation": "The glob pattern for entries in archive file" } - ], - "dataEffectDetails": { - "dataEffect": 1 - } - }, - "type": "Microsoft.DPrep.GetFilesBlock", - "dataEffectDetails": { - "dataEffect": 1 + ] }, - "propertyDescriptions": [ - { - "name": "path", - "type": 11, - "multipleValues": false, - "domain": { - "type": 7, - "details": { - "allowMultiple": true, - "allowedSources": [ - 0, - 1, - 2 - ], - "allowedSourceType": 2 - } + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": 
null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": { + "condition": { + "property": { + "propertyPath": [ + "isArchive" + ] }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The path or paths to expand." + "comparison": 0, + "value": true }, + "aggregate": null + }, + "documentation": "Options for archive file" + } + ], + "arguments": { + "isArchive": false, + "path": { + "target": 1, + "resourceDetails": [ { - "name": "isArchive", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": false, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Whether the path or paths point to archive files." + "path": "https://dprepdata.blob.core.windows.net/demo/yellow-small/*", + "sas": null, + "storageAccountName": null, + "storageAccountKey": null + } + ] + } + } + }, + { + "id": "b0198a55-5142-4bc5-a7f5-f4439480f4d2", + "description": { + "localDataProperties": [], + "blockGroup": 0, + "supportedTargets": [ + 5 + ], + "supportedFieldTypes": null, + "supportedMessages": [], + "requiresInitialization": false, + "allowDefaultAdd": true, + "documentation": null, + "type": "Microsoft.DPrep.ParseDelimitedBlock", + "propertyDescriptions": [ + { + "name": "maxRows", + "type": 2, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 2, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "preview", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 2, + "defaultValue": false, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + 
"documentation": null + }, + { + "name": "columnHeadersMode", + "type": 6, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "PromoteHeadersMode", + "enumType": "Microsoft.DPrep.Engine.PromoteHeadersMode, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0, + 1, + 2, + 3 + ] + } }, - { - "name": "archiveOptions", - "type": 7, - "multipleValues": false, - "domain": { - "type": 0, - "details": [ + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": 1, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "separator", + "type": 0, + "multipleValues": false, + "domain": { + "type": 6, + "details": { + "values": [ { - "name": "archiveType", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "ArchiveType", - "enumType": "Microsoft.DPrep.Engine.ArchiveType, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0 - ] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "The type of archive file" + "value": ",", + "id": 0 }, { - "name": "entryGlob", - "type": 1, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "The glob pattern for entries in archive file" - } - ] - }, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": { - "condition": { - "property": { - "propertyPath": [ - "isArchive" - ] + "value": "\t", + "id": 1 + }, + { + "value": ":", + 
"id": 2 + }, + { + "value": ";", + "id": 3 + }, + { + "value": " ", + "id": 5 + }, + { + "value": "=", + "id": 4 }, - "comparison": 0, - "value": true + { + "value": "—", + "id": 7 + } + ], + "allowCustom": true, + "customEnumValue": 6 + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": ",", + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "commentLineCharacter", + "type": 0, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "fileEncoding", + "type": 6, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "FileEncoding", + "enumType": "Microsoft.DPrep.Engine.FileEncoding, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7 + ] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": 0, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "skipRowsMode", + "type": 6, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "SkipMode", + "enumType": "Microsoft.DPrep.Engine.SkipMode, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0, + 1, + 2 + ] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": 0, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "skipRows", + "type": 2, + "multipleValues": false, + "domain": { + "type": 4, + "details": { + "min": 0.0, + "max": 1.7976931348623157E+308 + } + }, + "multiValueDetails": 
null, + "isRequired": false, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": { + "condition": { + "property": { + "propertyPath": [ + "skipRowsMode" + ] }, - "aggregate": null + "comparison": 1, + "value": 0 }, - "documentation": "Options for archive file" + "aggregate": null + }, + "documentation": null + }, + { + "name": "handleQuotedLineBreaks", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": false, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + } + ], + "dataEffectDetails": { + "dataEffect": 1 + } + }, + "type": "Microsoft.DPrep.ParseDelimitedBlock", + "dataEffectDetails": { + "dataEffect": 1 + }, + "propertyDescriptions": [ + { + "name": "maxRows", + "type": 2, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 2, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "preview", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 2, + "defaultValue": false, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "columnHeadersMode", + "type": 6, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "PromoteHeadersMode", + "enumType": "Microsoft.DPrep.Engine.PromoteHeadersMode, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0, + 1, + 2, + 3 + ] } - ], - "arguments": { - "isArchive": false, - "path": { - "target": 1, - "resourceDetails": [ + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": 1, + "initializeFromProperty": null, + 
"telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "separator", + "type": 0, + "multipleValues": false, + "domain": { + "type": 6, + "details": { + "values": [ + { + "value": ",", + "id": 0 + }, + { + "value": "\t", + "id": 1 + }, + { + "value": ":", + "id": 2 + }, { - "path": "https://dprepdata.blob.core.windows.net/demo/yellow-small/*", - "sas": null, - "storageAccountName": null, - "storageAccountKey": null + "value": ";", + "id": 3 + }, + { + "value": " ", + "id": 5 + }, + { + "value": "=", + "id": 4 + }, + { + "value": "—", + "id": 7 } + ], + "allowCustom": true, + "customEnumValue": 6 + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": ",", + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "commentLineCharacter", + "type": 0, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "fileEncoding", + "type": 6, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "FileEncoding", + "enumType": "Microsoft.DPrep.Engine.FileEncoding, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7 ] } - } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": 0, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null }, { - "id": "e6b4a5b2-d80d-4b0f-acfa-42890ff78f9d", - "description": { - "localDataProperties": [], - "blockGroup": 0, - "supportedTargets": [ - 5 - ], - "supportedFieldTypes": null, - "supportedMessages": [], - "allowDefaultAdd": true, - "documentation": null, - "type": "Microsoft.DPrep.ParseDelimitedBlock", - 
"propertyDescriptions": [ - { - "name": "maxRows", - "type": 2, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 2, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - }, - { - "name": "preview", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 2, - "defaultValue": false, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "name": "skipRowsMode", + "type": 6, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "SkipMode", + "enumType": "Microsoft.DPrep.Engine.SkipMode, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0, + 1, + 2 + ] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": 0, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + }, + { + "name": "skipRows", + "type": 2, + "multipleValues": false, + "domain": { + "type": 4, + "details": { + "min": 0.0, + "max": 1.7976931348623157E+308 + } + }, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": { + "condition": { + "property": { + "propertyPath": [ + "skipRowsMode" + ] }, - { - "name": "columnHeadersMode", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "PromoteHeadersMode", - "enumType": "Microsoft.DPrep.Engine.PromoteHeadersMode, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0, - 1, - 2, - 3 - ] - } + "comparison": 1, + "value": 0 + }, + "aggregate": null + }, + "documentation": null + }, + { + "name": "handleQuotedLineBreaks", + "type": 
4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": false, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + } + ], + "arguments": { + "columnHeadersMode": 3, + "fileEncoding": 0, + "handleQuotedLineBreaks": false, + "preview": false, + "separator": ",", + "skipRows": 0, + "skipRowsMode": 0 + } + }, + { + "id": "b3a40b63-acb6-467c-8eb0-6718a9e52e22", + "description": { + "localDataProperties": [], + "blockGroup": 6, + "supportedTargets": [ + 2, + 3 + ], + "supportedFieldTypes": null, + "supportedMessages": [], + "requiresInitialization": false, + "allowDefaultAdd": true, + "documentation": "Drops the specified columns.", + "type": "Microsoft.DPrep.DropColumnsBlock", + "propertyDescriptions": [ + { + "name": "columns", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 1, + "fieldTypes": [] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source columns." + } + ], + "dataEffectDetails": { + "columnsRemovedProperties": [ + { + "propertyPath": [ + "columns" + ] + } + ], + "dataEffect": 4 + } + }, + "type": "Microsoft.DPrep.DropColumnsBlock", + "dataEffectDetails": { + "columnsRemovedProperties": [ + { + "propertyPath": [ + "columns" + ] + } + ], + "dataEffect": 4 + }, + "propertyDescriptions": [ + { + "name": "columns", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 1, + "fieldTypes": [] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source columns." 
+ } + ], + "arguments": { + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "Path" + ] + } + } + } + }, + { + "id": "378efb1b-9db2-4c76-a8e1-ad653493551a", + "description": { + "localDataProperties": [], + "blockGroup": 7, + "supportedTargets": [ + 5 + ], + "supportedFieldTypes": null, + "supportedMessages": [], + "requiresInitialization": false, + "allowDefaultAdd": true, + "documentation": null, + "type": "Microsoft.DPrep.SetColumnTypesBlock", + "propertyDescriptions": [ + { + "name": "columnConversion", + "type": 7, + "multipleValues": true, + "domain": { + "type": 0, + "details": [ + { + "name": "column", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 0, + "fieldTypes": [] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source column." }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": 1, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - }, - { - "name": "separator", - "type": 0, - "multipleValues": false, - "domain": { + { + "name": "typeProperty", "type": 6, - "details": { - "values": [ - { - "value": ",", - "id": 0 - }, - { - "value": "\t", - "id": 1 - }, - { - "value": ":", - "id": 2 - }, - { - "value": ";", - "id": 3 - }, - { - "value": " ", - "id": 5 - }, - { - "value": "=", - "id": 4 - }, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "FieldType", + "enumType": "Microsoft.DPrep.Engine.FieldType, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0, + 1, + 3, + 4, + 10 + ] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": 0, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": 
null, + "documentation": null + }, + { + "name": "typeArguments", + "type": 7, + "multipleValues": false, + "domain": { + "type": 0, + "details": [ { - "value": "—", - "id": 7 + "name": "dateTimeFormats", + "type": 1, + "multipleValues": true, + "domain": null, + "multiValueDetails": { + "isOrdered": false + }, + "isRequired": false, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null } - ], - "allowCustom": true, - "customEnumValue": 6 - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": ",", - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - }, - { - "name": "commentLineCharacter", - "type": 0, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - }, - { - "name": "fileEncoding", - "type": 6, + ] + }, + "multiValueDetails": null, + "isRequired": false, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + } + ] + }, + "multiValueDetails": { + "isOrdered": false + }, + "isRequired": false, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + } + ], + "dataEffectDetails": { + "dataEffect": 1 + } + }, + "type": "Microsoft.DPrep.SetColumnTypesBlock", + "dataEffectDetails": { + "dataEffect": 1 + }, + "propertyDescriptions": [ + { + "name": "columnConversion", + "type": 7, + "multipleValues": true, + "domain": { + "type": 0, + "details": [ + { + "name": "column", + "type": 5, "multipleValues": false, "domain": { - "type": 1, + "type": 12, "details": { - "enumName": "FileEncoding", - "enumType": 
"Microsoft.DPrep.Engine.FileEncoding, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ] + "selectorDomainType": 0, + "fieldTypes": [] } }, "multiValueDetails": null, "isRequired": true, - "origin": 1, - "defaultValue": 0, + "origin": 0, + "defaultValue": null, "initializeFromProperty": null, - "telemetryStrategy": 0, + "telemetryStrategy": 1, "condition": null, - "documentation": null + "documentation": "The source column." }, { - "name": "skipRowsMode", + "name": "typeProperty", "type": 6, "multipleValues": false, "domain": { "type": 1, "details": { - "enumName": "SkipMode", - "enumType": "Microsoft.DPrep.Engine.SkipMode, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", + "enumName": "FieldType", + "enumType": "Microsoft.DPrep.Engine.FieldType, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", "enumValues": [ 0, 1, - 2 + 3, + 4, + 10 ] } }, "multiValueDetails": null, "isRequired": true, - "origin": 1, + "origin": 0, "defaultValue": 0, "initializeFromProperty": null, "telemetryStrategy": 0, @@ -714,312 +1264,691 @@ "documentation": null }, { - "name": "skipRows", - "type": 2, + "name": "typeArguments", + "type": 7, "multipleValues": false, "domain": { - "type": 4, - "details": { - "min": 0.0, - "max": 1.7976931348623157E+308 - } + "type": 0, + "details": [ + { + "name": "dateTimeFormats", + "type": 1, + "multipleValues": true, + "domain": null, + "multiValueDetails": { + "isOrdered": false + }, + "isRequired": false, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + } + ] }, "multiValueDetails": null, "isRequired": false, - "origin": 1, + "origin": 0, "defaultValue": null, "initializeFromProperty": null, "telemetryStrategy": 0, - "condition": { - "condition": { - "property": { - 
"propertyPath": [ - "skipRowsMode" - ] - }, - "comparison": 1, - "value": 0 - }, - "aggregate": null - }, - "documentation": null - }, - { - "name": "handleQuotedLineBreaks", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": false, - "initializeFromProperty": null, - "telemetryStrategy": 0, "condition": null, "documentation": null } - ], - "dataEffectDetails": { - "dataEffect": 1 - } + ] }, - "type": "Microsoft.DPrep.ParseDelimitedBlock", - "dataEffectDetails": { - "dataEffect": 1 + "multiValueDetails": { + "isOrdered": false }, - "propertyDescriptions": [ - { - "name": "maxRows", + "isRequired": false, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + } + ], + "arguments": { + "columnConversion": [ + { + "column": { "type": 2, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 2, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "details": { + "selectedColumn": "End_Lon" + } }, - { - "name": "preview", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 2, - "defaultValue": false, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Trip_Pickup_DateTime" + } }, - { - "name": "columnHeadersMode", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "PromoteHeadersMode", - "enumType": "Microsoft.DPrep.Engine.PromoteHeadersMode, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0, - 1, - 2, - 3 - ] - } - }, - "multiValueDetails": null, 
- "isRequired": true, - "origin": 1, - "defaultValue": 1, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Tip_Amt" + } }, - { - "name": "separator", - "type": 0, - "multipleValues": false, - "domain": { - "type": 6, - "details": { - "values": [ - { - "value": ",", - "id": 0 - }, - { - "value": "\t", - "id": 1 - }, - { - "value": ":", - "id": 2 - }, - { - "value": ";", - "id": 3 - }, - { - "value": " ", - "id": 5 - }, - { - "value": "=", - "id": 4 - }, - { - "value": "—", - "id": 7 - } - ], - "allowCustom": true, - "customEnumValue": 6 - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": ",", - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Passenger_Count" + } }, - { - "name": "commentLineCharacter", - "type": 0, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Fare_Amt" + } }, - { - "name": "fileEncoding", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "FileEncoding", - "enumType": "Microsoft.DPrep.Engine.FileEncoding, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": 0, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "typeProperty": 0 + }, + { + "column": { 
+ "type": 2, + "details": { + "selectedColumn": "vendor_name" + } }, - { - "name": "skipRowsMode", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "SkipMode", - "enumType": "Microsoft.DPrep.Engine.SkipMode, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0, - 1, - 2 - ] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": 0, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Start_Lat" + } }, - { - "name": "skipRows", + "typeProperty": 0 + }, + { + "column": { "type": 2, - "multipleValues": false, - "domain": { - "type": 4, - "details": { - "min": 0.0, - "max": 1.7976931348623157E+308 - } - }, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": { - "condition": { - "property": { - "propertyPath": [ - "skipRowsMode" - ] + "details": { + "selectedColumn": "Tolls_Amt" + } + }, + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "End_Lat" + } + }, + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "surcharge" + } + }, + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Trip_Dropoff_DateTime" + } + }, + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Total_Amt" + } + }, + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "store_and_forward" + } + }, + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Payment_Type" + } + }, + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Rate_Code" + } + }, 
+ "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Trip_Distance" + } + }, + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Start_Lon" + } + }, + "typeProperty": 0 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "mta_tax" + } + }, + "typeProperty": 0 + } + ] + } + }, + { + "id": "1569498c-6f81-43e2-881b-e11587ae60e2", + "description": { + "localDataProperties": [], + "blockGroup": 3, + "supportedTargets": [ + 2, + 3 + ], + "supportedFieldTypes": [ + 1, + 4, + 3, + 2, + 0, + 5 + ], + "supportedMessages": [], + "requiresInitialization": false, + "allowDefaultAdd": true, + "documentation": "Replaces values in the specified columns with nulls. You can choose to use the default list, supply your own, or both.", + "type": "Microsoft.DPrep.ReplaceNaBlock", + "propertyDescriptions": [ + { + "name": "useDefaultNaList", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": true, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Use the default list and replace 'null', 'NaN', 'NA', and 'N/A' with null." + }, + { + "name": "useEmptyStringAsNa", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": true, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Replace empty strings with null." + }, + { + "name": "useNanAsNa", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": true, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Replace NaNs with Null." 
+ }, + { + "name": "customNaList", + "type": 1, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Provide a comma separated list of values to replace with null." + }, + { + "name": "columns", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 1, + "fieldTypes": [] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source columns." + } + ], + "dataEffectDetails": { + "columnsTransformedProperties": [ + { + "propertyPath": [ + "columns" + ] + } + ], + "dataEffect": 5 + } + }, + "type": "Microsoft.DPrep.ReplaceNaBlock", + "dataEffectDetails": { + "columnsTransformedProperties": [ + { + "propertyPath": [ + "columns" + ] + } + ], + "dataEffect": 5 + }, + "propertyDescriptions": [ + { + "name": "useDefaultNaList", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": true, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Use the default list and replace 'null', 'NaN', 'NA', and 'N/A' with null." + }, + { + "name": "useEmptyStringAsNa", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": true, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Replace empty strings with null." 
+ }, + { + "name": "useNanAsNa", + "type": 4, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": true, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Replace NaNs with Null." + }, + { + "name": "customNaList", + "type": 1, + "multipleValues": false, + "domain": null, + "multiValueDetails": null, + "isRequired": false, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Provide a comma separated list of values to replace with null." + }, + { + "name": "columns", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 1, + "fieldTypes": [] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source columns." 
+ } + ], + "arguments": { + "columns": { + "type": 1, + "details": { + "ignoreCase": false, + "term": ".*", + "useRegex": true, + "matchWholeWord": false, + "invert": false + } + }, + "useDefaultNaList": true, + "useEmptyStringAsNa": true, + "useNanAsNa": true + } + }, + { + "id": "5c88808e-dd28-4d79-be9c-06773aae56e8", + "description": { + "localDataProperties": [], + "blockGroup": 5, + "supportedTargets": [ + 2, + 3 + ], + "supportedFieldTypes": null, + "supportedMessages": [], + "requiresInitialization": false, + "allowDefaultAdd": false, + "documentation": "Drops rows where all or any of the selected columns are null.", + "type": "Microsoft.DPrep.DropNullsBlock", + "propertyDescriptions": [ + { + "name": "columns", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 1, + "fieldTypes": [] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source columns." + }, + { + "name": "columnRelationship", + "type": 6, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "ColumnRelationship", + "enumType": "Microsoft.DPrep.Engine.ColumnRelationship, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0, + 1 + ] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": 0, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Whether all or any of the selected columns must be null." 
+ } + ], + "dataEffectDetails": { + "dataEffect": 6 + } + }, + "type": "Microsoft.DPrep.DropNullsBlock", + "dataEffectDetails": { + "dataEffect": 6 + }, + "propertyDescriptions": [ + { + "name": "columns", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 1, + "fieldTypes": [] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source columns." + }, + { + "name": "columnRelationship", + "type": 6, + "multipleValues": false, + "domain": { + "type": 1, + "details": { + "enumName": "ColumnRelationship", + "enumType": "Microsoft.DPrep.Engine.ColumnRelationship, Microsoft.DPrep.EngineAPI, Version=0.1.1903.18033, Culture=neutral, PublicKeyToken=null", + "enumValues": [ + 0, + 1 + ] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": 0, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "Whether all or any of the selected columns must be null." 
+ } + ], + "arguments": { + "columnRelationship": 0, + "columns": { + "type": 1, + "details": { + "ignoreCase": false, + "term": ".*", + "useRegex": true, + "matchWholeWord": false, + "invert": false + } + } + } + }, + { + "id": "507b229d-a8bd-4517-8984-d5750b4443e9", + "description": { + "localDataProperties": [], + "blockGroup": 6, + "supportedTargets": [ + 2, + 3 + ], + "supportedFieldTypes": null, + "supportedMessages": [], + "requiresInitialization": false, + "allowDefaultAdd": true, + "documentation": "Renames the specified columns.", + "type": "Microsoft.DPrep.RenameColumnsBlock", + "propertyDescriptions": [ + { + "name": "columnPairs", + "type": 7, + "multipleValues": true, + "domain": { + "type": 0, + "details": [ + { + "name": "column", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 0, + "fieldTypes": [] + } }, - "comparison": 1, - "value": 0 + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source column." }, - "aggregate": null - }, - "documentation": null + { + "name": "newColumnId", + "type": 1, + "multipleValues": false, + "domain": { + "type": 3, + "details": null + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null + } + ] + }, + "multiValueDetails": { + "isOrdered": false + }, + "isRequired": true, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "The columns to rename and the desired new names." 
+ } + ], + "dataEffectDetails": { + "columnsChangedProperties": [ + { + "propertyPath": [ + "columnPairs", + "column" + ] }, { - "name": "handleQuotedLineBreaks", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": false, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "propertyPath": [ + "columnPairs", + "newColumnId" + ] } ], - "arguments": { - "columnHeadersMode": 3, - "fileEncoding": 0, - "handleQuotedLineBreaks": false, - "preview": false, - "separator": ",", - "skipRows": 0, - "skipRowsMode": 0 + "dataEffect": 2 + } + }, + "type": "Microsoft.DPrep.RenameColumnsBlock", + "dataEffectDetails": { + "columnsChangedProperties": [ + { + "propertyPath": [ + "columnPairs", + "column" + ] + }, + { + "propertyPath": [ + "columnPairs", + "newColumnId" + ] } - }, + ], + "dataEffect": 2 + }, + "propertyDescriptions": [ { - "id": "e2b1ec33-0362-496f-af1a-e5c3daad511d", - "description": { - "localDataProperties": [], - "blockGroup": 6, - "supportedTargets": [ - 2, - 3 - ], - "supportedFieldTypes": null, - "supportedMessages": [], - "allowDefaultAdd": true, - "documentation": "Drops the specified columns.", - "type": "Microsoft.DPrep.DropColumnsBlock", - "propertyDescriptions": [ - { - "name": "columns", + "name": "columnPairs", + "type": 7, + "multipleValues": true, + "domain": { + "type": 0, + "details": [ + { + "name": "column", "type": 5, "multipleValues": false, "domain": { "type": 12, "details": { - "selectorDomainType": 1, + "selectorDomainType": 0, "fieldTypes": [] } }, @@ -1030,2003 +1959,1067 @@ "initializeFromProperty": null, "telemetryStrategy": 1, "condition": null, - "documentation": "The source columns." + "documentation": "The source column." 
+ }, + { + "name": "newColumnId", + "type": 1, + "multipleValues": false, + "domain": { + "type": 3, + "details": null + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": null } - ], - "dataEffectDetails": { - "columnsRemovedProperties": [ - { - "propertyPath": [ - "columns" - ] - } - ], - "dataEffect": 4 - } + ] }, - "type": "Microsoft.DPrep.DropColumnsBlock", - "dataEffectDetails": { - "columnsRemovedProperties": [ - { - "propertyPath": [ - "columns" - ] + "multiValueDetails": { + "isOrdered": false + }, + "isRequired": true, + "origin": 1, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 0, + "condition": null, + "documentation": "The columns to rename and the desired new names." + } + ], + "arguments": { + "columnPairs": [ + { + "column": { + "type": 2, + "details": { + "selectedColumn": "vendor_name" } - ], - "dataEffect": 4 + }, + "newColumnId": "vendor" }, - "propertyDescriptions": [ - { - "name": "columns", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 1, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source columns." 
- } - ], - "arguments": { - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "Path" - ] - } - } - } - }, - { - "id": "9e460167-3692-4b15-a6bc-dede61bf2ea2", - "description": { - "localDataProperties": [], - "blockGroup": 7, - "supportedTargets": [ - 5 - ], - "supportedFieldTypes": null, - "supportedMessages": [], - "allowDefaultAdd": true, - "documentation": null, - "type": "Microsoft.DPrep.SetColumnTypesBlock", - "propertyDescriptions": [ - { - "name": "columnConversion", - "type": 7, - "multipleValues": true, - "domain": { - "type": 0, - "details": [ - { - "name": "column", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 0, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source column." - }, - { - "name": "typeProperty", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "FieldType", - "enumType": "Microsoft.DPrep.Engine.FieldType, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0, - 1, - 3, - 4, - 10 - ] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": 0, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - }, - { - "name": "typeArguments", - "type": 7, - "multipleValues": false, - "domain": { - "type": 0, - "details": [ - { - "name": "dateTimeFormats", - "type": 1, - "multipleValues": true, - "domain": null, - "multiValueDetails": { - "isOrdered": false - }, - "isRequired": false, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - } - ] - }, - "multiValueDetails": null, - "isRequired": false, 
- "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - } - ] - }, - "multiValueDetails": { - "isOrdered": false - }, - "isRequired": false, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null + "selectedColumn": "VendorID" } - ], - "dataEffectDetails": { - "dataEffect": 1 - } - }, - "type": "Microsoft.DPrep.SetColumnTypesBlock", - "dataEffectDetails": { - "dataEffect": 1 + }, + "newColumnId": "vendor" }, - "propertyDescriptions": [ - { - "name": "columnConversion", - "type": 7, - "multipleValues": true, - "domain": { - "type": 0, - "details": [ - { - "name": "column", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 0, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source column." 
- }, - { - "name": "typeProperty", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "FieldType", - "enumType": "Microsoft.DPrep.Engine.FieldType, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0, - 1, - 3, - 4, - 10 - ] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": 0, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - }, - { - "name": "typeArguments", - "type": 7, - "multipleValues": false, - "domain": { - "type": 0, - "details": [ - { - "name": "dateTimeFormats", - "type": 1, - "multipleValues": true, - "domain": null, - "multiValueDetails": { - "isOrdered": false - }, - "isRequired": false, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - } - ] - }, - "multiValueDetails": null, - "isRequired": false, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - } - ] - }, - "multiValueDetails": { - "isOrdered": false - }, - "isRequired": false, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - } - ], - "arguments": { - "columnConversion": [ - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Trip_Pickup_DateTime" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Trip_Distance" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Rate_Code" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "End_Lat" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "surcharge" - } - }, - 
"typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Tip_Amt" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Total_Amt" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Trip_Dropoff_DateTime" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Start_Lon" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "store_and_forward" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Payment_Type" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Passenger_Count" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Start_Lat" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "End_Lon" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Fare_Amt" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "mta_tax" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Tolls_Amt" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "vendor_name" - } - }, - "typeProperty": 0 - } - ] - } - }, - { - "id": "4d34ed25-e396-48b0-abc1-ea998518564d", - "description": { - "localDataProperties": [], - "blockGroup": 3, - "supportedTargets": [ - 2, - 3 - ], - "supportedFieldTypes": [ - 1, - 4, - 3, - 2, - 0, - 5 - ], - "supportedMessages": [], - "allowDefaultAdd": true, - "documentation": "Replaces values in the specified columns with nulls. 
You can choose to use the default list, supply your own, or both.", - "type": "Microsoft.DPrep.ReplaceNaBlock", - "propertyDescriptions": [ - { - "name": "useDefaultNaList", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": true, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Use the default list and replace 'null', 'NaN', 'NA', and 'N/A' with null." - }, - { - "name": "useEmptyStringAsNa", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": true, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Replace empty strings with null." - }, - { - "name": "useNanAsNa", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": true, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Replace NaNs with Null." - }, - { - "name": "customNaList", - "type": 1, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Provide a comma separated list of values to replace with null." - }, - { - "name": "columns", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 1, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source columns." 
+ { + "column": { + "type": 2, + "details": { + "selectedColumn": "vendor_id" } - ], - "dataEffectDetails": { - "columnsTransformedProperties": [ - { - "propertyPath": [ - "columns" - ] - } - ], - "dataEffect": 5 - } + }, + "newColumnId": "vendor" }, - "type": "Microsoft.DPrep.ReplaceNaBlock", - "dataEffectDetails": { - "columnsTransformedProperties": [ - { - "propertyPath": [ - "columns" - ] + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Trip_Pickup_DateTime" } - ], - "dataEffect": 5 + }, + "newColumnId": "pickup_datetime" }, - "propertyDescriptions": [ - { - "name": "useDefaultNaList", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": true, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Use the default list and replace 'null', 'NaN', 'NA', and 'N/A' with null." + { + "column": { + "type": 2, + "details": { + "selectedColumn": "tpep_pickup_datetime" + } }, - { - "name": "useEmptyStringAsNa", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": true, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Replace empty strings with null." + "newColumnId": "pickup_datetime" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Trip_Dropoff_DateTime" + } }, - { - "name": "useNanAsNa", - "type": 4, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": true, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Replace NaNs with Null." 
+ "newColumnId": "dropoff_datetime" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "tpep_dropoff_datetime" + } }, - { - "name": "customNaList", - "type": 1, - "multipleValues": false, - "domain": null, - "multiValueDetails": null, - "isRequired": false, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Provide a comma separated list of values to replace with null." + "newColumnId": "dropoff_datetime" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "store_and_forward" + } }, - { - "name": "columns", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 1, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source columns." - } - ], - "arguments": { - "columns": { - "type": 1, + "newColumnId": "store_forward" + }, + { + "column": { + "type": 2, "details": { - "ignoreCase": false, - "term": ".*", - "useRegex": true, - "matchWholeWord": false, - "invert": false + "selectedColumn": "store_and_fwd_flag" } }, - "useDefaultNaList": true, - "useEmptyStringAsNa": true, - "useNanAsNa": true - } - }, - { - "id": "a06211f4-2967-4f9e-aeb4-f57008de312d", - "description": { - "localDataProperties": [], - "blockGroup": 5, - "supportedTargets": [ - 2, - 3 - ], - "supportedFieldTypes": null, - "supportedMessages": [], - "allowDefaultAdd": false, - "documentation": "Drops rows where all or any of the selected columns are null.", - "type": "Microsoft.DPrep.DropNullsBlock", - "propertyDescriptions": [ - { - "name": "columns", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 1, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - 
"defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source columns." - }, - { - "name": "columnRelationship", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "ColumnRelationship", - "enumType": "Microsoft.DPrep.Engine.ColumnRelationship, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0, - 1 - ] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": 0, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Whether all or any of the selected columns must be null." + "newColumnId": "store_forward" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Start_Lon" } - ], - "dataEffectDetails": { - "dataEffect": 6 - } + }, + "newColumnId": "pickup_longitude" }, - "type": "Microsoft.DPrep.DropNullsBlock", - "dataEffectDetails": { - "dataEffect": 6 + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Start_Lat" + } + }, + "newColumnId": "pickup_latitude" }, - "propertyDescriptions": [ - { - "name": "columns", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 1, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source columns." 
+ { + "column": { + "type": 2, + "details": { + "selectedColumn": "End_Lon" + } }, - { - "name": "columnRelationship", - "type": 6, - "multipleValues": false, - "domain": { - "type": 1, - "details": { - "enumName": "ColumnRelationship", - "enumType": "Microsoft.DPrep.Engine.ColumnRelationship, Microsoft.DPrep.EngineAPI, Version=0.1.1811.29023, Culture=neutral, PublicKeyToken=null", - "enumValues": [ - 0, - 1 - ] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": 0, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "Whether all or any of the selected columns must be null." - } - ], - "arguments": { - "columnRelationship": 0, - "columns": { - "type": 1, + "newColumnId": "dropoff_longitude" + }, + { + "column": { + "type": 2, "details": { - "ignoreCase": false, - "term": ".*", - "useRegex": true, - "matchWholeWord": false, - "invert": false + "selectedColumn": "End_Lat" } - } - } - }, - { - "id": "1e7bc495-fab4-4462-9518-4145c87f0640", - "description": { - "localDataProperties": [], - "blockGroup": 6, - "supportedTargets": [ - 2, - 3 - ], - "supportedFieldTypes": null, - "supportedMessages": [], - "allowDefaultAdd": true, - "documentation": "Renames the specified columns.", - "type": "Microsoft.DPrep.RenameColumnsBlock", - "propertyDescriptions": [ - { - "name": "columnPairs", - "type": 7, - "multipleValues": true, - "domain": { - "type": 0, - "details": [ - { - "name": "column", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 0, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source column." 
- }, - { - "name": "newColumnId", - "type": 1, - "multipleValues": false, - "domain": { - "type": 3, - "details": null - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - } - ] - }, - "multiValueDetails": { - "isOrdered": false - }, - "isRequired": true, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "The columns to rename and the desired new names." + }, + "newColumnId": "dropoff_latitude" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Passenger_Count" } - ], - "dataEffectDetails": { - "columnsChangedProperties": [ - { - "propertyPath": [ - "columnPairs", - "column" - ] - }, - { - "propertyPath": [ - "columnPairs", - "newColumnId" - ] - } - ], - "dataEffect": 2 - } + }, + "newColumnId": "passengers" }, - "type": "Microsoft.DPrep.RenameColumnsBlock", - "dataEffectDetails": { - "columnsChangedProperties": [ - { - "propertyPath": [ - "columnPairs", - "column" - ] - }, - { - "propertyPath": [ - "columnPairs", - "newColumnId" - ] + { + "column": { + "type": 2, + "details": { + "selectedColumn": "passenger_count" } - ], - "dataEffect": 2 + }, + "newColumnId": "passengers" }, - "propertyDescriptions": [ - { - "name": "columnPairs", - "type": 7, - "multipleValues": true, - "domain": { - "type": 0, - "details": [ - { - "name": "column", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 0, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source column." 
- }, - { - "name": "newColumnId", - "type": 1, - "multipleValues": false, - "domain": { - "type": 3, - "details": null - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": null - } - ] - }, - "multiValueDetails": { - "isOrdered": false - }, - "isRequired": true, - "origin": 1, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 0, - "condition": null, - "documentation": "The columns to rename and the desired new names." - } - ], - "arguments": { - "columnPairs": [ - { - "column": { - "type": 2, - "details": { - "selectedColumn": "vendor_name" - } - }, - "newColumnId": "vendor" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "VendorID" - } - }, - "newColumnId": "vendor" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "vendor_id" - } - }, - "newColumnId": "vendor" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Trip_Pickup_DateTime" - } - }, - "newColumnId": "pickup_datetime" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "tpep_pickup_datetime" - } - }, - "newColumnId": "pickup_datetime" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Trip_Dropoff_DateTime" - } - }, - "newColumnId": "dropoff_datetime" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "tpep_dropoff_datetime" - } - }, - "newColumnId": "dropoff_datetime" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "store_and_forward" - } - }, - "newColumnId": "store_forward" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "store_and_fwd_flag" - } - }, - "newColumnId": "store_forward" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Start_Lon" - } - }, - "newColumnId": "pickup_longitude" - }, - { - "column": { - "type": 2, - "details": { - 
"selectedColumn": "Start_Lat" - } - }, - "newColumnId": "pickup_latitude" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "End_Lon" - } - }, - "newColumnId": "dropoff_longitude" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "End_Lat" - } - }, - "newColumnId": "dropoff_latitude" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Passenger_Count" - } - }, - "newColumnId": "passengers" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "passenger_count" - } - }, - "newColumnId": "passengers" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Fare_Amt" - } - }, - "newColumnId": "cost" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "fare_amount" - } - }, - "newColumnId": "cost" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "Trip_Distance" - } - }, - "newColumnId": "distance" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "trip_distance" - } - }, - "newColumnId": "distance" + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Fare_Amt" } - ] - } - }, - { - "id": "82b89518-036c-4e83-84b8-4d25e0d5467b", - "description": { - "localDataProperties": [], - "blockGroup": 6, - "supportedTargets": [ - 2, - 3 - ], - "supportedFieldTypes": null, - "supportedMessages": [], - "allowDefaultAdd": true, - "documentation": "Keeps the specified columns and drops all others.", - "type": "Microsoft.DPrep.KeepColumnsBlock", - "propertyDescriptions": [ - { - "name": "columns", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 1, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source columns." 
+ }, + "newColumnId": "cost" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "fare_amount" } - ], - "dataEffectDetails": { - "dataEffect": 1 - } + }, + "newColumnId": "cost" }, - "type": "Microsoft.DPrep.KeepColumnsBlock", - "dataEffectDetails": { - "dataEffect": 1 + { + "column": { + "type": 2, + "details": { + "selectedColumn": "Trip_Distance" + } + }, + "newColumnId": "distance" }, - "propertyDescriptions": [ - { - "name": "columns", - "type": 5, - "multipleValues": false, - "domain": { - "type": 12, - "details": { - "selectorDomainType": 1, - "fieldTypes": [] - } - }, - "multiValueDetails": null, - "isRequired": true, - "origin": 0, - "defaultValue": null, - "initializeFromProperty": null, - "telemetryStrategy": 1, - "condition": null, - "documentation": "The source columns." - } - ], - "arguments": { - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "cost", - "distance", - "dropoff_datetime", - "dropoff_latitude", - "dropoff_longitude", - "passengers", - "pickup_datetime", - "pickup_latitude", - "pickup_longitude", - "store_forward", - "vendor" - ] + "selectedColumn": "trip_distance" } - } + }, + "newColumnId": "distance" } - } - ] - } - ] - }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "9aa2386e-a0b5-4902-898f-f68fce4df3e3", - "type": "Microsoft.DPrep.SetColumnTypesBlock", - "arguments": { - "columnConversion": [ - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_longitude" - } - }, - "typeProperty": 3 + ] + } }, { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_latitude" + "id": "f1dff530-60d8-467a-b24f-246eb2a27893", + "description": { + "localDataProperties": [], + "blockGroup": 6, + "supportedTargets": [ + 2, + 3 + ], + "supportedFieldTypes": null, + "supportedMessages": [], + "requiresInitialization": false, + "allowDefaultAdd": true, + "documentation": "Keeps the specified columns and drops 
all others.", + "type": "Microsoft.DPrep.KeepColumnsBlock", + "propertyDescriptions": [ + { + "name": "columns", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 1, + "fieldTypes": [] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source columns." + } + ], + "dataEffectDetails": { + "dataEffect": 1 } }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_longitude" - } + "type": "Microsoft.DPrep.KeepColumnsBlock", + "dataEffectDetails": { + "dataEffect": 1 }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_latitude" + "propertyDescriptions": [ + { + "name": "columns", + "type": 5, + "multipleValues": false, + "domain": { + "type": 12, + "details": { + "selectorDomainType": 1, + "fieldTypes": [] + } + }, + "multiValueDetails": null, + "isRequired": true, + "origin": 0, + "defaultValue": null, + "initializeFromProperty": null, + "telemetryStrategy": 1, + "condition": null, + "documentation": "The source columns." 
} - }, - "typeProperty": 3 + ], + "arguments": { + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "cost", + "distance", + "dropoff_datetime", + "dropoff_latitude", + "dropoff_longitude", + "passengers", + "pickup_datetime", + "pickup_latitude", + "pickup_longitude", + "store_forward", + "vendor" + ] + } + } + } + } + ] + } + ] + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "9a72242d-1823-41a0-9588-e416d621bc75", + "type": "Microsoft.DPrep.SetColumnTypesBlock", + "arguments": { + "columnConversion": [ + { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_longitude" + } + }, + "typeProperty": 3 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_latitude" + } + }, + "typeProperty": 3 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_longitude" + } + }, + "typeProperty": 3 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_latitude" + } + }, + "typeProperty": 3 + } + ] + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "69c0d1c3-1628-4147-890d-1a19dc4f7fc7", + "type": "Microsoft.DPrep.DropNullsBlock", + "arguments": { + "columnRelationship": 1, + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "pickup_longitude", + "pickup_latitude", + "dropoff_longitude", + "dropoff_latitude" + ] + } + } + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "6919ef99-c7c0-4895-a9d2-af1abd09740f", + "type": "Microsoft.DPrep.ExpressionFilterBlock", + "arguments": { + "expression": 
{"r":["And",[{"r":["And",[{"r":["And",[{"r":["And",[{"r":["And",[{"r":["And",[{"r":["And",[{"r":["Invoke",[{"r":["Identifier","Value_LE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"pickup_longitude"]]},-73.72]]]},{"r":["Invoke",[{"r":["Identifier","Value_GE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"pickup_longitude"]]},-74.09]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_LE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"pickup_latitude"]]},40.88]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_GE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"pickup_latitude"]]},40.53]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_LE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"dropoff_longitude"]]},-73.72]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_GE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"dropoff_longitude"]]},-74.09]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_LE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"dropoff_latitude"]]},40.88]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_GE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"dropoff_latitude"]]},40.53]]]}]]} + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "98ede915-38ed-4fc2-bdc5-31fd43a8ad69", + "type": "Microsoft.DPrep.ReplaceBlock", + "arguments": { + "booleanReplaceWith": 1, + "booleanValueToFind": 1, + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "store_forward" + ] + } + }, + "replaceWithType": 0, + "stringReplaceWith": "N", + "stringValueToFind": "0", + "valueToFindType": 0 + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "a72da6d6-7594-4118-90ee-e7107b6c9d7a", + "type": "Microsoft.DPrep.FillNullsBlock", + "arguments": { + "booleanReplaceWith": 1, + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "store_forward" + ] + } + }, + "replaceWithType": 0, + "stringReplaceWith": "N" + }, + "localData": {}, 
+ "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "35290d77-a442-4ace-8f88-bed7fb153a57", + "type": "Microsoft.DPrep.ReplaceBlock", + "arguments": { + "booleanReplaceWith": 1, + "booleanValueToFind": 1, + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "distance" + ] + } + }, + "doubleReplaceWith": 0.0, + "replaceWithType": 3, + "stringValueToFind": ".00", + "valueToFindType": 0 + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "c98fdba9-f82e-4587-bef3-a4f7f9b2dc47", + "type": "Microsoft.DPrep.FillNullsBlock", + "arguments": { + "booleanReplaceWith": 1, + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "distance" + ] + } + }, + "doubleReplaceWith": 0.0, + "replaceWithType": 3 + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "c84749ff-b6e8-41f7-b696-51c2399fe5bc", + "type": "Microsoft.DPrep.ToNumberBlock", + "arguments": { + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "distance" + ] + } + }, + "decimalPoint": 0 + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "46d344f7-dfe4-4305-ba49-c101572c4e87", + "type": "Microsoft.DPrep.SplitColumnByExampleBlock", + "arguments": { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_datetime" + } + }, + "delimiters": [], + "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgAgACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADAAIgAgAC8APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbgB1AG0
AUwBwAGwAaQB0AHMAIgA+ADwAIQBbAEMARABBAFQAQQBbADIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAZQBsAGkAbQBpAHQAZQByAFMAdABhAHIAdAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBFAG4AZAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AYwBsAHUAZABlAEQAZQBsAGkAbQBpAHQAZQByAHMAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZgBpAGwAbABTAHQAcgBhAHQAZQBnAHkAIgA+ADwAIQBbAEMARABBAFQAQQBbAE4AdQBsAGwAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", + "examples": "[]", + "fillStrategy": 0, + "keepDelimiter": false, + "newColumnsBaseName": "pickup_datetime_" + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "b62a7aaa-6c70-4d9b-a3ec-7eaad3dfa170", + "type": "Microsoft.DPrep.SplitColumnByExampleBlock", + "arguments": { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_datetime" + } + }, + "delimiters": [], + "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgAgACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADAAIgAgAC8APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbgB1AG0
AUwBwAGwAaQB0AHMAIgA+ADwAIQBbAEMARABBAFQAQQBbADIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAZQBsAGkAbQBpAHQAZQByAFMAdABhAHIAdAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBFAG4AZAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AYwBsAHUAZABlAEQAZQBsAGkAbQBpAHQAZQByAHMAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZgBpAGwAbABTAHQAcgBhAHQAZQBnAHkAIgA+ADwAIQBbAEMARABBAFQAQQBbAE4AdQBsAGwAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", + "examples": "[]", + "fillStrategy": 0, + "keepDelimiter": false, + "newColumnsBaseName": "dropoff_datetime_" + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "987b78c5-bb25-44ee-ae5c-e18302e2f8d4", + "type": "Microsoft.DPrep.RenameColumnsBlock", + "arguments": { + "columnPairs": [ + { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_datetime_1" + } + }, + "newColumnId": "pickup_date" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_datetime_2" + } + }, + "newColumnId": "pickup_time" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_datetime_1" + } + }, + "newColumnId": "dropoff_date" + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_datetime_2" } + }, + "newColumnId": "dropoff_time" + } + ] + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "56d8eeb5-ad5b-46a5-b4af-d6e492c75b00", + "type": "Microsoft.DPrep.DeriveColumnByExample", + "arguments": { + "anchorColumnId": "pickup_date", + "columnId": 
"pickup_weekday", + "dsl": "PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHcAaQB0AGMAaAAiACAAcgB1AGwAZQA9ACIAUwBpAG4AZwBsAGUAQgByAGEAbgBjAGgAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwB0ACIAIAByAHUAbABlAD0AIgBUAHIAYQBuAHMAZgBvAHIAbQBhAHQAaQBvAG4AIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZQAiACAAcgB1AGwAZQA9ACIAQQB0AG8AbQAiAD4APABMAGUAdABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGYAIgAgAGkAZAA9ACIATABlAHQAQwBvAGwAdQBtAG4ATgBhAG0AZQAiAD4APABWAGEAcgBpAGEAYgBsAGUAIABzAHkAbQBiAG8AbAA9ACIAYwBvAGwAdQBtAG4ATgBhAG0AZQAiAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBkAHgAIgA+ADwAIQBbAEMARABBAFQAQQBbACIAcABpAGMAawB1AHAAXwBkAGEAdABlACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBWAGEAcgBpAGEAYgBsAGUAPgA8AEwAZQB0AE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbABlAHQATwBwAHQAaQBvAG4AcwAiACAAaQBkAD0AIgBMAGUAdABYACIAPgA8AFYAYQByAGkAYQBiAGwAZQAgAHMAeQBtAGIAbwBsAD0AIgB4ACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAHIAdQBsAGUAPQAiAEMAaABvAG8AcwBlAEkAbgBwAHUAdAAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2AHMAIgAgAC8APgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAbwBsAHUAbQBuAE4AYQBtAGUAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8AVgBhAHIAaQBhAGIAbABlAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBjAG8AbgB2ACIAIAByAHUAbABlAD0AIgBGAG8AcgBtAGEAdABQAGEAcgB0AGkAYQBsAEQAYQB0AGUAVABpAG0AZQAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGEAdABlAHQAaQBtAGUAIgAgAHIAdQBsAGUAPQAiAH4AYwBvAG4AdgBlAHIAdABfAGQAYQB0AGUAdABpAG0AZQBfAGkAbgBwAHUAdABEAGEAdABlAFQAaQBtAGUAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBuAHAAdQB0AEQAYQB0AGUAVABpAG0AZQAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AaQBuAHAAdQB0AEQAYQB0AGUAVABpAG0AZQBfAHAAYQByAHMAZQBkAEQAYQB0AGUAVABpAG0AZQAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBwAGEAcgBzAGUAZABE
AGEAdABlAFQAaQBtAGUAIgAgAHIAdQBsAGUAPQAiAFAAYQByAHMAZQBQAGEAcgB0AGkAYQBsAEQAYQB0AGUAVABpAG0AZQAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBTAFMAIgAgAHIAdQBsAGUAPQAiAFcAaABvAGwAZQBDAG8AbAB1AG0AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB4ACIAIAAvAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBuAHAAdQB0AEQAdABGAG8AcgBtAGEAdABzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADEAIgA+ADwASQB0AGUAbQAgAGkAPQAiADAAIgA+ADwARABhAHQAZQBUAGkAbQBlAEYAbwByAG0AYQB0ACAAZgBvAHIAbQBhAHQAPQAiAHkAeQB5AHkAXAAtAE0ATQBcAC0AZABkACIAPgA8AE4AdQBtAGUAcgBpAGMARgBvAHIAbQBhAHQAUABhAHIAdAAgAEIAYQBzAGUARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAHkAeQB5AHkAIgAgAEYAdQBsAGwARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAHkAeQB5AHkAIgAgAEQAYQB0AGUAVABpAG0AZQBQAGEAcgB0AD0AIgBZAGUAYQByACIAIAAvAD4APABDAG8AbgBzAHQAYQBuAHQARgBvAHIAbQBhAHQAUABhAHIAdAA+ADwAIQBbAEMARABBAFQAQQBbAC0AXQBdAD4APAAvAEMAbwBuAHMAdABhAG4AdABGAG8AcgBtAGEAdABQAGEAcgB0AD4APABOAHUAbQBlAHIAaQBjAEYAbwByAG0AYQB0AFAAYQByAHQAIABCAGEAcwBlAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBNAE0AIgAgAEYAdQBsAGwARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAE0ATQAiACAARABhAHQAZQBUAGkAbQBlAFAAYQByAHQAPQAiAE0AbwBuAHQAaAAiACAALwA+ADwAQwBvAG4AcwB0AGEAbgB0AEYAbwByAG0AYQB0AFAAYQByAHQAPgA8ACEAWwBDAEQAQQBUAEEAWwAtAF0AXQA+ADwALwBDAG8AbgBzAHQAYQBuAHQARgBvAHIAbQBhAHQAUABhAHIAdAA+ADwATgB1AG0AZQByAGkAYwBGAG8AcgBtAGEAdABQAGEAcgB0ACAAQgBhAHMAZQBGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIAZABkACIAIABGAHUAbABsAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBkAGQAIgAgAEQAYQB0AGUAVABpAG0AZQBQAGEAcgB0AD0AIgBEAGEAeQAiACAALwA+ADwALwBEAGEAdABlAFQAaQBtAGUARgBvAHIAbQBhAHQAPgA8AC8ASQB0AGUAbQA+ADwALwBBAHIAcgBhAHkAPgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbwB1AHQAcAB1AHQARAB0AEYAbwByAG0AYQB0ACIAPgA8AEQAYQB0AGUAVABpAG0AZQBGAG8AcgBt
AGEAdAAgAGYAbwByAG0AYQB0AD0AIgBkAGQAZABkACIAPgA8AFMAdAByAGkAbgBnAEYAbwByAG0AYQB0AFAAYQByAHQAIABCAGEAcwBlAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBkAGQAZABkACIAIABGAHUAbABsAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBkAGQAZABkACIAIABEAGEAdABlAFQAaQBtAGUAUABhAHIAdAA9ACIARABhAHkATwBmAFcAZQBlAGsAIgAgAFAAbwBzAGkAeABPAHUAdABwAHUAdABGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIAJQBBACIAIABQAG8AcwBpAHgAUABhAHIAcwBpAG4AZwBGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIAJQBBACIAPgA8AEwAbwBvAGsAdQBwACAAcwB0AHIAaQBuAGcAPQAiAFMAdQBuAGQAYQB5ACIAIAB2AGEAbAB1AGUAPQAiADAAIgAgAC8APgA8AEwAbwBvAGsAdQBwACAAcwB0AHIAaQBuAGcAPQAiAE0AbwBuAGQAYQB5ACIAIAB2AGEAbAB1AGUAPQAiADEAIgAgAC8APgA8AEwAbwBvAGsAdQBwACAAcwB0AHIAaQBuAGcAPQAiAFQAdQBlAHMAZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIAMgAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIAVwBlAGQAbgBlAHMAZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIAMwAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIAVABoAHUAcgBzAGQAYQB5ACIAIAB2AGEAbAB1AGUAPQAiADQAIgAgAC8APgA8AEwAbwBvAGsAdQBwACAAcwB0AHIAaQBuAGcAPQAiAEYAcgBpAGQAYQB5ACIAIAB2AGEAbAB1AGUAPQAiADUAIgAgAC8APgA8AEwAbwBvAGsAdQBwACAAcwB0AHIAaQBuAGcAPQAiAFMAYQB0AHUAcgBkAGEAeQAiACAAdgBhAGwAdQBlAD0AIgA2ACIAIAAvAD4APAAvAFMAdAByAGkAbgBnAEYAbwByAG0AYQB0AFAAYQByAHQAPgA8AC8ARABhAHQAZQBUAGkAbQBlAEYAbwByAG0AYQB0AD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwALwBMAGUAdABOAG8AZABlAD4APAAvAEwAZQB0AE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA=", + "examples": "{\"-1\": {\"row\": -1, \"sourceData\": {\"pickup_date\": \"2009-01-04\"}, \"example\": \"Sunday\"}, \"-2\": {\"row\": -2, \"sourceData\": {\"pickup_date\": \"2013-08-22\"}, \"example\": \"Thursday\"}}", + "outputType": 2, + "priorColumnIds": { + "type": 0, + "details": { + "selectedColumns": [ + "pickup_date" ] - }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null + } + }, + "sourceColumnIds": [ + "pickup_date" + ] + }, + 
"localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "a81d2481-ec3e-4445-81a4-7f99aa8f449b", + "type": "Microsoft.DPrep.DeriveColumnByExample", + "arguments": { + "anchorColumnId": "dropoff_date", + "columnId": "dropoff_weekday", + "dsl": "PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHcAaQB0AGMAaAAiACAAcgB1AGwAZQA9ACIAUwBpAG4AZwBsAGUAQgByAGEAbgBjAGgAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwB0ACIAIAByAHUAbABlAD0AIgBUAHIAYQBuAHMAZgBvAHIAbQBhAHQAaQBvAG4AIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZQAiACAAcgB1AGwAZQA9ACIAQQB0AG8AbQAiAD4APABMAGUAdABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGYAIgAgAGkAZAA9ACIATABlAHQAQwBvAGwAdQBtAG4ATgBhAG0AZQAiAD4APABWAGEAcgBpAGEAYgBsAGUAIABzAHkAbQBiAG8AbAA9ACIAYwBvAGwAdQBtAG4ATgBhAG0AZQAiAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBkAHgAIgA+ADwAIQBbAEMARABBAFQAQQBbACIAZAByAG8AcABvAGYAZgBfAGQAYQB0AGUAIgBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APAAvAFYAYQByAGkAYQBiAGwAZQA+ADwATABlAHQATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBsAGUAdABPAHAAdABpAG8AbgBzACIAIABpAGQAPQAiAEwAZQB0AFgAIgA+ADwAVgBhAHIAaQBhAGIAbABlACAAcwB5AG0AYgBvAGwAPQAiAHgAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAdgAiACAAcgB1AGwAZQA9ACIAQwBoAG8AbwBzAGUASQBuAHAAdQB0ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAcwAiACAALwA+ADwAVgBhAHIAaQBhAGIAbABlAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAYwBvAGwAdQBtAG4ATgBhAG0AZQAiACAALwA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwALwBWAGEAcgBpAGEAYgBsAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAbwBuAHYAIgAgAHIAdQBsAGUAPQAiAEYAbwByAG0AYQB0AFAAYQByAHQAaQBhAGwARABhAHQAZQBUAGkAbQBlACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAYQB0AGUAdABpAG0AZQAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AZABhAHQAZQB0AGkAbQBlAF8AaQBuAHAAdQB0AEQAYQB0AGUAVABpAG0AZQAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAb
wBsAD0AIgBpAG4AcAB1AHQARABhAHQAZQBUAGkAbQBlACIAIAByAHUAbABlAD0AIgB+AGMAbwBuAHYAZQByAHQAXwBpAG4AcAB1AHQARABhAHQAZQBUAGkAbQBlAF8AcABhAHIAcwBlAGQARABhAHQAZQBUAGkAbQBlACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHAAYQByAHMAZQBkAEQAYQB0AGUAVABpAG0AZQAiACAAcgB1AGwAZQA9ACIAUABhAHIAcwBlAFAAYQByAHQAaQBhAGwARABhAHQAZQBUAGkAbQBlACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAFMAUwAiACAAcgB1AGwAZQA9ACIAVwBoAG8AbABlAEMAbwBsAHUAbQBuACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHgAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AcAB1AHQARAB0AEYAbwByAG0AYQB0AHMAIgA+ADwAQQByAHIAYQB5ACAAcwBpAHoAZQA9ACIAMQAiAD4APABJAHQAZQBtACAAaQA9ACIAMAAiAD4APABEAGEAdABlAFQAaQBtAGUARgBvAHIAbQBhAHQAIABmAG8AcgBtAGEAdAA9ACIAeQB5AHkAeQBcAC0ATQBNAFwALQBkAGQAIgA+ADwATgB1AG0AZQByAGkAYwBGAG8AcgBtAGEAdABQAGEAcgB0ACAAQgBhAHMAZQBGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIAeQB5AHkAeQAiACAARgB1AGwAbABGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIAeQB5AHkAeQAiACAARABhAHQAZQBUAGkAbQBlAFAAYQByAHQAPQAiAFkAZQBhAHIAIgAgAC8APgA8AEMAbwBuAHMAdABhAG4AdABGAG8AcgBtAGEAdABQAGEAcgB0AD4APAAhAFsAQwBEAEEAVABBAFsALQBdAF0APgA8AC8AQwBvAG4AcwB0AGEAbgB0AEYAbwByAG0AYQB0AFAAYQByAHQAPgA8AE4AdQBtAGUAcgBpAGMARgBvAHIAbQBhAHQAUABhAHIAdAAgAEIAYQBzAGUARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAE0ATQAiACAARgB1AGwAbABGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIATQBNACIAIABEAGEAdABlAFQAaQBtAGUAUABhAHIAdAA9ACIATQBvAG4AdABoACIAIAAvAD4APABDAG8AbgBzAHQAYQBuAHQARgBvAHIAbQBhAHQAUABhAHIAdAA+ADwAIQBbAEMARABBAFQAQQBbAC0AXQBdAD4APAAvAEMAbwBuAHMAdABhAG4AdABGAG8AcgBtAGEAdABQAGEAcgB0AD4APABOAHUAbQBlAHIAaQBjAEYAbwByAG0AYQB0AFAAYQByAHQAIABCAGEAcwBlAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBkAGQAIgAgAEYAdQBsAGwARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAGQAZAAiACAARABhAHQAZQBUAGkAbQBlAFAAYQByAHQAPQAiAEQAYQB5ACIAIAAvAD4APAAvAEQAYQB0AGUAVABpAG0AZQBGAG8AcgBtAGEAdAA+ADwALwBJAHQAZQBtAD4APAAvAEEAcgByAGEAeQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAb
QBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBvAHUAdABwAHUAdABEAHQARgBvAHIAbQBhAHQAIgA+ADwARABhAHQAZQBUAGkAbQBlAEYAbwByAG0AYQB0ACAAZgBvAHIAbQBhAHQAPQAiAGQAZABkAGQAIgA+ADwAUwB0AHIAaQBuAGcARgBvAHIAbQBhAHQAUABhAHIAdAAgAEIAYQBzAGUARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAGQAZABkAGQAIgAgAEYAdQBsAGwARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAGQAZABkAGQAIgAgAEQAYQB0AGUAVABpAG0AZQBQAGEAcgB0AD0AIgBEAGEAeQBPAGYAVwBlAGUAawAiACAAUABvAHMAaQB4AE8AdQB0AHAAdQB0AEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgAlAEEAIgAgAFAAbwBzAGkAeABQAGEAcgBzAGkAbgBnAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgAlAEEAIgA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIAUwB1AG4AZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIAMAAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIATQBvAG4AZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIAMQAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIAVAB1AGUAcwBkAGEAeQAiACAAdgBhAGwAdQBlAD0AIgAyACIAIAAvAD4APABMAG8AbwBrAHUAcAAgAHMAdAByAGkAbgBnAD0AIgBXAGUAZABuAGUAcwBkAGEAeQAiACAAdgBhAGwAdQBlAD0AIgAzACIAIAAvAD4APABMAG8AbwBrAHUAcAAgAHMAdAByAGkAbgBnAD0AIgBUAGgAdQByAHMAZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIANAAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIARgByAGkAZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIANQAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIAUwBhAHQAdQByAGQAYQB5ACIAIAB2AGEAbAB1AGUAPQAiADYAIgAgAC8APgA8AC8AUwB0AHIAaQBuAGcARgBvAHIAbQBhAHQAUABhAHIAdAA+ADwALwBEAGEAdABlAFQAaQBtAGUARgBvAHIAbQBhAHQAPgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APAAvAEwAZQB0AE4AbwBkAGUAPgA8AC8ATABlAHQATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", + "examples": "{\"-1\": {\"row\": -1, \"sourceData\": {\"dropoff_date\": \"2013-08-22\"}, \"example\": \"Thursday\"}, \"-2\": {\"row\": -2, \"sourceData\": {\"dropoff_date\": \"2013-11-03\"}, \"example\": \"Sunday\"}}", + 
"outputType": 2, + "priorColumnIds": { + "type": 0, + "details": { + "selectedColumns": [ + "dropoff_date" + ] + } + }, + "sourceColumnIds": [ + "dropoff_date" + ] + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "4699c028-36b8-4c7a-826f-5b64ec7af309", + "type": "Microsoft.DPrep.SplitColumnByExampleBlock", + "arguments": { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_time" + } + }, + "delimiters": [], + "dsl": "PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgA6ACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAG
wAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADEAIgA+ADwASQB0AGUAbQAgAGkAPQAiADAAIgA+ADwAIQBbAEMARABBAFQAQQBbADAAXQBdAD4APAAvAEkAdABlAG0APgA8AC8AQQByAHIAYQB5AD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAG4AdQBtAFMAcABsAGkAdABzACIAPgA8ACEAWwBDAEQAQQBUAEEAWwAyAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBTAHQAYQByAHQAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZABlAGwAaQBtAGkAdABlAHIARQBuAGQAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBuAGMAbAB1AGQAZQBEAGUAbABpAG0AaQB0AGUAcgBzACIAPgA8ACEAWwBDAEQAQQBUAEEAWwBmAGEAbABzAGUAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGYAaQBsAGwAUwB0AHIAYQB0AGUAZwB5ACIAPgA8ACEAWwBDAEQAQQBUAEEAWwBOAHUAbABsAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA=", + "examples": "[]", + "fillStrategy": 0, + "keepDelimiter": false, + "newColumnsBaseName": "pickup_time_" + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "bb5e7ef8-0763-414a-b36f-ecb5d74806da", + "type": "Microsoft.DPrep.SplitColumnByExampleBlock", + "arguments": { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_time" + } + }, + "delimiters": [], + "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgA6ACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADEAIgA+ADwASQB0AGUAbQAgAGkAPQAiADAAIgA+ADwAIQBbAEMARABBAFQAQQBbADAAXQBdAD4APAAvAEkAdABlAG0APgA8AC8AQQByAHIAYQB5AD4
APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAG4AdQBtAFMAcABsAGkAdABzACIAPgA8ACEAWwBDAEQAQQBUAEEAWwAyAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBTAHQAYQByAHQAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZABlAGwAaQBtAGkAdABlAHIARQBuAGQAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBuAGMAbAB1AGQAZQBEAGUAbABpAG0AaQB0AGUAcgBzACIAPgA8ACEAWwBDAEQAQQBUAEEAWwBmAGEAbABzAGUAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGYAaQBsAGwAUwB0AHIAYQB0AGUAZwB5ACIAPgA8ACEAWwBDAEQAQQBUAEEAWwBOAHUAbABsAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA=", + "examples": "[]", + "fillStrategy": 0, + "keepDelimiter": false, + "newColumnsBaseName": "dropoff_time_" + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "ee2d7138-8867-4ede-830c-e37f01a83f75", + "type": "Microsoft.DPrep.SplitColumnByExampleBlock", + "arguments": { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_time_1" + } + }, + "delimiters": [], + "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgA6ACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADAAIgAgAC8APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbgB1AG0
AUwBwAGwAaQB0AHMAIgA+ADwAIQBbAEMARABBAFQAQQBbADIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAZQBsAGkAbQBpAHQAZQByAFMAdABhAHIAdAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBFAG4AZAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AYwBsAHUAZABlAEQAZQBsAGkAbQBpAHQAZQByAHMAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZgBpAGwAbABTAHQAcgBhAHQAZQBnAHkAIgA+ADwAIQBbAEMARABBAFQAQQBbAE4AdQBsAGwAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", + "examples": "[]", + "fillStrategy": 0, + "keepDelimiter": false, + "newColumnsBaseName": "pickup_time_1_" + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "7f09be9f-372f-4c35-bde5-91d133bfed14", + "type": "Microsoft.DPrep.SplitColumnByExampleBlock", + "arguments": { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_time_1" + } }, - { - "id": "40be5ddb-6b3b-4635-ba81-b9684a60d3dd", - "type": "Microsoft.DPrep.DropNullsBlock", - "arguments": { - "columnRelationship": 1, - "columns": { - "type": 0, + "delimiters": [], + "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgA6ACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADAAIgAgAC8APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbgB1AG0
AUwBwAGwAaQB0AHMAIgA+ADwAIQBbAEMARABBAFQAQQBbADIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAZQBsAGkAbQBpAHQAZQByAFMAdABhAHIAdAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBFAG4AZAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AYwBsAHUAZABlAEQAZQBsAGkAbQBpAHQAZQByAHMAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZgBpAGwAbABTAHQAcgBhAHQAZQBnAHkAIgA+ADwAIQBbAEMARABBAFQAQQBbAE4AdQBsAGwAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", + "examples": "[]", + "fillStrategy": 0, + "keepDelimiter": false, + "newColumnsBaseName": "dropoff_time_1_" + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "46c61672-c91e-467a-b157-e06643e5c93b", + "type": "Microsoft.DPrep.DropColumnsBlock", + "arguments": { + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "pickup_date", + "pickup_time", + "dropoff_date", + "dropoff_time", + "pickup_date_1", + "dropoff_date_1", + "pickup_time_1", + "dropoff_time_1" + ] + } + } + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "45756d2e-176c-4a33-b65b-dfbbf72a82f6", + "type": "Microsoft.DPrep.RenameColumnsBlock", + "arguments": { + "columnPairs": [ + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "pickup_longitude", - "pickup_latitude", - "dropoff_longitude", - "dropoff_latitude" - ] + "selectedColumn": "pickup_date_2" } - } + }, + "newColumnId": "pickup_month" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": 
"6e953e1a-c92f-4efc-a923-77425754a392", - "type": "Microsoft.DPrep.ExpressionFilterBlock", - "arguments": { - "expression": {"r":["And",[{"r":["And",[{"r":["And",[{"r":["And",[{"r":["And",[{"r":["And",[{"r":["And",[{"r":["Invoke",[{"r":["Identifier","Value_LE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"pickup_longitude"]]},-73.72]]]},{"r":["Invoke",[{"r":["Identifier","Value_GE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"pickup_longitude"]]},-74.09]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_LE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"pickup_latitude"]]},40.88]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_GE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"pickup_latitude"]]},40.53]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_LE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"dropoff_longitude"]]},-73.72]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_GE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"dropoff_longitude"]]},-74.09]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_LE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"dropoff_latitude"]]},40.88]]]}]]},{"r":["Invoke",[{"r":["Identifier","Value_GE"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"dropoff_latitude"]]},40.53]]]}]]} + { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_date_3" + } + }, + "newColumnId": "pickup_monthday" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "6f518359-02e0-4856-81ff-e21afd566bdb", - "type": "Microsoft.DPrep.ReplaceBlock", - "arguments": { - "booleanReplaceWith": 1, - "booleanValueToFind": 1, - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "store_forward" - ] + "selectedColumn": "pickup_time_1_1" } }, - "replaceWithType": 0, - "stringReplaceWith": "N", - "stringValueToFind": "0", - "valueToFindType": 0 + "newColumnId": "pickup_hour" }, - "localData": {}, - "isEnabled": true, - "name": 
null, - "annotation": null - }, - { - "id": "809bc4ca-62a3-4eb9-b396-205e2c3bc520", - "type": "Microsoft.DPrep.ReplaceBlock", - "arguments": { - "booleanReplaceWith": 1, - "booleanValueToFind": 1, - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "store_forward" - ] + "selectedColumn": "pickup_time_1_2" } }, - "replaceWithType": 0, - "stringReplaceWith": "N", - "valueToFindType": 7 + "newColumnId": "pickup_minute" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "439b2d5c-e945-46e3-be0e-1014460a429a", - "type": "Microsoft.DPrep.ReplaceBlock", - "arguments": { - "booleanReplaceWith": 1, - "booleanValueToFind": 1, - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "distance" - ] + "selectedColumn": "pickup_time_2" } }, - "doubleReplaceWith": 0.0, - "replaceWithType": 3, - "stringValueToFind": ".00", - "valueToFindType": 0 + "newColumnId": "pickup_second" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "a5b90169-202f-4bfc-8b6c-fa980ff852d9", - "type": "Microsoft.DPrep.ReplaceBlock", - "arguments": { - "booleanReplaceWith": 1, - "booleanValueToFind": 1, - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "distance" - ] + "selectedColumn": "dropoff_date_2" } }, - "doubleReplaceWith": 0.0, - "replaceWithType": 3, - "valueToFindType": 7 + "newColumnId": "dropoff_month" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "b023a494-62dc-4929-9209-6222c44db46e", - "type": "Microsoft.DPrep.ToNumberBlock", - "arguments": { - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "distance" - ] + "selectedColumn": "dropoff_date_3" } }, - "decimalPoint": 0 + "newColumnId": "dropoff_monthday" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": 
null - }, - { - "id": "292ce841-201f-4a59-8e85-e680c22d1271", - "type": "Microsoft.DPrep.SplitColumnByExampleBlock", - "arguments": { + { "column": { "type": 2, "details": { - "selectedColumn": "pickup_datetime" + "selectedColumn": "dropoff_time_1_1" } }, - "delimiters": [], - "dsl": "PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgAgACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATg
BvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADAAIgAgAC8APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbgB1AG0AUwBwAGwAaQB0AHMAIgA+ADwAIQBbAEMARABBAFQAQQBbADIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAZQBsAGkAbQBpAHQAZQByAFMAdABhAHIAdAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBFAG4AZAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AYwBsAHUAZABlAEQAZQBsAGkAbQBpAHQAZQByAHMAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZgBpAGwAbABTAHQAcgBhAHQAZQBnAHkAIgA+ADwAIQBbAEMARABBAFQAQQBbAE4AdQBsAGwAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", - "examples": "[]", - "fillStrategy": 0, - "keepDelimiter": false, - "newColumnsBaseName": "pickup_datetime_" + "newColumnId": "dropoff_hour" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "59fcfefa-b315-4d31-85cb-e7398e348fa3", - "type": "Microsoft.DPrep.SplitColumnByExampleBlock", - "arguments": { + { "column": { "type": 2, "details": { - "selectedColumn": "dropoff_datetime" + "selectedColumn": "dropoff_time_1_2" } }, - "delimiters": [], - "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgAgACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADAAIgAgAC8APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbgB1AG0
AUwBwAGwAaQB0AHMAIgA+ADwAIQBbAEMARABBAFQAQQBbADIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAZQBsAGkAbQBpAHQAZQByAFMAdABhAHIAdAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBFAG4AZAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AYwBsAHUAZABlAEQAZQBsAGkAbQBpAHQAZQByAHMAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZgBpAGwAbABTAHQAcgBhAHQAZQBnAHkAIgA+ADwAIQBbAEMARABBAFQAQQBbAE4AdQBsAGwAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", - "examples": "[]", - "fillStrategy": 0, - "keepDelimiter": false, - "newColumnsBaseName": "dropoff_datetime_" + "newColumnId": "dropoff_minute" }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "af910662-a6cf-4b51-be2e-bf83baeb50f7", - "type": "Microsoft.DPrep.RenameColumnsBlock", - "arguments": { - "columnPairs": [ - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_datetime_1" - } - }, - "newColumnId": "pickup_date" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_datetime_2" - } - }, - "newColumnId": "pickup_time" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_datetime_1" - } - }, - "newColumnId": "dropoff_date" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_datetime_2" - } - }, - "newColumnId": "dropoff_time" + { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_time_2" } + }, + "newColumnId": "dropoff_second" + } + ] + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": 
"c9ed92e3-464e-4706-9e91-a39f8d2a5504", + "type": "Microsoft.DPrep.DropColumnsBlock", + "arguments": { + "columns": { + "type": 0, + "details": { + "selectedColumns": [ + "pickup_datetime", + "dropoff_datetime" ] + } + } + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "7b9401a8-61bd-410f-a63b-a9f1a2432d03", + "type": "Microsoft.DPrep.SetColumnTypesBlock", + "arguments": { + "columnConversion": [ + { + "column": { + "type": 2, + "details": { + "selectedColumn": "vendor" + } + }, + "typeProperty": 0 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "eb066c51-2446-407f-9f2f-ed1275c4f910", - "type": "Microsoft.DPrep.DeriveColumnByExample", - "arguments": { - "anchorColumnId": "pickup_date", - "columnId": "pickup_weekday", - "dsl": "PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHcAaQB0AGMAaAAiACAAcgB1AGwAZQA9ACIAUwBpAG4AZwBsAGUAQgByAGEAbgBjAGgAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwB0ACIAIAByAHUAbABlAD0AIgBUAHIAYQBuAHMAZgBvAHIAbQBhAHQAaQBvAG4AIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZQAiACAAcgB1AGwAZQA9ACIAQQB0AG8AbQAiAD4APABMAGUAdABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGYAIgAgAGkAZAA9ACIATABlAHQAQwBvAGwAdQBtAG4ATgBhAG0AZQAiAD4APABWAGEAcgBpAGEAYgBsAGUAIABzAHkAbQBiAG8AbAA9ACIAYwBvAGwAdQBtAG4ATgBhAG0AZQAiAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBkAHgAIgA+ADwAIQBbAEMARABBAFQAQQBbACIAcABpAGMAawB1AHAAXwBkAGEAdABlACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBWAGEAcgBpAGEAYgBsAGUAPgA8AEwAZQB0AE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbABlAHQATwBwAHQAaQBvAG4AcwAiACAAaQBkAD0AIgBMAGUAdABYACIAPgA8AFYAYQByAGkAYQBiAGwAZQAgAHMAeQBtAGIAbwBsAD0AIgB4ACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAHIAdQBsAGUAPQAiAEMAaABvAG8AcwBlAEkAbgBwAHUAdAAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2AHMAIgAgAC8APgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABl
ACAAcwB5AG0AYgBvAGwAPQAiAGMAbwBsAHUAbQBuAE4AYQBtAGUAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8AVgBhAHIAaQBhAGIAbABlAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBjAG8AbgB2ACIAIAByAHUAbABlAD0AIgBGAG8AcgBtAGEAdABQAGEAcgB0AGkAYQBsAEQAYQB0AGUAVABpAG0AZQAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGEAdABlAHQAaQBtAGUAIgAgAHIAdQBsAGUAPQAiAH4AYwBvAG4AdgBlAHIAdABfAGQAYQB0AGUAdABpAG0AZQBfAGkAbgBwAHUAdABEAGEAdABlAFQAaQBtAGUAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBuAHAAdQB0AEQAYQB0AGUAVABpAG0AZQAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AaQBuAHAAdQB0AEQAYQB0AGUAVABpAG0AZQBfAHAAYQByAHMAZQBkAEQAYQB0AGUAVABpAG0AZQAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBwAGEAcgBzAGUAZABEAGEAdABlAFQAaQBtAGUAIgAgAHIAdQBsAGUAPQAiAFAAYQByAHMAZQBQAGEAcgB0AGkAYQBsAEQAYQB0AGUAVABpAG0AZQAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBTAFMAIgAgAHIAdQBsAGUAPQAiAFcAaABvAGwAZQBDAG8AbAB1AG0AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB4ACIAIAAvAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBuAHAAdQB0AEQAdABGAG8AcgBtAGEAdABzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADEAIgA+ADwASQB0AGUAbQAgAGkAPQAiADAAIgA+ADwARABhAHQAZQBUAGkAbQBlAEYAbwByAG0AYQB0ACAAZgBvAHIAbQBhAHQAPQAiAHkAeQB5AHkAXAAtAE0ATQBcAC0AZABkACIAPgA8AE4AdQBtAGUAcgBpAGMARgBvAHIAbQBhAHQAUABhAHIAdAAgAEIAYQBzAGUARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAHkAeQB5AHkAIgAgAEYAdQBsAGwARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAHkAeQB5AHkAIgAgAEQAYQB0AGUAVABpAG0AZQBQAGEAcgB0AD0AIgBZAGUAYQByACIAIAAvAD4APABDAG8AbgBzAHQAYQBuAHQARgBvAHIAbQBhAHQAUABhAHIAdAA+ADwAIQBbAEMARABBAFQAQQBbAC0AXQBdAD4APAAvAEMAbwBuAHMAdABhAG4AdABGAG8AcgBtAGEAdABQAGEAcgB0AD4APABOAHUAbQBlAHIAaQBjAEYAbwByAG0AYQB0AFAAYQByAHQAIABCAGEAcwBlAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBNAE0AIgAgAEYAdQBsAGwARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAE0ATQAiACAARABhAHQAZQBUAGkAbQBl
AFAAYQByAHQAPQAiAE0AbwBuAHQAaAAiACAALwA+ADwAQwBvAG4AcwB0AGEAbgB0AEYAbwByAG0AYQB0AFAAYQByAHQAPgA8ACEAWwBDAEQAQQBUAEEAWwAtAF0AXQA+ADwALwBDAG8AbgBzAHQAYQBuAHQARgBvAHIAbQBhAHQAUABhAHIAdAA+ADwATgB1AG0AZQByAGkAYwBGAG8AcgBtAGEAdABQAGEAcgB0ACAAQgBhAHMAZQBGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIAZABkACIAIABGAHUAbABsAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBkAGQAIgAgAEQAYQB0AGUAVABpAG0AZQBQAGEAcgB0AD0AIgBEAGEAeQAiACAALwA+ADwALwBEAGEAdABlAFQAaQBtAGUARgBvAHIAbQBhAHQAPgA8AC8ASQB0AGUAbQA+ADwALwBBAHIAcgBhAHkAPgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbwB1AHQAcAB1AHQARAB0AEYAbwByAG0AYQB0ACIAPgA8AEQAYQB0AGUAVABpAG0AZQBGAG8AcgBtAGEAdAAgAGYAbwByAG0AYQB0AD0AIgBkAGQAZABkACIAPgA8AFMAdAByAGkAbgBnAEYAbwByAG0AYQB0AFAAYQByAHQAIABCAGEAcwBlAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBkAGQAZABkACIAIABGAHUAbABsAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBkAGQAZABkACIAIABEAGEAdABlAFQAaQBtAGUAUABhAHIAdAA9ACIARABhAHkATwBmAFcAZQBlAGsAIgA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIAUwB1AG4AZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIAMAAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIATQBvAG4AZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIAMQAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIAVAB1AGUAcwBkAGEAeQAiACAAdgBhAGwAdQBlAD0AIgAyACIAIAAvAD4APABMAG8AbwBrAHUAcAAgAHMAdAByAGkAbgBnAD0AIgBXAGUAZABuAGUAcwBkAGEAeQAiACAAdgBhAGwAdQBlAD0AIgAzACIAIAAvAD4APABMAG8AbwBrAHUAcAAgAHMAdAByAGkAbgBnAD0AIgBUAGgAdQByAHMAZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIANAAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIARgByAGkAZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIANQAiACAALwA+ADwATABvAG8AawB1AHAAIABzAHQAcgBpAG4AZwA9ACIAUwBhAHQAdQByAGQAYQB5ACIAIAB2AGEAbAB1AGUAPQAiADYAIgAgAC8APgA8AC8AUwB0AHIAaQBuAGcARgBvAHIAbQBhAHQAUABhAHIAdAA+ADwALwBEAGEAdABlAFQAaQBtAGUARgBvAHIAbQBhAHQAPgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APAAvAEwAZQB0AE4AbwBkAGUAPgA8
AC8ATABlAHQATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", - "examples": "{\"-1\": {\"row\": -1, \"sourceData\": {\"pickup_date\": \"2009-01-04\"}, \"example\": \"Sunday\"}, \"-2\": {\"row\": -2, \"sourceData\": {\"pickup_date\": \"2013-08-22\"}, \"example\": \"Thursday\"}}", - "outputType": 2, - "priorColumnIds": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "pickup_date" - ] + "selectedColumn": "passengers" } }, - "sourceColumnIds": [ - "pickup_date" - ] + "typeProperty": 2 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "34daaec6-92b5-477f-90f1-2e5c6946db33", - "type": "Microsoft.DPrep.DeriveColumnByExample", - "arguments": { - "anchorColumnId": "dropoff_date", - "columnId": "dropoff_weekday", - "dsl": "PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHcAaQB0AGMAaAAiACAAcgB1AGwAZQA9ACIAUwBpAG4AZwBsAGUAQgByAGEAbgBjAGgAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwB0ACIAIAByAHUAbABlAD0AIgBUAHIAYQBuAHMAZgBvAHIAbQBhAHQAaQBvAG4AIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZQAiACAAcgB1AGwAZQA9ACIAQQB0AG8AbQAiAD4APABMAGUAdABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGYAIgAgAGkAZAA9ACIATABlAHQAQwBvAGwAdQBtAG4ATgBhAG0AZQAiAD4APABWAGEAcgBpAGEAYgBsAGUAIABzAHkAbQBiAG8AbAA9ACIAYwBvAGwAdQBtAG4ATgBhAG0AZQAiAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBkAHgAIgA+ADwAIQBbAEMARABBAFQAQQBbACIAZAByAG8AcABvAGYAZgBfAGQAYQB0AGUAIgBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APAAvAFYAYQByAGkAYQBiAGwAZQA+ADwATABlAHQATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBsAGUAdABPAHAAdABpAG8AbgBzACIAIABpAGQAPQAiAEwAZQB0AFgAIgA+ADwAVgBhAHIAaQBhAGIAbABlACAAcwB5AG0AYgBvAGwAPQAiAHgAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAdgAiACAAcgB1AGwAZQA9ACIAQwBoAG8AbwBzAGUASQBuAHAAdQB0ACIAPgA8AFYAYQByAGkAYQBiAGwAZ
QBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAcwAiACAALwA+ADwAVgBhAHIAaQBhAGIAbABlAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAYwBvAGwAdQBtAG4ATgBhAG0AZQAiACAALwA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwALwBWAGEAcgBpAGEAYgBsAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAbwBuAHYAIgAgAHIAdQBsAGUAPQAiAEYAbwByAG0AYQB0AFAAYQByAHQAaQBhAGwARABhAHQAZQBUAGkAbQBlACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAYQB0AGUAdABpAG0AZQAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AZABhAHQAZQB0AGkAbQBlAF8AaQBuAHAAdQB0AEQAYQB0AGUAVABpAG0AZQAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AcAB1AHQARABhAHQAZQBUAGkAbQBlACIAIAByAHUAbABlAD0AIgB+AGMAbwBuAHYAZQByAHQAXwBpAG4AcAB1AHQARABhAHQAZQBUAGkAbQBlAF8AcABhAHIAcwBlAGQARABhAHQAZQBUAGkAbQBlACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHAAYQByAHMAZQBkAEQAYQB0AGUAVABpAG0AZQAiACAAcgB1AGwAZQA9ACIAUABhAHIAcwBlAFAAYQByAHQAaQBhAGwARABhAHQAZQBUAGkAbQBlACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAFMAUwAiACAAcgB1AGwAZQA9ACIAVwBoAG8AbABlAEMAbwBsAHUAbQBuACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHgAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AcAB1AHQARAB0AEYAbwByAG0AYQB0AHMAIgA+ADwAQQByAHIAYQB5ACAAcwBpAHoAZQA9ACIAMQAiAD4APABJAHQAZQBtACAAaQA9ACIAMAAiAD4APABEAGEAdABlAFQAaQBtAGUARgBvAHIAbQBhAHQAIABmAG8AcgBtAGEAdAA9ACIAeQB5AHkAeQBcAC0ATQBNAFwALQBkAGQAIgA+ADwATgB1AG0AZQByAGkAYwBGAG8AcgBtAGEAdABQAGEAcgB0ACAAQgBhAHMAZQBGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIAeQB5AHkAeQAiACAARgB1AGwAbABGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIAeQB5AHkAeQAiACAARABhAHQAZQBUAGkAbQBlAFAAYQByAHQAPQAiAFkAZQBhAHIAIgAgAC8APgA8AEMAbwBuAHMAdABhAG4AdABGAG8AcgBtAGEAdABQAGEAcgB0AD4APAAhAFsAQwBEAEEAVABBAFsALQBdAF0APgA8AC8AQwBvAG4AcwB0AGEAbgB0AEYAbwByAG0AYQB0AFAAYQByAHQAPgA8AE4AdQBtAGUAcgBpAGMARgBvAHIAbQBhAHQAUABhAHIAdAAgAEIAYQBzAGUARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAE0AT
QAiACAARgB1AGwAbABGAG8AcgBtAGEAdABTAHQAcgBpAG4AZwA9ACIATQBNACIAIABEAGEAdABlAFQAaQBtAGUAUABhAHIAdAA9ACIATQBvAG4AdABoACIAIAAvAD4APABDAG8AbgBzAHQAYQBuAHQARgBvAHIAbQBhAHQAUABhAHIAdAA+ADwAIQBbAEMARABBAFQAQQBbAC0AXQBdAD4APAAvAEMAbwBuAHMAdABhAG4AdABGAG8AcgBtAGEAdABQAGEAcgB0AD4APABOAHUAbQBlAHIAaQBjAEYAbwByAG0AYQB0AFAAYQByAHQAIABCAGEAcwBlAEYAbwByAG0AYQB0AFMAdAByAGkAbgBnAD0AIgBkAGQAIgAgAEYAdQBsAGwARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAGQAZAAiACAARABhAHQAZQBUAGkAbQBlAFAAYQByAHQAPQAiAEQAYQB5ACIAIAAvAD4APAAvAEQAYQB0AGUAVABpAG0AZQBGAG8AcgBtAGEAdAA+ADwALwBJAHQAZQBtAD4APAAvAEEAcgByAGEAeQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBvAHUAdABwAHUAdABEAHQARgBvAHIAbQBhAHQAIgA+ADwARABhAHQAZQBUAGkAbQBlAEYAbwByAG0AYQB0ACAAZgBvAHIAbQBhAHQAPQAiAGQAZABkAGQAIgA+ADwAUwB0AHIAaQBuAGcARgBvAHIAbQBhAHQAUABhAHIAdAAgAEIAYQBzAGUARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAGQAZABkAGQAIgAgAEYAdQBsAGwARgBvAHIAbQBhAHQAUwB0AHIAaQBuAGcAPQAiAGQAZABkAGQAIgAgAEQAYQB0AGUAVABpAG0AZQBQAGEAcgB0AD0AIgBEAGEAeQBPAGYAVwBlAGUAawAiAD4APABMAG8AbwBrAHUAcAAgAHMAdAByAGkAbgBnAD0AIgBTAHUAbgBkAGEAeQAiACAAdgBhAGwAdQBlAD0AIgAwACIAIAAvAD4APABMAG8AbwBrAHUAcAAgAHMAdAByAGkAbgBnAD0AIgBNAG8AbgBkAGEAeQAiACAAdgBhAGwAdQBlAD0AIgAxACIAIAAvAD4APABMAG8AbwBrAHUAcAAgAHMAdAByAGkAbgBnAD0AIgBUAHUAZQBzAGQAYQB5ACIAIAB2AGEAbAB1AGUAPQAiADIAIgAgAC8APgA8AEwAbwBvAGsAdQBwACAAcwB0AHIAaQBuAGcAPQAiAFcAZQBkAG4AZQBzAGQAYQB5ACIAIAB2AGEAbAB1AGUAPQAiADMAIgAgAC8APgA8AEwAbwBvAGsAdQBwACAAcwB0AHIAaQBuAGcAPQAiAFQAaAB1AHIAcwBkAGEAeQAiACAAdgBhAGwAdQBlAD0AIgA0ACIAIAAvAD4APABMAG8AbwBrAHUAcAAgAHMAdAByAGkAbgBnAD0AIgBGAHIAaQBkAGEAeQAiACAAdgBhAGwAdQBlAD0AIgA1ACIAIAAvAD4APABMAG8AbwBrAHUAcAAgAHMAdAByAGkAbgBnAD0AIgBTAGEAdAB1AHIAZABhAHkAIgAgAHYAYQBsAHUAZQA9ACIANgAiACAALwA+ADwALwBTAHQAcgBpAG4AZwBGAG8AcgBtAGEAdABQAGEAcgB0AD4APAAvAEQAYQB0AGUAVABpAG0AZQBGAG8AcgBtAGEAdAA+ADwALwBMAGkAdABlAHIAYQBsAE4Ab
wBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATABlAHQATgBvAGQAZQA+ADwALwBMAGUAdABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4APAAvAE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlAD4A", - "examples": "{\"-1\": {\"row\": -1, \"sourceData\": {\"dropoff_date\": \"2013-08-22\"}, \"example\": \"Thursday\"}, \"-2\": {\"row\": -2, \"sourceData\": {\"dropoff_date\": \"2013-11-03\"}, \"example\": \"Sunday\"}}", - "outputType": 2, - "priorColumnIds": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "dropoff_date" - ] + "selectedColumn": "store_forward" } }, - "sourceColumnIds": [ - "dropoff_date" - ] + "typeProperty": 1 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "93abe985-7723-4a42-b37d-f4e760c87f95", - "type": "Microsoft.DPrep.SplitColumnByExampleBlock", - "arguments": { + { "column": { "type": 2, "details": { - "selectedColumn": "pickup_time" + "selectedColumn": "pickup_minute" } }, - "delimiters": [], - "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgA6ACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADEAIgA+ADwASQB0AGUAbQAgAGkAPQAiADAAIgA+ADwAIQBbAEMARABBAFQAQQBbADAAXQBdAD4APAAvAEkAdABlAG0APgA8AC8AQQByAHIAYQB5AD4
APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAG4AdQBtAFMAcABsAGkAdABzACIAPgA8ACEAWwBDAEQAQQBUAEEAWwAyAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBTAHQAYQByAHQAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZABlAGwAaQBtAGkAdABlAHIARQBuAGQAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBuAGMAbAB1AGQAZQBEAGUAbABpAG0AaQB0AGUAcgBzACIAPgA8ACEAWwBDAEQAQQBUAEEAWwBmAGEAbABzAGUAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGYAaQBsAGwAUwB0AHIAYQB0AGUAZwB5ACIAPgA8ACEAWwBDAEQAQQBUAEEAWwBOAHUAbABsAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA=", - "examples": "[]", - "fillStrategy": 0, - "keepDelimiter": false, - "newColumnsBaseName": "pickup_time_" + "typeProperty": 2 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "9df4ba6e-7399-4299-8610-19956c839af7", - "type": "Microsoft.DPrep.SplitColumnByExampleBlock", - "arguments": { + { "column": { "type": 2, "details": { - "selectedColumn": "dropoff_time" + "selectedColumn": "dropoff_latitude" } }, - "delimiters": [], - "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgA6ACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADEAIgA+ADwASQB0AGUAbQAgAGkAPQAiADAAIgA+ADwAIQBbAEMARABBAFQAQQBbADAAXQBdAD4APAAvAEkAdABlAG0APgA8AC8AQQByAHIAYQB5AD4
APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAG4AdQBtAFMAcABsAGkAdABzACIAPgA8ACEAWwBDAEQAQQBUAEEAWwAyAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBTAHQAYQByAHQAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZABlAGwAaQBtAGkAdABlAHIARQBuAGQAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAaQBuAGMAbAB1AGQAZQBEAGUAbABpAG0AaQB0AGUAcgBzACIAPgA8ACEAWwBDAEQAQQBUAEEAWwBmAGEAbABzAGUAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGYAaQBsAGwAUwB0AHIAYQB0AGUAZwB5ACIAPgA8ACEAWwBDAEQAQQBUAEEAWwBOAHUAbABsAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA=", - "examples": "[]", - "fillStrategy": 0, - "keepDelimiter": false, - "newColumnsBaseName": "dropoff_time_" + "typeProperty": 3 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "d6fa1270-fe48-4995-85ec-f79823589553", - "type": "Microsoft.DPrep.SplitColumnByExampleBlock", - "arguments": { + { "column": { "type": 2, "details": { - "selectedColumn": "pickup_time_1" + "selectedColumn": "cost" } }, - "delimiters": [], - "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgA6ACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADAAIgAgAC8APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbgB1AG0
AUwBwAGwAaQB0AHMAIgA+ADwAIQBbAEMARABBAFQAQQBbADIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAZQBsAGkAbQBpAHQAZQByAFMAdABhAHIAdAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBFAG4AZAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AYwBsAHUAZABlAEQAZQBsAGkAbQBpAHQAZQByAHMAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZgBpAGwAbABTAHQAcgBhAHQAZQBnAHkAIgA+ADwAIQBbAEMARABBAFQAQQBbAE4AdQBsAGwAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", - "examples": "[]", - "fillStrategy": 0, - "keepDelimiter": false, - "newColumnsBaseName": "pickup_time_1_" + "typeProperty": 3 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "1d7d70f5-f7d4-4907-a400-3ef9baa7b754", - "type": "Microsoft.DPrep.SplitColumnByExampleBlock", - "arguments": { + { "column": { "type": 2, "details": { - "selectedColumn": "dropoff_time_1" + "selectedColumn": "pickup_weekday" } }, - "delimiters": [], - "dsl": 
"PABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgByAGUAZwBpAG8AbgBTAHAAbABpAHQAIgAgAHIAdQBsAGUAPQAiAFMAcABsAGkAdABSAGUAZwBpAG8AbgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBzAHAAbABpAHQATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AcwBwAGwAaQB0AE0AYQB0AGMAaABlAHMAXwBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiAD4APABOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBtAHUAbAB0AGkAcABsAGUATQBhAHQAYwBoAGUAcwAiACAAcgB1AGwAZQA9ACIAfgBjAG8AbgB2AGUAcgB0AF8AbQB1AGwAdABpAHAAbABlAE0AYQB0AGMAaABlAHMAXwBkACIAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAIgAgAHIAdQBsAGUAPQAiAEwAbwBvAGsAQQByAG8AdQBuAGQAIgA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AE4AbwBuAHQAZQByAG0AaQBuAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGMAIgAgAHIAdQBsAGUAPQAiAEMAbwBuAHMAdABTAHQAcgAiAD4APABWAGEAcgBpAGEAYgBsAGUATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgB2ACIAIAAvAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcwAiAD4APAAhAFsAQwBEAEEAVABBAFsAIgA6ACIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+ADwATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAcgAiACAAcgB1AGwAZQA9ACIARQBtAHAAdAB5ACIAPgA8AFYAYQByAGkAYQBiAGwAZQBOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAHYAIgAgAC8APgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AC8ATgBvAG4AdABlAHIAbQBpAG4AYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAGcAbgBvAHIAZQBJAG4AZABlAHgAZQBzACIAPgA8AEEAcgByAGEAeQAgAHMAaQB6AGUAPQAiADAAIgAgAC8APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAbgB1AG0
AUwBwAGwAaQB0AHMAIgA+ADwAIQBbAEMARABBAFQAQQBbADIAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwATABpAHQAZQByAGEAbABOAG8AZABlACAAcwB5AG0AYgBvAGwAPQAiAGQAZQBsAGkAbQBpAHQAZQByAFMAdABhAHIAdAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBkAGUAbABpAG0AaQB0AGUAcgBFAG4AZAAiAD4APAAhAFsAQwBEAEEAVABBAFsAZgBhAGwAcwBlAF0AXQA+ADwALwBMAGkAdABlAHIAYQBsAE4AbwBkAGUAPgA8AEwAaQB0AGUAcgBhAGwATgBvAGQAZQAgAHMAeQBtAGIAbwBsAD0AIgBpAG4AYwBsAHUAZABlAEQAZQBsAGkAbQBpAHQAZQByAHMAIgA+ADwAIQBbAEMARABBAFQAQQBbAGYAYQBsAHMAZQBdAF0APgA8AC8ATABpAHQAZQByAGEAbABOAG8AZABlAD4APABMAGkAdABlAHIAYQBsAE4AbwBkAGUAIABzAHkAbQBiAG8AbAA9ACIAZgBpAGwAbABTAHQAcgBhAHQAZQBnAHkAIgA+ADwAIQBbAEMARABBAFQAQQBbAE4AdQBsAGwAXQBdAD4APAAvAEwAaQB0AGUAcgBhAGwATgBvAGQAZQA+ADwALwBOAG8AbgB0AGUAcgBtAGkAbgBhAGwATgBvAGQAZQA+AA==", - "examples": "[]", - "fillStrategy": 0, - "keepDelimiter": false, - "newColumnsBaseName": "dropoff_time_1_" + "typeProperty": 0 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "bee58b74-72e1-4dfb-acde-1a14889c2805", - "type": "Microsoft.DPrep.DropColumnsBlock", - "arguments": { - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "pickup_date", - "pickup_time", - "dropoff_date", - "dropoff_time", - "pickup_date_1", - "dropoff_date_1", - "pickup_time_1", - "dropoff_time_1" - ] + "selectedColumn": "pickup_hour" } - } + }, + "typeProperty": 2 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "dcda6db8-866f-4e18-8460-2b92ae38c3a6", - "type": "Microsoft.DPrep.RenameColumnsBlock", - "arguments": { - "columnPairs": [ - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_date_2" - } - }, - "newColumnId": "pickup_month" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_date_3" - } - }, - "newColumnId": "pickup_monthday" - }, - { - 
"column": { - "type": 2, - "details": { - "selectedColumn": "pickup_time_1_1" - } - }, - "newColumnId": "pickup_hour" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_time_1_2" - } - }, - "newColumnId": "pickup_minute" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_time_2" - } - }, - "newColumnId": "pickup_second" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_date_2" - } - }, - "newColumnId": "dropoff_month" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_date_3" - } - }, - "newColumnId": "dropoff_monthday" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_time_1_1" - } - }, - "newColumnId": "dropoff_hour" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_time_1_2" - } - }, - "newColumnId": "dropoff_minute" - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_time_2" - } - }, - "newColumnId": "dropoff_second" + { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_longitude" } - ] + }, + "typeProperty": 3 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "f320919e-de1d-4a7f-9955-efde73b36231", - "type": "Microsoft.DPrep.DropColumnsBlock", - "arguments": { - "columns": { - "type": 0, + { + "column": { + "type": 2, "details": { - "selectedColumns": [ - "pickup_datetime", - "dropoff_datetime" - ] + "selectedColumn": "dropoff_weekday" } - } + }, + "typeProperty": 0 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "9c844277-6394-40b8-971d-86015c9782dd", - "type": "Microsoft.DPrep.SetColumnTypesBlock", - "arguments": { - "columnConversion": [ - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_weekday" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_hour" - } - }, - 
"typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_minute" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_second" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_hour" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_minute" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_second" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "store_forward" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_longitude" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_longitude" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "passengers" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "distance" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "vendor" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_weekday" - } - }, - "typeProperty": 0 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "pickup_latitude" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "dropoff_latitude" - } - }, - "typeProperty": 3 - }, - { - "column": { - "type": 2, - "details": { - "selectedColumn": "cost" - } - }, - "typeProperty": 3 + { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_second" } - ] + }, + "typeProperty": 2 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "3e100d7a-1ed9-4ec9-b4eb-79c64f0b9e35", 
- "type": "Microsoft.DPrep.ExpressionFilterBlock", - "arguments": { - "expression": {"r":["Invoke",[{"r":["Identifier","Value_GT"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"distance"]]},0]]]} + { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_hour" + } + }, + "typeProperty": 2 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - }, - { - "id": "abdbfa65-9f62-4326-8e25-a8801f7c3ef2", - "type": "Microsoft.DPrep.ExpressionFilterBlock", - "arguments": { - "expression": {"r":["Invoke",[{"r":["Identifier","Value_GT"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"cost"]]},0]]]} + { + "column": { + "type": 2, + "details": { + "selectedColumn": "dropoff_minute" + } + }, + "typeProperty": 2 }, - "localData": {}, - "isEnabled": true, - "name": null, - "annotation": null - } - ], - "inspectors": [] + { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_longitude" + } + }, + "typeProperty": 3 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_second" + } + }, + "typeProperty": 2 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "pickup_latitude" + } + }, + "typeProperty": 3 + }, + { + "column": { + "type": 2, + "details": { + "selectedColumn": "distance" + } + }, + "typeProperty": 3 + } + ] + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "0cf8ee9c-d0b3-47fa-8b88-8d40393476b0", + "type": "Microsoft.DPrep.ExpressionFilterBlock", + "arguments": { + "expression": {"r":["Invoke",[{"r":["Identifier","Value_GT"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"distance"]]},0]]]} + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null + }, + { + "id": "d413f9c1-e33b-4c7a-8e44-a789af2dbf14", + "type": "Microsoft.DPrep.ExpressionFilterBlock", + "arguments": { + "expression": 
{"r":["Invoke",[{"r":["Identifier","Value_GT"]},[{"r":["RecordField",[{"r":["Identifier","row"]},"cost"]]},0]]]} + }, + "localData": {}, + "isEnabled": true, + "name": null, + "annotation": null } ], - "runConfigurations": [] + "inspectors": [] } \ No newline at end of file diff --git a/tutorials/img-classification-part1-training.ipynb b/tutorials/img-classification-part1-training.ipynb index 4171c4925..c9e2bd538 100644 --- a/tutorials/img-classification-part1-training.ipynb +++ b/tutorials/img-classification-part1-training.ipynb @@ -661,7 +661,7 @@ "metadata": { "authors": [ { - "name": "haining" + "name": "roastala" } ], "kernelspec": { @@ -681,7 +681,7 @@ "pygments_lexer": "ipython3", "version": "3.6.8" }, - "msauthor": "haining" + "msauthor": "roastala" }, "nbformat": 4, "nbformat_minor": 2 diff --git a/tutorials/img-classification-part2-deploy.ipynb b/tutorials/img-classification-part2-deploy.ipynb index b1a2c1045..a26b9ef17 100644 --- a/tutorials/img-classification-part2-deploy.ipynb +++ b/tutorials/img-classification-part2-deploy.ipynb @@ -592,7 +592,7 @@ "metadata": { "authors": [ { - "name": "haining" + "name": "roastala" } ], "kernelspec": { diff --git a/tutorials/regression-part1-data-prep.ipynb b/tutorials/regression-part1-data-prep.ipynb index 31ed5603f..11d8c1f66 100644 --- a/tutorials/regression-part1-data-prep.ipynb +++ b/tutorials/regression-part1-data-prep.ipynb @@ -60,7 +60,7 @@ "Use the following to install necessary packages if you don't already have them.\n", "\n", "```shell\n", - "pip install azureml-dataprep\n", + "pip install \"azureml-dataprep>=1.1.0,<1.2.0\"\n", "```\n", "\n", "Import the SDK." 
@@ -557,8 +557,7 @@ "import os\n", "file_path = os.path.join(os.getcwd(), \"dflows.dprep\")\n", "\n", - "package = dprep.Package([final_df])\n", - "package.save(file_path)" + "final_df.save(file_path)" ] }, { diff --git a/tutorials/regression-part2-automated-ml.ipynb b/tutorials/regression-part2-automated-ml.ipynb index e380940ea..ee8ed8ea1 100644 --- a/tutorials/regression-part2-automated-ml.ipynb +++ b/tutorials/regression-part2-automated-ml.ipynb @@ -137,8 +137,7 @@ "\n", "file_path = os.path.join(os.getcwd(), \"dflows.dprep\")\n", "\n", - "package_saved = dprep.Package.open(file_path)\n", - "dflow_prepared = package_saved.dataflows[0]\n", + "dflow_prepared = dprep.Dataflow.open(file_path)\n", "dflow_prepared.get_profile()" ] },