From dd7b4131a97bd7362fb06c4daba99b25f50e2f12 Mon Sep 17 00:00:00 2001 From: Andrei Vishniakov <31008759+avishniakov@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:10:33 +0100 Subject: [PATCH 1/4] import `BuildConfiguration` --- .../spark/step_operators/kubernetes_step_operator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/zenml/integrations/spark/step_operators/kubernetes_step_operator.py b/src/zenml/integrations/spark/step_operators/kubernetes_step_operator.py index 91d783c513a..60f21e5cc47 100644 --- a/src/zenml/integrations/spark/step_operators/kubernetes_step_operator.py +++ b/src/zenml/integrations/spark/step_operators/kubernetes_step_operator.py @@ -118,6 +118,8 @@ def get_docker_builds( Returns: The required Docker builds. """ + from zenml.config.build_configuration import BuildConfiguration + builds = [] extra_files = {ENTRYPOINT_NAME: LOCAL_ENTRYPOINT} for step_name, step in deployment.step_configurations.items(): From 88aa4b9080b4005cf37273566acd9f0683e0f65a Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 15 Dec 2023 09:22:24 +0000 Subject: [PATCH 2/4] Auto-update of E2E template --- .../spark/step_operators/kubernetes_step_operator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zenml/integrations/spark/step_operators/kubernetes_step_operator.py b/src/zenml/integrations/spark/step_operators/kubernetes_step_operator.py index 60f21e5cc47..40d49a8af2f 100644 --- a/src/zenml/integrations/spark/step_operators/kubernetes_step_operator.py +++ b/src/zenml/integrations/spark/step_operators/kubernetes_step_operator.py @@ -119,7 +119,7 @@ def get_docker_builds( The required Docker builds. """ from zenml.config.build_configuration import BuildConfiguration - + builds = [] extra_files = {ENTRYPOINT_NAME: LOCAL_ENTRYPOINT} for step_name, step in deployment.step_configurations.items(): From 101412166fe09fc85155ebf83cfe736b211587d0 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 15 Dec 2023 14:32:31 +0000 Subject: [PATCH 3/4] Auto-update of E2E template --- examples/e2e/.copier-answers.yml | 2 +- examples/e2e/configs/train_config.yaml | 7 +++---- examples/e2e/pipelines/training.py | 19 +++++++++++-------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/examples/e2e/.copier-answers.yml b/examples/e2e/.copier-answers.yml index 6b95a7a8c60..70e3f86e323 100644 --- a/examples/e2e/.copier-answers.yml +++ b/examples/e2e/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2023.11.23-2-gc19b794 +_commit: 2023.12.06 _src_path: gh:zenml-io/template-e2e-batch data_quality_checks: true email: '' diff --git a/examples/e2e/configs/train_config.yaml b/examples/e2e/configs/train_config.yaml index 9ad75e2f020..567e51281bc 100644 --- a/examples/e2e/configs/train_config.yaml +++ b/examples/e2e/configs/train_config.yaml @@ -32,13 +32,9 @@ steps: model_trainer: parameters: name: e2e_use_case - compute_performance_metrics_on_current_data: - parameters: - target_env: staging promote_with_metric_compare: parameters: mlflow_model_name: e2e_use_case - target_env: staging notify_on_success: parameters: notify_on_success: False @@ -65,6 +61,9 @@ model_version: # pipeline level extra configurations extra: notify_on_failure: True +# pipeline level parameters +parameters: + target_env: staging # This set contains all the model configurations that you want # to evaluate during hyperparameter tuning stage. model_search_space: diff --git a/examples/e2e/pipelines/training.py b/examples/e2e/pipelines/training.py index 6e7c5f0d0ce..93ac1f5ae38 100644 --- a/examples/e2e/pipelines/training.py +++ b/examples/e2e/pipelines/training.py @@ -17,7 +17,7 @@ import random -from typing import List, Optional +from typing import Any, Dict, List, Optional from steps import ( compute_performance_metrics_on_current_data, @@ -33,7 +33,7 @@ train_data_splitter, ) -from zenml import get_pipeline_context, pipeline +from zenml import pipeline from zenml.logger import get_logger logger = get_logger(__name__) @@ -41,6 +41,8 @@ @pipeline(on_failure=notify_on_failure) def e2e_use_case_training( + model_search_space: Dict[str, Any], + target_env: str, test_size: float = 0.2, drop_na: Optional[bool] = None, normalize: Optional[bool] = None, @@ -57,6 +59,8 @@ def e2e_use_case_training( trains and evaluates a model. Args: + model_search_space: Search space for hyperparameter tuning + target_env: The environment to promote the model to test_size: Size of holdout set for training 0.0..1.0 drop_na: If `True` NA values will be removed from dataset normalize: If `True` dataset will be normalized with MinMaxScaler @@ -65,12 +69,10 @@ def e2e_use_case_training( min_test_accuracy: Threshold to stop execution if test set accuracy is lower fail_on_accuracy_quality_gates: If `True` and `min_train_accuracy` or `min_test_accuracy` are not met - execution will be interrupted early - """ ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### # Link all the steps together by calling them and passing the output # of one step as the input of the next step. - pipeline_extra = get_pipeline_context().extra ########## ETL stage ########## raw_data, target, _ = data_loader(random_state=random.randint(0, 100)) dataset_trn, dataset_tst = train_data_splitter( @@ -87,9 +89,7 @@ def e2e_use_case_training( ########## Hyperparameter tuning stage ########## after = [] search_steps_prefix = "hp_tuning_search_" - for config_name, model_search_configuration in pipeline_extra[ - "model_search_space" - ].items(): + for config_name, model_search_configuration in model_search_space.items(): step_name = f"{search_steps_prefix}{config_name}" hp_tuning_single_search( id=step_name, @@ -123,12 +123,15 @@ def e2e_use_case_training( latest_metric, current_metric, ) = compute_performance_metrics_on_current_data( - dataset_tst=dataset_tst, after=["model_evaluator"] + dataset_tst=dataset_tst, + target_env=target_env, + after=["model_evaluator"], ) promote_with_metric_compare( latest_metric=latest_metric, current_metric=current_metric, + target_env=target_env, ) last_step = "promote_with_metric_compare" From 7ccd6e93cdce66660360154a526f1f4b53da959e Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sat, 16 Dec 2023 07:54:02 +0000 Subject: [PATCH 4/4] Auto-update of NLP template --- examples/nlp-case/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/nlp-case/run.py b/examples/nlp-case/run.py index 504ced0deb6..1cd0bb6cc44 100644 --- a/examples/nlp-case/run.py +++ b/examples/nlp-case/run.py @@ -67,7 +67,7 @@ @click.option( "--no-cache", is_flag=True, - default=False, + default=True, help="Disable caching for the pipeline run.", ) @click.option(