zenml-io · stefannica · Feb 19, 2025 · Feb 19, 2025 · schustmi · Feb 19, 2025
diff --git a/docs/book/component-guide/orchestrators/sagemaker.md b/docs/book/component-guide/orchestrators/sagemaker.md
@@ -167,7 +167,7 @@ Additional configuration for the Sagemaker orchestrator can be passed via `Sagem
 * `sagemaker_session`
 * `entrypoint`
 * `base_job_name`
-* `env`
+* `environment`
 
 For example, settings can be provided and applied in the following way:
 
@@ -180,6 +180,7 @@ from zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor import (
 sagemaker_orchestrator_settings = SagemakerOrchestratorSettings(
     instance_type="ml.m5.large",
     volume_size_in_gb=30,
+    environment={"MY_ENV_VAR": "my_value"},
 )
 
 

diff --git a/src/zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py b/src/zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py
@@ -71,6 +71,7 @@ class SagemakerOrchestratorSettings(BaseSettings):
             For processor_args.instance_type, check
             https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-available-instance-types.html
             for a list of available instance types.
+        environment: Environment variables to pass to the container.
         estimator_args: Arguments that are directly passed to the SageMaker
             Estimator for a specific step, allowing for overriding the default
             settings provided when configuring the component. See
@@ -116,6 +117,7 @@ class SagemakerOrchestratorSettings(BaseSettings):
 
     processor_args: Dict[str, Any] = {}
     estimator_args: Dict[str, Any] = {}
+    environment: Dict[str, str] = {}
 
     input_data_s3_mode: str = "File"
     input_data_s3_uri: Optional[Union[str, Dict[str, str]]] = Field(

diff --git a/src/zenml/integrations/aws/flavors/sagemaker_step_operator_flavor.py b/src/zenml/integrations/aws/flavors/sagemaker_step_operator_flavor.py
@@ -55,6 +55,7 @@ class SagemakerStepOperatorSettings(BaseSettings):
             For estimator_args.instance_type, check
             https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-available-instance-types.html
             for a list of available instance types.
+        environment: Environment variables to pass to the container.
 
     """
 
@@ -64,6 +65,7 @@ class SagemakerStepOperatorSettings(BaseSettings):
         default=None, union_mode="left_to_right"
     )
     estimator_args: Dict[str, Any] = {}
+    environment: Dict[str, str] = {}
 
     _deprecation_validator = deprecation_utils.deprecate_pydantic_attributes(
         "instance_type"

diff --git a/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py b/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py
@@ -323,6 +323,19 @@ def prepare_or_run_pipeline(
                 ExecutionVariables.PIPELINE_EXECUTION_ARN
             )
 
+            if step_settings.environment:
+                step_environment = step_settings.environment.copy()
+                # Sagemaker does not allow environment variables longer than 256
+                # characters to be passed to Processor steps. If an environment variable
+                # is longer than 256 characters, we split it into multiple environment
+                # variables (chunks) and re-construct it on the other side using the
+                # custom entrypoint configuration.
+                split_environment_variables(
+                    size_limit=SAGEMAKER_PROCESSOR_STEP_ENV_VAR_SIZE_LIMIT,
+                    env=step_environment,
+                )
+                environment.update(step_environment)
+
             use_training_step = (
                 step_settings.use_training_step
                 if step_settings.use_training_step is not None
@@ -457,6 +470,11 @@ def prepare_or_run_pipeline(
                         )
                     )
 
+            # Convert environment to a dict of strings
+            environment = {
+                key: str(value) for key, value in environment.items()
+            }
+
             if use_training_step:
                 # Create Estimator and TrainingStep
                 estimator = sagemaker.estimator.Estimator(

diff --git a/src/zenml/integrations/aws/step_operators/sagemaker_step_operator.py b/src/zenml/integrations/aws/step_operators/sagemaker_step_operator.py
@@ -181,6 +181,11 @@ def launch(
                 self.name,
             )
 
+        settings = cast(SagemakerStepOperatorSettings, self.get_settings(info))
+
+        if settings.environment:
+            environment.update(settings.environment)
+
         # Sagemaker does not allow environment variables longer than 512
         # characters to be passed to Estimator steps. If an environment variable
         # is longer than 512 characters, we split it into multiple environment
@@ -194,8 +199,6 @@ def launch(
         image_name = info.get_image(key=SAGEMAKER_DOCKER_IMAGE_KEY)
         environment[_ENTRYPOINT_ENV_VARIABLE] = " ".join(entrypoint_command)
 
-        settings = cast(SagemakerStepOperatorSettings, self.get_settings(info))
-
         # Get and default fill SageMaker estimator arguments for full ZenML support
         estimator_args = settings.estimator_args
 
@@ -221,6 +224,9 @@ def launch(
             "instance_type", settings.instance_type or "ml.m5.large"
         )
 
+        # Convert environment to a dict of strings
+        environment = {key: str(value) for key, value in environment.items()}
+
         estimator_args["environment"] = environment
         estimator_args["instance_count"] = 1
         estimator_args["sagemaker_session"] = session