@@ -18,7 +18,6 @@
 
 from enum import Enum
 from typing import Dict, List, Set, Union, Optional, Any, TYPE_CHECKING
-from urllib.parse import urlparse
 
 import attr
 
@@ -465,6 +464,7 @@ def __init__(
         self.step_args = step_args
         self.estimator = estimator
         self.inputs = inputs
+        self.job_name = None
 
         self._properties = Properties(
             step_name=name, step=self, shape_name="DescribeTrainingJobResponse"
@@ -493,19 +493,6 @@ def __init__(
                 DeprecationWarning,
             )
 
-        self.job_name = None
-        if estimator and (estimator.source_dir or estimator.entry_point):
-            # By default, `Estimator` will upload the local code to an S3 path
-            # containing a timestamp. This causes cache misses whenever a
-            # pipeline is updated, even if the underlying script hasn't changed.
-            # To avoid this, hash the contents of the training script and include it
-            # in the `job_name` passed to the `Estimator`, which will be used
-            # instead of the timestamped path.
-            if not is_pipeline_variable(estimator.source_dir) and not is_pipeline_variable(
-                estimator.entry_point
-            ):
-                self.job_name = self._generate_code_upload_path()
-
     @property
     def arguments(self) -> RequestType:
         """The arguments dictionary that is used to call `create_training_job`.
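
The block removed above implemented content-based code upload paths: instead of letting the Estimator upload local scripts to a timestamped S3 prefix (which changes on every pipeline upsert and defeats step caching), it derived a deterministic job_name from a hash of the script contents. A minimal sketch of that idea, assuming a SHA-256 digest over file bytes; hash_files_or_dirs_sketch, train.py, and MyTrainingStep are illustrative stand-ins, and the SDK's real sagemaker.workflow.utilities.hash_files_or_dirs may traverse and hash differently:

import hashlib
from pathlib import Path

def hash_files_or_dirs_sketch(paths):
    """Digest the contents of files/directories into one hex string.

    Illustrative stand-in for sagemaker.workflow.utilities.hash_files_or_dirs;
    the real helper's traversal order and algorithm may differ.
    """
    digest = hashlib.sha256()
    for path in sorted(str(p) for p in paths):
        p = Path(path)
        files = sorted(p.rglob("*")) if p.is_dir() else [p]
        for f in files:
            if f.is_file():
                digest.update(f.read_bytes())
    return digest.hexdigest()

# Deterministic name: identical script contents yield an identical job_name,
# so the step's arguments (and hence its cache key) stay stable across upserts.
code_hash = hash_files_or_dirs_sketch(["train.py"])
job_name = f"MyTrainingStep-{code_hash}"[:1024]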
@@ -554,26 +541,6 @@ def to_request(self) -> RequestType:
 
         return request_dict
 
-    def _generate_code_upload_path(self) -> str or None:
-        """Generate an upload path for local training scripts based on their content."""
-        from sagemaker.workflow.utilities import hash_files_or_dirs
-
-        if self.estimator.source_dir:
-            source_dir_url = urlparse(self.estimator.source_dir)
-            if source_dir_url.scheme == "" or source_dir_url.scheme == "file":
-                code_hash = hash_files_or_dirs(
-                    [self.estimator.source_dir] + self.estimator.dependencies
-                )
-                return f"{self.name}-{code_hash}"[:1024]
-        elif self.estimator.entry_point:
-            entry_point_url = urlparse(self.estimator.entry_point)
-            if entry_point_url.scheme == "" or entry_point_url.scheme == "file":
-                code_hash = hash_files_or_dirs(
-                    [self.estimator.entry_point] + self.estimator.dependencies
-                )
-                return f"{self.name}-{code_hash}"[:1024]
-        return None
-
 
 class CreateModelStep(ConfigurableRetryStep):
     """`CreateModelStep` for SageMaker Pipelines Workflows."""
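
The deleted method only hashed local sources: it used urlparse to skip anything already addressed by a stable S3 URI. A small sketch of that scheme check (is_local_path is a hypothetical helper name, not part of the SDK):

from urllib.parse import urlparse

def is_local_path(code: str) -> bool:
    # Plain paths have no scheme; file:// URLs are local too. Anything else
    # (e.g. s3://) is already a stable remote reference, so no hashing is needed.
    return urlparse(code).scheme in ("", "file")

assert is_local_path("scripts/train.py")
assert is_local_path("file:///opt/ml/code/train.py")
assert not is_local_path("s3://my-bucket/code/train.py")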
@@ -895,16 +862,6 @@ def __init__(
                     "code argument has to be a valid S3 URI or local file path "
                     + "rather than a pipeline variable"
                 )
-            code_url = urlparse(code)
-            if code_url.scheme == "" or code_url.scheme == "file":
-                # By default, `Processor` will upload the local code to an S3 path
-                # containing a timestamp. This causes cache misses whenever a
-                # pipeline is updated, even if the underlying script hasn't changed.
-                # To avoid this, hash the contents of the script and include it
-                # in the `job_name` passed to the `Processor`, which will be used
-                # instead of the timestamped path.
-                self.job_name = self._generate_code_upload_path()
-
         warnings.warn(
             (
                 'We are deprecating the instantiation of ProcessingStep using "processor".'
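
The comments in both removed blocks give the same rationale: pipeline step caching keys off the step's request arguments, so a code S3 path that embeds a fresh upload timestamp changes those arguments on every pipeline update even when the script itself is unchanged. A toy illustration of the effect; the JSON-based key and the CodeS3Uri field are assumptions for demonstration only, not SageMaker's actual cache-key algorithm:

import hashlib
import json

def cache_key(step_arguments: dict) -> str:
    # Toy stand-in: any key derived from the arguments changes when they do.
    return hashlib.sha256(
        json.dumps(step_arguments, sort_keys=True).encode()
    ).hexdigest()

hashed = {"CodeS3Uri": "s3://bucket/MyStep-<content-hash>/code.tar.gz"}
stamped_v1 = {"CodeS3Uri": "s3://bucket/2023-01-01-00-00-00/code.tar.gz"}
stamped_v2 = {"CodeS3Uri": "s3://bucket/2023-01-02-00-00-00/code.tar.gz"}

assert cache_key(hashed) == cache_key(hashed)          # stable key -> cache hit
assert cache_key(stamped_v1) != cache_key(stamped_v2)  # new timestamp -> cache miss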