From 24c21bdd0aa0d762b4b1fdd44e5570c7d8387688 Mon Sep 17 00:00:00 2001
From: Uemit Yoldas
Date: Sat, 15 Feb 2025 21:41:19 +0100
Subject: [PATCH 1/2] feature: integrate amtviz for visualization of tuning jobs

---
 src/sagemaker/amtviz/__init__.py         |  17 +
 src/sagemaker/amtviz/job_metrics.py      | 185 ++++++
 src/sagemaker/amtviz/visualization.py    | 800 +++++++++++++++++++++++
 src/sagemaker/tuner.py                   |  57 ++
 tests/unit/test_tuner_visualize.py       | 303 +++++++++
 tests/unit/tuner_visualize_test_utils.py | 110 ++++
 tox.ini                                  |   1 +
 7 files changed, 1473 insertions(+)
 create mode 100644 src/sagemaker/amtviz/__init__.py
 create mode 100644 src/sagemaker/amtviz/job_metrics.py
 create mode 100644 src/sagemaker/amtviz/visualization.py
 create mode 100644 tests/unit/test_tuner_visualize.py
 create mode 100644 tests/unit/tuner_visualize_test_utils.py

diff --git a/src/sagemaker/amtviz/__init__.py b/src/sagemaker/amtviz/__init__.py
new file mode 100644
index 0000000000..9e6dd1a64b
--- /dev/null
+++ b/src/sagemaker/amtviz/__init__.py
@@ -0,0 +1,17 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT-0
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify,
+# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from sagemaker.amtviz.visualization import visualize_tuning_job
+__all__ = ['visualize_tuning_job']
\ No newline at end of file
diff --git a/src/sagemaker/amtviz/job_metrics.py b/src/sagemaker/amtviz/job_metrics.py
new file mode 100644
index 0000000000..6005f886f8
--- /dev/null
+++ b/src/sagemaker/amtviz/job_metrics.py
@@ -0,0 +1,185 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT-0
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+# IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from datetime import datetime, timedelta
+from typing import Callable, List, Optional, Tuple, Dict, Any
+import hashlib
+import os
+from pathlib import Path
+
+import pandas as pd
+import numpy as np
+import boto3
+import logging
+
+logger = logging.getLogger(__name__)
+
+cw = boto3.client("cloudwatch")
+sm = boto3.client("sagemaker")
+
+
+def disk_cache(outer: Callable) -> Callable:
+    """A decorator that implements disk-based caching for CloudWatch metrics data.
+
+    This decorator caches the output of the wrapped function to disk in JSON Lines format.
+    It creates a cache key using an MD5 hash of the function arguments and stores the data
+    in the user's home directory under .amtviz/cw_metrics_cache/.
+
+    Args:
+        outer (Callable): The function to be wrapped. Must return a pandas DataFrame
+            containing CloudWatch metrics data.
+
+    Returns:
+        Callable: A wrapper function that implements the caching logic.
+    """
+
+    def inner(*args: Any, **kwargs: Any) -> pd.DataFrame:
+        key_input = str(args) + str(kwargs)
+        # nosec b303 - Not used for cryptography, but to create lookup key
+        key = hashlib.md5(key_input.encode("utf-8")).hexdigest()
+        cache_dir = Path.home().joinpath(".amtviz/cw_metrics_cache")
+        fn = f"{cache_dir}/req_{key}.jsonl.gz"
+        if Path(fn).exists():
+            try:
+                df = pd.read_json(fn, lines=True)
+                logger.debug("H", end="")
+                df["ts"] = pd.to_datetime(df["ts"])
+                df["ts"] = df["ts"].dt.tz_localize(None)
+                df["rel_ts"] = pd.to_datetime(df["rel_ts"]) # pyright: ignore [reportIndexIssue, reportOptionalSubscript]
+                df["rel_ts"] = df["rel_ts"].dt.tz_localize(None)
+                return df
+            except KeyError:
+                # Empty file leads to empty df, hence no df['ts'] possible
+                pass
+            # nosec b110 - doesn't matter why we could not load it.
+            except BaseException as e:
+                logger.error("\nException", type(e), e)
+                pass  # continue with calling the outer function
+
+        logger.debug("M", end="")
+        df = outer(*args, **kwargs)
+        assert isinstance(df, pd.DataFrame), "Only caching Pandas DataFrames."
+
+        os.makedirs(cache_dir, exist_ok=True)
+        df.to_json(fn, orient="records", date_format="iso", lines=True)
+
+        return df
+
+    return inner
+
+
+def _metric_data_query_tpl(metric_name: str, dim_name: str, dim_value: str) -> Dict[str, Any]:
+    return {
+        "Id": metric_name.lower().replace(":", "_").replace("-", "_"),
+        "MetricStat": {
+            "Stat": "Average",
+            "Metric": {
+                "Namespace": "/aws/sagemaker/TrainingJobs",
+                "MetricName": metric_name,
+                "Dimensions": [
+                    {"Name": dim_name, "Value": dim_value},
+                ],
+            },
+            "Period": 60,
+        },
+        "ReturnData": True,
+    }
+
+
+def _get_metric_data(
+    queries: List[Dict[str, Any]],
+    start_time: datetime,
+    end_time: datetime
+) -> pd.DataFrame:
+    start_time = start_time - timedelta(hours=1)
+    end_time = end_time + timedelta(hours=1)
+    response = cw.get_metric_data(MetricDataQueries=queries, StartTime=start_time, EndTime=end_time)
+
+    df = pd.DataFrame()
+    if "MetricDataResults" not in response:
+        return df
+
+    for metric_data in response["MetricDataResults"]:
+        values = metric_data["Values"]
+        ts = np.array(metric_data["Timestamps"], dtype=np.datetime64)
+        labels = [metric_data["Label"]] * len(values)
+
+        df = pd.concat([df, pd.DataFrame({"value": values, "ts": ts, "label": labels})])
+
+    # We now calculate the relative time based on the first actually observed
+    # time stamps, not the potentially earlier start time that we used to scope
+    # our CW API call. The difference could be, for example, startup times or
+    # waiting for Spot.
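+    # [Editor's note, illustrative addition, not part of the original patch:
+    # anchoring rel_ts near the Unix epoch below keeps it a real datetime, so
+    # Altair can later format it with a time axis. E.g. samples observed at
+    # 12:00:00 and 12:05:00 map to rel_ts values of roughly 00:00:00 and
+    # 00:05:00, independent of the wall-clock start time.]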
+    if not df.empty:
+        df["rel_ts"] = datetime.fromtimestamp(1) + (df["ts"] - df["ts"].min())  # pyright: ignore
+    return df
+
+
+@disk_cache
+def _collect_metrics(
+    dimensions: List[Tuple[str, str]],
+    start_time: datetime,
+    end_time: Optional[datetime]
+) -> pd.DataFrame:
+
+    df = pd.DataFrame()
+    for dim_name, dim_value in dimensions:
+        response = cw.list_metrics(
+            Namespace="/aws/sagemaker/TrainingJobs",
+            Dimensions=[
+                {"Name": dim_name, "Value": dim_value},
+            ],
+        )
+        if not response["Metrics"]:
+            continue
+        metric_names = [metric["MetricName"] for metric in response["Metrics"]]
+        if not metric_names:
+            # No metric data yet, or not any longer, because the data were aged out
+            continue
+        metric_data_queries = [
+            _metric_data_query_tpl(metric_name, dim_name, dim_value) for metric_name in metric_names
+        ]
+        df = pd.concat([df, _get_metric_data(metric_data_queries, start_time, end_time)])
+
+    return df
+
+
+def get_cw_job_metrics(
+    job_name: str,
+    start_time: Optional[datetime] = None,
+    end_time: Optional[datetime] = None
+) -> pd.DataFrame:
+    """Retrieves CloudWatch metrics for a SageMaker training job.
+
+    Args:
+        job_name (str): Name of the SageMaker training job.
+        start_time (datetime, optional): Start time for metrics collection.
+            Defaults to now - 4 hours.
+        end_time (datetime, optional): End time for metrics collection.
+            Defaults to start_time + 4 hours.
+
+    Returns:
+        pd.DataFrame: Metrics data with columns for value, timestamp, and metric name.
+            Results are cached to disk for improved performance.
+    """
+    dimensions = [
+        ("TrainingJobName", job_name),
+        ("Host", job_name + "/algo-1"),
+    ]
+    # If not given, use reasonable defaults for start and end time
+    start_time = start_time or datetime.now() - timedelta(hours=4)
+    end_time = end_time or start_time + timedelta(hours=4)
+    return _collect_metrics(dimensions, start_time, end_time)
\ No newline at end of file
diff --git a/src/sagemaker/amtviz/visualization.py b/src/sagemaker/amtviz/visualization.py
new file mode 100644
index 0000000000..377a19304d
--- /dev/null
+++ b/src/sagemaker/amtviz/visualization.py
@@ -0,0 +1,800 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT-0
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify,
+# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
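[Editor's note: for orientation, a minimal usage sketch of the public entry
point defined in the module below; the tuning-job name is hypothetical.]

    from sagemaker.amtviz import visualize_tuning_job

    # Return just the charts for one (or a list of) tuning job name(s) ...
    charts = visualize_tuning_job("my-tuning-job")

    # ... or additionally return the underlying dataframes for further analysis.
    charts, trials_df, full_df = visualize_tuning_job("my-tuning-job", return_dfs=True)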
+
+
+import sagemaker
+import boto3
+from typing import Union, List, Optional, Tuple, Dict, Any
+import altair as alt
+import pandas as pd
+import numpy as np
+import os
+import warnings
+import logging
+from sagemaker.amtviz.job_metrics import get_cw_job_metrics
+
+warnings.filterwarnings("ignore")
+logger = logging.getLogger(__name__)
+
+pd.set_option("display.max_rows", 500)
+pd.set_option("display.max_columns", 500)
+pd.set_option("display.width", 1000)
+pd.set_option("display.max_colwidth", None)  # Don't truncate TrainingJobName
+
+
+alt.data_transformers.disable_max_rows()
+altair_renderer = os.getenv("ALTAIR_RENDERER", "default")
+logger.info(f"Setting altair renderer to {altair_renderer}.")
+alt.renderers.enable(altair_renderer)
+
+
+sm = boto3.client("sagemaker")
+
+
+def _columnize(charts: List[alt.Chart], cols: int = 2) -> alt.VConcatChart:
+    return alt.vconcat(*[alt.hconcat(*charts[i : i + cols]) for i in range(0, len(charts), cols)])
+
+
+def visualize_tuning_job(
+    tuning_jobs: Union[str, List[str], "sagemaker.tuner.HyperparameterTuner"],
+    return_dfs: bool = False,
+    job_metrics: Optional[List[str]] = None,
+    trials_only: bool = False,
+    advanced: bool = False,
+) -> Union[alt.Chart, Tuple[alt.Chart, pd.DataFrame, pd.DataFrame]]:
+    """
+    Visualize SageMaker hyperparameter tuning jobs.
+
+    Args:
+        tuning_jobs: Single tuning job or list of tuning jobs (name or HyperparameterTuner object)
+        return_dfs: Whether to return the underlying DataFrames
+        job_metrics: List of additional job metrics to include
+        trials_only: Whether to only show trials data
+        advanced: Whether to show advanced visualizations
+
+    Returns:
+        If return_dfs is False, returns Altair chart
+        If return_dfs is True, returns tuple of (chart, trials_df, full_df)
+    """
+
+    trials_df, tuned_parameters, objective_name, is_minimize = get_job_analytics_data(tuning_jobs)
+
+    try:
+        from IPython import get_ipython
+        if get_ipython():
+            # Running in a Jupyter Notebook
+            display(trials_df.head(10))
+        else:
+            # Running in a non-Jupyter environment
+            logger.info(trials_df.head(10).to_string())
+    except ImportError:
+        # Not running in a Jupyter Notebook
+        logger.info(trials_df.head(10).to_string())
+
+    full_df = (
+        _prepare_consolidated_df(trials_df, objective_name) if not trials_only else pd.DataFrame()
+    )
+
+    trials_df.columns = trials_df.columns.map(_clean_parameter_name)
+    full_df.columns = full_df.columns.map(_clean_parameter_name)
+    tuned_parameters = [_clean_parameter_name(tp) for tp in tuned_parameters]
+    objective_name = _clean_parameter_name(objective_name)
+
+    charts = create_charts(
+        trials_df,
+        tuned_parameters,
+        full_df,
+        objective_name,
+        minimize_objective=is_minimize,
+        job_metrics=job_metrics,
+        advanced=advanced,
+    )
+
+    if return_dfs:
+        return charts, trials_df, full_df
+    else:
+        return charts
+
+
+def create_charts(
+    trials_df: pd.DataFrame,
+    tuning_parameters: List[str],
+    full_df: pd.DataFrame,
+    objective_name: str,
+    minimize_objective: bool,
+    job_metrics: Optional[List[str]] = None,
+    highlight_trials: bool = True,
+    color_trials: bool = False,
+    advanced: bool = False,
+) -> alt.Chart:
+    """
+    Create visualization charts for hyperparameter tuning results.
+
+    Args:
+        trials_df: DataFrame containing trials data
+        tuning_parameters: List of hyperparameter names
+        full_df: DataFrame with consolidated data
+        objective_name: Name of the objective metric
+        minimize_objective: Whether objective should be minimized
+        job_metrics: Additional job metrics to include
+        highlight_trials: Whether to highlight selected trials
+        color_trials: Whether to color trials by job
+        advanced: Whether to show advanced visualizations
+
+    Returns:
+        Altair chart visualization
+    """
+
+    if trials_df.empty:
+        logger.info("No results available yet.")
+        return pd.DataFrame()
+
+    if job_metrics is None:
+        job_metrics = []
+
+    multiple_tuning_jobs = len(trials_df["TuningJobName"].unique()) > 1
+    multiple_job_status = len(trials_df["TrainingJobStatus"].unique()) > 1
+
+    # Rows, n>1
+    # Detail Charts
+
+    brush = alt.selection_interval(encodings=["x"], resolve="intersect", empty=True)
+
+    job_highlight_selection = alt.selection_point(
+        on="mouseover",
+        nearest=False,
+        empty=False,
+        fields=["TrainingJobName", "TrainingStartTime"],
+    )
+
+    # create tooltip
+    detail_tooltip = []
+    for trp in [objective_name] + tuning_parameters:
+        if trials_df[trp].dtype == np.float64:
+            trp = alt.Tooltip(trp, format=".2e")
+        detail_tooltip.append(trp)
+
+    detail_tooltip.append(alt.Tooltip("TrainingStartTime:T", format="%H:%M:%S"))
+    detail_tooltip.extend(["TrainingJobName", "TrainingJobStatus", "TrainingElapsedTimeSeconds"])
+
+    # create stroke/stroke-width for tuning_jobs
+    # and color for training jobs, if wanted
+    # add coloring of the stroke to highlight correlated
+    # data points
+    jobs_props = {"shape": alt.Shape("TrainingJobStatus:N", legend=None)}
+
+    if multiple_tuning_jobs:
+        jobs_props["strokeWidth"] = alt.StrokeWidthValue(2.0)
+        jobs_props["stroke"] = alt.Stroke("TuningJobName:N", legend=None)
+
+    if color_trials:
+        jobs_props["color"] = alt.Color("TrainingJobName:N")
+
+    if highlight_trials:
+        jobs_props["strokeWidth"] = alt.condition(
+            job_highlight_selection,
+            alt.StrokeWidthValue(2.0),
+            alt.StrokeWidthValue(2.0),
+        )
+        jobs_props["stroke"] = alt.condition(
+            job_highlight_selection,
+            alt.StrokeValue("gold"),
+            alt.Stroke("TuningJobName:N", legend=None)
+            if multiple_tuning_jobs
+            else alt.StrokeValue("white"),
+        )
+
+    opacity = alt.condition(brush, alt.value(1.0), alt.value(0.35))
+    charts = []
+
+    # Min and max of the objective. This is used in filtered
+    # charts, so that the filtering does not make the axis
+    # jump, which would make comparisons harder.
+    objective_scale = alt.Scale(
+        domain=(
+            trials_df[objective_name].min(),
+            trials_df[objective_name].max(),
+        )
+    )
+
+    # If we have multiple tuning jobs, we also want to be able
+    # to discriminate based on the individual tuning job, so
+    # we just treat them as an additional tuning parameter
+    tuning_parameters = tuning_parameters.copy()
+    if multiple_tuning_jobs:
+        tuning_parameters.append("TuningJobName")
+
+    # If we use early stopping and at least some jobs were
+    # stopped early, we want to be able to discriminate
+    # those jobs.
+    if multiple_job_status:
+        tuning_parameters.append("TrainingJobStatus")
+
+    def render_detail_charts():
+        # To force a tuning job to sample a combination more than once, we
+        # sometimes introduce a hyperparameter that has no effect.
+        # Its values are random and without impact, so we omit it from analysis.
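+        # [Editor's note, illustrative addition, not part of the original patch:
+        # such a no-op parameter would be declared alongside the real search
+        # space, e.g. hyperparameter_ranges["dummy"] = ContinuousParameter(0, 1),
+        # so that AMT re-samples otherwise identical configurations.]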
+        ignored_parameters = {"dummy"}
+        for tuning_parameter in tuning_parameters:
+            if tuning_parameter in ignored_parameters:
+                continue
+
+            # Map dataframe's dtype to altair's types and
+            # adjust scale if necessary
+            scale_type = "linear"
+            scale_log_base = 10
+
+            few_values = len(trials_df[tuning_parameter].unique()) < 8
+            parameter_type = "N"  # Nominal
+            dtype = str(trials_df.dtypes[tuning_parameter])
+            if "float" in dtype:
+                parameter_type = "Q"  # Quantitative
+                ratio = (trials_df[tuning_parameter].max() + 1e-10) / (
+                    trials_df[tuning_parameter].min() + 1e-10
+                )
+                not_likely_discrete = (
+                    len(trials_df[tuning_parameter].unique()) > trials_df[tuning_parameter].count()
+                )  # edge case when both are equal
+                if few_values and not_likely_discrete:
+                    if ratio > 50:
+                        scale_type = "log"
+                    elif ratio > 10:
+                        scale_type = "log"
+                        scale_log_base = 2
+
+            elif "int" in dtype or "object" in dtype:
+                parameter_type = "O"  # Ordinal
+
+            x_encoding = alt.X(
+                f"{tuning_parameter}:{parameter_type}",
+                scale=alt.Scale(
+                    zero=False,
+                    padding=1,
+                    type=scale_type,
+                    base=scale_log_base,
+                ),
+            )
+
+            # Sync the coloring for categorical hyperparameters
+            discrete = parameter_type in ["O", "N"] and few_values
+
+            # Detail Chart
+            charts.append(
+                alt.Chart(trials_df)
+                .add_params(brush)
+                .add_params(job_highlight_selection)
+                .mark_point(filled=True, size=50)
+                .encode(
+                    x=x_encoding,
+                    y=alt.Y(
+                        f"{objective_name}:Q",
+                        scale=alt.Scale(zero=False, padding=1),
+                        axis=alt.Axis(title=objective_name),
+                    ),
+                    opacity=opacity,
+                    tooltip=detail_tooltip,
+                    **jobs_props,
+                )
+            )
+
+            if discrete:
+                # Individually coloring the values only if we don't already
+                # use the colors to show the different tuning jobs
+                logger.info(f"{parameter_type}, {tuning_parameter}")
+                if not multiple_tuning_jobs:
+                    charts[-1] = charts[-1].encode(color=f"{tuning_parameter}:N")
+                charts[-1] = (
+                    (
+                        charts[-1]
+                        | alt.Chart(trials_df)
+                        .transform_filter(brush)
+                        .transform_density(
+                            objective_name,
+                            bandwidth=0.01,
+                            groupby=[tuning_parameter],
+                            # https://github.com/vega/altair/issues/3203#issuecomment-2141558911
+                            # Specifying extent no longer necessary (>5.1.2). Leaving the workaround in for now.
+                            extent=[
+                                trials_df[objective_name].min(),
+                                trials_df[objective_name].max(),
+                            ],
+                        )
+                        .mark_area(opacity=0.5)
+                        .encode(
+                            x=alt.X(
+                                "value:Q",
+                                title=objective_name,
+                                scale=objective_scale,
+                            ),
+                            y="density:Q",
+                            color=alt.Color(
+                                f"{tuning_parameter}:N",
+                            ),
+                            tooltip=tuning_parameter,
+                        )
+                    ).properties(title=tuning_parameter)
+                    # .resolve_scale("independent")
+                    # .resolve_legend(color="independent")
+                )
+
+            if advanced and parameter_type == "Q":
+                # Adding tick marks to the detail charts with quantitative hyperparameters
+                x_enc = x_encoding.copy()
+                charts[-1].encoding.x.title = None
+                charts[-1].encoding.x.axis = alt.Axis(labels=False)
+
+                charts[-1] = charts[-1] & alt.Chart(trials_df).mark_tick(opacity=0.5).encode(
+                    x=x_enc,
+                    opacity=alt.condition(brush, alt.value(0.5), alt.value(0.1)),
+                )
+
+        return _columnize(charts)
+
+    detail_charts = render_detail_charts()
+
+    # First Row
+    # Progress Over Time Chart
+
+    def render_progress_chart():
+        # Sorting trials by training start time, so that we can track the
+        # progress of the best objective so far over time
+        trials_df_by_tst = trials_df.sort_values(["TuningJobName", "TrainingStartTime"])
+        trials_df_by_tst["cum_objective"] = trials_df_by_tst.groupby(["TuningJobName"]).transform(
+            lambda x: x.cummin() if minimize_objective else x.cummax()
+        )[objective_name]
+
+        progress_chart = (
+            alt.Chart(trials_df_by_tst)
+            .add_params(brush)
+            .add_params(job_highlight_selection)
+            .mark_point(filled=True, size=50)
+            .encode(
+                x=alt.X("TrainingStartTime:T", scale=alt.Scale(nice=True)),
+                y=alt.Y(
+                    f"{objective_name}:Q",
+                    scale=alt.Scale(zero=False, padding=1),
+                    axis=alt.Axis(title=objective_name),
+                ),
+                opacity=opacity,
+                tooltip=detail_tooltip,
+                **jobs_props,
+            )
+        )
+
+        cum_obj_chart = (
+            alt.Chart(trials_df_by_tst)
+            .mark_line(
+                interpolate="step-after",
+                opacity=1.0,
+                strokeDash=[3, 3],
+                strokeWidth=2.0,
+            )
+            .encode(
+                x=alt.X("TrainingStartTime:T", scale=alt.Scale(nice=True)),
+                y=alt.Y(f"cum_objective:Q", scale=alt.Scale(zero=False, padding=1)),
+                stroke=alt.Stroke("TuningJobName:N", legend=None),
+            )
+        )
+
+        if advanced:
+            return cum_obj_chart + progress_chart
+        else:
+            return progress_chart
+
+    progress_chart = render_progress_chart()
+
+    # First Row
+    # KDE Training Objective
+    result_hist_chart = (
+        alt.Chart(trials_df)
+        .transform_filter(brush)
+        .transform_density(objective_name, bandwidth=0.01)
+        .mark_area()
+        .encode(
+            x=alt.X(f"value:Q", scale=objective_scale, title=objective_name),
+            y="density:Q",
+        )
+    )
+    # Training Jobs
+    training_jobs_chart = (
+        alt.Chart(trials_df.sort_values(objective_name), title="Training Jobs")
+        .mark_bar()
+        .add_params(brush)
+        .add_params(job_highlight_selection)
+        .encode(
+            y=alt.Y(f"{objective_name}:Q"),
+            x=alt.X("TrainingJobName:N", sort=None),
+            color=alt.Color("TrainingJobName:N"),
+            opacity=opacity,
+            **jobs_props,
+        )
+    )
+
+    # Job Level Stats
+
+    training_job_name_encodings = {
+        "color": alt.condition(
+            brush,
+            alt.Color("TrainingJobName:N", legend=None),
+            alt.value("grey"),
+        ),
+        "opacity": alt.condition(brush, alt.value(1.0), alt.value(0.3)),
+        "strokeWidth": alt.condition(brush, alt.value(2.5), alt.value(0.8)),
+    }
+
+    duration_format = "%M:%S"
+    metrics_tooltip = [
+        "TrainingJobName:N",
+        "value:Q",
+        "label:N",
+        alt.Tooltip("ts:T", format="%e:%H:%M"),
+        alt.Tooltip("rel_ts:T", format="%e:%H:%M"),
+    ]
+
+    job_level_rows = alt.HConcatChart()
+
+    # Use CW metrics
+    if not full_df.empty:
+        # Objective Progression
+
+        objective_progression_chart = None
+        # Suppress diagram if we only have one, final, value
+        if (
+            full_df.loc[full_df.label == objective_name]
+            .groupby(["TuningJobName", "TrainingJobName"])[objective_name]
+            .count()
+            .max()
+            > 1
+        ):
+            objective_progression_chart = (
+                alt.Chart(full_df, title=f"Progression {objective_name}", width=400)
+                .transform_filter(alt.FieldEqualPredicate(field="label", equal=objective_name))
+                .mark_line(point=True)
+                .encode(
+                    x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)),
+                    y=alt.Y("value:Q", scale=alt.Scale(zero=False)),
+                    **training_job_name_encodings,
+                    tooltip=metrics_tooltip,
+                )
+                .interactive()
+            )
+
+            if multiple_job_status:
+                objective_progression_chart = objective_progression_chart.encode(
+                    strokeDash=alt.StrokeDash("TrainingJobStatus:N", legend=None)
+                )
+
+            # Secondary chart showing the same contents, but by absolute time.
+            objective_progression_absolute_chart = objective_progression_chart.encode(
+                x=alt.X("ts:T", scale=alt.Scale(nice=True))
+            )
+
+            objective_progression_chart = (
+                objective_progression_chart | objective_progression_absolute_chart
+            )
+
+        ###
+
+        job_metrics_charts = []
+        for metric in job_metrics:
+            metric_chart = (
+                alt.Chart(full_df, title=metric, width=400)
+                .transform_filter(alt.FieldEqualPredicate(field="label", equal=metric))
+                .encode(
+                    y=alt.Y("value:Q", scale=alt.Scale(zero=False)),
+                    **training_job_name_encodings,
+                    tooltip=metrics_tooltip,
+                )
+                .interactive()
+            )
+
+            if (
+                full_df.loc[full_df.label == metric]
+                .groupby(["TuningJobName", "TrainingJobName"])
+                .count()
+                .value.max()
+                == 1
+            ):
+                # single value, render as a bar over the training jobs on the x-axis
+                metric_chart = metric_chart.encode(
+                    x=alt.X("TrainingJobName:N", sort=None)
+                ).mark_bar(interpolate="linear", point=True)
+            else:
+                # multiple values, render the values over time on the x-axis
+                metric_chart = metric_chart.encode(
+                    x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format))
+                ).mark_line(interpolate="linear", point=True)
+
+            job_metrics_charts.append(metric_chart)
+
+        job_metrics_chart = _columnize(job_metrics_charts, 3)
+
+        # Job instance
+        # 'MemoryUtilization', 'CPUUtilization'
+        instance_metrics_chart = (
+            alt.Chart(full_df, title="CPU and Memory")
+            .transform_filter(
+                alt.FieldOneOfPredicate(
+                    field="label",
+                    oneOf=[
+                        "MemoryUtilization",
+                        "CPUUtilization",
+                    ],
+                )
+            )
+            .mark_line()
+            .encode(
+                x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)),
+                y="value:Q",
+                **training_job_name_encodings,
+                strokeDash=alt.StrokeDash("label:N", legend=alt.Legend(orient="bottom")),
+                tooltip=metrics_tooltip,
+            )
+            .interactive()
+        )
+
+        if "GPUUtilization" in full_df.label.values:
+            instance_metrics_chart = (
+                instance_metrics_chart
+                | alt.Chart(full_df, title="GPU and GPU Memory")
+                .transform_filter(
+                    alt.FieldOneOfPredicate(
+                        field="label",
+                        oneOf=[
+                            "GPUMemoryUtilization",
+                            "GPUUtilization",
+                        ],
+                    )
+                )
+                .mark_line()
+                .encode(
+                    x=alt.X("rel_ts:T", axis=alt.Axis(format=duration_format)),
+                    y=alt.Y("value:Q"),
+                    **training_job_name_encodings,
+                    strokeDash=alt.StrokeDash("label:N", legend=alt.Legend(orient="bottom")),
+                    tooltip=metrics_tooltip,
+                )
+                .interactive()
+            )
+
+        job_level_rows = job_metrics_chart & instance_metrics_chart
+        if objective_progression_chart:
+            job_level_rows = objective_progression_chart & job_level_rows
+        job_level_rows = job_level_rows.resolve_scale(strokeDash="independent").properties(
+            title="Job / Instance Level Metrics"
+        )
+
+    overview_row = (progress_chart | result_hist_chart).properties(
+        title="Hyper Parameter Tuning Job"
+    )
+    detail_rows = detail_charts.properties(title="Hyper Parameter Details")
+    if job_level_rows:
+        job_level_rows = training_jobs_chart & job_level_rows
+
+    return overview_row & detail_rows & job_level_rows
+
+
+# Ensure proper parameter name characters for altair 5+
+def _clean_parameter_name(s):
+    return s.replace(":", "_").replace(".", "_")
+
+
+def _prepare_training_job_metrics(jobs):
+    df = pd.DataFrame()
+    for job_name, start_time, end_time in jobs:
+        job_df = get_cw_job_metrics(
+            job_name,
+            start_time=pd.Timestamp(start_time) - pd.DateOffset(hours=8),
+            end_time=pd.Timestamp(end_time) + pd.DateOffset(hours=8),
+        )
+        if job_df is None:
+            logger.info(f"No CloudWatch metrics for {job_name}. Skipping.")
+            continue
+
+        job_df["TrainingJobName"] = job_name
+        df = pd.concat([df, job_df])
+    return df
+
+
+def _prepare_consolidated_df(trials_df, objective_name):
+    if trials_df.empty:
+        return pd.DataFrame()
+
+    logger.debug("Cache Hit/Miss: ", end="")
+    jobs_df = _prepare_training_job_metrics(
+        zip(
+            trials_df.TrainingJobName.values,
+            trials_df.TrainingStartTime.values,
+            trials_df.TrainingEndTime.values,
+        )
+    )
+    logger.info("")
+
+    if jobs_df.empty:
+        return pd.DataFrame()
+
+    merged_df = pd.merge(jobs_df, trials_df, on="TrainingJobName")
+    return merged_df
+
+
+def _get_df(tuning_job_name, filter_out_stopped=False):
+    tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)
+
+    df = tuner.dataframe()
+    if df.empty:  # HPO job just started; no results yet
+        return df
+
+    df["TuningJobName"] = tuning_job_name
+
+    # Filter out jobs without FinalObjectiveValue
+    df = df[df["FinalObjectiveValue"] > -float("inf")]
+
+    # Jobs early stopped by AMT are reported with their last
+    # objective value, before they are stopped.
+    # However this value may not be a good representation
+    # of the eventual objective value we would have seen
+    # if run without stopping. Therefore it may be confusing
+    # to include those runs.
+    # For now, if included, we use a different mark to
+    # discriminate visually between a stopped and finished job
+
+    if filter_out_stopped:
+        df = df[df["TrainingJobStatus"] != "Stopped"]
+
+    # Preprocessing values for [32], [64] etc.
+    for tuning_range in tuner.tuning_ranges.values():
+        parameter_name = tuning_range["Name"]
+        if df.dtypes[parameter_name] == "O":
+            try:
+                # Remove decorations, like []
+                df[parameter_name] = df[parameter_name].apply(
+                    lambda v: v.replace("[", "").replace("]", "").replace('"', "")
+                )
+
+                # Is it an int? 3 would work, 3.4 would fail.
+                try:
+                    df[parameter_name] = df[parameter_name].astype(int)
+                except ValueError:
+                    # A float then?
+                    df[parameter_name] = df[parameter_name].astype(float)
+
+            except Exception as e:
+                # Trouble, as this was not a number just pretending to be a string, but an actual string with characters. Leaving the value untouched
+                # Ex: Caught exception could not convert string to float: 'sqrt'
+                pass
+
+    return df
+
+
+def _get_tuning_job_names_with_parents(tuning_job_names):
+    """Resolve dependent jobs, one level only"""
+
+    all_tuning_job_names = []
+    for tuning_job_name in tuning_job_names:
+        tuning_job_result = sm.describe_hyper_parameter_tuning_job(
+            HyperParameterTuningJobName=tuning_job_name
+        )
+
+        # find parent jobs and retrieve all tuner dataframes
+        parent_jobs = []
+        if "WarmStartConfig" in tuning_job_result:
+            parent_jobs = [
+                cfg["HyperParameterTuningJobName"]
+                for cfg in tuning_job_result["WarmStartConfig"]["ParentHyperParameterTuningJobs"]
+            ]
+            if parent_jobs:
+                logger.info(f'Tuning job {tuning_job_name}\'s parents: {", ".join(parent_jobs)}')
+        all_tuning_job_names.extend([tuning_job_name, *parent_jobs])
+
+    # return de-duplicated tuning job names
+    return list(set(all_tuning_job_names))
+
+
+def get_job_analytics_data(tuning_job_names):
+    if not isinstance(tuning_job_names, list):
+        tuning_job_names = [tuning_job_names]
+
+    # Ensure we have a list of tuning job names (strings)
+    tuning_job_names = [
+        tuning_job.describe()["HyperParameterTuningJobName"]
+        if isinstance(tuning_job, sagemaker.tuner.HyperparameterTuner)
+        else tuning_job
+        for tuning_job in tuning_job_names
+    ]
+
+    # Maintain combined tuner dataframe from all tuning jobs
+    df = pd.DataFrame()
+
+    # maintain objective, direction of optimization and tuned parameters
+    objective_name = None
+    is_minimize = None
+    tuned_parameters = None
+
+    all_tuning_job_names = _get_tuning_job_names_with_parents(tuning_job_names)
+
+    for tuning_job_name in all_tuning_job_names:
+        tuning_job_result = sm.describe_hyper_parameter_tuning_job(
+            HyperParameterTuningJobName=tuning_job_name
+        )
+        status = tuning_job_result["HyperParameterTuningJobStatus"]
+        logger.info(f"Tuning job {tuning_job_name:25s} status: {status}")
+
+        df = pd.concat([df, _get_df(tuning_job_name)])
+
+        # maintain objective and assure that all tuning jobs use the same
+        job_is_minimize = (
+            tuning_job_result["HyperParameterTuningJobConfig"]["HyperParameterTuningJobObjective"][
+                "Type"
+            ]
+            != "Maximize"
+        )
+        job_objective_name = tuning_job_result["HyperParameterTuningJobConfig"][
+            "HyperParameterTuningJobObjective"
+        ]["MetricName"]
+        job_tuned_parameters = [
+            v["Name"]
+            for v in sagemaker.HyperparameterTuningJobAnalytics(
+                tuning_job_name
+            ).tuning_ranges.values()
+        ]
+
+        if not objective_name:
+            objective_name = job_objective_name
+            is_minimize = job_is_minimize
+            tuned_parameters = job_tuned_parameters
+        else:
+            if (
+                objective_name != job_objective_name
+                or is_minimize != job_is_minimize
+                or set(tuned_parameters) != set(job_tuned_parameters)
+            ):
+                raise ValueError(
+                    "All tuning jobs must use the same objective and optimization direction."
+                )
+
+    if not df.empty:
+        # Cleanup wrongly encoded floats, e.g. containing quotes.
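+        # [Editor's note, illustrative addition, not part of the original patch:
+        # e.g. a categorical value that the analytics dataframe reports as the
+        # string '"32"' is converted to the int 32 by the loop below; genuine
+        # strings such as '"gini"' fail the int() cast and are left as-is.]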
+        for i, dtype in enumerate(df.dtypes):
+            column_name = str(df.columns[i])
+            if column_name in [
+                "TrainingJobName",
+                "TrainingJobStatus",
+                "TuningJobName",
+            ]:
+                continue
+            if dtype == "object":
+                val = df[column_name].iloc[0]
+                if isinstance(val, str) and val.startswith('"'):
+                    try:
+                        df[column_name] = df[column_name].apply(lambda x: int(x.replace('"', "")))
+                    except:  # noqa: E722 nosec b110 if we fail, we just continue with what we had
+                        pass  # Value is not an int, but a string
+
+        df = df.sort_values("FinalObjectiveValue", ascending=is_minimize)
+        df[objective_name] = df.pop("FinalObjectiveValue")
+
+        # Fix potential issue with dates represented as objects, instead of a timestamp
+        # This can in other cases lead to https://www.markhneedham.com/blog/2020/01/10/altair-typeerror-object-type-date-not-json-serializable/
+        # Have only observed this for TrainingEndTime, but will be on the lookout for TrainingStartTime as well now
+        df["TrainingEndTime"] = pd.to_datetime(df["TrainingEndTime"])
+        df["TrainingStartTime"] = pd.to_datetime(df["TrainingStartTime"])
+
+    logger.info("")
+    logger.info(f"Number of training jobs with valid objective: {len(df)}")
+    logger.info(f"Lowest: {min(df[objective_name])} Highest {max(df[objective_name])}")
+
+    tuned_parameters = [_clean_parameter_name(tp) for tp in tuned_parameters]
+
+    return df, tuned_parameters, objective_name, is_minimize
\ No newline at end of file
diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py
index 4b0f38f36f..00ae78b1ad 100644
--- a/src/sagemaker/tuner.py
+++ b/src/sagemaker/tuner.py
@@ -2123,6 +2123,63 @@ def _add_estimator(
 
     delete_endpoint = removed_function("delete_endpoint")
 
+    @staticmethod
+    def visualize_jobs(
+        tuning_jobs: Union[str, 'sagemaker.tuner.HyperparameterTuner', List[Union[str, 'sagemaker.tuner.HyperparameterTuner']]],
+        return_dfs: bool = False,
+        job_metrics: Optional[List[str]] = None,
+        trials_only: bool = False,
+        advanced: bool = False
+    ):
+        """Create an interactive visualization based on altair charts using the sagemaker.amtviz
+        package.
+
+        Args:
+            tuning_jobs (str or sagemaker.tuner.HyperparameterTuner or list[str, sagemaker.tuner.HyperparameterTuner]):
+                One or more tuning jobs to create a visualization for.
+            return_dfs (bool): Option to return trials and full dataframe.
+            job_metrics (list[str]): Metrics to be used in charts.
+            trials_only (bool): Whether to show trials only or the full dataframe.
+            advanced (bool): Show a cumulative step line in the progress over time chart.
+
+        Returns:
+            A collection of charts (altair.VConcatChart); or charts, trials_df (pandas.DataFrame),
+            full_df (pandas.DataFrame) if ``return_dfs=True``.
+        """
+        try:
+            # Check if altair is installed
+            importlib.import_module('altair')
+
+        except ImportError:
+            print("Altair is not installed. To use the visualization feature, please install Altair:")
+            print("  pip install altair")
+            print("After installing Altair, you can use the methods visualize_jobs or visualize_job.")
+            return None
+
+        # If altair is installed, proceed with visualization
+        from sagemaker.amtviz import visualize_tuning_job
+
+        return visualize_tuning_job(
+            tuning_jobs,
+            return_dfs=return_dfs,
+            job_metrics=job_metrics,
+            trials_only=trials_only,
+            advanced=advanced,
+        )
+
+    def visualize_job(
+        self, return_dfs: bool = False,
+        job_metrics: Optional[List[str]] = None, trials_only: bool = False, advanced: bool = False
+    ):
+        """Convenience method on instance level for visualize_jobs().
+        See static method visualize_jobs().
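+
+        Example (editor's illustration, not part of the original patch; assumes
+        a fitted tuner):
+
+            tuner.fit({"training": train_input})
+            charts = tuner.visualize_job()
+            charts  # renders the Altair charts in a Jupyter notebook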
+ """ + return HyperparameterTuner.visualize_jobs( + self, + return_dfs=return_dfs, + job_metrics=job_metrics, + trials_only=trials_only, + advanced=advanced, + ) + class _TuningJob(_Job): """Placeholder docstring""" diff --git a/tests/unit/test_tuner_visualize.py b/tests/unit/test_tuner_visualize.py new file mode 100644 index 0000000000..ea9835a408 --- /dev/null +++ b/tests/unit/test_tuner_visualize.py @@ -0,0 +1,303 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import pandas as pd +import pytest +from mock import Mock, patch, MagicMock +import sagemaker +from sagemaker.estimator import Estimator +from sagemaker.session_settings import SessionSettings +from sagemaker.tuner import ( + HyperparameterTuner +) +from tests.unit.tuner_test_utils import ( + OBJECTIVE_METRIC_NAME, + HYPERPARAMETER_RANGES, + METRIC_DEFINITIONS +) +from sagemaker.session_settings import SessionSettings +# Visualization specific imports +from sagemaker.amtviz.visualization import visualize_tuning_job, get_job_analytics_data +from tests.unit.tuner_visualize_test_utils import ( + TUNING_JOB_NAMES, + TUNED_PARAMETERS, + OBJECTIVE_NAME, + TRIALS_DF_DATA, + FULL_DF_DATA, + TUNING_JOB_NAME_1, + TUNING_JOB_NAME_2, + TUNING_JOB_RESULT, + TRIALS_DF_COLUMNS, + FULL_DF_COLUMNS, + TRIALS_DF_TRAINING_JOB_NAMES, + TRIALS_DF_TRAINING_JOB_STATUSES, + TUNING_JOB_NAMES, + TRIALS_DF_VALID_F1_VALUES, + FILTERED_TUNING_JOB_DF_DATA, + TUNING_RANGES +) +import altair as alt + +def create_sagemaker_session(): + boto_mock = Mock(name="boto_session") + sms = Mock( + name="sagemaker_session", + boto_session=boto_mock, + config=None, + local_mode=False, + settings=SessionSettings() + ) + sms.sagemaker_config = {} + return sms + +@pytest.fixture() +def sagemaker_session(): + return create_sagemaker_session() + + +@pytest.fixture() +def estimator(sagemaker_session): + return Estimator( + "image", + "role", + 1, + "ml.c4.xlarge", + output_path="s3://bucket/prefix", + sagemaker_session=sagemaker_session, + ) + + +@pytest.fixture() +def tuner(estimator): + return HyperparameterTuner( + estimator, OBJECTIVE_METRIC_NAME, HYPERPARAMETER_RANGES, METRIC_DEFINITIONS + ) + +@pytest.fixture() +def tuner2(estimator): + return HyperparameterTuner( + estimator, OBJECTIVE_METRIC_NAME, HYPERPARAMETER_RANGES, METRIC_DEFINITIONS + ) + + +@pytest.fixture +def mock_visualize_tuning_job(): + with patch("sagemaker.amtviz.visualize_tuning_job") as mock_visualize: + mock_visualize.return_value = "mock_chart" + yield mock_visualize + + +@pytest.fixture +def mock_get_job_analytics_data(): + with patch("sagemaker.amtviz.visualization.get_job_analytics_data") as mock: + mock.return_value = ( + pd.DataFrame(TRIALS_DF_DATA), + TUNED_PARAMETERS, + OBJECTIVE_NAME, + True + ) + yield mock + + +@pytest.fixture +def mock_prepare_consolidated_df(): + with patch("sagemaker.amtviz.visualization._prepare_consolidated_df") as mock: + mock.return_value = pd.DataFrame(FULL_DF_DATA) + yield mock + + +# Test graceful handling if the required altair library is not installed 
+def test_visualize_jobs_altair_not_installed(capsys):
+    # Mock importlib.import_module to raise ImportError for 'altair'
+    with patch("importlib.import_module") as mock_import:
+        mock_import.side_effect = ImportError("No module named 'altair'")
+        result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES)
+        assert result is None
+        captured = capsys.readouterr()
+        assert "Altair is not installed." in captured.out
+        assert "pip install altair" in captured.out
+
+
+# Test basic method call if altair is installed
+def test_visualize_jobs_altair_installed(mock_visualize_tuning_job):
+    # Mock successful import of altair
+    with patch("importlib.import_module") as mock_import:
+        result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES)
+        assert result == "mock_chart"
+
+
+# Test for static method visualize_jobs()
+def test_visualize_jobs(mock_visualize_tuning_job):
+    result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES)
+    assert result == "mock_chart"
+    mock_visualize_tuning_job.assert_called_once_with(
+        TUNING_JOB_NAMES,
+        return_dfs=False,
+        job_metrics=None,
+        trials_only=False,
+        advanced=False
+    )
+    # Vary the parameters and check if they have been passed correctly
+    result = HyperparameterTuner.visualize_jobs(
+        [TUNING_JOB_NAME_1], return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True)
+    mock_visualize_tuning_job.assert_called_with(
+        [TUNING_JOB_NAME_1],
+        return_dfs=True,
+        job_metrics="job_metrics",
+        trials_only=True,
+        advanced=True
+    )
+
+
+# Test the instance method visualize_job() on a stubbed tuner object
+def test_visualize_job(tuner, mock_visualize_tuning_job):
+    # With default parameters
+    result = tuner.visualize_job()
+    assert result == "mock_chart"
+    mock_visualize_tuning_job.assert_called_once_with(
+        tuner,
+        return_dfs=False,
+        job_metrics=None,
+        trials_only=False,
+        advanced=False
+    )
+    # With varying parameters
+    result = tuner.visualize_job(return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True)
+    assert result == "mock_chart"
+    mock_visualize_tuning_job.assert_called_with(
+        tuner,
+        return_dfs=True,
+        job_metrics="job_metrics",
+        trials_only=True,
+        advanced=True
+    )
+
+
+# Test the static method visualize_jobs() on multiple stubbed tuner objects
+def test_visualize_multiple_jobs(tuner, tuner2, mock_visualize_tuning_job):
+    result = HyperparameterTuner.visualize_jobs([tuner, tuner2])
+    assert result == "mock_chart"
+    mock_visualize_tuning_job.assert_called_once_with(
+        [tuner, tuner2],
+        return_dfs=False,
+        job_metrics=None,
+        trials_only=False,
+        advanced=False
+    )
+    # Vary the parameters and check if they have been passed correctly
+    result = HyperparameterTuner.visualize_jobs(
+        [[tuner, tuner2]], return_dfs=True, job_metrics="job_metrics", trials_only=True, advanced=True)
+    mock_visualize_tuning_job.assert_called_with(
+        [[tuner, tuner2]],
+        return_dfs=True,
+        job_metrics="job_metrics",
+        trials_only=True,
+        advanced=True
+    )
+
+
+# Test direct method call for basic chart return type and default render settings
+def test_visualize_tuning_job_analytics_data_results_in_altair_chart(mock_get_job_analytics_data):
+    result = visualize_tuning_job("mock_job")
+    assert alt.renderers.active == "default"
+    assert isinstance(result, alt.VConcatChart)
+
+
+# Test the size and structure of the returned dataframes (trials_df and full_df)
+def test_visualize_tuning_job_return_dfs(mock_get_job_analytics_data, mock_prepare_consolidated_df):
+    charts, trials_df, full_df = visualize_tuning_job("mock_job", return_dfs=True)
+    # Basic assertion for the charts
+    assert isinstance(charts, alt.VConcatChart)
+
+    # Assertions for trials_df
+    assert isinstance(trials_df, pd.DataFrame)
+    assert trials_df.shape == (2, len(TRIALS_DF_COLUMNS))
+    assert trials_df.columns.tolist() == TRIALS_DF_COLUMNS
+    assert trials_df['TrainingJobName'].tolist() == TRIALS_DF_TRAINING_JOB_NAMES
+    assert trials_df['TrainingJobStatus'].tolist() == TRIALS_DF_TRAINING_JOB_STATUSES
+    assert trials_df['TuningJobName'].tolist() == TUNING_JOB_NAMES
+    assert trials_df['valid-f1'].tolist() == TRIALS_DF_VALID_F1_VALUES
+
+    # Assertions for full_df
+    assert isinstance(full_df, pd.DataFrame)
+    assert full_df.shape == (2, 16)
+    assert full_df.columns.tolist() == FULL_DF_COLUMNS
+
+
+# Test the handling of an empty trials dataframe
+@patch("sagemaker.amtviz.visualization.get_job_analytics_data")
+def test_visualize_tuning_job_empty_trials(mock_get_job_analytics_data):
+    mock_get_job_analytics_data.return_value = (
+        pd.DataFrame(),  # empty dataframe
+        TUNED_PARAMETERS,
+        OBJECTIVE_NAME,
+        True
+    )
+    charts = visualize_tuning_job("empty_job")
+    assert charts.empty
+
+
+# Test handling of the return_dfs and trials_only parameters
+def test_visualize_tuning_job_trials_only(mock_get_job_analytics_data):
+    # If return_dfs is set to False, then only charts should be returned
+    result = visualize_tuning_job("mock_job", return_dfs=False, trials_only=True)
+    assert isinstance(result, alt.VConcatChart)
+    # trials_only controls the content of the two returned dataframes (trials_df, full_df)
+    result, df1, df2 = visualize_tuning_job("mock_job", return_dfs=True, trials_only=True)
+    assert isinstance(df1, pd.DataFrame)
+    assert df1.shape == (2, len(TRIALS_DF_COLUMNS))
+    assert isinstance(df2, pd.DataFrame)
+    assert df2.empty
+    # The combination of return_dfs and trials_only=False is covered in 'test_visualize_tuning_job_return_dfs'
+
+
+# Check if all parameters are correctly passed to the (mocked) create_charts method
+@patch("sagemaker.amtviz.visualization.create_charts")
+def test_visualize_tuning_job_with_full_df(mock_create_charts, mock_get_job_analytics_data, mock_prepare_consolidated_df):
+    mock_create_charts.return_value = alt.Chart()
+    visualize_tuning_job("dummy_job")
+
+    # Check the create_charts call arguments
+    call_args = mock_create_charts.call_args[0]
+    call_kwargs = mock_create_charts.call_args[1]
+    assert isinstance(call_args[0], pd.DataFrame)  # trials_df
+    assert isinstance(call_args[1], list)  # tuned_parameters
+    assert isinstance(call_args[2], pd.DataFrame)  # full_df
+    assert isinstance(call_args[3], str)  # objective_name
+    assert call_kwargs.get("minimize_objective")
+
+    # Check the details of the passed arguments
+    trials_df = call_args[0]
+    assert trials_df.columns.tolist() == TRIALS_DF_COLUMNS
+    tuned_parameters = call_args[1]
+    assert tuned_parameters == TUNED_PARAMETERS
+    objective_name = call_args[3]
+    assert objective_name == OBJECTIVE_NAME
+    full_df = call_args[2]
+    assert full_df.columns.tolist() == FULL_DF_COLUMNS
+
+
+# Test the dataframe produced by get_job_analytics_data()
+@patch("sagemaker.HyperparameterTuningJobAnalytics")
+def test_get_job_analytics_data(mock_hyperparameter_tuning_job_analytics):
+    # Mock sagemaker's describe_hyper_parameter_tuning_job and some internal methods
+    sagemaker.amtviz.visualization.sm.describe_hyper_parameter_tuning_job = Mock(return_value=TUNING_JOB_RESULT)
+    sagemaker.amtviz.visualization._get_tuning_job_names_with_parents = Mock(
+        return_value=[TUNING_JOB_NAME_1, TUNING_JOB_NAME_2])
+    sagemaker.amtviz.visualization._get_df = Mock(return_value=pd.DataFrame(FILTERED_TUNING_JOB_DF_DATA))
+    mock_tuning_job_instance = MagicMock()
+    mock_hyperparameter_tuning_job_analytics.return_value = mock_tuning_job_instance
+    mock_tuning_job_instance.tuning_ranges.values.return_value = TUNING_RANGES
+
+    df, tuned_parameters, objective_name, is_minimize = get_job_analytics_data([TUNING_JOB_NAME_1])
+    assert df.shape == (4, 12)
+    assert df.columns.tolist() == TRIALS_DF_COLUMNS
+    assert tuned_parameters == TUNED_PARAMETERS
+    assert objective_name == OBJECTIVE_NAME
+    assert is_minimize is False
\ No newline at end of file
diff --git a/tests/unit/tuner_visualize_test_utils.py b/tests/unit/tuner_visualize_test_utils.py
new file mode 100644
index 0000000000..3f66794a00
--- /dev/null
+++ b/tests/unit/tuner_visualize_test_utils.py
@@ -0,0 +1,110 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+
+TRIALS_DF_COLUMNS = [
+    'criterion', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobName',
+    'TrainingJobStatus', 'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1'
+]
+
+FULL_DF_COLUMNS = [
+    'value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth', 'min-samples-leaf',
+    'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobStatus', 'TrainingStartTime', 'TrainingEndTime',
+    'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1'
+]
+
+
+TRIALS_DF_TRAINING_JOB_NAMES = [
+    'random-240712-1545-019-4ac17a84', 'random-240712-1545-021-fcd64dc1'
+]
+
+TRIALS_DF_TRAINING_JOB_STATUSES = ['Completed', 'Completed']
+
+TUNING_JOB_NAME_1 = 'random-240712-1500'
+TUNING_JOB_NAME_2 = 'bayesian-240712-1600'
+TUNING_JOB_NAMES = [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2]
+TRIALS_DF_VALID_F1_VALUES = [0.950, 0.896]
+
+TUNED_PARAMETERS = ['n-estimators', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'criterion']
+OBJECTIVE_NAME = 'valid-f1'
+
+TRIALS_DF_DATA = {
+    'criterion': ['gini', 'log_loss'],
+    'max-depth': [18.0, 8.0],
+    'min-samples-leaf': [3.0, 10.0],
+    'min-weight-fraction-leaf': [0.011596, 0.062067],
+    'n-estimators': [110.0, 18.0],
+    'TrainingJobName': ['random-240712-1545-019-4ac17a84', 'random-240712-1545-021-fcd64dc1'],
+    'TrainingJobStatus': ['Completed', 'Completed'],
+    'TrainingStartTime': ['2024-07-12 17:55:59+02:00', '2024-07-12 17:56:50+02:00'],
+    'TrainingEndTime': ['2024-07-12 17:56:43+02:00', '2024-07-12 17:57:29+02:00'],
+    'TrainingElapsedTimeSeconds': [44.0, 39.0],
+    'TuningJobName': TUNING_JOB_NAMES,
+    'valid-f1': [0.950, 0.896]
+}
+
+FULL_DF_DATA = {
+    'value': [0.951000, 0.950000],
+    'ts': ['2024-07-12 15:56:00', '2024-07-12 15:56:00'],
+    'label': ['valid-precision', 'valid-recall'],
+    'rel_ts': ['1970-01-01 01:00:00', '1970-01-01 01:00:00'],
+    'TrainingJobName': ['random-240712-1545-019-4ac17a84', 'random-240712-1545-019-4ac17a84'],
+    'criterion': ['gini', 'gini'],
+    'max-depth': [18.0, 18.0],
+    'min-samples-leaf': [3.0, 3.0],
+    'min-weight-fraction-leaf': [0.011596, 0.011596],
+    'n-estimators': [110.0, 110.0],
+    'TrainingJobStatus': ['Completed', 'Completed'],
+    'TrainingStartTime': ['2024-07-12 17:55:59+02:00', '2024-07-12 17:55:59+02:00'],
+    'TrainingEndTime': ['2024-07-12 17:56:43+02:00', '2024-07-12 17:56:43+02:00'],
+    'TrainingElapsedTimeSeconds': [44.0, 45.0],
+    'TuningJobName': ['random-240712-1545', 'random-240712-1545'],
+    'valid-f1': [0.9500, 0.9500]
+}
+
+FILTERED_TUNING_JOB_DF_DATA = {
+    'criterion': ['log_loss', 'gini'],
+    'max-depth': [10.0, 16.0],
+    'min-samples-leaf': [7.0, 2.0],
+    'min-weight-fraction-leaf': [0.160910, 0.069803],
+    'n-estimators': [67.0, 79.0],
+    'TrainingJobName': ['random-240712-1545-050-c0b5c10a', 'random-240712-1545-049-2db2ec05'],
+    'TrainingJobStatus': ['Completed', 'Completed'],
+    'FinalObjectiveValue': [0.8190, 0.8910],
+    'TrainingStartTime': ['2024-07-12 18:09:48+02:00', '2024-07-12 18:09:45+02:00'],
+    'TrainingEndTime': ['2024-07-12 18:10:28+02:00', '2024-07-12 18:10:23+02:00'],
+    'TrainingElapsedTimeSeconds': [40.0, 38.0],
+    'TuningJobName': [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2]
+}
+
+TUNING_RANGES = [{'Name': 'n-estimators', 'MinValue': '1', 'MaxValue': '200', 'ScalingType': 'Auto'},
+                 {'Name': 'max-depth', 'MinValue': '1', 'MaxValue': '20', 'ScalingType': 'Auto'},
+                 {'Name': 'min-samples-leaf', 'MinValue': '1', 'MaxValue': '10', 'ScalingType': 'Auto'},
+                 {'Name': 'min-weight-fraction-leaf', 'MinValue': '0.01', 'MaxValue': '0.5', 'ScalingType': 'Auto'},
+                 {'Name': 'criterion', 'Values': ['"gini"', '"entropy"', '"log_loss"']}]
+
+
+TUNING_JOB_RESULT = {
+    'HyperParameterTuningJobName': TUNING_JOB_NAME_1,
+    'HyperParameterTuningJobConfig': {
+        'Strategy': 'Random',
+        'HyperParameterTuningJobObjective': {
+            'Type': 'Maximize',
+            'MetricName': 'valid-f1'
+        }
+    },
+    'HyperParameterTuningJobStatus': 'Completed',
+}
\ No newline at end of file
diff --git a/tox.ini b/tox.ini
index b16c0d2f0b..21e7248da2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -86,6 +86,7 @@ commands =
     pip install 'torch==2.0.1+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html'
    pip install 'torchvision==0.15.2+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html'
    pip install 'dill>=0.3.8'
+    pip install 'altair>=5.3' # needed for amtviz
 
     pytest {posargs}
 deps = .[test]

From a686e6d524dec210dcdd84d1d7e979f16bd918dd Mon Sep 17 00:00:00 2001
From: Uemit Yoldas
Date: Fri, 25 Apr 2025 20:17:23 +0200
Subject: [PATCH 2/2] fix: codestyle, type hints, license, and docstrings

---
 src/sagemaker/amtviz/__init__.py         |  28 ++---
 src/sagemaker/amtviz/job_metrics.py      |  55 ++++-----
 src/sagemaker/amtviz/visualization.py    | 142 +++++++++++++++--------
 src/sagemaker/tuner.py                   |  14 ++-
 tests/unit/test_tuner_visualize.py       |  21 +++-
 tests/unit/tuner_visualize_test_utils.py |  48 ++++++--
 6 files changed, 201 insertions(+), 107 deletions(-)

diff --git a/src/sagemaker/amtviz/__init__.py b/src/sagemaker/amtviz/__init__.py
index 9e6dd1a64b..2b05e7b0cf 100644
--- a/src/sagemaker/amtviz/__init__.py
+++ b/src/sagemaker/amtviz/__init__.py
@@ -1,17 +1,17 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-# SPDX-License-Identifier: MIT-0
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this
-# software and associated documentation files (the "Software"), to deal in the Software
-# without restriction, including without limitation the rights to use, copy, modify,
-# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
-# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
-# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Placeholder docstring"""
+from __future__ import absolute_import
 
 from sagemaker.amtviz.visualization import visualize_tuning_job
-__all__ = ['visualize_tuning_job']
\ No newline at end of file
+__all__ = ['visualize_tuning_job']
diff --git a/src/sagemaker/amtviz/job_metrics.py b/src/sagemaker/amtviz/job_metrics.py
index 6005f886f8..f84457f9da 100644
--- a/src/sagemaker/amtviz/job_metrics.py
+++ b/src/sagemaker/amtviz/job_metrics.py
@@ -1,18 +1,17 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-# SPDX-License-Identifier: MIT-0
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
-# IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Helper functions to retrieve job metrics from CloudWatch.""" +from __future__ import absolute_import from datetime import datetime, timedelta from typing import Callable, List, Optional, Tuple, Dict, Any @@ -20,10 +19,10 @@ import os from pathlib import Path +import logging import pandas as pd import numpy as np import boto3 -import logging logger = logging.getLogger(__name__) @@ -58,7 +57,8 @@ def inner(*args: Any, **kwargs: Any) -> pd.DataFrame: logger.debug("H", end="") df["ts"] = pd.to_datetime(df["ts"]) df["ts"] = df["ts"].dt.tz_localize(None) - df["rel_ts"] = pd.to_datetime(df["rel_ts"]) # pyright: ignore [reportIndexIssue, reportOptionalSubscript] + # pyright: ignore [reportIndexIssue, reportOptionalSubscript] + df["rel_ts"] = pd.to_datetime(df["rel_ts"]) df["rel_ts"] = df["rel_ts"].dt.tz_localize(None) return df except KeyError: @@ -66,8 +66,7 @@ def inner(*args: Any, **kwargs: Any) -> pd.DataFrame: pass # nosec b110 - doesn't matter why we could not load it. except BaseException as e: - logger.error("\nException", type(e), e) - pass # continue with calling the outer function + logger.error("\nException: %s - %s", type(e), e) logger.debug("M", end="") df = outer(*args, **kwargs) @@ -82,6 +81,7 @@ def inner(*args: Any, **kwargs: Any) -> pd.DataFrame: def _metric_data_query_tpl(metric_name: str, dim_name: str, dim_value: str) -> Dict[str, Any]: + """Returns a CloudWatch metric data query template.""" return { "Id": metric_name.lower().replace(":", "_").replace("-", "_"), "MetricStat": { @@ -100,10 +100,11 @@ def _metric_data_query_tpl(metric_name: str, dim_name: str, dim_value: str) -> D def _get_metric_data( - queries: List[Dict[str, Any]], - start_time: datetime, + queries: List[Dict[str, Any]], + start_time: datetime, end_time: datetime ) -> pd.DataFrame: + """Fetches CloudWatch metrics between timestamps and returns a DataFrame with selected columns.""" start_time = start_time - timedelta(hours=1) end_time = end_time + timedelta(hours=1) response = cw.get_metric_data(MetricDataQueries=queries, StartTime=start_time, EndTime=end_time) @@ -111,7 +112,7 @@ def _get_metric_data( df = pd.DataFrame() if "MetricDataResults" not in response: return df - + for metric_data in response["MetricDataResults"]: values = metric_data["Values"] ts = np.array(metric_data["Timestamps"], dtype=np.datetime64) @@ -130,11 +131,11 @@ def _get_metric_data( @disk_cache def _collect_metrics( - dimensions: List[Tuple[str, str]], - start_time: datetime, + dimensions: List[Tuple[str, str]], + start_time: datetime, end_time: Optional[datetime] ) -> pd.DataFrame: - + """Collects SageMaker training job metrics from CloudWatch based on given dimensions and time range.""" df = pd.DataFrame() for dim_name, dim_value in dimensions: response = cw.list_metrics( @@ -158,8 +159,8 @@ def _collect_metrics( def get_cw_job_metrics( - job_name: str, - start_time: Optional[datetime] = None, + job_name: str, + start_time: Optional[datetime] = None, end_time: Optional[datetime] = None ) -> pd.DataFrame: """Retrieves CloudWatch metrics for a SageMaker training job. 
@@ -182,4 +183,4 @@ def get_cw_job_metrics(
     # If not given, use reasonable defaults for start and end time
     start_time = start_time or datetime.now() - timedelta(hours=4)
     end_time = end_time or start_time + timedelta(hours=4)
-    return _collect_metrics(dimensions, start_time, end_time)
\ No newline at end of file
+    return _collect_metrics(dimensions, start_time, end_time)
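Taken together, the defaults above mean that calling get_cw_job_metrics without a window assumes the job ran within the last four hours (start = now - 4h, end = start + 4h). A sketch of a direct call, with a hypothetical training job name:

    from datetime import datetime, timedelta
    from sagemaker.amtviz.job_metrics import get_cw_job_metrics

    # Explicit two-hour window; omit both arguments to fall back to the
    # defaults shown above.
    metrics_df = get_cw_job_metrics(
        "my-training-job",  # hypothetical training job name
        start_time=datetime.now() - timedelta(hours=2),
        end_time=datetime.now(),
    )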
diff --git a/src/sagemaker/amtviz/visualization.py b/src/sagemaker/amtviz/visualization.py
index 377a19304d..bb66195771 100644
--- a/src/sagemaker/amtviz/visualization.py
+++ b/src/sagemaker/amtviz/visualization.py
@@ -1,28 +1,56 @@
-# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-# SPDX-License-Identifier: MIT-0
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this
-# software and associated documentation files (the "Software"), to deal in the Software
-# without restriction, including without limitation the rights to use, copy, modify,
-# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
-# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
-# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-
-import sagemaker
-import boto3
-from typing import Union, List, Optional, Tuple, Dict, Any
-import altair as alt
-import pandas as pd
-import numpy as np
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""
+This module provides visualization capabilities for SageMaker hyperparameter tuning jobs.
+
+It contains utilities to create interactive visualizations of hyperparameter tuning results
+using Altair charts. The module enables users to analyze and understand the performance
+of their hyperparameter optimization experiments through various visual representations
+including:
+- Progress of objective metrics over time
+- Distribution of results
+- Relationship between hyperparameters and objective values
+- Training job metrics and instance utilization
+- Comparative analysis across multiple tuning jobs
+
+Main Features:
+    - Visualize single or multiple hyperparameter tuning jobs
+    - Display training job metrics from CloudWatch
+    - Support for both completed and in-progress tuning jobs
+    - Interactive filtering and highlighting of data points
+    - CPU, memory, and GPU utilization visualization
+    - Advanced visualization options for detailed analysis
+
+Primary Classes and Functions:
+    - visualize_tuning_job: Main function to create visualizations for tuning jobs
+    - create_charts: Core chart creation functionality
+    - get_job_analytics_data: Retrieves and processes tuning job data
+
+Dependencies:
+    - altair: For creating interactive visualizations
+    - pandas: For data manipulation and analysis
+    - boto3: For AWS service interaction
+    - sagemaker: For accessing SageMaker resources
+"""
+from __future__ import absolute_import
+
+from typing import Union, List, Optional, Tuple
 import os
 import warnings
 import logging
+import altair as alt
+import pandas as pd
+import numpy as np
+import boto3
+import sagemaker
 from sagemaker.amtviz.job_metrics import get_cw_job_metrics

 warnings.filterwarnings("ignore")
@@ -36,7 +64,7 @@
 alt.data_transformers.disable_max_rows()
 altair_renderer = os.getenv("ALTAIR_RENDERER", "default")
-logger.info(f"Setting altair renderer to {altair_renderer}.")
+logger.info("Setting altair renderer to %s.", altair_renderer)
 alt.renderers.enable(altair_renderer)
@@ -44,6 +72,7 @@


 def _columnize(charts: List[alt.Chart], cols: int = 2) -> alt.VConcatChart:
+    """Arrange charts in columns."""
     return alt.vconcat(*[alt.hconcat(*charts[i : i + cols]) for i in range(0, len(charts), cols)])
@@ -72,7 +101,7 @@ def visualize_tuning_job(
     trials_df, tuned_parameters, objective_name, is_minimize = get_job_analytics_data(tuning_jobs)

     try:
-        from IPython import get_ipython
+        from IPython import get_ipython, display

         if get_ipython():
             # Running in a Jupyter Notebook
             display(trials_df.head(10))
@@ -84,7 +113,7 @@ def visualize_tuning_job(
         logger.info(trials_df.head(10).to_string())

     full_df = (
-        _prepare_consolidated_df(trials_df, objective_name) if not trials_only else pd.DataFrame()
+        _prepare_consolidated_df(trials_df) if not trials_only else pd.DataFrame()
     )

     trials_df.columns = trials_df.columns.map(_clean_parameter_name)
@@ -104,8 +133,7 @@ def visualize_tuning_job(

     if return_dfs:
         return charts, trials_df, full_df
-    else:
-        return charts
+    return charts


 def create_charts(
@@ -212,9 +240,7 @@ def create_charts(
     # If we have multiple tuning jobs, we also want to be able
     # to discriminate based on the individual tuning job, so
     # we just treat them as an additional tuning parameter
-    tuning_parameters = tuning_parameters.copy()
-    if multiple_tuning_jobs:
-        tuning_parameters.append("TuningJobName")
+    tuning_parameters = tuning_parameters.copy() + (["TuningJobName"] if multiple_tuning_jobs else [])

     # If we use early stopping and at least some jobs were
     # stopped early, we want to be able to discriminate
@@ -292,7 +318,7 @@ def render_detail_charts():
             if discrete:
                 # Individually coloring the values only if we don't already
                 # use the colors to show the different tuning jobs
logger.info(f"{parameter_type}, {tuning_parameter}") + logger.info("%s, %s", parameter_type, tuning_parameter) if not multiple_tuning_jobs: charts[-1] = charts[-1].encode(color=f"{tuning_parameter}:N") charts[-1] = ( @@ -383,15 +409,14 @@ def render_progress_chart(): ) .encode( x=alt.X("TrainingStartTime:T", scale=alt.Scale(nice=True)), - y=alt.Y(f"cum_objective:Q", scale=alt.Scale(zero=False, padding=1)), + y=alt.Y("cum_objective:Q", scale=alt.Scale(zero=False, padding=1)), stroke=alt.Stroke("TuningJobName:N", legend=None), ) ) if advanced: return cum_obj_chart + progress_chart - else: - return progress_chart + return progress_chart progress_chart = render_progress_chart() @@ -403,7 +428,7 @@ def render_progress_chart(): .transform_density(objective_name, bandwidth=0.01) .mark_area() .encode( - x=alt.X(f"value:Q", scale=objective_scale, title=objective_name), + x=alt.X("value:Q", scale=objective_scale, title=objective_name), y="density:Q", ) ) @@ -586,12 +611,20 @@ def render_progress_chart(): return overview_row & detail_rows & job_level_rows -# Ensure proper parameter name characters for altair 5+ def _clean_parameter_name(s): + """ Helper method to ensure proper parameter name characters for altair 5+ """ return s.replace(":", "_").replace(".", "_") def _prepare_training_job_metrics(jobs): + """Fetches and combines CloudWatch metrics for multiple training jobs. + + Args: + jobs (list): List of (job_name, start_time, end_time) tuples. + + Returns: + pandas.DataFrame: Combined metrics DataFrame with 'TrainingJobName' column. + """ df = pd.DataFrame() for job_name, start_time, end_time in jobs: job_df = get_cw_job_metrics( @@ -600,7 +633,7 @@ def _prepare_training_job_metrics(jobs): end_time=pd.Timestamp(end_time) + pd.DateOffset(hours=8), ) if job_df is None: - logger.info(f"No CloudWatch metrics for {job_name}. Skipping.") + logger.info("No CloudWatch metrics for %s. Skipping.", job_name) continue job_df["TrainingJobName"] = job_name @@ -608,7 +641,8 @@ def _prepare_training_job_metrics(jobs): return df -def _prepare_consolidated_df(trials_df, objective_name): +def _prepare_consolidated_df(trials_df): + """Merges training job metrics with trials data into a consolidated DataFrame.""" if trials_df.empty: return pd.DataFrame() @@ -630,6 +664,9 @@ def _prepare_consolidated_df(trials_df, objective_name): def _get_df(tuning_job_name, filter_out_stopped=False): + """Retrieves hyperparameter tuning job results and returns preprocessed DataFrame with + tuning metrics and parameters.""" + tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name) df = tuner.dataframe() @@ -670,8 +707,9 @@ def _get_df(tuning_job_name, filter_out_stopped=False): # A float then? df[parameter_name] = df[parameter_name].astype(float) - except Exception as e: - # Trouble, as this was not a number just pretending to be a string, but an actual string with charracters. Leaving the value untouched + except Exception: + # Trouble, as this was not a number just pretending to be a string, but an actual string with + # characters. 
+                        # characters. Leaving the value untouched
                         # Ex: Caught exception could not convert string to float: 'sqrt'
                         pass
@@ -695,7 +733,7 @@ def _get_tuning_job_names_with_parents(tuning_job_names):
             for cfg in tuning_job_result["WarmStartConfig"]["ParentHyperParameterTuningJobs"]
         ]
         if parent_jobs:
-            logger.info(f'Tuning job {tuning_job_name}\'s parents: {", ".join(parent_jobs)}')
+            logger.info("Tuning job %s's parents: %s", tuning_job_name, ", ".join(parent_jobs))
         all_tuning_job_names.extend([tuning_job_name, *parent_jobs])

     # return de-duplicated tuning job names
@@ -703,6 +741,17 @@ def _get_tuning_job_names_with_parents(tuning_job_names):


 def get_job_analytics_data(tuning_job_names):
+    """Retrieves and processes analytics data from hyperparameter tuning jobs.
+
+    Args:
+        tuning_job_names (str or list): Single tuning job name or list of names/tuner objects.
+
+    Returns:
+        tuple: (DataFrame with training results, tuned parameters list, objective name, is_minimize flag).
+
+    Raises:
+        ValueError: If tuning jobs have different objectives or optimization directions.
+    """
     if not isinstance(tuning_job_names, list):
         tuning_job_names = [tuning_job_names]
@@ -729,7 +778,7 @@ def get_job_analytics_data(tuning_job_names):
             HyperParameterTuningJobName=tuning_job_name
         )
         status = tuning_job_result["HyperParameterTuningJobStatus"]
-        logger.info(f"Tuning job {tuning_job_name:25s} status: {status}")
+        logger.info("Tuning job %-25s status: %s", tuning_job_name, status)

         df = pd.concat([df, _get_df(tuning_job_name)])
@@ -786,15 +835,16 @@ def get_job_analytics_data(tuning_job_names):
         df[objective_name] = df.pop("FinalObjectiveValue")

         # Fix potential issue with dates represented as objects, instead of a timestamp
-        # This can in other cases lead to https://www.markhneedham.com/blog/2020/01/10/altair-typeerror-object-type-date-not-json-serializable/
+        # This can in other cases lead to:
+        # https://www.markhneedham.com/blog/2020/01/10/altair-typeerror-object-type-date-not-json-serializable/
         # Have only observed this for TrainingEndTime, but will be on the lookout for TrainingStartTime as well now
         df["TrainingEndTime"] = pd.to_datetime(df["TrainingEndTime"])
         df["TrainingStartTime"] = pd.to_datetime(df["TrainingStartTime"])

     logger.info("")
-    logger.info(f"Number of training jobs with valid objective: {len(df)}")
-    logger.info(f"Lowest: {min(df[objective_name])} Highest {max(df[objective_name])}")
+    logger.info("Number of training jobs with valid objective: %d", len(df))
+    logger.info("Lowest: %s Highest %s", min(df[objective_name]), max(df[objective_name]))

     tuned_parameters = [_clean_parameter_name(tp) for tp in tuned_parameters]

-    return df, tuned_parameters, objective_name, is_minimize
\ No newline at end of file
+    return df, tuned_parameters, objective_name, is_minimize
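To make the return contract of visualize_tuning_job concrete, a usage sketch (job names hypothetical); with return_dfs=True the charts come back together with the trials and consolidated DataFrames, as implemented above:

    from sagemaker.amtviz.visualization import visualize_tuning_job

    charts, trials_df, full_df = visualize_tuning_job(
        ["my-tuning-job-1", "my-tuning-job-2"],  # hypothetical job names
        return_dfs=True,
    )
    charts  # renders the interactive Altair charts in a notebook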
diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py
index 17468b5593..35d468feeb 100644
--- a/src/sagemaker/tuner.py
+++ b/src/sagemaker/tuner.py
@@ -2119,7 +2119,10 @@ def _add_estimator(

     @staticmethod
     def visualize_jobs(
-        tuning_jobs: Union[str, 'sagemaker.tuner.HyperparameterTuner', List[Union[str, 'sagemaker.tuner.HyperparameterTuner']]],
+        tuning_jobs: Union[
+            str, 'sagemaker.tuner.HyperparameterTuner',
+            List[Union[str, 'sagemaker.tuner.HyperparameterTuner']]
+        ],
         return_dfs: bool = False,
         job_metrics: Optional[List[str]] = None,
         trials_only: bool = False,
@@ -2128,7 +2131,8 @@ def visualize_jobs(
         """Create an interactive visualization based on altair charts using the
         sagemaker.amtviz package.

         Args:
-            tuning_jobs (str or sagemaker.tuner.HyperparameterTuner or list[str, sagemaker.tuner.HyperparameterTuner]): One or more tuning jobs to create
+            tuning_jobs (str or sagemaker.tuner.HyperparameterTuner or
+                list[str or sagemaker.tuner.HyperparameterTuner]): One or more tuning jobs to create
                 visualization for.
             return_dfs: (bool): Option to return trials and full dataframe.
             job_metrics: (list[str]): Metrics to be used in charts.
@@ -2160,8 +2164,10 @@ def visualize_jobs(
         )

     def visualize_job(
-        self, return_dfs: bool = False,
-        job_metrics: Optional[List[str]] = None, trials_only: bool = False, advanced: bool = False
+        self, return_dfs: bool = False,
+        job_metrics: Optional[List[str]] = None,
+        trials_only: bool = False,
+        advanced: bool = False
     ):
         """Convenience method on instance level for visualize_jobs().

         See static method visualize_jobs().
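The two new tuner entry points mirror each other; a sketch of both call paths (job names hypothetical, tuner obtained via the existing attach API):

    from sagemaker.tuner import HyperparameterTuner

    # Static variant: accepts one or more job names and/or tuner objects.
    charts = HyperparameterTuner.visualize_jobs(["my-tuning-job-1", "my-tuning-job-2"])

    # Instance-level convenience wrapper.
    tuner = HyperparameterTuner.attach("my-tuning-job-1")  # hypothetical job name
    charts = tuner.visualize_job(advanced=True)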
diff --git a/tests/unit/test_tuner_visualize.py b/tests/unit/test_tuner_visualize.py
index ea9835a408..8f17f4d2db 100644
--- a/tests/unit/test_tuner_visualize.py
+++ b/tests/unit/test_tuner_visualize.py
@@ -10,6 +10,9 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Tests related to amtviz.visualization"""
+from __future__ import absolute_import
+
 import pandas as pd
 import pytest
 from mock import Mock, patch, MagicMock
@@ -24,7 +27,6 @@
     HYPERPARAMETER_RANGES,
     METRIC_DEFINITIONS
 )
-from sagemaker.session_settings import SessionSettings
 # Visualization specific imports
 from sagemaker.amtviz.visualization import visualize_tuning_job, get_job_analytics_data
 from tests.unit.tuner_visualize_test_utils import (
@@ -40,13 +42,13 @@
     FULL_DF_COLUMNS,
     TRIALS_DF_TRAINING_JOB_NAMES,
     TRIALS_DF_TRAINING_JOB_STATUSES,
-    TUNING_JOB_NAMES,
     TRIALS_DF_VALID_F1_VALUES,
     FILTERED_TUNING_JOB_DF_DATA,
     TUNING_RANGES
 )
 import altair as alt

+
 def create_sagemaker_session():
     boto_mock = Mock(name="boto_session")
     sms = Mock(
@@ -59,6 +61,7 @@ def create_sagemaker_session():
     sms.sagemaker_config = {}
     return sms

+
 @pytest.fixture()
 def sagemaker_session():
     return create_sagemaker_session()
@@ -82,6 +85,7 @@ def tuner(estimator):
         estimator, OBJECTIVE_METRIC_NAME, HYPERPARAMETER_RANGES, METRIC_DEFINITIONS
     )

+
 @pytest.fixture()
 def tuner2(estimator):
     return HyperparameterTuner(
@@ -130,7 +134,7 @@ def test_visualize_jobs_altair_not_installed(capsys):
 # Test basic method call if altair is installed
 def test_visualize_jobs_altair_installed(mock_visualize_tuning_job):
     # Mock successful import of altair
-    with patch("importlib.import_module") as mock_import:
+    with patch("importlib.import_module"):
         result = HyperparameterTuner.visualize_jobs(TUNING_JOB_NAMES)
         assert result == "mock_chart"
@@ -157,6 +161,7 @@ def test_visualize_jobs(mock_visualize_tuning_job):
         advanced=True
     )

+
 # Test the instance method visualize_job() on a stubbed tuner object
 def test_visualize_job(tuner, mock_visualize_tuning_job):
     # With default parameters
@@ -180,6 +185,7 @@ def test_visualize_job(tuner, mock_visualize_tuning_job):
         advanced=True
     )

+
 # Test the static method visualize_jobs() on multiple stubbed tuner objects
 def test_visualize_multiple_jobs(tuner, tuner2, mock_visualize_tuning_job):
     result = HyperparameterTuner.visualize_jobs([tuner, tuner2])
@@ -202,6 +208,7 @@ def test_visualize_multiple_jobs(tuner, tuner2, mock_visualize_tuning_job):
         advanced=True
     )

+
 # Test direct method call for basic chart return type and default render settings
 def test_visualize_tuning_job_analytics_data_results_in_altair_chart(mock_get_job_analytics_data):
     result = visualize_tuning_job("mock_job")
@@ -259,7 +266,11 @@ def test_visualize_tuning_job_trials_only(mock_get_job_analytics_data):

 # Check if all parameters are correctly passed to the (mocked) create_charts method
 @patch("sagemaker.amtviz.visualization.create_charts")
-def test_visualize_tuning_job_with_full_df(mock_create_charts, mock_get_job_analytics_data, mock_prepare_consolidated_df):
+def test_visualize_tuning_job_with_full_df(
+    mock_create_charts,
+    mock_get_job_analytics_data,
+    mock_prepare_consolidated_df
+):
     mock_create_charts.return_value = alt.Chart()
     visualize_tuning_job("dummy_job")
@@ -300,4 +311,4 @@ def test_get_job_analytics_data(mock_hyperparameter_tuning_job_analytics):
     assert df.columns.tolist() == TRIALS_DF_COLUMNS
     assert tuned_parameters == TUNED_PARAMETERS
     assert objective_name == OBJECTIVE_NAME
-    assert is_minimize is False
\ No newline at end of file
+    assert is_minimize is False
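The tests above keep everything offline by patching the analytics and chart layers of amtviz. A stripped-down sketch of that pattern (the DataFrame contents below are invented placeholders, not the real fixtures):

    from unittest.mock import patch

    import altair as alt
    import pandas as pd

    from sagemaker.amtviz.visualization import visualize_tuning_job

    # Toy stand-in for the trials fixture; column names follow the fixtures,
    # values are made up.
    trials_df = pd.DataFrame({
        "TrainingJobName": ["job-1"],
        "TrainingJobStatus": ["Completed"],
        "TrainingStartTime": [pd.Timestamp("2025-01-01 10:00:00")],
        "TrainingEndTime": [pd.Timestamp("2025-01-01 10:30:00")],
        "TrainingElapsedTimeSeconds": [1800],
        "TuningJobName": ["tuning-job-1"],
        "max-depth": [10],
        "valid-f1": [0.95],
    })

    # Stub the data retrieval and chart assembly so no AWS calls are made.
    with patch("sagemaker.amtviz.visualization.get_job_analytics_data") as mock_data, \
         patch("sagemaker.amtviz.visualization.create_charts") as mock_charts:
        mock_data.return_value = (trials_df, ["max-depth"], "valid-f1", False)
        mock_charts.return_value = alt.Chart()
        chart = visualize_tuning_job("dummy_job", trials_only=True)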
diff --git a/tests/unit/tuner_visualize_test_utils.py b/tests/unit/tuner_visualize_test_utils.py
index 3f66794a00..17a993717c 100644
--- a/tests/unit/tuner_visualize_test_utils.py
+++ b/tests/unit/tuner_visualize_test_utils.py
@@ -10,10 +10,12 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+from __future__ import absolute_import

 TRIALS_DF_COLUMNS = [
-    'criterion', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobName',
-    'TrainingJobStatus', 'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1'
+    'criterion', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobName',
+    'TrainingJobStatus',
+    'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1'
 ]

 FULL_DF_COLUMNS = [
@@ -34,9 +36,9 @@
 TUNING_JOB_NAMES = [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2]
 TRIALS_DF_VALID_F1_VALUES = [0.950, 0.896]

-FULL_DF_COLUMNS = ['value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf',
-                   'n-estimators', 'TrainingJobStatus', 'TrainingStartTime', 'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1']
-
+FULL_DF_COLUMNS = ['value', 'ts', 'label', 'rel_ts', 'TrainingJobName', 'criterion', 'max-depth', 'min-samples-leaf',
+                   'min-weight-fraction-leaf', 'n-estimators', 'TrainingJobStatus', 'TrainingStartTime',
+                   'TrainingEndTime', 'TrainingElapsedTimeSeconds', 'TuningJobName', 'valid-f1']

 TUNED_PARAMETERS = ['n-estimators', 'max-depth', 'min-samples-leaf', 'min-weight-fraction-leaf', 'criterion']
 OBJECTIVE_NAME = 'valid-f1'
@@ -90,12 +92,36 @@
     'TuningJobName': [TUNING_JOB_NAME_1, TUNING_JOB_NAME_2]
 }

-TUNING_RANGES = [{'Name': 'n-estimators', 'MinValue': '1', 'MaxValue': '200', 'ScalingType': 'Auto'},
-                 {'Name': 'max-depth', 'MinValue': '1', 'MaxValue': '20', 'ScalingType': 'Auto'},
-                 {'Name': 'min-samples-leaf', 'MinValue': '1', 'MaxValue': '10', 'ScalingType': 'Auto'},
-                 {'Name': 'min-weight-fraction-leaf', 'MinValue': '0.01', 'MaxValue': '0.5', 'ScalingType': 'Auto'},
-                 {'Name': 'criterion', 'Values': ['"gini"', '"entropy"', '"log_loss"']}]
-
+TUNING_RANGES = [
+    {
+        'Name': 'n-estimators',
+        'MinValue': '1',
+        'MaxValue': '200',
+        'ScalingType': 'Auto'
+    },
+    {
+        'Name': 'max-depth',
+        'MinValue': '1',
+        'MaxValue': '20',
+        'ScalingType': 'Auto'
+    },
+    {
+        'Name': 'min-samples-leaf',
+        'MinValue': '1',
+        'MaxValue': '10',
+        'ScalingType': 'Auto'
+    },
+    {
+        'Name': 'min-weight-fraction-leaf',
+        'MinValue': '0.01',
+        'MaxValue': '0.5',
+        'ScalingType': 'Auto'
+    },
+    {
+        'Name': 'criterion',
+        'Values': ['"gini"', '"entropy"', '"log_loss"']
+    }
+]

 TUNING_JOB_RESULT = {
     'HyperParameterTuningJobName': TUNING_JOB_NAME_1,
@@ -107,4 +133,4 @@
         }
     },
     'HyperParameterTuningJobStatus': 'Completed',
-}
\ No newline at end of file
+}
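If helpful while reviewing, the new unit tests can be exercised in isolation with pytest, e.g. pytest tests/unit/test_tuner_visualize.py.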