From 5163cec775bcc5397917331093506367d6afbaf7 Mon Sep 17 00:00:00 2001
From: Tommy Dang <quantumventuress@gmail.com>
Date: Mon, 13 May 2024 22:01:28 -0700
Subject: [PATCH] 3

---
 .env.dev                                      |   5 +-
 .../overview/dashboard/block_layout.yaml      |  47 ++--
 .../dashboard/block_layout.yaml               | 119 +++++++++
 .../dashboard/block_layout.yaml               |  24 +-
 .../unit_2_training/global_data_products.yaml |   2 +-
 mlops/unit_3_observability/charts/__init__.py |   0
 ...pipeline_runs_daily_sklearn_training_a3.py |   4 +
 ...pipeline_runs_daily_sklearn_training_o5.py |   4 +
 .../charts/feature_profiles_for_ingest.py     |  61 +++++
 .../charts/ingest_time_series_bar_chart_y0.py |   0
 .../charts/missing_values_for_ingest.py       |   8 +
 .../charts/most_frequent_values_for_ingest.py |  21 ++
 .../charts/prepare_histogram_k4.py            |  16 ++
 .../charts/runs_by_model.py                   |   0
 .../charts/shap_values.py                     |  34 +++
 .../charts/shap_values_bars.py                |  34 +++
 .../charts/shap_values_force_plot.py          |  64 +++++
 .../charts/summary_overview_for_ingest.py     |  17 ++
 .../charts/unique_values_for_ingest.py        |   2 +
 .../charts/xgboost_metrics.py                 |   0
 .../charts/xgboost_metrics_by_runs.py         |   2 +-
 .../charts/xgboost_training_runs_hourly.py    |   2 +-
 mlops/unit_3_observability/custom/__init__.py |   0
 .../custom/dashboard_data_source.py           |  23 ++
 .../custom/load_models.py                     |  28 ++
 .../data_exporters/__init__.py                |   0
 .../data_exporters/training.py                |  29 +++
 .../data_exporters/xgboost.py                 |  35 +++
 .../global_data_products.yaml                 |   8 +
 mlops/unit_3_observability/io_config.yaml     |   8 +
 .../pipelines/data_preparation/__init__.py    |   0
 .../pipelines/data_preparation/metadata.yaml  | 242 ++++++++++++++++++
 .../pipelines/sklearn_training/__init__.py    |   0
 .../sklearn_training/interactions.yaml        |   2 +
 .../pipelines/sklearn_training/metadata.yaml  | 106 ++++++++
 .../pipelines/xgboost_training/__init__.py    |   0
 .../xgboost_training/interactions.yaml        |   2 +
 .../pipelines/xgboost_training/metadata.yaml  | 105 ++++++++
 .../transformers/__init__.py                  |   0
 .../hyperparameter_tuning/sklearn.py          |  39 +++
 .../hyperparameter_tuning/xgboost.py          |  38 +++
 41 files changed, 1091 insertions(+), 40 deletions(-)
 rename mlops/{unit_5_running => }/presenters/overview/dashboard/block_layout.yaml (65%)
 create mode 100644 mlops/presenters/pipelines/sklearn_training/dashboard/block_layout.yaml
 rename mlops/{unit_5_running => }/presenters/pipelines/xgboost_training/dashboard/block_layout.yaml (72%)
 create mode 100644 mlops/unit_3_observability/charts/__init__.py
 create mode 100644 mlops/unit_3_observability/charts/completed_pipeline_runs_daily_sklearn_training_a3.py
 create mode 100644 mlops/unit_3_observability/charts/failed_pipeline_runs_daily_sklearn_training_o5.py
 create mode 100644 mlops/unit_3_observability/charts/feature_profiles_for_ingest.py
 create mode 100644 mlops/unit_3_observability/charts/ingest_time_series_bar_chart_y0.py
 create mode 100644 mlops/unit_3_observability/charts/missing_values_for_ingest.py
 create mode 100644 mlops/unit_3_observability/charts/most_frequent_values_for_ingest.py
 create mode 100644 mlops/unit_3_observability/charts/prepare_histogram_k4.py
 rename mlops/{unit_5_running => unit_3_observability}/charts/runs_by_model.py (100%)
 create mode 100644 mlops/unit_3_observability/charts/shap_values.py
 create mode 100644 mlops/unit_3_observability/charts/shap_values_bars.py
 create mode 100644 mlops/unit_3_observability/charts/shap_values_force_plot.py
 create mode 100644 mlops/unit_3_observability/charts/summary_overview_for_ingest.py
 create mode 100644 mlops/unit_3_observability/charts/unique_values_for_ingest.py
 rename mlops/{unit_5_running => unit_3_observability}/charts/xgboost_metrics.py (100%)
 rename mlops/{unit_5_running => unit_3_observability}/charts/xgboost_metrics_by_runs.py (98%)
 rename mlops/{unit_5_running => unit_3_observability}/charts/xgboost_training_runs_hourly.py (97%)
 create mode 100755 mlops/unit_3_observability/custom/__init__.py
 create mode 100644 mlops/unit_3_observability/custom/dashboard_data_source.py
 create mode 100644 mlops/unit_3_observability/custom/load_models.py
 create mode 100755 mlops/unit_3_observability/data_exporters/__init__.py
 create mode 100644 mlops/unit_3_observability/data_exporters/training.py
 create mode 100644 mlops/unit_3_observability/data_exporters/xgboost.py
 create mode 100644 mlops/unit_3_observability/global_data_products.yaml
 create mode 100755 mlops/unit_3_observability/io_config.yaml
 create mode 100755 mlops/unit_3_observability/pipelines/data_preparation/__init__.py
 create mode 100755 mlops/unit_3_observability/pipelines/data_preparation/metadata.yaml
 create mode 100755 mlops/unit_3_observability/pipelines/sklearn_training/__init__.py
 create mode 100644 mlops/unit_3_observability/pipelines/sklearn_training/interactions.yaml
 create mode 100755 mlops/unit_3_observability/pipelines/sklearn_training/metadata.yaml
 create mode 100755 mlops/unit_3_observability/pipelines/xgboost_training/__init__.py
 create mode 100644 mlops/unit_3_observability/pipelines/xgboost_training/interactions.yaml
 create mode 100755 mlops/unit_3_observability/pipelines/xgboost_training/metadata.yaml
 create mode 100755 mlops/unit_3_observability/transformers/__init__.py
 create mode 100644 mlops/unit_3_observability/transformers/hyperparameter_tuning/sklearn.py
 create mode 100644 mlops/unit_3_observability/transformers/hyperparameter_tuning/xgboost.py

diff --git a/.env.dev b/.env.dev
index 4707197e4..bfe1a6daf 100644
--- a/.env.dev
+++ b/.env.dev
@@ -12,14 +12,15 @@ PYTHONPATH="${MAGE_CODE_PATH}/${PROJECT_NAME}:${PYTHONPATH}"
 MAGE_PRESENTERS_DIRECTORY="$PROJECT_NAME/presenters"
 
 # Database
+POSTGRES_HOST=magic-database
 POSTGRES_DB=magic
 POSTGRES_PASSWORD=password
 POSTGRES_USER=postgres
-MAGE_DATABASE_CONNECTION_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@magic-database:5432/${POSTGRES_DB}"
+MAGE_DATABASE_CONNECTION_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:5432/${POSTGRES_DB}"
 
 # Experiments
 EXPERIMENTS_DB=experiments
-EXPERIMENTS_TRACKING_URI="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@magic-database:5432/${EXPERIMENTS_DB}"
+EXPERIMENTS_TRACKING_URI="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:5432/${EXPERIMENTS_DB}"
 
 # Alerts
 SMTP_EMAIL=
diff --git a/mlops/unit_5_running/presenters/overview/dashboard/block_layout.yaml b/mlops/presenters/overview/dashboard/block_layout.yaml
similarity index 65%
rename from mlops/unit_5_running/presenters/overview/dashboard/block_layout.yaml
rename to mlops/presenters/overview/dashboard/block_layout.yaml
index 5cd2541b6..f45199676 100644
--- a/mlops/unit_5_running/presenters/overview/dashboard/block_layout.yaml
+++ b/mlops/presenters/overview/dashboard/block_layout.yaml
@@ -3,7 +3,7 @@ blocks:
     configuration:
       chart_type: pie chart
       group_by:
-      - model
+        - model
     data_source:
       type: chart_code
     error: null
@@ -15,12 +15,12 @@ blocks:
     configuration:
       chart_type: time series line chart
       group_by:
-      - start_time
+        - start_time
       metrics:
-      - aggregation: average
-        column: mse
-      - aggregation: average
-        column: rmse
+        - aggregation: average
+          column: mse
+        - aggregation: average
+          column: rmse
       time_interval: original
     data_source:
       type: chart_code
@@ -34,12 +34,12 @@ blocks:
     configuration:
       chart_type: bar chart
       group_by:
-      - run_id
+        - run_id
       metrics:
-      - aggregation: max
-        column: rmse
-      - aggregation: max
-        column: mse
+        - aggregation: max
+          column: rmse
+        - aggregation: max
+          column: mse
     data_source:
       type: chart_code
     error: null
@@ -53,25 +53,26 @@ blocks:
       chart_style: horizontal
       chart_type: bar chart
       group_by:
-      - start_time_hour
+        - start_time_hour
       metrics:
-      - aggregation: count_distinct
-        column: run_id
+        - aggregation: count_distinct
+          column: run_id
       y_sort_order: descending
     data_source:
-      refresh_interval: '60000'
+      refresh_interval: "60000"
       type: chart_code
     error: null
     name: XGBoost training runs hourly
+    name_new: XGBoost training runs hourly
     skip_render: false
     type: chart
     uuid: xgboost_training_runs_hourly
 layout:
-- - block_uuid: xgboost_metrics
-    width: 1
-  - block_uuid: xgboost_metrics_by_runs
-    width: 1
-- - block_uuid: runs_by_model
-    width: 1
-  - block_uuid: xgboost_training_runs_hourly
-    width: 1
+  - - block_uuid: xgboost_metrics
+      width: 1
+    - block_uuid: xgboost_metrics_by_runs
+      width: 1
+  - - block_uuid: xgboost_training_runs_hourly
+      width: 1
+    - block_uuid: runs_by_model
+      width: 1
diff --git a/mlops/presenters/pipelines/sklearn_training/dashboard/block_layout.yaml b/mlops/presenters/pipelines/sklearn_training/dashboard/block_layout.yaml
new file mode 100644
index 000000000..2ca2d2a4c
--- /dev/null
+++ b/mlops/presenters/pipelines/sklearn_training/dashboard/block_layout.yaml
@@ -0,0 +1,119 @@
+blocks:
+  completed_pipeline_runs_daily_sklearn_training_a3:
+    configuration:
+      chart_type: time series line chart
+      group_by:
+      - execution_date
+      metrics:
+      - aggregation: count_distinct
+        column: id
+      time_interval: day
+    data_source:
+      pipeline_uuid: sklearn_training
+      type: pipeline_runs
+    name: Completed pipeline runs daily
+    type: chart
+    uuid: completed_pipeline_runs_daily_sklearn_training_a3
+  failed_pipeline_runs_daily_sklearn_training_o5:
+    configuration:
+      chart_type: time series line chart
+      group_by:
+      - execution_date
+      metrics:
+      - aggregation: count_distinct
+        column: id
+      time_interval: day
+    data_source:
+      pipeline_uuid: sklearn_training
+      type: pipeline_runs
+    name: Failed pipeline runs daily
+    type: chart
+    uuid: failed_pipeline_runs_daily_sklearn_training_o5
+  pipeline_run_status_sklearn_training_n3:
+    configuration:
+      chart_style: horizontal
+      chart_type: bar chart
+      group_by:
+      - status
+      metrics:
+      - aggregation: count_distinct
+        column: id
+      y_sort_order: descending
+    data_source:
+      pipeline_uuid: sklearn_training
+      type: pipeline_runs
+    name: Pipeline run status
+    type: chart
+    uuid: pipeline_run_status_sklearn_training_n3
+  pipeline_runs_daily_sklearn_training_p1:
+    configuration:
+      chart_type: time series line chart
+      group_by:
+      - execution_date
+      metrics:
+      - aggregation: count_distinct
+        column: id
+      time_interval: day
+    data_source:
+      pipeline_uuid: sklearn_training
+      type: pipeline_runs
+    name: Pipeline runs daily
+    type: chart
+    uuid: pipeline_runs_daily_sklearn_training_p1
+  trigger_active_status_sklearn_training_r5:
+    configuration:
+      chart_type: bar chart
+      group_by:
+      - status
+      metrics:
+      - aggregation: count_distinct
+        column: id
+      y_sort_order: descending
+    data_source:
+      pipeline_uuid: sklearn_training
+      type: pipeline_schedules
+    name: Trigger active status
+    type: chart
+    uuid: trigger_active_status_sklearn_training_r5
+  trigger_frequency_sklearn_training_o8:
+    configuration:
+      chart_style: horizontal
+      chart_type: bar chart
+      group_by:
+      - schedule_interval
+      metrics:
+      - aggregation: count_distinct
+        column: id
+      y_sort_order: descending
+    data_source:
+      pipeline_uuid: sklearn_training
+      type: pipeline_schedules
+    name: Trigger frequency
+    type: chart
+    uuid: trigger_frequency_sklearn_training_o8
+  trigger_types_sklearn_training_s4:
+    configuration:
+      chart_type: pie chart
+      group_by:
+      - schedule_type
+    data_source:
+      pipeline_uuid: sklearn_training
+      type: pipeline_schedules
+    name: Trigger types
+    type: chart
+    uuid: trigger_types_sklearn_training_s4
+layout:
+- - block_uuid: trigger_active_status_sklearn_training_r5
+    width: 1
+  - block_uuid: trigger_types_sklearn_training_s4
+    width: 1
+  - block_uuid: trigger_frequency_sklearn_training_o8
+    width: 2
+- - block_uuid: pipeline_run_status_sklearn_training_n3
+    width: 1
+  - block_uuid: pipeline_runs_daily_sklearn_training_p1
+    width: 2
+- - block_uuid: completed_pipeline_runs_daily_sklearn_training_a3
+    width: 1
+  - block_uuid: failed_pipeline_runs_daily_sklearn_training_o5
+    width: 1
diff --git a/mlops/unit_5_running/presenters/pipelines/xgboost_training/dashboard/block_layout.yaml b/mlops/presenters/pipelines/xgboost_training/dashboard/block_layout.yaml
similarity index 72%
rename from mlops/unit_5_running/presenters/pipelines/xgboost_training/dashboard/block_layout.yaml
rename to mlops/presenters/pipelines/xgboost_training/dashboard/block_layout.yaml
index 71b9ae896..16efe627f 100644
--- a/mlops/unit_5_running/presenters/pipelines/xgboost_training/dashboard/block_layout.yaml
+++ b/mlops/presenters/pipelines/xgboost_training/dashboard/block_layout.yaml
@@ -3,10 +3,10 @@ blocks:
     configuration:
       chart_type: custom
     data_source:
-      block_uuid: chart_source
+      block_uuid: dashboard_data_source
       pipeline_schedule_id: null
       pipeline_uuid: xgboost_training
-      refresh_interval: '60000'
+      refresh_interval: "60000"
       type: block
     error: null
     name: SHAP values
@@ -18,7 +18,7 @@ blocks:
     configuration:
       chart_type: custom
     data_source:
-      block_uuid: chart_source
+      block_uuid: dashboard_data_source
       pipeline_schedule_id: null
       pipeline_uuid: xgboost_training
       type: block
@@ -32,7 +32,7 @@ blocks:
     configuration:
       chart_type: custom
     data_source:
-      block_uuid: chart_source
+      block_uuid: dashboard_data_source
       pipeline_schedule_id: null
       pipeline_uuid: xgboost_training
       type: block
@@ -42,11 +42,11 @@ blocks:
     type: chart
     uuid: shap_values_force_plot
 layout:
-- - block_uuid: shap_values
-    height: 500
-    width: 1
-  - block_uuid: shap_values_bars
-    height: 500
-    width: 1
-- - block_uuid: shap_values_force_plot
-    width: 1
+  - - block_uuid: shap_values
+      height: 500
+      width: 1
+    - block_uuid: shap_values_bars
+      height: 500
+      width: 1
+  - - block_uuid: shap_values_force_plot
+      width: 1
diff --git a/mlops/unit_2_training/global_data_products.yaml b/mlops/unit_2_training/global_data_products.yaml
index b82f0d3aa..514db3f2d 100644
--- a/mlops/unit_2_training/global_data_products.yaml
+++ b/mlops/unit_2_training/global_data_products.yaml
@@ -2,7 +2,7 @@ training_set:
   object_type: pipeline
   object_uuid: data_preparation
   outdated_after:
-    seconds: 3600
+    seconds: 600
   settings:
     build:
       partitions: 1
diff --git a/mlops/unit_3_observability/charts/__init__.py b/mlops/unit_3_observability/charts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/mlops/unit_3_observability/charts/completed_pipeline_runs_daily_sklearn_training_a3.py b/mlops/unit_3_observability/charts/completed_pipeline_runs_daily_sklearn_training_a3.py
new file mode 100644
index 000000000..db6dbba66
--- /dev/null
+++ b/mlops/unit_3_observability/charts/completed_pipeline_runs_daily_sklearn_training_a3.py
@@ -0,0 +1,4 @@
+# Chart data source: restrict the pipeline-run rows to completed runs.
+@data_source
+def d(df):
+    return df.loc[df['status'].eq('completed')]
diff --git a/mlops/unit_3_observability/charts/failed_pipeline_runs_daily_sklearn_training_o5.py b/mlops/unit_3_observability/charts/failed_pipeline_runs_daily_sklearn_training_o5.py
new file mode 100644
index 000000000..2d61fc8ce
--- /dev/null
+++ b/mlops/unit_3_observability/charts/failed_pipeline_runs_daily_sklearn_training_o5.py
@@ -0,0 +1,4 @@
+# Chart data source: restrict the pipeline-run rows to failed runs.
+@data_source
+def d(df):
+    return df.loc[df['status'].eq('failed')]
diff --git a/mlops/unit_3_observability/charts/feature_profiles_for_ingest.py b/mlops/unit_3_observability/charts/feature_profiles_for_ingest.py
new file mode 100644
index 000000000..ad72b4884
--- /dev/null
+++ b/mlops/unit_3_observability/charts/feature_profiles_for_ingest.py
@@ -0,0 +1,61 @@
+import statistics
+from mage_ai.data_cleaner.column_types.column_type_detector import infer_column_types
+from mage_ai.data_preparation.models.constants import DATAFRAME_ANALYSIS_MAX_COLUMNS
+from mage_ai.shared.parsers import convert_matrix_to_dataframe
+
+
+# Profile each column of the upstream frame (capped at
+# DATAFRAME_ANALYSIS_MAX_COLUMNS columns): `columns` holds the profiled column
+# names, `stats` the row labels, and rows[i][j] is stats[i] for columns[j].
+df_1 = convert_matrix_to_dataframe(df_1)
+df_1 = df_1.iloc[:, :DATAFRAME_ANALYSIS_MAX_COLUMNS]
+columns_and_types = infer_column_types(df_1).items()
+columns = []
+stats = ['Type', 'Missing values', 'Unique values', 'Min', 'Max', 'Mean', 'Median', 'Mode']
+rows = [[] for _ in stats]
+
+for col, col_type in columns_and_types:
+    series = df_1[col]
+    not_null = series[series.notnull()]
+
+    # Columns without any value are left out entirely; the name is recorded
+    # only after this check so `columns` stays aligned with `rows`.
+    if len(not_null) == 0:
+        continue
+    columns.append(col)
+
+    mean = None
+    median = None
+
+    if col_type.value in ['number', 'number_with_decimals']:
+        if str(series.dtype) == 'object':
+            if col_type.value == 'number_with_decimals':
+                series = series.astype('float64')
+                not_null = not_null.astype('float64')
+            else:
+                series = series.astype('int64')
+                not_null = not_null.astype('int64')
+
+        count = len(not_null.index)
+        mean = round(not_null.sum() / count, 2)
+        # True median: averages the two middle values for an even count.
+        median = not_null.median()
+        min_value = round(series.min(), 2)
+        max_value = round(series.max(), 2)
+    else:
+        # Non-numeric columns are compared through their string form.
+        min_value = not_null.astype(str).min()
+        max_value = not_null.astype(str).max()
+
+    # Most frequent value; ties on count fall back to the larger value.
+    _, mode = sorted(
+        [(v, k) for k, v in not_null.value_counts().items()],
+        reverse=True,
+    )[0]
+
+    profile = [
+        col_type.value, len(series[series.isna()].index), len(series.unique()),
+        min_value, max_value, mean, median, mode,
+    ]
+    for idx, value in enumerate(profile):
+        rows[idx].append(value)
diff --git a/mlops/unit_3_observability/charts/ingest_time_series_bar_chart_y0.py b/mlops/unit_3_observability/charts/ingest_time_series_bar_chart_y0.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/mlops/unit_3_observability/charts/missing_values_for_ingest.py b/mlops/unit_3_observability/charts/missing_values_for_ingest.py
new file mode 100644
index 000000000..91dddf5df
--- /dev/null
+++ b/mlops/unit_3_observability/charts/missing_values_for_ingest.py
@@ -0,0 +1,8 @@
+number_of_rows = len(df_1.index)  # total row count; denominator for the percentages
+columns_with_mising_values = []  # names of columns with at least one missing value
+percentage_of_missing_values = []  # matching share of missing rows, in percent
+for col in df_1.columns:
+    missing = df_1[col].isna().sum()
+    if missing > 0:  # columns without gaps are omitted from both lists
+        columns_with_mising_values.append(col)
+        percentage_of_missing_values.append(100 * missing / number_of_rows)
diff --git a/mlops/unit_3_observability/charts/most_frequent_values_for_ingest.py b/mlops/unit_3_observability/charts/most_frequent_values_for_ingest.py
new file mode 100644
index 000000000..b482bb4b9
--- /dev/null
+++ b/mlops/unit_3_observability/charts/most_frequent_values_for_ingest.py
@@ -0,0 +1,21 @@
+from mage_ai.data_preparation.models.constants import DATAFRAME_ANALYSIS_MAX_COLUMNS
+from mage_ai.shared.parsers import convert_matrix_to_dataframe
+
+
+df_1 = convert_matrix_to_dataframe(df_1)
+columns = ['mode value', 'frequency', '% of values']
+column_index = []
+rows = []
+for col in df_1.columns[:DATAFRAME_ANALYSIS_MAX_COLUMNS]:
+    value_counts = df_1[col].value_counts()  # most frequent value comes first
+    if len(value_counts.index) == 0:  # nothing was counted for this column
+        continue
+    column_value = value_counts.index[0]
+    value = value_counts[column_value]
+    number_of_rows = df_1[col].count()  # denominator for the percentage
+    column_index.append(col)
+    rows.append([  # cell order must follow the headers in `columns`
+        column_value,
+        value,
+        f'{round(100 * value / number_of_rows, 2)}%',
+    ])
diff --git a/mlops/unit_3_observability/charts/prepare_histogram_k4.py b/mlops/unit_3_observability/charts/prepare_histogram_k4.py
new file mode 100644
index 000000000..a95db5c30
--- /dev/null
+++ b/mlops/unit_3_observability/charts/prepare_histogram_k4.py
@@ -0,0 +1,16 @@
+import pandas as pd
+
+from mage_ai.shared.parsers import convert_matrix_to_dataframe
+
+
+if isinstance(df_1, list) and len(df_1) >= 1:  # a list input is reduced to its first item
+    item = df_1[0]
+    if isinstance(item, pd.Series):
+        item = item.to_frame()
+    elif not isinstance(item, pd.DataFrame):
+        item = convert_matrix_to_dataframe(item)  # anything else goes through Mage's parser
+    df_1 = item
+
+columns = df_1.columns
+col = 'trip_distance'  # the only column this histogram reads
+x = df_1[df_1[col] <= 20][col]  # values above 20 are excluded from the plotted series
diff --git a/mlops/unit_5_running/charts/runs_by_model.py b/mlops/unit_3_observability/charts/runs_by_model.py
similarity index 100%
rename from mlops/unit_5_running/charts/runs_by_model.py
rename to mlops/unit_3_observability/charts/runs_by_model.py
diff --git a/mlops/unit_3_observability/charts/shap_values.py b/mlops/unit_3_observability/charts/shap_values.py
new file mode 100644
index 000000000..ece1cd41f
--- /dev/null
+++ b/mlops/unit_3_observability/charts/shap_values.py
@@ -0,0 +1,34 @@
+import base64
+import io
+from typing import Tuple
+
+import matplotlib.pyplot as plt
+import numpy as np
+import shap
+from pandas import Series
+from scipy.sparse._csr import csr_matrix
+from xgboost import Booster
+
+
+@render(render_type='jpeg')
+def create_visualization(inputs: Tuple[Booster, csr_matrix, Series], *args, **kwargs):
+    """Render a SHAP summary plot and return it as a base64-encoded JPEG string."""
+    model, X, _ = inputs
+
+    # Only the first row is explained. A random 10% sample used to be drawn
+    # here but was overwritten before use, so that dead work is removed.
+    X_sampled = X[:1]
+
+    explainer = shap.TreeExplainer(model)
+    shap_values = explainer.shap_values(X_sampled)
+    shap.summary_plot(shap_values, X_sampled)
+
+    # Capture whatever pyplot currently holds as an in-memory JPEG.
+    image_buffer = io.BytesIO()
+    try:
+        plt.savefig(image_buffer, format='jpg')
+    finally:
+        # Close the figure even if saving fails so figures do not pile up.
+        plt.close()
+
+    return base64.b64encode(image_buffer.getvalue()).decode()
diff --git a/mlops/unit_3_observability/charts/shap_values_bars.py b/mlops/unit_3_observability/charts/shap_values_bars.py
new file mode 100644
index 000000000..31b75f488
--- /dev/null
+++ b/mlops/unit_3_observability/charts/shap_values_bars.py
@@ -0,0 +1,34 @@
+import base64
+import io
+from typing import Tuple
+
+import matplotlib.pyplot as plt
+import numpy as np
+import shap
+from pandas import Series
+from scipy.sparse._csr import csr_matrix
+from xgboost import Booster
+
+
+@render(render_type='jpeg')
+def create_visualization(inputs: Tuple[Booster, csr_matrix, Series], *args, **kwargs):
+    """Render a SHAP bar summary plot and return it as a base64-encoded JPEG string."""
+    model, X, _ = inputs
+
+    # Only the first row is explained. A random 10% sample used to be drawn
+    # here but was overwritten before use, so that dead work is removed.
+    X_sampled = X[:1]
+
+    explainer = shap.TreeExplainer(model)
+    shap_values = explainer.shap_values(X_sampled)
+    shap.summary_plot(shap_values, X_sampled, plot_type='bar')
+
+    # Capture whatever pyplot currently holds as an in-memory JPEG.
+    image_buffer = io.BytesIO()
+    try:
+        plt.savefig(image_buffer, format='jpg')
+    finally:
+        # Close the figure even if saving fails so figures do not pile up.
+        plt.close()
+
+    return base64.b64encode(image_buffer.getvalue()).decode()
diff --git a/mlops/unit_3_observability/charts/shap_values_force_plot.py b/mlops/unit_3_observability/charts/shap_values_force_plot.py
new file mode 100644
index 000000000..681a9e3a7
--- /dev/null
+++ b/mlops/unit_3_observability/charts/shap_values_force_plot.py
@@ -0,0 +1,64 @@
+import base64
+import io
+from typing import Tuple
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import shap
+from pandas import Series
+from scipy.sparse._csr import csr_matrix
+from xgboost import Booster
+
+from mage_ai.shared.parsers import convert_matrix_to_dataframe
+
+
+@render(render_type='jpeg')
+def create_visualization(inputs: Tuple[Booster, csr_matrix, Series], *args, **kwargs):
+    """Render a SHAP force plot of the ten most important features for the first row.
+
+    Returns the plot as a base64-encoded JPEG string, matching ``render_type``.
+    """
+    model, X, _ = inputs
+
+    # Only the first row is explained. A random 10% sample used to be drawn
+    # here but was overwritten before use, so that dead work is removed.
+    X_sampled = convert_matrix_to_dataframe(X[:1])
+
+    explainer = shap.TreeExplainer(model)
+    shap_values = explainer.shap_values(X_sampled)
+
+    # Calculate the mean absolute SHAP values for each feature
+    shap_sum = np.abs(shap_values).mean(axis=0)
+
+    # The full frame supplies the column names and the plotted row's values.
+    X = convert_matrix_to_dataframe(X)
+
+    importance_df = pd.DataFrame([X.columns.tolist(), shap_sum.tolist()]).T
+    importance_df.columns = ['column_name', 'shap_importance']
+    importance_df = importance_df.sort_values('shap_importance', ascending=False)
+
+    # Get the names of the top 10 most important features
+    top_n_features = importance_df['column_name'].head(10).tolist()
+
+    # Reduce the original X to these top 10 features
+    X_top_n = X[top_n_features]
+
+    idx = 0  # row of interest; must stay within the rows that were explained
+
+    # Generate the force plot for this specific instance and only for the top N features
+    shap.force_plot(
+        explainer.expected_value,
+        shap_values[idx, :][np.newaxis, X.columns.get_indexer(top_n_features)],
+        X_top_n.iloc[idx, :],
+        matplotlib=True
+    )
+
+    string_bytes = io.BytesIO()
+    try:
+        # Save as JPEG so the payload matches the declared render_type.
+        plt.savefig(string_bytes, format='jpg')
+    finally:
+        plt.close()
+
+    return base64.b64encode(string_bytes.getvalue()).decode()
diff --git a/mlops/unit_3_observability/charts/summary_overview_for_ingest.py b/mlops/unit_3_observability/charts/summary_overview_for_ingest.py
new file mode 100644
index 000000000..769c5ab9f
--- /dev/null
+++ b/mlops/unit_3_observability/charts/summary_overview_for_ingest.py
@@ -0,0 +1,17 @@
+from mage_ai.data_cleaner.column_types.column_type_detector import infer_column_types
+
+
+headers = ['value']
+stats = ['Columns', 'Rows']  # row labels; extended below with one label per column type
+rows = [[len(df_1.columns)], [len(df_1.index)]]  # one single-cell row per label in `stats`
+
+col_counts = {}  # column type name -> number of columns inferred as that type
+for col, col_type in infer_column_types(df_1).items():
+    col_type_name = col_type.value
+    if not col_counts.get(col_type_name):
+        col_counts[col_type_name] = 0
+    col_counts[col_type_name] += 1
+
+for col_type, count in sorted(col_counts.items()):  # ordered by type name
+    stats.append(f'# of {col_type}')
+    rows.append([count])
diff --git a/mlops/unit_3_observability/charts/unique_values_for_ingest.py b/mlops/unit_3_observability/charts/unique_values_for_ingest.py
new file mode 100644
index 000000000..6ffb530dc
--- /dev/null
+++ b/mlops/unit_3_observability/charts/unique_values_for_ingest.py
@@ -0,0 +1,2 @@
+columns = df_1.columns  # column labels, in frame order
+number_of_unique_values = [df_1[col].nunique() for col in columns]  # one count per entry of `columns`
diff --git a/mlops/unit_5_running/charts/xgboost_metrics.py b/mlops/unit_3_observability/charts/xgboost_metrics.py
similarity index 100%
rename from mlops/unit_5_running/charts/xgboost_metrics.py
rename to mlops/unit_3_observability/charts/xgboost_metrics.py
diff --git a/mlops/unit_5_running/charts/xgboost_metrics_by_runs.py b/mlops/unit_3_observability/charts/xgboost_metrics_by_runs.py
similarity index 98%
rename from mlops/unit_5_running/charts/xgboost_metrics_by_runs.py
rename to mlops/unit_3_observability/charts/xgboost_metrics_by_runs.py
index f71a79fcc..4520f50d3 100644
--- a/mlops/unit_5_running/charts/xgboost_metrics_by_runs.py
+++ b/mlops/unit_3_observability/charts/xgboost_metrics_by_runs.py
@@ -29,7 +29,7 @@ def experiments(*args, **kwargs):
 ON runs.run_uuid = metrics.run_uuid
 
 WHERE tags.key = 'model'
-AND tags.value = 'xgboost'
+AND tags.value = 'Booster'
 
 ORDER BY runs.start_time ASC
 """
diff --git a/mlops/unit_5_running/charts/xgboost_training_runs_hourly.py b/mlops/unit_3_observability/charts/xgboost_training_runs_hourly.py
similarity index 97%
rename from mlops/unit_5_running/charts/xgboost_training_runs_hourly.py
rename to mlops/unit_3_observability/charts/xgboost_training_runs_hourly.py
index 688669d93..e2c4ebcec 100644
--- a/mlops/unit_5_running/charts/xgboost_training_runs_hourly.py
+++ b/mlops/unit_3_observability/charts/xgboost_training_runs_hourly.py
@@ -31,7 +31,7 @@ def experiments(*args, **kwargs):
 ON runs.run_uuid = metrics.run_uuid
 
 WHERE tags.key = 'model'
-AND tags.value = 'xgboost'
+AND tags.value = 'Booster'
 
 ORDER BY runs.start_time ASC
 """
diff --git a/mlops/unit_3_observability/custom/__init__.py b/mlops/unit_3_observability/custom/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/unit_3_observability/custom/dashboard_data_source.py b/mlops/unit_3_observability/custom/dashboard_data_source.py
new file mode 100644
index 000000000..d7b1a2238
--- /dev/null
+++ b/mlops/unit_3_observability/custom/dashboard_data_source.py
@@ -0,0 +1,23 @@
+from typing import Dict, Tuple, Union
+
+from pandas import Series
+from scipy.sparse import csr_matrix
+from xgboost import Booster
+
+if 'custom' not in globals():
+    from mage_ai.data_preparation.decorators import custom
+
+
+@custom
+def source(
+    settings: Tuple[
+        Dict[str, Union[bool, float, int, str]],
+        csr_matrix,
+        Series,
+    ],
+    model: Booster,
+    **kwargs,
+) -> Tuple[Booster, csr_matrix, Series]:
+    """Bundle the model with the training features and target for the dashboard charts."""
+    _, X_train, y_train = settings
+    return model, X_train, y_train
diff --git a/mlops/unit_3_observability/custom/load_models.py b/mlops/unit_3_observability/custom/load_models.py
new file mode 100644
index 000000000..c52316b15
--- /dev/null
+++ b/mlops/unit_3_observability/custom/load_models.py
@@ -0,0 +1,28 @@
+from typing import Dict, List, Tuple
+
+if 'custom' not in globals():
+    from mage_ai.data_preparation.decorators import custom
+
+
+@custom
+def models(*args, **kwargs) -> Tuple[List[str], List[Dict[str, str]]]:
+    """
+    Split the ``models`` keyword argument into model names plus per-model metadata.
+
+    ``models`` is a comma separated string of dotted model names, e.g.
+        linear_model.Lasso
+        linear_model.LinearRegression
+        svm.LinearSVR
+        ensemble.ExtraTreesRegressor
+        ensemble.GradientBoostingRegressor
+        ensemble.RandomForestRegressor
+    Returns the stripped names and, per name, ``{'block_uuid': <last dotted part>}``.
+    """
+    default_models = 'linear_model.LinearRegression,linear_model.Lasso'
+    names: List[str] = []
+    metadata: List[Dict[str, str]] = []
+    for raw_name in kwargs.get('models', default_models).split(','):
+        name = raw_name.strip()
+        names.append(name)
+        metadata.append({'block_uuid': name.rsplit('.', 1)[-1]})
+    return names, metadata
diff --git a/mlops/unit_3_observability/data_exporters/__init__.py b/mlops/unit_3_observability/data_exporters/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/unit_3_observability/data_exporters/training.py b/mlops/unit_3_observability/data_exporters/training.py
new file mode 100644
index 000000000..c97eaeb56
--- /dev/null
+++ b/mlops/unit_3_observability/data_exporters/training.py
@@ -0,0 +1,29 @@
+from typing import Callable, Dict, Tuple, Union
+
+from pandas import Series
+from scipy.sparse._csr import csr_matrix
+from sklearn.base import BaseEstimator
+
+from mlops.utils.models.sklearn import load_class, train_model
+
+if 'data_exporter' not in globals():
+    from mage_ai.data_preparation.decorators import data_exporter
+
+
+@data_exporter
+def train(
+    settings: Tuple[
+        Dict[str, Union[bool, float, int, str]],
+        csr_matrix,
+        Series,
+        Dict[str, Union[Callable[..., BaseEstimator], str]],
+    ],
+    **kwargs,
+) -> Tuple[BaseEstimator, Dict[str, Union[Callable[..., BaseEstimator], str]]]:
+    """Build the estimator from the given hyperparameters and fit it on X, y."""
+    hyperparameters, X, y, model_info = settings
+
+    # model_info['cls'] is the estimator class; model_info is passed through unchanged.
+    model = model_info['cls'](**hyperparameters)
+    model.fit(X, y)
+    return model, model_info
diff --git a/mlops/unit_3_observability/data_exporters/xgboost.py b/mlops/unit_3_observability/data_exporters/xgboost.py
new file mode 100644
index 000000000..74cfbf0c3
--- /dev/null
+++ b/mlops/unit_3_observability/data_exporters/xgboost.py
@@ -0,0 +1,35 @@
+from typing import Dict, Tuple, Union
+
+from pandas import Series
+from scipy.sparse._csr import csr_matrix
+from xgboost import Booster
+
+from utils.models.xgboost import build_data, fit_model
+
+if 'data_exporter' not in globals():
+    from mage_ai.data_preparation.decorators import data_exporter
+
+
+@data_exporter
+def train(
+    settings: Tuple[
+        Dict[str, Union[bool, float, int, str]],
+        csr_matrix,
+        Series,
+    ],
+    **kwargs,
+) -> Booster:
+    """Train via fit_model on (X, y); a `max_depth` kwarg overrides the given value."""
+    hyperparameters, X, y = settings
+    # Test training a model with low max depth
+    # so that the output renders a reasonably sized plot tree.
+    max_depth = kwargs.get('max_depth')
+    if max_depth:
+        # Copy so the dict received from the upstream block is not mutated in place.
+        hyperparameters = {**hyperparameters, 'max_depth': int(max_depth)}
+
+    return fit_model(
+        build_data(X, y),
+        hyperparameters,
+        verbose_eval=kwargs.get('verbose_eval', 100),
+    )
\ No newline at end of file
diff --git a/mlops/unit_3_observability/global_data_products.yaml b/mlops/unit_3_observability/global_data_products.yaml
new file mode 100644
index 000000000..514db3f2d
--- /dev/null
+++ b/mlops/unit_3_observability/global_data_products.yaml
@@ -0,0 +1,8 @@
+training_set:
+  object_type: pipeline
+  object_uuid: data_preparation
+  outdated_after:
+    seconds: 600
+  settings:
+    build:
+      partitions: 1
diff --git a/mlops/unit_3_observability/io_config.yaml b/mlops/unit_3_observability/io_config.yaml
new file mode 100755
index 000000000..0307fc20d
--- /dev/null
+++ b/mlops/unit_3_observability/io_config.yaml
@@ -0,0 +1,8 @@
+version: 0.1.1
+default:
+  POSTGRES_CONNECT_TIMEOUT: 10
+  POSTGRES_DBNAME: "{{ env_var('EXPERIMENTS_DB') }}"
+  POSTGRES_USER: "{{ env_var('POSTGRES_USER') }}"
+  POSTGRES_PASSWORD: "{{ env_var('POSTGRES_PASSWORD') }}"
+  POSTGRES_HOST: "{{ env_var('POSTGRES_HOST') }}"
+  POSTGRES_PORT: 5432
diff --git a/mlops/unit_3_observability/pipelines/data_preparation/__init__.py b/mlops/unit_3_observability/pipelines/data_preparation/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/unit_3_observability/pipelines/data_preparation/metadata.yaml b/mlops/unit_3_observability/pipelines/data_preparation/metadata.yaml
new file mode 100755
index 000000000..91eccf01b
--- /dev/null
+++ b/mlops/unit_3_observability/pipelines/data_preparation/metadata.yaml
@@ -0,0 +1,242 @@
+blocks:
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      disable_output_preview: false
+      file_source:
+        path: unit_1_data_preparation/data_loaders/ingest.py
+    downstream_blocks:
+      - prepare
+      - ingest_time_series_bar_chart_y0
+      - missing_values_for_ingest
+      - unique_values_for_ingest
+      - most_frequent_values_for_ingest
+      - summary_overview_for_ingest
+      - feature_profiles_for_ingest
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: Ingest
+    retry_config: {}
+    status: executed
+    timeout: null
+    type: data_loader
+    upstream_blocks: []
+    uuid: ingest
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      file_source:
+        path: unit_1_data_preparation/transformers/prepare.py
+    downstream_blocks:
+      - build
+      - prepare_histogram_k4
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: Prepare
+    retry_config: null
+    status: executed
+    timeout: null
+    type: transformer
+    upstream_blocks:
+      - ingest
+    uuid: prepare
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      file_source:
+        path: unit_1_data_preparation/data_exporters/build.py
+    downstream_blocks: []
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: Build
+    retry_config: null
+    status: executed
+    timeout: null
+    type: data_exporter
+    upstream_blocks:
+      - prepare
+    uuid: build
+cache_block_output_in_memory: false
+callbacks: []
+concurrency_config: {}
+conditionals: []
+created_at: "2024-05-05 05:35:38.032338+00:00"
+data_integration: null
+description: Collect data from various sources, generate additional training data
+  if needed, and perform feature engineering to transform the raw data into a set
+  of useful input features.
+executor_config: {}
+executor_count: 1
+executor_type: null
+extensions: {}
+name: Data preparation
+notification_config: {}
+remote_variables_dir: null
+retry_config: {}
+run_pipeline_in_one_process: false
+settings:
+  triggers: null
+spark_config: {}
+tags: []
+type: python
+uuid: data_preparation
+variables:
+  split_on_feature: lpep_pickup_datetime
+  split_on_feature_value: "2024-02-01"
+  target: duration
+variables_dir: /home/src/mage_data/unit_1_data_preparation
+widgets:
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      chart_type: time series bar chart
+      group_by:
+        - lpep_pickup_datetime
+      metrics:
+        - aggregation: count
+          column: lpep_pickup_datetime
+      time_interval: original
+    downstream_blocks: []
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: ingest_time series bar chart_y0
+    retry_config: null
+    status: executed
+    timeout: null
+    type: chart
+    upstream_blocks:
+      - ingest
+    uuid: ingest_time_series_bar_chart_y0
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      chart_style: horizontal
+      chart_type: bar chart
+      x: columns_with_mising_values
+      y: percentage_of_missing_values
+      y_sort_order: descending
+    downstream_blocks: []
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: missing values for ingest
+    retry_config: null
+    status: executed
+    timeout: null
+    type: chart
+    upstream_blocks:
+      - ingest
+    uuid: missing_values_for_ingest
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      chart_style: horizontal
+      chart_type: bar chart
+      x: columns
+      y: number_of_unique_values
+      y_sort_order: descending
+    downstream_blocks: []
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: unique values for ingest
+    retry_config: null
+    status: executed
+    timeout: null
+    type: chart
+    upstream_blocks:
+      - ingest
+    uuid: unique_values_for_ingest
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      chart_type: table
+      height: 3000
+      index: column_index
+      x: columns
+      y: rows
+    downstream_blocks: []
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: most frequent values for ingest
+    retry_config: null
+    status: executed
+    timeout: null
+    type: chart
+    upstream_blocks:
+      - ingest
+    uuid: most_frequent_values_for_ingest
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      chart_type: table
+      height: 3000
+      index: stats
+      x: headers
+      y: rows
+    downstream_blocks: []
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: summary overview for ingest
+    retry_config: null
+    status: executed
+    timeout: null
+    type: chart
+    upstream_blocks:
+      - ingest
+    uuid: summary_overview_for_ingest
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      chart_type: table
+      height: 3000
+      index: stats
+      x: columns
+      y: rows
+    downstream_blocks: []
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: feature profiles for ingest
+    retry_config: null
+    status: executed
+    timeout: null
+    type: chart
+    upstream_blocks:
+      - ingest
+    uuid: feature_profiles_for_ingest
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      buckets: "20"
+      chart_type: histogram
+      group_by: []
+      x: x
+    downstream_blocks: []
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: prepare_histogram_k4
+    retry_config: null
+    status: executed
+    timeout: null
+    type: chart
+    upstream_blocks:
+      - prepare
+    uuid: prepare_histogram_k4
diff --git a/mlops/unit_3_observability/pipelines/sklearn_training/__init__.py b/mlops/unit_3_observability/pipelines/sklearn_training/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/unit_3_observability/pipelines/sklearn_training/interactions.yaml b/mlops/unit_3_observability/pipelines/sklearn_training/interactions.yaml
new file mode 100644
index 000000000..a1d40f831
--- /dev/null
+++ b/mlops/unit_3_observability/pipelines/sklearn_training/interactions.yaml
@@ -0,0 +1,2 @@
+blocks: {}
+layout: []
diff --git a/mlops/unit_3_observability/pipelines/sklearn_training/metadata.yaml b/mlops/unit_3_observability/pipelines/sklearn_training/metadata.yaml
new file mode 100755
index 000000000..620ba657c
--- /dev/null
+++ b/mlops/unit_3_observability/pipelines/sklearn_training/metadata.yaml
@@ -0,0 +1,106 @@
+blocks:
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      global_data_product:
+        uuid: training_set
+    downstream_blocks:
+      - hyperparameter_tuning/sklearn
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: Training set
+    retry_config: null
+    status: executed
+    timeout: null
+    type: global_data_product
+    upstream_blocks: []
+    uuid: training_set
+  - all_upstream_blocks_executed: true
+    color: teal
+    configuration:
+      dynamic: true
+      file_source:
+        path: unit_2_training/custom/load_models.py
+    downstream_blocks:
+      - hyperparameter_tuning/sklearn
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: Load models
+    retry_config: null
+    status: executed
+    timeout: null
+    type: custom
+    upstream_blocks: []
+    uuid: load_models
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      file_source:
+        path: unit_2_training/transformers/hyperparameter_tuning/sklearn.py
+    downstream_blocks:
+      - training
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: Hyperparameter tuning
+    retry_config: null
+    status: executed
+    timeout: null
+    type: transformer
+    upstream_blocks:
+      - training_set
+      - load_models
+    uuid: hyperparameter_tuning/sklearn
+  - all_upstream_blocks_executed: true
+    color: null
+    configuration:
+      file_source:
+        path: unit_2_training/data_exporters/training.py
+    downstream_blocks: []
+    executor_config: null
+    executor_type: local_python
+    has_callback: false
+    language: python
+    name: Training
+    retry_config: null
+    status: executed
+    timeout: null
+    type: data_exporter
+    upstream_blocks:
+      - hyperparameter_tuning/sklearn
+    uuid: training
+cache_block_output_in_memory: false
+callbacks: []
+concurrency_config:
+  block_run_limit: 2
+  on_pipeline_run_limit_reached: wait
+  pipeline_run_limit: 30
+  pipeline_run_limit_all_triggers: 50
+conditionals: []
+created_at: "2024-05-07 13:38:01.412176+00:00"
+data_integration: null
+description:
+  Train models from the sklearn library (e.g. ExtraTreesRegressor, GradientBoostingRegressor,
+  Lasso, LinearRegression, LinearSVR, RandomForestRegressor).
+executor_config: {}
+executor_count: 1
+executor_type: null
+extensions: {}
+name: sklearn training
+notification_config: {}
+remote_variables_dir: null
+retry_config: {}
+run_pipeline_in_one_process: false
+settings:
+  triggers: null
+spark_config: {}
+tags: []
+type: python
+uuid: sklearn_training
+variables_dir: /home/src/mage_data/unit_2_training
+widgets: []
diff --git a/mlops/unit_3_observability/pipelines/xgboost_training/__init__.py b/mlops/unit_3_observability/pipelines/xgboost_training/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/unit_3_observability/pipelines/xgboost_training/interactions.yaml b/mlops/unit_3_observability/pipelines/xgboost_training/interactions.yaml
new file mode 100644
index 000000000..a1d40f831
--- /dev/null
+++ b/mlops/unit_3_observability/pipelines/xgboost_training/interactions.yaml
@@ -0,0 +1,2 @@
+blocks: {}
+layout: []
diff --git a/mlops/unit_3_observability/pipelines/xgboost_training/metadata.yaml b/mlops/unit_3_observability/pipelines/xgboost_training/metadata.yaml
new file mode 100755
index 000000000..1a421ca5d
--- /dev/null
+++ b/mlops/unit_3_observability/pipelines/xgboost_training/metadata.yaml
@@ -0,0 +1,105 @@
+blocks:
+- all_upstream_blocks_executed: true
+  color: null
+  configuration:
+    global_data_product:
+      uuid: training_set
+  downstream_blocks: []
+  executor_config: null
+  executor_type: local_python
+  has_callback: false
+  language: python
+  name: Training set
+  retry_config: null
+  status: executed
+  timeout: null
+  type: global_data_product
+  upstream_blocks: []
+  uuid: training_set
+- all_upstream_blocks_executed: true
+  color: null
+  configuration:
+    file_source:
+      path: null
+  downstream_blocks:
+  - xgboost
+  - dashboard_data_source
+  executor_config: null
+  executor_type: local_python
+  has_callback: false
+  language: python
+  name: xgboost Hyperparameter tuning
+  retry_config: null
+  status: executed
+  timeout: null
+  type: transformer
+  upstream_blocks:
+  - training_set
+  uuid: hyperparameter_tuning/xgboost
+- all_upstream_blocks_executed: true
+  color: null
+  configuration:
+    file_source:
+      path: null
+  downstream_blocks:
+  - dashboard_data_source
+  executor_config: null
+  executor_type: local_python
+  has_callback: false
+  language: python
+  name: XGBoost
+  retry_config: null
+  status: executed
+  timeout: null
+  type: data_exporter
+  upstream_blocks:
+  - hyperparameter_tuning/xgboost
+  uuid: xgboost
+- all_upstream_blocks_executed: true
+  color: pink
+  configuration:
+    file_source:
+      path: null
+  downstream_blocks: []
+  executor_config: null
+  executor_type: local_python
+  has_callback: false
+  language: python
+  name: Dashboard data source
+  retry_config: null
+  status: executed
+  timeout: null
+  type: custom
+  upstream_blocks:
+  - hyperparameter_tuning/xgboost
+  - xgboost
+  uuid: dashboard_data_source
+cache_block_output_in_memory: false
+callbacks: []
+concurrency_config: {}
+conditionals: []
+created_at: '2024-05-07 18:27:34.902705+00:00'
+data_integration: null
+description: 'XGBoost is a scalable and efficient implementation of gradient boosted
+  decision trees, a powerful ensemble machine learning technique. '
+executor_config: {}
+executor_count: 1
+executor_type: null
+extensions: {}
+name: XGBoost training
+notification_config: {}
+remote_variables_dir: null
+retry_config: {}
+run_pipeline_in_one_process: false
+settings:
+  triggers: null
+spark_config: {}
+tags: []
+type: python
+uuid: xgboost_training
+variables:
+  early_stopping_rounds: 1
+  max_depth: 1
+  max_evaluations: 1
+variables_dir: /home/src/mage_data/unit_3_observability
+widgets: []
diff --git a/mlops/unit_3_observability/transformers/__init__.py b/mlops/unit_3_observability/transformers/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/unit_3_observability/transformers/hyperparameter_tuning/sklearn.py b/mlops/unit_3_observability/transformers/hyperparameter_tuning/sklearn.py
new file mode 100644
index 000000000..de9aef004
--- /dev/null
+++ b/mlops/unit_3_observability/transformers/hyperparameter_tuning/sklearn.py
@@ -0,0 +1,39 @@
+from typing import Callable, Dict, Tuple, Union
+
+from pandas import Series
+from scipy.sparse._csr import csr_matrix
+from sklearn.base import BaseEstimator
+
+from utils.models.sklearn import load_class, tune_hyperparameters
+
+if 'transformer' not in globals():
+    from mage_ai.data_preparation.decorators import transformer
+
+
+@transformer
+def hyperparameter_tuning(
+    training_set: Dict[str, Union[Series, csr_matrix]],
+    model_class_name: str,
+    *args,
+    **kwargs,
+) -> Tuple[
+    Dict[str, Union[bool, float, int, str]],
+    csr_matrix,
+    Series,
+    Dict[str, Union[Callable[..., BaseEstimator], str]],
+]:
+    """Tune on the train/validation split; return the result with X, y and model info."""
+    X, X_train, X_val, y, y_train, y_val, _ = training_set['build']
+    model_class = load_class(model_class_name)
+
+    hyperparameters = tune_hyperparameters(
+        model_class,
+        X_train=X_train,
+        y_train=y_train,
+        X_val=X_val,
+        y_val=y_val,
+        max_evaluations=kwargs.get('max_evaluations', 50),
+        random_state=kwargs.get('random_state', 3),
+    )
+
+    return hyperparameters, X, y, dict(cls=model_class, name=model_class_name)
diff --git a/mlops/unit_3_observability/transformers/hyperparameter_tuning/xgboost.py b/mlops/unit_3_observability/transformers/hyperparameter_tuning/xgboost.py
new file mode 100644
index 000000000..9bb5d8f23
--- /dev/null
+++ b/mlops/unit_3_observability/transformers/hyperparameter_tuning/xgboost.py
@@ -0,0 +1,38 @@
+from typing import Dict, Tuple, Union
+
+import numpy as np
+import xgboost as xgb
+from pandas import Series
+from scipy.sparse._csr import csr_matrix
+
+from utils.logging import track_experiment
+from utils.models.xgboost import build_data, tune_hyperparameters
+
+if 'transformer' not in globals():
+    from mage_ai.data_preparation.decorators import transformer
+if 'test' not in globals():
+    from mage_ai.data_preparation.decorators import test
+
+
+@transformer
+def hyperparameter_tuning(
+    training_set: Dict[str, Union[Series, csr_matrix]],
+    **kwargs,
+) -> Tuple[
+    Dict[str, Union[bool, float, int, str]],
+    csr_matrix,
+    Series,
+]:
+    """Tune XGBoost hyperparameters and return them with the training split."""
+    _, X_train, X_val, _, y_train, y_val, _ = training_set['build']
+
+    def log_trial(**opts):
+        # On key collisions the block's kwargs override the callback's options.
+        return track_experiment(**{**opts, **kwargs})
+    best_hyperparameters = tune_hyperparameters(
+        build_data(X_train, y_train),
+        build_data(X_val, y_val),
+        callback=log_trial,
+        **kwargs,
+    )
+    return best_hyperparameters, X_train, y_train