Skip to content

Commit

Permalink
improve explainer dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
diegomarvid committed Mar 14, 2024
1 parent 8d91815 commit 4819fad
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 51 deletions.
1 change: 1 addition & 0 deletions pipeline_lib/core/data_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class DataContainer:
TUNING_PARAMS = "tuning_params"
TARGET = "target"
IMPORTANCE = "importance"
DROP_COLUMNS = "drop_columns"

def __init__(self, initial_data: Optional[dict] = None):
"""
Expand Down
1 change: 1 addition & 0 deletions pipeline_lib/core/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
from .predict import PredictStep # noqa: F401
from .tabular_split import TabularSplitStep # noqa: F401
from .target_scaling import TargetScalingStep # noqa: F401
from .explainer_dashboard import ExplainerDashboardStep # noqa: F401
47 changes: 37 additions & 10 deletions pipeline_lib/core/steps/explainer_dashboard.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,46 @@
from typing import Optional
import pandas as pd
from explainerdashboard import RegressionExplainer

from pipeline_lib.core import DataContainer

from .base import PipelineStep
from pipeline_lib.core.steps import PipelineStep


class ExplainerDashboardStep(PipelineStep):
    """Build a ``RegressionExplainer`` for the fitted model and store it in the container.

    The step reads the model, the target column name, the cleaned dataframe and
    the optional list of dropped columns from the ``DataContainer``. It
    down-samples the dataframe when it is larger than ``max_samples``, and
    saves the resulting explainer under ``DataContainer.EXPLAINER``.
    """

    def __init__(
        self,
        max_samples: int = 1000,
    ) -> None:
        """Initialize the step.

        Parameters
        ----------
        max_samples : int
            Upper bound on the number of rows handed to the explainer. Larger
            datasets are randomly sampled down to this size.

        Raises
        ------
        ValueError
            If ``max_samples`` is smaller than 1.
        """
        if max_samples < 1:
            raise ValueError(f"max_samples must be a positive integer, got {max_samples}.")
        # NOTE(review): the base-class __init__ is not called here; the logger is
        # set up directly instead — confirm this matches the other steps.
        self.init_logger()
        self.max_samples = max_samples

    def execute(self, data: DataContainer) -> DataContainer:
        """Create the explainer from the model and the cleaned data.

        Parameters
        ----------
        data : DataContainer
            Container holding the model, the target name, the cleaned dataframe
            and, optionally, the columns excluded from the model features.

        Returns
        -------
        DataContainer
            The same container, with the explainer stored under
            ``DataContainer.EXPLAINER``.

        Raises
        ------
        ValueError
            If the model, the target name or the cleaned dataframe is missing.
        """
        self.logger.debug("Starting explainer dashboard")

        model = data.get(DataContainer.MODEL)
        if model is None:
            raise ValueError("Model not found in data container.")

        target = data.get(DataContainer.TARGET)
        if target is None:
            raise ValueError("Target column not found in any parameter.")

        df = data.get(DataContainer.CLEAN)
        if df is None:
            # Fail with a clear message instead of a TypeError from len(None).
            raise ValueError("Clean data not found in data container.")

        if len(df) > self.max_samples:
            # Randomly sample a subset of data points if the dataset is larger than max_samples
            self.logger.info(f"Sampling {self.max_samples} data points from the dataset.")
            # Fixed seed keeps the sampled subset reproducible between runs.
            df = df.sample(n=self.max_samples, random_state=42)

        # Use the shared constant rather than a string literal so the key stays
        # in sync with the steps that write it.
        drop_columns = data.get(DataContainer.DROP_COLUMNS)
        if drop_columns:
            # Never drop the target here: it is still needed to build y_test below.
            feature_drops = [column for column in drop_columns if column != target]
            # errors="ignore" skips listed columns that are absent from this frame.
            df = df.drop(columns=feature_drops, errors="ignore")

        X_test = df.drop(columns=[target])
        y_test = df[target]

        explainer = RegressionExplainer(model, X_test, y_test)

        # NOTE(review): assumes DataContainer defines an EXPLAINER key — confirm.
        data[DataContainer.EXPLAINER] = explainer

        return data
1 change: 0 additions & 1 deletion pipeline_lib/implementation/tabular/xgboost/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
from .explainer_dashboard import XGBoostExplainerDashboardStep # noqa: F401
from .fit_model import XGBoostFitModelStep # noqa: F401
from .predict import XGBoostPredictStep # noqa: F401
40 changes: 0 additions & 40 deletions pipeline_lib/implementation/tabular/xgboost/explainer_dashboard.py

This file was deleted.

1 change: 1 addition & 0 deletions pipeline_lib/implementation/tabular/xgboost/fit_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def execute(self, data: DataContainer) -> DataContainer:
start_time = time.time()

data[DataContainer.TARGET] = self.target
data[DataContainer.DROP_COLUMNS] = self.drop_columns

df_train = data[DataContainer.TRAIN]
df_valid = data[DataContainer.VALIDATION]
Expand Down
2 changes: 2 additions & 0 deletions pipeline_lib/implementation/tabular/xgboost/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def execute(self, data: DataContainer) -> DataContainer:
predictions_df = pd.DataFrame(predictions, columns=["prediction"])

model_input[DataContainer.PREDICTIONS] = predictions_df
data[DataContainer.MODEL] = self.model
data[DataContainer.MODEL_OUTPUT] = model_input
data[DataContainer.TARGET] = self.target
data[DataContainer.DROP_COLUMNS] = self.drop_columns
return data

0 comments on commit 4819fad

Please sign in to comment.