Skip to content

Commit

Permalink
improve logging
Browse files (browse the repository at this point in the history)
  • Loading branch information
diegomarvid committed Apr 2, 2024
1 parent 5977e9e commit 8024ca5
Showing 1 changed file with 16 additions and 5 deletions.
21 changes: 16 additions & 5 deletions pipeline_lib/core/steps/encode.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,7 +64,7 @@ def execute(self, data: DataContainer) -> DataContainer:

self.column_transformer.fit(df, df[target_column_name])
transformed_data = self.column_transformer.transform(df)
- self.logger.info(f"Transformed data shape: {transformed_data.shape}")
+ self.logger.debug(f"Transformed data shape: {transformed_data.shape}")

encoded_data = pd.DataFrame(transformed_data)

Expand All @@ -79,7 +79,18 @@ def _log_feature_info(
low_cardinality_features,
high_cardinality_features,
):
- self.logger.info(f"Categorical features: {categorical_features}")
- self.logger.info(f"Numeric features: {numeric_features}")
- self.logger.info(f"Low cardinality features: {low_cardinality_features}")
- self.logger.info(f"High cardinality features: {high_cardinality_features}")
+ self.logger.info(
+ f"Categorical features: ({len(categorical_features)}) - {categorical_features}"
+ )
+ self.logger.info(
+ f"Low cardinality features (cardinality ratio < {self.cardinality_threshold}):"
+ f" ({len(low_cardinality_features)}) - {low_cardinality_features}"
+ )
+ self.logger.info("Low cardinality features encoding method: target encoder")
+ self.logger.info(
+ f"High cardinality features (cardinality ratio >= {self.cardinality_threshold}):"
+ f" ({len(high_cardinality_features)}) - {high_cardinality_features}"
+ )
+ self.logger.info("High cardinality features encoding method: ordinal encoder")
+ self.logger.info(f"Numeric features: ({len(numeric_features)}) - {numeric_features}")
+ self.logger.info("Numeric features encoding method: passthrough")

0 comments on commit 8024ca5

Please sign in to comment.