Skip to content

Commit 8fbf174

Browse files
author
vijayg15
committed
model evaluation added
1 parent 7e38da3 commit 8fbf174

File tree

6 files changed

+495
-5
lines changed

6 files changed

+495
-5
lines changed

main.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from mlProject.pipeline.stage_02_data_validation import DataValidationTrainingPipeline
55
from mlProject.pipeline.stage_03_data_transformation import DataTransformationTrainingPipeline
66
from mlProject.pipeline.stage_04_model_trainer import ModelTrainerTrainingPipeline
7-
7+
from mlProject.pipeline.stage_05_model_evaluation import ModelEvaluationTrainingPipeline
88

99

1010

@@ -53,6 +53,18 @@
5353
data_ingestion = ModelTrainerTrainingPipeline()
5454
data_ingestion.main()
5555
logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
56+
except Exception as e:
57+
logger.exception(e)
58+
raise e
59+
60+
61+
62+
STAGE_NAME = "Model evaluation stage"
try:
    logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
    # Fixed copy-paste name: this runs model evaluation, not data ingestion.
    model_evaluation_pipeline = ModelEvaluationTrainingPipeline()
    model_evaluation_pipeline.main()
    logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
except Exception as e:
    logger.exception(e)
    # Bare `raise` re-raises with the original traceback intact.
    raise

research/05_model_evaluation.ipynb

+327
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import os
2+
import numpy as np
3+
import pandas as pd
4+
import matplotlib.pyplot as plt
5+
import joblib
6+
from urllib.parse import urlparse
7+
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, roc_curve, classification_report
8+
from sklearn.metrics import ConfusionMatrixDisplay
9+
import mlflow
10+
import mlflow.sklearn
11+
from mlProject.entity.config_entity import ModelEvaluationConfig
12+
from mlProject.utils.common import save_json
13+
from pathlib import Path
14+
15+
16+
class ModelEvaluation:
    """Evaluate a trained classifier on the held-out test split and log
    parameters, metrics and plot artifacts to MLflow.

    Reads paths and settings from a ModelEvaluationConfig produced by
    ConfigurationManager.get_model_evaluation_config().
    """

    def __init__(self, config: ModelEvaluationConfig):
        self.config = config

    def eval_metrics(self, actual, pred):
        """Compute classification metrics for ``pred`` against ``actual``.

        Returns:
            tuple: (accuracy, precision, recall, confusion matrix,
            row-normalized confusion matrix, text classification report).
        """
        acc = accuracy_score(actual, pred)
        prec = precision_score(actual, pred)
        rec = recall_score(actual, pred)
        cm = confusion_matrix(actual, pred)
        # normalize='true' makes each row sum to 1 (per-class recall view).
        cm_nor = confusion_matrix(actual, pred, normalize='true')
        cr = classification_report(actual, pred)
        return acc, prec, rec, cm, cm_nor, cr

    def _save_and_log_cm_plot(self, matrix, filename):
        """Render a confusion-matrix heatmap, save it under root_dir,
        and log it as an MLflow artifact.

        Fix: the original wrote the PNGs to the current working directory,
        cluttering the repo root; they now go to the stage's artifact dir.
        """
        out_path = os.path.join(self.config.root_dir, filename)
        disp = ConfusionMatrixDisplay(confusion_matrix=matrix)
        disp.plot(cmap=plt.cm.Blues, xticks_rotation=45)
        plt.savefig(out_path)
        mlflow.log_artifact(out_path)
        plt.close()

    def log_into_mlflow(self):
        """Run evaluation on the test CSV and record everything in MLflow.

        Side effects: writes a metrics JSON to ``metric_file_name``, saves two
        confusion-matrix PNGs under ``root_dir``, and (when the tracking store
        is not a local file store) registers the model in the MLflow registry.
        """
        test_data = pd.read_csv(self.config.test_data_path)
        model = joblib.load(self.config.model_path)

        X_test = test_data.drop([self.config.target_column], axis=1)
        y_test = test_data[[self.config.target_column]]

        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            predicted_qualities = model.predict(X_test)
            (acc, prec, rec, cm, cm_nor, cr) = self.eval_metrics(y_test, predicted_qualities)

            # Persist scalar metrics locally; the classification report is
            # plain text and the confusion matrix must be JSON-serializable,
            # hence the tolist() conversion.
            scores = {"Accuracy": acc, "Precision": prec, "Recall": rec,
                      "Confusion Mat": np.array(cm).tolist()}
            save_json(path=Path(self.config.metric_file_name), data=scores)

            mlflow.log_params(self.config.all_params)

            mlflow.log_metric("Accuracy", acc)
            mlflow.log_metric("Precision", prec)
            mlflow.log_metric("Recall", rec)

            mlflow.log_dict(np.array(cm).tolist(), "confusion_matrix.json")

            # One helper call per plot replaces the duplicated render/save/log
            # sequence of the original.
            self._save_and_log_cm_plot(cm, "ConfusionMatrix.png")
            self._save_and_log_cm_plot(cm_nor, "NormalizedConfusionMatrix.png")

            # Model registry does not work with a local file store.
            if tracking_url_type_store != "file":
                # Register the model; see
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.sklearn.log_model(model, "model",
                                         registered_model_name="RandomForestClassifier")
            else:
                mlflow.sklearn.log_model(model, "model")

src/mlProject/config/configuration.py

+24-3
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
from mlProject.entity.config_entity import (DataIngestionConfig,
44
DataValidationConfig,
55
DataTransformationConfig,
6-
ModelTrainerConfig,)
7-
6+
ModelTrainerConfig,
7+
ModelEvaluationConfig)
88

99
class ConfigurationManager:
1010
def __init__(
@@ -88,4 +88,25 @@ def get_model_trainer_config(self) -> ModelTrainerConfig:
8888

8989
)
9090

91-
return model_trainer_config
91+
return model_trainer_config
92+
93+
94+
def get_model_evaluation_config(self) -> ModelEvaluationConfig:
    """Assemble the configuration for the model-evaluation stage.

    Pulls the ``model_evaluation`` section from the project config, the
    RandomForestClassifier hyper-parameters from params, and the target
    column from the schema, creating the stage's root directory first.
    """
    eval_cfg = self.config.model_evaluation
    rf_params = self.params.RandomForestClassifier
    target_schema = self.schema.TARGET_COLUMN

    create_directories([eval_cfg.root_dir])

    return ModelEvaluationConfig(
        root_dir=eval_cfg.root_dir,
        test_data_path=eval_cfg.test_data_path,
        model_path=eval_cfg.model_path,
        all_params=rf_params,
        metric_file_name=eval_cfg.metric_file_name,
        target_column=target_schema.name,
        mlflow_uri="https://dagshub.com/vijayg15/Machine-Learning-project-with-MLflow-deployment.mlflow",
    )

src/mlProject/entity/config_entity.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,16 @@ class ModelTrainerConfig:
4040
min_samples_leaf: float
4141
bootstrap: bool
4242
ccp_alpha: float
43-
target_column: str
43+
target_column: str
44+
45+
46+
47+
@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings consumed by the model-evaluation stage."""

    root_dir: Path          # directory where evaluation artifacts are written
    test_data_path: Path    # CSV holding the held-out test split
    model_path: Path        # joblib-serialized trained model
    all_params: dict        # hyper-parameters to log alongside the metrics
    metric_file_name: Path  # JSON file that receives the metric scores
    target_column: str      # label column name in the test CSV
    mlflow_uri: str         # MLflow registry/tracking URI
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from mlProject.config.configuration import ConfigurationManager
2+
from mlProject.components.model_evaluation import ModelEvaluation
3+
from mlProject import logger
4+
5+
STAGE_NAME = "Model evaluation stage"
6+
7+
class ModelEvaluationTrainingPipeline:
    """Pipeline stage wrapper: wires configuration loading to model evaluation."""

    def __init__(self):
        pass

    def main(self):
        """Load the evaluation config, run evaluation and log to MLflow."""
        config_manager = ConfigurationManager()
        model_evaluation_config = config_manager.get_model_evaluation_config()
        # Fix: the original rebound `model_evaluation_config` to the component,
        # shadowing the config object; a distinct name keeps both readable.
        model_evaluation = ModelEvaluation(config=model_evaluation_config)
        model_evaluation.log_into_mlflow()
16+
17+
18+
19+
if __name__ == '__main__':
    try:
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        obj = ModelEvaluationTrainingPipeline()
        obj.main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        # Bare `raise` re-raises the active exception with its original
        # traceback; `raise e` would restart the traceback from here.
        raise

0 commit comments

Comments
 (0)