File tree Expand file tree Collapse file tree 6 files changed +136
-4
lines changed Expand file tree Collapse file tree 6 files changed +136
-4
lines changed Original file line number Diff line number Diff line change 1
1
import os
2
2
from mlProject import logger
3
3
from mlProject .pipeline .stage_01_data_ingestion import DataIngestionTrainingPipeline
4
+ from mlProject .pipeline .stage_02_data_validation import DataValidationTrainingPipeline
5
+
6
+
7
+
8
+
9
+
10
+
4
11
5
12
6
13
10
17
data_ingestion = DataIngestionTrainingPipeline ()
11
18
data_ingestion .main ()
12
19
logger .info (f">>>>>> stage { STAGE_NAME } completed <<<<<<\n \n x==========x" )
20
+ except Exception as e :
21
+ logger .exception (e )
22
+ raise e
23
+
24
+
25
+
26
# Stage 2: run the data-validation pipeline. Failures are logged with their
# traceback and then re-raised so the run stops at the failing stage.
STAGE_NAME = "Data Validation stage"
try:
    logger.info(f">>>>>> stage { STAGE_NAME } started <<<<<<")
    # Use a dedicated name so the ingestion pipeline object is not rebound.
    data_validation = DataValidationTrainingPipeline()
    data_validation.main()
    logger.info(f">>>>>> stage { STAGE_NAME } completed <<<<<<\n \n x==========x")
except Exception as e:
    logger.exception(e)
    # Bare raise re-raises the active exception unchanged.
    raise
Original file line number Diff line number Diff line change 1
- key : value
1
# Column names allowed in the input CSV, each mapped to a dtype name.
# The keys of this mapping are what the column-name validation compares the
# CSV header against; the dtype values are not consulted by that check.
COLUMNS:
  RowNumber: int64
  CustomerId: int64
  Surname: str
  CreditScore: int64
  Geography: str
  Gender: str
  Age: float64
  Tenure: int64
  Balance: float64
  NumOfProducts: int64
  # NOTE(review): "HasCrCardl" looks like a typo for "HasCrCard" - confirm
  # against the actual CSV header before relying on the validation result.
  HasCrCardl: float64
  IsActiveMember: float64
  EstimatedSalary: float64
  Exited: int64


# Name of the target column.
TARGET_COLUMN:
  name: Exited
Original file line number Diff line number Diff line change
1
+ import os
2
+ from mlProject import logger
3
+ from mlProject .entity .config_entity import DataValidationConfig
4
+ import pandas as pd
5
+
6
+
7
class DataValiadtion:
    """Check the column names of the ingested CSV against the schema.

    The class name keeps its original spelling because the pipeline module
    imports it under this name.
    """

    def __init__(self, config: "DataValidationConfig"):
        """Store the validation settings.

        Args:
            config: Object exposing ``unzip_data_dir`` (path handed to
                ``pandas.read_csv``), ``STATUS_FILE`` (path the result is
                written to) and ``all_schema`` (mapping whose keys are the
                allowed column names).
        """
        self.config = config

    def validate_all_columns(self) -> bool:
        """Verify that every column of the CSV is declared in the schema.

        The outcome is written to ``config.STATUS_FILE`` exactly once and
        also returned.

        Returns:
            ``True`` when no CSV column is missing from the schema keys,
            ``False`` as soon as at least one unexpected column exists.

        Raises:
            Whatever ``pandas.read_csv`` or ``open`` raise; errors are not
            intercepted here.
        """
        # Only the header row is needed to compare column names.
        header = pd.read_csv(self.config.unzip_data_dir, nrows=0)
        allowed = set(self.config.all_schema.keys())

        # Collect every offender first: a single unexpected column must fail
        # the validation regardless of which columns come after it.
        unexpected = [col for col in header.columns if col not in allowed]
        validation_status = not unexpected

        with open(self.config.STATUS_FILE, "w") as f:
            f.write(f"Validation status: {validation_status} ")

        return validation_status
36
+
Original file line number Diff line number Diff line change 1
1
from mlProject .constants import *
2
2
from mlProject .utils .common import read_yaml , create_directories
3
- from mlProject .entity .config_entity import (DataIngestionConfig ,
3
+ from mlProject .entity .config_entity import (DataIngestionConfig ,
4
+ DataValidationConfig
4
5
)
5
6
7
+
8
+
6
9
class ConfigurationManager :
7
10
def __init__ (
8
11
self ,
@@ -30,4 +33,20 @@ def get_data_ingestion_config(self) -> DataIngestionConfig:
30
33
unzip_dir = config .unzip_dir
31
34
)
32
35
33
- return data_ingestion_config
36
+ return data_ingestion_config
37
+
38
+
39
def get_data_validation_config(self) -> DataValidationConfig:
    """Build the settings object for the data-validation stage.

    Reads the ``data_validation`` section of ``self.config`` and the
    ``COLUMNS`` entry of ``self.schema``, creates the stage's root
    directory, and returns the assembled ``DataValidationConfig``.
    """
    section = self.config.data_validation
    columns = self.schema.COLUMNS

    # The root directory is created before the config object is built.
    create_directories([section.root_dir])

    return DataValidationConfig(
        root_dir=section.root_dir,
        STATUS_FILE=section.STATUS_FILE,
        unzip_data_dir=section.unzip_data_dir,
        all_schema=columns,
    )
Original file line number Diff line number Diff line change @@ -7,4 +7,13 @@ class DataIngestionConfig:
7
7
root_dir : Path
8
8
source_URL : str
9
9
local_data_file : Path
10
- unzip_dir : Path
10
+ unzip_dir : Path
11
+
12
+
13
+
14
@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable settings consumed by the data-validation stage."""

    # Directory created by the configuration manager for this stage.
    root_dir: Path
    # Path of the file the validation status is written to.
    STATUS_FILE: str
    # Path handed to pandas.read_csv by the validation component
    # (despite the "dir" in its name it is read as a CSV file).
    unzip_data_dir: Path
    # Mapping whose keys are the allowed column names (schema COLUMNS).
    all_schema: dict
Original file line number Diff line number Diff line change
1
+ from mlProject .config .configuration import ConfigurationManager
2
+ from mlProject .components .data_validation import DataValiadtion
3
+ from mlProject import logger
4
+
5
+
6
+ STAGE_NAME = "Data Validation stage"
7
+
8
class DataValidationTrainingPipeline:
    """Pipeline stage that runs the column validation on the ingested data."""

    def __init__(self):
        pass

    def main(self):
        """Load the validation config and run the column check.

        The boolean returned by ``validate_all_columns`` is not used here.
        """
        manager = ConfigurationManager()
        validator = DataValiadtion(config=manager.get_data_validation_config())
        validator.validate_all_columns()
17
+
18
+
19
+
20
+
21
+
22
# Script entry point: run the validation stage on its own, logging the start
# and end markers; any failure is logged with its traceback and re-raised.
if __name__ == '__main__':
    try:
        logger.info(f">>>>>> stage { STAGE_NAME } started <<<<<<")
        DataValidationTrainingPipeline().main()
        logger.info(f">>>>>> stage { STAGE_NAME } completed <<<<<<\n \n x==========x")
    except Exception as e:
        logger.exception(e)
        raise e
31
+
You can’t perform that action at this time.
0 commit comments