-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLogistic_Regression.py
53 lines (43 loc) · 1.91 KB
/
Logistic_Regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#Data Pre-processing Step
# importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
# scikit-learn pieces used below, grouped here so every dependency is visible
# in one place instead of being imported midway through the script.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Columns whose missing values are replaced with a column mean.
MEAN_IMPUTED_COLUMNS = [
    'education',
    'cigsPerDay',
    'BPMeds',
    'totChol',
    'BMI',
    'heartRate',
    'glucose',
]

# Importing the dataset and reporting how many values are missing per column.
data_set = pd.read_csv('framingham.csv')
print(data_set.isnull().sum())

# Splitting the dataset into training and test set (75% / 25%).
# The split is done BEFORE imputation so the fill values can be computed from
# the training rows alone; computing them on the full frame would let test-set
# statistics leak into the training features. The row partition depends only
# on the row count, test_size and random_state, so the same rows end up in
# each partition as when the extracted arrays are split.
train_rows, test_rows = train_test_split(
    data_set, test_size=0.25, random_state=0
)

# Replace the missing values with the mean of the TRAINING rows. The same
# training means are applied to the test rows, mirroring how the scaler below
# is fitted on the training set and only applied to the test set.
# Passing a Series to fillna fills each listed column with its own mean and
# leaves every other column untouched.
train_means = train_rows[MEAN_IMPUTED_COLUMNS].mean()
train_rows = train_rows.fillna(train_means)
test_rows = test_rows.fillna(train_means)

# Extracting independent and dependent variables.
# Features are the columns at positions 5..9; the target is the last column.
# NOTE(review): the selection is positional -- confirm these are the intended
# predictors for the CSV in use.
x_train = train_rows.iloc[:, 5:10].values
x_test = test_rows.iloc[:, 5:10].values
y_train = train_rows.iloc[:, -1].values
y_test = test_rows.iloc[:, -1].values
print(x_train, y_train)
print(x_test, y_test)

# Feature scaling: statistics are learned from the training set only and then
# reused to transform the test set.
st_x = StandardScaler()
x_train = st_x.fit_transform(x_train)
x_test = st_x.transform(x_test)

# Fitting Logistic Regression to the training set.
classifier = LogisticRegression(random_state=0)
classifier.fit(x_train, y_train)

# Predicting the test set result.
y_pred = classifier.predict(x_test)

# Creating the confusion matrix and reporting the overall accuracy.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix : \n", cm)
print("Accuracy : ", accuracy_score(y_test, y_pred))