-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
123 lines (91 loc) · 3.05 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import numpy as np
from sklearn.metrics import confusion_matrix
from scipy.stats import mode
class KNN():
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Predictions are written into a float array, so labels must be numeric.
    """

    def __init__(self, N=5):
        """Store ``N``, the number of nearest neighbours that vote."""
        self.N = N

    def fit(self, X_train, Y_train):
        """Memorise the training set; no model is actually fitted.

        Args:
            X_train: 2-D array-like of shape (m, n), one sample per row.
            Y_train: 1-D array-like with one label per row of ``X_train``.
        """
        # Coerce to ndarrays so the positional fancy indexing used in
        # find_neighbors works for plain lists as well.
        self.X_train = np.asarray(X_train)
        self.Y_train = np.asarray(Y_train)
        self.m, self.n = self.X_train.shape

    def predict(self, X_test):
        """Predict a label for every row of ``X_test`` by majority vote.

        Args:
            X_test: 2-D array-like of shape (m_test, n).

        Returns:
            1-D float ndarray of length m_test with the winning label of
            each row. When several labels tie, the smallest one wins.
        """
        self.X_test = np.asarray(X_test)
        # Record the test-set dimensions; the unpacking also enforces that
        # the input is 2-D.
        self.m_test, self.n = self.X_test.shape
        Y_predict = np.zeros(self.m_test)
        for i, x in enumerate(self.X_test):
            neighbors = self.find_neighbors(x)
            # np.unique returns labels sorted ascending, and argmax picks the
            # first maximum, so ties resolve to the smallest label. This
            # avoids relying on the return shape of scipy.stats.mode, which
            # differs between SciPy releases.
            labels, counts = np.unique(neighbors, return_counts=True)
            Y_predict[i] = labels[np.argmax(counts)]
        return Y_predict

    def find_neighbors(self, x):
        """Return the labels of the ``N`` training samples closest to ``x``.

        Args:
            x: 1-D array-like with n features.

        Returns:
            ndarray with the labels of the nearest samples, closest first
            (fewer than ``N`` entries if the training set is smaller).
        """
        # One vectorised pass over the whole training matrix instead of a
        # Python-level loop over its rows.
        distances = np.sqrt(np.sum(np.square(self.X_train - x), axis=1))
        # A stable sort breaks distance ties by training-set order, making
        # the result deterministic.
        order = np.argsort(distances, kind="stable")
        return self.Y_train[order[:self.N]]

    def euclidean(self, x, x_train):
        """Return the Euclidean distance between two feature vectors."""
        return np.sqrt(np.sum(np.square(np.subtract(x, x_train))))
def get_unique_chars(txt_filepath):
    """Return the distinct characters of a text file, sorted ascending.

    Args:
        txt_filepath: Path of the text file to read (opened with the
            platform default encoding).

    Returns:
        A sorted list with one entry per distinct character in the file,
        newlines and other whitespace included. Empty for an empty file.
    """
    with open(txt_filepath) as f:
        raw_txt = f.read()
    # A set deduplicates in a single pass; list membership tests would
    # rescan the accumulated characters for every character read.
    return sorted(set(raw_txt))
def extract_lines_val(txt_filepath):
    """Extract the numeric tokens found on each line of a text file.

    Each line is split on single spaces and every piece that ``float()``
    accepts is kept; all other pieces are ignored.

    Args:
        txt_filepath: Path of the text file to read.

    Returns:
        A list with one entry per line of the file; each entry is the list
        of floats parsed from that line (possibly empty).
    """
    lines_extracted_values = []
    with open(txt_filepath) as f:
        # Iterate the file directly so it is streamed line by line.
        for line in f:
            line_values = []
            for token in line.split(' '):
                try:
                    value = float(token)
                except ValueError:
                    # Not a number (a word, or the empty string produced by
                    # consecutive spaces): skip it. Only ValueError is
                    # caught so unrelated errors are not hidden.
                    continue
                line_values.append(value)
            lines_extracted_values.append(line_values)
    return lines_extracted_values
def extract_labels(txt_filepath):
    """Read a text file and return its characters as a list, minus newlines.

    Args:
        txt_filepath: Path of the text file to read.

    Returns:
        A list of single-character strings in file order, with every
        newline character left out.
    """
    with open(txt_filepath) as handle:
        contents = handle.read()
    # Filtering out '\n' while iterating is equivalent to stripping the
    # newlines first and then listing the remaining characters.
    return [symbol for symbol in contents if symbol != '\n']
def get_features(txt_filepath):
    """Collect the second space-separated field of every line in a file.

    Args:
        txt_filepath: Path of the text file to read.

    Returns:
        A list of strings, one per line: the text between the first and
        second space of the line (newline characters removed first).

    Raises:
        IndexError: If a line contains no space, so it has no second field.
    """
    with open(txt_filepath) as handle:
        rows = handle.readlines()
    return [row.replace('\n', '').split(' ')[1] for row in rows]
def calculate_classes_accuracy(y_test, predicted):
    """Compute the one-vs-rest accuracy of every class.

    For each class the confusion matrix is reduced to true/false
    positives/negatives (that class against all others), and the accuracy
    ``(TP + TN) / (TP + FP + FN + TN)`` is evaluated.

    Args:
        y_test: Ground-truth labels.
        predicted: Predicted labels, aligned with ``y_test``.

    Returns:
        1-D ndarray with one accuracy value per class, in the row/column
        order of the confusion matrix.
    """
    cnf_matrix = confusion_matrix(y_test, predicted)
    TP = np.diag(cnf_matrix)
    # Column sums count how often a class was predicted, row sums how often
    # it actually occurred; removing the diagonal leaves the errors.
    FP = cnf_matrix.sum(axis=0) - TP
    FN = cnf_matrix.sum(axis=1) - TP
    TN = cnf_matrix.sum() - (FP + FN + TP)
    # Only the accuracy is returned. The other rates (recall, specificity,
    # precision, ...) are deliberately not computed: their denominators can
    # be zero for classes that are never predicted or never present, which
    # would only emit divide-by-zero warnings for values nobody uses.
    ACC = (TP + TN) / (TP + FP + FN + TN)
    return ACC