import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score
import scipy.stats as st
import sys

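# Usage sketch (the script filename is illustrative, not from this file):
#   python Catal2015.py /path/to/LOSO/UTD-MHAD2_1s.npz
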
def A(sample):
    feat = []
    for col in range(0, sample.shape[1]):
        average = np.average(sample[:, col])
        feat.append(average)

    return feat

def SD(sample):
    feat = []
    for col in range(0, sample.shape[1]):
        std = np.std(sample[:, col])
        feat.append(std)

    return feat

def AAD(sample):
    feat = []
    for col in range(0, sample.shape[1]):
        data = sample[:, col]
        add = np.mean(np.absolute(data - np.mean(data)))
        feat.append(add)

    return feat

def ARA(sample):
    # Average Resultant Acceleration [1]:
    # average of the square roots of the sum of the squared values of each axis,
    # sqrt(x_i^2 + y_i^2 + z_i^2), over the window (the "ED", example duration)
    feat = []
    sum_square = 0
    sample = np.power(sample, 2)
    for col in range(0, sample.shape[1]):
        sum_square = sum_square + sample[:, col]

    sample = np.sqrt(sum_square)
    average = np.average(sample)
    feat.append(average)
    return feat

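# A quick worked check of ARA on a toy two-row window (illustrative values):
#   ARA(np.array([[3.0, 4.0, 0.0],
#                 [0.0, 0.0, 0.0]])) -> [2.5]
# since sqrt(3^2 + 4^2 + 0^2) = 5 and sqrt(0) = 0 average to 2.5.
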
def TBP(sample):
    # Time Between Peaks: average gap (in samples) between consecutive peaks of each axis.
    # Unused below; see the commented-out line in feature_extraction.
    from scipy import signal
    feat = []
    for col in range(0, sample.shape[1]):
        data = sample[:, col]
        peaks = signal.find_peaks_cwt(data, np.arange(1, 4))
        # find_peaks_cwt returns peak indices; np.diff gives the inter-peak distances
        feat.append(np.mean(np.diff(peaks)) if len(peaks) > 1 else 0)

    return feat

def feature_extraction(X):
    # Extracts the features, as described by Catal et al. 2015:
    # Average - A,
    # Standard Deviation - SD,
    # Average Absolute Difference - AAD,
    # Average Resultant Acceleration - ARA(1),
    # Time Between Peaks - TBP
    X_tmp = []
    for sample in X:
        features = A(sample)
        features = np.hstack((features, SD(sample)))
        features = np.hstack((features, AAD(sample)))
        features = np.hstack((features, ARA(sample)))
        #features = np.hstack((features, TBP(sample)))
        X_tmp.append(features)

    X = np.array(X_tmp)
    return X

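# A minimal shape sketch (the 100x3 window size is an assumption, not from this script):
#   X_demo = np.random.rand(5, 100, 3)    # 5 windows, 100 timesteps, 3 axes
#   feature_extraction(X_demo).shape      # -> (5, 10): 3 A + 3 SD + 3 AAD + 1 ARA
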
def train_j48(X, y):
    # Weka's J48 (C4.5) approximated with scikit-learn's CART decision tree
    from sklearn import tree
    clf = tree.DecisionTreeClassifier()
    return clf

def train_mlp(X, y):
    from sklearn.neural_network import MLPClassifier
    # Weka's default hidden layer size is (n_attributes + n_classes) / 2;
    # labels are 0-indexed, so n_classes = np.amax(y) + 1
    a = int((X.shape[1] + (np.amax(y) + 1)) / 2)
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(a,),
                        learning_rate_init=0.3, momentum=0.2, max_iter=500,  # Weka defaults,
                        # though learning_rate_init and momentum are ignored by lbfgs
                        )
    return clf

def train_logistic_regression(X, y):
    from sklearn.linear_model import LogisticRegression
    clf = LogisticRegression(multi_class='ovr')
    return clf

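# Note: the three train_* helpers intentionally return unfitted estimators;
# the VotingClassifier in the main block clones and fits each one per fold.
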
if __name__ == '__main__':
    # Paper: On the use of ensemble of classifiers for accelerometer-based activity recognition
    np.random.seed(12227)

    if len(sys.argv) > 1:
        data_input_file = sys.argv[1]
    else:
        data_input_file = 'E:/datasets/sensors/TemporalWindow/LOSO/UTD-MHAD2_1s.npz'

    tmp = np.load(data_input_file, allow_pickle=True)  # folds is an object array, so recent NumPy needs allow_pickle
    X = tmp['X']
    X = X[:, 0, :, :]
    y = tmp['y']
    folds = tmp['folds']

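    # Layout inferred from the accesses in this block: X is (n_samples, 1, timesteps, axes)
    # before slicing, y is one-hot (n_samples, n_classes), and folds[i] holds the
    # (train_idx, test_idx) index pair for fold i.
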
    n_class = y.shape[1]

    avg_acc = []
    avg_recall = []
    avg_f1 = []
    y = np.argmax(y, axis=1)

    print('Catal et al. 2015 {}'.format(data_input_file))

    for i in range(0, len(folds)):
        train_idx = folds[i][0]
        test_idx = folds[i][1]

        X_train = X[train_idx]
        X_test = X[test_idx]

        X_train = feature_extraction(X_train)
        X_test = feature_extraction(X_test)

        j_48 = train_j48(X_train, y[train_idx])
        mlp = train_mlp(X_train, y[train_idx])
        logistic_regression = train_logistic_regression(X_train, y[train_idx])

        # voting='soft' averages predicted class probabilities; the literal
        # majority vote of the paper would be voting='hard'
        majority_voting = VotingClassifier(estimators=[('dt', j_48), ('mlp', mlp), ('lr', logistic_regression)], voting='soft')
        majority_voting.fit(X_train, y[train_idx])
        tmp = majority_voting.predict(X_test)

        acc_fold = accuracy_score(y[test_idx], tmp)
        avg_acc.append(acc_fold)

        recall_fold = recall_score(y[test_idx], tmp, average='macro')
        avg_recall.append(recall_fold)

        f1_fold = f1_score(y[test_idx], tmp, average='macro')
        avg_f1.append(f1_fold)

        print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}] at fold[{}]'.format(acc_fold, recall_fold, f1_fold, i))
        print('______________________________________________________')

    # 90% Student-t confidence intervals over the per-fold scores
    ic_acc = st.t.interval(0.9, len(avg_acc) - 1, loc=np.mean(avg_acc), scale=st.sem(avg_acc))
    ic_recall = st.t.interval(0.9, len(avg_recall) - 1, loc=np.mean(avg_recall), scale=st.sem(avg_recall))
    ic_f1 = st.t.interval(0.9, len(avg_f1) - 1, loc=np.mean(avg_f1), scale=st.sem(avg_f1))
    print('Mean Accuracy[{:.4f}] IC [{:.4f}, {:.4f}]'.format(np.mean(avg_acc), ic_acc[0], ic_acc[1]))
    print('Mean Recall[{:.4f}] IC [{:.4f}, {:.4f}]'.format(np.mean(avg_recall), ic_recall[0], ic_recall[1]))
    print('Mean F1[{:.4f}] IC [{:.4f}, {:.4f}]'.format(np.mean(avg_f1), ic_f1[0], ic_f1[1]))