-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain_code.m
332 lines (288 loc) · 14.6 KB
/
main_code.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
clc;
close all;
clear all;
% Loading the file from the folder, excluding the 3 first text rows
fid = readtable('Iris.dat', 'NumHeaderLines', 3);
% SUBSET 1-----------------------------------------------------------------
A = fid(1:100, 1:7); % Extracting the data: first 100 rows and 7 columns
data1 = A{:, [1, 3]}; % Converting the columns [1, 3] to an array
% we want to normalize the data so that the mean is 0 and the standard deviation is 1
m1 = mean(data1); % Calculating the mean of all data
s1 = std(data1); % Calculating the standard deviation of all data
norm_data1 = (data1 - m1) ./ s1; % Normalizing data
% Repeat the same process for the other subsets
% SUBSET 2-----------------------------------------------------------------
B = fid(51:150, 1:7);
data2 = B{:, [2, 3]};
m2 = mean(data2);
s2 = std(data2);
norm_data2 = (data2 - m2) ./ s2;
% SUBSET 3-----------------------------------------------------------------
C = load('subset3.mat');
m3 = mean(C.x);
s3 = std(C.x);
norm_data3 = (C.x - m3) ./ s3;
%--------------------------------------------------------------------------
% Visualizing the original and normalized data
figure('Name', 'Original and Normalized Data');
subplot(3, 2, 1);
scatter(data1(:, 1), data1(:, 2), 20, 'MarkerEdgeColor',[0 .5 .5], 'MarkerFaceColor',[0 .7 .7], 'LineWidth',1.5);
title('SUBSET 1')
subplot(3, 2, 2);
scatter(norm_data1(:, 1), norm_data1(:, 2), 20, 'MarkerEdgeColor',[0 .5 .5], 'MarkerFaceColor',[0 .7 .7], 'LineWidth',1.5);
title('Normalized SUBSET 1')
subplot(3, 2, 3);
scatter(data2(:, 1), data2(:, 2), 20, 'MarkerEdgeColor',[.5 .5 0], 'MarkerFaceColor',[.7 .7 0], 'LineWidth',1.5);
title('SUBSET 2')
subplot(3, 2, 4);
scatter(norm_data2(:, 1), norm_data2(:, 2), 20, 'MarkerEdgeColor',[.5 .5 0], 'MarkerFaceColor',[.7 .7 0], 'LineWidth',1.5);
title('Normalized SUBSET 2')
subplot(3, 2, 5);
scatter(C.x(:, 1), C.x(:, 2), 20, 'MarkerEdgeColor',[.5 0 .5], 'MarkerFaceColor',[.7 0 .7], 'LineWidth',1.5);
title('SUBSET 3')
subplot(3, 2, 6);
scatter(norm_data3(:, 1), norm_data3(:, 2), 20, 'MarkerEdgeColor',[.5 0 .5], 'MarkerFaceColor',[.7 0 .7], 'LineWidth',1.5);
title('Normalized SUBSET 3')
%--------------------------------------------------------------------------
% Selecting the subset to feed to our model---------------------------------------------------------
choise = menu('Choose the subset you want to use:', 'Subset 1', 'Subset 2', 'Subset 3');
switch choise
case 1
dataset = norm_data1;
case 2
dataset = norm_data2;
case 3
dataset = norm_data3;
end
% Initializing the outputs of the model
labels = [ones(50, 1); -ones(50, 1)];
% Splitting data into 2 subsets and suffling the data
X1 = dataset(1:50, :);
X2 = dataset(51:100, :);
y1 = labels(1:50, :);
y2 = labels(51:100, :);
% Generating random the indices for each class
idx1 = randperm(50);
idx2 = randperm(50);
% Shuffling the data
X1 = X1(idx1, :);
X2 = X2(idx2, :);
y1 = y1(idx1, :);
y2 = y2(idx2, :);
%--------------------------------------------------------------------------
% Cross Validation with k-folds-------------------------------------------------
k = 10; % Number of folds
% To split each category into k subsets
subsets_X1 = cell(k, 1); % Initialization of 10 cell arrays to store the subsets of class 1
subsets_X2 = cell(k, 1); % Initialization of 10 cell arrays to store the subsets of class 2
combined = cell(k, 1); % Combination of subsets
for i = 1:k
% splitting the class 1 into k equal sets of 5 rows and 2 columns
subsets_X1{i} = X1((i-1)*5 + 1:i*5, :);
subsets_X2{i} = X2((i-1)*5 + 1:i*5, :);
% splitting the labels of each class into 10 equal sets of 5 rows and 1 column
subsets_y1 = y1((i-1)*5 + 1:i*5, :);
subsets_y2 = y2((i-1)*5 + 1:i*5, :);
% combining the sets oo each class making 10 sets of 10 rows and 2 columns
combined{i} = [subsets_X1{i}, subsets_y1; subsets_X2{i}, subsets_y2];
end
%--------------------------------------------------------------------------
% Choosing which model to use---------------------------------------------------------
choise = menu('Choose the model you want to use:', 'Adaline Linear Neuron', 'Logistic Regressor', 'Perceptron');
switch choise
case 1
model = 'Adaline Linear Neuron';
case 2
model = 'Logistic Regressor';
case 3
model = 'Perceptron';
end
% k-fold Cross Validation training-------------------------------------------------
% Selecting the learning rate of the model
choise = menu('Choose learning rate of model:', '0.01', '0.1', '0.5');
switch choise
case 1
a = 0.01;
case 2
a = 0.1;
case 3
a = 0.5;
end
% input the number of epochs
epochs = input('Enter the number of epochs: ');
% Initializing an empty table to store the MSE for each fold
mse_table = zeros(k, 1); % The table where we will store MSEs for each fold
min_mse = inf; % Initializing the minimum MSE to infinity
max_mse = -inf; % Initializing the maximum MSE to -infinity
max_accuarcy = 0; % Initializing the maximum accuracy to 0
min_accuracy = 0; % Initializing the minimum accuracy to 0
loss_history = zeros(1, epochs); % Initialization of loss history to store the MSE for each epoch
accu_table = zeros(k, 1); % Initialization of accuracy table to store the accuracy for each fold
bestHistory = zeros(1, epochs); % Initialization of best model loss history
worstHistory = zeros(1, epochs); % Initialization of worst model loss history
% Looping through each fold
for fold = 1:k
% splitting training and test data
% 70% will be used as training set
% 30% will be used as testing set
test_indices = mod((fold-1):(fold+1), 10) + 1; % mod function to get the indices of the test cells for each fold
% SOS: by using mod we achieve a circular shift of the indices
% Remaining cells for training
train_indices = setdiff(1:10, test_indices);
train_data = cell2mat(combined(train_indices)); % Training dataset
test_data = cell2mat(combined(test_indices));
% Extracting the features and labels
X_train = train_data(:, 1:2);
y_train = train_data(:, 3);
X_test = test_data(:, 1:2);
y_test = test_data(:, 3);
y = zeros(size(train_data(:, 3))); % preallocating for speed
y_pred = zeros(size(test_data(:, 3))); % preallocating for speed
%--------------------------------------------------------------------------
% Model Activation
% if statements for activating function/model depending on the user's choice
if strcmp(model, 'Adaline Linear Neuron')
[weights, W0, loss_history] = AdalineNeuron(fold, epochs, a, X_train, y_train);
elseif strcmp(model, 'Logistic Regressor')
[weights, W0, loss_history] = LogisticRegressorFunction(fold, epochs, a, X_train, y_train);
elseif strcmp(model, 'Perceptron')
[weights, W0, loss_history] = PerceptronFunction(fold, epochs, a, X_train, y_train);
end
fold_mse = mean(loss_history(fold, :)); % mean of MSE across epochs for the current fold
mse_table(fold) = fold_mse; % storing the mse for each fold, to extract later the max and min MSE
% Making predictions with the test dataset, depending on the model
for i = 1:size(X_test, 1)
if strcmp(model, 'Adaline Linear Neuron')
y_pred(i) = X_test(i,:) * weights' + W0; % Least Squares method for adaline linear neuron
elseif strcmp(model, 'Logistic Regressor')
y_pred(i) = tanh(X_test(i,:) * weights' + W0); % Hyperbolic tangent activation method for logistic regressor
elseif strcmp(model, 'Perceptron')
y_pred(i) = sign(X_test(i, :) * weights' + W0); % Hard-limiter activation method for perceptron
if y_pred(i) == 0
y_pred(i) = -1; % ensuring that the output is -1 if the output is 0
end
end
end
% Calculating the accuracy of the model
accuracy = 100 * (1 - mean(abs(y_test - y_pred), "all"));
accu_table(fold) = accuracy; % storing the accuracy for each fold to extract the mean, min and max accuracy
% store best model parameters
if fold_mse < min_mse % if the current fold MSE is less than the minimum MSE
min_mse = fold_mse;
best_weights = weights;
best_bias = W0;
max_accuarcy = accuracy;
best_loss_history = loss_history(fold, :); % storing loss history for visualization
end
% store worst model parameters
if fold_mse > max_mse % if the current fold MSE is greater than the maximum MSE
max_mse = fold_mse;
worst_weights = weights;
worst_bias = W0;
min_accuracy = accuracy;
worst_loss_history = loss_history(fold, :); % storing loss history for visualization
end
fprintf('Fold %d: Mean Squared Error = %.4f\n', fold, fold_mse);
end
% Displaying the results of the model
mean_accuracy = mean(accu_table, "all");
fprintf('\nMean Accuracy with K-fold cross validation = %.4f\n', mean_accuracy);
fprintf('\nBest Model Results:\nMSE = %.4f\nPredictions Accuracy = %.4f\n', min_mse, max_accuarcy);
fprintf('\nWorst Model Results:\nMSE = %.4f\nPredictions Accuracy = %.4f\n', max_mse, min_accuracy);
% Plotting the loss history of best and worst models separately in two figures with grid
figure("Name", "Loss History of Best Model");
plot(best_loss_history, 'g-', 'DisplayName', 'Best Model Loss History');
xlabel('Epochs');
ylabel('Mean Squared Error');
grid on
title('Loss History of Best Model');
figure("Name", "Loss History of Worst Model");
plot(worst_loss_history, 'r-', 'DisplayName', 'Worst Model Loss History');
xlabel('Epochs');
ylabel('Mean Squared Error');
grid on
title('Loss History of Worst Model');
% Plotting the decision boundaries of best and worst models
figure("Name", "Decision Boundaries of Best and Worst Models");
hold on;
title('Perceptron Model with best results');
xlabel('First Feature');
ylabel('Second Feature');
% Plot data points
scatter(dataset(:, 1), dataset(:, 2), 'filled')
grid on
% Plot decision boundary
x_values = linspace(min(X_train(:, 1)), max(X_train(:, 1)), 100); % Generating 100 points between the minimum and maximum values of the first feature
ybest_values = -(best_weights(1) * x_values + best_bias) / best_weights(2); % Calculating the decision boundary of the best model according to the weights and bias we stored
yworst_values = -(worst_weights(1) * x_values + worst_bias) / worst_weights(2); % Same for the worst model
plot(x_values, ybest_values, 'g-', 'DisplayName', 'Decision Boundary of Best Model');
plot(x_values, yworst_values, 'r-', 'DisplayName', 'Decision Boundary of Worst Model');
axis([min(dataset(:, 1)) max(dataset(:, 1)) min(dataset(:, 2)) max(dataset(:, 2))]); % Set axis limits to match data
legend("BackgroundAlpha", 0.5);
hold off;
% FUNCTIONS------------------------------------------------------------------------------------------------------------------------------
%----------------------------------------------------------------------------------------------------------------------------------------
%----------------------------------------------------------------------------------------------------------------------------------------
%----------------------------------------------------------------------------------------------------------------------------------------
% Adaline Linear Neuron function----------------------------------------------------
function [weights, W0, loss_history] = AdalineNeuron(fold, epochs, a, X_train, y_train)
% Initializing weights and bias
weights = randn(1, size(X_train, 2));
W0 = 1; % bias of neuron
for epoch = 1:epochs
error = zeros(size(X_train, 1), 1); % Initialization of errors for this epoch
for i = 1:size(X_train, 1)
% Linear activation
y(i) = X_train(i, :) * weights' + W0; % Linear activation function
for j = 1:length(weights)
weights(j) = weights(j) - a * (y(i) - y_train(i)) * X_train(i, j); % Online weight adjustment with the gradient descent method: Least Squares
end
error(i) = 0.5 * (y(i) - y_train(i))^2; % Squared Error for sample i
W0 = W0 - a * (y(i) - y_train(i)); % Bias update
end
mse = mean(error); % Mean Squared Error for the current epoch
loss_history(fold, epoch) = mse; % Storing the MSE for each epoch
end
end
% Logistic Regressor function----------------------------------------------------
function [weights, W0, loss_history] = LogisticRegressorFunction(fold, epochs, a, X_train, y_train)
% Initializing weights and bias
weights = randn(1, size(X_train, 2));
W0 = 1; % bias of neuron
for epoch = 1:epochs
error = zeros(size(X_train, 1), 1); % Initialization of errors for this epoch
for i = 1:size(X_train, 1)
y(i) = tanh(X_train(i, :) * weights' + W0); % hyperbolic tangent activation function tanh
for j = 1:length(weights)
% Online weight adjustment with the gradient descent method using the derivative of tanh
weights(j) = weights(j) - a * (y(i) - y_train(i)) * (1 + y(i)) * (1 - y(i)) * X_train(i, j);
end
error(i) = 0.5 * (y(i) - y_train(i))^2; % Squared Error for sample i
W0 = W0 - a * (y(i) - y_train(i)) * (1 + y(i)) * (1 - y(i)); % Bias update using the derivative of tanh
end
mse = mean(error); % Mean Squared Error for the current epoch
loss_history(fold, epoch) = mse; % Storing the MSE for each epoch
end
end
% Perceptron function----------------------------------------------------
function [weights, W0, loss_history] = PerceptronFunction(fold, epochs, a, X_train, y_train)
% Initializing weights and bias
weights = randn(1, size(X_train, 2));
W0 = 1; % bias of neuron
for epoch = 1:epochs
error = zeros(size(X_train, 1), 1); % Initialization of errors for this epoch
for i = 1:size(X_train, 1)
y(i) = sign(X_train(i, :) * weights' + W0); % Hard-limiter activation
if y(i) == 0
y(i) = -1;
end
if y(i) ~= y_train(i)
weights = weights + a * (y_train(i) - y(i)) * X_train(i, :); % online method: if missclassification -> weight update
W0 = W0 + a * (y_train(i) - y(i)); % bias update
error(i) = (y_train(i) - y(i))^2; % Squared Error for sample i
end
end
mse = mean(error); % Mean Squared Error for the current epoch
loss_history(fold, epoch) = mse; % Storing the MSE for each epoch
end
end