-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDataLoadAndPreprocess.m
137 lines (118 loc) · 5.3 KB
/
DataLoadAndPreprocess.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
%------------------------------------------------------------------
%---------------Load Data and Preprocessing -----------------------
%------------------------------------------------------------------
% Copyright 2003-2009 Dimitrios Ververidis, AIIA Lab.
% $Revision: 0.0$ $Date: 09/01/2009$
% Adapt the following steps to your needs:
% 1. Remove redundant features: features with many NaNs or unique
% values
% 2. Normalize feature values to 0-1
function [Patterns, Targets] = DataLoadAndPreprocess(DatasetToUse)
% DATALOADANDPREPROCESS Load a dataset matrix and split it into
% patterns (features) and targets (class labels).
%
%   [Patterns, Targets] = DataLoadAndPreprocess(DatasetToUse)
%
%   Input:
%     DatasetToUse - dataset name, with or without a trailing '.mat'.
%                    The data is read from PatTargMatrices/<name>
%                    (relative to the current folder). A MAT-file must
%                    contain a variable with the same name as the
%                    dataset.
%   Outputs:
%     Patterns     - NPatterns x KFeatures matrix of preprocessed
%                    feature values.
%     Targets      - NPatterns x 1 vector of class labels.
%
%   Three cases are handled, selected by the dataset name:
%     * 'finalvecDES', 'finalvecSUSAS', 'finalvecKidsVR': labels are
%       taken from column 114; NaNs are replaced by the feature mean,
%       selected features get an exponential normalization, all
%       features are rescaled to [0,1] and a fixed list of features is
%       removed.
%     * 'finalvecCOLONCANCER': all but the last column are rescaled to
%       [0,1]; the last column is the label, recoded as 1 (>0) and
%       2 (<0).
%     * anything else: all but the last column are rescaled to [0,1];
%       the last column is returned unchanged as the label.
%
%   Side effects: prints the sizes of the outputs and writes
%   'Patterns.mat' and 'Targets.mat' into the current folder.

% Strip the extension only when it really is a trailing '.mat'.
if length(DatasetToUse) >= 4 && strcmp(DatasetToUse(end-3:end), '.mat')
    DatasetToUse = DatasetToUse(1:end-4);
end

%--------------------- Load data ----------------------------------
Data = LoadDatasetMatrix(DatasetToUse);
%------------------- End Load data -------------------------------

%-----------------Prepare Targets and Patterns --------------------
[NPatterns, KInitialFeatures] = size(Data);
patterns = Data;

SpeechDatasets = {'finalvecDES', 'finalvecSUSAS', 'finalvecKidsVR'};
if any(strcmp(DatasetToUse, SpeechDatasets))
    emotions = Data(:, 114);            % column 114 holds the label
    if strcmp(DatasetToUse, 'finalvecSUSAS')
        % Label codes of this dataset:
        %   1     2     3     4     5     6     7     8     9    10    11
        % 'ARY' 'CAR' 'C50' 'C70' 'FST' 'LRD' 'LUD' 'NAL' 'QON' 'SOW' 'SFT'
        % Only classes 1, 2, 6 and 8 are kept; they are relabelled to
        % the contiguous codes 1, 2, 3 and 4.
        EmotionsOfInterest = (emotions == 1 | emotions == 2 | ...
                              emotions == 6 | emotions == 8);
        patterns = patterns(EmotionsOfInterest, :);
        emotions = emotions(EmotionsOfInterest);
        emotions(emotions == 6) = 3;
        emotions(emotions == 8) = 4;
    end
    [NPatterns, KInitialFeatures] = size(patterns);
    KFeatures = 90;                     % 90 Remained after Processing

    %------------------- Missing data handle --------
    % Replace every NaN by the mean of the non-NaN values of the same
    % feature (a feature that is entirely NaN stays NaN).
    for FeatureIndex = 1:KInitialFeatures
        Column  = patterns(:, FeatureIndex);
        Missing = isnan(Column);
        if any(Missing)
            Column(Missing) = mean(Column(~Missing));
            patterns(:, FeatureIndex) = Column;
        end
    end

    %--------------Exponential Normalization------------------------
    tabexp = [13 14 30:32 37 39:40 46 53 64:66 68 ...
              71 86:104 106:113];       % Exponentially distributed features
    for FeatureIndex = tabexp
        lamda = 1/mean(patterns(:, FeatureIndex));
        patterns(:, FeatureIndex) = ...
            (1 - exp(-lamda*patterns(:, FeatureIndex)))/(1 - exp(-lamda));
    end

    %-----------------Linear Normalization--------------------------
    patterns = RescaleColumnsToUnitRange(patterns, 1:KInitialFeatures);

    %---------------- Remove features ----------------
    tabNans = [23:29 48 57:63];                  % Feature with Nans
    tabbias = [8 33:34 41 60 67 75 82 105];      % Features with Bias
    tabnw   = [tabNans tabbias];                 % Useless features
    tabfin  = setdiff(1:KInitialFeatures, tabnw);
    patterns = patterns(:, tabfin);
    % Keep the first KFeatures surviving columns. NOTE(review): with a
    % 114-column dataset this also drops the label column (114), which
    % is not in the removal lists - confirm the datasets have exactly
    % 114 columns.
    Patterns = patterns(1:NPatterns, 1:KFeatures);
    Targets  = emotions;
elseif strcmp(DatasetToUse, 'finalvecCOLONCANCER')
    patterns = RescaleColumnsToUnitRange(patterns, 1:KInitialFeatures-1);
    Patterns = patterns(:, 1:(KInitialFeatures-1));
    Targets  = Data(:, end);
    % Recode the sign of the label: >0 becomes 1, <0 becomes 2, and a
    % label of exactly 0 stays 0.
    NewTargets = zeros(NPatterns, 1);
    NewTargets(Targets > 0) = 1;
    NewTargets(Targets < 0) = 2;
    Targets = NewTargets;
    disp('1 responds to Benign, 2 responds to Malignant');
else % Your Data is loaded here
    % Linear transformation, Normalization to [0,1]
    patterns = RescaleColumnsToUnitRange(patterns, 1:KInitialFeatures-1);
    Patterns = patterns(:, 1:(KInitialFeatures-1));
    Targets  = Data(:, end);
    disp(['No Preprocessing Code, however PatternTargets format ' ...
          'is appropriate']);
end

% The two size() calls are deliberately left without a semicolon so
% that the sizes are echoed to the command window.
disp('Size of Patterns');
size(Patterns)
disp('Size of Targets');
size(Targets)
save('Patterns.mat', 'Patterns');
save('Targets.mat', 'Targets');

%------------------------------------------------------------------
function Data = LoadDatasetMatrix(DatasetName)
% Read the dataset matrix from PatTargMatrices/<DatasetName> without
% changing the current folder, so a failed load cannot leave the caller
% in a different directory, and without evaluating text built from the
% dataset name.
Loaded = load(fullfile('PatTargMatrices', DatasetName));
if isstruct(Loaded)
    % MAT-file: the matrix is the variable named after the dataset.
    if ~isfield(Loaded, DatasetName)
        error('DataLoadAndPreprocess:missingVariable', ...
              'The file of dataset "%s" has no variable named "%s".', ...
              DatasetName, DatasetName);
    end
    Data = Loaded.(DatasetName);
else
    % ASCII file: load returns the numeric matrix itself.
    Data = Loaded;
end

%------------------------------------------------------------------
function Matrix = RescaleColumnsToUnitRange(Matrix, Columns)
% Min-max rescale the listed columns of Matrix to [0,1]. A column whose
% range is zero (constant feature) is mapped to 0 instead of being
% divided by zero, which would turn it into NaN.
for ColumnIndex = Columns
    Lowest  = min(Matrix(:, ColumnIndex));
    Span    = max(Matrix(:, ColumnIndex)) - Lowest;
    Shifted = Matrix(:, ColumnIndex) - Lowest;
    if Span ~= 0
        Shifted = Shifted/Span;
    end
    Matrix(:, ColumnIndex) = Shifted;
end