function demo()
% INTRO
% demonstrates the usage of minFunc() in the context of linear regression
% INPUT
% None
% OUTPUT
% None
close all; clearvars; clc;
% randomly generate feature vectors and target values
opts.smplCnt = 1000;
opts.featCnt = 100;
opts.ouptCnt = 50;
opts.featMgn = 1.0;
opts.projMgn = 1.0;
opts.noisRat = 0.02;
[featMat, ouptMat, projMatUnly] = GnrtData(opts);
% evaluate the underlying projection matrix
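% its function value reflects the injected noise and serves as a reference for the optimizers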
[funcValUnly, ~] = CalcFuncGrad(projMatUnly(:), [], featMat, ouptMat);
fprintf('[INFO] funcVal (underlying) = %.4e\n', funcValUnly);
% obtain the closed-form solution to the linear regression
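% P' = ouptMat / featMat is the least-squares solution of P' * featMat = ouptMat,
% i.e. P' = Y * X' * inv(X * X') from the normal equations (assuming X has full row rank)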
projMatClsd = (ouptMat / featMat)';
[funcValClsd, ~] = CalcFuncGrad(projMatClsd(:), [], featMat, ouptMat);
fprintf('[INFO] funcVal (closed-form) = %.4e\n', funcValClsd);
% initialize optimization options for each method
[optsGradDst, optsAdaGrad, optsAdaDelta, optsAdam] = InitOpts(opts.smplCnt);
% evaluate each method's performance
[~, ~, funcVecGradDst] = EvaMethod(featMat, ouptMat, optsGradDst);
[~, ~, funcVecAdaGrad] = EvaMethod(featMat, ouptMat, optsAdaGrad);
[~, ~, funcVecAdaDelta] = EvaMethod(featMat, ouptMat, optsAdaDelta);
[~, ~, funcVecAdam] = EvaMethod(featMat, ouptMat, optsAdam);
% visualize the optimization process of each method
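% each funcVec is a column of per-iteration function values; concatenating them
% gives the (T + 1) x 4 matrix expected by VisualOptProc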
VisualOptProc([funcVecGradDst, funcVecAdaGrad, funcVecAdaDelta, funcVecAdam]);
end
function [featMat, ouptMat, projMat] = GnrtData(opts)
% INTRO
% randomly generate a dataset for linear regression
% INPUT
% opts: structure (dataset generation options)
% OUTPUT
% featMat: D x N (feature matrix)
% ouptMat: R x N (output matrix)
% projMat: D x R (projection matrix)
% randomly generate feature vectors and target values
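% the generative model is Y = P' * X + E, where each entry of the noise E has
% standard deviation noisRat * ||P' * X||_F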
featMat = opts.featMgn * randn(opts.featCnt, opts.smplCnt);
projMat = opts.projMgn * randn(opts.featCnt, opts.ouptCnt);
ouptMat = projMat' * featMat;
noisMgn = opts.noisRat * norm(ouptMat, 'fro');
ouptMat = ouptMat + noisMgn * randn(opts.ouptCnt, opts.smplCnt);
end
function [funcVal, gradVec] = CalcFuncGrad(paraVec, smplIdxs, featMat, ouptMat)
% INTRO
% compute the objective function's value and gradient vector
% INPUT
% paraVec: (D * R) x 1 (projection matrix, viewed as a column vector)
% smplIdxs: M x 1 (list of sample indexes)
% featMat: D x N (feature matrix)
% ouptMat: R x N (output matrix)
% OUTPUT
% funcVal: scalar (objective function's value)
% gradVec: (D * R) x 1 (objective function's gradient vector)
% obtain basic variables
featCnt = size(featMat, 1);
smplCnt = size(featMat, 2);
ouptCnt = size(ouptMat, 1);
% recover the projection matrix
projMat = reshape(paraVec, [featCnt, ouptCnt]);
% construct a mini-batch with randomly selected samples
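% an empty index list means the full dataset is used (full-batch evaluation)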
if ~isempty(smplIdxs)
batcSiz = numel(smplIdxs);
featMat = featMat(:, smplIdxs);
ouptMat = ouptMat(:, smplIdxs);
else
batcSiz = smplCnt;
end
% compute the objective function's value
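% f(P) = ||P' * X - Y||_F^2 / (2 * M), where M is the mini-batch size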
diffMat = projMat' * featMat - ouptMat;
funcVal = norm(diffMat, 'fro') ^ 2 / 2 / batcSiz;
% compute the objective function's gradient vector
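% d f / d P = X * (P' * X - Y)' / M, flattened into a column vector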
gradMat = featMat * diffMat' / batcSiz;
gradVec = gradMat(:);
end
function [optsGradDst, optsAdaGrad, optsAdaDelta, optsAdam] = InitOpts(smplCnt)
% INTRO
% initialize optimization options for each method
% INPUT
% smplCnt: scalar (number of samples; required for mini-batch construction)
% OUTPUT
% optsGradDst: structure (GradDst's optimization options)
% optsAdaGrad: structure (AdaGrad's optimization options)
% optsAdaDelta: structure (AdaDelta's optimization options)
% optsAdam: structure (Adam's optimization options)
% configure common optimization options for all methods
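% the fields below are presumably interpreted by minFunc as: enblVis - toggle visualization,
% epchCnt - number of epochs, smplCnt - number of samples, batcSiz - mini-batch size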
opts.enblVis = false;
opts.epchCnt = 100;
opts.smplCnt = smplCnt;
opts.batcSiz = 50;
% configure optimization options for GradDst
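% lrInit is presumably the initial learning rate; lrIncrMult / lrDecrMult presumably
% scale it up or down adaptively during optimization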
optsGradDst = opts;
optsGradDst.method = 'GradDst';
optsGradDst.lrInit = 1e-2;
optsGradDst.lrIncrMult = 1.5;
optsGradDst.lrDecrMult = 0.5;
optsGradDst.momentum = 0.9;
% configure optimization options for AdaGrad
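% autoCorr presumably decays the accumulated squared gradients; fudgFctr is presumably
% the small epsilon added for numerical stability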
optsAdaGrad = opts;
optsAdaGrad.method = 'AdaGrad';
optsAdaGrad.lrInit = 1e-1;
optsAdaGrad.autoCorr = 0.95;
optsAdaGrad.fudgFctr = 1e-6;
% configure optimization options for AdaDelta
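% momentum presumably acts as AdaDelta's decay rate (rho); fudgFctr is presumably the stabilizing epsilon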
optsAdaDelta = opts;
optsAdaDelta.method = 'AdaDelta';
optsAdaDelta.momentum = 0.999;
optsAdaDelta.fudgFctr = 1e-6;
% configure optimization options for Adam
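% betaFst / betaSec are presumably the decay rates for the first / second moment estimates
% (0.90 for the second moment is lower than the commonly used 0.999)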
optsAdam = opts;
optsAdam.method = 'Adam';
optsAdam.lrInit = 1e-1;
optsAdam.betaFst = 0.90;
optsAdam.betaSec = 0.90;
optsAdam.fudgFctr = 1e-6;
end
function [projMat, funcVal, funcVec] = EvaMethod(featMat, ouptMat, opts)
% INTRO
% evaluate the selected optimization method
% INPUT
% featMat: D x N (feature matrix)
% ouptMat: R x N (output matrix)
% opts: structure (optimization options)
% OUTPUT
% projMat: D x R (projection matrix)
% funcVal: scalar (function's value at the obtained solution)
% funcVec: T x 1 (list of function's values through iterations)
% obtain basic variables
featCnt = size(featMat, 1);
ouptCnt = size(ouptMat, 1);
% solve the optimization with the selected method via minFunc
projMatInit = randn(featCnt, ouptCnt);
paraVecInit = projMatInit(:);
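% as called here, minFunc returns the solution, its function value, and the
% per-iteration history of function values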
[paraVec, funcVal, funcVec] = ...
minFunc(@CalcFuncGrad, paraVecInit, opts, featMat, ouptMat);
projMat = reshape(paraVec, [featCnt, ouptCnt]);
fprintf('[INFO] funcVal (%s) = %.4e\n', opts.method, funcVal);
end
function VisualOptProc(funcMat)
% INTRO
% visualize the optimization process of each method
% INPUT
% funcMat: (T + 1) x 4 (matrix of function's values through iterations)
% OUTPUT
% None
% obtain basic variables
iterCnt = size(funcMat, 1) - 1; % exclude the initial function's value
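% iterCntLast: number of trailing points shown in the right-hand panel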
iterCntLast = 11;
assert(iterCntLast <= iterCnt);
xAxisLft = (0 : iterCnt);
xAxisRht = (iterCnt - iterCntLast + 1 : iterCnt);
% LEFT FIGURE: complete curve of function's values
subplot(1, 2, 1);
hold on;
plot(xAxisLft, funcMat(:, 1), 'r-o');
plot(xAxisLft, funcMat(:, 2), 'g-.+');
plot(xAxisLft, funcMat(:, 3), 'b-*');
plot(xAxisLft, funcMat(:, 4), 'm-.x');
grid on;
legend({'GradDst', 'AdaGrad', 'AdaDelta', 'Adam'}, 'Location', 'East');
xlabel('# of Iterations');
ylabel('Mean Squared Error');
set(gca, 'FontSize', 12);
title(sprintf('All %d Iterations', iterCnt));
% RIGHT FIGURE: partial curve of function's values (last few iterations)
subplot(1, 2, 2);
hold on;
plot(xAxisRht, funcMat(end - iterCntLast + 1 : end, 1), 'r-o');
plot(xAxisRht, funcMat(end - iterCntLast + 1 : end, 2), 'g-.+');
plot(xAxisRht, funcMat(end - iterCntLast + 1 : end, 3), 'b-*');
plot(xAxisRht, funcMat(end - iterCntLast + 1 : end, 4), 'm-.x');
grid on;
legend({'GradDst', 'AdaGrad', 'AdaDelta', 'Adam'}, 'Location', 'East');
xlabel('# of Iterations');
ylabel('Mean Squared Error');
set(gca, 'FontSize', 12);
title(sprintf('Last %d Iterations', iterCntLast));
end