-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
426 lines (342 loc) · 16.7 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- was content="None" (template placeholder leak); replaced with a real page summary -->
<meta name="description" content="PocketFlow: an open-source framework for compressing and accelerating deep learning models with minimal human effort.">
<link rel="shortcut icon" href="img/favicon.ico">
<title>Home - PocketFlow Docs</title>
<!-- double quotes for consistency; type="text/css" omitted (it is the default for rel="stylesheet") -->
<link href="https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700" rel="stylesheet">
<link rel="stylesheet" href="css/theme.css">
<link rel="stylesheet" href="css/theme_extra.css">
<!-- explicit https: instead of protocol-relative // URLs (breaks under file:// and is discouraged) -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css">
<script>
// Current page data consumed by the theme's JS (theme.js / search)
var mkdocs_page_name = "Home";
var mkdocs_page_input_path = "index.md";
var mkdocs_page_url = null;
</script>
<script src="js/jquery-2.1.1.min.js" defer></script>
<script src="js/modernizr-2.8.3.min.js" defer></script>
<!-- NOT deferred: the inline init call on the next line runs synchronously and needs hljs defined -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script>hljs.initHighlightingOnLoad();</script>
</head>
<body class="wy-body-for-nav" role="document">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-nav-search">
<a href="." class="icon icon-home"> PocketFlow Docs</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="./search.html" method="get">
<input type="text" name="q" placeholder="Search docs" title="Type search term here" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1 current">
<a class="current" href=".">Home</a>
<ul class="subnav">
<li class="toctree-l2"><a href="#pocketflow">PocketFlow</a></li>
<ul>
<li><a class="toctree-l3" href="#framework">Framework</a></li>
<li><a class="toctree-l3" href="#learners">Learners</a></li>
<li><a class="toctree-l3" href="#hyper-parameter-optimizers">Hyper-parameter Optimizers</a></li>
<li><a class="toctree-l3" href="#performance">Performance</a></li>
</ul>
</ul>
</li>
<li class="toctree-l1">
<a class="" href="installation/">Installation</a>
</li>
<li class="toctree-l1">
<a class="" href="tutorial/">Tutorial</a>
</li>
<li class="toctree-l1">
<span class="caption-text">Learners - Algorithms</span>
<ul class="subnav">
<li class="">
<a class="" href="cp_learner/">Channel Pruning</a>
</li>
<li class="">
<a class="" href="cpr_learner/">Channel Pruning - Remastered</a>
</li>
<li class="">
<a class="" href="dcp_learner/">Discrimination-aware Channel Pruning</a>
</li>
<li class="">
<a class="" href="ws_learner/">Weight Sparsification</a>
</li>
<li class="">
<a class="" href="uq_learner/">Uniform Quantization</a>
</li>
<li class="">
<a class="" href="nuq_learner/">Non-uniform Quantization</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Learners - Misc.</span>
<ul class="subnav">
<li class="">
<a class="" href="distillation/">Distillation</a>
</li>
<li class="">
<a class="" href="multi_gpu_training/">Multi-GPU Training</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<span class="caption-text">Hyper-parameter Optimizers</span>
<ul class="subnav">
<li class="">
<a class="" href="reinforcement_learning/">Reinforcement Learning</a>
</li>
<li class="">
<a class="" href="automl_based_methods/">AutoML-based Methods</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="" href="self_defined_models/">Self-defined Models</a>
</li>
<li class="toctree-l1">
<a class="" href="performance/">Performance</a>
</li>
<li class="toctree-l1">
<a class="" href="faq/">Frequently Asked Questions</a>
</li>
<li class="toctree-l1">
<span class="caption-text">Appendix</span>
<ul class="subnav">
<li class="">
<a class="" href="pre_trained_models/">Pre-trained Models</a>
</li>
<li class="">
<a class="" href="test_cases/">Test Cases</a>
</li>
<li class="">
<a class="" href="reference/">Reference</a>
</li>
</ul>
</li>
</ul>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href=".">PocketFlow Docs</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href=".">Docs</a> »</li>
<li>Home</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main">
<div class="section">
<h1 id="pocketflow">PocketFlow</h1>
<p>PocketFlow is an open-source framework for compressing and accelerating deep learning models with minimal human effort. Deep learning is widely used in various areas, such as computer vision, speech recognition, and natural language translation. However, deep learning models are often computationally expensive, which limits further applications on mobile devices with limited computational resources.</p>
<p>PocketFlow aims at providing an easy-to-use toolkit for developers to improve the inference efficiency with little or no performance degradation. Developers only need to specify the desired compression and/or acceleration ratios and then PocketFlow will automatically choose proper hyper-parameters to generate a highly efficient compressed model for deployment.</p>
<h2 id="framework">Framework</h2>
<p>The proposed framework mainly consists of two categories of algorithm components, <em>i.e.</em> learners and hyper-parameter optimizers, as depicted in the figure below. Given an uncompressed original model, the learner module generates a candidate compressed model using some randomly chosen hyper-parameter combination. The candidate model's accuracy and computation efficiency is then evaluated and used by hyper-parameter optimizer module as the feedback signal to determine the next hyper-parameter combination to be explored by the learner module. After a few iterations, the best one of all the candidate models is output as the final compressed model.</p>
<p><img alt="Framework Design" src="pics/framework_design.png" /></p>
<h2 id="learners">Learners</h2>
<p>A learner refers to some model compression algorithm augmented with several training techniques as shown in the figure above. Below is a list of model compression algorithms supported in PocketFlow:</p>
<table>
<thead>
<tr>
<th align="left">Name</th>
<th align="left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><code>ChannelPrunedLearner</code></td>
<td align="left">channel pruning with LASSO-based channel selection (He et al., 2017)</td>
</tr>
<tr>
<td align="left"><code>DisChnPrunedLearner</code></td>
<td align="left">discrimination-aware channel pruning (Zhuang et al., 2018)</td>
</tr>
<tr>
<td align="left"><code>WeightSparseLearner</code></td>
<td align="left">weight sparsification with dynamic pruning schedule (Zhu & Gupta, 2017)</td>
</tr>
<tr>
<td align="left"><code>UniformQuantLearner</code></td>
<td align="left">weight quantization with uniform reconstruction levels (Jacob et al., 2018)</td>
</tr>
<tr>
<td align="left"><code>UniformQuantTFLearner</code></td>
<td align="left">weight quantization with uniform reconstruction levels and TensorFlow APIs</td>
</tr>
<tr>
<td align="left"><code>NonUniformQuantLearner</code></td>
<td align="left">weight quantization with non-uniform reconstruction levels (Han et al., 2016)</td>
</tr>
</tbody>
</table>
<p>All the above model compression algorithms can be trained with fast fine-tuning, which is to directly derive a compressed model from the original one by applying either pruning masks or quantization functions. The resulting model can be fine-tuned with a few iterations to recover the accuracy to some extent. Alternatively, the compressed model can be re-trained with the full training data, which leads to higher accuracy but usually takes longer to complete.</p>
<p>To further reduce the compressed model's performance degradation, we adopt network distillation to augment its training process with an extra loss term, using the original uncompressed model's outputs as soft labels. Additionally, multi-GPU distributed training is enabled for all learners to speed-up the time-consuming training process.</p>
<h2 id="hyper-parameter-optimizers">Hyper-parameter Optimizers</h2>
<p>For model compression algorithms, there are several hyper-parameters that may have a large impact on the final compressed model's performance. It can be quite difficult to manually determine proper values for these hyper-parameters, especially for developers that are not very familiar with algorithm details. Recently, several AutoML systems, <em>e.g.</em> <a href="https://cloud.google.com/automl/">Cloud AutoML</a> from Google, have been developed to train high-quality machine learning models with minimal human effort. Particularly, the AMC algorithm (He et al., 2018) presents promising results for adopting reinforcement learning for automated model compression with channel pruning and fine-grained pruning.</p>
<p>In PocketFlow, we introduce the hyper-parameter optimizer module to iteratively search for the optimal hyper-parameter setting. We provide several implementations of hyper-parameter optimizer, based on models including Gaussian Processes (GP, Mockus, 1975), Tree-structured Parzen Estimator (TPE, Bergstra et al., 2013), and Deep Deterministic Policy Gradient (DDPG, Lillicrap et al., 2016). The hyper-parameter setting is optimized through an iterative process. In each iteration, the hyper-parameter optimizer chooses a combination of hyper-parameter values, and the learner generates a candidate model with fast fine-tuning. The candidate model is evaluated to calculate the reward of the current hyper-parameter setting. After that, the hyper-parameter optimizer updates its model to improve its estimation on the hyper-parameter space. Finally, when the best candidate model (and corresponding hyper-parameter setting) is selected after some iterations, this model can be re-trained with full data to further reduce the performance loss.</p>
<h2 id="performance">Performance</h2>
<p>In this section, we present some of our results for applying various model compression methods for ResNet and MobileNet models on the ImageNet classification task, including channel pruning, weight sparsification, and uniform quantization.
For complete evaluation results, please refer to <a href="https://pocketflow.github.io/performance/">here</a>.</p>
<h3 id="channel-pruning">Channel Pruning</h3>
<p>We adopt the DDPG algorithm as the RL agent to find the optimal layer-wise pruning ratios, and use group fine-tuning to further improve the compressed model's accuracy:</p>
<table>
<thead>
<tr>
<th align="center">Model</th>
<th align="center">FLOPs</th>
<th align="center">Uniform</th>
<th align="center">RL-based</th>
<th align="center">RL-based + Group Fine-tuning</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">MobileNet-v1</td>
<td align="center">50%</td>
<td align="center">66.5%</td>
<td align="center">67.8% (+1.3%)</td>
<td align="center">67.9% (+1.4%)</td>
</tr>
<tr>
<td align="center">MobileNet-v1</td>
<td align="center">40%</td>
<td align="center">66.2%</td>
<td align="center">66.9% (+0.7%)</td>
<td align="center">67.0% (+0.8%)</td>
</tr>
<tr>
<td align="center">MobileNet-v1</td>
<td align="center">30%</td>
<td align="center">64.4%</td>
<td align="center">64.5% (+0.1%)</td>
<td align="center">64.8% (+0.4%)</td>
</tr>
<tr>
<td align="center">MobileNet-v1</td>
<td align="center">20%</td>
<td align="center">61.4%</td>
<td align="center">61.4% (+0.0%)</td>
<td align="center">62.2% (+0.8%)</td>
</tr>
</tbody>
</table>
<h3 id="weight-sparsification">Weight Sparsification</h3>
<p>Comparing with the original algorithm (Zhu & Gupta, 2017) which uses the same sparsity for all layers, we incorporate the DDPG algorithm to iteratively search for the optimal sparsity of each layer, which leads to the increased accuracy:</p>
<table>
<thead>
<tr>
<th align="center">Model</th>
<th align="center">Sparsity</th>
<th align="center">(Zhu & Gupta, 2017)</th>
<th align="center">RL-based</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">MobileNet-v1</td>
<td align="center">50%</td>
<td align="center">69.5%</td>
<td align="center">70.5% (+1.0%)</td>
</tr>
<tr>
<td align="center">MobileNet-v1</td>
<td align="center">75%</td>
<td align="center">67.7%</td>
<td align="center">68.5% (+0.8%)</td>
</tr>
<tr>
<td align="center">MobileNet-v1</td>
<td align="center">90%</td>
<td align="center">61.8%</td>
<td align="center">63.4% (+1.6%)</td>
</tr>
<tr>
<td align="center">MobileNet-v1</td>
<td align="center">95%</td>
<td align="center">53.6%</td>
<td align="center">56.8% (+3.2%)</td>
</tr>
</tbody>
</table>
<h3 id="uniform-quantization">Uniform Quantization</h3>
<p>We show that models with 32-bit floating-point number weights can be safely quantized into their 8-bit counterpart without accuracy loss (sometimes even better!).
The resulting model can be deployed on mobile devices for faster inference (Device: XiaoMi 8 with a Snapdragon 845 CPU):</p>
<table>
<thead>
<tr>
<th align="center">Model</th>
<th align="center">Acc. (32-bit)</th>
<th align="center">Acc. (8-bit)</th>
<th align="center">Time (32-bit)</th>
<th align="center">Time (8-bit)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">MobileNet-v1</td>
<td align="center">70.89%</td>
<td align="center">71.29% (+0.40%)</td>
<td align="center">124.53</td>
<td align="center">56.12 (2.22<span><span class="MathJax_Preview">\times</span><script type="math/tex">\times</script></span>)</td>
</tr>
<tr>
<td align="center">MobileNet-v2</td>
<td align="center">71.84%</td>
<td align="center">72.26% (+0.42%)</td>
<td align="center">120.59</td>
<td align="center">49.04 (2.46<span><span class="MathJax_Preview">\times</span><script type="math/tex">\times</script></span>)</td>
</tr>
</tbody>
</table>
<ul>
<li>All the reported times are in milliseconds.</li>
</ul>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="installation/" class="btn btn-neutral float-right" title="Installation">Next <span class="icon icon-circle-arrow-right"></span></a>
</div>
<hr/>
<div role="contentinfo">
<!-- Copyright etc -->
</div>
Built with <a href="https://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<div class="rst-versions" role="note" style="cursor: pointer">
<span class="rst-current-version" data-toggle="rst-current-version">
<span style="margin-left: 15px"><a href="installation/" style="color: #fcfcfc">Next »</a></span>
</span>
</div>
<script>var base_url = '.';</script>
<script src="js/theme.js" defer></script>
<script src="mathjax-config.js" defer></script>
<script src="MathJax.js?config=TeX-AMS-MML_HTMLorMML" defer></script>
<script src="search/main.js" defer></script>
</body>
</html>
<!--
MkDocs version : 1.0.4
Build Date UTC : 2019-05-06 16:20:07
-->