% slides.tex
% (source extracted from a GitHub page view; non-document page chrome removed)
% 45 minutes including demos of JIT and pkg_composability, excluding
% Julia installation.
\documentclass[t]{beamer}
\usepackage{color}
\usepackage{amsmath}
\usepackage{pdfpages}
\definecolor{dkgreen}{rgb}{0,0.5,0}
\definecolor{gray}{rgb}{0.5,0.5,0.5}
\definecolor{Maroon}{rgb}{0.6,0,0}
\newcommand\df{\bfseries\color{Maroon}}
\newcommand\dff{\bfseries\color{dkgreen}}
\setbeamercolor{background canvas}{bg=}
%\usetheme{TuringLight}
%\usetheme{TuringDark}
% Presentation data
\title{\color{Maroon} Getting started with Julia and machine learning}
%\subtitle{\small An introduction}
\date{July, 2022}
\author{Anthony Blaom, Samuel Okon}
% Uncomment any of these lines below to set custom size for each of the font sizes.
% The default value is shown in the comment.
%\setlength{\titlefontsize}{6.875\basefontsize}
%\setlength{\subtitlefontsize}{4.375\basefontsize}
%\setlength{\frametitlesize}{2.625\basefontsize}
%\setlength{\framesubtitlesize}{1.625\basefontsize}
%\setlength{\bodytextsize}{2\basefontsize}
%\setlength{\blocktitlesize}{\bodytextsize}
%\setlength{\blockbodysize}{\bodytextsize}
% Start document
\begin{document}
% Title slide (details filled from presentation data fields above)
% \begin{frame}
% \frametitle{Installing the tutorials}
% Please follow instructions at:
% \begin{center}
% {\df github.com/ablaom/MachineLearningInJulia2020}
% \end{center}
% \end{frame}
\begin{frame}
\titlepage
\end{frame}
\iffalse
\begin{frame}
\frametitle{Interacting}
\begin{block}{Asking Anthony/Sam a question:}
{\df\ttfamily https://pigeonhole.at/JULIA2}
\end{block}
\begin{block}{Participant forum:}
Streamyard chat ({\em not} monitored by panelists)
\end{block}
\end{frame}
\begin{frame}
\frametitle{Outline}
\begin{enumerate}
\item Workshop resources
\item Supervised Learning (mini lecture)
\item Julia basics
\item Data frames
\item Supervised Learning using MLJ
\end{enumerate}
\end{frame}
\fi
\begin{frame}
\frametitle{Introducing Julia}
Julia is an open source, general-purpose programming language, in
the fourth year of its first {\df stable} release 1.0.\pause
\begin{block}{Key takeaways}
\begin{itemize}
\item Julia lets you write {\df fast}
code {\df fast}.\pause
\item Maximally hackable.
\end{itemize}
\end{block}
\end{frame}
% lego analogy
\includepdf[scale=1.3,pages={1,3}]{lego.pdf}
\begin{frame}
\frametitle{Introducing Julia}
Julia is an open source, general-purpose programming language, in
the fourth year of its first {\df stable} release 1.0.
\begin{block}{Key takeaways}
\begin{itemize}
\item Julia lets you write {\df fast}
code {\df fast}.
\item Maximally hackable.
\item Julia's design makes {\df extending} Julia code easy,
promoting a relatively rapid expansion of third party libraries.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\frametitle{Julia's secret sauce}
\begin{itemize}
\item {\df Just-in-time compilation}
\item {\df Multiple dispatch}
\item {\df Abstract type system} (typing is dynamic, nominative and parametric)
\end{itemize}
\end{frame}
\begin{frame}
\begin{block}{Workshop resources:}
{\large\texttt{github.com/ablaom/HelloJulia.jl}}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Secret sauce demo:}
{\large\texttt{github.com/ablaom/HelloJulia.jl}}\newline
{\large\mbox{~~}\texttt{/tree/dev/notebooks/secret\_sauce}}
\end{block}
\end{frame}
%
\begin{frame}
\begin{block}{Package composability demo:}
{\large\texttt{github.com/ablaom/HelloJulia.jl}}\newline
{\large\mbox{~~}\texttt{/tree/dev/notebooks/pkg\_composability}}
\end{block}
\end{frame}
\begin{frame}[plain]
% \begin{center}
\includegraphics[scale=0.25]{mandel.png}
% \end{center}
\end{frame}
% stars
\begin{frame}[plain]
% \begin{center}
\includegraphics[scale=0.30]{julia_stars.png}
% \end{center}
\end{frame}
% 2018 conference (2021: 3 x talks, 50 x registration):
\includepdf[pages=10]{Sebastian.pdf}
% growth stats 2020 - 2021:
\begin{frame}[plain]
% \begin{center}
\includegraphics[scale=0.70]{julia_stats.png}
% \end{center}
\end{frame}
% high profile applications:
\includepdf[pages=18-21]{julia_for_ecologists.pdf}
\includepdf[pages=23]{julia_for_ecologists.pdf}
% \begin{frame}
% \frametitle{The Two Language Problem}
% \begin{itemize}
% \item Older programming languages like C and FORTRAN are designed to {\em
% run} fast. Almost all {\df computationally demanding} tasks
% (weather forecasting, machine learning, etc) are performed by
% calling code written in these languages. \pause
% \item ``Scripting'' languages like python, MATLAB, and R allow you
%   to quickly {\df wrap} computational tasks into {\df customized and easily
%   modified workflows}. They also accelerate {\df testing and
%   prototyping} of new computational algorithms.\pause
% \item These languages are {\df too slow} for computationally intensive tasks.
% \end{itemize}
% \end{frame}
\begin{frame}
\frametitle{Popular libraries (packages)}
These are all pretty mature:
\begin{itemize}
\item DataFrames.jl and CSV.jl - {\df in-memory data manipulation}
\item Plots.jl (also easy to call R's ggplot())
\item $\star$ JuMP.jl - {\df constrained optimization}
\item $\star$ DifferentialEquations.jl
\item StatsModels.jl, GLM.jl - {\df traditional stats models}
\item Flux.jl - {\df deep learning}
\item $\star$ Turing.jl, Soss.jl, ... {\df probabilistic programming}
\item MLJ.jl - multi-paradigm {\df machine learning} platform
\item Pluto.jl - {\df ``reactive'' notebooks}
\end{itemize}
\end{frame}
% lines of code versus mean run time:
\includepdf[pages=11]{julia_for_ecologists.pdf}
\begin{frame}
\frametitle{Why are two languages a problem?}
\begin{itemize}
\item Can't easily {\df modify} core algorithms
\item Barrier to {\df understanding algorithms} even if not seeking to modify them
\item Complicates installation of a complete software stack
(package {\df dependency hell}, manual interventions)
\item Barrier to {\df transparency} and {\df reproducibility}
\item {\df Hampers innovation}, development of novel algorithms
\item Barrier to {\df composing libraries} (e.g., automatic differentiation)
\end{itemize}
\end{frame}
% slows innovation engineers <--> researchers
\includepdf[scale=1.3,pages={1,2,4}]{slows_innovation.pdf}
\begin{frame}[plain]
\frametitle{Fast code written fast: DifferentialEquations.jl}
% \frametitle{DifferentialEquations.jl}
% \begin{center}
\includegraphics[scale=0.27]{de_solver_software_comparsion.pdf}
% \end{center}
\end{frame}
% % data science pipeline
% \includepdf[pages=3-5]{Sebastian.pdf}
% \begin{frame}[plain]
% \includegraphics[scale=0.45]{benchmarks.png}
% \end{frame}
% \begin{frame}
% \frametitle{The Expression Problem}
% As in any kind of architecture, the central concerns of a
% programming language are {\df form} and {\df function}. Roughly
% speaking, the {\df Expression Problem} is the problem of how to
% represent data (form) and how to articulate required behaviour
% (function) in way that allows {\df extension} of both in an optimal
% and safe way.\pause
% \begin{itemize}
% \item Object oriented languages typically score poorly here (Ruby has a
% dirty way, Scala a clean workaround).
% \item Julia scores well. {\df Proof:} \pause Large code-reuse, package composability
% \end{itemize}
% \end{frame}
\begin{frame}
\frametitle{Other features}
\begin{itemize}
\item Fast user-defined composite types (C-like structs)
\item Lisp-like macros/metaprogramming
\item Call C functions directly
\item Can wrap Python, R, Java/Scala
\item State-of-the-art distributed computing and multi-threading support
\end{itemize}
\end{frame}
% code snippet:
\begin{frame}
\frametitle{Other features (continued)}
\begin{itemize}
\item First-class math support
\item Excellent REPL (console)
\item Built-in package manager
\item Automatic differentiation (Zygote.jl)
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Drawbacks}
For most users, all serious drawbacks derive from Julia's young age:
\begin{itemize}
\item Smaller number of libraries
\item Smaller community of users
\item Some interfaces and tooling less polished
\item More bugs (in libraries, few in language itself)
\end{itemize}
Other drawbacks and annoyances:
\begin{itemize}
\item Limited support for exporting programs as stand-alone executables.
\item The ``time to first plot'' problem.
\end{itemize}
\end{frame}
% %% Intro to machine learning process
\begin{frame}
\vspace{5\baselineskip}
\begin{center}
{\Large machine learning $\ne$ deep learning\\
\mbox{}\hspace{4cm} \pause {\normalfont (neural networks)}}
\end{center}
\end{frame}
\begin{frame}
\frametitle{A plethora of machine learning models}
\begin{center}
\includegraphics[scale=0.25]{plethora_of_models.png}
\end{center}
{\tiny Figure source: Oleksii Trekhleb ({\ttfamily https://github.com/trekhleb/homemade-machine-learning})}
\end{frame}
\begin{frame}
\frametitle{Supervised Learning}
Learning to {\df predict} some target variable {\df\large y} from a
knowledge of some other variables {\df \large X} (the {\it input features}).\pause
\begin{table}
\begin{tabular}{|l|l|}
\hline
${\mathbf X}$ & ${\mathbf y}$ \\ \hline
longitude, latitude, temperature, pressure & wind speed \\
individual DNA gene sequence & got diabetes\\
word frequencies in email & is junk? \\
an image of handwritten digit & $0, 1, 2, \ldots, 8$ or $9$ \\
num rooms, floor area, zip code & selling price \\
{\bf Titanic:} sex, class, fare, where embarked & survived? \\
\hline
\end{tabular}
\end{table}
\end{frame}
\begin{frame}
\frametitle{Supervised Learning}
Learning to {\df predict} some target variable {\df\large y} from a
knowledge of some other variables {\df \large X} (the {\it input features}).
\begin{center}
\includegraphics[scale=0.17]{X.png}\mbox{~~~~}
\includegraphics[scale=0.17]{y.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Supervised Learning}
\begin{center}
\includegraphics[scale=0.6]{overfitting1.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Supervised Learning}
\begin{center}
\includegraphics[scale=0.6]{overfitting2.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Supervised Learning}
\begin{center}
\includegraphics[scale=0.6]{overfitting3.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Supervised Learning}
\begin{center}
\includegraphics[scale=0.6]{overfitting4.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Supervised Learning}
\begin{center}
\includegraphics[scale=0.6]{overfitting5.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Supervised Learning}
\begin{center}
\includegraphics[scale=0.6]{overfitting6.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Supervised Learning}
\begin{block}{Setup}
Got historical data with:
\begin{itemize}
\item {\df features} $X$ (aka, inputs, patterns)
\item {\df target} $y$ (aka, labels)\pause
\end{itemize}
\end{block}
\begin{block}{Basic machine learning workflow}
\begin{enumerate}
\item Split historical data into {\df training} and {\df test} sets.\pause
\item Train the model using the {\df training} set\pause
\item Get model predictions $\hat y$ given $X$ for {\df test} set.\pause
\item Compare $\hat y$ with $y$ from the {\df test} set using some {\df measure} (e.g.,
mean squared error).\pause
\item Change hyperparameters of model and repeat.
\end{enumerate}
\end{block}
\end{frame}
\begin{frame}[plain]
\begin{center}
\includegraphics[scale=0.55]{titanic_tree.jpg}
\end{center}
\end{frame}
\begin{frame}
\begin{block}{Tutorials}
{\large\texttt{github.com/ablaom/HelloJulia.jl}}\newline
{\large\mbox{~~}\texttt{/tree/dev/notebooks/}}
\end{block}
\end{frame}
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: