Skip to content

Commit 5251a27

Browse files
committed
60017: Started notes for sys perf engineering
1 parent 56efb62 commit 5251a27

File tree

10 files changed

+198
-0
lines changed

10 files changed

+198
-0
lines changed

60017 - System Performance Engineering/60017 - System Performance Engineering.tex

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,7 @@
1313
\tableofcontents
1414
\newpage
1515

16+
\addchapter{introduction}
17+
1618
\end{document}
1719

60017 - System Performance Engineering/introduction/code/.gitkeep

Whitespace-only changes.

60017 - System Performance Engineering/introduction/diagrams/.gitkeep

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<mxfile host="Electron" modified="2023-01-16T20:07:49.862Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/20.3.0 Chrome/104.0.5112.114 Electron/20.1.3 Safari/537.36" etag="7v7YTcybRYEEPs22Lumb" version="20.3.0" type="device"><diagram id="I-BKUmwlSTwjugFMJEBM" name="Page-1">5ZZNb6MwEIZ/DcdGmI+EHhv6sYfNqlIOOa4MDGDV2Mg4Ddlfv2MwSZwPbSttT42SyLwzjD3zPijxwrTpXxRt65UsgHuBX/Re+OgFQTIn+G2E/SjEZD4KlWLFKJGjsGZ/wIq+VbesgM5J1FJyzVpXzKUQkGtHo0rJnZtWSu7u2tIKLoR1TvmlumGFrm1bweKo/wBW1dPOZH4/Rho6JdsSXU0LuRulobnwyQtTJaUeV02fAjezm+YyTuD5RvRwMAVCf+SGuL6Lyvnm13NHkrdN98LzfnWXjFXeKd/ahu1h9X6awDsozXAgP2kG/FV2TDMpMJRJrWXjhcsp4YGzygS0bFGtdcPxguASO29NsaavDCOzjHYsn9F8q+G3VoyKyuy1HJjxZzEuSym0ZSH0zTXjPJVcquFIYVmWQZ6bylrJNziJ+MMLI7YvPBr0NwdGDjYgviAb0GqPKfaGe4vg3iVyd8QgSKxWnyBwSKQWvepQ+egOLqxBnzArumJWZN5xmkHFhLdYInWK9d7iETXsXKOE2ALfGwkBiVPzHhY2/NC2HN3L0IIpBUThlArmHHtbZgpXlR6GO2x7TspQ0XXe8UdIAWdmWoladHK0BtQVphpWFAMku5ppWLc0N3vuECfUlNyKAszk/Cvs/AcSyBkJySUK5BoKyVeREN4mwdq6okxo/ByM/V6OBQvXssCPL59ecsWy8KssC/5p2SuoUqqGYoffzzASuYZFH3zEos/7hZfHX90hdvLXJXz6Cw==</diagram></mxfile>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<mxfile host="Electron" modified="2023-01-16T22:19:21.338Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/20.3.0 Chrome/104.0.5112.114 Electron/20.1.3 Safari/537.36" etag="vchhQO5gYYxlygstyv9B" version="20.3.0" type="device"><diagram id="qHSgarKQljRsPdEm8rOu" name="Page-1">5VpLd5s4FP41XiYHiVe8TNNk2jOdmZ7xok13CgjQVCAqhG3m148AySCIHZI6we5sEu7VFUhX33cfURb2Tbr9jaM8+YOFmC6gFW4X9vsFhL4F5M9aUbUKAO1lq4k5CZWuU6zIv1gpLaUtSYgLw1AwRgXJTWXAsgwHwtAhztnGNIsYNb+aoxiPFKsA0bH2CwlF0mqvoN/pP2ASJ/rLwFP7S5E2VjspEhSyTU9l3y7sG86YaJ/S7Q2mtfO0X9p5d3tGdwvjOBNTJrAy+ghXG/ohuAryEv3+xf62vgBQLU5Uesc4lA5QIuMiYTHLEL3ttO84K7MQ16+1pNTZfGIsl0oglf9gISp1mqgUTKoSkVI1irdEfO0939evunSV9H6r3twIlRYywauvjSF0tXzfH+zmNZKeGLFMqJWAq2Y0vK6BIeUHyoLvreqO0N3qMo1DUH+odVDtlb2OV6qClTzAB7ztKAAjHmNxwM7dwUPyCrMUy/3IeRxTJMjaXAdSAI93dh0G5IOCwTMgoRa5RrRUX/oTbxY1uDwqF/3ugRt48X6UNYIbN18Ujd+upQEE+bZxnh6XT3H9+w4jUXI8gl0HqvoMNgkReJWjxp0bGVpMAKGHgtFS4Gse6KOqtZ3kSLEQnH3fsRaaUIBgd7RrzAXeHj7c8WGoCUvFbh3ftLzpggWwlC7pBwqtPPr5ATDy7RtSuqPxfW/kSUo/i9GN9BlzIh2G+enR3J1Ic39Omrsjmt/Wz0jg5xEdeHuILg8oYjxFWTA72YF3HLJDx2S7bc/Pdmt0jpWsl84lAkhhSORfNSr4E6PC8iAEL2T5Y2DwQmNwcthQ7/7MiNxCZ8KiqJALGwJyt4SfwOi4pMjY2dSdByFqALTD6+mXolPRCOyn4GgtraWBSPiTgNSvHsx4PXz6I3gauavLhm1iW5VRRALSuPxuBONxIuoBUPaAeW2XbuO6Xb6MKNsECeLiMsQBKQjL9qTDIXKOkcyAaxyb4z+SzNzHkpnzWslsOTqIkGXj6uGFLpb8TUmGBONv6ORBxeB5EysG79UKBv9sIu8Lo+ypFwJtSJ0Se705GwS9zB4bP4ZysySqxjHxr1yQlBRyWTKEPTKaS8yUGRHV7K2AexxiO9AktjN/4683caqtgHWY7YfqLHC2dVbL4Qlk7/1NeE+hdaXraF35Q/fUK3/4y1b+/wNEgic70aUNB83oy2p/6QpU9QzyGqDF/tbA983g6zqDq48n7KFpLx/aFbx9Z+yN+NHlSlIH72EqvV4jQtEDPVpV/PaNhzs4jRNoPKA9a5p8UVF86Z/rNRhYToxAs96DgXE3+jcOcUHibO4i9liXV459crdXOqGcERPPtxTQ2fq0iQjHVxyq3Zz9YuloRBx0k696sSTF7r9O2kqm+98d+/Y/</diagram></mxfile>

60017 - System Performance Engineering/introduction/images/.gitkeep

Whitespace-only changes.
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
\chapter{Introduction}
2+
3+
\section{Logistics}
4+
% Systems not algos
5+
% Hardware Efficiency
6+
7+
8+
9+
\section{What is System Performance Engineering}
10+
\begin{definitionbox}{System}
11+
A collection of components interacting to achieve a greater goal.
12+
\begin{itemize}
13+
\item Usually applicable to many domains (e.g a database, operating system, webserver). The goal is domain-agnostic
14+
\item Designed to be flexible at runtime (deal with other interacting systems, real conditions) (e.g OS with user input, database with varying query volume and type)
15+
\item Operating conditions are unknown at development time (Database does not know schema prior, OS does not know number of users prior, Tensorflow does not know matrix dimensionality prior)
16+
\end{itemize}
17+
Large \& complex systems are typically developed over years by multiple teams.
18+
\end{definitionbox}
19+
20+
The challenge with \textit{system performance engineering} is to make systems maintainable, widely applicable and fast.
21+
\begin{center}
22+
\includegraphics[width=.6\textwidth]{introduction/images/holy_triangle.drawio.pdf}
23+
\end{center}
24+
25+
\begin{definitionbox}{System Performance Engineering}
26+
Performance engineering encompasses the techniques applied during a systems development life cycle to ensure the non-functional
27+
requirements for performance will be met.
28+
\begin{itemize}
29+
\item Functional requirements (correctness, features) are assumed to be met.
30+
\item
31+
\end{itemize}
32+
\end{definitionbox}
33+
34+
\begin{definitionbox}{High Performance Computing}
35+
High performance computing uses highly distributed \& parallel computer systems (e.g supercomputers, clusters) to solve advanced problems.
36+
\begin{itemize}
37+
\item Focuses on solving a single computationally difficult problem.
38+
\item Workloads are well defined and known at development time.
39+
\item Sometimes supported by custom hardware (e.g FPGAs, ASICs, custom CPU extensions)
40+
\end{itemize}
41+
\end{definitionbox}
42+
43+
\section{Performance Engineering Process}
44+
\subsection{Metrics}
45+
A \textit{target metric} is used to quantitatively measure any improvement in \textit{performance} (e.g for use in a \textit{SLA}).
46+
The metric needs to be well defined:
47+
\begin{itemize}
48+
\item When measuring starts (e.g when to measure latency from)
49+
\item Where measuring is done (is server response time measured on server, on a client, under what conditions?)
50+
\end{itemize}
51+
52+
\begin{examplebox}{Imperials}
53+
Provide some examples of metrics regarding a database.
54+
\tcblower
55+
\begin{center}
56+
\begin{tabular}{l p{.7\textwidth}}
57+
\textbf{Latency} & Measuring time to query, planning time, the whole system's response time over a network. \\
58+
\textbf{Throughput} & Measure the maximum requests/second possible (often used to compare webservers) \\
59+
\textbf{Memory Usage} & Measurable, but must be careful (e.g OS interaction) \\
60+
\textbf{Scalability} & Can define a metric regarding how quickly some metric (e.g throughput) increases with scale (e.g instances of a distributed system) \\
61+
\end{tabular}
62+
\end{center}
63+
\end{examplebox}
64+
65+
It is also important to define when a requirement is satisfied.
66+
\begin{itemize}
67+
\item Setting an optimisation budget (e.g in developer hours)
68+
\item Setting a target or threshold (e.g $x\%$ over baseline implementation)
69+
\item Combination of both
70+
\end{itemize}
71+
72+
\subsection{Quality of Service (QoS) Objectives}
73+
\begin{definitionbox}{Quality of Service Objectives}
74+
A set of statistical properties of a metric that must hold for a system.
75+
\begin{itemize}
76+
\item Can include preconditions (e.g to define the environment/setup)
77+
\item Can be in conflict with functional requirements (e.g framerate vs realism in graphics)
78+
\end{itemize}
79+
\end{definitionbox}
80+
\begin{examplebox}{Game On}
81+
Give an example of a basic QoS Objective for a game's framerate.
82+
\tcblower
83+
The game's framerate will be on average (over \textit{\dots preconditions \dots}) $60$\,fps if run on a GPU rated at $50$\,GFLOPS or higher.
84+
\end{examplebox}
85+
86+
\subsection{Service Level Agreements}
87+
\begin{definitionbox}{Service Level Agreements (SLAs)}
88+
Legal contracts specifying \textit{QoS objectives} and penalties for violation.
89+
\begin{itemize}
90+
\item Non-functional requirements (not about system correctness)
91+
\item Can be legally enforced
92+
\end{itemize}
93+
\end{definitionbox}
94+
95+
\begin{sidenotebox}{Amazon}
96+
Amazon Web Services (AWS) provides a set of \textit{service level agreements} relating to performance and availability. Violations are resolved by providing customers with service credits.
97+
\href{https://aws.amazon.com/compute/sla/}{Amazon SLAs}
98+
\end{sidenotebox}
99+
100+
When defining requirements for an SLA:
101+
\begin{center}
102+
\begin{tabular}{l p{.7\textwidth}}
103+
\textbf{Specific} & State exact acceptance criteria (numerical terms). \\
104+
\textbf{Measurable} & Ensure the metrics used can actually be measured. \\
105+
\textbf{Acceptable} & Requirements should be rigorous such that meeting them is a meaningful success. \\
106+
\textbf{Realisable} & Counter to \textbf{Acceptable} - need to be lenient enough to allow implementation. \\
107+
\textbf{Thorough} & All necessary aspects of the system are specified. \\
108+
\end{tabular}
109+
\end{center}
110+
111+
\section{Performance Evaluation Techniques}
112+
\subsection{Measuring}
113+
\begin{itemize}
114+
\item Performed on the actual system (can be prototype or production/final).
115+
\item Can be difficult and costly (need to mitigate any impact of the measuring system on the system itself).
116+
\item As it is on the actual system, it can (if done properly) yield accurate results.
117+
\end{itemize}
118+
119+
The two main types of measurement are:
120+
\begin{tcbraster}[raster columns=2,raster equal height]
121+
\begin{definitionbox}{Monitoring}
122+
Measuring in production to get real usage performance metrics.
123+
\begin{itemize}
124+
\item Observe the system in its production environment
125+
\item Collect usage statistics and analyse data (e.g users' preferred query types/structure, schema designs for databases)
126+
\item Can monitor for and report SLA violations.
127+
\end{itemize}
128+
\end{definitionbox}
129+
\begin{definitionbox}{Benchmarking}
130+
Measuring system performance in a controlled setting (e.g lab).
131+
\begin{itemize}
132+
\item The system is set into a predefined (or steady/hot) state
133+
\item Perform some workload while measuring performance metrics.
134+
\end{itemize}
135+
\end{definitionbox}
136+
\end{tcbraster}
137+
Benchmarking requires representative workloads in order to get metrics likely to be representative of a production environment.
138+
\begin{definitionbox}{Batch Workload}
139+
Program has access to entire batch at start of the benchmark.
140+
\begin{itemize}
141+
\item Useful when a throughput metric is being measured
142+
\item Simple to generate, and can even be recorded from a production environment.
143+
\end{itemize}
144+
\end{definitionbox}
145+
\begin{definitionbox}{Interactive Workload}
146+
A program generates requests to pass to the system being benchmarked.
147+
\begin{itemize}
148+
\item Useful when a latency metric is being measured.
149+
\item Workload generator needs to be fast enough to saturate the system being benchmarked.
150+
\item Often more representative of a production environment (e.g an operating system receives a workload over time)
151+
\end{itemize}
152+
\end{definitionbox}
153+
\begin{definitionbox}{Hybrid}
154+
A common setup combining batch and interactive workload strategies (e.g sample random queries from a predefined work set).
155+
\end{definitionbox}
156+
157+
In order to get useful results from which
158+
159+
\section{Optimisation Loop}
160+
\begin{center}
161+
\includegraphics[width=.6\textwidth]{introduction/images/optimisation_loop.drawio.pdf}
162+
\end{center}
163+
164+
\subsection{Parameters}
165+
\begin{definitionbox}{Performance Parameters}
166+
System and workload characteristics that affect performance.
167+
\\
168+
\\ \begin{tabular}{l p{.8\textwidth}}
169+
\textbf{System Parameters} & Do not change as the system runs (instruction costs, caches) \\
170+
\textbf{Workload Parameters} & Change as the system runs (available memory, users) \\
171+
\textbf{Numeric Parameters} & Quantitative (e.g CPU frequency, available memory, number of users) \\
172+
\textbf{Nominal Parameters} & Qualitative parameters (Runs on battery, has a GPU, runs in a VM) \\
173+
\end{tabular}
174+
\\
175+
\\ The term \textit{Resource Parameters/Resources} refers to the parameters of the underlying platform (e.g CPU, memory).
176+
\end{definitionbox}
177+
178+
\unfinished
179+
180+
\subsection{Utilisation}
181+
\begin{definitionbox}{Utilisation}
182+
\end{definitionbox}
183+
\begin{definitionbox}{Bottleneck}
184+
\end{definitionbox}
185+
186+
\subsection{Performance-Dominating Code Paths}
187+
188+
189+
\subsection{Parameter Tuning}
190+
191+
\subsection{Analytical Performance Models}
192+
193+
\subsection{Simulation}
194+

0 commit comments

Comments
 (0)