@@ -93,6 +93,8 @@ print(times.average)
93
93
94
94
```
95
95
## Code profiling
96
+
97
+ ### Profiling time consumption
96
98
Let's see the hit count and timing of each line of code
97
99
``` python
98
100
! pip install line_profiler
@@ -107,4 +109,130 @@ Let's see the frequency and timing of each lines of a code
107
109
108
110
```
109
111
112
+ ### Profiling memory consumption
113
+
114
+ ``` python
115
+ ! pip install memory_profiler
116
+
117
+ % load_ext memory_profiler
118
+
119
+ # # Magic command
120
+ % mprun
121
+
122
+ # # profile a function
123
+ %mprun -f function_name function_name(arg1, arg2, arg3)
124
+
125
+ ```
126
+
127
+ ## Efficient Functions
128
+
129
+ ### zip
130
+
131
+ ``` python
132
+ new_list = [*zip(l1, l2, l3, ...)]  # Note that each li is an iterable; a list can be combined with, for example, a map object.
133
+ ```
134
+
135
+ ### Counter
136
+
137
+ ``` python
138
+
139
+ from collections import Counter
140
+
141
+ # Use list comprehension to get each name's starting letter
142
+ starting_letters = [a[0 ] for a in names]
143
+
144
+ # Collect the count of names for each starting_letter
145
+ starting_letters_count = Counter(starting_letters)
146
+ ```
147
+
148
+
149
+ ### itertools
150
+
151
+ #### combinations
152
+ ``` python
153
+ from itertools import combinations
154
+
155
+ list_tup = [* combinations(obj_list,num)] # # choose num from obj_list
156
+
157
+ ```
158
+
159
+ ### set
160
+ set methods
161
+ * intersection()
162
+ * difference() # a.difference(b) = a\b
163
+ * symmetric_difference() # all elements that are in exactly one of the two sets
164
+ * union()
165
+
166
+ Use the built-in operator for membership testing:
167
+ * in
168
+
169
+ ### Eliminating Loops
170
+ Use NumPy vectorization, map, or itertools instead of explicit loops.
171
+
172
+ ### Writing more efficient Loops
173
+
174
+ Move operations outside the loop as much as possible:
175
+ ``` python
176
+ # Collect all possible pairs using combinations()
177
+ possible_pairs = [* combinations(types, 2 )]
178
+
179
+ # Create an empty list called enumerated_tuples
180
+ enumerated_tuples = []
181
+
182
+ # Add a line to append each enumerated_pair_tuple to the empty list above
183
+ for i,pair in enumerate (possible_pairs, 1 ):
184
+ enumerated_pair_tuple = (i,) + pair
185
+ enumerated_tuples.append(enumerated_pair_tuple)
186
+
187
+ # Convert all tuples in enumerated_tuples to a list
188
+ enumerated_pairs = [* map (list , enumerated_tuples)]
189
+ print (enumerated_pairs)
190
+ ```
191
+
192
+
193
+ ## Iterating over a Pandas DataFrame
194
+ ``` python
195
+ for i,row in df.iterrows():
196
+ print (i,row)
197
+ ```
198
+ This is much faster than a for loop with iloc.
199
+
200
+ However, there is an even faster approach:
201
+
202
+ ``` python
203
+ for row_namedtuple in team_wins_df.itertuples():
204
+ print (row_namedtuple.Team)
205
+ ```
206
+
207
+ If we want to apply a function to each column (axis=0) or each row (axis=1), use df.apply.
208
+
209
+ ### Pandas internals
210
+
211
+ Even better than apply!!!
212
+
213
+ Vectorize Operations!
214
+
215
+ ``` python
216
+ %% timeit
217
+ win_perc_preds_loop = []
218
+
219
+ # Use a loop and .itertuples() to collect each row's predicted win percentage
220
+ for row in baseball_df.itertuples():
221
+ runs_scored = row.RS
222
+ runs_allowed = row.RA
223
+ win_perc_pred = predict_win_perc(runs_scored, runs_allowed)
224
+ win_perc_preds_loop.append(win_perc_pred)
225
+
226
+ %% timeit
227
+ # Apply predict_win_perc to each row of the DataFrame
228
+ win_perc_preds_apply = baseball_df.apply(lambda row: predict_win_perc(row['RS'], row['RA']), axis=1)
229
+
230
+ %% timeit
231
+ # Calculate the win percentage predictions using NumPy arrays
232
+ win_perc_preds_np = predict_win_perc(baseball_df['RS'].values, baseball_df['RA'].values)
233
+ baseball_df['WP_preds'] = win_perc_preds_np
234
+
235
+ print (baseball_df.head())
236
+ ```
237
+
110
238
0 commit comments