@@ -93,6 +93,8 @@ print(times.average)
9393
9494```
9595## Code profiling
96+
97+ ### Profiling time consumption
9698Let's see the frequency and timing of each line of the code
9799``` python
98100! pip install line_profiler
@@ -107,4 +109,130 @@ Let's see the frequency and timing of each lines of a code
107109
108110```
109111
112+ ### Profiling memory consumption
113+
114+ ``` python
115+ ! pip install memory_profiler
116+
117+ %load_ext memory_profiler
118+
119+ # # Magic command
120+ %mprun
121+
122+ # # profile a function
123+ %mprun -f function_name function_name(arg1, arg2, arg3)
124+
125+ ```
126+
127+ ## Efficient Functions
128+
129+ ### zip
130+
131+ ``` python
132+ new_list = [* zip (l1,l2,l3,... )] # # Note that each l_i can be any iterable; for example, a list can be zipped with a map object.
133+ ```
134+
135+ ### Counter
136+
137+ ``` python
138+
139+ from collections import Counter
140+
141+ # Use a list comprehension to get each name's starting letter
142+ starting_letters = [a[0 ] for a in names]
143+
144+ # Collect the count of names for each starting_letter
145+ starting_letters_count = Counter(starting_letters)
146+ ```
147+
148+
149+ ### itertools
150+
151+ #### combinations
152+ ``` python
153+ from itertools import combinations
154+
155+ list_tup = [* combinations(obj_list,num)] # # choose num from obj_list
156+
157+ ```
158+
159+ ### set
160+ set methods
161+ * intersection()
162+ * difference() # a.difference(b) = a\b
163+ * symmetric_difference() # all elements that are in exactly one of the two sets
164+ * union()
165+
166+ Use the built-in operator for membership testing:
167+ * in
168+
169+ ### Eliminating Loops
170+ Use NumPy vectorization, map, or itertools instead of explicit loops.
171+
172+ ### Writing more efficient Loops
173+
174+ Move operations outside the loop as much as possible:
175+ ``` python
176+ # Collect all possible pairs using combinations()
177+ possible_pairs = [* combinations(types, 2 )]
178+
179+ # Create an empty list called enumerated_tuples
180+ enumerated_tuples = []
181+
182+ # Add a line to append each enumerated_pair_tuple to the empty list above
183+ for i,pair in enumerate (possible_pairs, 1 ):
184+ enumerated_pair_tuple = (i,) + pair
185+ enumerated_tuples.append(enumerated_pair_tuple)
186+
187+ # Convert all tuples in enumerated_tuples to a list
188+ enumerated_pairs = [* map (list , enumerated_tuples)]
189+ print (enumerated_pairs)
190+ ```
191+
192+
193+ ## Iterating over a pandas DataFrame
194+ ``` python
195+ for i,row in df.iterrows():
196+ print (i,row)
197+ ```
198+ This is much faster than a for loop with iloc
199+
200+ However, there is an even faster approach:
201+
202+ ``` python
203+ for row_namedtuple in team_wins_df.itertuples():
204+ print (row_namedtuple.Team)
205+ ```
206+
207+ If we want to apply a function to each column (axis=0) or each row (axis=1), use df.apply.
208+
209+ ### Pandas internals
210+
211+ Even better than apply!!!
212+
213+ Vectorize Operations!
214+
215+ ``` python
216+ %% timeit
217+ win_perc_preds_loop = []
218+
219+ # Use a loop and .itertuples() to collect each row's predicted win percentage
220+ for row in baseball_df.itertuples():
221+ runs_scored = row.RS
222+ runs_allowed = row.RA
223+ win_perc_pred = predict_win_perc(runs_scored, runs_allowed)
224+ win_perc_preds_loop.append(win_perc_pred)
225+
226+ %% timeit
227+ # Apply predict_win_perc to each row of the DataFrame
228+ win_perc_preds_apply = baseball_df.apply(lambda row: predict_win_perc(row['RS'], row['RA']), axis=1)
229+
230+ %% timeit
231+ # Calculate the win percentage predictions using NumPy arrays
232+ win_perc_preds_np = predict_win_perc(baseball_df['RS'].values, baseball_df['RA'].values)
233+ baseball_df['WP_preds'] = win_perc_preds_np
234+
235+ print (baseball_df.head())
236+ ```
237+
110238
0 commit comments