@@ -7,8 +7,13 @@ use smallstr::SmallString;
7
7
use std:: cell:: RefCell ;
8
8
use std:: collections;
9
9
use std:: fmt;
10
+ use std:: fs;
11
+ use std:: io:: Write ;
12
+ use std:: path:: Path ;
10
13
use std:: sync:: Mutex ;
11
14
15
+ /// The current Python callstack. We use u32 IDs instead of CallSite objects for
16
+ /// performance reasons.
12
17
#[ derive( Clone , Debug , PartialEq ) ]
13
18
struct Callstack {
14
19
calls : Vec < u32 > ,
@@ -41,18 +46,28 @@ impl Callstack {
41
46
42
47
// Per-thread call stack. Each thread lazily gets its own `Callstack`
// (initialised with `Callstack::new()`), wrapped in a `RefCell` so it can be
// borrowed mutably through the thread-local handle.
thread_local ! ( static THREAD_CALLSTACK : RefCell <Callstack > = RefCell :: new( Callstack :: new( ) ) ) ;
43
48
44
- /// A particular place where a call happened:
49
+ /// A particular place where a call happened.
45
50
#[ derive( Clone , Debug , PartialEq , Eq , Hash ) ]
46
51
pub struct CallSite {
47
- pub module_name : SmallString < [ u8 ; 24 ] > ,
48
- pub function_name : SmallString < [ u8 ; 24 ] > ,
52
+ module_name : SmallString < [ u8 ; 24 ] > ,
53
+ function_name : SmallString < [ u8 ; 24 ] > ,
54
+ }
55
+
56
+ impl CallSite {
57
+ pub fn new ( module_name : & str , function_name : & str ) -> CallSite {
58
+ CallSite {
59
+ module_name : SmallString :: from_str ( module_name) ,
60
+ function_name : SmallString :: from_str ( function_name) ,
61
+ }
62
+ }
49
63
}
50
64
51
65
impl fmt:: Display for CallSite {
52
66
fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
53
67
write ! ( f, "{}:{}" , self . module_name, self . function_name)
54
68
}
55
69
}
70
+
56
71
/// Maps CallSites to integer identifiers used in CallStacks.
57
72
struct CallSites {
58
73
max_id : u32 ,
@@ -67,6 +82,7 @@ impl CallSites {
67
82
}
68
83
}
69
84
85
+ /// Add a (possibly) new CallSite, returning its ID.
70
86
fn get_or_insert_id ( & mut self , call_site : CallSite ) -> u32 {
71
87
let max_id = & mut self . max_id ;
72
88
let result = self . callsite_to_id . entry ( call_site) . or_insert_with ( || {
@@ -77,6 +93,7 @@ impl CallSites {
77
93
* result
78
94
}
79
95
96
+ /// Get map from IDs to CallSites.
80
97
fn get_reverse_map ( & self ) -> HashMap < u32 , CallSite > {
81
98
let mut result = HashMap :: default ( ) ;
82
99
for ( call_site, csid) in & ( self . callsite_to_id ) {
@@ -86,12 +103,14 @@ impl CallSites {
86
103
}
87
104
}
88
105
106
+ /// A specific call to malloc()/calloc().
89
107
#[ derive( Clone , Debug , PartialEq ) ]
90
108
struct Allocation {
91
109
callstack : Callstack ,
92
110
size : libc:: size_t ,
93
111
}
94
112
113
+ /// The main data structure tracking everything.
95
114
struct AllocationTracker {
96
115
current_allocations : imhashmap:: HashMap < usize , Allocation > ,
97
116
peak_allocations : imhashmap:: HashMap < usize , Allocation > ,
@@ -149,22 +168,46 @@ impl<'a> AllocationTracker {
149
168
by_call
150
169
}
151
170
152
- /// Dump all callstacks in peak memory usage to format used by flamegraph.
171
+ /// Dump all callstacks in peak memory usage to various files describing the
172
+ /// memory usage.
153
173
fn dump_peak_to_flamegraph ( & self , path : & str ) {
174
+ let directory_path = Path :: new ( path) ;
175
+ if !directory_path. exists ( ) {
176
+ fs:: create_dir ( directory_path) . expect ( "Couldn't create the output directory." ) ;
177
+ } else if !directory_path. is_dir ( ) {
178
+ panic ! ( "Output path must be a directory." ) ;
179
+ }
180
+
154
181
let by_call = self . combine_callstacks ( ) ;
155
182
let lines: Vec < String > = by_call
156
183
. iter ( )
184
+ // Filter out callstacks with less than 1 KiB RAM usage.
185
+ // TODO maybe make this number configurable someday.
186
+ . filter ( |( _, size) | * * size >= 1024 )
157
187
. map ( |( callstack, size) | {
158
188
format ! ( "{} {:.0}" , callstack, ( * size as f64 / 1024.0 ) . round( ) )
159
189
} )
160
190
. collect ( ) ;
191
+ let raw_path = directory_path
192
+ . join ( "peak-memory.prof" )
193
+ . to_str ( )
194
+ . unwrap ( )
195
+ . to_string ( ) ;
196
+ if let Err ( e) = write_lines ( & lines, & raw_path) {
197
+ eprintln ! ( "Error writing raw profiling data: {}" , e) ;
198
+ }
199
+ let svg_path = directory_path
200
+ . join ( "peak-memory.svg" )
201
+ . to_str ( )
202
+ . unwrap ( )
203
+ . to_string ( ) ;
161
204
match write_flamegraph (
162
205
lines. iter ( ) . map ( |s| s. as_ref ( ) ) ,
163
- path ,
206
+ & svg_path ,
164
207
self . peak_allocated_bytes ,
165
208
) {
166
209
Ok ( _) => {
167
- eprintln ! ( "Wrote memory usage flamegraph to {}" , path ) ;
210
+ eprintln ! ( "Wrote memory usage flamegraph to {}" , svg_path ) ;
168
211
}
169
212
Err ( e) => {
170
213
eprintln ! ( "Error writing SVG: {}" , e) ;
@@ -218,6 +261,17 @@ pub fn dump_peak_to_flamegraph(path: &str) {
218
261
allocations. dump_peak_to_flamegraph ( path) ;
219
262
}
220
263
264
/// Write strings to disk, one line per string.
///
/// Creates the file at `path` (truncating it if it already exists) and writes
/// every entry of `lines` followed by a single `\n`.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be created, or if any
/// write or the final flush fails.
fn write_lines(lines: &[String], path: &str) -> std::io::Result<()> {
    // Buffer the output: writing straight to the `File` would issue two
    // write syscalls for every line.
    let mut file = std::io::BufWriter::new(fs::File::create(path)?);
    for line in lines {
        file.write_all(line.as_bytes())?;
        file.write_all(b"\n")?;
    }
    // Flush explicitly so that a failure is reported to the caller; dropping
    // a `BufWriter` would silently discard any flush error.
    file.flush()
}
273
+
274
+ /// Write a flamegraph SVG to disk, given lines in summarized format.
221
275
fn write_flamegraph < ' a , I : IntoIterator < Item = & ' a str > > (
222
276
lines : I ,
223
277
path : & str ,
@@ -250,7 +304,7 @@ fn write_flamegraph<'a, I: IntoIterator<Item = &'a str>>(
250
304
251
305
#[ cfg( test) ]
252
306
mod tests {
253
- use super :: { AllocationTracker , Callstack } ;
307
+ use super :: { AllocationTracker , CallSite , CallSites , Callstack } ;
254
308
use itertools:: Itertools ;
255
309
use proptest:: prelude:: * ;
256
310
use std:: collections;
@@ -308,22 +362,51 @@ mod tests {
308
362
assert_eq ! ( tracker. peak_allocated_bytes, 2123 ) ;
309
363
}
310
364
365
/// Registering the same `CallSite` twice must yield the same ID, while call
/// sites that differ in either module or function name must get distinct IDs.
#[test]
fn callsites_notices_duplicate_callsites() {
    let mut registry = CallSites::new();
    let a_af = CallSite::new("a", "af");
    let b_af = CallSite::new("b", "af");
    let a_bf = CallSite::new("a", "bf");

    // Insert in the same order as before: a:af twice, b:af once, a:bf twice.
    let first = registry.get_or_insert_id(a_af.clone());
    let first_again = registry.get_or_insert_id(a_af);
    let second = registry.get_or_insert_id(b_af);
    let third = registry.get_or_insert_id(a_bf.clone());
    let third_again = registry.get_or_insert_id(a_bf);

    // Duplicates map to the same ID...
    assert_eq!(first, first_again);
    // ...and every distinct call site gets its own.
    assert_ne!(first, second);
    assert_ne!(first, third);
    assert_ne!(second, third);
    assert_eq!(third, third_again);
}
382
+
311
383
/// Allocations made from the same callstack are summed together, and each
/// combined callstack is keyed by its `module:function` frames joined by `;`.
#[test]
fn combine_callstacks_and_sum_allocations() {
    let mut tracker = AllocationTracker::new();

    // Register three distinct call sites and remember their IDs.
    let site_a = tracker
        .call_sites
        .get_or_insert_id(CallSite::new("a", "af"));
    let site_b = tracker
        .call_sites
        .get_or_insert_id(CallSite::new("b", "bf"));
    let site_c = tracker
        .call_sites
        .get_or_insert_id(CallSite::new("c", "cf"));

    // First stack: a:af calling b:bf. Second stack: just c:cf.
    let mut two_frames = Callstack::new();
    two_frames.start_call(site_a);
    two_frames.start_call(site_b);
    let mut one_frame = Callstack::new();
    one_frame.start_call(site_c);

    tracker.add_allocation(1, 1000, two_frames.clone());
    tracker.add_allocation(2, 234, one_frame);
    tracker.add_allocation(3, 50000, two_frames);

    // 1000 + 50000 bytes came from the two-frame stack, 234 from the other.
    let expected: collections::HashMap<String, usize> = vec![
        ("a:af;b:bf".to_string(), 51000),
        ("c:cf".to_string(), 234),
    ]
    .into_iter()
    .collect();
    assert_eq!(expected, tracker.combine_callstacks());
}
329
412
}
0 commit comments