@@ -7,8 +7,13 @@ use smallstr::SmallString;
77use std:: cell:: RefCell ;
88use std:: collections;
99use std:: fmt;
10+ use std:: fs;
11+ use std:: io:: Write ;
12+ use std:: path:: Path ;
1013use std:: sync:: Mutex ;
1114
15+ /// The current Python callstack. We use u32 IDs instead of CallSite objects for
16+ /// performance reasons.
1217#[ derive( Clone , Debug , PartialEq ) ]
1318struct Callstack {
1419 calls : Vec < u32 > ,
@@ -41,18 +46,28 @@ impl Callstack {
4146
4247thread_local ! ( static THREAD_CALLSTACK : RefCell <Callstack > = RefCell :: new( Callstack :: new( ) ) ) ;
4348
44- /// A particular place where a call happened:
49+ /// A particular place where a call happened.
4550#[ derive( Clone , Debug , PartialEq , Eq , Hash ) ]
4651pub struct CallSite {
47- pub module_name : SmallString < [ u8 ; 24 ] > ,
48- pub function_name : SmallString < [ u8 ; 24 ] > ,
52+ module_name : SmallString < [ u8 ; 24 ] > ,
53+ function_name : SmallString < [ u8 ; 24 ] > ,
54+ }
55+
56+ impl CallSite {
57+ pub fn new ( module_name : & str , function_name : & str ) -> CallSite {
58+ CallSite {
59+ module_name : SmallString :: from_str ( module_name) ,
60+ function_name : SmallString :: from_str ( function_name) ,
61+ }
62+ }
4963}
5064
5165impl fmt:: Display for CallSite {
5266 fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
5367 write ! ( f, "{}:{}" , self . module_name, self . function_name)
5468 }
5569}
70+
/// Maps CallSites to the integer identifiers stored in Callstacks.
5772struct CallSites {
5873 max_id : u32 ,
@@ -67,6 +82,7 @@ impl CallSites {
6782 }
6883 }
6984
85+ /// Add a (possibly) new CallSite, returning its ID.
7086 fn get_or_insert_id ( & mut self , call_site : CallSite ) -> u32 {
7187 let max_id = & mut self . max_id ;
7288 let result = self . callsite_to_id . entry ( call_site) . or_insert_with ( || {
@@ -77,6 +93,7 @@ impl CallSites {
7793 * result
7894 }
7995
96+ /// Get map from IDs to CallSites.
8097 fn get_reverse_map ( & self ) -> HashMap < u32 , CallSite > {
8198 let mut result = HashMap :: default ( ) ;
8299 for ( call_site, csid) in & ( self . callsite_to_id ) {
@@ -86,12 +103,14 @@ impl CallSites {
86103 }
87104}
88105
/// A specific call to malloc()/calloc().
///
/// Records where the allocation was made and how large it was.
#[derive(Clone, Debug, PartialEq)]
struct Allocation {
    /// The callstack (as call-site IDs) associated with this allocation.
    callstack: Callstack,
    /// Size of the allocation; presumably in bytes, as with the C allocator
    /// APIs — TODO confirm against the caller.
    size: libc::size_t,
}
94112
113+ /// The main data structure tracking everything.
95114struct AllocationTracker {
96115 current_allocations : imhashmap:: HashMap < usize , Allocation > ,
97116 peak_allocations : imhashmap:: HashMap < usize , Allocation > ,
@@ -149,22 +168,46 @@ impl<'a> AllocationTracker {
149168 by_call
150169 }
151170
152- /// Dump all callstacks in peak memory usage to format used by flamegraph.
171+ /// Dump all callstacks in peak memory usage to various files describing the
172+ /// memory usage.
153173 fn dump_peak_to_flamegraph ( & self , path : & str ) {
174+ let directory_path = Path :: new ( path) ;
175+ if !directory_path. exists ( ) {
176+ fs:: create_dir ( directory_path) . expect ( "Couldn't create the output directory." ) ;
177+ } else if !directory_path. is_dir ( ) {
178+ panic ! ( "Output path must be a directory." ) ;
179+ }
180+
154181 let by_call = self . combine_callstacks ( ) ;
155182 let lines: Vec < String > = by_call
156183 . iter ( )
184+ // Filter out callstacks with less than 1 KiB RAM usage.
185+ // TODO maybe make this number configurable someday.
186+ . filter ( |( _, size) | * * size >= 1024 )
157187 . map ( |( callstack, size) | {
158188 format ! ( "{} {:.0}" , callstack, ( * size as f64 / 1024.0 ) . round( ) )
159189 } )
160190 . collect ( ) ;
191+ let raw_path = directory_path
192+ . join ( "peak-memory.prof" )
193+ . to_str ( )
194+ . unwrap ( )
195+ . to_string ( ) ;
196+ if let Err ( e) = write_lines ( & lines, & raw_path) {
197+ eprintln ! ( "Error writing raw profiling data: {}" , e) ;
198+ }
199+ let svg_path = directory_path
200+ . join ( "peak-memory.svg" )
201+ . to_str ( )
202+ . unwrap ( )
203+ . to_string ( ) ;
161204 match write_flamegraph (
162205 lines. iter ( ) . map ( |s| s. as_ref ( ) ) ,
163- path ,
206+ & svg_path ,
164207 self . peak_allocated_bytes ,
165208 ) {
166209 Ok ( _) => {
167- eprintln ! ( "Wrote memory usage flamegraph to {}" , path ) ;
210+ eprintln ! ( "Wrote memory usage flamegraph to {}" , svg_path ) ;
168211 }
169212 Err ( e) => {
170213 eprintln ! ( "Error writing SVG: {}" , e) ;
@@ -218,6 +261,17 @@ pub fn dump_peak_to_flamegraph(path: &str) {
218261 allocations. dump_peak_to_flamegraph ( path) ;
219262}
220263
/// Write strings to disk, one line per string.
///
/// Creates the file at `path` (truncating it if it already exists) and writes
/// each entry of `lines` followed by a single `\n`. An empty `lines` slice
/// produces an empty file.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be created or if any
/// write, including the final flush, fails.
fn write_lines(lines: &[String], path: &str) -> std::io::Result<()> {
    // Buffer the output: each line would otherwise cost two small writes
    // issued directly against the file.
    let mut writer = std::io::BufWriter::new(fs::File::create(path)?);
    for line in lines {
        writer.write_all(line.as_bytes())?;
        writer.write_all(b"\n")?;
    }
    // Flush explicitly; errors from the implicit flush on drop are discarded.
    writer.flush()
}
273+
274+ /// Write a flamegraph SVG to disk, given lines in summarized format.
221275fn write_flamegraph < ' a , I : IntoIterator < Item = & ' a str > > (
222276 lines : I ,
223277 path : & str ,
@@ -250,7 +304,7 @@ fn write_flamegraph<'a, I: IntoIterator<Item = &'a str>>(
250304
251305#[ cfg( test) ]
252306mod tests {
253- use super :: { AllocationTracker , Callstack } ;
307+ use super :: { AllocationTracker , CallSite , CallSites , Callstack } ;
254308 use itertools:: Itertools ;
255309 use proptest:: prelude:: * ;
256310 use std:: collections;
@@ -308,22 +362,51 @@ mod tests {
308362 assert_eq ! ( tracker. peak_allocated_bytes, 2123 ) ;
309363 }
310364
365+ #[ test]
366+ fn callsites_notices_duplicate_callsites ( ) {
367+ let callsite1 = CallSite :: new ( "a" , "af" ) ;
368+ let callsite2 = CallSite :: new ( "b" , "af" ) ;
369+ let callsite3 = CallSite :: new ( "a" , "bf" ) ;
370+ let mut callsites = CallSites :: new ( ) ;
371+ let id1 = callsites. get_or_insert_id ( callsite1. clone ( ) ) ;
372+ let id1b = callsites. get_or_insert_id ( callsite1) ;
373+ let id2 = callsites. get_or_insert_id ( callsite2) ;
374+ let id3 = callsites. get_or_insert_id ( callsite3. clone ( ) ) ;
375+ let id3b = callsites. get_or_insert_id ( callsite3. clone ( ) ) ;
376+ assert_eq ! ( id1, id1b) ;
377+ assert_ne ! ( id1, id2) ;
378+ assert_ne ! ( id1, id3) ;
379+ assert_ne ! ( id2, id3) ;
380+ assert_eq ! ( id3, id3b) ;
381+ }
382+
311383 #[ test]
312384 fn combine_callstacks_and_sum_allocations ( ) {
313385 let mut tracker = AllocationTracker :: new ( ) ;
386+ let id1 = tracker
387+ . call_sites
388+ . get_or_insert_id ( CallSite :: new ( "a" , "af" ) ) ;
389+
390+ let id2 = tracker
391+ . call_sites
392+ . get_or_insert_id ( CallSite :: new ( "b" , "bf" ) ) ;
393+
394+ let id3 = tracker
395+ . call_sites
396+ . get_or_insert_id ( CallSite :: new ( "c" , "cf" ) ) ;
314397 let mut cs1 = Callstack :: new ( ) ;
315- cs1. start_call ( 1 ) ;
316- cs1. start_call ( 2 ) ;
398+ cs1. start_call ( id1 ) ;
399+ cs1. start_call ( id2 ) ;
317400 let mut cs2 = Callstack :: new ( ) ;
318- cs2. start_call ( 3 ) ;
401+ cs2. start_call ( id3 ) ;
319402
320403 tracker. add_allocation ( 1 , 1000 , cs1. clone ( ) ) ;
321404 tracker. add_allocation ( 2 , 234 , cs2. clone ( ) ) ;
322405 tracker. add_allocation ( 3 , 50000 , cs1. clone ( ) ) ;
323406
324407 let mut expected: collections:: HashMap < String , usize > = collections:: HashMap :: new ( ) ;
325- expected. insert ( "a;b " . to_string ( ) , 51000 ) ;
326- expected. insert ( "c" . to_string ( ) , 234 ) ;
408+ expected. insert ( "a:af;b:bf " . to_string ( ) , 51000 ) ;
409+ expected. insert ( "c:cf " . to_string ( ) , 234 ) ;
327410 assert_eq ! ( expected, tracker. combine_callstacks( ) ) ;
328411 }
329412}
0 commit comments