Skip to content

Commit b45164e

Browse files
committed
Merge branch 'threading' into 'master'
Threading Closes #5 and #18 See merge request pythonspeed/memory-profiler!4
2 parents ef8c134 + 5e4d70e commit b45164e

File tree

11 files changed

+236
-57
lines changed

11 files changed

+236
-57
lines changed

Cargo.toml

+6-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,9 @@
22

33
members = [
44
"memapi",
5-
]
5+
]
6+
7+
[profile.release]
8+
# Needed for profiling, disable for real release:
9+
#debug = true
10+
lto = true

Makefile

+2-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ filprofiler/libpymemprofile_api.so: Cargo.lock memapi/Cargo.toml memapi/src/*.rs
2020
cp -f target/release/libpymemprofile_api.so filprofiler/
2121

2222
test: build
23-
fil-profile example.py
23+
env RUST_BACKTRACE=1 cargo test
24+
env RUST_BACKTRACE=1 py.test
2425

2526
.PHONY: clean
2627
clean:

filprofiler/_script.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,14 @@ def stage_1():
3838

3939
def stage_2():
4040
"""Main CLI interface. Presumes LD_PRELOAD etc. has been set by stage_1()."""
41-
usage = "fil-profile [-o /path/to/output.svg] [-m module | /path/to/script.py ] [arg] ..."
41+
usage = "fil-profile [-o /path/to/output-dir/] [-m module | /path/to/script.py ] [arg] ..."
4242
parser = ArgumentParser(usage=usage)
4343
parser.add_argument(
4444
"-o",
45-
dest="svg_output_path",
45+
dest="output_path",
4646
action="store",
47-
default="memory-graph.svg",
48-
help="Path where the SVG with the profile flamegraph is written.",
47+
default="fil-result",
48+
help="Directory where the profiling results written.",
4949
)
5050
parser.add_argument(
5151
"-m",
@@ -74,7 +74,7 @@ def stage_2():
7474
"__package__": None,
7575
"__cached__": None,
7676
}
77-
trace(code, globals_, arguments.svg_output_path)
77+
trace(code, globals_, arguments.output_path)
7878

7979

8080
if __name__ == "__main__":

filprofiler/_tracer.py

+29-9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
"""Trace code, so that libpymemprofile_api.so know's where we are."""
22

3+
import atexit
4+
import os
35
import sys
6+
import threading
47
from ctypes import CDLL, RTLD_GLOBAL
58

69
from ._utils import library_path
@@ -14,25 +17,42 @@
1417

1518
def start_tracing():
1619
preload.fil_reset()
20+
threading.settrace(_start_thread_trace)
1721
_profiler.start_tracing()
1822

1923

20-
def stop_tracing(svg_output_path: str):
24+
def _start_thread_trace(frame, event, arg):
25+
"""Trace function that can be passed to sys.settrace.
26+
27+
All this does is register the underlying C trace function, using the
28+
mechanism described in
29+
https://github.com/nedbat/coveragepy/blob/master/coverage/ctracer/tracer.c's
30+
CTracer_call.
31+
"""
32+
if event == "call":
33+
_profiler.start_tracing()
34+
return _start_thread_trace
35+
36+
37+
def stop_tracing(output_path: str):
2138
sys.settrace(None)
22-
path = svg_output_path.encode("utf-8")
39+
path = output_path.encode("utf-8")
2340
preload.fil_dump_peak_to_flamegraph(path)
24-
with open(path) as f:
41+
svg_path = os.path.join(output_path, "peak-memory.svg")
42+
with open(svg_path) as f:
2543
data = f.read().replace(
2644
"SUBTITLE-HERE",
2745
"""Made with the Fil memory profiler. <a href="https://pythonspeed.com/products/filmemoryprofiler/" style="text-decoration: underline;" target="_parent">Try it on your code!</a>""",
2846
)
29-
with open(path, "w") as f:
47+
with open(svg_path, "w") as f:
3048
f.write(data)
3149

3250

33-
def trace(code, globals_, svg_output_path: str):
51+
def trace(code, globals_, output_path: str):
52+
"""
53+
Given code (Python or code object), run it under the tracer until the
54+
program exits.
55+
"""
56+
atexit.register(stop_tracing, output_path)
3457
start_tracing()
35-
try:
36-
exec(code, globals_, None)
37-
finally:
38-
stop_tracing(svg_output_path)
58+
exec(code, globals_, None)

memapi/Cargo.toml

-4
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,6 @@ features = ["ffi"]
2424
[dev-dependencies]
2525
proptest = "0.9.5"
2626

27-
[profile.release]
28-
# Needed for profiling, disable for real release:
29-
#debug = true
30-
3127
[lib]
3228
name = "pymemprofile_api"
3329
crate_type = ["cdylib"]

memapi/src/lib.rs

+3-11
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use smallstr::SmallString;
21
use std::ffi::CStr;
32
use std::os::raw::c_char;
43
use std::str;
@@ -25,16 +24,9 @@ pub unsafe extern "C" fn pymemprofile_start_call(
2524
file_name: *const c_char,
2625
func_name: *const c_char,
2726
) {
28-
let function_name = SmallString::from_str(str::from_utf8_unchecked(
29-
CStr::from_ptr(func_name).to_bytes(),
30-
));
31-
let module_name = SmallString::from_str(str::from_utf8_unchecked(
32-
CStr::from_ptr(file_name).to_bytes(),
33-
));
34-
let call_site = memorytracking::CallSite {
35-
function_name,
36-
module_name,
37-
};
27+
let function_name = str::from_utf8_unchecked(CStr::from_ptr(func_name).to_bytes());
28+
let module_name = str::from_utf8_unchecked(CStr::from_ptr(file_name).to_bytes());
29+
let call_site = memorytracking::CallSite::new(module_name, function_name);
3830
memorytracking::start_call(call_site);
3931
}
4032

memapi/src/memorytracking.rs

+95-12
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,13 @@ use smallstr::SmallString;
77
use std::cell::RefCell;
88
use std::collections;
99
use std::fmt;
10+
use std::fs;
11+
use std::io::Write;
12+
use std::path::Path;
1013
use std::sync::Mutex;
1114

15+
/// The current Python callstack. We use u32 IDs instead of CallSite objects for
16+
/// performance reasons.
1217
#[derive(Clone, Debug, PartialEq)]
1318
struct Callstack {
1419
calls: Vec<u32>,
@@ -41,18 +46,28 @@ impl Callstack {
4146

4247
thread_local!(static THREAD_CALLSTACK: RefCell<Callstack> = RefCell::new(Callstack::new()));
4348

44-
/// A particular place where a call happened:
49+
/// A particular place where a call happened.
4550
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
4651
pub struct CallSite {
47-
pub module_name: SmallString<[u8; 24]>,
48-
pub function_name: SmallString<[u8; 24]>,
52+
module_name: SmallString<[u8; 24]>,
53+
function_name: SmallString<[u8; 24]>,
54+
}
55+
56+
impl CallSite {
57+
pub fn new(module_name: &str, function_name: &str) -> CallSite {
58+
CallSite {
59+
module_name: SmallString::from_str(module_name),
60+
function_name: SmallString::from_str(function_name),
61+
}
62+
}
4963
}
5064

5165
impl fmt::Display for CallSite {
5266
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5367
write!(f, "{}:{}", self.module_name, self.function_name)
5468
}
5569
}
70+
5671
/// Maps CallSites to integer identifiers used in CallStacks.
5772
struct CallSites {
5873
max_id: u32,
@@ -67,6 +82,7 @@ impl CallSites {
6782
}
6883
}
6984

85+
/// Add a (possibly) new CallSite, returning its ID.
7086
fn get_or_insert_id(&mut self, call_site: CallSite) -> u32 {
7187
let max_id = &mut self.max_id;
7288
let result = self.callsite_to_id.entry(call_site).or_insert_with(|| {
@@ -77,6 +93,7 @@ impl CallSites {
7793
*result
7894
}
7995

96+
/// Get map from IDs to CallSites.
8097
fn get_reverse_map(&self) -> HashMap<u32, CallSite> {
8198
let mut result = HashMap::default();
8299
for (call_site, csid) in &(self.callsite_to_id) {
@@ -86,12 +103,14 @@ impl CallSites {
86103
}
87104
}
88105

106+
/// A specific call to malloc()/calloc().
89107
#[derive(Clone, Debug, PartialEq)]
90108
struct Allocation {
91109
callstack: Callstack,
92110
size: libc::size_t,
93111
}
94112

113+
/// The main data structure tracking everything.
95114
struct AllocationTracker {
96115
current_allocations: imhashmap::HashMap<usize, Allocation>,
97116
peak_allocations: imhashmap::HashMap<usize, Allocation>,
@@ -149,22 +168,46 @@ impl<'a> AllocationTracker {
149168
by_call
150169
}
151170

152-
/// Dump all callstacks in peak memory usage to format used by flamegraph.
171+
/// Dump all callstacks in peak memory usage to various files describing the
172+
/// memory usage.
153173
fn dump_peak_to_flamegraph(&self, path: &str) {
174+
let directory_path = Path::new(path);
175+
if !directory_path.exists() {
176+
fs::create_dir(directory_path).expect("Couldn't create the output directory.");
177+
} else if !directory_path.is_dir() {
178+
panic!("Output path must be a directory.");
179+
}
180+
154181
let by_call = self.combine_callstacks();
155182
let lines: Vec<String> = by_call
156183
.iter()
184+
// Filter out callstacks with less than 1 KiB RAM usage.
185+
// TODO maybe make this number configurable someday.
186+
.filter(|(_, size)| **size >= 1024)
157187
.map(|(callstack, size)| {
158188
format!("{} {:.0}", callstack, (*size as f64 / 1024.0).round())
159189
})
160190
.collect();
191+
let raw_path = directory_path
192+
.join("peak-memory.prof")
193+
.to_str()
194+
.unwrap()
195+
.to_string();
196+
if let Err(e) = write_lines(&lines, &raw_path) {
197+
eprintln!("Error writing raw profiling data: {}", e);
198+
}
199+
let svg_path = directory_path
200+
.join("peak-memory.svg")
201+
.to_str()
202+
.unwrap()
203+
.to_string();
161204
match write_flamegraph(
162205
lines.iter().map(|s| s.as_ref()),
163-
path,
206+
&svg_path,
164207
self.peak_allocated_bytes,
165208
) {
166209
Ok(_) => {
167-
eprintln!("Wrote memory usage flamegraph to {}", path);
210+
eprintln!("Wrote memory usage flamegraph to {}", svg_path);
168211
}
169212
Err(e) => {
170213
eprintln!("Error writing SVG: {}", e);
@@ -218,6 +261,17 @@ pub fn dump_peak_to_flamegraph(path: &str) {
218261
allocations.dump_peak_to_flamegraph(path);
219262
}
220263

264+
/// Write strings to disk, one line per string.
265+
fn write_lines(lines: &Vec<String>, path: &str) -> std::io::Result<()> {
266+
let mut file = fs::File::create(path)?;
267+
for line in lines.iter() {
268+
file.write_all(line.as_bytes())?;
269+
file.write_all(b"\n")?;
270+
}
271+
Ok(())
272+
}
273+
274+
/// Write a flamegraph SVG to disk, given lines in summarized format.
221275
fn write_flamegraph<'a, I: IntoIterator<Item = &'a str>>(
222276
lines: I,
223277
path: &str,
@@ -250,7 +304,7 @@ fn write_flamegraph<'a, I: IntoIterator<Item = &'a str>>(
250304

251305
#[cfg(test)]
252306
mod tests {
253-
use super::{AllocationTracker, Callstack};
307+
use super::{AllocationTracker, CallSite, CallSites, Callstack};
254308
use itertools::Itertools;
255309
use proptest::prelude::*;
256310
use std::collections;
@@ -308,22 +362,51 @@ mod tests {
308362
assert_eq!(tracker.peak_allocated_bytes, 2123);
309363
}
310364

365+
#[test]
366+
fn callsites_notices_duplicate_callsites() {
367+
let callsite1 = CallSite::new("a", "af");
368+
let callsite2 = CallSite::new("b", "af");
369+
let callsite3 = CallSite::new("a", "bf");
370+
let mut callsites = CallSites::new();
371+
let id1 = callsites.get_or_insert_id(callsite1.clone());
372+
let id1b = callsites.get_or_insert_id(callsite1);
373+
let id2 = callsites.get_or_insert_id(callsite2);
374+
let id3 = callsites.get_or_insert_id(callsite3.clone());
375+
let id3b = callsites.get_or_insert_id(callsite3.clone());
376+
assert_eq!(id1, id1b);
377+
assert_ne!(id1, id2);
378+
assert_ne!(id1, id3);
379+
assert_ne!(id2, id3);
380+
assert_eq!(id3, id3b);
381+
}
382+
311383
#[test]
312384
fn combine_callstacks_and_sum_allocations() {
313385
let mut tracker = AllocationTracker::new();
386+
let id1 = tracker
387+
.call_sites
388+
.get_or_insert_id(CallSite::new("a", "af"));
389+
390+
let id2 = tracker
391+
.call_sites
392+
.get_or_insert_id(CallSite::new("b", "bf"));
393+
394+
let id3 = tracker
395+
.call_sites
396+
.get_or_insert_id(CallSite::new("c", "cf"));
314397
let mut cs1 = Callstack::new();
315-
cs1.start_call(1);
316-
cs1.start_call(2);
398+
cs1.start_call(id1);
399+
cs1.start_call(id2);
317400
let mut cs2 = Callstack::new();
318-
cs2.start_call(3);
401+
cs2.start_call(id3);
319402

320403
tracker.add_allocation(1, 1000, cs1.clone());
321404
tracker.add_allocation(2, 234, cs2.clone());
322405
tracker.add_allocation(3, 50000, cs1.clone());
323406

324407
let mut expected: collections::HashMap<String, usize> = collections::HashMap::new();
325-
expected.insert("a;b".to_string(), 51000);
326-
expected.insert("c".to_string(), 234);
408+
expected.insert("a:af;b:bf".to_string(), 51000);
409+
expected.insert("c:cf".to_string(), 234);
327410
assert_eq!(expected, tracker.combine_callstacks());
328411
}
329412
}

python-benchmarks/threaded.py

+4-14
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,21 @@
44
import numpy
55

66
def h(i):
7-
return numpy.ones((1024, 1024, 20), dtype=numpy.uint8)
7+
return numpy.ones((1024, 1024, i), dtype=numpy.uint8)
88

99
def child1():
1010
return h(30)
1111

1212
def thread1():
1313
data = h(20)
14-
sleep(1)
14+
sleep(0.1)
1515
data2 = child1()
16-
sleep(1)
17-
18-
def child2():
19-
return h(30)
20-
21-
def thread2():
22-
data = h(50)
23-
sleep(1)
24-
data2 = child2()
25-
sleep(1)
16+
sleep(0.1)
2617

2718
threading.Thread(target=thread1).start()
28-
threading.Thread(target=thread2).start()
2919

3020
def main():
3121
data = h(50)
32-
sleep(5)
22+
sleep(0.5)
3323

3424
main()

0 commit comments

Comments
 (0)