Commit a02caa4

memory.py: Compute a diff over Massif heap memory profiles
This tool will enable memory regression tests by comparing heap memory profiles generated using valgrind's Massif tool (or any other tool that can generate compatible output).
1 parent da287d1 commit a02caa4
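A typical workflow (a sketch only; the program under test, the file paths, and the subprocess wiring below are assumptions, not part of this commit) is to capture a Massif profile of the binary and compare it against a checked-in reference, failing the regression test whenever memory.py reports a difference:

#!/usr/bin/env python
# Hypothetical test driver; 'program-under-test' and all paths are placeholders.
import subprocess
import sys

# Capture a heap profile of the program under test with valgrind's Massif tool.
subprocess.check_call(['valgrind', '--tool=massif',
                       '--massif-out-file=massif.out', './program-under-test'])

# Compare against a checked-in reference profile; memory.py exits non-zero
# when the profiles diverge (-F permits a varying number of snapshots).
sys.exit(subprocess.call(['scripts/memory-test/memory.py',
                          '-r', 'reference.massif', '-F', 'massif.out']))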

1 file changed, +302 -0 lines changed
Diff for: scripts/memory-test/memory.py

@@ -0,0 +1,302 @@
#!/usr/bin/env python

from __future__ import print_function

import argparse
import difflib
import fractions
import itertools
import msparser
import os
# import pprint
import sys


def near_eq(x, y):
    # values within 10% of the reference value x count as equal
    fx = float(x)
    fy = float(y)
    return abs(fy - fx) <= 0.1 * abs(fx)

class snapshot:
    def __init__(self, s, is_peak):
        self.data = s
        self.value = s['mem_heap']
        self.is_peak = is_peak

    def __cmp__(self, other):
        if self.__eq__(other):
            return 0
        else:
            return -1 if (self.value < other.value) else 1

    def __eq__(self, other):
        if self.is_peak != other.is_peak:
            return False

        if not near_eq(self.value, other.value):
            return False

        if self.data.get('heap_tree') and other.data.get('heap_tree'):
            ds = self.data['heap_tree']['children'][0]
            do = other.data['heap_tree']['children'][0]
            if ds['details']['function'] != do['details']['function'] or (
                    not near_eq(ds['nbytes'], do['nbytes'])):
                return False

        return True
        # pprint.pprint(self.data['heap_tree'], depth=2)
        # pprint.pprint(other.data['heap_tree'], depth=2)

    def __radd__(self, other):
        # difflib.unified_diff builds its output lines as '+'/'-'/' ' plus the
        # sequence element, so __radd__ renders a snapshot as text
        s = other + str(self.value)
        if self.is_peak:
            s += ' *peak*'
        if self.data.get('heap_tree'):
            d = self.data['heap_tree']['children'][0]
            s += ' {}: {}'.format(d['details']['function'], d['nbytes'])
        return s

    def __hash__(self):
        """
        Make sure all values end up in the same hash bucket to enforce
        comparison via ==/__eq__ as overridden above.
        """
        return 0

# based on https://chezsoi.org/lucas/blog/colored-diff-output-with-python.html
try:
    from colorama import Fore, init
    init()
except ImportError:  # fallback so that the imported classes always exist
    class ColorFallback():
        # simulate a subset of Colorama's features (Colorama knows how to
        # support Windows, we just don't support colours there)
        if sys.stdout.isatty() and os.name != 'nt':
            GREEN = '\033[32m'
            RED = '\033[31m'
            RESET = '\033[0m'
        else:
            GREEN = RED = RESET = ''
    Fore = ColorFallback()

def color_diff(diff):
    for line in diff:
        if line.startswith('+'):
            yield Fore.GREEN + line + Fore.RESET
        elif line.startswith('-'):
            yield Fore.RED + line + Fore.RESET
        else:
            yield line


def build_sequence(data, peak_index):
    seq = []
    for si in range(0, len(data['snapshots'])):
        seq.append(snapshot(data['snapshots'][si], si == peak_index))
    seq.append(snapshot({'mem_heap': 0}, False))
    return seq

# based on
# https://stackoverflow.com/questions/1011938/python-previous-and-next-values-inside-a-loop
def previous_and_next(some_iterable):
    prevs, items, nexts = itertools.tee(some_iterable, 3)
    prevs = itertools.chain([None], prevs)
    nexts = itertools.chain(itertools.islice(nexts, 1, None), [None])
    return itertools.izip(prevs, items, nexts)


def interpolate(seq, other_seq):
    ls = len(seq) - 1
    lo = len(other_seq) - 1

    lcm = (ls * lo) / fractions.gcd(ls, lo)
    sub_steps = lcm / ls

    interpolated_seq = [seq[0]]
    for prev, item, nxt in previous_and_next(seq):
        if prev:
            step = (item.value - prev.value) // ls
            if step < 0:
                step += 1
            for i in range(0, sub_steps):
                s = snapshot(item.data, item.is_peak)
                s.value = prev.value + step * (i + 1)
                interpolated_seq.append(s)

    return interpolated_seq

def filter_delete(ref_seq, data_seq, add_elements, prev, item, nxt,
                  new_reference_seq, new_data_seq):
    (tag, i1, i2, j1, j2) = item
    fwd_only = False
    for d in ref_seq[i1:i2]:
        if prev and not fwd_only and data_seq[prev[4] - 1] == d:
            # the value from the original sequence would be
            # new_data_seq.append(data_seq[prev[4] - 1])
            # but since snapshot.__eq__ isn't transitive this may
            # result in having to do even more edits
            if add_elements:
                new_data_seq.append(d)
        elif nxt and data_seq[nxt[3]] == d:
            fwd_only = True
            # the value from the original sequence would be
            # new_data_seq.append(data_seq[nxt[3]])
            # but since snapshot.__eq__ isn't transitive this may
            # result in having to do even more edits
            if add_elements:
                new_data_seq.append(d)
        elif prev and nxt and (
                (ref_seq[prev[2] - 1] <= ref_seq[nxt[1]] and
                 ref_seq[prev[2] - 1] <= d and d <= ref_seq[nxt[1]]) or
                (ref_seq[prev[2] - 1] > ref_seq[nxt[1]] and
                 ref_seq[prev[2] - 1] >= d and d >= ref_seq[nxt[1]])):
            # the value from the original sequence would be between
            # new_data_seq.append(data_seq[prev[4] - 1]) and
            # new_data_seq.append(data_seq[nxt[3]])
            # but since snapshot.__eq__ isn't transitive this may
            # result in having to do even more edits
            if add_elements:
                new_data_seq.append(d)
        elif not add_elements:
            new_reference_seq.append(d)


def filter_insert(ref_seq, data_seq, add_elements, prev, item, nxt,
                  new_reference_seq, new_data_seq):
    (tag, i1, i2, j1, j2) = item
    fwd_only = False
    for i in data_seq[j1:j2]:
        if prev and not fwd_only and ref_seq[prev[2] - 1] == i:
            pass
        elif nxt and ref_seq[nxt[1]] == i:
            fwd_only = True
        elif prev and nxt and (
                (data_seq[prev[4] - 1] <= data_seq[nxt[3]] and
                 data_seq[prev[4] - 1] <= i and
                 i <= data_seq[nxt[3]]) or
                (data_seq[prev[4] - 1] > data_seq[nxt[3]] and
                 data_seq[prev[4] - 1] >= i and
                 i >= data_seq[nxt[3]])):
            pass
        else:
            new_data_seq.append(i)


def filter_diff(ref_seq, data_seq, add_elements):
    new_reference_seq = []
    if add_elements:
        new_reference_seq = ref_seq

    new_data_seq = []

    s = difflib.SequenceMatcher(None, ref_seq, data_seq)
    for prev, item, nxt in previous_and_next(s.get_opcodes()):
        (tag, i1, i2, j1, j2) = item
        if tag == 'equal':
            if not add_elements:
                new_reference_seq.extend(ref_seq[i1:i2])
            new_data_seq.extend(data_seq[j1:j2])
        elif tag == 'replace':
            filter_delete(ref_seq, data_seq, add_elements, prev, item, nxt,
                          new_reference_seq, new_data_seq)
            filter_insert(ref_seq, data_seq, add_elements, prev, item, nxt,
                          new_reference_seq, new_data_seq)
        elif tag == 'delete':
            filter_delete(ref_seq, data_seq, add_elements, prev, item, nxt,
                          new_reference_seq, new_data_seq)
        elif tag == 'insert':
            filter_insert(ref_seq, data_seq, add_elements, prev, item, nxt,
                          new_reference_seq, new_data_seq)

    return (new_reference_seq, new_data_seq)

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--reference', type=str, required=True,
                        help='Massif reference output')
    parser.add_argument('-P', '--peak-diff', action='store_true',
                        help='Exit code depends on peak memory diff only')
    parser.add_argument('-F', '--fuzzy', action='store_true',
                        help='Permit varying numbers of snapshots')
    parser.add_argument('-A', '--artificial', action='store_true',
                        help='Add artificial elements [implies --fuzzy]')
    parser.add_argument('-I', '--interpolate', action='store_true',
                        help='Interpolate additional values between snapshots')
    parser.add_argument('file', type=str,
                        help='Massif output to validate')

    args = parser.parse_args()

    return args

def main():
    args = parse_args()

    reference_data = ()
    with open(args.reference) as r:
        reference_data = msparser.parse(r)

    data = ()
    with open(args.file) as f:
        data = msparser.parse(f)

    r_peak_index = reference_data['peak_snapshot_index']
    r_peak = reference_data['snapshots'][r_peak_index]
    peak_index = data['peak_snapshot_index']
    peak = data['snapshots'][peak_index]

    print("snapshots: ref={} cur={}".format(
        len(reference_data['snapshots']), len(data['snapshots'])))
    print("peak idx : ref={} cur={}".format(r_peak_index, peak_index))
    print("peak [kB]: ref={0:.2f} cur={1:.2f}".format(
        r_peak['mem_heap'] / 1024.0, peak['mem_heap'] / 1024.0))

    """
    snaps = min(len(reference_data['snapshots']), len(data['snapshots']))
    for i in range(0, snaps):
        print("mem_heap [kB]: ref={0:.2f} cur={1:.2f}".format(
            reference_data['snapshots'][i]['mem_heap'] / 1024.0,
            data['snapshots'][i]['mem_heap'] / 1024.0))
        print(snapshot(reference_data['snapshots'][i], False) ==
              snapshot(data['snapshots'][i], False))
    """

    reference_seq = build_sequence(reference_data, r_peak_index)
    data_seq = build_sequence(data, peak_index)

    if args.interpolate:
        reference_seq = interpolate(reference_seq, data_seq)
        data_seq = interpolate(data_seq, reference_seq)

    if args.fuzzy or args.artificial:
        (new_ref_seq, new_data_seq) = filter_diff(
            reference_seq, data_seq, args.artificial)
    else:
        (new_ref_seq, new_data_seq) = (reference_seq, data_seq)

    ret_code = 0
    diff = color_diff(
        difflib.unified_diff(
            new_ref_seq, new_data_seq, 'ref', 'cur', n=1, lineterm=''))
    for l in diff:
        ret_code = 1
        print(l)

    if args.peak_diff:
        r_peak = snapshot(reference_data['snapshots'][r_peak_index], True)
        d_peak = snapshot(data['snapshots'][peak_index], True)
        ret_code = 0 if r_peak == d_peak else 1

    return ret_code


if __name__ == '__main__':
    rc = main()
    sys.exit(rc)
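As a side note on -P/--peak-diff: it reduces the check to the two peak snapshots, compared with the same 10% tolerance that near_eq() applies everywhere. The comparison of the peak heap sizes can be sketched directly on top of msparser (file names are placeholders, and this ignores the per-function breakdown that snapshot.__eq__ also inspects):

import msparser

with open('reference.massif') as r, open('massif.out') as f:
    ref = msparser.parse(r)
    cur = msparser.parse(f)

ref_peak = float(ref['snapshots'][ref['peak_snapshot_index']]['mem_heap'])
cur_peak = float(cur['snapshots'][cur['peak_snapshot_index']]['mem_heap'])

# same 10% tolerance as near_eq(): e.g. a 100 kB reference peak tolerates
# a deviation of up to 10 kB
print('peak ok:', abs(cur_peak - ref_peak) <= 0.1 * ref_peak)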
