Skip to content

Commit da287d1

Browse files
committed
Import Massif parser from MathieuTurcotte/msparser
Just import the minimum required for our future use from https://github.com/MathieuTurcotte/msparser at 8ce7336d9b55366. This code is MIT licensed (license information included).
1 parent 06b3adc commit da287d1

File tree

1 file changed

+296
-0
lines changed

1 file changed

+296
-0
lines changed

Diff for: scripts/memory-test/msparser.py

+296
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
# Copyright (c) 2011 Mathieu Turcotte
2+
# Licensed under the MIT license.
3+
#
4+
# Permission is hereby granted, free of charge, to any person obtaining a copy of
5+
# this software and associated documentation files (the "Software"), to deal in
6+
# the Software without restriction, including without limitation the rights to
7+
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8+
# of the Software, and to permit persons to whom the Software is furnished to do
9+
# so, subject to the following conditions:
10+
#
11+
# The above copyright notice and this permission notice shall be included in all
12+
# copies or substantial portions of the Software.
13+
#
14+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20+
# SOFTWARE.
21+
22+
"""
23+
The msparser module offers a simple interface to parse the Valgrind massif.out
24+
file format, i.e. data files produced by the Valgrind heap profiler.
25+
"""
26+
27+
from __future__ import with_statement # Enable with statement in Python 2.5.
28+
import os.path
29+
import re
30+
31+
__all__ = ["parse", "parse_file", "ParseError"]
32+
33+
# All patterns below are raw strings so that regex escapes such as \s and \d
# are passed to the re module verbatim instead of being treated as (invalid)
# Python string escapes.

# Precompiled regex used to parse comments. Matches blank lines and lines
# whose first non-blank character is '#'.
_COMMENT_RE = re.compile(r"\s*(#|$)")

# Precompiled regexes used to parse header fields.
_FIELD_DESC_RE = re.compile(r"desc:\s(?P<data>.*)$")
_FIELD_CMD_RE = re.compile(r"cmd:\s(?P<data>.*)$")
_FIELD_TIME_UNIT_RE = re.compile(r"time_unit:\s(?P<data>ms|B|i)$")

# Precompiled regexes used to parse snapshot fields.
_FIELD_SNAPSHOT_RE = re.compile(r"snapshot=(?P<data>\d+)")
_FIELD_TIME_RE = re.compile(r"time=(?P<data>\d+)")
_FIELD_MEM_HEAP_RE = re.compile(r"mem_heap_B=(?P<data>\d+)")
_FIELD_MEM_EXTRA_RE = re.compile(r"mem_heap_extra_B=(?P<data>\d+)")
_FIELD_MEM_STACK_RE = re.compile(r"mem_stacks_B=(?P<data>\d+)")
_FIELD_HEAP_TREE_RE = re.compile(r"heap_tree=(?P<data>\w+)")

# Precompiled regex to parse heap entries. Matches three things:
# - the number of children,
# - the number of bytes,
# - and the details section.
_HEAP_ENTRY_RE = re.compile(r"""
\s*n # skip zero or more spaces, then 'n'
(?P<num_children>\d+) # match number of children, 1 or more digits
:\s # skip ':' and one space
(?P<num_bytes>\d+) # match the number of bytes, 1 or more digits
\s # skip one space
(?P<details>.*) # match the details
""", re.VERBOSE)

# Precompiled regex to check if the details section is below threshold.
_HEAP_BELOW_THRESHOLD_RE = re.compile(r"""in.*places?.*""")

# Precompiled regex to parse the details section of entries above threshold.
# This should match four things:
# - the hexadecimal address,
# - the function name,
# - the file name or binary path, i.e. file.cpp or usr/local/bin/foo.so,
# - and a line number if present.
# Last two parts are optional to handle entries without a file name or binary
# path.
_HEAP_DETAILS_RE = re.compile(r"""
(?P<address>[a-fA-F0-9x]+) # match the hexadecimal address
:\s # skip ': '
(?P<function>.+?) # match the function's name, non-greedy
(?: # don't capture fname/line group
\s
\(
(?:in\s)? # skip 'in ' if present
(?P<fname>[^:]+) # match the file name
:? # skip ':', if present
(?P<line>\d+)? # match the line number, if present
\)
)? # fname/line group is optional
$ # should have reached the EOL
""", re.VERBOSE)
88+
89+
90+
class ParseContext(object):
    """
    A simple context for parsing. Dumbed down version of fileinput.

    Wraps a file-like object (anything with a readline() method) and counts
    the reads made through it, so that errors can report a line number and a
    file name.
    """
    def __init__(self, fd):
        self._fd = fd
        self._line = 0

    def line(self):
        """
        Return the number of readline() calls made so far, i.e. the 1-based
        number of the line most recently read (0 before the first read).
        """
        return self._line

    def readline(self):
        """
        Read and return the next line from the wrapped object, incrementing
        the line counter. The counter is incremented even when the read hits
        EOF and returns an empty string.
        """
        self._line += 1
        return self._fd.readline()

    def filename(self):
        """
        Return the absolute path of the wrapped file, or a placeholder when
        the wrapped object has no usable name.
        """
        # In-memory streams (e.g. StringIO) have no 'name' attribute; falling
        # back to a placeholder keeps error reporting from failing on them.
        name = getattr(self._fd, "name", None)
        if name is None:
            return "<unknown>"
        try:
            return os.path.abspath(name)
        except (TypeError, AttributeError):
            # 'name' may not be path-like (e.g. an integer file descriptor).
            return str(name)
107+
108+
109+
class ParseError(Exception):
    """
    Error raised when a parsing error is encountered.

    Attributes:
        msg: description of the problem.
        line: value of ctx.line() when the error was created.
        filename: value of ctx.filename() when the error was created.
    """
    def __init__(self, msg, ctx):
        # Initialise the base class so that 'args' (and therefore repr() and
        # generic exception tooling) carries the message.
        Exception.__init__(self, msg)
        self.msg = msg
        self.line = ctx.line()
        self.filename = ctx.filename()

    def __str__(self):
        return " ".join([str(self.msg), 'at line', str(self.line), 'in',
                         str(self.filename)])
121+
122+
123+
def parse_file(filepath):
    """
    Open the file at filepath, parse it with parse() and return the result.
    The file is closed before returning, whether or not parsing succeeds.
    """
    with open(filepath) as massif_file:
        parsed = parse(massif_file)
    return parsed
129+
130+
131+
def parse(fd):
    """
    Parse massif output from an already opened file object.

    The header is read first, then every snapshot until EOF. Returns the
    dictionary filled in by _parse_header and _parse_snapshots.
    """
    context = ParseContext(fd)
    result = {}
    _parse_header(context, result)
    _parse_snapshots(context, result)
    return result
140+
141+
142+
def _match_unconditional(ctx, regex, string):
    """
    Match regex against the start of string and return the match object.
    A failed match is never reported as None: a ParseError carrying ctx is
    raised instead.
    """
    found = regex.match(string)
    if found is not None:
        return found

    message = "can't match '" + string + "' against '" + regex.pattern + "'"
    raise ParseError(message, ctx)
152+
153+
154+
def _get_next_line(ctx, may_reach_eof=False):
    """
    Read another line from ctx and return it with newline characters
    stripped from both ends.

    On EOF, return None if may_reach_eof is true; otherwise raise a
    ParseError. Any falsy value of may_reach_eof (not only the False
    singleton) makes EOF an error.
    """
    line = ctx.readline()  # Returns an empty string on EOF.

    if line:
        return line.strip("\n")

    if not may_reach_eof:
        raise ParseError("unexpected EOF", ctx)
    return None
168+
169+
170+
def _get_next_field(ctx, field_regex, may_reach_eof=False):
    """
    Return the data of the next field read from ctx.

    Lines matching _COMMENT_RE (blank lines and '#' comments) are skipped.
    The first other line must match field_regex, otherwise a ParseError is
    raised; the value of its 'data' group is returned. If EOF is reached
    first, None is returned when may_reach_eof allows it, and a ParseError
    is raised otherwise.
    """
    while True:
        candidate = _get_next_line(ctx, may_reach_eof)
        if candidate is None:
            return None
        if _COMMENT_RE.match(candidate):
            continue
        field = _match_unconditional(ctx, field_regex, candidate)
        return field.group("data")
186+
187+
188+
def _parse_header(ctx, mdata):
    """
    Read the three header fields, in file order, and store them in mdata
    under the keys "desc", "cmd" and "time_unit".
    """
    header_fields = (
        ("desc", _FIELD_DESC_RE),
        ("cmd", _FIELD_CMD_RE),
        ("time_unit", _FIELD_TIME_UNIT_RE),
    )
    for key, field_regex in header_fields:
        mdata[key] = _get_next_field(ctx, field_regex)
192+
193+
194+
def _parse_snapshots(ctx, mdata):
    """
    Parse every remaining snapshot from ctx and record the results in mdata.

    Sets mdata["snapshots"] (list of snapshot data dictionaries) and
    mdata["detailed_snapshot_indices"] (positions of detailed snapshots in
    that list). mdata["peak_snapshot_index"] is set only if a peak snapshot
    was found; if there are several, the last one wins.
    """
    collected = []
    detailed_positions = []
    peak_position = None

    while True:
        parsed = _parse_snapshot(ctx)
        if parsed is None:
            break

        # Position this snapshot is about to occupy in the list.
        position = len(collected)
        if parsed["is_detailed"]:
            detailed_positions.append(position)
        if parsed["is_peak"]:
            peak_position = position
        collected.append(parsed["data"])

    mdata["snapshots"] = collected
    mdata["detailed_snapshot_indices"] = detailed_positions

    if peak_position is not None:
        mdata["peak_snapshot_index"] = peak_position
216+
217+
218+
def _parse_snapshot(ctx):
    """
    Parse one snapshot from ctx.

    Returns None if EOF is reached before a snapshot starts. Otherwise
    returns a dictionary with three keys: "is_detailed" (the heap_tree field
    is anything but "empty"), "is_peak" (the heap_tree field is "peak") and
    "data" (the snapshot's fields, with "heap_tree" set to the parsed tree
    for detailed snapshots and to None otherwise).
    """
    raw_id = _get_next_field(ctx, _FIELD_SNAPSHOT_RE, may_reach_eof=True)
    if raw_id is None:
        return None

    # Fields are read in the order they appear in the file.
    data = {"id": int(raw_id)}
    data["time"] = int(_get_next_field(ctx, _FIELD_TIME_RE))
    data["mem_heap"] = int(_get_next_field(ctx, _FIELD_MEM_HEAP_RE))
    data["mem_heap_extra"] = int(_get_next_field(ctx, _FIELD_MEM_EXTRA_RE))
    data["mem_stack"] = int(_get_next_field(ctx, _FIELD_MEM_STACK_RE))

    tree_kind = _get_next_field(ctx, _FIELD_HEAP_TREE_RE)
    detailed = tree_kind != "empty"
    peak = tree_kind == "peak"

    if detailed:
        data["heap_tree"] = _parse_heap_tree(ctx)
    else:
        data["heap_tree"] = None

    return {"is_detailed": detailed, "is_peak": peak, "data": data}
257+
258+
259+
def _parse_heap_tree(ctx):
    """
    Parse one heap tree node, and recursively its children, from ctx.

    Returns a dictionary with the keys "nbytes" (int), "children" (list of
    nodes of the same shape) and "details". "details" is None when the
    details section does not match _HEAP_DETAILS_RE; otherwise it is a
    dictionary with the keys "address", "function", "file" and "line".
    """
    entry = _match_unconditional(ctx, _HEAP_ENTRY_RE, _get_next_line(ctx))
    parsed_details = _HEAP_DETAILS_RE.match(entry.group("details"))

    node_details = None
    if parsed_details:
        # The line number is absent when the binary/library was built
        # without debug info, so only convert it when it was captured.
        line_number = parsed_details.group("line")
        if line_number is not None:
            line_number = int(line_number)

        node_details = {
            "address": parsed_details.group("address"),
            "function": parsed_details.group("function"),
            "file": parsed_details.group("fname"),
            "line": line_number
        }

    # Children follow their parent directly in the file, one entry each.
    child_count = int(entry.group("num_children"))
    child_nodes = [_parse_heap_tree(ctx) for _ in range(child_count)]

    return {
        "nbytes": int(entry.group("num_bytes")),
        "children": child_nodes,
        "details": node_details
    }

0 commit comments

Comments
 (0)