Skip to content

Commit 92e4b90

Browse files
committed
TEST: Benchmark script for slicing gzipped files using ArrayProxy
1 parent e2c7809 commit 92e4b90

File tree

1 file changed

+199
-0
lines changed

1 file changed

+199
-0
lines changed
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
"""Benchmarks for ArrayProxy slicing of gzipped and non-gzipped files
2+
3+
Run benchmarks with::
4+
5+
import nibabel as nib
6+
nib.bench()
7+
8+
If you have doctests enabled by default in nose (with a noserc file or
9+
environment variable), and you have a numpy version <= 1.6.1, this will also
10+
run the doctests, let's hope they pass.
11+
12+
Run this benchmark with:
13+
14+
nosetests -s --match '(?:^|[\\b_\\.//-])[Bb]ench' /path/to/bench_arrayproxy_slicing.py
15+
"""
16+
17+
from timeit import timeit
18+
import contextlib
19+
import gc
20+
import itertools as it
21+
import numpy as np
22+
import mock
23+
24+
import nibabel as nib
25+
from nibabel.tmpdirs import InTemporaryDirectory
26+
from nibabel.openers import HAVE_INDEXED_GZIP
27+
28+
from .butils import print_git_title
29+
from ..rstutils import rst_table
30+
31+
# if memory_profiler is installed, we get memory usage results
32+
try:
33+
from memory_profiler import memory_usage
34+
except ImportError:
35+
memory_usage = None
36+
37+
38+
# Every test loads an image of shape SHAPE and then slices it NITERS times.
NITERS = 50
SHAPE = (100, 100, 100, 100)

# A test is run for every combination of SLICEOBJS, KEEP_OPENS and HAVE_IGZIP.
#
# Entries of a slice object are interpreted as follows:
#   ':'     is replaced with slice(None)
#   '?'     is replaced with a random index into the relevant axis
#   number  (assumed to be between 0 and 1) is scaled to the axis shape
SLICEOBJS = [
    ('?', ':', ':', ':'),
    (':', ':', ':', '?'),
    ('?', '?', '?', ':'),
]

KEEP_OPENS = [False, True]

# The "True" variant is only included when HAVE_INDEXED_GZIP is truthy.
HAVE_IGZIP = [False, True] if HAVE_INDEXED_GZIP else [False]
60+
61+
62+
@contextlib.contextmanager
def patch_indexed_gzip(have_igzip):
    """Temporarily override nibabel's ``HAVE_INDEXED_GZIP`` flags.

    For the duration of the ``with`` block, both
    ``nibabel.openers.HAVE_INDEXED_GZIP`` and
    ``nibabel.arrayproxy.HAVE_INDEXED_GZIP`` are patched to ``have_igzip``.
    """
    openers_flag = 'nibabel.openers.HAVE_INDEXED_GZIP'
    arrayproxy_flag = 'nibabel.arrayproxy.HAVE_INDEXED_GZIP'

    # Same entry order as a single two-item ``with``: openers first, then
    # arrayproxy; they are undone in reverse order on exit.
    with mock.patch(openers_flag, have_igzip):
        with mock.patch(arrayproxy_flag, have_igzip):
            yield
67+
68+
69+
def bench_arrayproxy_slicing():
    """Benchmark ArrayProxy slicing of a gzipped image against a plain one.

    A random ``float32`` image of shape ``SHAPE`` is saved as both
    ``testfile.nii`` and ``test.nii.gz`` inside an ``InTemporaryDirectory``
    context. For every combination of ``HAVE_IGZIP``, ``KEEP_OPENS`` and
    ``SLICEOBJS`` (except those with indexed_gzip enabled but
    ``keep_file_open`` disabled) both images are loaded and their
    ``dataobj`` is sliced ``NITERS`` times under ``timeit``. When
    ``memory_profiler`` is importable, memory usage of a slicing call is
    sampled as well; otherwise the memory figures are NaN.

    The results are printed as an RST table with the columns 'Time',
    'Baseline time', 'Time ratio' and 'Memory deviation'. Nothing is
    returned.
    """

    print_git_title('\nArrayProxy gzip slicing')

    # each test is a tuple containing
    # (HAVE_INDEXED_GZIP, keep_file_open, sliceobj)
    tests = list(it.product(HAVE_IGZIP, KEEP_OPENS, SLICEOBJS))

    # remove tests where HAVE_INDEXED_GZIP is True and keep_file_open is False,
    # because if keep_file_open is False, HAVE_INDEXED_GZIP has no effect
    tests = [t for t in tests if not (t[0] and not t[1])]

    testfile = 'testfile.nii'
    testfilegz = 'test.nii.gz'

    def get_test_label(test):
        """Return 'indexed_gzip' if the test uses it, otherwise 'gzip'."""
        have_igzip = test[0]
        keep_open = test[1]

        if not (have_igzip and keep_open):
            return 'gzip'
        else:
            return 'indexed_gzip'

    def fix_sliceobj(sliceobj):
        """Turn a symbolic slice spec into a tuple usable for indexing.

        ':' becomes ``slice(None)``, '?' becomes a random index into the
        corresponding axis, and any other entry is treated as a number
        that is scaled by the axis length.
        """
        new_sliceobj = []
        for i, s in enumerate(sliceobj):
            if s == ':':
                new_sliceobj.append(slice(None))
            elif s == '?':
                new_sliceobj.append(np.random.randint(0, SHAPE[i]))
            else:
                new_sliceobj.append(int(s * SHAPE[i]))
        return tuple(new_sliceobj)

    def fmt_sliceobj(sliceobj):
        """Format a symbolic slice spec as a string such as '[?, :, :, 50]'."""
        slcstr = []
        for i, s in enumerate(sliceobj):
            # Compare against a tuple rather than the string ':?': a
            # substring test raises TypeError for numeric entries and
            # would also match the empty string.
            if s in (':', '?'):
                slcstr.append(s)
            else:
                slcstr.append(str(int(s * SHAPE[i])))
        return '[{}]'.format(', '.join(slcstr))

    with InTemporaryDirectory():

        print('Generating test data... ({} MB)'.format(
            int(round(np.prod(SHAPE) * 4 / 1048576.))))

        data = np.array(np.random.random(SHAPE), dtype=np.float32)

        # zero out voxels so gzip has something to compress; the mask is
        # True wherever the random draw exceeds 0.1, i.e. for roughly 90%
        # of voxels
        mask = np.random.random(SHAPE[:3]) > 0.1
        if len(SHAPE) > 3:
            data[mask, :] = 0
        else:
            data[mask] = 0

        # save uncompressed and compressed versions of the image
        img = nib.nifti1.Nifti1Image(data, np.eye(4))
        nib.save(img, testfilegz)
        nib.save(img, testfile)

        # each result is a tuple containing
        # (label, keep_open, sliceobj, testtime, basetime, testmem, basemem)
        #
        # where "basetime" is the time taken to load and slice a memmapped
        # (uncompressed) image, and "basemem" is memory usage for the same
        results = []

        # We use the same random seed for each slice object,
        seeds = [np.random.randint(0, 2 ** 32) for s in SLICEOBJS]

        for ti, test in enumerate(tests):

            label = get_test_label(test)
            have_igzip, keep_open, sliceobj = test
            seed = seeds[SLICEOBJS.index(sliceobj)]

            print('Running test {} of {} ({})...'.format(
                ti + 1, len(tests), label))

            # load uncompressed and compressed versions of the image
            img = nib.load(testfile, keep_file_open=keep_open)

            with patch_indexed_gzip(have_igzip):
                imggz = nib.load(testfilegz, keep_file_open=keep_open)

            def basefunc():
                img.dataobj[fix_sliceobj(sliceobj)]

            def testfunc():
                with patch_indexed_gzip(have_igzip):
                    imggz.dataobj[fix_sliceobj(sliceobj)]

            # make sure nothing is floating around from the previous test
            # iteration, so memory profiling is (hopefully) more accurate
            gc.collect()

            if memory_usage is not None:
                membaseline = max(memory_usage(lambda: None))
                testmem = max(memory_usage(testfunc)) - membaseline
                basemem = max(memory_usage(basefunc)) - membaseline
            else:
                testmem = np.nan
                basemem = np.nan

            # reset the random number generator, so test and baseline use the
            # same slices
            np.random.seed(seed)
            testtime = float(timeit(testfunc, number=NITERS)) / float(NITERS)
            np.random.seed(seed)
            basetime = float(timeit(basefunc, number=NITERS)) / float(NITERS)

            results.append((label, keep_open, sliceobj, testtime, basetime,
                            testmem, basemem))

    data = np.zeros((len(results), 4))
    data[:, 0] = [r[3] for r in results]
    data[:, 1] = [r[4] for r in results]
    # A zero baseline time makes the ratio undefined for that row only;
    # report NaN there instead of discarding the whole column.
    data[:, 2] = [r[3] / r[4] if r[4] else np.nan for r in results]
    data[:, 3] = [r[5] - r[6] for r in results]

    rowlbls = ['Type {}, keep_open {}, slice {}'.format(
        r[0], r[1], fmt_sliceobj(r[2])) for r in results]
    collbls = ['Time', 'Baseline time', 'Time ratio', 'Memory deviation']

    print(rst_table(data, rowlbls, collbls))

0 commit comments

Comments
 (0)