Skip to content

Commit 8c1d0bc

Browse files
authored
Merge pull request #562 from pauldmccarthy/indexed_gzip_usage
Only use `indexed_gzip` when explicitly requested
2 parents fa76141 + 39a2963 commit 8c1d0bc

8 files changed

+278
-49
lines changed

nibabel/arrayproxy.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -265,10 +265,10 @@ def _get_fileobj(self):
265265
"""
266266
if self._keep_file_open:
267267
if not hasattr(self, '_opener'):
268-
self._opener = ImageOpener(self.file_like)
268+
self._opener = ImageOpener(self.file_like, keep_open=True)
269269
yield self._opener
270270
else:
271-
with ImageOpener(self.file_like) as opener:
271+
with ImageOpener(self.file_like, keep_open=False) as opener:
272272
yield opener
273273

274274
def get_unscaled(self):

nibabel/benchmarks/bench_array_to_file.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@
1919

2020
import numpy as np
2121

22-
2322
from .butils import print_git_title
2423

2524
from numpy.testing import measure
2625

26+
from nibabel.volumeutils import array_to_file # NOQA
27+
2728

2829
def bench_array_to_file():
2930
rng = np.random.RandomState(20111001)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
"""Benchmarks for ArrayProxy slicing of gzipped and non-gzipped files
2+
3+
Run benchmarks with::
4+
5+
import nibabel as nib
6+
nib.bench()
7+
8+
If you have doctests enabled by default in nose (with a noserc file or
9+
environment variable), and you have a numpy version <= 1.6.1, this will also
10+
run the doctests; let's hope they pass.
11+
12+
Run this benchmark with:
13+
14+
nosetests -s --match '(?:^|[\\b_\\.//-])[Bb]ench' /path/to/bench_arrayproxy_slicing.py
15+
"""
16+
17+
from timeit import timeit
18+
import contextlib
19+
import gc
20+
import itertools as it
21+
import numpy as np
22+
import mock
23+
24+
import nibabel as nib
25+
from nibabel.tmpdirs import InTemporaryDirectory
26+
from nibabel.openers import HAVE_INDEXED_GZIP
27+
28+
from .butils import print_git_title
29+
from ..rstutils import rst_table
30+
31+
# if memory_profiler is installed, we get memory usage results
32+
try:
33+
from memory_profiler import memory_usage
34+
except ImportError:
35+
memory_usage = None
36+
37+
38+
# Each test involves loading an image of shape SHAPE, and then slicing it
39+
# NITERS times
40+
NITERS = 50
41+
SHAPE = (100, 100, 100, 100)
42+
43+
# One test is run for each combination of SLICEOBJS, KEEP_OPENS, and HAVE_IGZIP
44+
45+
# ':' gets replaced with slice(None)
46+
# '?' gets replaced with a random index into the relevant axis
47+
# numbers (assumed to be between 0 and 1) get scaled to the axis shape
48+
SLICEOBJS = [
49+
('?', ':', ':', ':'),
50+
(':', ':', ':', '?'),
51+
('?', '?', '?', ':'),
52+
]
53+
54+
KEEP_OPENS = [False, True]
55+
56+
if HAVE_INDEXED_GZIP:
57+
HAVE_IGZIP = [False, True]
58+
else:
59+
HAVE_IGZIP = [False]
60+
61+
62+
@contextlib.contextmanager
63+
def patch_indexed_gzip(have_igzip):
64+
65+
atts = ['nibabel.openers.HAVE_INDEXED_GZIP',
66+
'nibabel.arrayproxy.HAVE_INDEXED_GZIP']
67+
68+
with mock.patch(atts[0], have_igzip), mock.patch(atts[1], have_igzip):
69+
yield
70+
71+
72+
def bench_arrayproxy_slicing():
73+
74+
print_git_title('\nArrayProxy gzip slicing')
75+
76+
# each test is a tuple containing
77+
# (HAVE_INDEXED_GZIP, keep_file_open, sliceobj)
78+
tests = list(it.product(HAVE_IGZIP, KEEP_OPENS, SLICEOBJS))
79+
80+
# remove tests where HAVE_INDEXED_GZIP is True and keep_file_open is False,
81+
# because if keep_file_open is False, HAVE_INDEXED_GZIP has no effect
82+
tests = [t for t in tests if not (t[0] and not t[1])]
83+
84+
testfile = 'testfile.nii'
85+
testfilegz = 'test.nii.gz'
86+
87+
def get_test_label(test):
88+
have_igzip = test[0]
89+
keep_open = test[1]
90+
91+
if not (have_igzip and keep_open):
92+
return 'gzip'
93+
else:
94+
return 'indexed_gzip'
95+
96+
def fix_sliceobj(sliceobj):
97+
new_sliceobj = []
98+
for i, s in enumerate(sliceobj):
99+
if s == ':':
100+
new_sliceobj.append(slice(None))
101+
elif s == '?':
102+
new_sliceobj.append(np.random.randint(0, SHAPE[i]))
103+
else:
104+
new_sliceobj.append(int(s * SHAPE[i]))
105+
return tuple(new_sliceobj)
106+
107+
def fmt_sliceobj(sliceobj):
108+
slcstr = []
109+
for i, s in enumerate(sliceobj):
110+
if s in ':?':
111+
slcstr.append(s)
112+
else:
113+
slcstr.append(str(int(s * SHAPE[i])))
114+
return '[{}]'.format(', '.join(slcstr))
115+
116+
with InTemporaryDirectory():
117+
118+
print('Generating test data... ({} MB)'.format(
119+
int(round(np.prod(SHAPE) * 4 / 1048576.))))
120+
121+
data = np.array(np.random.random(SHAPE), dtype=np.float32)
122+
123+
# zero out the voxels where mask is True (random value > 0.1, i.e. ~90% of them) so gzip has something to compress
124+
mask = np.random.random(SHAPE[:3]) > 0.1
125+
if len(SHAPE) > 3:
126+
data[mask, :] = 0
127+
else:
128+
data[mask] = 0
129+
130+
# save uncompressed and compressed versions of the image
131+
img = nib.nifti1.Nifti1Image(data, np.eye(4))
132+
nib.save(img, testfilegz)
133+
nib.save(img, testfile)
134+
135+
# each result is a tuple containing
136+
# (label, keep_open, sliceobj, testtime, basetime, testmem, basemem)
137+
#
138+
# where "basetime" is the time taken to load and slice a memmapped
139+
# (uncompressed) image, and "basemem" is the memory usage for the same operation
140+
results = []
141+
142+
# We use the same random seed for each slice object,
143+
seeds = [np.random.randint(0, 2 ** 32) for s in SLICEOBJS]
144+
145+
for ti, test in enumerate(tests):
146+
147+
label = get_test_label(test)
148+
have_igzip, keep_open, sliceobj = test
149+
seed = seeds[SLICEOBJS.index(sliceobj)]
150+
151+
print('Running test {} of {} ({})...'.format(
152+
ti + 1, len(tests), label))
153+
154+
# load uncompressed and compressed versions of the image
155+
img = nib.load(testfile, keep_file_open=keep_open)
156+
157+
with patch_indexed_gzip(have_igzip):
158+
imggz = nib.load(testfilegz, keep_file_open=keep_open)
159+
160+
def basefunc():
161+
img.dataobj[fix_sliceobj(sliceobj)]
162+
163+
def testfunc():
164+
with patch_indexed_gzip(have_igzip):
165+
imggz.dataobj[fix_sliceobj(sliceobj)]
166+
167+
# make sure nothing is floating around from the previous test
168+
# iteration, so memory profiling is (hopefully) more accurate
169+
gc.collect()
170+
171+
if memory_usage is not None:
172+
membaseline = max(memory_usage(lambda: None))
173+
testmem = max(memory_usage(testfunc)) - membaseline
174+
basemem = max(memory_usage(basefunc)) - membaseline
175+
else:
176+
testmem = np.nan
177+
basemem = np.nan
178+
179+
# reset the random number generator, so test and baseline use the
180+
# same slices
181+
np.random.seed(seed)
182+
testtime = float(timeit(testfunc, number=NITERS)) / float(NITERS)
183+
np.random.seed(seed)
184+
basetime = float(timeit(basefunc, number=NITERS)) / float(NITERS)
185+
186+
results.append((label, keep_open, sliceobj, testtime, basetime,
187+
testmem, basemem))
188+
189+
data = np.zeros((len(results), 4))
190+
data[:, 0] = [r[3] for r in results]
191+
data[:, 1] = [r[4] for r in results]
192+
try:
193+
data[:, 2] = [r[3] / r[4] for r in results]
194+
except:
195+
data[:, 2] = np.nan
196+
data[:, 3] = [r[5] - r[6] for r in results]
197+
198+
rowlbls = ['Type {}, keep_open {}, slice {}'.format(
199+
r[0], r[1], fmt_sliceobj(r[2])) for r in results]
200+
collbls = ['Time', 'Baseline time', 'Time ratio', 'Memory deviation']
201+
202+
print(rst_table(data, rowlbls, collbls))

nibabel/benchmarks/bench_finite_range.py

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424

2525
from numpy.testing import measure
2626

27+
from nibabel.volumeutils import finite_range # NOQA
28+
2729

2830
def bench_finite_range():
2931
rng = np.random.RandomState(20111001)

nibabel/openers.py

+25-6
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,23 @@
1212
import bz2
1313
import gzip
1414
import sys
15+
import warnings
1516
from os.path import splitext
17+
from distutils.version import StrictVersion
1618

17-
# is indexed_gzip present?
19+
# is indexed_gzip present and modern?
1820
try:
19-
from indexed_gzip import SafeIndexedGzipFile
21+
from indexed_gzip import SafeIndexedGzipFile, __version__ as version
22+
2023
HAVE_INDEXED_GZIP = True
24+
25+
if StrictVersion(version) < StrictVersion('0.6.0'):
26+
warnings.warn('indexed_gzip is present, but too old '
27+
'(>= 0.6.0 required): {})'.format(version))
28+
HAVE_INDEXED_GZIP = False
29+
30+
del version
31+
2132
except ImportError:
2233
HAVE_INDEXED_GZIP = False
2334

@@ -67,10 +78,10 @@ def readinto(self, buf):
6778
return n_read
6879

6980

70-
def _gzip_open(filename, mode='rb', compresslevel=9):
81+
def _gzip_open(filename, mode='rb', compresslevel=9, keep_open=False):
7182

7283
# use indexed_gzip if possible for faster read access
73-
if mode == 'rb' and HAVE_INDEXED_GZIP:
84+
if keep_open and mode == 'rb' and HAVE_INDEXED_GZIP:
7485
gzip_file = SafeIndexedGzipFile(filename)
7586

7687
# Fall-back to built-in GzipFile (wrapped with the BufferedGzipFile class
@@ -101,12 +112,13 @@ class Opener(object):
101112
\*args : positional arguments
102113
passed to opening method when `fileish` is str. ``mode``, if not
103114
specified, is `rb`. ``compresslevel``, if relevant, and not specified,
104-
is set from class variable ``default_compresslevel``
115+
is set from class variable ``default_compresslevel``. ``keep_open``, if
116+
relevant, and not specified, is ``False``.
105117
\*\*kwargs : keyword arguments
106118
passed to opening method when `fileish` is str. Change of defaults as
107119
for \*args
108120
"""
109-
gz_def = (_gzip_open, ('mode', 'compresslevel'))
121+
gz_def = (_gzip_open, ('mode', 'compresslevel', 'keep_open'))
110122
bz2_def = (bz2.BZ2File, ('mode', 'buffering', 'compresslevel'))
111123
compress_ext_map = {
112124
'.gz': gz_def,
@@ -132,8 +144,15 @@ def __init__(self, fileish, *args, **kwargs):
132144
# Set default mode
133145
if 'mode' not in full_kwargs:
134146
kwargs['mode'] = 'rb'
147+
# Default compression level
135148
if 'compresslevel' in arg_names and 'compresslevel' not in kwargs:
136149
kwargs['compresslevel'] = self.default_compresslevel
150+
# Default keep_open hint
151+
if 'keep_open' in arg_names:
152+
kwargs.setdefault('keep_open', False)
153+
# Clear keep_open hint if it is not relevant for the file type
154+
else:
155+
kwargs.pop('keep_open', None)
137156
self.fobj = opener(fileish, *args, **kwargs)
138157
self._name = fileish
139158
self.me_opened = True

nibabel/pkg_info.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
import sys
33
import subprocess
44
try:
5-
from ConfigParser import ConfigParser
5+
from ConfigParser import RawConfigParser as ConfigParser
66
except ImportError:
7-
from configparser import ConfigParser # python 3
7+
from configparser import RawConfigParser as ConfigParser # python 3
88

99
COMMIT_INFO_FNAME = 'COMMIT_INFO.txt'
1010

nibabel/tests/test_arrayproxy.py

+1-15
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from nibabel.testing import VIRAL_MEMMAP
3434

3535
from .test_fileslice import slicer_samples
36+
from .test_openers import patch_indexed_gzip
3637

3738

3839
class FunkyHeader(object):
@@ -412,21 +413,6 @@ def test_keep_file_open_true_false_invalid():
412413
ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='cauto')
413414

414415

415-
@contextlib.contextmanager
416-
def patch_indexed_gzip(state):
417-
# Make it look like we do (state==True) or do not (state==False) have
418-
# the indexed gzip module.
419-
if state:
420-
values = (True, True, gzip.GzipFile)
421-
else:
422-
values = (False, False, None)
423-
with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', values[0]), \
424-
mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', values[1]), \
425-
mock.patch('nibabel.openers.SafeIndexedGzipFile', values[2],
426-
create=True):
427-
yield
428-
429-
430416
@contextlib.contextmanager
431417
def patch_keep_file_open_default(value):
432418
# Patch arrayproxy.KEEP_FILE_OPEN_DEFAULT with the given value

0 commit comments

Comments
 (0)