Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MNT: Expand exception preservation to more scenarios #162

Merged
merged 6 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# `indexed_gzip` changelog


## 1.9.3 (November 27th 2024)


* Expanded exception preservation to more scenarios (#161, #162).


## 1.9.2 (November 18th 2024)


Expand Down
2 changes: 1 addition & 1 deletion indexed_gzip/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
"""


__version__ = '1.9.2'
__version__ = '1.9.3'
75 changes: 40 additions & 35 deletions indexed_gzip/indexed_gzip.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -469,8 +469,9 @@ cdef class _IndexedGzipFile:
window_size=window_size,
readbuf_size=readbuf_size,
flags=flags):
exc = get_python_exception()
raise ZranError('zran_init returned error (file: '
'{})'.format(self.errname))
'{})'.format(self.errname)) from exc

log.debug('%s.__init__(%s, %s, %s, %s, %s, %s, %s)',
type(self).__name__,
Expand All @@ -486,45 +487,36 @@ cdef class _IndexedGzipFile:
self.import_index(index_file)


@contextlib.contextmanager
def __file_handle(self):
"""This method is used as a context manager whenever access to the
underlying file stream is required. It makes sure that ``index.fd``
field is set appropriately, opening/closing the file handle as
necessary (depending on the value of :attr:`drop_handles`).
"""

# Errors occur with Python 2.7 and
# Cython < 0.26 when decorating
# cdef-class methods. This workaround
# can be removed when you are happy
# dropping support for cython < 0.26.
@contextlib.contextmanager
def proxy():

# If a file handle already exists,
# return it. This clause makes this
# context manager reentrant.
if self.index.fd is not NULL:
yield
# If a file handle already exists,
# return it. This clause makes this
# context manager reentrant.
if self.index.fd is not NULL:
yield

# If a file-like object exists (without an associated
# file descriptor, since self.index.fd is NULL),
# also return it.
elif self.pyfid is not None:
yield
# If a file-like object exists (without an associated
# file descriptor, since self.index.fd is NULL),
# also return it.
elif self.pyfid is not None:
yield

# otherwise we open a new
# file handle on each access
else:
try:
self.index.fd = fopen(self.filename.encode(), 'rb')
yield

finally:
fclose(self.index.fd)
self.index.fd = NULL
# otherwise we open a new
# file handle on each access
else:
try:
self.index.fd = fopen(self.filename.encode(), 'rb')
yield

return proxy()
finally:
fclose(self.index.fd)
self.index.fd = NULL


def seek_points(self):
Expand Down Expand Up @@ -667,8 +659,10 @@ cdef class _IndexedGzipFile:
ret = zran.zran_build_index(&self.index, 0, 0)

if ret != zran.ZRAN_BUILD_INDEX_OK:
exc = get_python_exception()
raise ZranError('zran_build_index returned error: {} (file: {})'
.format(ZRAN_ERRORS.ZRAN_BUILD[ret], self.errname))
.format(ZRAN_ERRORS.ZRAN_BUILD[ret],
self.errname)) from exc

log.debug('%s.build_full_index()', type(self).__name__)

Expand Down Expand Up @@ -716,8 +710,10 @@ cdef class _IndexedGzipFile:
'{})'.format(self.errname))

elif ret not in (zran.ZRAN_SEEK_OK, zran.ZRAN_SEEK_EOF):
exc = get_python_exception()
raise ZranError('zran_seek returned error: {} (file: {})'
.format(ZRAN_ERRORS.ZRAN_SEEK[ret], self.errname))
.format(ZRAN_ERRORS.ZRAN_SEEK[ret],
self.errname)) from exc

offset = self.tell()

Expand Down Expand Up @@ -826,6 +822,9 @@ cdef class _IndexedGzipFile:
cdef void *vbuf
cdef int64_t ret

# reference to any exceptions that are raised
exc = None

# Create a Py_Buffer which allows
# us to access the memory managed
# by the provided buf
Expand All @@ -838,6 +837,9 @@ cdef class _IndexedGzipFile:
with self.__file_handle():
with nogil:
ret = zran.zran_read(index, vbuf, bufsz)
# Something in __file_handle seems to clear
# the exception state, so we look now in
# case an exception has occurred
exc = get_python_exception()

# release the py_buffer
Expand All @@ -847,7 +849,8 @@ cdef class _IndexedGzipFile:
# see how the read went
if ret == zran.ZRAN_READ_FAIL:
raise ZranError('zran_read returned error: {} (file: {})'
.format(ZRAN_ERRORS.ZRAN_READ[ret], self.errname)) from exc
.format(ZRAN_ERRORS.ZRAN_READ[ret],
self.errname)) from exc

# This will happen if the current
# seek point is not covered by the
Expand Down Expand Up @@ -1020,9 +1023,10 @@ cdef class _IndexedGzipFile:
fd = NULL
ret = zran.zran_export_index(&self.index, fd, <PyObject*>fileobj)
if ret != zran.ZRAN_EXPORT_OK:
exc = get_python_exception()
raise ZranError('export_index returned error: {} (file: '
'{})'.format(ZRAN_ERRORS.ZRAN_EXPORT[ret],
self.errname))
self.errname)) from exc

finally:
if close_file:
Expand Down Expand Up @@ -1070,9 +1074,10 @@ cdef class _IndexedGzipFile:
fd = NULL
ret = zran.zran_import_index(&self.index, fd, <PyObject*>fileobj)
if ret != zran.ZRAN_IMPORT_OK:
exc = get_python_exception()
raise ZranError('import_index returned error: {} (file: '
'{})'.format(ZRAN_ERRORS.ZRAN_IMPORT[ret],
self.errname))
self.errname)) from exc

self.skip_crc_check = True

Expand Down
20 changes: 15 additions & 5 deletions indexed_gzip/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,11 @@ def compress_with_gzip_command():
chunk = inf.read(buflen)

# Use python gzip module on windows,
# can't assume gzip exists
if sys.platform.startswith("win"):
# as we can't assume the gzip command
# exists.
onwin = sys.platform.startswith("win")

if onwin:
target = compress_with_gzip_module

# If not windows, assume that gzip command
Expand All @@ -124,9 +127,16 @@ def compress_with_gzip_command():
else:
target = compress_with_gzip_command

cmpThread = threading.Thread(target=target)
cmpThread.start()
poll(lambda : not cmpThread.is_alive())
# Some kind of corruption also seems to
# occur on windows+free-threaded builds,
# so don't thread on windows. Threading
# here is just for progress reporting.
if onwin:
target()
else:
cmpThread = threading.Thread(target=target)
cmpThread.start()
poll(lambda : not cmpThread.is_alive())


def compress_inmem(data, concat):
Expand Down
132 changes: 102 additions & 30 deletions indexed_gzip/tests/test_indexed_gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from __future__ import print_function

import gc
import os
import os.path as op
import itertools as it
Expand Down Expand Up @@ -509,8 +510,7 @@ def test_read_beyond_end(concat, drop):
os.remove(testfile)


@pytest.mark.parametrize('use_readinto', [False, True])
def test_read_exception(testfile, nelems, use_readinto):
def test_exception_preserved(testfile, nelems):
"""When wrapping a python file object, if it raises an exception
it should be preserved.
"""
Expand All @@ -519,47 +519,119 @@ def test_read_exception(testfile, nelems, use_readinto):
# because it's read-only, so skip it.
return

f = None
gzf = None

MY_ERROR = "This error should be preserved"

# We'll use a weakref to check that we are handling reference counting
# correctly, and you cannot weakref an Exception, so we need a subclass.
# We'll use a weakref to check that we are handling
# reference counting correctly, and you cannot
# weakref an Exception, so we need a subclass.
class MyException(Exception):
pass
my_err_weak = [None]
def my_error_fn(*args, **kwargs):
err = MyException(MY_ERROR)
my_err_weak[0] = weakref.ref(err)

# Function which raises an exception. This is
# is patched into the python file-like to test
# that the igzip object preserves the exception.
# We store a weakref to each exception to check
# that no references to them are kept.
all_errors = []
error_msg = "This error should be preserved"

def error_fn(name, *args, **kwargs):
err = MyException(error_msg + ": " + name)
all_errors.append(weakref.ref(err))
raise err

try:
with open(testfile, 'rb') as f2:
f = BytesIO(f2.read())
gzf = igzip._IndexedGzipFile(fileobj=f)
f.read = my_error_fn
# Below we check that each of these scenarios
# raises an appropriate exception
def test_create(pyf):
pyf.seek = ft.partial(error_fn, "test_create")
gzf = igzip.IndexedGzipFile(fileobj=pyf)
return gzf

def test_build_full_index(pyf):
pyf.read = ft.partial(error_fn, "test_build_full_index")
pyf.seek = ft.partial(error_fn, "test_build_full_index")
gzf = igzip.IndexedGzipFile(fileobj=pyf)
gzf.build_full_index()
return gzf

def test_seek(pyf):
pyf.seek = ft.partial(error_fn, "test_seek")
gzf = igzip.IndexedGzipFile(fileobj=pyf)
gzf.seek(5)
return gzf

def test_read(pyf):
# Test _IndexedGzipFile.read, as the buffered
# IndexedGzipFile.read method uses readinto
pyf.read = ft.partial(error_fn, "test_read")
gzf = igzip._IndexedGzipFile(fileobj=pyf)
gzf.read(1)
return gzf

def test_readinto(pyf):
pyf.read = ft.partial(error_fn, "test_readinto")
gzf = igzip.IndexedGzipFile(fileobj=pyf)
ba = bytearray(1)
gzf.readinto(ba)
return gzf

def test_export_index(pyf):
gzf = igzip.IndexedGzipFile(fileobj=pyf)
gzf.build_full_index()
idxf = BytesIO()
idxf.write = ft.partial(error_fn, "test_export_index")
gzf.export_index(fileobj=idxf)
return gzf

def test_import_index(pyf):
gzf = igzip.IndexedGzipFile(fileobj=pyf)
idxf = BytesIO()
gzf.build_full_index()
gzf.export_index(fileobj=idxf)
gzf.close()
del gzf
gzf = igzip.IndexedGzipFile(fileobj=pyf)
idxf.read = ft.partial(error_fn, "test_import_index")
gzf.import_index(fileobj=idxf)
return gzf

tests = [test_create, test_build_full_index, test_seek,
test_read, test_readinto, test_export_index,
test_import_index]

# Call the given function, making sure that
# the exception originating from the python
# file-like is preserved
def check_error_preserved(func):
pyf = None
gzf = None
with open(testfile, 'rb') as f:
pyf = BytesIO(f.read())
try:
if use_readinto:
ba = bytearray(1)
gzf.readinto(ba)
else:
gzf.read(1)
gzf = func(pyf)
except Exception as e:
assert MY_ERROR in str(e) or MY_ERROR in str(e.__cause__), "Exception was not preserved; got {}".format(e)
fname = func.__name__
estr = str(e) + " " + str(e.__cause__)
assert error_msg in estr, \
"Exception was not preserved; got {}".format(estr)
assert fname in estr, \
"Exception didn't come from {}; got {}".format(fname, estr)
del e
else:
assert False, "Expected an exception to be raised"


finally:
if gzf is not None: gzf.close()
if f is not None: f .close()
del f
if pyf is not None: pyf.close()
del gzf
assert (my_err_weak[0] is None or my_err_weak[0]() is None,
"Exception was not garbage collected")
del pyf

# Run the tests
for test in tests:
check_error_preserved(test)

# Make sure there are no dangling
# references to any exception objects
gc.collect()
for e in all_errors:
assert (e() is None), "Exception was not garbage collected"

def test_seek(concat):
with tempdir() as tdir:
Expand Down
3 changes: 2 additions & 1 deletion indexed_gzip/zran_file_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ size_t _fwrite_python(const void *ptr,
goto fail;
if ((data = PyObject_CallMethod(f, "write", "(O)", input)) == NULL)
goto fail;

if (PyErr_Occurred())
goto fail;
#if PY_MAJOR_VERSION >= 3
if ((len = PyLong_AsLong(data)) == -1 && PyErr_Occurred())
goto fail;
Expand Down