Skip to content

Commit f3dd15f

Browse files
roll and akariv authored
Added additional information to exceptions (#134)
* Added additional information to exceptions
* Updated implementation

Co-authored-by: Adam Kariv <[email protected]>
1 parent 99f5215 commit f3dd15f

File tree

6 files changed

+87
-6
lines changed

6 files changed

+87
-6
lines changed

dataflows/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .base import DataStream, DataStreamProcessor, schema_validator, ValidationError
22
from .base import ResourceWrapper, PackageWrapper
3+
from .base import exceptions
34
from .base import Flow
4-
from .processors import * # noqa
5+
from .processors import * # noqa

dataflows/base/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from . import exceptions
12
from .datastream import DataStream
23
from .datastream_processor import DataStreamProcessor
34
from .resource_wrapper import ResourceWrapper

dataflows/base/datastream_processor.py

+28-3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from datapackage import Package
77
from tableschema.exceptions import CastError
88

9+
from . import exceptions
910
from .datastream import DataStream
1011
from .resource_wrapper import ResourceWrapper
1112
from .schema_validator import schema_validator
@@ -26,11 +27,13 @@ def __init__(self):
2627
self.stats = {}
2728
self.source = None
2829
self.datapackage = None
30+
self.position = None
2931

30-
def __call__(self, source=None):
32+
def __call__(self, source=None, position=None):
3133
if source is None:
3234
source = DataStream()
3335
self.source = source
36+
self.position = position
3437
return self
3538

3639
def process_resource(self, resource: ResourceWrapper):
@@ -69,7 +72,18 @@ def func():
6972
return func
7073

7174
def _process(self):
72-
datastream = self.source._process()
75+
try:
76+
datastream = self.source._process()
77+
except Exception as exception:
78+
if not isinstance(exception, exceptions.ProcessorError):
79+
error = exceptions.ProcessorError(
80+
exception,
81+
processor_name=self.source.__class__.__name__,
82+
processor_object=self.source,
83+
processor_position=self.source.position
84+
)
85+
raise error from exception
86+
raise exception
7387

7488
self.datapackage = Package(descriptor=copy.deepcopy(datastream.dp.descriptor))
7589
self.datapackage = self.process_datapackage(self.datapackage)
@@ -90,7 +104,18 @@ def process(self):
90104
return ds.dp, ds.merge_stats()
91105

92106
def results(self, on_error=None):
93-
ds = self._process()
107+
try:
108+
ds = self._process()
109+
except Exception as exception:
110+
if not isinstance(exception, exceptions.ProcessorError):
111+
error = exceptions.ProcessorError(
112+
exception,
113+
processor_name=self.__class__.__name__,
114+
processor_object=self,
115+
processor_position=self.position
116+
)
117+
raise error from exception
118+
raise exception
94119
results = [
95120
list(schema_validator(res.res, res, on_error=on_error))
96121
for res in ds.res_iter

dataflows/base/exceptions.py

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
class DataflowsException(Exception):
2+
pass
3+
4+
5+
class ProcessorError(DataflowsException):
6+
7+
def __init__(self, cause, *, processor_name, processor_object, processor_position):
8+
self.cause = cause
9+
self.processor_name = processor_name
10+
self.processor_object = processor_object
11+
self.processor_position = processor_position
12+
super().__init__(str(cause))

dataflows/base/flow.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ def _preprocess_chain(self):
2929
def _chain(self, ds=None):
3030
from ..helpers import datapackage_processor, rows_processor, row_processor, iterable_loader
3131

32-
for link in self._preprocess_chain():
32+
for position, link in enumerate(self._preprocess_chain(), start=1):
3333
if isinstance(link, Flow):
3434
ds = link._chain(ds)
3535
elif isinstance(link, DataStreamProcessor):
36-
ds = link(ds)
36+
ds = link(ds, position=position)
3737
elif isfunction(link):
3838
sig = signature(link)
3939
params = list(sig.parameters)

tests/test_lib.py

+42
Original file line numberDiff line numberDiff line change
@@ -1596,6 +1596,7 @@ def test_force_temporal_format():
15961596
}
15971597
]]
15981598

1599+
15991600
# Extract missing values
16001601

16011602
def test_extract_missing_values():
@@ -1722,6 +1723,47 @@ def test_conditional():
17221723
dict(a=i, c=i) for i in range(3)
17231724
]
17241725

1726+
def test_exception_information():
1727+
from dataflows import load, exceptions
1728+
flow = Flow(
1729+
load('data/bad-path1.csv'),
1730+
)
1731+
with pytest.raises(exceptions.ProcessorError) as excinfo:
1732+
data = flow.results()
1733+
assert str(excinfo.value.cause) == "[Errno 2] No such file or directory: 'data/bad-path1.csv'"
1734+
assert excinfo.value.processor_name == 'load'
1735+
assert excinfo.value.processor_object.load_source == 'data/bad-path1.csv'
1736+
assert excinfo.value.processor_position == 1
1737+
1738+
1739+
def test_exception_information_multiple_processors():
1740+
from dataflows import load, exceptions
1741+
flow = Flow(
1742+
load('data/bad-path1.csv'),
1743+
load('data/bad-path2.csv'),
1744+
)
1745+
with pytest.raises(exceptions.ProcessorError) as excinfo:
1746+
data = flow.results()
1747+
assert str(excinfo.value.cause) == "[Errno 2] No such file or directory: 'data/bad-path1.csv'"
1748+
assert excinfo.value.processor_name == 'load'
1749+
assert excinfo.value.processor_object.load_source == 'data/bad-path1.csv'
1750+
assert excinfo.value.processor_position == 1
1751+
1752+
1753+
def test_exception_information_multiple_processors_last_errored():
1754+
from dataflows import load, exceptions
1755+
flow = Flow(
1756+
load('data/academy.csv'),
1757+
load('data/bad-path2.csv'),
1758+
)
1759+
with pytest.raises(exceptions.ProcessorError) as excinfo:
1760+
data = flow.results()
1761+
assert str(excinfo.value.cause) == "[Errno 2] No such file or directory: 'data/bad-path2.csv'"
1762+
assert excinfo.value.processor_name == 'load'
1763+
assert excinfo.value.processor_object.load_source == 'data/bad-path2.csv'
1764+
assert excinfo.value.processor_position == 2
1765+
1766+
17251767
def test_finalizer():
17261768
from dataflows import Flow, finalizer
17271769

0 commit comments

Comments
 (0)