
Commit 13a8489

akariv and roll authored
Complete exception work (#137)
* Added additional information to exceptions (#134)
  * Added additional information to exceptions
  * Updated implementation
  Co-authored-by: Adam Kariv <[email protected]>
* Complete exceptions work on other dsp kinds
* Fix tests

Co-authored-by: roll <[email protected]>
1 parent 99f5215 commit 13a8489

File tree

10 files changed: +182 -60 lines changed


dataflows/VERSION (+1 -1)

@@ -1 +1 @@
-0.0.74
+0.1.0

dataflows/__init__.py (+2 -1)

@@ -1,4 +1,5 @@
 from .base import DataStream, DataStreamProcessor, schema_validator, ValidationError
 from .base import ResourceWrapper, PackageWrapper
+from .base import exceptions
 from .base import Flow
-from .processors import * # noqa
+from .processors import *  # noqa

dataflows/base/__init__.py (+1)

@@ -1,3 +1,4 @@
+from . import exceptions
 from .datastream import DataStream
 from .datastream_processor import DataStreamProcessor
 from .resource_wrapper import ResourceWrapper

dataflows/base/datastream_processor.py (+36 -14)

@@ -6,6 +6,7 @@
 from datapackage import Package
 from tableschema.exceptions import CastError
 
+from . import exceptions
 from .datastream import DataStream
 from .resource_wrapper import ResourceWrapper
 from .schema_validator import schema_validator
@@ -26,11 +27,13 @@ def __init__(self):
         self.stats = {}
         self.source = None
         self.datapackage = None
+        self.position = None
 
-    def __call__(self, source=None):
+    def __call__(self, source=None, position=None):
         if source is None:
             source = DataStream()
         self.source = source
+        self.position = position
         return self
 
     def process_resource(self, resource: ResourceWrapper):
@@ -71,28 +74,47 @@ def func():
     def _process(self):
         datastream = self.source._process()
 
-        self.datapackage = Package(descriptor=copy.deepcopy(datastream.dp.descriptor))
-        self.datapackage = self.process_datapackage(self.datapackage)
-        self.datapackage.commit()
-
-        return DataStream(self.datapackage,
-                          LazyIterator(self.get_iterator(datastream)),
-                          datastream.stats + [self.stats])
+        try:
+            self.datapackage = Package(descriptor=copy.deepcopy(datastream.dp.descriptor))
+            self.datapackage = self.process_datapackage(self.datapackage)
+            self.datapackage.commit()
+
+            return DataStream(self.datapackage,
+                              LazyIterator(self.get_iterator(datastream)),
+                              datastream.stats + [self.stats])
+        except Exception as exception:
+            self.raise_exception(exception)
+
+    def raise_exception(self, cause):
+        if not isinstance(cause, exceptions.ProcessorError):
+            error = exceptions.ProcessorError(
+                cause,
+                processor_name=self.__class__.__name__,
+                processor_object=self,
+                processor_position=self.position
+            )
+            raise error from cause
+        raise cause
 
     def process(self):
-        ds = self._process()
         try:
+            ds = self._process()
             for res in ds.res_iter:
                 collections.deque(res, maxlen=0)
         except CastError as e:
             for err in e.errors:
                 logging.error('%s', err)
+        except Exception as exception:
+            self.raise_exception(exception)
         return ds.dp, ds.merge_stats()
 
     def results(self, on_error=None):
-        ds = self._process()
-        results = [
-            list(schema_validator(res.res, res, on_error=on_error))
-            for res in ds.res_iter
-        ]
+        try:
+            ds = self._process()
+            results = [
+                list(schema_validator(res.res, res, on_error=on_error))
+                for res in ds.res_iter
+            ]
+        except Exception as exception:
+            self.raise_exception(exception)
         return results, ds.dp, ds.merge_stats()

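To make the control flow above easier to follow, here is a minimal, self-contained sketch of the wrap-once pattern that raise_exception implements. ToyProcessor is a hypothetical stand-in for DataStreamProcessor, not the real class; the point is that any error which is not already a ProcessorError is wrapped exactly once, with the original kept reachable both as .cause and via the raise ... from chain.

class ProcessorError(Exception):
    # Simplified copy of the new exception class, for illustration only.
    def __init__(self, cause, *, processor_name, processor_position):
        self.cause = cause
        self.processor_name = processor_name
        self.processor_position = processor_position
        super().__init__(str(cause))


class ToyProcessor:
    # Hypothetical stand-in for DataStreamProcessor.
    def __init__(self, position=None):
        self.position = position

    def raise_exception(self, cause):
        # Wrap only once: a ProcessorError raised by a nested step
        # propagates unchanged instead of being wrapped again.
        if not isinstance(cause, ProcessorError):
            raise ProcessorError(
                cause,
                processor_name=self.__class__.__name__,
                processor_position=self.position,
            ) from cause
        raise cause

    def process(self):
        try:
            raise ValueError('bad row')   # stands in for any failure
        except Exception as exception:
            self.raise_exception(exception)


try:
    ToyProcessor(position=3).process()
except ProcessorError as err:
    print(err.processor_name, err.processor_position)   # ToyProcessor 3
    print(repr(err.cause))                               # ValueError('bad row')
    assert err.__cause__ is err.cause                    # linked by 'raise ... from'
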
dataflows/base/exceptions.py (+20)

@@ -0,0 +1,20 @@
+class DataflowsException(Exception):
+    pass
+
+
+class ProcessorError(DataflowsException):
+
+    def __init__(self, cause, *, processor_name, processor_object, processor_position):
+        self.cause = cause
+        self.processor_name = processor_name
+        self.processor_object = processor_object
+        self.processor_position = processor_position
+        super().__init__(str(cause))
+
+    def __str__(self):
+        return 'Errored in processor %s in position #%s: %s' % \
+            (self.processor_name, self.processor_position, self.cause)
+
+
+class SourceLoadError(DataflowsException):
+    pass

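Downstream code can now catch a single exception type and inspect where a flow failed. A small usage sketch, assuming a flow whose first step fails: the CSV path is made up, while Flow, load, exceptions and .results() are the API exercised by the tests in this commit.

from dataflows import Flow, load, exceptions

try:
    Flow(
        load('data/does-not-exist.csv'),   # hypothetical path, fails at position 1
    ).results()
except exceptions.ProcessorError as err:
    print(err.processor_name)       # 'load'
    print(err.processor_position)   # 1
    print(err.cause)                # the original error, wrapped once
    # re-raise the underlying error if the wrapper is not wanted:
    # raise err.cause
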
dataflows/base/flow.py (+6 -6)

@@ -29,26 +29,26 @@ def _preprocess_chain(self):
     def _chain(self, ds=None):
         from ..helpers import datapackage_processor, rows_processor, row_processor, iterable_loader
 
-        for link in self._preprocess_chain():
+        for position, link in enumerate(self._preprocess_chain(), start=1):
             if isinstance(link, Flow):
                 ds = link._chain(ds)
             elif isinstance(link, DataStreamProcessor):
-                ds = link(ds)
+                ds = link(ds, position=position)
             elif isfunction(link):
                 sig = signature(link)
                 params = list(sig.parameters)
                 if len(params) == 1:
                     if params[0] == 'row':
-                        ds = row_processor(link)(ds)
+                        ds = row_processor(link)(ds, position=position)
                     elif params[0] == 'rows':
-                        ds = rows_processor(link)(ds)
+                        ds = rows_processor(link)(ds, position=position)
                     elif params[0] == 'package':
-                        ds = datapackage_processor(link)(ds)
+                        ds = datapackage_processor(link)(ds, position=position)
                     else:
                         assert False, 'Failed to parse function signature %r' % params
                 else:
                     assert False, 'Failed to parse function signature %r' % params
             elif isinstance(link, Iterable):
-                ds = iterable_loader(link)(ds)
+                ds = iterable_loader(link)(ds, position=position)
 
         return ds

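Because every link in the chain is now numbered with enumerate(..., start=1), the error message can pinpoint which step failed. A sketch mirroring the tests below: 'data/academy.csv' is a fixture used in the test suite, while the second path is made up.

from dataflows import Flow, load, exceptions

try:
    Flow(
        load('data/academy.csv'),        # position 1 - loads fine
        load('data/no-such-file.csv'),   # position 2 - fails
    ).results()
except exceptions.ProcessorError as err:
    # str(err) reads roughly like:
    # Errored in processor load in position #2: Failed to load source 'data/no-such-file.csv' ...
    assert err.processor_position == 2
    print(err)
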
dataflows/processors/load.py (+3 -2)

@@ -8,6 +8,7 @@
 from tabulator.helpers import reset_stream
 from tableschema.schema import Schema
 from .. import DataStreamProcessor
+from ..base.exceptions import SourceLoadError
 from ..base.schema_validator import schema_validator, ignore, drop, raise_exception
 from ..helpers.resource_matcher import ResourceMatcher
 
@@ -173,8 +174,8 @@ def process_datapackage(self, dp: Package):
         try:
             return self.safe_process_datapackage(dp)
         except Exception as e:
-            raise e from Exception('Failed to run load with load source {!r} and options {!r}'
-                                   .format(self.load_source, self.options))
+            raise SourceLoadError('Failed to load source {!r} and options {!r}: {}'
+                                  .format(self.load_source, self.options, e)) from e
 
     def safe_process_datapackage(self, dp: Package):

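With the change above, a failing load surfaces as a SourceLoadError that carries the source and options in its message, and that error in turn becomes the cause of the ProcessorError raised by the wrapping processor (as the tests that follow check). A brief sketch, again with a made-up path:

from dataflows import Flow, load, exceptions

try:
    Flow(load('data/missing.csv')).results()
except exceptions.ProcessorError as err:
    assert isinstance(err.cause, exceptions.SourceLoadError)
    print(err.cause)   # Failed to load source 'data/missing.csv' and options {...}: ...
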
tests/test_edge_cases.py (+89 -5)

@@ -1,15 +1,99 @@
 import pytest
 
-def test_exception_in_generator():
-    from dataflows import Flow, printer
 
-    class MyException(Exception):
-        pass
+class MyException(Exception):
+    pass
+
+
+def test_exception_in_generator():
+    from dataflows import Flow, printer, exceptions
 
     def generator():
         for i in range(5):
             raise MyException()
             yield {"i": i}
 
-    with pytest.raises(MyException):
+    with pytest.raises(exceptions.ProcessorError) as excinfo:
         Flow(generator(), printer()).process()
+    assert isinstance(excinfo.value.cause, MyException)
+
+
+def test_exception_information():
+    from dataflows import Flow, load, exceptions
+    flow = Flow(
+        load('data/bad-path1.csv'),
+    )
+    with pytest.raises(exceptions.ProcessorError) as excinfo:
+        flow.results()
+    assert str(excinfo.value.cause) == "Failed to load source 'data/bad-path1.csv' and options {'custom_parsers': {'xml': <class 'dataflows.processors.load.XMLParser'>}, 'ignore_blank_headers': True, 'headers': 1}: [Errno 2] No such file or directory: 'data/bad-path1.csv'"
+    assert excinfo.value.processor_name == 'load'
+    assert excinfo.value.processor_object.load_source == 'data/bad-path1.csv'
+    assert excinfo.value.processor_position == 1
+
+
+def test_exception_information_multiple_processors_simple():
+    from dataflows import Flow, load, exceptions
+    flow = Flow(
+        load('data/bad-path1.csv'),
+        load('data/bad-path2.csv'),
+    )
+    with pytest.raises(exceptions.ProcessorError) as excinfo:
+        flow.results()
+    assert str(excinfo.value.cause) == "Failed to load source 'data/bad-path1.csv' and options {'custom_parsers': {'xml': <class 'dataflows.processors.load.XMLParser'>}, 'ignore_blank_headers': True, 'headers': 1}: [Errno 2] No such file or directory: 'data/bad-path1.csv'"
+    assert excinfo.value.processor_name == 'load'
+    assert excinfo.value.processor_object.load_source == 'data/bad-path1.csv'
+    assert excinfo.value.processor_position == 1
+
+
+def test_exception_information_multiple_processors_last_errored():
+    from dataflows import Flow, load, exceptions
+    flow = Flow(
+        load('data/academy.csv'),
+        load('data/bad-path2.csv'),
+    )
+    with pytest.raises(exceptions.ProcessorError) as excinfo:
+        flow.results()
+    assert str(excinfo.value.cause) == "Failed to load source 'data/bad-path2.csv' and options {'custom_parsers': {'xml': <class 'dataflows.processors.load.XMLParser'>}, 'ignore_blank_headers': True, 'headers': 1}: [Errno 2] No such file or directory: 'data/bad-path2.csv'"
+    assert excinfo.value.processor_name == 'load'
+    assert excinfo.value.processor_object.load_source == 'data/bad-path2.csv'
+    assert excinfo.value.processor_position == 2
+
+
+def test_exception_information_multiple_processors_function_error():
+    from dataflows import Flow, load, exceptions
+
+    def func(rows):
+        for i, row in enumerate(rows):
+            if i == 1:
+                raise MyException('custom-error')
+            yield row
+
+    flow = Flow(
+        load('data/academy.csv'),
+        func
+    )
+    with pytest.raises(exceptions.ProcessorError) as excinfo:
+        flow.results()
+    assert str(excinfo.value.cause) == 'custom-error'
+    assert excinfo.value.processor_name == 'rows_processor'
+    assert excinfo.value.processor_position == 2
+
+
+def test_exception_information_multiple_processors_iterable_error():
+    from dataflows import Flow, printer, exceptions
+
+    def func():
+        for i in range(10):
+            if i == 1:
+                raise MyException('custom-iterable-error')
+            yield dict(a=i)
+
+    flow = Flow(
+        func(),
+        printer()
+    )
+    with pytest.raises(exceptions.ProcessorError) as excinfo:
+        flow.results()
+    assert str(excinfo.value.cause) == 'custom-iterable-error'
+    assert excinfo.value.processor_name == 'iterable_loader'
+    assert excinfo.value.processor_position == 1

tests/test_examples.py (+8 -14)

@@ -1,3 +1,5 @@
+import pytest
+
 def test_example_1():
     from dataflows import Flow
 
@@ -16,8 +18,6 @@ def lowerData(row):
     )
     data, *_ = f.results()
 
-    print(data)
-
     # [[{'data': 'hello'}, {'data': 'world'}]]
 
 
@@ -33,8 +33,6 @@ def titleName(row):
     )
     data, *_ = f.results()
 
-    print(data)
-
 
 def country_population():
     from xml.etree import ElementTree
@@ -63,8 +61,6 @@ def test_example_3():
     )
     data, *_ = f.results()
 
-    print(data)
-
 def test_example_4():
     from dataflows import Flow, set_type
 
@@ -74,8 +70,6 @@ def test_example_4():
     )
     data, dp, _ = f.results()
 
-    print(data[0][:10])
-
 def test_example_5():
     from dataflows import Flow, set_type, dump_to_path
 
@@ -112,8 +106,9 @@ def filter_pythagorean_triplets(rows):
     )
     _ = f.process()
 
+
 def test_validate():
-    from dataflows import Flow, validate, set_type, printer, ValidationError
+    from dataflows import Flow, validate, set_type, printer, ValidationError, exceptions
 
     def adder(row):
         row['a'] += 0.5
@@ -127,11 +122,10 @@ def adder(row):
         validate(),
         printer()
     )
-    try:
-        _ = f.process()
-        assert False
-    except ValidationError:
-        pass
+
+    with pytest.raises(exceptions.ProcessorError) as excinfo:
+        f.process()
+    assert isinstance(excinfo.value.cause, ValidationError)
 
 
 def test_example_7():

0 commit comments