@@ -18,6 +18,12 @@ class MissingHeaderError(Exception):
18
18
pass
19
19
20
20
21
class MissingDataError(Exception):
    """Error signalling that too many rows were skipped."""
25
+
26
+
21
27
class BaseImporter (ABC ):
22
28
"""Abstract base class for an importer. Reads data from
23
29
file rowwise, buffer records, and perform bulk inserts.
@@ -118,6 +124,8 @@ def __init__(
118
124
self ._buffer : BaseImporter ._Buffer
119
125
self ._dtypes : dict [str , dict [str , Any ]] = dict ()
120
126
self ._lino : int = skiprows
127
+ self ._numrows : int = 0
128
+ self ._validrows : int = 0
121
129
if comment is not None and len (comment ) > 1 :
122
130
raise ValueError (
123
131
f"Maximum length of 1 expected, got { len (comment )} for comment."
@@ -150,11 +158,29 @@ def parse_records(self) -> None:
150
158
for line in itertools .islice (self ._handle , self ._skiprows , None ):
151
159
self ._lino += 1
152
160
if self ._comment is not None and not line .strip ().startswith (self ._comment ):
161
+ self ._numrows += 1
153
162
self ._read_line (line )
154
163
155
- def close (self ) -> None :
156
- """Close handle, flush buffer, commit."""
164
def close(self, raise_missing: bool = False, threshold: float = 0.01) -> None:
    """Close handle. Unless no_flush, flush buffer, and commit.
    Optionally raise a MissingDataError.

    The check for skipped rows runs after the handle is closed but
    before anything is flushed: if it fails, this method neither
    flushes the buffer nor commits.

    The number of skipped rows is ``_numrows - _validrows``. The
    error is raised if the fraction of skipped rows is greater than
    ``threshold``, or if fewer than 100 rows were counted and more
    than one of them was skipped (whatever the threshold). If no
    rows were counted, there is nothing to compare and the check
    is not performed.

    :param raise_missing: Raise error if too
    many missing records
    :type raise_missing: bool
    :param threshold: Maximum tolerated fraction of skipped rows
    :type threshold: float
    :raises MissingDataError: If raise_missing is set and too
    many rows were skipped
    """
    self._handle.close()

    # Guard on _numrows: without it, the ratio below divides by zero
    # when no rows were counted.
    if raise_missing and self._numrows > 0:
        skipped = self._numrows - self._validrows
        # Absolute cap for short inputs: at most one skipped row.
        too_many_for_small = self._numrows < 100 and skipped > 1
        # Relative cap, applied to inputs of any size.
        too_many_for_ratio = skipped / self._numrows > threshold
        if too_many_for_small or too_many_for_ratio:
            raise MissingDataError(
                f"Skipped {skipped} of {self._numrows} rows "
                f"(threshold: {threshold})."
            )

    if not self._no_flush:
        self._buffer.flush()
        # NOTE(review): commit is kept under the no_flush guard, as the
        # summary line above states - confirm against the original layout.
        self._session.commit()
@@ -225,6 +251,7 @@ def _read_line(self, line: str) -> None:
225
251
try :
226
252
validated = self ._validate (values )
227
253
records = self .parse_record (validated )
254
+ self ._validrows += 1
228
255
self ._buffer .buffer_data (records )
229
256
except ValueError as error :
230
257
msg = f"Skipping: Failed to parse { self ._filen } at row { self ._lino } : { str (error )} "
0 commit comments