Skip to content

Commit 085edab

Browse files
committed
WIP #86, up to liftover, FIX rollback, test undone
1 parent 43f8e53 commit 085edab

File tree

6 files changed

+81
-33
lines changed

6 files changed

+81
-33
lines changed

server/src/scimodom/api/management.py

+10-12
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
DatasetExistsError,
1414
DatasetHeaderError,
1515
)
16+
from scimodom.services.importer.base import MissingDataError
1617
from scimodom.services.importer.header import SpecsError
1718
from scimodom.services.project import ProjectService
1819
from scimodom.services.mail import get_mail_service
@@ -116,18 +117,15 @@ def add_dataset():
116117
return {"message": f"File upload failed. File {str(exc)} is empty!"}, 500
117118
except SpecsError as exc:
118119
return {
119-
"message": f"File upload failed. File is not conform to bedRMod specifications: {str(exc)}"
120+
"message": f"File upload failed. The header is not conform to bedRMod specifications: {str(exc)}"
120121
}, 500
121-
122+
except MissingDataError:
123+
return {
124+
"message": "File upload failed. Too many skipped records. Consult the bedRMod documentation."
125+
}, 500
126+
# liftover errors
122127
except Exception as exc:
123-
# TODO ...
124-
logger.error(f"Failed to create dataset: {exc}")
125-
return (
126-
jsonify({"result": "Failed to upload dataset. Contact the administrator."}),
127-
500,
128-
)
129-
130-
# TODO
131-
# WARNING scimodom.services.annotation.annotate_data.193 | No records found for Kr6uj7QzWfLJ...
128+
logger.error(exc)
129+
return {"message": "File upload failed. Contact the administrator."}, 500
132130

133-
return jsonify({"result": "Ok"}), 200
131+
return {"result": "Ok"}, 200

server/src/scimodom/services/data.py

+5-13
Original file line numberDiff line numberDiff line change
@@ -325,14 +325,12 @@ def create_dataset(self) -> None:
325325
eufid=self._eufid,
326326
title=self._title,
327327
)
328-
# TODO if importer fails, then checkpoint does not exists...
329328
checkpoint = importer.header.checkpoint
330329
importer.header.parse_header()
331-
# compare input vs. values read from file header
332-
# for organism (taxa ID) and assembly
330+
# compare input and header
333331
self.validate_imported("organism", taxa_id, importer.header.taxid)
334332
self.validate_imported("assembly", assembly_name, importer.header.assembly)
335-
importer.header.close() # commit
333+
importer.header.close(no_commit=True)
336334
# add association = (EUFID, selection)
337335
# update self._association dict
338336
self._add_association() # flush
@@ -341,11 +339,11 @@ def create_dataset(self) -> None:
341339
association=self._association, seqids=seqids, no_flush=is_liftover
342340
)
343341
importer.data.parse_records()
342+
importer.data.close(raise_missing=True) # commit unless...
344343
except:
345344
checkpoint.rollback()
346345
raise
347346
else:
348-
importer.data.close() # commit unless...
349347
if is_liftover:
350348
msg = f"Lifting over dataset from {assembly_name} to {current_assembly_name}..."
351349
logger.debug(msg)
@@ -367,6 +365,7 @@ def create_dataset(self) -> None:
367365
filen = assembly_service.liftover(records)
368366
importer.reset_data_importer(filen)
369367
importer.data.parse_records()
368+
# raise missing?
370369
importer.data.close()
371370

372371
msg = (
@@ -492,14 +491,7 @@ def _set_selection(self) -> None:
492491
"Aborting transaction!"
493492
)
494493
raise SelectionExistsError(msg) from exc
495-
query = (
496-
select(
497-
Modomics.short_name,
498-
)
499-
.join_from(Modification, Modomics, Modification.inst_modomics)
500-
.where(Modification.id == modification_id)
501-
)
502-
name = self._session.execute(query).scalar_one()
494+
name = self._modification_id_to_name(modification_id)
503495
self._association[name] = selection_id[-1]
504496
# this cannot actually happen...
505497
if len(set(selection_id)) != len(selection_id):

server/src/scimodom/services/importer/base.py

+29-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ class MissingHeaderError(Exception):
1818
pass
1919

2020

21+
class MissingDataError(Exception):
22+
"""Exception handling for too many skipped rows."""
23+
24+
pass
25+
26+
2127
class BaseImporter(ABC):
2228
"""Abstract base class for an importer. Reads data from
2329
file rowwise, buffer records, and perform bulk inserts.
@@ -118,6 +124,8 @@ def __init__(
118124
self._buffer: BaseImporter._Buffer
119125
self._dtypes: dict[str, dict[str, Any]] = dict()
120126
self._lino: int = skiprows
127+
self._numrows: int = 0
128+
self._validrows: int = 0
121129
if comment is not None and len(comment) > 1:
122130
raise ValueError(
123131
f"Maximum length of 1 expected, got {len(comment)} for comment."
@@ -150,11 +158,29 @@ def parse_records(self) -> None:
150158
for line in itertools.islice(self._handle, self._skiprows, None):
151159
self._lino += 1
152160
if self._comment is not None and not line.strip().startswith(self._comment):
161+
self._numrows += 1
153162
self._read_line(line)
154163

155-
def close(self) -> None:
156-
"""Close handle, flush buffer, commit."""
164+
def close(self, raise_missing: bool = False, threshold: float = 0.01) -> None:
165+
"""Close handle. Unless no_flush,
166+
flush buffer, and commit. Optionally
167+
raise a MissingDataError.
168+
169+
:param raise_missing: Raise error if too
170+
many missing records
171+
:type raise_missing: bool
172+
:param threshold: Threshold for raising error
173+
:type threshold: float
174+
"""
157175
self._handle.close()
176+
177+
if raise_missing:
178+
skipped = self._numrows - self._validrows
179+
small = True if self._numrows < 100 and skipped > 1 else False
180+
large = skipped / self._numrows > threshold
181+
if small or large:
182+
raise MissingDataError
183+
158184
if not self._no_flush:
159185
self._buffer.flush()
160186
self._session.commit()
@@ -225,6 +251,7 @@ def _read_line(self, line: str) -> None:
225251
try:
226252
validated = self._validate(values)
227253
records = self.parse_record(validated)
254+
self._validrows += 1
228255
self._buffer.buffer_data(records)
229256
except ValueError as error:
230257
msg = f"Skipping: Failed to parse {self._filen} at row {self._lino}: {str(error)}"

server/src/scimodom/services/importer/data.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def parse_record(self, record: dict[str, str]) -> dict[str, Any]:
111111
raise ValueError(f"Value {itype}: {crecord[itype]} out of range.")
112112
if crecord["chrom"] not in self._seqids:
113113
raise ValueError(
114-
f"Unrecognized chrom: {crecord['chrom']}. Ignore this warning"
114+
f"Unrecognized chrom: {crecord['chrom']}. Ignore this warning "
115115
"for scaffolds and contigs, otherwise this could be due to misformatting!"
116116
)
117117
if crecord["strand"] not in self._constraints["strand"]:

server/src/scimodom/services/importer/header.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,18 @@ def parse_header(self):
7979
self._parse_lines()
8080
self._validate_columns()
8181

82-
def close(self) -> None:
83-
"""Close handle, insert, and commit."""
82+
def close(self, no_commit: bool = False) -> None:
83+
"""Close handle, insert, and flush or commit.
84+
85+
:param no_commit: Flush instead of commit
86+
:type no_commit: bool
87+
"""
8488
self._handle.close()
8589
self._session.execute(insert(self._model), self._header)
86-
self._session.commit()
90+
if no_commit:
91+
self._session.flush()
92+
else:
93+
self._session.commit()
8794

8895
def _cast_types(self) -> None:
8996
"""Cast column types for input."""

server/tests/unit/services/test_data_importer.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44
from sqlalchemy import func, select
55

66
from scimodom.database.models import Data
7-
from scimodom.services.importer.base import BaseImporter, MissingHeaderError
7+
from scimodom.services.importer.base import (
8+
BaseImporter,
9+
MissingHeaderError,
10+
MissingDataError,
11+
)
812
from scimodom.services.importer.data import EUFDataImporter
913

1014

@@ -89,7 +93,7 @@ def _get_data_with_header(fmt):
8993
("minimum", "Value start: -1 out of range."),
9094
(
9195
"chrom",
92-
"Unrecognized chrom: A. Ignore this warningfor scaffolds and contigs, otherwise this could be due to misformatting!",
96+
"Unrecognized chrom: A. Ignore this warning for scaffolds and contigs, otherwise this could be due to misformatting!",
9397
),
9498
("strand", "Unrecognized strand: ."),
9599
("maximum", "Value score: 200 out of range."),
@@ -276,3 +280,23 @@ def parse_record(record):
276280
importer = TestBaseImporter()
277281
assert str(exc.value) == "Maximum length of 1 expected, got 2 for comment."
278282
assert exc.type == ValueError
283+
284+
285+
def test_importer_missing_data(Session, EUF_specs):
286+
# pass the raise_missing argument to close
287+
288+
format, version, specs = EUF_specs
289+
handle = _get_data(EUF_specs)
290+
importer = EUFDataImporter(
291+
session=Session(),
292+
filen="filen",
293+
handle=handle,
294+
association={"m6A": 1},
295+
seqids=["1"],
296+
specs_ver=version,
297+
)
298+
# warnings are emitted, this is expected, use caplog to assert them...
299+
importer.parse_records()
300+
with pytest.raises(MissingDataError) as exc:
301+
importer.close(raise_missing=True)
302+
assert exc.type == MissingDataError

0 commit comments

Comments
 (0)