Skip to content

Commit 143f0c1

Browse files
committed
WIP #80 re-implement service, not working
1 parent 042df07 commit 143f0c1

File tree

4 files changed

+342
-251
lines changed

4 files changed

+342
-251
lines changed

server/src/scimodom/api/public.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from flask_cors import cross_origin
66

77
from scimodom.services.public import get_public_service
8+
from scimodom.services.comparison import get_comparison_service
89

910
api = Blueprint("api", __name__)
1011

@@ -126,11 +127,9 @@ def get_compare():
126127
upload_path = request.args.get("upload", type=str)
127128
query_operation = request.args.get("operation", type=str)
128129

129-
public_service = get_public_service()
130-
response = public_service.get_comparison(
131-
reference_ids, comparison_ids, upload_path, query_operation
132-
)
133-
return response
130+
# TODO
131+
compare_service = get_comparison_service()
132+
return "Not implemented", 404
134133

135134

136135
@api.route("/upload", methods=["POST"])
Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
from collections.abc import Sequence
2+
from logging import getLogger
3+
from pathlib import Path
4+
from typing import Any, Optional, Literal, get_args
5+
6+
from sqlalchemy import select
7+
from sqlalchemy.orm import Session
8+
9+
from scimodom.database.database import get_session
10+
from scimodom.database.models import Data, Association
11+
from scimodom.utils.operations import to_bedtool, _remove_extra_feature
12+
from scimodom.services.importer import get_bed_importer
13+
import scimodom.utils.utils as utils
14+
15+
logger = getLogger(__name__)
16+
17+
18+
class FailedUploadError(Exception):
19+
pass
20+
21+
22+
class NoRecordsFoundError(Exception):
23+
pass
24+
25+
26+
class ComparisonService:
27+
"""Utility class to handle dataset comparison.
28+
The EUFID(s) are assumed to be well-defined,
29+
avoiding repeated validation queries to the
30+
database.
31+
32+
:param session: SQLAlchemy ORM session
33+
:type session: Session
34+
:param reference_ids: Reference EUFID(s)
35+
:type reference_ids: list of str
36+
:param comparison_ids: EUFID(s) to compare
37+
:type comparison_ids: list of str
38+
:param query_operation: Comparison to perform:
39+
intersect, closest, or subtract (difference)
40+
:type query_operation: str
41+
:param is_strand: Perform strand-aware query
42+
:type is_strand: bool
43+
"""
44+
45+
OPERATIONS = Literal["intersect", "closest", "subtract"]
46+
47+
def __init__(
48+
self,
49+
session: Session,
50+
reference_ids: list[str],
51+
comparison_ids: list[str],
52+
query_operation: OPERATIONS,
53+
is_strand: bool,
54+
) -> None:
55+
"""Initializer method."""
56+
self._session = session
57+
self._reference_ids = reference_ids
58+
self._comparison_ids = comparison_ids
59+
self._query_operation = query_operation
60+
self._is_strand = is_strand
61+
62+
self._reference_records: Sequence[Any]
63+
self._comparison_records: Sequence[Any]
64+
65+
operations = get_args(self.OPERATIONS)
66+
assert (
67+
query_operation in operations
68+
), f"Undefined '{query_operation}'. Allowed values are {operations}."
69+
70+
@classmethod
71+
def from_upload(
72+
cls,
73+
session: Session,
74+
reference_ids: list[str],
75+
query_operation: str,
76+
is_strand: bool,
77+
upload: str | Path,
78+
):
79+
"""Provides ComparisonService factory to
80+
instantiate class upload instead of EUFID(s).
81+
82+
:param session: SQLAlchemy ORM session
83+
:type session: Session
84+
:param reference_ids: Reference EUFID(s)
85+
:type reference_ids: list of str
86+
:param upload: Uploaded dataset to compare
87+
:type upload: str | Path
88+
:param query_operation: Comparison to perform:
89+
intersect, closest, or subtract (difference)
90+
:type query_operation: str
91+
:param is_strand: Perform strand-aware query
92+
:type is_strand: bool
93+
"""
94+
upload_path = Path(upload)
95+
if not upload_path.is_file():
96+
msg = f"No such file or directory: {upload_path.as_posix()}"
97+
raise FileNotFoundError(msg)
98+
try:
99+
db_records = ComparisonService.upload_file()
100+
except Exception as exc:
101+
# upload itself is "fail-safe", catch eveything else...
102+
msg = f"Failed to upload {upload_path.as_posix()}. "
103+
raise FailedUploadError(msg)
104+
records = [tuple([val for key, val in record.items()]) for record in db_records]
105+
# ... but records might be "skipped" for various reasons
106+
if not records:
107+
raise NoRecordsFoundError
108+
service = cls(session, reference_ids, [], query_operation, is_strand)
109+
service._comparison_records = records
110+
return service
111+
112+
@staticmethod
113+
def upload_file(file_path: Path) -> list[dict[str, Any]]:
114+
"""Upload bed-like file.
115+
116+
:param file_path: Path to file
117+
:type file_path: Path
118+
:return: Uploaded records
119+
:rtype: list of dict of {str: Any}
120+
"""
121+
importer = get_bed_importer(file_path)
122+
importer.parse_records()
123+
importer.close()
124+
return importer.get_buffer()
125+
126+
def get_records(self) -> None:
127+
"""Query the database for selected records."""
128+
129+
def _construct_query(idx):
130+
query = (
131+
select(
132+
Data.chrom,
133+
Data.start,
134+
Data.end,
135+
Data.name,
136+
Data.score,
137+
Data.strand,
138+
Association.dataset_id,
139+
Data.coverage,
140+
Data.frequency,
141+
)
142+
.join_from(Data, Association, Data.inst_association)
143+
.where(Association.dataset_id.in_(idx))
144+
)
145+
return query
146+
147+
query = _construct_query(self._reference_ids)
148+
self._reference_records = self._session.execute(query).all()
149+
150+
if self._comparison_ids:
151+
self._comparison_records = []
152+
for idx in self._comparison_ids:
153+
query = _construct_query([idx])
154+
self._comparison_records.append(self._session.execute(query).all())
155+
156+
# check for empty records?
157+
158+
def compare_dataset(self): # ->
159+
"""Execute comparison.
160+
161+
:return: Result form comparison
162+
:rtype:
163+
"""
164+
func = eval(f"_get_{self._query_operation}")
165+
records = func(self._reference_records, self._comparison_records)
166+
167+
# TODO
168+
# from scimodom.utils.models import records_factory
169+
# op, strand = query_operation.split("S")
170+
# c_records = get_op(op)(a_records, b_records, s=eval(strand))
171+
# records = [records_factory(op.capitalize(), r)._asdict() for r in c_records]
172+
173+
return records
174+
175+
def _get_intersect(
176+
a_records: Sequence[Any],
177+
b_records: Sequence[Any],
178+
s: bool = True,
179+
sorted: bool = True,
180+
) -> list[Any]:
181+
"""Wrapper for pybedtools.bedtool.BedTool.intersect
182+
183+
Relies on the behaviour of bedtools -wa -wb option: the first
184+
column after the complete -a record lists the file number
185+
from which the overlap came.
186+
187+
:param a_records: DB records (A features)
188+
:type a_records: Sequence (list of tuples)
189+
:param b_records: DB records (B features)
190+
:type b_records: Sequence (list of tuples)
191+
:param s: Force strandedness
192+
:type s: bool
193+
:param sorted: Invoked sweeping algorithm
194+
:type sorted: bool
195+
:returns: records
196+
:rtype: list of tuples
197+
"""
198+
199+
# required options
200+
# write the original entry in A for each overlap
201+
wa: bool = True
202+
# write the original entry in B for each overlap
203+
wb: bool = True
204+
205+
a_bedtool, b_bedtool = to_bedtool(a_records), to_bedtool(
206+
b_records, as_list=True
207+
)
208+
bedtool = a_bedtool.intersect(
209+
b=[b.fn for b in b_bedtool], wa=wa, wb=wb, s=s, sorted=sorted
210+
)
211+
stream = bedtool.each(_remove_extra_feature)
212+
records = [tuple(s.fields) for s in stream]
213+
return records
214+
215+
def _get_closest(
216+
a_records: Sequence[Any],
217+
b_records: Sequence[Any],
218+
s: bool = True,
219+
sorted: bool = True,
220+
) -> list[Any]:
221+
"""Wrapper for pybedtools.bedtool.BedTool.closest
222+
223+
Relies on the behaviour of bedtools -io -t -mdb -D options: the first
224+
column after the complete -a record lists the file number
225+
from which the closest interval came.
226+
227+
:param a_records: DB records (A features)
228+
:type a_records: Sequence (list of tuples)
229+
:param b_records: DB records (B features)
230+
:type b_records: Sequence (list of tuples)
231+
:param s: Force strandedness
232+
:type s: bool
233+
:param sorted: Invoked sweeping algorithm
234+
:type sorted: bool
235+
:returns: records
236+
:rtype: list of tuples
237+
"""
238+
239+
# required options
240+
# Ignore features in B that overlap A
241+
io: bool = True
242+
# Report all ties
243+
t: str = "all"
244+
# Report closest records among all databases
245+
mdb: str = "all"
246+
# Report distance with respect to A
247+
D: str = "a"
248+
249+
a_bedtool, b_bedtool = to_bedtool(a_records), to_bedtool(
250+
b_records, as_list=True
251+
)
252+
bedtool = a_bedtool.closest(
253+
b=[b.fn for b in b_bedtool], io=io, t=t, mdb=mdb, D=D, s=s, sorted=sorted
254+
)
255+
stream = bedtool.each(_remove_extra_feature)
256+
records = [tuple(s.fields) for s in stream]
257+
258+
# TODO
259+
# Reports “none” for chrom (?) and “-1” for all other fields (?) when a feature
260+
# is not found in B on the same chromosome as the feature in A.
261+
# Note that "start" (fields) is a string!
262+
# 9 + 0/1 + 1
263+
# c_bedtool = c_bedtool.filter(lambda c: c.fields[(offset + filnum_idx + 1)] != "-1")
264+
265+
return records
266+
267+
def _get_subtract(
268+
a_records: Sequence[Any],
269+
b_records: Sequence[Any],
270+
s: bool = True,
271+
sorted: bool = True,
272+
) -> list[Any]:
273+
"""Wrapper for pybedtools.bedtool.BedTool.subtract
274+
275+
:param a_records: DB records (A features)
276+
:type a_records: Sequence (list of tuples)
277+
:param b_records: DB records (B features)
278+
:type b_records: Sequence (list of tuples)
279+
:param s: Force strandedness
280+
:type s: bool
281+
:param sorted: Invoked sweeping algorithm
282+
:type sorted: bool
283+
:param n_fields: Number of other fields attribute in addition to BED6
284+
:type n_fields: int
285+
:returns: records
286+
:rtype: list of tuples
287+
"""
288+
289+
a_bedtool, b_bedtool = to_bedtool(a_records), to_bedtool(
290+
utils.flatten_list(b_records)
291+
)
292+
c_bedtool = a_bedtool.subtract(b_bedtool, s=s, sorted=sorted)
293+
# records = [(i.fields[:offset]) for i in c_bedtool]
294+
# TODO ??? offset = 6 + 3
295+
# records = [tuple(i.fields[:offset]) for i in c_bedtool]
296+
# return records
297+
298+
299+
_cached_service: Optional[ComparisonService] = None
300+
301+
302+
def get_comparison_service() -> ComparisonService:
303+
global _cached_service
304+
if _cached_service is None:
305+
# TODO
306+
# _cached_service = ComparisonService(session=get_session())
307+
pass
308+
return _cached_service

0 commit comments

Comments
 (0)