@@ -253,20 +253,8 @@ def _get_length(self, query, model) -> int:
253
253
select (func .count ()).select_from (query .with_only_columns (model .id ))
254
254
)
255
255
256
- def _return_ensembl_query (
257
- self ,
258
- annotation : Annotation ,
259
- modification_id : int ,
260
- organism_id : int ,
261
- technology_ids : list [int ],
262
- gene_filter : list [str ],
263
- chrom : str | None ,
264
- chrom_start : int | None ,
265
- chrom_end : int | None ,
266
- first_record : int | None ,
267
- max_records : int | None ,
268
- multi_sort : list [str ],
269
- ):
256
+ @staticmethod
257
+ def _get_base_search_query ():
270
258
query = (
271
259
select (
272
260
Data .id ,
@@ -280,10 +268,19 @@ def _return_ensembl_query(
280
268
Data .frequency ,
281
269
Data .dataset_id ,
282
270
func .group_concat (DataAnnotation .feature .distinct ()).label ("feature" ),
283
- func .group_concat (GenomicAnnotation .biotype .distinct ()).label (
284
- "gene_biotype"
285
- ),
286
- func .group_concat (GenomicAnnotation .name .distinct ()).label ("gene_name" ),
271
+ func .group_concat (
272
+ GenomicAnnotation .id .distinct ().op ("ORDER BY" )(GenomicAnnotation .id )
273
+ ).label ("gene_id" ),
274
+ func .group_concat (
275
+ GenomicAnnotation .name .distinct ().op ("ORDER BY" )(
276
+ GenomicAnnotation .id
277
+ )
278
+ ).label ("gene_name" ),
279
+ func .group_concat (
280
+ GenomicAnnotation .biotype .distinct ().op ("ORDER BY" )(
281
+ GenomicAnnotation .id
282
+ )
283
+ ).label ("gene_biotype" ),
287
284
DetectionTechnology .tech ,
288
285
Organism .taxa_id ,
289
286
Organism .cto ,
@@ -293,23 +290,22 @@ def _return_ensembl_query(
293
290
.join_from (Data , Dataset , Data .inst_dataset )
294
291
.join_from (Dataset , DetectionTechnology , Dataset .inst_technology )
295
292
.join_from (Dataset , Organism , Dataset .inst_organism )
296
- .where (
297
- Data .modification_id == modification_id ,
298
- Dataset .organism_id == organism_id ,
299
- Dataset .technology_id .in_ (technology_ids ),
300
- )
301
293
)
294
+ return query
302
295
303
- # coordinate filters
304
- if chrom :
305
- query = query .where (Data .chrom == chrom )
306
- if chrom_start :
307
- query = query .where (Data .start >= chrom_start )
308
- if chrom_end :
309
- query = query .where (Data .end <= chrom_end )
310
-
296
@staticmethod
def _add_chrom_filters(query, chrom, start, end):
    """Narrow ``query`` to one chromosome and an optional coordinate window.

    :param query: Statement to extend; only its ``where`` method is used.
    :param chrom: Value that ``Data.chrom`` must equal. This condition is
        always added.
    :param start: Lower bound applied as ``Data.start >= start``. Skipped
        when falsy, so both ``None`` and ``0`` mean "no lower bound".
    :param end: Upper bound applied as ``Data.end <= end``. Skipped when
        falsy, so both ``None`` and ``0`` mean "no upper bound".
    :returns: The statement with the collected conditions applied.
    """
    # Gather the conditions first, then attach them one at a time so that
    # each one still goes through its own ``where`` call.
    criteria = [Data.chrom == chrom]
    if start:
        criteria.append(Data.start >= start)
    if end:
        criteria.append(Data.end <= end)
    for criterion in criteria:
        query = query.where(criterion)
    return query
304
+
305
+ def _get_gene_filters (self , query , gene_filter , annotation ):
311
306
# gene filters: matchMode unused (cf. PrimeVue), but keep it this way
312
307
# e.g. to extend options or add table filters
308
+ # TODO annotation
313
309
# gene name
314
310
name_flt = next ((flt for flt in gene_filter if "gene_name" in flt ), None )
315
311
if name_flt :
@@ -329,12 +325,9 @@ def _return_ensembl_query(
329
325
query = query .where (GenomicAnnotation .annotation_id == annotation .id ).where (
330
326
GenomicAnnotation .biotype .in_ (biotypes )
331
327
)
328
+ return query
332
329
333
- query = query .group_by (DataAnnotation .data_id )
334
-
335
- # get length
336
- length = self ._get_length (query , Data )
337
-
330
+ def _get_sort_filters (self , query , multi_sort ):
338
331
# sort filters
339
332
# index speed up for chrom + start
340
333
if not multi_sort :
@@ -345,8 +338,38 @@ def _return_ensembl_query(
345
338
for flt in multi_sort :
346
339
expr = self ._get_arg_sort (flt )
347
340
query = query .order_by (eval (expr ))
341
+ return query
342
+
343
+ def _return_ensembl_query (
344
+ self ,
345
+ annotation : Annotation ,
346
+ modification_id : int ,
347
+ organism_id : int ,
348
+ technology_ids : list [int ],
349
+ gene_filter : list [str ],
350
+ chrom : str | None ,
351
+ chrom_start : int | None ,
352
+ chrom_end : int | None ,
353
+ first_record : int | None ,
354
+ max_records : int | None ,
355
+ multi_sort : list [str ],
356
+ ):
357
+ query = self ._get_base_search_query ()
358
+ query = query .where (
359
+ Data .modification_id == modification_id ,
360
+ Dataset .organism_id == organism_id ,
361
+ Dataset .technology_id .in_ (technology_ids ),
362
+ )
363
+ if chrom :
364
+ query = self ._add_chrom_filters (query , chrom , chrom_start , chrom_end )
365
+ if gene_filter :
366
+ query = self ._get_gene_filters (query , gene_filter , annotation )
367
+ query = query .group_by (DataAnnotation .data_id )
368
+
369
+ length = self ._get_length (query , Data )
370
+
371
+ query = self ._get_sort_filters (query , multi_sort )
348
372
349
- # paginate
350
373
if first_record is not None :
351
374
query = query .offset (first_record )
352
375
if max_records is not None :
@@ -368,82 +391,18 @@ def _return_gene_query(
368
391
max_records : int | None ,
369
392
multi_sort : list [str ],
370
393
):
371
- query = (
372
- select (
373
- Data .id ,
374
- Data .chrom ,
375
- Data .start ,
376
- Data .end ,
377
- Data .name ,
378
- Data .score ,
379
- Data .strand ,
380
- Data .coverage ,
381
- Data .frequency ,
382
- Data .dataset_id ,
383
- func .group_concat (DataAnnotation .feature .distinct ()).label ("feature" ),
384
- func .group_concat (GenomicAnnotation .biotype .distinct ()).label (
385
- "gene_biotype"
386
- ),
387
- func .group_concat (GenomicAnnotation .name .distinct ()).label ("gene_name" ),
388
- DetectionTechnology .tech ,
389
- Organism .taxa_id ,
390
- Organism .cto ,
391
- )
392
- .join_from (DataAnnotation , Data , DataAnnotation .inst_data )
393
- .join_from (DataAnnotation , GenomicAnnotation , DataAnnotation .inst_genomic )
394
- .join_from (Data , Dataset , Data .inst_dataset )
395
- .join_from (Dataset , DetectionTechnology , Dataset .inst_technology )
396
- .join_from (Dataset , Organism , Dataset .inst_organism )
397
- .where (Organism .taxa_id == taxa_id )
398
- )
399
-
400
- # coordinate filters
394
+ query = self ._get_base_search_query ()
395
+ query = query .where (Organism .taxa_id == taxa_id )
401
396
if chrom :
402
- query = query .where (Data .chrom == chrom )
403
- if chrom_start :
404
- query = query .where (Data .start >= chrom_start )
405
- if chrom_end :
406
- query = query .where (Data .end <= chrom_end )
407
-
408
- # gene filters: matchMode unused (cf. PrimeVue), but keep it this way
409
- # e.g. to extend options or add table filters
410
- # gene name
411
- name_flt = next ((flt for flt in gene_filter if "gene_name" in flt ), None )
412
- if name_flt :
413
- _ , name , _ = self ._get_flt (name_flt )
414
- query = query .where (GenomicAnnotation .name == name [0 ])
415
- # annotation filter
416
- feature_flt = next ((flt for flt in gene_filter if "feature" in flt ), None )
417
- if feature_flt :
418
- _ , features , _ = self ._get_flt (feature_flt )
419
- query = query .where (DataAnnotation .feature .in_ (features ))
420
- # biotypes
421
- # index speed up on annotation_id + biotypes + name
422
- biotype_flt = next ((flt for flt in gene_filter if "gene_biotype" in flt ), None )
423
- if biotype_flt :
424
- _ , mapped_biotypes , _ = self ._get_flt (biotype_flt )
425
- biotypes = [k for k , v in BIOTYPES .items () if v in mapped_biotypes ]
426
- query = query .where (GenomicAnnotation .annotation_id == annotation .id ).where (
427
- GenomicAnnotation .biotype .in_ (biotypes )
428
- )
429
-
397
+ query = self ._add_chrom_filters (query , chrom , chrom_start , chrom_end )
398
+ if gene_filter :
399
+ query = self ._get_gene_filters (query , gene_filter , annotation )
430
400
query = query .group_by (DataAnnotation .data_id )
431
401
432
- # get length
433
402
length = self ._get_length (query , Data )
434
403
435
- # sort filters
436
- # index speed up for chrom + start
437
- if not multi_sort :
438
- chrom_expr = self ._get_arg_sort ("chrom%2Basc" )
439
- start_expr = self ._get_arg_sort ("start%2Basc" )
440
- query = query .order_by (eval (chrom_expr ), eval (start_expr ))
441
- else :
442
- for flt in multi_sort :
443
- expr = self ._get_arg_sort (flt )
444
- query = query .order_by (eval (expr ))
404
+ query = self ._get_sort_filters (query , multi_sort )
445
405
446
- # paginate
447
406
if first_record is not None :
448
407
query = query .offset (first_record )
449
408
if max_records is not None :
0 commit comments