|
29 | 29 | Selection,
|
30 | 30 | )
|
31 | 31 | import scimodom.database.queries as queries
|
32 |
| -from scimodom.services.importer import BEDImporter |
| 32 | +from scimodom.services.importer import get_bed_importer |
33 | 33 | from scimodom.services.annotation import AnnotationService
|
34 | 34 | from scimodom.services.assembly import AssemblyService
|
35 | 35 | import scimodom.utils.specifications as specs
|
@@ -465,126 +465,66 @@ def get_dataset(self):
|
465 | 465 |
|
466 | 466 | return self._dump(query)
|
467 | 467 |
|
468 |
def get_comparison(self, reference_ids, comparison_ids, upload, query_operation):
    """Run a set operation between reference records and comparison records.

    Records (chrom, start, end, name, score, strand, dataset_id, coverage,
    frequency) are selected for all ``reference_ids`` in a single query.
    The comparison side comes either from ``upload`` (parsed with the BED
    importer) or, when ``upload`` is falsy, from one query per id in
    ``comparison_ids``. The operation named in ``query_operation`` is then
    applied to both sides.

    :param reference_ids: Dataset ids selecting the reference records.
    :param comparison_ids: Dataset ids selecting the comparison records;
        only used when ``upload`` is falsy.
    :param upload: Passed as-is to ``get_bed_importer``; when truthy it
        takes precedence over ``comparison_ids``.
        NOTE(review): presumably a path to an uploaded BED file -- confirm.
    :param query_operation: String of the form ``"<op>S<strand>"``. It is
        split on ``"S"`` into the operation name (given to ``get_op``) and
        a Python literal used as the strand flag (presumably ``"True"`` or
        ``"False"`` -- confirm against the caller).
    :returns: List of dicts, one per resulting record.
    :raises ValueError: If ``query_operation`` does not split into exactly
        two parts, or the strand part is not a valid Python literal
        (malformed literals may also raise ``SyntaxError``).
    """
    # Local import: only needed to parse the strand flag safely.
    import ast

    # TODO: refactor
    # API call in compare, then query_operation pass as params to SPA components
    # but sending all datasets may be too large?
    # final call after dataset selection + query
    # + lazy loading of results?

    # Shared column selection for both sides of the comparison. ``where``
    # returns a new statement, so this base statement can be reused.
    base_query = select(
        Data.chrom,
        Data.start,
        Data.end,
        Data.name,
        Data.score,
        Data.strand,
        Association.dataset_id,
        Data.coverage,
        Data.frequency,
    ).join_from(Data, Association, Data.inst_association)

    reference_query = base_query.where(Association.dataset_id.in_(reference_ids))
    a_records = self._session.execute(reference_query).all()

    if upload:
        importer = get_bed_importer(upload)
        try:
            importer.parse_records()
        finally:
            # Release the importer's resources even if parsing fails.
            importer.close()
        # NOTE(review): the buffer is used as-is, whereas the database
        # branch below builds a list of per-dataset row lists -- confirm
        # that the operations accept both shapes.
        b_records = importer.get_buffer()
    else:
        # One query per comparison dataset, keeping each dataset's rows in
        # a separate list. Uses the same session as the reference query.
        b_records = [
            self._session.execute(
                base_query.where(Association.dataset_id == idx)
            ).all()
            for idx in comparison_ids
        ]

    op, strand = query_operation.split("S")
    # SECURITY: the strand flag comes from the caller-supplied string and
    # was previously passed to ``eval``. ``ast.literal_eval`` accepts the
    # same literals (e.g. "True"/"False") without executing arbitrary code.
    is_stranded = ast.literal_eval(strand)
    c_records = get_op(op)(a_records, b_records, s=is_stranded)
    records = [records_factory(op.capitalize(), r)._asdict() for r in c_records]

    return records
|
590 | 530 |
|
|
0 commit comments