1
1
import typing
2
+ from typing import Dict , Tuple , List , Set , Iterable , NoReturn
2
3
from argparse import ArgumentParser , ArgumentDefaultsHelpFormatter , FileType
3
4
from collections import Counter , defaultdict
4
5
from csv import DictReader
19
20
20
21
from micall .core .project_config import ProjectConfig
21
22
from micall .utils .alignment_wrapper import align_nucs
23
+ from micall .core .contig_stitcher import Contig , GenotypedContig , AlignedContig
22
24
import micall .utils .contig_stitcher_events as events
23
25
24
26
@@ -395,19 +397,14 @@ def build_coverage_figure(genome_coverage_csv, blast_csv=None, use_concordance=F
395
397
return f
396
398
397
399
398
- def plot_stitcher_coverage (logs , genome_coverage_svg_path ):
400
+ def plot_stitcher_coverage (logs : Iterable [ events . EventType ] , genome_coverage_svg_path : str ):
399
401
f = build_stitcher_figure (logs )
400
402
f .show (w = 970 ).save_svg (genome_coverage_svg_path , context = draw .Context (invert_y = True ))
401
403
return f
402
404
403
405
404
- from types import SimpleNamespace
405
- from typing import Union , Dict , Tuple , List , Optional , Set
406
- from micall .core .contig_stitcher import Contig , GenotypedContig , AlignedContig
407
- import random
408
-
409
- def build_stitcher_figure (logs ) -> None :
410
- contig_map : Dict [str , Contig ] = {}
406
+ def build_stitcher_figure (logs : Iterable [events .EventType ]) -> Figure :
407
+ contig_map : Dict [str , GenotypedContig ] = {}
411
408
name_mappings : Dict [str , str ] = {}
412
409
parent_graph : Dict [str , List [str ]] = {}
413
410
morphism_graph : Dict [str , List [str ]] = {}
@@ -515,7 +512,7 @@ def graph_sum(graph_a, graph_b):
515
512
def symmetric_closure (graph ):
516
513
return graph_sum (graph , inverse_graph (graph ))
517
514
518
- def record_contig (contig : Contig , parents : List [Contig ]):
515
+ def record_contig (contig : GenotypedContig , parents : List [GenotypedContig ]):
519
516
contig_map [contig .name ] = contig
520
517
if [contig .name ] != [parent .name for parent in parents ]:
521
518
for parent in parents :
@@ -532,7 +529,7 @@ def record_morphism(contig: Contig, original: Contig):
532
529
if contig .name not in lst :
533
530
lst .append (contig .name )
534
531
535
- def record_bad_contig (contig : Contig , lst : List [Contig ]):
532
+ def record_bad_contig (contig : GenotypedContig , lst : List [str ]):
536
533
contig_map [contig .name ] = contig
537
534
lst .append (contig .name )
538
535
@@ -585,11 +582,13 @@ def record_bad_contig(contig: Contig, lst: List[Contig]):
585
582
record_contig (event .right , [event .original ])
586
583
elif isinstance (event , events .Combine ):
587
584
record_contig (event .result , event .contigs )
588
- combine_left_edge [event .result .name ] = event .contigs [0 ].name
589
- combine_right_edge [event .result .name ] = event .contigs [- 1 ].name
585
+ if event .contigs :
586
+ combine_left_edge [event .result .name ] = event .contigs [0 ].name
587
+ combine_right_edge [event .result .name ] = event .contigs [- 1 ].name
590
588
elif isinstance (event , (events .IgnoreGap , events .NoOverlap )):
591
589
pass
592
590
else :
591
+ x : NoReturn = event
593
592
raise RuntimeError (f"Unrecognized action or event: { event } " )
594
593
595
594
group_refs = {contig .group_ref : len (contig .ref_seq ) for contig in contig_map .values () if contig .ref_seq }
@@ -610,23 +609,23 @@ def record_bad_contig(contig: Contig, lst: List[Contig]):
610
609
eqv_morphism_graph = reflexive_closure (symmetric_closure (transitive_closure (morphism_graph )))
611
610
reduced_morphism_graph = reduced_closure (morphism_graph )
612
611
613
- for contig in overlaps_list :
614
- temporary .add (contig )
615
- for child in transitive_children_graph .get (contig , []):
612
+ for contig_name in overlaps_list :
613
+ temporary .add (contig_name )
614
+ for child in transitive_children_graph .get (contig_name , []):
616
615
temporary .add (child )
617
616
618
- for contig , parents in parent_graph .items ():
617
+ for contig_name , parents in parent_graph .items ():
619
618
if len (parents ) > 2 :
620
- children_join_points .append (contig )
621
- for contig , children in children_graph .items ():
619
+ children_join_points .append (contig_name )
620
+ for contig_name , children in children_graph .items ():
622
621
if len (children ) > 2 :
623
- children_meet_points .append (contig )
622
+ children_meet_points .append (contig_name )
624
623
625
- last_join_points_parent = {contig for join in children_join_points for contig in transitive_parent_graph .get (join , [])}
624
+ last_join_points_parent = {contig_name for join in children_join_points for contig_name in transitive_parent_graph .get (join , [])}
626
625
last_join_points = []
627
- for contig in children_join_points :
628
- if contig not in last_join_points_parent :
629
- last_join_points .append (contig )
626
+ for contig_name in children_join_points :
627
+ if contig_name not in last_join_points_parent :
628
+ last_join_points .append (contig_name )
630
629
631
630
def set_query_position (contig : Contig ):
632
631
if contig .name in query_position_map :
@@ -644,7 +643,7 @@ def set_query_position(contig: Contig):
644
643
if parent .name not in query_position_map :
645
644
set_query_position (parent )
646
645
647
- average = sum (query_position_map [parent_name ] for parent_name in parent_names ) / len (parent_names )
646
+ average = round ( sum (query_position_map [parent_name ] for parent_name in parent_names ) / len (parent_names ) )
648
647
query_position_map [contig .name ] = average
649
648
else :
650
649
query_position_map [contig .name ] = (contig .alignment .q_st + contig .alignment .q_ei ) // 2
@@ -653,9 +652,9 @@ def set_query_position(contig: Contig):
653
652
set_query_position (contig )
654
653
655
654
# Closing `temporary'
656
- for contig in contig_map :
657
- if contig in temporary :
658
- for clone in eqv_morphism_graph .get (contig , [contig ]):
655
+ for contig_name in contig_map :
656
+ if contig_name in temporary :
657
+ for clone in eqv_morphism_graph .get (contig_name , [contig_name ]):
659
658
temporary .add (clone )
660
659
661
660
def copy_takes_one_side (edge_table , overlap_xtake_map , overlap_xparent_map ):
@@ -676,42 +675,42 @@ def copy_takes_one_side(edge_table, overlap_xtake_map, overlap_xparent_map):
676
675
while list (copy_takes_one_side (combine_left_edge , overlap_righttake_map , overlap_rightparent_map )): pass
677
676
678
677
final_parts : Dict [str , bool ] = {}
679
- for contig in contig_map :
680
- if contig in temporary :
678
+ for contig_name in contig_map :
679
+ if contig_name in temporary :
681
680
continue
682
681
683
- if contig in overlap_sibling_map :
684
- finals = reduced_morphism_graph .get (contig , [contig ])
682
+ if contig_name in overlap_sibling_map :
683
+ finals = reduced_morphism_graph .get (contig_name , [contig_name ])
685
684
if len (finals ) == 1 :
686
685
[final ] = finals
687
686
parents = reduced_parent_graph .get (final , [])
688
687
if len (parents ) == 1 :
689
688
final_parts [final ] = True
690
689
691
- elif contig in bad_contigs :
692
- final_parts [contig ] = True
690
+ elif contig_name in bad_contigs :
691
+ final_parts [contig_name ] = True
693
692
694
693
for join in last_join_points + sorted_sinks :
695
694
parents = parent_graph .get (join , [join ])
696
695
if not any (isinstance (contig_map [parent ], AlignedContig ) for parent in parents ):
697
696
parents = [join ]
698
697
699
- for contig in parents :
700
- for contig in reduced_morphism_graph .get (contig , [contig ]):
701
- if contig in bad_contigs :
698
+ for contig_name in parents :
699
+ for contig_name in reduced_morphism_graph .get (contig_name , [contig_name ]):
700
+ if contig_name in bad_contigs :
702
701
continue
703
702
704
- if any (contig in transitive_parent_graph .get (bad , []) for bad in bad_contigs ):
703
+ if any (contig_name in transitive_parent_graph .get (bad , []) for bad in bad_contigs ):
705
704
continue
706
705
707
- if any (eqv in temporary for eqv in eqv_morphism_graph .get (contig , [contig ])):
706
+ if any (eqv in temporary for eqv in eqv_morphism_graph .get (contig_name , [contig_name ])):
708
707
continue
709
708
710
- transitive_parent = eqv_parent_graph .get (contig , [contig ])
709
+ transitive_parent = eqv_parent_graph .get (contig_name , [contig_name ])
711
710
if any (parent in transitive_parent for parent in final_parts ):
712
711
continue
713
712
714
- final_parts [contig ] = True
713
+ final_parts [contig_name ] = True
715
714
716
715
final_parent_mapping : Dict [str , List [str ]] = {}
717
716
for parent_name in sorted_roots :
@@ -725,7 +724,7 @@ def copy_takes_one_side(edge_table, overlap_xtake_map, overlap_xparent_map):
725
724
726
725
min_position , max_position = 1 , 1
727
726
position_offset = 100
728
- for contig in contig_map .values ():
727
+ for _ , contig in contig_map .items ():
729
728
if isinstance (contig , GenotypedContig ) and contig .ref_seq is not None :
730
729
max_position = max (max_position , len (contig .ref_seq ) + 3 * position_offset )
731
730
else :
@@ -748,8 +747,8 @@ def copy_takes_one_side(edge_table, overlap_xtake_map, overlap_xparent_map):
748
747
k += 1
749
748
name_mappings [child ] = f"{ i + 1 } .{ k + 1 } "
750
749
751
- for contig , name in name_mappings .items ():
752
- logger .debug (f"Contig name { contig !r} is displayed as { name !r} ." )
750
+ for contig_name , name in name_mappings .items ():
751
+ logger .debug (f"Contig name { contig_name !r} is displayed as { name !r} ." )
753
752
754
753
def get_neighbours (part , lookup ):
755
754
for clone in eqv_morphism_graph .get (part .name , [part .name ]):
@@ -771,8 +770,8 @@ def get_neighbour(part, lookup):
771
770
full_size_map : Dict [str , Tuple [int , int ]] = {}
772
771
773
772
for parent_name in sorted_roots :
774
- parts = final_parent_mapping [parent_name ]
775
- parts = [contig_map [part ] for part in parts ]
773
+ parts_names = final_parent_mapping [parent_name ]
774
+ parts = [contig_map [part ] for part in parts_names ]
776
775
777
776
for part in parts :
778
777
if not isinstance (part , AlignedContig ):
@@ -801,8 +800,8 @@ def get_neighbour(part, lookup):
801
800
802
801
aligned_size_map [part .name ] = (r_st , r_ei )
803
802
804
- sibling = ([overlap_sibling_map [name ] for name in eqv_morphism_graph .get (part .name , [part .name ]) if name in overlap_sibling_map ] or [None ])[0 ]
805
- sibling = sibling and contig_map [sibling ]
803
+ sibling_name = ([overlap_sibling_map [name ] for name in eqv_morphism_graph .get (part .name , [part .name ]) if name in overlap_sibling_map ] or ["" ])[0 ]
804
+ sibling = sibling_name and contig_map [sibling_name ]
806
805
prev_part = get_neighbour (sibling , overlap_lefttake_map )
807
806
next_part = get_neighbour (sibling , overlap_righttake_map )
808
807
@@ -820,7 +819,7 @@ def get_neighbour(part, lookup):
820
819
821
820
full_size_map [part .name ] = (r_st , r_ei )
822
821
823
- def get_contig_coordinates (contig ) :
822
+ def get_contig_coordinates (contig : GenotypedContig ) -> Tuple [ int , int , int , int ] :
824
823
if isinstance (contig , AlignedContig ):
825
824
r_st = position_offset + contig .alignment .r_st
826
825
r_ei = position_offset + contig .alignment .r_ei
@@ -841,7 +840,7 @@ def get_contig_coordinates(contig):
841
840
a_r_ei = f_r_ei
842
841
return (a_r_st , a_r_ei , f_r_st , f_r_ei )
843
842
844
- def get_tracks (repeatset , group_ref , contig_name ) :
843
+ def get_tracks (repeatset : Set [ str ] , group_ref : str , contig_name : str ) -> Iterable [ Track ] :
845
844
parts = final_parent_mapping [contig_name ]
846
845
for part_name in parts :
847
846
part = contig_map [part_name ]
@@ -863,7 +862,7 @@ def get_tracks(repeatset, group_ref, contig_name):
863
862
(a_r_st , a_r_ei , f_r_st , f_r_ei ) = get_contig_coordinates (part )
864
863
yield Track (f_r_st , f_r_ei , label = f"{ indexes } " )
865
864
866
- def get_arrows (repeatset , group_ref , contig_name , labels ) :
865
+ def get_arrows (repeatset : Set [ str ] , group_ref : str , contig_name : str , labels : bool ) -> Iterable [ Arrow ] :
867
866
parts = final_parent_mapping [contig_name ]
868
867
for part_name in parts :
869
868
part = contig_map [part_name ]
@@ -890,8 +889,8 @@ def get_arrows(repeatset, group_ref, contig_name, labels):
890
889
h = height ,
891
890
label = indexes )
892
891
893
- def get_all_arrows (group_ref , labels ) :
894
- repeatset = set ()
892
+ def get_all_arrows (group_ref : str , labels : bool ) -> Iterable [ Arrow ] :
893
+ repeatset : Set [ str ] = set ()
895
894
for parent_name in sorted_roots :
896
895
yield from get_arrows (repeatset , group_ref , parent_name , labels )
897
896
@@ -966,8 +965,8 @@ def get_all_arrows(group_ref, labels):
966
965
# Contigs #
967
966
###########
968
967
969
- repeatset1 = set ()
970
- repeatset2 = set ()
968
+ repeatset1 : Set [ str ] = set ()
969
+ repeatset2 : Set [ str ] = set ()
971
970
for parent_name in sorted_roots :
972
971
arrows = list (get_arrows (repeatset1 , group_ref , parent_name , labels = False ))
973
972
if arrows :
@@ -992,7 +991,7 @@ def get_all_arrows(group_ref, labels):
992
991
993
992
contig = contig_map [contig_name ]
994
993
(r_st , r_ei , f_r_st , f_r_ei ) = get_contig_coordinates (contig )
995
- name = name_mappings .get (contig . name , contig . name )
994
+ name = name_mappings .get (contig_name , contig_name )
996
995
figure .add (Arrow (r_st , r_ei , elevation = - 20 , h = 1 ))
997
996
figure .add (Track (f_r_st , f_r_ei , label = name ))
998
997
@@ -1021,7 +1020,7 @@ def get_all_arrows(group_ref, labels):
1021
1020
else :
1022
1021
colour = "red"
1023
1022
1024
- name = name_mappings .get (contig . name , contig . name )
1023
+ name = name_mappings .get (contig_name , contig_name )
1025
1024
figure .add (Track (a_r_st , a_r_ei , color = colour , label = name ))
1026
1025
1027
1026
###########
@@ -1042,7 +1041,7 @@ def get_all_arrows(group_ref, labels):
1042
1041
r_st = position_offset
1043
1042
r_ei = position_offset + len (contig .seq )
1044
1043
colour = "red"
1045
- name = name_mappings .get (contig . name , contig . name )
1044
+ name = name_mappings .get (contig_name , contig_name )
1046
1045
figure .add (Track (r_st , r_ei , color = colour , label = name ))
1047
1046
1048
1047
if not figure .elements :
0 commit comments