@@ -405,9 +405,10 @@ def plot_stitcher_coverage(logs: Iterable[events.EventType], genome_coverage_svg
405
405
406
406
407
407
def build_stitcher_figure (logs : Iterable [events .EventType ]) -> Figure :
408
- contig_map : Dict [str , GenotypedContig ] = {}
408
+ complete_contig_map : Dict [str , GenotypedContig ] = {}
409
409
name_mappings : Dict [str , str ] = {}
410
- parent_graph : Dict [str , List [str ]] = {}
410
+ complete_parent_graph : Dict [str , List [str ]] = {}
411
+ alive_set : Set [str ] = set ()
411
412
morphism_graph : Dict [str , List [str ]] = {}
412
413
reduced_parent_graph : Dict [str , List [str ]] = {}
413
414
transitive_parent_graph : Dict [str , List [str ]] = {}
@@ -420,12 +421,9 @@ def build_stitcher_figure(logs: Iterable[events.EventType]) -> Figure:
420
421
overlap_lefttake_map : Dict [str , str ] = {}
421
422
overlap_righttake_map : Dict [str , str ] = {}
422
423
overlap_sibling_map : Dict [str , str ] = {}
423
- combine_list : List [str ] = []
424
424
combine_left_edge : Dict [str , str ] = {}
425
425
combine_right_edge : Dict [str , str ] = {}
426
- temporary : Set [str ] = set ()
427
426
children_join_points : List [str ] = []
428
- children_meet_points : List [str ] = []
429
427
query_position_map : Dict [str , int ] = {}
430
428
initial_alignments : Dict [str , List [CigarHit ]] = {}
431
429
@@ -439,6 +437,18 @@ def remove_intermediate_edges(graph):
439
437
ret [parent ] = lst
440
438
return ret
441
439
440
+ def remove_transitive_edges (graph ):
441
+ tr_cl = transitive_closure (graph )
442
+ ret = {}
443
+ for parent , children in graph .items ():
444
+ lst = []
445
+ for child in children :
446
+ is_transitive = any (child in tr_cl .get (other_node , []) for other_node in children if other_node != child )
447
+ if not is_transitive :
448
+ lst .append (child )
449
+ ret [parent ] = lst
450
+ return ret
451
+
442
452
def get_all_ancestors (recur , lst , graph , ancestor_name ):
443
453
if ancestor_name not in recur :
444
454
recur = recur .copy ()
@@ -460,9 +470,15 @@ def transitive_closure(graph):
460
470
ret [parent ] = lst
461
471
return ret
462
472
463
- def reflexive_closure (graph ):
464
- ret = graph . copy ()
473
+ def copy_graph (graph ):
474
+ ret = {}
465
475
for parent , children in graph .items ():
476
+ ret [parent ] = children [:]
477
+ return ret
478
+
479
+ def reflexive_closure (graph ):
480
+ ret = copy_graph (graph )
481
+ for parent , children in ret .items ():
466
482
if parent not in children :
467
483
children .append (parent )
468
484
for child in children [:]:
@@ -483,7 +499,7 @@ def inverse_graph(graph):
483
499
return ret
484
500
485
501
def graph_sum (graph_a , graph_b ):
486
- ret = graph_a . copy ( )
502
+ ret = copy_graph ( graph_a )
487
503
for key , values in graph_b .items ():
488
504
if key not in ret :
489
505
ret [key ] = []
@@ -497,52 +513,56 @@ def symmetric_closure(graph):
497
513
return graph_sum (graph , inverse_graph (graph ))
498
514
499
515
def record_contig (contig : GenotypedContig , parents : List [GenotypedContig ]):
500
- contig_map [contig .name ] = contig
516
+ complete_contig_map [contig .name ] = contig
501
517
if [contig .name ] != [parent .name for parent in parents ]:
502
518
for parent in parents :
503
- contig_map [parent .name ] = parent
504
- if contig .name not in parent_graph :
505
- parent_graph [contig .name ] = []
519
+ complete_contig_map [parent .name ] = parent
520
+ if contig .name not in complete_parent_graph :
521
+ complete_parent_graph [contig .name ] = []
506
522
507
- parent_graph [contig .name ].append (parent .name )
523
+ complete_parent_graph [contig .name ].append (parent .name )
508
524
509
- def record_morphism (contig : Contig , original : Contig ):
510
- if original .name not in morphism_graph :
511
- morphism_graph [original .name ] = []
512
- lst = morphism_graph [original .name ]
513
- if contig .name not in lst :
514
- lst .append (contig .name )
525
+ def record_alive (contig : Contig ):
526
+ alive_set .add (contig .name )
515
527
516
528
def record_bad_contig (contig : GenotypedContig , lst : List [str ]):
517
- contig_map [contig .name ] = contig
529
+ complete_contig_map [contig .name ] = contig
518
530
lst .append (contig .name )
519
531
520
-
521
532
for event in logs :
522
533
if isinstance (event , events .FinalCombine ):
523
534
record_contig (event .result , event .contigs )
535
+ record_alive (event .result )
524
536
elif isinstance (event , events .SplitGap ):
525
537
record_contig (event .left , [event .contig ])
526
538
record_contig (event .right , [event .contig ])
539
+ record_alive (event .left )
540
+ record_alive (event .right )
527
541
elif isinstance (event , events .Intro ):
528
542
record_contig (event .contig , [])
543
+ record_alive (event .contig )
529
544
elif isinstance (event , events .Hit ):
530
545
record_contig (event .part , [event .contig ])
546
+ record_alive (event .part )
531
547
elif isinstance (event , events .NoRef ):
532
548
record_bad_contig (event .contig , unknown )
549
+ record_alive (event .contig )
533
550
elif isinstance (event , events .ZeroHits ):
534
551
record_bad_contig (event .contig , anomaly )
552
+ record_alive (event .contig )
535
553
elif isinstance (event , events .StrandConflict ):
536
554
record_bad_contig (event .contig , anomaly )
555
+ record_alive (event .contig )
537
556
elif isinstance (event , events .ReverseComplement ):
538
557
record_contig (event .result , [event .contig ])
558
+ record_alive (event .result )
539
559
elif isinstance (event , events .HitNumber ):
540
560
initial_alignments [event .contig .name ] = event .connected
561
+ record_alive (event .contig )
541
562
elif isinstance (event , events .Munge ):
542
563
record_contig (event .result , [event .left , event .right ])
543
564
elif isinstance (event , (events .LStrip , events .RStrip )):
544
565
record_contig (event .result , [event .original ])
545
- record_morphism (event .result , event .original )
546
566
elif isinstance (event , events .Overlap ):
547
567
overlaps_list .append (event .left_overlap .name )
548
568
overlaps_list .append (event .right_overlap .name )
@@ -554,20 +574,20 @@ def record_bad_contig(contig: GenotypedContig, lst: List[str]):
554
574
overlap_sibling_map [event .right_remainder .name ] = event .left_remainder .name
555
575
elif isinstance (event , events .Drop ):
556
576
record_bad_contig (event .contig , discarded )
577
+ record_alive (event .contig )
557
578
elif isinstance (event , events .StitchCut ):
558
579
record_contig (event .left_overlap , [event .left ])
559
580
record_contig (event .left_remainder , [event .left ])
560
581
record_contig (event .right_overlap , [event .right ])
561
582
record_contig (event .right_remainder , [event .right ])
562
583
elif isinstance (event , events .Stitch ):
563
584
record_contig (event .result , [event .left , event .right ])
585
+ record_alive (event .result )
564
586
elif isinstance (event , events .Cut ):
565
587
record_contig (event .left , [event .original ])
566
588
record_contig (event .right , [event .original ])
567
589
elif isinstance (event , events .Combine ):
568
- for contig in event .contigs :
569
- combine_list .append (contig .name )
570
-
590
+ record_alive (event .result )
571
591
record_contig (event .result , event .contigs )
572
592
if event .contigs :
573
593
combine_left_edge [event .result .name ] = event .contigs [0 ].name
@@ -578,6 +598,26 @@ def record_bad_contig(contig: GenotypedContig, lst: List[str]):
578
598
x : NoReturn = event
579
599
raise RuntimeError (f"Unrecognized action or event: { event } " )
580
600
601
+ nodup_parent_graph = remove_transitive_edges (complete_parent_graph )
602
+
603
+ # Close alive set by parents
604
+ def extend_alive (contig_name ):
605
+ if contig_name not in alive_set :
606
+ alive_set .add (contig_name )
607
+
608
+ for parent_name in nodup_parent_graph .get (contig_name , []):
609
+ extend_alive (parent_name )
610
+
611
+ for contig_name in alive_set .copy ():
612
+ extend_alive (contig_name )
613
+
614
+ parent_graph : Dict [str , List [str ]] = {}
615
+ for contig_name in nodup_parent_graph :
616
+ if contig_name in alive_set :
617
+ parent_graph [contig_name ] = nodup_parent_graph [contig_name ]
618
+
619
+ contig_map : Dict [str , GenotypedContig ] = {k : v for k , v in complete_contig_map .items () if k in alive_set }
620
+ bad_contigs = anomaly + discarded + unknown
581
621
group_refs = {contig .group_ref : len (contig .ref_seq ) for contig in contig_map .values () if contig .ref_seq }
582
622
children_graph = inverse_graph (parent_graph )
583
623
transitive_parent_graph = transitive_closure (parent_graph )
@@ -590,23 +630,18 @@ def record_bad_contig(contig: GenotypedContig, lst: List[str]):
590
630
sorted_sinks = list (sorted (child_name for
591
631
child_name in contig_map
592
632
if child_name not in children_graph ))
593
- bad_contigs = anomaly + discarded + unknown
633
+
634
+ for contig_name , parents in parent_graph .items ():
635
+ if len (parents ) == 1 :
636
+ morphism_graph [parents [0 ]] = [contig_name ]
594
637
595
638
transitive_morphism_graph = transitive_closure (morphism_graph )
596
639
reduced_morphism_graph = remove_intermediate_edges (transitive_morphism_graph )
597
640
eqv_morphism_graph = reflexive_closure (symmetric_closure (transitive_morphism_graph ))
598
641
599
- for contig_name in overlaps_list :
600
- temporary .add (contig_name )
601
- for child in transitive_children_graph .get (contig_name , []):
602
- temporary .add (child )
603
-
604
642
for contig_name , parents in parent_graph .items ():
605
- if len (parents ) > 2 :
643
+ if len (parents ) > 1 :
606
644
children_join_points .append (contig_name )
607
- for contig_name , children in children_graph .items ():
608
- if len (children ) > 2 :
609
- children_meet_points .append (contig_name )
610
645
611
646
def hits_to_insertions (hits : List [CigarHit ]):
612
647
for hit in hits :
@@ -621,12 +656,6 @@ def hits_to_insertions(hits: List[CigarHit]):
621
656
nonempty_insertions = [gap for gap in all_insertions if gap .query_length > 0 ]
622
657
unaligned_map [contig_name ] = nonempty_insertions
623
658
624
- last_join_points_parent = {contig_name for join in children_join_points for contig_name in transitive_parent_graph .get (join , [])}
625
- last_join_points = []
626
- for contig_name in children_join_points :
627
- if contig_name not in last_join_points_parent :
628
- last_join_points .append (contig_name )
629
-
630
659
def set_query_position (contig : Contig ):
631
660
if contig .name in query_position_map :
632
661
return
@@ -651,12 +680,6 @@ def set_query_position(contig: Contig):
651
680
for contig in contig_map .values ():
652
681
set_query_position (contig )
653
682
654
- # Closing `temporary'
655
- for contig_name in contig_map :
656
- if contig_name in temporary :
657
- for clone in eqv_morphism_graph .get (contig_name , [contig_name ]):
658
- temporary .add (clone )
659
-
660
683
def copy_takes_one_side (edge_table , overlap_xtake_map , overlap_xparent_map ):
661
684
for parent in edge_table :
662
685
child_remainder = edge_table [parent ]
@@ -675,39 +698,42 @@ def copy_takes_one_side(edge_table, overlap_xtake_map, overlap_xparent_map):
675
698
while list (copy_takes_one_side (combine_left_edge , overlap_righttake_map , overlap_rightparent_map )): pass
676
699
677
700
final_parts : Dict [str , bool ] = {}
678
- for contig_name in contig_map :
679
- if contig_name in temporary :
680
- continue
701
+ pre_join_points = []
681
702
682
- if contig_name in combine_list :
683
- finals = reduced_morphism_graph .get (contig_name , [contig_name ])
684
- if len (finals ) == 1 :
685
- final_parts [finals [0 ]] = True
703
+ def add_join_parents (join_name ):
704
+ if join_name in children_join_points :
705
+ for contig_name in parent_graph .get (join_name , [join_name ]):
706
+ add_join_parents (contig_name )
707
+ else :
708
+ pre_join_points .append (join_name )
686
709
687
- elif contig_name in bad_contigs :
688
- final_parts [ contig_name ] = True
710
+ for join_name in children_join_points + sorted_sinks :
711
+ add_join_parents ( join_name )
689
712
690
- for join in last_join_points + sorted_sinks :
691
- parents = parent_graph . get ( join , [ join ])
692
- if not any ( isinstance ( contig_map [ parent ], AlignedContig ) for parent in parents ) :
693
- parents = [ join ]
713
+ def is_ancestor ( contig_name , other_names ) :
714
+ for other in other_names :
715
+ if other == contig_name :
716
+ continue
694
717
695
- for contig_name in parents :
696
- for contig_name in reduced_morphism_graph .get (contig_name , [contig_name ]):
697
- if contig_name in bad_contigs :
698
- continue
718
+ if contig_name in transitive_children_graph .get (other , []):
719
+ return True
720
+ return False
699
721
700
- if any (contig_name in transitive_parent_graph .get (bad , []) for bad in bad_contigs ):
701
- continue
722
+ for contig_name in pre_join_points [:]:
723
+ if is_ancestor (contig_name , pre_join_points ):
724
+ pre_join_points .remove (contig_name )
702
725
703
- if any (eqv in temporary for eqv in eqv_morphism_graph .get (contig_name , [contig_name ])):
704
- continue
726
+ for contig_name in pre_join_points :
727
+ if any (contig_name in transitive_parent_graph .get (bad , []) for bad in bad_contigs ):
728
+ continue
705
729
706
- transitive_parent = eqv_parent_graph .get (contig_name , [contig_name ])
707
- if any (parent in transitive_parent for parent in final_parts ):
708
- continue
730
+ if any (contig_name in eqv_morphism_graph .get (temp_name , [temp_name ]) for temp_name in overlaps_list ):
731
+ continue
709
732
710
- final_parts [contig_name ] = True
733
+ final_parts [contig_name ] = True
734
+
735
+ for contig_name in bad_contigs :
736
+ final_parts [contig_name ] = True
711
737
712
738
final_children_mapping : Dict [str , List [str ]] = {}
713
739
for parent_name in sorted_roots :
@@ -838,11 +864,10 @@ def get_contig_coordinates(contig: GenotypedContig) -> Tuple[int, int, int, int]
838
864
return (a_r_st , a_r_ei , f_r_st , f_r_ei )
839
865
840
866
def get_tracks (repeatset : Set [str ], group_ref : str , contig_name : str ) -> Iterable [Track ]:
841
- parts = final_children_mapping [contig_name ]
867
+ parts_names = final_children_mapping [contig_name ]
868
+ parts = [contig_map [name ] for name in parts_names ]
842
869
parts = list (sorted (parts , key = lambda part : part .alignment .r_st if isinstance (part , AlignedContig ) else - 1 ))
843
- for prev_name , part_name , next_naem in sliding_window (parts ):
844
- part = contig_map [part_name ]
845
-
870
+ for part in parts :
846
871
if part .name in repeatset :
847
872
continue
848
873
0 commit comments