@@ -427,9 +427,9 @@ def build_stitcher_figure(logs: Iterable[events.EventType]) -> Figure:
427
427
combine_right_edge : Dict [str , str ] = {}
428
428
children_join_points : List [str ] = []
429
429
query_position_map : Dict [str , Tuple [int , int ]] = {}
430
- initial_alignments : Dict [str , List [CigarHit ]] = {}
431
430
lstrip_map : Dict [str , str ] = {}
432
431
rstrip_map : Dict [str , str ] = {}
432
+ strip_set : Set [Tuple [str , int , int ]] = set ()
433
433
434
434
def remove_intermediate_edges (graph ):
435
435
ret = {}
@@ -516,18 +516,31 @@ def graph_sum(graph_a, graph_b):
516
516
def symmetric_closure(graph):
    """Return ``graph`` summed with its own inverse.

    The inverse is obtained from ``inverse_graph`` and the two graphs are
    combined with ``graph_sum``.
    """
    inverted = inverse_graph(graph)
    return graph_sum(graph, inverted)
518
518
519
def record_unaligned_parts(original: AlignedContig, q_st: int, r_st: int, length: int):
    """Register a placeholder contig standing in for a stripped stretch.

    A stand-in contig is derived from ``original``: its sequence is ``'A'``
    repeated over the query length of a default alignment that starts at
    ``q_st`` and ends at ``q_st + length - 1``. The stand-in is recorded
    with ``original`` as its parent, its name is appended to the
    ``unaligned`` list through ``record_bad_contig``, and it is passed to
    ``record_alive``.

    Every ``(original.seq, q_st, q_st + length)`` triple is processed at
    most once; processed triples are remembered in ``strip_set``.

    Returns the stand-in contig, or ``None`` when ``length`` is not
    positive or the triple was already processed.
    """
    strip_key = (original.seq, q_st, q_st + length)
    if not length > 0 or strip_key in strip_set:
        return None

    strip_set.add(strip_key)

    # r_ei is passed as r_st - 1; presumably this yields an empty reference
    # range, i.e. a pure insertion -- confirm against CigarHit.
    placeholder_hit = CigarHit.from_default_alignment(
        q_st=q_st,
        q_ei=q_st + length - 1,
        r_st=r_st,
        r_ei=r_st - 1,
    )
    filler = 'A' * placeholder_hit.query_length

    # NOTE(review): the name literal is reproduced exactly as found,
    # including the space after the closing brace -- confirm it is intended.
    placeholder_query = dataclasses.replace(
        original,
        name=f"u{ len (complete_contig_map )} ",
        seq=filler,
    )
    placeholder = AlignedContig.make(placeholder_query, placeholder_hit, strand=original.strand)

    record_contig(placeholder, [original])
    record_bad_contig(placeholder, unaligned)
    record_alive(placeholder)
    return placeholder
532
+
533
def record_regular_strip(result: AlignedContig, original: AlignedContig):
    """Record the stretch by which ``result`` and ``original`` differ.

    The stretch length is the absolute difference between the two
    alignments' query lengths. The stretch is anchored at the query and
    reference starts of ``original``'s alignment and handed to
    ``record_unaligned_parts``, whose return value is passed through.
    """
    # NOTE(review): the stretch always starts at original.alignment.q_st,
    # for record_lstrip and record_rstrip callers alike -- confirm that this
    # is the intended anchor for right-side strips.
    stripped_length = abs(result.alignment.query_length - original.alignment.query_length)
    return record_unaligned_parts(
        original,
        q_st=original.alignment.q_st,
        r_st=original.alignment.r_st,
        length=stripped_length,
    )
538
+
539
def record_initial_strip(original: AlignedContig, q_st: int, q_ei: int):
    """Record the inclusive query range ``q_st..q_ei`` as a stripped stretch.

    The range is forwarded to ``record_unaligned_parts`` together with the
    reference start of ``original``'s alignment. When that call produces a
    contig, its query range is stored in ``query_position_map`` under the
    contig's name; otherwise nothing else is recorded.
    """
    span_length = q_ei - q_st + 1
    placeholder = record_unaligned_parts(original, q_st, original.alignment.r_st, span_length)
    if not placeholder:
        return
    query_position_map[placeholder.name] = (q_st, q_ei)
531
544
532
545
def record_contig (contig : GenotypedContig , parents : List [GenotypedContig ]):
533
546
complete_contig_map [contig .name ] = contig
@@ -546,17 +559,15 @@ def record_bad_contig(contig: GenotypedContig, lst: List[str]):
546
559
complete_contig_map [contig .name ] = contig
547
560
lst .append (contig .name )
548
561
549
def record_lstrip(result: AlignedContig, original: AlignedContig):
    """Note in ``lstrip_map`` that ``result`` was derived from ``original``.

    If ``record_regular_strip`` yields a contig for the difference between
    the two, that contig's name is additionally mapped to ``result``'s name.
    """
    lstrip_map[result.name] = original.name

    leftover = record_regular_strip(result, original)
    if not leftover:
        return
    lstrip_map[leftover.name] = result.name
555
567
556
def record_rstrip(result: AlignedContig, original: AlignedContig):
    """Note in ``rstrip_map`` that ``result`` was derived from ``original``.

    If ``record_regular_strip`` yields a contig for the difference between
    the two, that contig's name is additionally mapped to ``result``'s name.
    """
    rstrip_map[result.name] = original.name

    leftover = record_regular_strip(result, original)
    if not leftover:
        return
    rstrip_map[leftover.name] = result.name
562
573
@@ -597,6 +608,8 @@ def record_rstrip(result: GenotypedContig, original: GenotypedContig):
597
608
elif isinstance (event , events .RStrip ):
598
609
record_contig (event .result , [event .original ])
599
610
record_rstrip (event .result , event .original )
611
+ elif isinstance (event , events .InitialStrip ):
612
+ record_initial_strip (event .contig , event .q_st , event .q_ei )
600
613
elif isinstance (event , events .Overlap ):
601
614
overlaps_list .append (event .left_overlap .name )
602
615
overlaps_list .append (event .right_overlap .name )
@@ -680,11 +693,9 @@ def set_query_position(contig_name: str) -> None:
680
693
children_names = children_graph .get (contig .name , [])
681
694
682
695
def copy_from_parent (contig : AlignedContig , parent_name : str ) -> None :
683
- parent = contig_map [parent_name ]
684
696
if parent_name in query_position_map :
685
697
(original_q_st , original_q_ei ) = query_position_map [parent_name ]
686
698
(current_q_st , current_q_ei ) = (contig .alignment .q_st , contig .alignment .q_ei )
687
- original_query_len = abs (original_q_st - original_q_ei )
688
699
current_query_len = abs (current_q_st - current_q_ei )
689
700
690
701
if contig_name in lstrip_map :
0 commit comments