@@ -258,7 +258,7 @@ def valid_inputs(virus, start_coord, end_coord, region):
258
258
if type (start_coord ) == str :
259
259
print ("Invalid start coordinate type: {}" .format (type (start_coord )))
260
260
261
- if start_coord <= 0 :
261
+ if start_coord < 0 :
262
262
print ("Invalid start coordinate: {}" .format (start_coord ))
263
263
return False
264
264
@@ -511,6 +511,14 @@ def output(query_regions, outfile=None):
511
511
print ("\t Amino acid position relative to protein start: {} --> {}"
512
512
.format (reg .pos_from_aa_start [0 ], reg .pos_from_aa_start [1 ]))
513
513
514
+ if reg .pos_from_qstart is not None :
515
+ print ("\t Position relative to query start: {} --> {}\n "
516
+ .format (reg .pos_from_qstart [0 ], reg .pos_from_qstart [1 ]))
517
+
518
+ if reg .pos_from_rstart is not None :
519
+ print ("\t Position relative to region start: {} --> {}\n "
520
+ .format (reg .pos_from_rstart [0 ], reg .pos_from_rstart [1 ]))
521
+
514
522
else :
515
523
outfile .write ("\n \n Regions touched by the query sequence:" )
516
524
for reg in query_regions :
@@ -540,6 +548,14 @@ def output(query_regions, outfile=None):
540
548
outfile .write ("\t Amino acid position relative to protein start: {} --> {}\n "
541
549
.format (reg .pos_from_aa_start [0 ], reg .pos_from_aa_start [1 ]))
542
550
551
+ if reg .pos_from_qstart is not None :
552
+ outfile .write ("\t Position relative to query start: {} --> {}\n "
553
+ .format (reg .pos_from_qstart [0 ], reg .pos_from_qstart [1 ]))
554
+
555
+ if reg .pos_from_rstart is not None :
556
+ outfile .write ("\t Position relative to region start: {} --> {}\n "
557
+ .format (reg .pos_from_rstart [0 ], reg .pos_from_rstart [1 ]))
558
+
543
559
544
560
def retrieve (virus , base , ref_regions , region , outfile = None , start_offset = 1 , end_offset = 'end' ):
545
561
"""
@@ -554,82 +570,27 @@ def retrieve(virus, base, ref_regions, region, outfile=None, start_offset=1, end
554
570
:return: The genomic region defined by the starting and ending coordinates
555
571
"""
556
572
for ref_region in ref_regions :
557
- sequence_range = ref_region .get_coords (base )
558
- region_start = sequence_range [0 ]
559
- region_end = sequence_range [1 ]
560
-
561
- if start_offset <= region_start :
562
- start = region_start
563
- else :
564
- start = region_start + (start_offset - region_start )
565
-
566
- # If end_coord is greater than the region's end coordinate, set end_coord to region's end coordinate
567
- if end_offset == 'end' or end_offset > region_end :
568
- end = region_end
569
- else :
570
- end = region_end + (region_end - end_offset )
571
-
572
- retrieved_region = None
573
573
if ref_region .region_name == region :
574
- s = ref_region .get_sequence (base )
575
- region_to_retrieve = s [start - 1 : end ]
574
+ sequence_range = ref_region .get_coords (base )
575
+ region_start = sequence_range [0 ]
576
+ region_end = sequence_range [1 ]
576
577
577
- if base == 'nucl' :
578
- retrieved_region = GenomeRegion (region , [start , end ], region_to_retrieve ,
579
- ref_region .aa_coords , ref_region .aa_seq )
580
- retrieved_region .set_sequence (region_to_retrieve , 'nucl' )
578
+ if start_offset <= region_start :
579
+ start = region_start
581
580
else :
582
- retrieved_region = GenomeRegion (region , ref_region .nt_coords , ref_region .nt_seq ,
583
- [start , end ], region_to_retrieve )
584
- retrieved_region .set_sequence (region_to_retrieve , 'prot' )
585
-
586
-
587
- retrieved_region .set_pos_from_cds (virus )
588
- retrieved_region .pos_from_gstart = retrieved_region .local_to_global_index ([start , end ], base )
589
- retrieved_region .set_pos_from_aa_start (virus )
590
-
591
- if retrieved_region :
592
- if outfile is None :
593
- print ("\033 [1mRetrieved sequence: \033 [0m\n " )
594
- print ("Region:\t {}" .format (retrieved_region .region_name ))
595
- print (textwrap .fill (retrieved_region .get_sequence (base )))
596
-
597
- print ("\n \033 [1mRelative Positions: \033 [0m" )
598
- if len (retrieved_region .pos_from_cds ) == 2 :
599
- print ("\t Nucleotide position relative to CDS start: {} --> {}"
600
- .format (retrieved_region .pos_from_cds [0 ], retrieved_region .pos_from_cds [1 ]))
601
- else :
602
- print ("\t Nucleotide position relative to CDS start: N/A" )
603
-
604
- if retrieved_region .pos_from_gstart is not None :
605
- print ("\t Nucleotide position relative to genome start: {} --> {}"
606
- .format (retrieved_region .pos_from_gstart [0 ] + 1 , retrieved_region .pos_from_gstart [1 ]))
607
-
608
- if retrieved_region .pos_from_aa_start is not None :
609
- print ("\t Amino acid position relative to protein start: {} --> {}"
610
- .format (retrieved_region .pos_from_aa_start [0 ], retrieved_region .pos_from_aa_start [1 ]))
581
+ start = region_start + (start_offset - region_start )
611
582
583
+ # If end_coord is greater than the region's end coordinate, set end_coord to region's end coordinate
584
+ if end_offset == 'end' or end_offset > region_end :
585
+ end = region_end
612
586
else :
613
- outfile .write ("\n \n Retrieved sequence:\n " )
614
- outfile .write ("\n Region:\t {}" .format (retrieved_region .region_name ))
615
- outfile .write ("\n " + textwrap .fill (retrieved_region .get_sequence (base )))
616
-
617
- outfile .write ("\n \n Relative Positions: \n " )
618
- if len (retrieved_region .pos_from_cds ) == 2 :
619
- outfile .write ("\t Nucleotide position relative to CDS: {} to {}\n "
620
- .format (retrieved_region .pos_from_cds [0 ], retrieved_region .pos_from_cds [1 ]))
621
- else :
622
- outfile .write ("\t Nucleotide position relative to CDS start: N/A\n " )
623
-
624
- if retrieved_region .pos_from_gstart is not None :
625
- outfile .write ("\t Nucleotide position relative to start of genome: {} --> {}\n "
626
- .format (retrieved_region .pos_from_gstart [0 ] + 1 , retrieved_region .pos_from_gstart [1 ]))
587
+ end = region_end + (region_end - end_offset )
627
588
628
- if retrieved_region . pos_from_aa_start is not None :
629
- outfile . write ( " \t Amino acid position relative to protein start: {} --> {} \n "
630
- . format ( retrieved_region . pos_from_aa_start [ 0 ], retrieved_region . pos_from_aa_start [ 1 ]) )
589
+ # TODO: sort retrieved_regions to print region first
590
+ retrieved_regions = find_matches ( virus , base , ref_regions , [[ start , end ]])
591
+ output ( retrieved_regions , outfile )
631
592
632
- return retrieved_region
593
+ return retrieved_regions
633
594
634
595
635
596
def handle_args (virus , base , ref_nt , nt_coords , ref_aa , aa_coords ):
0 commit comments