@@ -96,7 +96,6 @@ def find_overlap(self, base, q_coords):
96
96
:param q_coords: the coordinates of the query region (global coordinates)
97
97
:return overlap: A QueryRegion object that represents the where the query overlaps with the reference region
98
98
"""
99
- start , end = 0 , 0
100
99
ref_coords = self .get_coords (base ) # Coordinates are global coordinates
101
100
102
101
# If the ref_coords are in the range of the query region and the q_coords are in the range of the ref region
@@ -114,26 +113,34 @@ def find_overlap(self, base, q_coords):
114
113
else :
115
114
start = q_coords [0 ]
116
115
117
- overlap = QueryRegion (self .region_name , self , base , self .genome )
116
+ overlap = QueryRegion (self .region_name , self , base , self .genome )
117
+
118
+ # Set the nucleotide and protein regions
119
+ if base == 'nucl' :
120
+ overlap .set_coords (q_coords , 'nucl' )
121
+ overlap .set_nt_seq_from_genome ()
122
+ prot_overlap = self .set_protein_equivalents (overlap )
123
+ overlap .set_pos_from_qstart ('nucl' )
124
+ overlap .set_pos_from_rstart ('nucl' )
125
+
126
+ # Set protein coordinates if the overlap is in a coding region
127
+ if overlap .region_name not in NON_CODING :
128
+ overlap .set_coords (prot_overlap [1 ], 'prot' )
129
+ overlap .set_sequence ('prot' , prot_overlap [0 ])
130
+ print (overlap .aa_seq )
131
+ overlap .set_pos_from_gstart ()
132
+ overlap .set_pos_from_cds ()
133
+ else :
134
+ nucl_overlap = self .set_nucleotide_equivalents (overlap )
135
+ overlap .set_coords (q_coords , 'prot' )
136
+ overlap .set_sequence ('prot' , self .get_sequence ('prot' )[start : end ])
137
+ overlap .set_coords (nucl_overlap [1 ], 'nucl' )
138
+ overlap .set_sequence (nucl_overlap [0 ], 'nucl' )
118
139
119
- # Set the nucleotide and protein regions
120
- if base == 'nucl' :
121
- overlap .set_coords (q_coords , 'nucl' )
122
- overlap .set_nt_seq_from_genome ()
123
- prot_overlap = self .set_protein_equivalents (overlap )
124
-
125
- # Set protein coordinates if the overlap is in a coding region
126
- if overlap .region_name not in NON_CODING :
127
- overlap .set_coords (prot_overlap [1 ], 'prot' )
128
- overlap .set_sequence (prot_overlap [0 ], 'prot' )
129
- else :
130
- nucl_overlap = self .set_nucleotide_equivalents (overlap )
131
- overlap .set_coords (q_coords , 'prot' )
132
- overlap .set_sequence ('prot' , self .get_sequence ('prot' )[start : end ])
133
- overlap .set_coords (nucl_overlap [1 ], 'nucl' )
134
- overlap .set_sequence (nucl_overlap [0 ], 'nucl' )
140
+ return overlap # return overlap object not dict
135
141
136
- return overlap # return overlap object not dict
142
+ else :
143
+ return None
137
144
138
145
def set_protein_equivalents (self , overlap ):
139
146
"""
@@ -361,10 +368,11 @@ def find_matches(self, base, query_matches):
361
368
for q_coords in query_matches :
362
369
for name in self .ref_genome_regions :
363
370
if name != 'Complete' :
364
-
365
371
# Use coordinates to find which regions overlap with the query region
366
372
overlap = self .ref_genome_regions [name ].find_overlap (base , q_coords )
367
- query_regions [name ] = overlap
373
+
374
+ if overlap is not None :
375
+ query_regions [name ] = overlap
368
376
369
377
return query_regions
370
378
@@ -635,8 +643,6 @@ def output_overlap(overlap_regions, outfile=None):
635
643
636
644
for key in overlap_regions :
637
645
region = overlap_regions [key ]
638
- if region .region_name .startswith ('5\' LTR' ):
639
- print ("\t 3'LTR\n " )
640
646
641
647
print ("\n Region:\t {}" .format (region .region_name ))
642
648
print ("\n \t Nucleotide Sequence:" )
@@ -646,9 +652,9 @@ def output_overlap(overlap_regions, outfile=None):
646
652
647
653
if region .aa_seq is not None :
648
654
print ("\n \t Protein Sequence:" )
649
- seq_lines = [region .nt_seq [i :i + 60 ] for i in range (0 , len (region .nt_seq ), 60 )]
655
+ seq_lines = [region .nt_seq [i :i + 60 ] for i in range (0 , len (region .aa_seq ), 60 )]
650
656
for line in seq_lines :
651
- print ('\t \t {}\n ' .format (line ))
657
+ print ('\t \t {}' .format (line ))
652
658
653
659
print ("\n \t Relative Positions:" )
654
660
@@ -682,7 +688,7 @@ def output_overlap(overlap_regions, outfile=None):
682
688
683
689
if region .aa_seq is not None :
684
690
outfile .write ("\n \t Protein Sequence:\n " )
685
- seq_lines = [region .nt_seq [i :i + 60 ] for i in range (0 , len (region .nt_seq ), 60 )]
691
+ seq_lines = [region .nt_seq [i :i + 60 ] for i in range (0 , len (region .aa_seq ), 60 )]
686
692
for line in seq_lines :
687
693
print ('\t \t {}' .format (line ))
688
694
@@ -831,7 +837,7 @@ def main():
831
837
# Find indices where the query sequence aligns with the reference sequence
832
838
match_coords = ref_genome .query_region_coordinates (alignment [- 1 ]) # Query is the last item
833
839
query_regions = ref_genome .find_matches (args .base , match_coords )
834
- # output_overlap(query_regions, args.out)
840
+ output_overlap (query_regions , args .out )
835
841
836
842
else :
837
843
valid_in = valid_inputs (args .virus , args .start , args .end , args .region )
0 commit comments