@@ -29,16 +29,19 @@ class GenomeRegion:
29
29
genome start (gstart), query start (qstart), region start (rstart), and protein start (pstart)
30
30
"""
31
31
32
- def __init__ (self , region_name ):
32
+ def __init__ (self , region_name , global_ncoords = None , local_ncoords = None ,
33
+ nt_seq = None , global_pcoords = None , local_pcoords = None , aa_seq = None ):
33
34
"""
34
35
Stores information about each genomic region
35
36
:param region_name: The name of the genomic region
36
37
"""
37
38
self .region_name = region_name
38
- self .local_ncoords , self .global_ncoords = None , None
39
- self .nt_seq = None
40
- self .local_pcoords , self .global_pcoords = None , None
41
- self .aa_seq = None
39
+ self .global_ncoords = global_ncoords
40
+ self .local_ncoords = local_ncoords
41
+ self .nt_seq = nt_seq
42
+ self .global_pcoords = global_pcoords
43
+ self .local_pcoords = local_pcoords
44
+ self .aa_seq = aa_seq
42
45
self .rel_pos = {'CDS' : [], 'gstart' : [], 'qstart' : [], 'rstart' : [], 'pstart' : []}
43
46
self .codon_aln = ''
44
47
@@ -91,7 +94,7 @@ def set_pos_from_cds(self, virus):
91
94
"""
92
95
Gives the position of a sequence relative to the start of the coding sequence
93
96
"""
94
- if self .region_name is not '5\' LTR' :
97
+ if self .region_name != '5\' LTR' :
95
98
if virus == 'hiv' :
96
99
cds_start = 790
97
100
else :
@@ -129,7 +132,10 @@ def set_pos_from_rstart(self, region):
129
132
end_offset = self .local_ncoords [1 ] - region .local_ncoords [1 ]
130
133
self .rel_pos ['rstart' ] = [start_offset , end_offset ]
131
134
132
- def set_pos_from_qstart (self , query ):
135
+ def set_pos_from_qstart (self , query , base ):
136
+ if self .local_ncoords is None or self .local_pcoords is None :
137
+ self .set_local_coords (self .get_global_coords (base ), base )
138
+
133
139
start_offset = query .local_ncoords [0 ] - self .local_ncoords [0 ]
134
140
end_offset = query .local_ncoords [1 ] - self .local_ncoords [1 ]
135
141
self .rel_pos ['qstart' ] = [start_offset , end_offset ]
@@ -213,13 +219,14 @@ def set_regions(virus, nt_reference, nt_coords, aa_reference, aa_coords):
213
219
nucl_coords = [int (nt_line [1 ]), int (nt_line [2 ])]
214
220
215
221
seq_region = GenomeRegion (nt_line [0 ])
216
- seq_region .set_seq_from_ref (nt_reference , 'nucl' )
217
222
218
223
# Set global and local nucleotide coordinates
219
224
seq_region .set_global_coords (nucl_coords , 'nucl' )
220
225
local_coords = seq_region .global_to_local_index (nucl_coords , 'nucl' )
221
226
seq_region .set_local_coords (local_coords , 'nucl' )
222
227
228
+ seq_region .set_seq_from_ref (nt_reference , 'nucl' )
229
+
223
230
# Set relative positions
224
231
seq_region .set_pos_from_cds (virus )
225
232
seq_region .set_pos_from_gstart ()
@@ -240,13 +247,14 @@ def set_regions(virus, nt_reference, nt_coords, aa_reference, aa_coords):
240
247
for i in range (len (prot_names )):
241
248
for seq_region in genome_regions :
242
249
if prot_names [i ] in seq_region .region_name :
243
- seq_region .set_seq_from_ref (aa_reference , 'prot' )
244
250
245
251
# Set global and local protein coordinates
246
252
seq_region .set_global_coords (prot_coords [i ], 'prot' )
247
253
local_coords = seq_region .global_to_local_index (prot_coords [i ], 'prot' )
248
254
seq_region .set_local_coords (local_coords , 'prot' )
249
255
256
+ seq_region .set_seq_from_ref (aa_reference , 'prot' )
257
+
250
258
seq_region .set_pos_from_pstart (virus )
251
259
252
260
return genome_regions
@@ -460,7 +468,7 @@ def find_matches(virus, base, ref_regions, match_coordinates):
460
468
# Set relative positions
461
469
query_region .set_pos_from_cds (virus )
462
470
query_region .set_pos_from_gstart ()
463
- query_region .set_pos_from_qstart (ref_region )
471
+ query_region .set_pos_from_qstart (ref_region , base )
464
472
query_region .set_pos_from_rstart (ref_region )
465
473
query_region .set_pos_from_pstart (virus )
466
474
@@ -485,7 +493,7 @@ def set_protein_equivalents(query_reg, ref_regions):
485
493
non_coding = ["5'LTR" , "TAR" , "3'LTR" ]
486
494
for ref_reg in ref_regions :
487
495
if ref_reg .region_name == query_reg .region_name and ref_reg .region_name not in non_coding :
488
- if ref_reg .codon_aln is not None and query_reg .aa_coords is not None :
496
+ if ref_reg .codon_aln is not None and query_reg .global_pcoords is not None :
489
497
prot_equiv = ref_reg .codon_aln [query_reg .global_ncoords [0 ]: query_reg .global_ncoords [1 ]]
490
498
prot_equiv = re .sub ('[-]' , '' , prot_equiv )
491
499
query_reg .set_aa_seq (prot_equiv )
0 commit comments