@@ -29,16 +29,19 @@ class GenomeRegion:
2929 genome start (gstart), query start (qstart), region start (rstart), and protein start (pstart)
3030 """
3131
32- def __init__ (self , region_name ):
32+ def __init__ (self , region_name , global_ncoords = None , local_ncoords = None ,
33+ nt_seq = None , global_pcoords = None , local_pcoords = None , aa_seq = None ):
3334 """
3435 Stores information about each genomic region
3536 :param region_name: The name of the genomic region
3637 """
3738 self .region_name = region_name
38- self .local_ncoords , self .global_ncoords = None , None
39- self .nt_seq = None
40- self .local_pcoords , self .global_pcoords = None , None
41- self .aa_seq = None
39+ self .global_ncoords = global_ncoords
40+ self .local_ncoords = local_ncoords
41+ self .nt_seq = nt_seq
42+ self .global_pcoords = global_pcoords
43+ self .local_pcoords = local_pcoords
44+ self .aa_seq = aa_seq
4245 self .rel_pos = {'CDS' : [], 'gstart' : [], 'qstart' : [], 'rstart' : [], 'pstart' : []}
4346 self .codon_aln = ''
4447
@@ -91,7 +94,7 @@ def set_pos_from_cds(self, virus):
9194 """
9295 Gives the position of a sequence relative to the start of the coding sequence
9396 """
94- if self .region_name is not '5\' LTR' :
97+ if self .region_name != '5\' LTR' :
9598 if virus == 'hiv' :
9699 cds_start = 790
97100 else :
@@ -129,7 +132,10 @@ def set_pos_from_rstart(self, region):
129132 end_offset = self .local_ncoords [1 ] - region .local_ncoords [1 ]
130133 self .rel_pos ['rstart' ] = [start_offset , end_offset ]
131134
132- def set_pos_from_qstart (self , query ):
135+ def set_pos_from_qstart (self , query , base ):
136+ if self .local_ncoords is None or self .local_pcoords is None :
137+ self .set_local_coords (self .get_global_coords (base ), base )
138+
133139 start_offset = query .local_ncoords [0 ] - self .local_ncoords [0 ]
134140 end_offset = query .local_ncoords [1 ] - self .local_ncoords [1 ]
135141 self .rel_pos ['qstart' ] = [start_offset , end_offset ]
@@ -213,13 +219,14 @@ def set_regions(virus, nt_reference, nt_coords, aa_reference, aa_coords):
213219 nucl_coords = [int (nt_line [1 ]), int (nt_line [2 ])]
214220
215221 seq_region = GenomeRegion (nt_line [0 ])
216- seq_region .set_seq_from_ref (nt_reference , 'nucl' )
217222
218223 # Set global and local nucleotide coordinates
219224 seq_region .set_global_coords (nucl_coords , 'nucl' )
220225 local_coords = seq_region .global_to_local_index (nucl_coords , 'nucl' )
221226 seq_region .set_local_coords (local_coords , 'nucl' )
222227
228+ seq_region .set_seq_from_ref (nt_reference , 'nucl' )
229+
223230 # Set relative positions
224231 seq_region .set_pos_from_cds (virus )
225232 seq_region .set_pos_from_gstart ()
@@ -240,13 +247,14 @@ def set_regions(virus, nt_reference, nt_coords, aa_reference, aa_coords):
240247 for i in range (len (prot_names )):
241248 for seq_region in genome_regions :
242249 if prot_names [i ] in seq_region .region_name :
243- seq_region .set_seq_from_ref (aa_reference , 'prot' )
244250
245251 # Set global and local protein coordinates
246252 seq_region .set_global_coords (prot_coords [i ], 'prot' )
247253 local_coords = seq_region .global_to_local_index (prot_coords [i ], 'prot' )
248254 seq_region .set_local_coords (local_coords , 'prot' )
249255
256+ seq_region .set_seq_from_ref (aa_reference , 'prot' )
257+
250258 seq_region .set_pos_from_pstart (virus )
251259
252260 return genome_regions
@@ -460,7 +468,7 @@ def find_matches(virus, base, ref_regions, match_coordinates):
460468 # Set relative positions
461469 query_region .set_pos_from_cds (virus )
462470 query_region .set_pos_from_gstart ()
463- query_region .set_pos_from_qstart (ref_region )
471+ query_region .set_pos_from_qstart (ref_region , base )
464472 query_region .set_pos_from_rstart (ref_region )
465473 query_region .set_pos_from_pstart (virus )
466474
@@ -485,7 +493,7 @@ def set_protein_equivalents(query_reg, ref_regions):
485493 non_coding = ["5'LTR" , "TAR" , "3'LTR" ]
486494 for ref_reg in ref_regions :
487495 if ref_reg .region_name == query_reg .region_name and ref_reg .region_name not in non_coding :
488- if ref_reg .codon_aln is not None and query_reg .aa_coords is not None :
496+ if ref_reg .codon_aln is not None and query_reg .global_pcoords is not None :
489497 prot_equiv = ref_reg .codon_aln [query_reg .global_ncoords [0 ]: query_reg .global_ncoords [1 ]]
490498 prot_equiv = re .sub ('[-]' , '' , prot_equiv )
491499 query_reg .set_aa_seq (prot_equiv )
0 commit comments