Skip to content

Commit 40aa56a

Browse files
committed
-Updated test cases (#23, #20)
-Debugging
1 parent d25bf90 commit 40aa56a

File tree

2 files changed

+263
-376
lines changed

2 files changed

+263
-376
lines changed

poplars/sequence_locator.py

+19-11
Original file line number | Diff line number | Diff line change
@@ -29,16 +29,19 @@ class GenomeRegion:
2929
genome start (gstart), query start (qstart), region start (rstart), and protein start (pstart)
3030
"""
3131

32-
def __init__(self, region_name):
32+
def __init__(self, region_name, global_ncoords=None, local_ncoords=None,
33+
nt_seq=None, global_pcoords=None, local_pcoords=None, aa_seq=None):
3334
"""
3435
Stores information about each genomic region
3536
:param region_name: The name of the genomic region
3637
"""
3738
self.region_name = region_name
38-
self.local_ncoords, self.global_ncoords = None, None
39-
self.nt_seq = None
40-
self.local_pcoords, self.global_pcoords = None, None
41-
self.aa_seq = None
39+
self.global_ncoords = global_ncoords
40+
self.local_ncoords = local_ncoords
41+
self.nt_seq = nt_seq
42+
self.global_pcoords = global_pcoords
43+
self.local_pcoords = local_pcoords
44+
self.aa_seq = aa_seq
4245
self.rel_pos = {'CDS': [], 'gstart': [], 'qstart': [], 'rstart': [], 'pstart': []}
4346
self.codon_aln = ''
4447

@@ -91,7 +94,7 @@ def set_pos_from_cds(self, virus):
9194
"""
9295
Gives the position of a sequence relative to the start of the coding sequence
9396
"""
94-
if self.region_name is not '5\'LTR':
97+
if self.region_name != '5\'LTR':
9598
if virus == 'hiv':
9699
cds_start = 790
97100
else:
@@ -129,7 +132,10 @@ def set_pos_from_rstart(self, region):
129132
end_offset = self.local_ncoords[1] - region.local_ncoords[1]
130133
self.rel_pos['rstart'] = [start_offset, end_offset]
131134

132-
def set_pos_from_qstart(self, query):
135+
def set_pos_from_qstart(self, query, base):
136+
if self.local_ncoords is None or self.local_pcoords is None:
137+
self.set_local_coords(self.get_global_coords(base), base)
138+
133139
start_offset = query.local_ncoords[0] - self.local_ncoords[0]
134140
end_offset = query.local_ncoords[1] - self.local_ncoords[1]
135141
self.rel_pos['qstart'] = [start_offset, end_offset]
@@ -213,13 +219,14 @@ def set_regions(virus, nt_reference, nt_coords, aa_reference, aa_coords):
213219
nucl_coords = [int(nt_line[1]), int(nt_line[2])]
214220

215221
seq_region = GenomeRegion(nt_line[0])
216-
seq_region.set_seq_from_ref(nt_reference, 'nucl')
217222

218223
# Set global and local nucleotide coordinates
219224
seq_region.set_global_coords(nucl_coords, 'nucl')
220225
local_coords = seq_region.global_to_local_index(nucl_coords, 'nucl')
221226
seq_region.set_local_coords(local_coords, 'nucl')
222227

228+
seq_region.set_seq_from_ref(nt_reference, 'nucl')
229+
223230
# Set relative positions
224231
seq_region.set_pos_from_cds(virus)
225232
seq_region.set_pos_from_gstart()
@@ -240,13 +247,14 @@ def set_regions(virus, nt_reference, nt_coords, aa_reference, aa_coords):
240247
for i in range(len(prot_names)):
241248
for seq_region in genome_regions:
242249
if prot_names[i] in seq_region.region_name:
243-
seq_region.set_seq_from_ref(aa_reference, 'prot')
244250

245251
# Set global and local protein coordinates
246252
seq_region.set_global_coords(prot_coords[i], 'prot')
247253
local_coords = seq_region.global_to_local_index(prot_coords[i], 'prot')
248254
seq_region.set_local_coords(local_coords, 'prot')
249255

256+
seq_region.set_seq_from_ref(aa_reference, 'prot')
257+
250258
seq_region.set_pos_from_pstart(virus)
251259

252260
return genome_regions
@@ -460,7 +468,7 @@ def find_matches(virus, base, ref_regions, match_coordinates):
460468
# Set relative positions
461469
query_region.set_pos_from_cds(virus)
462470
query_region.set_pos_from_gstart()
463-
query_region.set_pos_from_qstart(ref_region)
471+
query_region.set_pos_from_qstart(ref_region, base)
464472
query_region.set_pos_from_rstart(ref_region)
465473
query_region.set_pos_from_pstart(virus)
466474

@@ -485,7 +493,7 @@ def set_protein_equivalents(query_reg, ref_regions):
485493
non_coding = ["5'LTR", "TAR", "3'LTR"]
486494
for ref_reg in ref_regions:
487495
if ref_reg.region_name == query_reg.region_name and ref_reg.region_name not in non_coding:
488-
if ref_reg.codon_aln is not None and query_reg.aa_coords is not None:
496+
if ref_reg.codon_aln is not None and query_reg.global_pcoords is not None:
489497
prot_equiv = ref_reg.codon_aln[query_reg.global_ncoords[0]: query_reg.global_ncoords[1]]
490498
prot_equiv = re.sub('[-]', '', prot_equiv)
491499
query_reg.set_aa_seq(prot_equiv)

0 commit comments

Comments
 (0)